├── Imgur ├── Auth │ ├── AccessToken.py │ ├── Anonymous.py │ ├── Base.py │ ├── Expired.py │ └── __init__.py ├── DirtyCommenter.py ├── Factory.py ├── Imgur.py ├── RateLimit.py └── __init__.py ├── LICENSE ├── NewMain.py ├── __init__.py ├── blocked.txt ├── borders ├── borders_wmy ├── config.json ├── database ├── ImgurDBConnector.py └── __init__.py ├── docs ├── INSTALL.txt ├── SOME_INFOS.txt ├── TODO.txt └── imgur_finduserid_user.js ├── dupFinder ├── FindDuplicates.py ├── ImageHash.py └── __init__.py ├── mynet ├── Web.py └── __init__.py ├── points ├── sqlite_extension ├── Hamming.c ├── Sqlite3_Hamming.dll ├── sqlite3.h └── sqlite3ext.h ├── stuff ├── Format.py ├── Similarity.py └── __init__.py ├── unblocked.txt ├── utils ├── UpdateToken.py └── __init__.py └── worker ├── CommentSender.py └── __init__.py /Imgur/Auth/AccessToken.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from .Base import Base as AuthBase 4 | import time as dt 5 | 6 | class AccessToken(AuthBase): 7 | def __init__(self, access, refresh, expire_time): 8 | self.access = access 9 | self.refresh = refresh 10 | self.expire_time = expire_time 11 | 12 | def needToAuthorize(self, time): 13 | return (self.expire_time <= time) 14 | 15 | def addAuthorizationHeader(self, request): 16 | request.add_header('Authorization', 'Bearer ' + self.access) 17 | return request 18 | 19 | def getAccessToken(self): 20 | return self.access 21 | 22 | def getRefreshToken(self): 23 | return self.refresh -------------------------------------------------------------------------------- /Imgur/Auth/Anonymous.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | class Anonymous: 4 | def __init__(self, client_id): 5 | self.client_id = client_id 6 | 7 | def needToAuthorize(self): 8 | return False 9 | 10 | def authorize(self): 11 | pass 12 | 13 | def addAuthorizationHeader(self, request): 14 | 
request.add_header('Authorization', 'Client-ID ' + self.client_id) 15 | return request -------------------------------------------------------------------------------- /Imgur/Auth/Base.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | class Base: 4 | def needToAuthorize(self, time): 5 | '''Do we need to refresh our authorization token?''' 6 | pass 7 | def authorize(self, api, requestfactory): 8 | '''Refresh our access token''' 9 | pass 10 | def addAuthorizationHeader(self, request): 11 | pass -------------------------------------------------------------------------------- /Imgur/Auth/Expired.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | class Expired(BaseException): 4 | def __str__(self): 5 | return "Access token invalid or expired." 6 | 7 | -------------------------------------------------------------------------------- /Imgur/Auth/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SleepProgger/RepostStatistics/c9e652d4a98e85105b543deabfb9eda2ff2a5271/Imgur/Auth/__init__.py -------------------------------------------------------------------------------- /Imgur/DirtyCommenter.py: -------------------------------------------------------------------------------- 1 | import json 2 | from time import time, sleep 3 | 4 | def lognprint(*args): 5 | print 'DirtyCommenter', args 6 | 7 | class DirtyCommenter(object): 8 | def __init__(self, factory, config): 9 | self.config = config 10 | self.factory = factory 11 | self.refToken = config['refresh_token'] 12 | self.acToken = 0 13 | self.validTill = 0 14 | 15 | def refreshToken(self): 16 | imgur = self.factory.buildAPI() 17 | req = self.factory.buildRequestOAuthRefresh(self.refToken) 18 | res = imgur.retrieveRaw(req) 19 | 20 | lognprint('Access Token: %s\nRefresh Token: %s\nExpires: %d seconds from now.' 
% ( 21 | res[1]['access_token'], 22 | res[1]['refresh_token'], 23 | res[1]['expires_in'] 24 | )) 25 | self.validTill = time()+res[1]['expires_in'] - 20 # cause i am to lazy to catch timing errors 26 | self.acToken = res[1]['access_token'] 27 | self.refToken = res[1]['refresh_token'] 28 | self.config['refresh_token'] = self.refToken 29 | 30 | with open('config.json', 'w') as cFile: 31 | json.dump(self.config, cFile) 32 | 33 | # 34 | # ATTENTION: 35 | # I replaced the real functions interacting with imgur with dummy functions, to avoid spamming imgur when testing. 36 | # OFC: you should remove them when in an "stable" state 37 | # 38 | 39 | def writeComment(self, galerie, text, retries=3): 40 | lognprint("DUMMY COMMENTER: Write Comment to gallery %s with retries %i: %s" % (galerie, retries, text)) 41 | 42 | def _writeComment(self, galerie, text, retries=3): 43 | for i in xrange(retries): 44 | try: 45 | if time() >= self.validTill: 46 | self.refreshToken() 47 | 48 | if len(text) > 140: # TODO split into more messages 49 | text = text[:140] 50 | auth = self.factory.buildOAuth(self.acToken, None, int(time())+3600) 51 | imgur = self.factory.buildAPI(auth) 52 | req = self.factory.buildRequest((u'gallery', galerie, u'comment'), { 53 | 'comment': text.encode('utf-8') 54 | }) 55 | res = imgur.retrieve(req) 56 | lognprint(u"Success! https://www.imgur.com/gallery/%s/comment/%s" % (galerie, res['id'])) 57 | return res['id'] 58 | except Exception as e: # TODO catch correct Exception 59 | # TODO except when message contains "returned empty response with code 200" or send and check error code (what to do whith the already posted db ?)) 60 | lognprint(u'write comment exception', str(e), "at gallery", galerie) 61 | return False 62 | 63 | 64 | def writeReply(self, galerie, parentId, text, retries=3): 65 | lognprint("DUMMY COMMENTER: Write Reply to gallery %s, parentid %s, with retries %i: %s." 
% (galerie, parentId, retries, text)) 66 | 67 | def _writeReply(self, galerie, parentId, text, retries=3): 68 | for i in xrange(retries): 69 | try: 70 | if time() >= self.validTill: 71 | self.refreshToken() 72 | 73 | if len(text) > 140: # TODO split into more messages 74 | text = text[:140] 75 | auth = self.factory.buildOAuth(self.acToken, None, int(time())+3600) 76 | imgur = self.factory.buildAPI(auth) 77 | req = self.factory.buildRequest(('comment',), { 78 | u'comment': text.encode('utf-8'), 79 | u'image_id': galerie, 80 | u'parent_id': parentId 81 | }) 82 | res = imgur.retrieve(req) 83 | lognprint(u"Success! https://www.imgur.com/gallery/%s/comment/%s" % (galerie, res['id'])) 84 | return res['id'] 85 | except Exception as e: # TODO catch correct Exception 86 | lognprint(u'write reply exception', str(e), "at gallerie", galerie) 87 | return False 88 | 89 | def sendAuthReqMessage(self, endpoint, data=None, retries=1): 90 | lognprint("DUMMY COMMENTER: Send generic authed message to endpoint %s." 
% (endpoint,)) 91 | 92 | def _sendAuthReqMessage(self, endpoint, data=None, retries=1): 93 | for i in xrange(retries): 94 | try: 95 | if time() >= self.validTill: 96 | self.refreshToken() 97 | auth = self.factory.buildOAuth(self.acToken, None, int(time())+3600) 98 | imgur = self.factory.buildAPI(auth) 99 | req = self.factory.buildRequest(endpoint, data=data) 100 | res = imgur.retrieve(req) 101 | lognprint(u"Success sending", endpoint, data) 102 | return res 103 | except Exception as e: # TODO catch correct Exception 104 | lognprint(u'Exception requesting', endpoint, data, ":", str(e)) 105 | return False 106 | 107 | def _msplit_cut_follow(self): 108 | # [] 109 | pass 110 | 111 | def writeComent_(self, gallerie, text, handler=None): 112 | if handler is None: handler = self._msplit_cut 113 | text = handler(text) -------------------------------------------------------------------------------- /Imgur/Factory.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | #import urllib.request, urllib.parse 4 | from urllib2 import Request 5 | from urllib import urlencode 6 | 7 | import base64, os.path 8 | from .Imgur import Imgur 9 | from .RateLimit import RateLimit 10 | from .Auth.AccessToken import AccessToken 11 | from .Auth.Anonymous import Anonymous 12 | 13 | 14 | class Factory: 15 | API_URL = "https://api.imgur.com/" 16 | 17 | def __init__(self, config): 18 | self.config = config 19 | if 'api' in self.config: 20 | self.API_URL = self.config['api'] 21 | 22 | def getAPIUrl(self): 23 | return self.API_URL 24 | 25 | def buildAPI(self, auth = None, ratelimit = None): 26 | if auth is None: 27 | auth = self.buildAnonymousAuth() 28 | if ratelimit is None: 29 | ratelimit = self.buildRateLimit() 30 | return Imgur(self.config['client_id'], self.config['secret'], auth, ratelimit) 31 | 32 | def buildAnonymousAuth(self): 33 | return Anonymous(self.config['client_id']) 34 | 35 | def buildOAuth(self, access, refresh, 
expire_time): 36 | return AccessToken(access, refresh, expire_time) 37 | 38 | def buildRequest(self, endpoint, data = None): 39 | '''Expects an endpoint like 'image.json' or a tuple like ('gallery', 'hot', 'viral', '0'). 40 | Prepends 3/ and appends \.json to the tuple-form, not the endpoint form.''' 41 | if isinstance(endpoint, str): 42 | url = self.API_URL + endpoint 43 | else: 44 | url = self.API_URL + '3/' + ('/'.join(endpoint)) + ".json" 45 | # # py 3 46 | # req = urllib.request.Request(url) 47 | 48 | print "requesting", url #TODO: a 49 | 50 | req = Request(url) 51 | if data is not None: 52 | req.add_data(urlencode(data).encode('utf-8')) 53 | 54 | return req 55 | 56 | def buildRateLimit(self, limits = None): 57 | '''If none, defaults to fresh rate limits. Else expects keys "client_limit", "user_limit", "user_reset"''' 58 | if limits is not None: 59 | return RateLimit(limits['client_limit'], limits['user_limit'], limits['user_reset']) 60 | else: 61 | return RateLimit() 62 | 63 | def buildRateLimitsFromServer(self, api): 64 | '''Get the rate limits for this application and build a rate limit model from it.''' 65 | req = self.buildRequest('credits') 66 | res = api.retrieve(req) 67 | return RateLimit(res['ClientRemaining'], res['UserRemaining'], res['UserReset']) 68 | 69 | 70 | def buildRequestUploadFromPath(self, path, params = dict()): 71 | fd = open(path, 'rb') 72 | contents = fd.read() 73 | b64 = base64.b64encode(contents) 74 | data = { 75 | 'image': b64, 76 | 'type': 'base64', 77 | 'name': os.path.basename(path) 78 | } 79 | data.update(params) 80 | return self.buildRequest(('upload',), data) 81 | 82 | def buildRequestOAuthTokenSwap(self, grant_type, token): 83 | data = { 84 | 'client_id': self.config['client_id'], 85 | 'client_secret': self.config['secret'], 86 | 'grant_type': grant_type 87 | } 88 | 89 | if grant_type == 'authorization_code': 90 | data['code'] = token 91 | if grant_type == 'pin': 92 | data['pin'] = token 93 | 94 | return 
self.buildRequest('oauth2/token', data) 95 | 96 | def buildRequestOAuthRefresh(self, refresh_token): 97 | data = { 98 | 'refresh_token': refresh_token, 99 | 'client_id': self.config['client_id'], 100 | 'client_secret': self.config['secret'], 101 | 'grant_type': 'refresh_token' 102 | } 103 | return self.buildRequest('oauth2/token', data) -------------------------------------------------------------------------------- /Imgur/Imgur.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from urllib2 import urlopen, HTTPError, URLError 4 | 5 | import json 6 | from .Auth.Expired import Expired 7 | 8 | class Imgur: 9 | 10 | def __init__(self, client_id, secret, auth, ratelimit): 11 | self.client_id = client_id 12 | self.secret = secret 13 | self.auth = auth 14 | self.ratelimit = ratelimit 15 | 16 | def retrieveRaw(self, request): 17 | request = self.auth.addAuthorizationHeader(request) 18 | try: 19 | req = urlopen(request, timeout=20) 20 | except URLError as e: 21 | print e 22 | return (e, {u'success':False, 'data':{'error':{'message':str(e)}}}) 23 | t = req.read() 24 | # print "FU", t 25 | 26 | if not t: 27 | return (req, {u'success':False,'data':{'error':{'message':'%s returned empty response with code %i and content "%s"' % (request.get_full_url(), req.getcode(), str(t))}}}) 28 | res = json.loads(t.decode('utf-8')) 29 | return (req, res) 30 | 31 | def retrieve(self, request): 32 | try: 33 | (req, res) = self.retrieveRaw(request) 34 | except HTTPError as e: 35 | if e.code == 403: 36 | raise Expired() 37 | else: 38 | print("Error %d\n%s\n" % (e.code, e.read())) 39 | raise e 40 | 41 | self.ratelimit.update(req.info()) 42 | if res['success'] is not True: 43 | if 'data' in res and 'error' in res['data']: 44 | raise Exception(res['data']['error']['message']) 45 | raise Exception("Empty return: %s / %s" % (str(res), str(req))) 46 | 47 | return res['data'] 48 | 49 | def getRateLimit(self): 50 | return 
self.ratelimit 51 | 52 | def getAuth(self): 53 | return self.auth 54 | 55 | def getClientID(self): 56 | return self.client_id -------------------------------------------------------------------------------- /Imgur/RateLimit.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import time as dt 4 | 5 | class RateLimit: 6 | 7 | def __init__(self, client_remaining = 12500, user_remaining = 500, user_reset = None): 8 | self.client_remaining = client_remaining 9 | self.user_remaining = user_remaining 10 | self.user_reset = user_reset 11 | 12 | def update(self, headers): 13 | '''Update the rate limit state with a fresh API response''' 14 | 15 | if 'X-RateLimit-ClientRemaining' in headers: 16 | self.client_remaining = int(headers['X-RateLimit-ClientRemaining']) 17 | self.user_remaining = int(headers['X-RateLimit-UserRemaining']) 18 | self.user_reset = int(headers['X-RateLimit-UserReset']) 19 | 20 | def is_over(self, time): 21 | return self.would_be_over(0, time) 22 | 23 | def would_be_over(self, cost): 24 | #return self.client_remaining < cost or (self.user_reset is not None and self.user_reset > time and self.user_remaining < cost) 25 | return self.client_remaining < cost or (self.user_reset is not None and self.user_reset > dt.time() and self.user_remaining < cost) 26 | 27 | def __str__(self, time = None): 28 | # can't ask for time by DI when doing str(x). 
29 | if time is None: 30 | time = dt.time() 31 | 32 | exp = int(self.user_reset) - int(time) 33 | return "" % (self.client_remaining, self.user_remaining, exp) 34 | -------------------------------------------------------------------------------- /Imgur/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SleepProgger/RepostStatistics/c9e652d4a98e85105b543deabfb9eda2ff2a5271/Imgur/__init__.py -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 SleepProgger 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /NewMain.py: -------------------------------------------------------------------------------- 1 | # -*- coding: UTF-8 -*- 2 | import sys 3 | import json 4 | from Imgur.Factory import Factory 5 | from time import time, sleep, gmtime, strftime, localtime 6 | from database.ImgurDBConnector import DBConnector 7 | from mynet import Web 8 | from stuff.Similarity import levenshtein_n 9 | from stuff.Format import crudeTimeFormat 10 | from dupFinder.FindDuplicates import SimilarImagesSql 11 | import Imgur.DirtyCommenter 12 | from worker import CommentSender 13 | import re 14 | from traceback import format_exc 15 | import codecs 16 | 17 | # 18 | # TODO 19 | # - PORT TO NEW IMGUR API PYTHON LIB. It is WAY better as the old version, and does't need most of the code i added. 20 | # When done, remove the old imgur api files and use them from the installed module... 21 | # - Do the job split stuff (what did i even ment with that ? oO) 22 | # - check if image is still online and doesn't changed before posting ? 23 | # - Check if image is meme, reaction pic ? 24 | # - check if it was from the same user and other stuff (reddit ...) (same user is done) 25 | # - Synonym before levensthein for title 26 | # - adjust levensthein, weight ops (remove more as change ?) 27 | # - Cooler similarity bar (unicode with ~3 widthes) ? (stars are as good as it gets ? (percentage would be more efficient though.)) 28 | # - Register for repost notification 29 | # - month format for last seen 30 | # - galeries (might not be doable within api limits) 31 | # - gifs (something better as just crc the data ?) 32 | # - add repost tags (done) 33 | # - use logging module instead of my crude own impl. 
34 | # 35 | 36 | 37 | logfile = 'imgur.log' 38 | lfile = codecs.open(logfile, 'ab', encoding='utf-8') 39 | def lognprint(*args): 40 | lognprint_base(strftime(u"%a, %d %b %Y %H:%M:%S", localtime()) + u' :\t' + (u' '.join(map(unicode, args))) + u'\n') 41 | def lognprint_base(out): 42 | lfile.write(out) 43 | lfile.flush() 44 | sys.stdout.write(out.encode("utf-8")) 45 | Imgur.DirtyCommenter.lognprint = lognprint 46 | CommentSender.lognprint = lognprint 47 | Web.lognprint = lognprint 48 | DirtyCommenter = Imgur.DirtyCommenter.DirtyCommenter 49 | CommentSender = CommentSender.CommentSender 50 | 51 | def genUnicodeSuccessBar(length, curVal, maxVal, elems=[u'▃',u'▅',u'▇'], blank=u'▁', borderLeft=u'╠', borderRight=u'╣'): 52 | elemsWBlank = elems + [blank] 53 | ret = borderLeft 54 | n = (float(curVal)/maxVal) 55 | full = int(n*length) 56 | rest = int(round((n*length - full)*len(elems))) 57 | ret += elems[-1]*full 58 | if full < length and rest > 0: 59 | ret += elemsWBlank[rest-1] 60 | ret += blank*(length-full-1) 61 | else: 62 | ret += blank*(length-full) 63 | ret += borderRight 64 | return ret 65 | 66 | 67 | 68 | str_duplicate = u"Image last seen %s before at http://imgur.com/gallery/%s %s" 69 | str_similar = u"Image probably last seen %s before at http://imgur.com/gallery/%s %s" 70 | str_duplicate_first = u"First seen %s before at http://imgur.com/gallery/%s %s" 71 | str_similar_first = u"Probably first seen %s before at http://imgur.com/gallery/%s %s" 72 | str_duplicate_firstTimes = u"Seen %i times since %s before at http://imgur.com/gallery/%s %s" 73 | str_similar_firstTimes = u"Probably seen %i times since %s before at http://imgur.com/gallery/%s %s" 74 | 75 | _crudeTimeFormat = crudeTimeFormat 76 | crudeTimeFormat = lambda *args: unicode(_crudeTimeFormat(*args)) 77 | 78 | #exit() 79 | def sendDupMessage(newE, dups, commenter, db, byCrc=True): 80 | # newE = (newE[u'datetime'], "", newE[u'id'], newE[u'title']) 81 | newE = (newE[u'datetime'], "", newE[u'account_id'], 
newE[u'id'], newE[u'title']) 82 | #datetime, userurl, userid, link, title = dups[-1] 83 | # trip = False 84 | # if lastSeen[2] == firstSeen[2]: 85 | # newE, lastSeen = sorted([newE, lastSeen], reverse=True) 86 | # else: 87 | # newE, lastSeen, firstSeen = sorted([newE, lastSeen, firstSeen], reverse=True) 88 | # trip = True 89 | 90 | if db.commentWritten(newE[2]): 91 | return 92 | 93 | message = list() 94 | 95 | #commenter.appendComment(galleryId, message, retries, parentCommentId=-1, childComment=None): 96 | 97 | # shouldn'T be needed as the db sort it by date, but sometimes there is a bug ?! TODO: 98 | dups.sort() 99 | 100 | lastSeen = dups[-1] 101 | firstSeen = dups[0] 102 | seen = len(dups) 103 | trip = firstSeen[3] != lastSeen[3] 104 | 105 | # last seen 106 | to, tn = (lastSeen[4].lower().strip(), newE[4].lower().strip()) 107 | titleInf = u"Title similarity: " + genUnicodeSuccessBar(5, 1-levenshtein_n(to, tn), 1, [u'★'], u'☆', u'', u'') 108 | if byCrc: 109 | #commentId = commenter.writeComment(newE[2], str_duplicate % (crudeTimeFormat(newE[0]-lastSeen[0]),lastSeen[2], titleInf)) 110 | message = [commenter.TYPE_COMMENT_OR_REPLY, newE[3], str_duplicate % (crudeTimeFormat(newE[0]-lastSeen[0]),lastSeen[3], titleInf), 5, -1] 111 | else: 112 | #commentId = commenter.writeComment(newE[2], str_similar % (crudeTimeFormat(newE[0]-lastSeen[0]), lastSeen[2], titleInf)) 113 | message = [commenter.TYPE_COMMENT_OR_REPLY, newE[3], str_similar % (crudeTimeFormat(newE[0]-lastSeen[0]), lastSeen[3], titleInf), 5, -1] 114 | 115 | 116 | # first seen 117 | if trip: 118 | to, tn = (firstSeen[4].lower().strip(), newE[4].lower().strip()) 119 | titleInf = u"Title similarity: " + genUnicodeSuccessBar(5, 1-levenshtein_n(to, tn), 1, [u'★'], u'☆', u'', u'') 120 | if byCrc: 121 | if seen > 2: 122 | #commentId = commenter.writeReply(newE[2], commentId, str_duplicate_firstTimes % (seen-1, crudeTimeFormat(newE[0]-firstSeen[0]), firstSeen[2], titleInf)) 123 | message.append( 
[commenter.TYPE_COMMENT_OR_REPLY, newE[3], str_duplicate_firstTimes % (seen-1, crudeTimeFormat(newE[0]-firstSeen[0]), firstSeen[3], titleInf), 7, -1] ) 124 | else: 125 | #commentId = commenter.writeReply(newE[2], commentId, str_duplicate_first % (crudeTimeFormat(newE[0]-firstSeen[0]), firstSeen[2], titleInf)) 126 | message.append( [commenter.TYPE_COMMENT_OR_REPLY, newE[3], str_duplicate_first % (crudeTimeFormat(newE[0]-firstSeen[0]), firstSeen[3], titleInf), 7, -1] ) 127 | else: 128 | if seen > 2: 129 | #commentId = commenter.writeReply(newE[2], commentId, str_similar_firstTimes % (seen-1, crudeTimeFormat(newE[0]-firstSeen[0]), firstSeen[2], titleInf)) 130 | message.append( [commenter.TYPE_COMMENT_OR_REPLY, newE[3], str_similar_firstTimes % (seen-1, crudeTimeFormat(newE[0]-firstSeen[0]), firstSeen[3], titleInf), 7, -1] ) 131 | else: 132 | #commentId = commenter.writeReply(newE[2], commentId, str_similar_first % (crudeTimeFormat(newE[0]-firstSeen[0]), firstSeen[2], titleInf)) 133 | message.append( [commenter.TYPE_COMMENT_OR_REPLY, newE[3], str_similar_first % (crudeTimeFormat(newE[0]-firstSeen[0]), firstSeen[3], titleInf), 7, -1] ) 134 | #sleep(28) 135 | 136 | commenter.queue.append(message) 137 | db.logComment(newE[3], -1) 138 | 139 | 140 | popcount = lambda n: bin(n).count('1') 141 | 142 | 143 | def updateFields(galImage, db): 144 | # update userid 145 | db.execute('UPDATE galeries SET userid = ? WHERE link = ?;', (galImage[u'account_id'], galImage[u'id']) ) 146 | # update size, lastChecked, width, height 147 | db.execute('UPDATE images SET size = ?, width = ?, height = ?, lastChecked = ? 
WHERE galerieId = (SELECT rowid FROM galeries WHERE link = ?);', (galImage[u'size'], galImage[u'width'], galImage[u'height'], time(), galImage[u'id']) ) 148 | 149 | 150 | from PIL import Image 151 | 152 | 153 | def update_elem(elem, factory, imgur, dupcheck, commenter, times, maxPostTime, alwaysPost=False): 154 | # times -> updates, known, skipped, errors, albums = (0, 0, 0, 0, 0) 155 | #time_hash, time_db_gen, time_db_dup, time_api_request, time_image, time_message = (0, 0, 0, 0, 0, 0) # TIMING 156 | times, times_ = times 157 | 158 | con = dupcheck.db 159 | if elem[u'is_album']: 160 | times[4] += 1 161 | return False 162 | times_[1] -= time() # TIMING 163 | if con.galerieExists(elem[u'id']): 164 | times[1] += 1 165 | updateFields(elem, con) 166 | times_[1] += time() # TIMING 167 | return False 168 | times_[1] += time() # TIMING 169 | 170 | #TODO: 171 | #sleep(3) 172 | #yield True 173 | #time_image -= time() # TIMING 174 | 175 | # dirty fix because when the size is higher x (TODO) imgur delivers the thumbnail as link url 176 | if elem['animated'] and elem[u'link'].endswith("h.gif") and elem['gifv']: 177 | lognprint('WARNING: Switch %s with %s' % (elem['link'], elem['gifv'])) 178 | elem[u'link'] = elem[u'gifv'] 179 | data = Web.request(elem[u'link'], retries=2) 180 | #time_image += time() # TIMING 181 | if not data: 182 | lognprint("Error requesting image", elem[u'link']) 183 | times[3] += 1 184 | return False 185 | try: 186 | #times_[0] -= time() # TIMING 187 | dupcheck.newImage(data, not elem[u'animated']) 188 | #times_[0] += time() # TIMING 189 | # aHashes, dHashes_h, dHashes_v, crc, bits, animated, user, link, datetime, title, iPath, size, bitcountupper, bitcountlower, mDiffA, mDiffdh, mDiffdv 190 | except Exception as e: # TODO catch specific excption 191 | lognprint('EXCEPTION: Hash creation exception:', e) 192 | return False 193 | 194 | times_[2] -= time() # TIMING 195 | nid, ids = con.insertImageAndDups(dupcheck.aHashes, dupcheck.dHashes_h, 
dupcheck.dHashes_v, dupcheck.crc, dupcheck.bits, 196 | elem[u'animated'], elem[u'account_url'], elem[u'account_id'], elem[u'id'], elem[u'datetime'], elem[u'title'], elem[u'link'], elem[u'size'], dupcheck.bits+22, dupcheck.bits-22, 5, 8, 8, elem[u'width'], elem[u'height'], 197 | 3, 10 198 | ) 199 | times_[2] += time() # TIMING 200 | times_[1] -= time() # TIMING 201 | dups = con.get_image_data_from_ids(ids, elem[u'datetime']) 202 | times_[1] += time() # TIMING 203 | 204 | #lognprint("DEBUG: nid/ins:", nid, ins) 205 | postIt = False 206 | if alwaysPost: 207 | postIt = True 208 | elif elem[u'ups']-elem[u'downs'] < 40: 209 | times[2] += 1 210 | else: 211 | postIt = True 212 | if postIt and dups and len(dups) > 0: 213 | 214 | #gallery, tag, retries, upvote=True 215 | # TODO: this should always be send (or never as tags are shit now anyway) 216 | #if alwaysPost or not ("mrw" in elem[u'title'].lower() or "mfw" in elem[u'title'].lower() or elem[u'title'].lower().startswith("when")): 217 | # commenter.appendTag(elem[u'id'], "rep ost", 7) 218 | #yield True 219 | #ret = comenter.sendAuthReqMessage(('gallery', gallery_id, "vote", "tag", tag, ("up" if upvote else "down")), {}) 220 | 221 | if not alwaysPost and not shouldIPost(con, elem, dups, maxPostTime): 222 | times[2] += 1 223 | else: 224 | lognprint("DUPLICATE:", "(", len(dups), ")", elem[u'id'], "==", dups) 225 | sendDupMessage(elem, dups, commenter, dupcheck.db, elem[u'animated']) 226 | times[0] += 1 227 | con.commit() 228 | return postIt 229 | 230 | 231 | def updateGallery(factory, imgur, dupcheck, commenter, pages=5, start=0, gType="hot", timeD="day", post=True, maxPostTime=60*60*24*3): 232 | con = dupcheck.db 233 | for i in xrange(start, start+pages): 234 | #updates, known, skipped, errors, albums = (0, 0, 0, 0, 0) 235 | times = [0]*5 236 | #time_hash, time_db_gen, time_db_dup, time_api_request, time_image, time_message = (0, 0, 0, 0, 0, 0) # TIMING 237 | times_ = [0]*6 238 | 239 | times_[3] -= time() 240 | req = 
factory.buildRequest(('gallery', gType, 'viral', str(i), timeD)) 241 | for j in xrange(5): 242 | try: 243 | res = imgur.retrieve(req) 244 | break 245 | except Exception as e: 246 | lognprint("EXCEPTION:", e) 247 | times_[3] += time() # TIMING 248 | sleep(60) 249 | times_[3] -= time() # TIMING 250 | lognprint("Retry #", j+1) 251 | else: 252 | lognprint("Connection problems") 253 | raise StopIteration() 254 | times_[3] += time() # TIMING 255 | 256 | if not res: 257 | lognprint("Page", i, "not found") 258 | continue 259 | 260 | yield True 261 | 262 | #print "PAGE", i 263 | for elem in res: 264 | if not isinstance(elem, dict): 265 | lognprint("Error: Strange elem:", elem) 266 | times[3][0] += 1 267 | continue 268 | 269 | if update_elem(elem, factory, imgur, dupcheck, commenter, [times, times_], maxPostTime, alwaysPost=False): 270 | sleep(3) 271 | yield True 272 | lognprint("PAGE", gType, timeD, i, "=> updates:", times[0], "known:", times[1], "skipped:", times[2], "albums skipped:", times[4], "errors:", times[3], 'client_remaining', imgur.ratelimit.client_remaining) 273 | #time_hash, time_db_gen, time_db_dup, time_api_request, time_image, time_message = (0, 0, 0, 0, 0, 0) # TIMING 274 | lognprint("TIMES: time_hash: %.6f time_db_gen: %.6f time_db_dup: %.6f time_api_request: %.6f time_image: %.6f time_message: %.6f" % tuple(times_)) 275 | sleep(5) 276 | 277 | def shouldIPost(con, elem, dups, maxPostTime): 278 | #return True 279 | #lastDup = dups[-1] 280 | datetime, userurl, userid, link, title = dups[-1] 281 | to, tn = (elem[u'title'].lower().strip(), title.lower().strip()) 282 | levn = levenshtein_n(to, tn) 283 | # TODO: make this readable 284 | return ( 285 | time() < elem[u'datetime'] + maxPostTime 286 | and ( 287 | not ( 288 | # TODO: use userblacklist by id plus names till all entries have ids 289 | ( elem[u'account_url'] is not None and con.is_user_blocked(int(elem[u'account_id']))) 290 | #or ( userurl is not None and lastDup[1].lower() in userBlackList ) 291 | ) 
292 | ) 293 | and ( 294 | levn < 0.4 or ( 295 | abs(elem[u'datetime']-datetime) < 60*60*24*60 # time diff < 60 days 296 | and u'mrw' not in elem[u'title'].lower().split(u' ') # not mrw 297 | and u'mfw' not in elem[u'title'].lower().split(u' ') # not mfw 298 | and not elem[u'title'].lower().startswith(u'when') # not start with when 299 | ) 300 | 301 | ) 302 | # TODO: drop this when all user have an userid 303 | and (elem[u'account_url'] is None or elem[u'account_url'] != userurl) 304 | and (elem['account_id'] != 0 or elem['account_id'] != userid) 305 | ) 306 | 307 | def request(factory, imgurApi, data, kdata=None, times=5, delay=30): 308 | req = factory.buildRequest(data, kdata) 309 | for j in xrange(times): 310 | try: 311 | res = imgurApi.retrieve(req) 312 | break 313 | except Exception as e: 314 | lognprint("EXCEPTION requesting '%s' : " % str(kdata), e) 315 | sleep(delay) 316 | lognprint("Retry #", j+1) 317 | else: 318 | lognprint("Connection problems") 319 | return False 320 | return res 321 | 322 | from os.path import isfile 323 | from os import remove 324 | import math 325 | def main(): 326 | if isfile('.lock'): 327 | lognprint("Already running") 328 | exit(0) 329 | lock = open(".lock", "wb") 330 | lock.write(str(time())) 331 | lock.close(); 332 | 333 | lognprint_base("#"*80+"\n") 334 | lognprint("START", strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime())) 335 | lognprint_base("#"*80+"\n") 336 | 337 | 338 | 339 | # open our db and stuff 340 | connection = DBConnector("test", re_analyze=False) 341 | dupcheck = SimilarImagesSql(None, connection) 342 | config = None 343 | try: 344 | fd = open('config.json', 'r') 345 | except: 346 | lognprint("config file [config.json] not found.") 347 | sys.exit(1) 348 | try: 349 | config = json.loads(fd.read()) 350 | except: 351 | lognprint("invalid json in config file.") 352 | sys.exit(1) 353 | factory = Factory(config) 354 | imgurApi = factory.buildAPI() 355 | commenter = CommentSender(factory, config, 40) 356 | commentTask = 
commenter.startSendLoop() 357 | 358 | 359 | 360 | # Adding user ids from our blocked ids file. 361 | # This isn't really secure but works so far. 362 | # The file is cleared after adding. 363 | lognprint("Adding user from blocked.txt") 364 | fd = open("blocked.txt", "rb+") 365 | for line in fd: 366 | line = line.strip() 367 | try: 368 | lognprint("Add user with id %s to blocklist" % line) 369 | connection.add_blocked_user(int(line)) 370 | except Exception as e: 371 | lognprint("Problem adding user to blocklist:", e) 372 | fd.truncate(0) 373 | fd.close() 374 | # Same for user who wishe to be unblocked. 375 | lognprint("Removing user from unblocked.txt") 376 | fd = open("unblocked.txt", "rb+") 377 | for line in fd: 378 | line = line.strip() 379 | try: 380 | lognprint("Unblock user with id %s" % line) 381 | lognprint("Status: ", connection.remove_blocked_user(int(line))) 382 | except Exception as e: 383 | lognprint("Problem unblocking user:", e) 384 | fd.truncate(0) 385 | fd.close() 386 | 387 | 388 | # used for debugging. Not used atm (TODO: REMOVE and use profiler if required ?!?) 
389 | # ret = dict() 390 | # debug = False 391 | # if debug: 392 | # connection.galerieExists = Timing.funcHook(connection.galerieExists, ret) 393 | # connection.commit = Timing.funcHook(connection.commit, ret) 394 | # connection.execute = Timing.funcHook(connection.execute, ret) 395 | # connection.getByAvg = Timing.funcHook(connection.getByAvg, ret) 396 | # connection.getByCrc = Timing.funcHook(connection.getByCrc, ret) 397 | # connection.insertImage = Timing.funcHook(connection.insertImage, ret) 398 | # 399 | # ImageHash.aHash_256 = Timing.funcHook(ImageHash.aHash_256, ret) 400 | # ImageHash.dHash_256_h = Timing.funcHook(ImageHash.dHash_256_h, ret) 401 | # ImageHash.dHash_256_v = Timing.funcHook(ImageHash.dHash_256_v, ret) 402 | 403 | 404 | # Crawl the tops of the day from the different galleries, and reanalyze the db, as something seems to get messed up with the indices TODO: check what 405 | for i in updateGallery(factory, imgurApi, dupcheck, commenter, pages=4, start=0, gType="user", timeD="day"): commentTask.next() 406 | #for i in updateGallery(factory, imgurApi, dupcheck, commenter, 3, start=0, gType="user", timeD="day"): commentTask.next() 407 | connection.execute("ANALYZE;") 408 | #for i in updateGallery(factory, imgurApi, dupcheck, commenter, 2, start=4, gType="user", timeD="day"): commentTask.next() 409 | for i in updateGallery(factory, imgurApi, dupcheck, commenter, pages=4, start=0, gType="hot", timeD="day"): commentTask.next() 410 | #for i in updateGallery(factory, imgurApi, dupcheck, commenter, 3, start=0, gType="hot", timeD="day"): commentTask.next() 411 | connection.execute("ANALYZE;") 412 | for i in updateGallery(factory, imgurApi, dupcheck, commenter, pages=4, start=0, gType="top", timeD="day"): commentTask.next() 413 | #for i in updateGallery(factory, imgurApi, dupcheck, commenter, 3, start=0, gType="top", timeD="day"): commentTask.next() 414 | connection.execute("ANALYZE;") 415 | 416 | 417 | # Add old entries. 
Pretty dirty but works (kind of) 418 | tstep, hstep, nstep = (4, 4, 4) 419 | #tstep, hstep, nstep = (5, 5, 5) 420 | tstart, hstart, nstart = json.load(open('borders', 'rb')) 421 | #map(int, bfile.read().split(';')) 422 | for i in updateGallery(factory, imgurApi, dupcheck, commenter, tstep, start=tstart, gType="top", timeD="all", post=True): commentTask.next() 423 | connection.execute("ANALYZE;") 424 | for i in updateGallery(factory, imgurApi, dupcheck, commenter, hstep, start=hstart, gType="hot", timeD="all", post=True): commentTask.next() 425 | connection.execute("ANALYZE;") 426 | for i in updateGallery(factory, imgurApi, dupcheck, commenter, nstep, start=nstart, gType="user", timeD="all", post=True): commentTask.next() 427 | connection.execute("ANALYZE;") 428 | # save the sites from which to crawl at the next run 429 | json.dump([(tstart+tstep)%2000, (hstart+hstep)%800, (nstart+nstep)%2000], open('borders', 'wb') ) 430 | 431 | w, m, y = json.load(open('borders_wmy', 'rb')) 432 | for i in updateGallery(factory, imgurApi, dupcheck, commenter, 4, start=w, gType="top", timeD="week", post=True): commentTask.next() 433 | connection.execute("ANALYZE;") 434 | for i in updateGallery(factory, imgurApi, dupcheck, commenter, 4, start=m, gType="top", timeD="month", post=True): commentTask.next() 435 | connection.execute("ANALYZE;") 436 | for i in updateGallery(factory, imgurApi, dupcheck, commenter, 4, start=y, gType="top", timeD="year", post=True): commentTask.next() 437 | connection.execute("ANALYZE;") 438 | json.dump([(w+4)%1000, (m+4)%2000, (y+4)%3000], open('borders_wmy', 'wb') ) 439 | 440 | 441 | while len(commenter.queue) > 0: 442 | sleep(1) 443 | commentTask.next() 444 | 445 | 446 | # check notifications and scan posts where they occured. 447 | # TODO: this is ugly as hell and should also move somewhere else. 
448 | dcommenter = commenter.commenter #DirtyCommenter(factory, config) 449 | notifies = dcommenter.sendAuthReqMessage('3/notification.json?new=true', retries=3) 450 | n = 0 451 | img_con_id = -1 452 | notis = list() 453 | if notifies: 454 | for noti in notifies['messages']: 455 | nid = noti['id'] 456 | noti = noti['content'] 457 | if noti['with_account'] == u"48" and u"mentioned you in a comment" in noti["last_message"]: 458 | notis.append(nid) 459 | n += 1 460 | img_con_id = noti["id"] 461 | for i in xrange(1, 100): 462 | if n <= 0: break 463 | img_con = dcommenter.sendAuthReqMessage(('conversations', str(img_con_id), str(i)), retries=3) 464 | for message in img_con["messages"][::-1]: 465 | if n <= 0: break 466 | if not u"mentioned you in a comment" in message["body"]: 467 | continue 468 | n -= 1 469 | image_id = re.search("glory at http://imgur\.com/gallery/([a-zA-Z0-9]+)/comment/[0-9]+/", message["body"]) 470 | if not image_id: 471 | lognprint("Unparsable imgur notification '%s'" % str(message)) 472 | continue 473 | image_id = image_id.group(1) 474 | if connection.already_commented(image_id): 475 | lognprint("Got notify for %s but already commented." % str(image_id)) 476 | continue 477 | if connection.galerieExists(image_id): 478 | ori, dups = connection.get_reposts_by_image_hash(image_id) 479 | dups = list(dups) 480 | print "dup len", len(dups), dups 481 | if len(dups) <= 0: 482 | lognprint("Got notify for %s but is no repost." 
% str(image_id)) 483 | continue 484 | #g.datetime, g.userurl, g.userid, g.link, g.title 485 | newE = {'datetime':ori[0], "account_id":ori[2], "id":image_id, "title":ori[4] } 486 | sendDupMessage(newE, dups, commenter, connection, connection.is_animated_by_hash(image_id)) 487 | connection.commit() 488 | continue 489 | res = request(factory, imgurApi, ('gallery', 'image', str(image_id)), times=2, delay=20) 490 | sleep(20) 491 | if not res: continue 492 | if update_elem(res, factory, imgurApi, dupcheck, commenter, [[0]*5, [0]*6], 60*60*24*365*5, alwaysPost=True): 493 | lognprint("Successfuly summoned at %s." % image_id) 494 | else: 495 | lognprint("Failed summoning at %s." % image_id) 496 | connection.commit() 497 | 498 | if len(notis) > 0: img_con = dcommenter.sendAuthReqMessage(('notification',), {"ids":",".join(map(str, notis))}, retries=3) 499 | 500 | # Write out all remaining comments from the queue 501 | while len(commenter.queue) > 0: 502 | sleep(1) 503 | commentTask.next() 504 | 505 | 506 | 507 | 508 | 509 | # print some debuggin infos 510 | lognprint("Current images indexed:", connection.getImageCount()) 511 | lognprint('Oldest image', (time()-connection.getOldestImage())/31536000.0, 'years') 512 | lognprint('client_remaining', imgurApi.ratelimit.client_remaining) 513 | lognprint('user_remaining', imgurApi.ratelimit.user_remaining) 514 | lognprint('user_reset', imgurApi.ratelimit.user_reset - time(), 'seconds') 515 | 516 | # Get the current points, because i like statistics :) 517 | imgur = factory.buildAPI() 518 | req = factory.buildRequest(('account', 'RepostStatistics')) 519 | ret = imgur.retrieve(req)['reputation'] 520 | d = json.load(open('points', 'r')) 521 | d.append((time(), ret)) 522 | json.dump(d, open('points', 'w')) 523 | lognprint('Points', ret) 524 | 525 | lognprint('Reposts: %i' % connection.getReprostCount()) 526 | lognprint('blocked user:', connection.execute('SELECT count() from blocked_user;').fetchone()) 527 | 528 | # I updated the 
database format some times, so we need to get the additional data for the old posts. 529 | # This shows how much entries still need updates. TODO: 530 | comments = connection.execute('SELECT count() from comments;').fetchone()[0] 531 | lognprint('comments written:', comments, "-", comments*2) 532 | lognprint('noUserId:', connection.execute('SELECT count() from galeries where userid = 0;').fetchone()) 533 | lognprint('noSize:', connection.execute('SELECT count() from images where size = 0;').fetchone()) 534 | lognprint('noWidth:', connection.execute('SELECT count() from images where width = -1;').fetchone()) 535 | lognprint('noHeight:', connection.execute('SELECT count() from images where height = -1;').fetchone()) 536 | 537 | 538 | 539 | #remove(".lock") 540 | 541 | if __name__ == '__main__': 542 | # exit() 543 | try: 544 | main() 545 | except Exception as e: 546 | lognprint("Last hope catcher: Exception:", format_exc()) 547 | remove(".lock") 548 | 549 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SleepProgger/RepostStatistics/c9e652d4a98e85105b543deabfb9eda2ff2a5271/__init__.py -------------------------------------------------------------------------------- /blocked.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SleepProgger/RepostStatistics/c9e652d4a98e85105b543deabfb9eda2ff2a5271/blocked.txt -------------------------------------------------------------------------------- /borders: -------------------------------------------------------------------------------- 1 | [8, 8, 8] -------------------------------------------------------------------------------- /borders_wmy: -------------------------------------------------------------------------------- 1 | [8, 8, 8] 
-------------------------------------------------------------------------------- /config.json: -------------------------------------------------------------------------------- 1 | {"secret": "YOUR_SECRET", "client_id": "YOURR_CLIENT_ID"} -------------------------------------------------------------------------------- /database/ImgurDBConnector.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | import time 3 | 4 | # 5 | # Here is a bunch of stuff which needs to get removed and/or is broken. 6 | # The most interesting function for now are: 7 | # - insertImageAndDups 8 | # Inserts the hashes from an image, search for similar known images and add found (near)duplicates 9 | # to a table. Also returns the duplicate image ids. 10 | # - get_image_data_from_ids 11 | # Does this need a description ? 12 | # 13 | # TODO: 14 | # - The search for duplicates is slow as fuck. 15 | # We maybe should tweak the key settings in the db. 16 | # - Remove unused/broken/old functions 17 | # 18 | 19 | 20 | 21 | db_getHashes = 'SELECT ahash_1, ahash_2, ahash_3, ahash_4, dhash_h_1, dhash_h_2, dhash_h_3, dhash_h_4, dhash_v_1, dhash_v_2, dhash_v_3, dhash_v_4, imagepath from images where animated = 0;' 22 | db_getHash = 'SELECT ahash_1, ahash_2, ahash_3, ahash_4, dhash_h_1, dhash_h_2, dhash_h_3, dhash_h_4, dhash_v_1, dhash_v_2, dhash_v_3, dhash_v_4, rowid from images where animated = 0 and imagepath = ?;' 23 | 24 | 25 | 26 | 27 | db_getAvg_ = """ 28 | select galerieId gid 29 | from images 30 | where animated = 0 31 | and bits < ? and bits > ? 32 | and hamming3(ahash_1, ?)+hamming3(ahash_2, ?)+hamming3(ahash_3, ?)+hamming3(ahash_4, ?) <= %i 33 | and hamming3(dhash_h_1, ?)+hamming3(dhash_h_2, ?)+hamming3(dhash_h_3, ?)+hamming3(dhash_h_4, ?) <= %i 34 | and hamming3(dhash_v_1, ?)+hamming3(dhash_v_2, ?)+hamming3(dhash_v_3, ?)+hamming3(dhash_v_4, ?) 
<= %i 35 | """ 36 | db_getAvg = db_getAvg_ % (3, 5, 5) #(10, 10, 10) #(3, 5, 5) 37 | 38 | db_getAvg_wDiff_ = """ 39 | select rowid iId, 40 | hamming3(ahash_1, ?)+hamming3(ahash_2, ?)+hamming3(ahash_3, ?)+hamming3(ahash_4, ?) 41 | + hamming3(dhash_h_1, ?)+hamming3(dhash_h_2, ?)+hamming3(dhash_h_3, ?)+hamming3(dhash_h_4, ?) 42 | + hamming3(dhash_v_1, ?)+hamming3(dhash_v_2, ?)+hamming3(dhash_v_3, ?)+hamming3(dhash_v_4, ?) diff 43 | from images 44 | where animated = 0 45 | and bits < ? and bits > ? 46 | and hamming3(ahash_1, ?)+hamming3(ahash_2, ?)+hamming3(ahash_3, ?)+hamming3(ahash_4, ?) <= %i 47 | and hamming3(dhash_h_1, ?)+hamming3(dhash_h_2, ?)+hamming3(dhash_h_3, ?)+hamming3(dhash_h_4, ?) <= %i 48 | and hamming3(dhash_v_1, ?)+hamming3(dhash_v_2, ?)+hamming3(dhash_v_3, ?)+hamming3(dhash_v_4, ?) <= %i 49 | """ 50 | db_getAvg_wDiff = db_getAvg_wDiff_ % (3, 5, 5) #(10, 10, 10) #(3, 5, 5) 51 | 52 | 53 | # Prety dirty with the string substitution but for now ok i think 54 | # TODO: not in repostTable check 55 | db_insertDup_LSH_ = """ 56 | INSERT INTO similarImages_byLSH 57 | select %%i nId, irowid iId, hamming3(ahash_1, :aH_1 )+hamming3(ahash_2, :aH_2 )+hamming3(ahash_3, :aH_3 )+hamming3(ahash_4, :aH_4 ) diffA, 58 | hamming3(dhash_h_1, :dH_h_1 )+hamming3(dhash_h_2, :dH_h_2 )+hamming3(dhash_h_3, :dH_h_3 )+hamming3(dhash_h_4, :dH_h_4 ) 59 | + hamming3(dhash_v_1, :dH_v_1 )+hamming3(dhash_v_2, :dH_v_2 )+hamming3(dhash_v_3, :dH_v_3 )+hamming3(dhash_v_4, :dH_v_4 ) diffD 60 | from ( 61 | select rowid irowid, ahash_1, ahash_2, ahash_3, ahash_4, dhash_h_1, dhash_h_2, dhash_h_3, dhash_h_4, dhash_v_1, dhash_v_2, dhash_v_3, dhash_v_4 from images i 62 | where animated = 0 63 | and ( bits between :lBits and :hBits ) 64 | and hamming3(ahash_1, :aH_1 )+hamming3(ahash_2, :aH_2 )+hamming3(ahash_3, :aH_3 )+hamming3(ahash_4, :aH_4 ) <= %i 65 | and hamming3(dhash_h_1, :dH_h_1 )+hamming3(dhash_h_2, :dH_h_2 )+hamming3(dhash_h_3, :dH_h_3 )+hamming3(dhash_h_4, :dH_h_4 ) <= %i 66 | and 
hamming3(dhash_v_1, :dH_v_1 )+hamming3(dhash_v_2, :dH_v_2 )+hamming3(dhash_v_3, :dH_v_3 )+hamming3(dhash_v_4, :dH_v_4 ) <= %i 67 | and :nId != rowid 68 | and i.rowid not in (SELECT imgId_a from similarImages_byLSH where :nId = imgId_b ) 69 | and i.rowid not in (SELECT imgId_b from similarImages_byLSH where :nId = imgId_a ) 70 | ); 71 | """ 72 | db_insertDup_LSH = db_insertDup_LSH_ % (5, 8, 8) #(10, 10, 10) #(3, 5, 5) 73 | 74 | 75 | 76 | 77 | 78 | db_avgExists_ = """ 79 | SELECT g.userurl, g.link, g.datetime, g.title from galeries as g, 80 | (SELECT galerieId from images 81 | where animated = 0 and 82 | ( 83 | hamming3(ahash_1, ?) + 84 | hamming3(ahash_2, ?) + 85 | hamming3(ahash_3, ?) + 86 | hamming3(ahash_4, ?) <= 3 87 | ) and ( 88 | hamming3(dhash_h_1, ?)+ 89 | hamming3(dhash_h_2, ?)+ 90 | hamming3(dhash_h_3, ?)+ 91 | hamming3(dhash_h_4, ?)+ 92 | hamming3(dhash_v_1, ?)+ 93 | hamming3(dhash_v_2, ?)+ 94 | hamming3(dhash_v_3, ?)+ 95 | hamming3(dhash_v_4, ?) <= 10 ) 96 | ) where galerieId=g.rowid order by g.datetime desc LIMIT 1; 97 | """ 98 | 99 | 100 | 101 | class DBConnector(): 102 | def __init__(self, fname, con=None, loadExtensions=True, re_analyze=True): 103 | if not con: 104 | con = sqlite3.connect(fname) 105 | self.con = con 106 | self.create_function = con.create_function 107 | self.cursor = con.cursor 108 | self.execute = con.execute 109 | self.commit = con.commit 110 | 111 | if loadExtensions: 112 | con.enable_load_extension(loadExtensions) 113 | # TODO: dirty, but ok for now 114 | self.load_extension('Sqlite3_Hamming.dll') 115 | 116 | # in case the db got messed up we re analyze at every start 117 | if re_analyze: self.execute("ANALYZE;") 118 | 119 | self.initDb() 120 | print "db init done" 121 | 122 | 123 | def load_extension(self, fPath): 124 | self.execute('SELECT load_extension("%s");' % fPath) 125 | 126 | 127 | def create_function(self, name, paramCount, funPtr): 128 | # Just a placeholder for the create_function method of the connection 129 | raise 
Exception("Placeholder function called") 130 | 131 | 132 | 133 | # 134 | # Imgur stuff 135 | # 136 | def initDb(self): 137 | # rowid is implicite id 138 | self.execute(""" 139 | CREATE TABLE IF NOT EXISTS galeries ( 140 | userurl VARCHAR , 141 | link VARCHAR, 142 | datetime LONG, 143 | title VARCHAR, 144 | userid INT 145 | ); 146 | """) 147 | self.execute(""" 148 | CREATE TABLE IF NOT EXISTS images ( 149 | galerieId LONG, 150 | imagepath VARCHAR, 151 | animated BOOL, 152 | ahash_1 LONG, ahash_2 LONG, ahash_3 LONG, ahash_4 LONG, 153 | dhash_h_1 LONG, dhash_h_2 LONG, dhash_h_3 LONG, dhash_h_4 LONG, 154 | dhash_v_1 LONG, dhash_v_2 LONG, dhash_v_3 LONG, dhash_v_4 LONG, 155 | crc VARCHAR, 156 | bits INT, /* We use this to speed up the lookup, as when the set bits isn't in the correct range the entry is def. no duplicate */ 157 | size INT, 158 | lastChecked LONG, 159 | width INT, height INT 160 | ); 161 | """) 162 | self.execute(""" 163 | CREATE TABLE IF NOT EXISTS comments ( 164 | postId, 165 | commentId 166 | ); 167 | """) 168 | self.execute(""" 169 | CREATE TABLE IF NOT EXISTS user( 170 | id INT PRIMARY KEY, 171 | secret TEXT 172 | ); 173 | """) 174 | self.execute(""" 175 | CREATE TABLE IF NOT EXISTS spamProt_invalidImages( 176 | userid INT, 177 | timestamp LONG 178 | ); 179 | """) 180 | 181 | self.execute(""" 182 | CREATE TABLE IF NOT EXISTS similarImages_byLSH( 183 | imgId_a LONG, 184 | imgId_b LONG, 185 | diff_a INT, 186 | diff_d INT 187 | ); 188 | """) 189 | self.execute(""" 190 | CREATE TABLE IF NOT EXISTS similarImages_byCRC( 191 | imgId_a LONG, 192 | imgId_b LONG 193 | ); 194 | """) 195 | self.execute(""" 196 | CREATE TABLE IF NOT EXISTS blocked_user( 197 | userid INT PRIMARY KEY 198 | ); 199 | """) 200 | self.execute('CREATE UNIQUE INDEX IF NOT EXISTS ind_CRCDups ON similarImages_byCRC (imgId_a, imgId_b);') 201 | self.execute('CREATE UNIQUE INDEX IF NOT EXISTS ind_LSHDups ON similarImages_byLSH (imgId_a, imgId_b);') 202 | self.execute('CREATE INDEX IF NOT 
EXISTS gInd ON galeries (link);') 203 | self.execute('CREATE INDEX IF NOT EXISTS bitsI ON images (bits);') 204 | self.execute('CREATE INDEX IF NOT EXISTS image2gallery ON images (galerieId);') 205 | # i don't really know if this does any good (but it seemed like it does) ? TODO: check it 206 | self.execute('CREATE INDEX IF NOT EXISTS hashes on images (ahash_1,ahash_2,ahash_3,ahash_4,dhash_h_1,dhash_h_2,dhash_h_3,dhash_h_4,dhash_v_1,dhash_v_2,dhash_v_3,dhash_v_4);') 207 | # self.load_extension('Sqlite3_Hamming.dll') 208 | self.commit(); 209 | 210 | 211 | ################################## 212 | # user stuff 213 | ################################## 214 | db_validateUser = "SELECT 1 from user where id = ? and secret = ?;" 215 | def validateUser(self, userId, userPw): 216 | return not self.execute(self.db_validateUser, (userId, userPw)).fetchone() is None 217 | 218 | db_antiSpam_invalidImageLimit_del = "DELETE from spamProt_invalidImages where timestamp < ?;" 219 | db_antiSpam_invalidImageLimit_count = "SELECT count() from spamProt_invalidImages where userid = ?;" 220 | def toMuchImageMisses(self, userId, missLimit=10, missTime=180): 221 | self.execute(self.db_antiSpam_invalidImageLimit_del, (time.time() - missTime,)) 222 | return self.execute(self.db_antiSpam_invalidImageLimit_count, (userId,)).fetchone()[0] > missLimit 223 | 224 | 225 | ################################## 226 | # galerie and aggregation stuff 227 | ################################## 228 | db_galerieExists = "SELECT 1 from galeries where link = ?;" 229 | def galerieExists(self, link): 230 | return not self.execute(self.db_galerieExists, (link,)).fetchone() is None 231 | 232 | db_commentWritten = "SELECT 1 from comments where postid = ?;" 233 | def commentWritten(self, postId): 234 | return not self.execute(self.db_commentWritten, (postId,)).fetchone() is None 235 | 236 | db_logComment = "INSERT into comments VALUES (?, ?);" 237 | def logComment(self, postId, commentId): 238 | return not 
self.execute(self.db_logComment, (postId, commentId)).fetchone() is None 239 | 240 | db_already_commented = "SELECT 1 FROM comments WHERE postId = ?;" 241 | def already_commented(self, postId): 242 | return not self.execute(self.db_already_commented, (postId,)).fetchone() is None 243 | 244 | db_getImageCount = "SELECT count() from images;" 245 | def getImageCount(self): 246 | return self.execute(self.db_getImageCount).fetchone()[0] 247 | 248 | def getOldestImage(self): 249 | return self.execute('select min(datetime) from galeries;').fetchone()[0] 250 | 251 | ################################## 252 | # Find images 253 | ################################## 254 | db_crcExists = "SELECT g.datetime, g.userurl, g.link, g.title from images as i, galeries as g where g.rowid = i.galerieId and i.crc = ? order by datetime desc LIMIT 1;" 255 | def findLastByCRC(self, crc): 256 | return self.execute(self.db_crcExists, (crc,)).fetchone() 257 | 258 | db_crcExists_first = "SELECT g.datetime, g.userurl, g.link, g.title from images as i, galeries as g where g.rowid = i.galerieId and i.crc = ? 
order by datetime asc LIMIT 1;" 259 | def findFirstByCRC(self, crc): 260 | return self.execute(self.db_crcExists_first, (crc,)).fetchone() 261 | 262 | db_avgExists = """ 263 | SELECT g.datetime, g.userurl, g.link, g.title from galeries as g, 264 | ( %s ) where gid=g.rowid order by g.datetime desc LIMIT 1; 265 | """ % (db_getAvg ,) 266 | def findLastByHash(self, aHashes, dHashes_h, dHashes_v, bitcountlower, bitcountupper): 267 | return self.execute(self.db_avgExists, (bitcountupper, bitcountlower)+aHashes+dHashes_h+dHashes_v).fetchone() 268 | 269 | db_avgExists_first = """ 270 | SELECT g.datetime, g.userurl, g.link, g.title from galeries as g, 271 | ( %s ) where gid=g.rowid order by g.datetime asc LIMIT 1; 272 | """ % (db_getAvg ,) 273 | def findFirstByHash(self, aHashes, dHashes_h, dHashes_v, bitcountlower, bitcountupper): 274 | return self.execute(self.db_avgExists_first, (bitcountupper, bitcountlower)+aHashes+dHashes_h+dHashes_v).fetchone() 275 | 276 | 277 | db_avgExists_all = """ 278 | SELECT g.datetime, g.userurl, g.link, g.title from galeries as g, 279 | ( %s ) where gid=g.rowid order by g.datetime desc; 280 | """ % (db_getAvg ,) 281 | def findAllByHash(self, aHashes, dHashes_h, dHashes_v, bitcountlower, bitcountupper, buffersize=1024): 282 | for r in self.execute(self.db_avgExists_all, (bitcountupper, bitcountlower)+aHashes+dHashes_h+dHashes_v).fetchmany(buffersize): 283 | yield r 284 | 285 | db_crcExists_all = "SELECT g.datetime, g.userurl, g.link, g.title from images as i, galeries as g where g.rowid = i.galerieId and i.crc = ? 
order by datetime desc;" 286 | def findAllByCRC(self, crc, buffersize=1024): 287 | for r in self.execute(self.db_crcExists_all, (crc, )).fetchmany(buffersize): 288 | yield r 289 | 290 | 291 | db_avgExists_all_before = """ 292 | SELECT g.datetime, g.userurl, g.userid, g.link, g.title from galeries as g, images i, similarImages_byLSH s 293 | WHERE (i.rowid = s.imgId_a OR i.rowid = s.imgId_b) AND i.rowid != :iid 294 | AND g.rowid = i.galerieId 295 | AND (:iid = s.imgId_a OR :iid = s.imgId_b) 296 | AND diff_a <= :maxA AND diff_d <= :maxD 297 | AND g.datetime <= :date 298 | ORDER BY g.datetime ASC; 299 | """ 300 | def findAllByHash_before(self, imageId, dateTime, maxAHash=3, maxDHash=10): 301 | cur = self.cursor() 302 | #print "findAllByHash_before", "start", self.db_avgExists_all_before.replace(":iid", str(imageId)).replace(":date", str(dateTime)).replace(":maxA", str(maxAHash)).replace(":maxD", str(maxDHash)) 303 | for r in cur.execute(self.db_avgExists_all_before, {'iid':imageId, 'date':dateTime, 'maxA':maxAHash, 'maxD':maxDHash}).fetchmany(): 304 | yield r 305 | #print "findAllByHash_before", "done" 306 | self.commit() 307 | 308 | db_crcExists_all_ = """SELECT g.datetime, g.userurl, g.userid, g.link, g.title 309 | from similarImages_byCRC s, images as i, galeries as g 310 | WHERE (i.rowid = s.imgId_a OR i.rowid = s.imgId_b) AND i.rowid != :iid 311 | AND g.rowid = i.galerieId 312 | AND (:iid = s.imgId_a OR :iid = s.imgId_b) 313 | AND g.datetime <= :date 314 | ORDER BY datetime ASC; 315 | """ 316 | def findAllByCRC_before(self, imageId, dateTime): 317 | cur = self.cursor() 318 | #print "findAllByCRC_before", "start", self.db_crcExists_all_.replace(":iid", str(imageId)).replace(":date", str(dateTime)) 319 | for r in cur.execute(self.db_crcExists_all_, {'iid':imageId, 'date':dateTime}).fetchmany(): 320 | yield r 321 | #print "findAllByCRC_before", "done" 322 | self.commit() 323 | 324 | 325 | def get_reposts_by_image_hash(self, hash_): 326 | tmp = self.execute('SELECT 
i.rowid, animated, g.datetime FROM galeries as g, images as i WHERE g.rowid = i.galerieId AND g.link = ?;', (hash_,)).fetchone() 327 | ori = self.execute('SELECT g.datetime, g.userurl, g.userid, g.link, g.title FROM galeries as g, images as i WHERE g.rowid = i.galerieId AND g.link = ?;', (hash_,)).fetchone() 328 | if not tmp: return False 329 | iid, animated, timestamp = tmp 330 | if animated: 331 | return (ori, self.findAllByCRC_before(iid, timestamp)) 332 | return (ori, self.findAllByHash_before(iid, timestamp)) 333 | 334 | def is_animated_by_hash(self, hash_): 335 | return self.execute('SELECT animated FROM galeries as g, images as i WHERE g.rowid = i.galerieId AND g.link = ?;', (hash_,)).fetchone()[0] 336 | 337 | 338 | def get_image_data_from_ids(self, ids, olderAs): 339 | ret = list() 340 | cur = self.cursor() 341 | for tmp_id in ids: 342 | tmp = cur.execute("""SELECT g.datetime, g.userurl, g.userid, g.link, g.title 343 | FROM images as i, galeries as g 344 | WHERE g.rowid = i.galerieId AND i.rowid = ? and g.datetime < ? 
ORDER BY g.datetime ASC;""", (tmp_id, olderAs)).fetchone() 345 | if tmp: ret.append(tmp) 346 | return ret 347 | 348 | db_avgSum = 'select count() from ( %s )' % (db_getAvg ,) 349 | def getAvgSum(self, aHashes, dHashes_h, dHashes_v, bitcountlower, bitcountupper): 350 | return self.execute(self.db_avgSum, (bitcountupper, bitcountlower)+aHashes+dHashes_h+dHashes_v).fetchone()[0] 351 | 352 | db_crcSum = "SELECT count() from images where crc = ?;" 353 | def getCrcSum(self, crc): 354 | return self.execute(self.db_crcSum, (crc,)).fetchone()[0] 355 | 356 | 357 | ################################## 358 | # Insert, change, remove 359 | ################################## 360 | db_insertGalerie = "INSERT INTO galeries VALUES (?, ?, ?, ?, ?);" 361 | db_insertImage = "INSERT INTO images VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);" 362 | def insertImage_(self, user, link, datetime, title, iPath, animated, aHashes, dHashes_h, dHashes_v, crc, bits): 363 | cur = self.cursor() 364 | cur.execute(self.db_insertGalerie, (user, link, datetime, title, time.time())) 365 | cur.execute(self.db_insertImage, (cur.lastrowid, iPath, animated)+aHashes+dHashes_h+dHashes_v+(crc,bits)) 366 | self.commit() 367 | 368 | def insertImage(self, aHashes, dHashes_h, dHashes_v, crc, bits, animated, user, userid, link, datetime, title, iPath, size): 369 | cur = self.cursor() 370 | cur.execute(self.db_insertGalerie, (user, link, datetime, title, userid)) 371 | cur.execute(self.db_insertImage, (cur.lastrowid, iPath, animated)+aHashes+dHashes_h+dHashes_v+(crc,bits, size, time.time())) 372 | self.commit() 373 | 374 | 375 | # db_findDuplicates_LSH = """ 376 | # select rowid, hamming3(ahash_1, :aH_1 )+hamming3(ahash_2, :aH_2 )+hamming3(ahash_3, :aH_3 )+hamming3(ahash_4, :aH_4 ) diffA, 377 | # hamming3(dhash_h_1, :dH_h_1 )+hamming3(dhash_h_2, :dH_h_2 )+hamming3(dhash_h_3, :dH_h_3 )+hamming3(dhash_h_4, :dH_h_4 ) 378 | # + hamming3(dhash_v_1, :dH_v_1 )+hamming3(dhash_v_2, :dH_v_2 
)+hamming3(dhash_v_3, :dH_v_3 )+hamming3(dhash_v_4, :dH_v_4 ) diffD 379 | # from images 380 | # where animated = 0 381 | # and ( bits between :lBits and :hBits ) 382 | # and hamming3(ahash_1, :aH_1 )+hamming3(ahash_2, :aH_2 )+hamming3(ahash_3, :aH_3 )+hamming3(ahash_4, :aH_4 ) <= :min_aDiff 383 | # and hamming3(dhash_h_1, :dH_h_1 )+hamming3(dhash_h_2, :dH_h_2 )+hamming3(dhash_h_3, :dH_h_3 )+hamming3(dhash_h_4, :dH_h_4 ) <= :min_dhDiff 384 | # and hamming3(dhash_v_1, :dH_v_1 )+hamming3(dhash_v_2, :dH_v_2 )+hamming3(dhash_v_3, :dH_v_3 )+hamming3(dhash_v_4, :dH_v_4 ) <= :min_dvDiff 385 | # and :nId != rowid; 386 | # """ 387 | 388 | db_findDuplicates_LSH_ = """ 389 | select rowid, hamming3(ahash_1, :aH_1 )+hamming3(ahash_2, :aH_2 )+hamming3(ahash_3, :aH_3 )+hamming3(ahash_4, :aH_4 ) diffA, 390 | hamming3(dhash_h_1, :dH_h_1 )+hamming3(dhash_h_2, :dH_h_2 )+hamming3(dhash_h_3, :dH_h_3 )+hamming3(dhash_h_4, :dH_h_4 ) 391 | + hamming3(dhash_v_1, :dH_v_1 )+hamming3(dhash_v_2, :dH_v_2 )+hamming3(dhash_v_3, :dH_v_3 )+hamming3(dhash_v_4, :dH_v_4 ) diffD 392 | from images 393 | where animated = 0 394 | and ( bits between :lBits and :hBits ) 395 | and hamming3(ahash_1, :aH_1 )+hamming3(ahash_2, :aH_2 )+hamming3(ahash_3, :aH_3 )+hamming3(ahash_4, :aH_4 ) <= :min_aDiff 396 | and hamming3(dhash_h_1, :dH_h_1 )+hamming3(dhash_h_2, :dH_h_2 )+hamming3(dhash_h_3, :dH_h_3 )+hamming3(dhash_h_4, :dH_h_4 ) <= :min_dhDiff 397 | and hamming3(dhash_v_1, :dH_v_1 )+hamming3(dhash_v_2, :dH_v_2 )+hamming3(dhash_v_3, :dH_v_3 )+hamming3(dhash_v_4, :dH_v_4 ) <= :min_dvDiff 398 | and :nId != rowid; 399 | """ 400 | db_findDuplicates_LSH = """ 401 | select rowid, hamming3(ahash_1, :aH_1 )+hamming3(ahash_2, :aH_2 )+hamming3(ahash_3, :aH_3 )+hamming3(ahash_4, :aH_4 ) diffA, 402 | hamming3(dhash_h_1, :dH_h_1 )+hamming3(dhash_h_2, :dH_h_2 )+hamming3(dhash_h_3, :dH_h_3 )+hamming3(dhash_h_4, :dH_h_4 ) 403 | + hamming3(dhash_v_1, :dH_v_1 )+hamming3(dhash_v_2, :dH_v_2 )+hamming3(dhash_v_3, :dH_v_3 
)+hamming3(dhash_v_4, :dH_v_4 ) diffD 404 | from (SELECT * FROM images WHERE animated = 0 AND ( bits BETWEEN :lBits AND :hBits) AND :nId != rowid) 405 | WHERE hamming3(ahash_1, :aH_1 )+hamming3(ahash_2, :aH_2 )+hamming3(ahash_3, :aH_3 )+hamming3(ahash_4, :aH_4 ) <= :min_aDiff 406 | and hamming3(dhash_h_1, :dH_h_1 )+hamming3(dhash_h_2, :dH_h_2 )+hamming3(dhash_h_3, :dH_h_3 )+hamming3(dhash_h_4, :dH_h_4 ) <= :min_dhDiff 407 | and hamming3(dhash_v_1, :dH_v_1 )+hamming3(dhash_v_2, :dH_v_2 )+hamming3(dhash_v_3, :dH_v_3 )+hamming3(dhash_v_4, :dH_v_4 ) <= :min_dvDiff 408 | ; 409 | """ 410 | 411 | db_insertDuplicate_LSH = 'INSERT INTO similarImages_byLSH VALUES (?, ?, ?, ?)' 412 | db_findDuplicates_CRC = """ 413 | SELECT rowid from images i 414 | WHERE animated=1 AND crc = :crc 415 | AND :nId != rowid; 416 | """ 417 | db_insertDuplicate_CRC = 'INSERT INTO similarImages_byCRC VALUES (?, ?)' 418 | def insertImageAndDups(self, aHashes, dHashes_h, dHashes_v, crc, bits, animated, user, userid, link, datetime, title, iPath, size, bitcountupper, bitcountlower, mDiffA, mDiffdh, mDiffdv, width, height, retMDiffA, retMDiffd): 419 | cur = self.cursor() 420 | cur.execute(self.db_insertGalerie, (user, link, datetime, title, userid)) 421 | cur.execute(self.db_insertImage, (cur.lastrowid, iPath, animated)+aHashes+dHashes_h+dHashes_v+(crc,bits, size, time.time(), width, height)) 422 | nId = cur.lastrowid 423 | ids = list() 424 | # inserted = 0 425 | 426 | if animated: 427 | for dupId in cur.execute(self.db_findDuplicates_CRC, dict({ 428 | 'nId':nId, 429 | 'crc':crc 430 | })).fetchmany(-1): 431 | dupId = dupId[0] 432 | try: 433 | cur.execute(self.db_insertDuplicate_CRC, tuple(sorted((nId, dupId)))) 434 | # inserted += 1 435 | ids.append(dupId) 436 | except sqlite3.Error as e: 437 | pass # should only be the not unique exception 438 | # print "SQLITE EXCEPTION CRC", e 439 | else: 440 | for dupId, diffA, diffD in cur.execute(self.db_findDuplicates_LSH, dict({ 441 | 'nId':nId, 442 | 
'min_aDiff':mDiffA, 'min_dhDiff':mDiffdh, 'min_dvDiff':mDiffdv, 443 | 'aH_1':aHashes[0], 'aH_2':aHashes[1], 'aH_3':aHashes[2], 'aH_4':aHashes[3], 444 | 'dH_h_1':dHashes_h[0], 'dH_h_2':dHashes_h[1], 'dH_h_3':dHashes_h[2], 'dH_h_4':dHashes_h[3], 445 | 'dH_v_1':dHashes_v[0], 'dH_v_2':dHashes_v[1], 'dH_v_3':dHashes_v[2], 'dH_v_4':dHashes_v[3], 446 | 'lBits':bitcountlower, 'hBits':bitcountupper 447 | })).fetchmany(-1): 448 | try: 449 | cur.execute(self.db_insertDuplicate_LSH, tuple(sorted((nId, dupId))) + (diffA, diffD) ) 450 | #inserted += 1 451 | if diffA <= retMDiffA and diffD <= retMDiffd: 452 | ids.append(dupId) 453 | except sqlite3.Error as e: 454 | pass # should only be the not unique exception 455 | # print "SQLITE EXCEPTION LSH", e 456 | self.commit() 457 | return (nId, ids) 458 | 459 | 460 | 461 | 462 | def iterHashes(self, bufferSize=10000000): 463 | for r in self.con.execute(self.db_getHashes).fetchmany(bufferSize): 464 | #if not r: break 465 | #for row in r: 466 | yield r 467 | 468 | def getHash(self, imagepath): 469 | d = self.con.execute(db_getHash, (imagepath,)).fetchone() 470 | return d 471 | 472 | ################################## 473 | # Misc 474 | ################################## 475 | db_imageRepostCount = "SELECT count() from similarImages_byLSH" 476 | def getRepostCount_images(self): 477 | return self.con.execute(self.db_imageRepostCount).fetchone()[0] 478 | db_animationRepostCount = "SELECT count() from similarImages_byCRC" 479 | def getRepostCount_animations(self): 480 | return self.con.execute(self.db_animationRepostCount).fetchone()[0] 481 | def getReprostCount(self): 482 | return self.getRepostCount_images() + self.getRepostCount_animations() 483 | 484 | def add_blocked_user(self, userid): 485 | return self.con.execute("INSERT INTO blocked_user VALUES (?);", (userid,)) 486 | def remove_blocked_user(self, userid): 487 | return self.con.execute("DELETE FROM blocked_user where userid = ?;", (userid,)) 488 | def is_user_blocked(self, userid): 
These files are both read, handled, and cleaned at each startup.
- There is much room to make the SQL statements faster. In particular, I think I have far too many indices, which seriously bloat the DB (currently ~500 MB in my instance).
// for dynamically loaded messages
// Watch the whole <body> for newly inserted message threads and append the
// conversation partner's user ID next to the message date.
window.observer = new MutationObserver(function(mutations) {
    for(var i=0; i < mutations.length; ++i){
        var mutation = mutations[i];
        for(var j=0; j < mutation.addedNodes.length; ++j){
            var node = mutation.addedNodes[j];
            // Only react to freshly added message-thread containers.
            if(node.className != "thread-wrapper") continue;
            // NOTE(review): this appends to every '.date-text' element and
            // reads the attribute from the first '.thread-wrapper' on the
            // page, not from `node` -- repeated mutations presumably append
            // duplicate "ID:" suffixes; confirm intended behavior.
            $('.date-text').append(" ID: " + $('.thread-wrapper').attr('data-with-id'))
        }
    }
});
// Start observing: childList catches added/removed nodes, subtree makes the
// observation recursive below <body>.
var target = document.querySelector('body');
observer.observe(target, { subtree: true, childList: true});
12 | # 13 | 14 | def searchForFiles(path, extensions=None): 15 | #print "search in", path 16 | for root, subFolders, files in os.walk(path): 17 | root = root.rstrip("/") +"/" 18 | for fName in files: 19 | if not extensions or fName.endswith(extensions): 20 | yield root + fName 21 | 22 | # def to64BitSigInts(bits): 23 | # return tuple(struct.unpack("q", struct.pack("Q", int(bits[i:i+64], 2)))[0] for i in xrange(0, len(bits), 64)) 24 | 25 | 26 | 27 | ########## 28 | # hash functions for lsh 29 | def aHash_256b(img): 30 | #img.show() 31 | img = img.resize((16, 16), Image.ANTIALIAS) 32 | #img.show() 33 | pix = img.getdata() 34 | #img.show() 35 | print list(pix) 36 | avg = sum(pix) / (16.0*16) 37 | return list((1 if x < avg else 0) for x in pix) 38 | def dHash_256_hb(img): 39 | size = (17, 16) 40 | img = img.resize(size, Image.ANTIALIAS) 41 | pixels = list(img.getdata()) 42 | i = 0 43 | r = list() 44 | for y in range(16): 45 | for x in range(16): 46 | r.append(1 if pixels[i] 0 and d[0][1] <= self.maxHashDiff 76 | 77 | def insertImageHash(self, image, metaData=None): 78 | self.hashes.index(self.imageHash(image), metaData) 79 | 80 | def insertAndCheckImageHash(self, image, metaData=None): 81 | hash = self.imageHash(image) 82 | d = self.hashes.query(hash, distance_func='hamming') 83 | self.hashes.index(hash, metaData) 84 | return len(d) > 0 and d[0][1] <= self.maxHashDiff 85 | 86 | def insertAndGetImageHash(self, image, metaData=None, maxRows=None): 87 | hash = self.imageHash(image) 88 | #print hash 89 | #print len(hash), self.hashes.input_dim 90 | 91 | dups = self.hashes.query(hash, num_results=maxRows, distance_func='hamming') 92 | print dups 93 | d = tuple(x[0][1] for x in dups if x[1] <= self.maxHashDiff) 94 | self.hashes.index(hash, metaData) 95 | return d 96 | 97 | #def insertIfNew(self, image, metadata=None): 98 | 99 | def insertHash(self, hash, metaData=None): 100 | self.hashes.index(hash, metaData) 101 | 102 | 103 | 104 | 105 | popcount = lambda n: 
    def __init__(self, databasename, databaseConnection=None):
        """Wrap an image-hash storage backend.

        databasename -- path/name passed to ImgurDBConnector.DBConnector
            when no existing connection is supplied.
        databaseConnection -- optional already-open connector; when given,
            *databasename* is ignored.
        """
        if databaseConnection:
            self.db = databaseConnection
        else:
            self.db = ImgurDBConnector.DBConnector(databasename)

        #self.findFirstByCRC = self.db.findFirstByCRC
        #self.findLastByCRC = self.db.findLastByCRC

        # Re-export the connector's count method directly on this object.
        self.getImageCount = self.db.getImageCount
self.bits+20) 158 | else: 159 | return self.db.findLastByCRC(self.crc) 160 | def getOldest(self): 161 | if self.similarity: 162 | return self.db.findFirstByHash(self.aHashes, self.dHashes_h, self.dHashes_v, self.bits-20, self.bits+20) 163 | else: 164 | return self.db.findFirstByCRC(self.crc) 165 | 166 | def getSimilarCount(self): 167 | if self.similarity: 168 | return self.db.getAvgSum(self.aHashes, self.dHashes_h, self.dHashes_v, self.bits-20, self.bits+20) 169 | else: 170 | return self.db.getCrcSum(self.crc) 171 | 172 | 173 | 174 | def getAllByCrc(self): 175 | return self.db.findAllByCRC(self.crc) 176 | def getAllByAvg(self): 177 | return self.db.findAllByHash(self.aHashes, self.dHashes_h, self.dHashes_v, self.bits-20, self.bits+20) 178 | 179 | 180 | 181 | def getAllSimilar(self, image, similarity=True): 182 | self.newImage(image, similarity) 183 | if similarity: 184 | return list(self.getAllByAvg()) 185 | else: 186 | return list(self.getAllByCrc()) 187 | 188 | 189 | def insertAndGetOne(self, image, metadata=None, similarity=True): 190 | self.newImage(image, similarity) 191 | if similarity: 192 | r = self.getOneByAvg() 193 | else: 194 | r = self.getOneByCrc() 195 | self.db.insertImage(self.aHashes, self.dHashes_h, self.dHashes_v, self.crc, self.bits, *metadata) 196 | return r 197 | 198 | def insertImage(self, *args): 199 | self.db.insertImage(self.aHashes, self.dHashes_h, self.dHashes_v, self.crc, self.bits, *args) 200 | 201 | 202 | def imageKnown(self): 203 | pass 204 | 205 | 206 | def inserAndGetAll(self): 207 | pass 208 | 209 | 210 | 211 | 212 | 213 | 214 | if __name__ == '__main__': 215 | # Some testig (might be broken, no clue tbh.) 
# NOTE: reinterpreting the unsigned bit pattern as signed 64-bit ints is fragile and error-prone. TODO: replace with a safer representation.
def hamming_distance(numA, numB):
    """Return the number of bit positions in which numA and numB differ."""
    difference = numA ^ numB
    # Each set bit in the XOR marks one differing position.
    return bin(difference).count("1")
# just a little function allowing to retry http requests
# TODO: move this function to somewhere more appropriate
def request(url, data=None, timeout=20, retries=1, retryTime=5):
    """Fetch *url* (optionally POSTing *data*) with up to *retries* attempts.

    Waits *retryTime* seconds between attempts and returns the response
    body, or None when every attempt failed.
    """
    for attempt in xrange(retries):
        try:
            return urllib2.urlopen(url, data, timeout).read()
        except urllib2.HTTPError as e:
            # BUG FIX: HTTPError.reason is an attribute, not a method --
            # calling ``e.reason()`` raised TypeError and masked the real
            # HTTP error.
            lognprint("Error requesting %s: %s" % (url, e.reason))
        except socket.error as e:
            lognprint("Error requesting %s: %s" % (url, str(e)))
        # Don't announce a retry or sleep after the final failed attempt.
        if attempt + 1 < retries:
            lognprint("Retry #", attempt + 1)
            sleep(retryTime)
    return None
//This is a naive implementation, shown for comparison,
//and to help in understanding the better functions.
//It uses 24 arithmetic operations (shift, add, and).
//Returns the population count (number of set bits) of x.
//NOTE(review): uint64_t is typedef'd above to sqlite3_int64, which is
//signed, so the right shifts on a negative value are sign-extending and
//the count is only reliable for non-negative inputs -- TODO confirm.
static int popcount_1(uint64_t x) {
    x = (x & m1 ) + ((x >> 1) & m1 ); //put count of each 2 bits into those 2 bits
    x = (x & m2 ) + ((x >> 2) & m2 ); //put count of each 4 bits into those 4 bits
    x = (x & m4 ) + ((x >> 4) & m4 ); //put count of each 8 bits into those 8 bits
    x = (x & m8 ) + ((x >> 8) & m8 ); //put count of each 16 bits into those 16 bits
    x = (x & m16) + ((x >> 16) & m16); //put count of each 32 bits into those 32 bits
    x = (x & m32) + ((x >> 32) & m32); //put count of each 64 bits into those 64 bits
    return x;
}
//Returns the population count (number of set bits) of x.
//Parallel-sum method: 17 arithmetic operations, no multiply.
static int popcount_2(uint64_t x) {
    /* BUG FIX: uint64_t is typedef'd above to the *signed* sqlite3_int64,
    ** so the right shifts sign-extended and produced wrong counts for
    ** negative inputs.  Work on a genuinely unsigned copy; the conversion
    ** preserves the 64-bit pattern (modular, well-defined in C). */
    unsigned long long v = (unsigned long long)x;
    v -= (v >> 1) & 0x5555555555555555ULL;            /* count per 2-bit pair  */
    v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL); /* per nibble */
    v = (v + (v >> 4)) & 0x0f0f0f0f0f0f0f0fULL;       /* count per byte        */
    v += v >> 8;                                      /* fold into low bytes   */
    v += v >> 16;
    v += v >> 32;
    return (int)(v & 0x7f);                           /* max count is 64       */
}
54 | } 55 | 56 | 57 | 58 | static void popcount(sqlite3_context *context, int argc, sqlite3_value **argv){ 59 | sqlite3_result_int64(context, popcount_2(sqlite3_value_int(argv[0])) ); 60 | } 61 | static void xor(sqlite3_context *context, int argc, sqlite3_value **argv){ 62 | sqlite3_result_int64(context, sqlite3_value_int64(argv[0])^sqlite3_value_int64(argv[1])); 63 | } 64 | static void hamming1(sqlite3_context *context, int argc, sqlite3_value **argv){ 65 | sqlite3_result_int64(context, popcount_1(sqlite3_value_int64(argv[0])^sqlite3_value_int64(argv[1]))); 66 | } 67 | static void hamming2(sqlite3_context *context, int argc, sqlite3_value **argv){ 68 | sqlite3_result_int64(context, popcount_2(sqlite3_value_int64(argv[0])^sqlite3_value_int64(argv[1]))); 69 | } 70 | static void hamming3(sqlite3_context *context, int argc, sqlite3_value **argv){ 71 | uint64_t x = sqlite3_value_int64(argv[0])^sqlite3_value_int64(argv[1]); 72 | x -= (x >> 1) & m1; //put count of each 2 bits into those 2 bits 73 | x = (x & m2) + ((x >> 2) & m2); //put count of each 4 bits into those 4 bits 74 | x = (x + (x >> 4)) & m4; //put count of each 8 bits into those 8 bits 75 | sqlite3_result_int64(context, (x * h01)>>56); //returns left 8 bits of x + (x<<8) + (x<<16) + (x<<24) + ... 76 | } 77 | 78 | #ifdef _WIN32 79 | __declspec(dllexport) 80 | #endif 81 | 82 | /* SQLite invokes this routine once when it loads the extension. 83 | ** Create new functions, collating sequences, and virtual table 84 | ** modules here. This is usually the only exported symbol in 85 | ** the shared library. 
86 | */ 87 | int sqlite3_extension_init( sqlite3 *db, char **pzErrMsg, const sqlite3_api_routines *pApi){ 88 | SQLITE_EXTENSION_INIT2(pApi) 89 | int rc = SQLITE_OK; 90 | sqlite3_create_function(db, "xor", 2, SQLITE_INTEGER, 0, xor, 0, 0); 91 | sqlite3_create_function(db, "hamming1", 2, SQLITE_INTEGER, 0, hamming1, 0, 0); 92 | sqlite3_create_function(db, "hamming2", 2, SQLITE_INTEGER, 0, hamming2, 0, 0); 93 | sqlite3_create_function(db, "hamming3", 2, SQLITE_INTEGER, 0, hamming3, 0, 0); 94 | sqlite3_create_function(db, "popcount", 1, SQLITE_INTEGER, 0, popcount, 0, 0); 95 | return rc; 96 | } 97 | // SELECT load_extension('Sqlite3_Hamming.dll'); 98 | -------------------------------------------------------------------------------- /sqlite_extension/Sqlite3_Hamming.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SleepProgger/RepostStatistics/c9e652d4a98e85105b543deabfb9eda2ff2a5271/sqlite_extension/Sqlite3_Hamming.dll -------------------------------------------------------------------------------- /sqlite_extension/sqlite3ext.h: -------------------------------------------------------------------------------- 1 | /* 2 | ** 2006 June 7 3 | ** 4 | ** The author disclaims copyright to this source code. In place of 5 | ** a legal notice, here is a blessing: 6 | ** 7 | ** May you do good and not evil. 8 | ** May you find forgiveness for yourself and forgive others. 9 | ** May you share freely, never taking more than you give. 10 | ** 11 | ************************************************************************* 12 | ** This header file defines the SQLite interface for use by 13 | ** shared libraries that want to be imported as extensions into 14 | ** an SQLite instance. Shared libraries that intend to be loaded 15 | ** as extensions by SQLite should #include this file instead of 16 | ** sqlite3.h. 
17 | */ 18 | #ifndef _SQLITE3EXT_H_ 19 | #define _SQLITE3EXT_H_ 20 | #include "sqlite3.h" 21 | 22 | typedef struct sqlite3_api_routines sqlite3_api_routines; 23 | 24 | /* 25 | ** The following structure holds pointers to all of the SQLite API 26 | ** routines. 27 | ** 28 | ** WARNING: In order to maintain backwards compatibility, add new 29 | ** interfaces to the end of this structure only. If you insert new 30 | ** interfaces in the middle of this structure, then older different 31 | ** versions of SQLite will not be able to load each others' shared 32 | ** libraries! 33 | */ 34 | struct sqlite3_api_routines { 35 | void * (*aggregate_context)(sqlite3_context*,int nBytes); 36 | int (*aggregate_count)(sqlite3_context*); 37 | int (*bind_blob)(sqlite3_stmt*,int,const void*,int n,void(*)(void*)); 38 | int (*bind_double)(sqlite3_stmt*,int,double); 39 | int (*bind_int)(sqlite3_stmt*,int,int); 40 | int (*bind_int64)(sqlite3_stmt*,int,sqlite_int64); 41 | int (*bind_null)(sqlite3_stmt*,int); 42 | int (*bind_parameter_count)(sqlite3_stmt*); 43 | int (*bind_parameter_index)(sqlite3_stmt*,const char*zName); 44 | const char * (*bind_parameter_name)(sqlite3_stmt*,int); 45 | int (*bind_text)(sqlite3_stmt*,int,const char*,int n,void(*)(void*)); 46 | int (*bind_text16)(sqlite3_stmt*,int,const void*,int,void(*)(void*)); 47 | int (*bind_value)(sqlite3_stmt*,int,const sqlite3_value*); 48 | int (*busy_handler)(sqlite3*,int(*)(void*,int),void*); 49 | int (*busy_timeout)(sqlite3*,int ms); 50 | int (*changes)(sqlite3*); 51 | int (*close)(sqlite3*); 52 | int (*collation_needed)(sqlite3*,void*,void(*)(void*,sqlite3*, 53 | int eTextRep,const char*)); 54 | int (*collation_needed16)(sqlite3*,void*,void(*)(void*,sqlite3*, 55 | int eTextRep,const void*)); 56 | const void * (*column_blob)(sqlite3_stmt*,int iCol); 57 | int (*column_bytes)(sqlite3_stmt*,int iCol); 58 | int (*column_bytes16)(sqlite3_stmt*,int iCol); 59 | int (*column_count)(sqlite3_stmt*pStmt); 60 | const char * 
(*column_database_name)(sqlite3_stmt*,int); 61 | const void * (*column_database_name16)(sqlite3_stmt*,int); 62 | const char * (*column_decltype)(sqlite3_stmt*,int i); 63 | const void * (*column_decltype16)(sqlite3_stmt*,int); 64 | double (*column_double)(sqlite3_stmt*,int iCol); 65 | int (*column_int)(sqlite3_stmt*,int iCol); 66 | sqlite_int64 (*column_int64)(sqlite3_stmt*,int iCol); 67 | const char * (*column_name)(sqlite3_stmt*,int); 68 | const void * (*column_name16)(sqlite3_stmt*,int); 69 | const char * (*column_origin_name)(sqlite3_stmt*,int); 70 | const void * (*column_origin_name16)(sqlite3_stmt*,int); 71 | const char * (*column_table_name)(sqlite3_stmt*,int); 72 | const void * (*column_table_name16)(sqlite3_stmt*,int); 73 | const unsigned char * (*column_text)(sqlite3_stmt*,int iCol); 74 | const void * (*column_text16)(sqlite3_stmt*,int iCol); 75 | int (*column_type)(sqlite3_stmt*,int iCol); 76 | sqlite3_value* (*column_value)(sqlite3_stmt*,int iCol); 77 | void * (*commit_hook)(sqlite3*,int(*)(void*),void*); 78 | int (*complete)(const char*sql); 79 | int (*complete16)(const void*sql); 80 | int (*create_collation)(sqlite3*,const char*,int,void*, 81 | int(*)(void*,int,const void*,int,const void*)); 82 | int (*create_collation16)(sqlite3*,const void*,int,void*, 83 | int(*)(void*,int,const void*,int,const void*)); 84 | int (*create_function)(sqlite3*,const char*,int,int,void*, 85 | void (*xFunc)(sqlite3_context*,int,sqlite3_value**), 86 | void (*xStep)(sqlite3_context*,int,sqlite3_value**), 87 | void (*xFinal)(sqlite3_context*)); 88 | int (*create_function16)(sqlite3*,const void*,int,int,void*, 89 | void (*xFunc)(sqlite3_context*,int,sqlite3_value**), 90 | void (*xStep)(sqlite3_context*,int,sqlite3_value**), 91 | void (*xFinal)(sqlite3_context*)); 92 | int (*create_module)(sqlite3*,const char*,const sqlite3_module*,void*); 93 | int (*data_count)(sqlite3_stmt*pStmt); 94 | sqlite3 * (*db_handle)(sqlite3_stmt*); 95 | int (*declare_vtab)(sqlite3*,const char*); 96 | 
int (*enable_shared_cache)(int); 97 | int (*errcode)(sqlite3*db); 98 | const char * (*errmsg)(sqlite3*); 99 | const void * (*errmsg16)(sqlite3*); 100 | int (*exec)(sqlite3*,const char*,sqlite3_callback,void*,char**); 101 | int (*expired)(sqlite3_stmt*); 102 | int (*finalize)(sqlite3_stmt*pStmt); 103 | void (*free)(void*); 104 | void (*free_table)(char**result); 105 | int (*get_autocommit)(sqlite3*); 106 | void * (*get_auxdata)(sqlite3_context*,int); 107 | int (*get_table)(sqlite3*,const char*,char***,int*,int*,char**); 108 | int (*global_recover)(void); 109 | void (*interruptx)(sqlite3*); 110 | sqlite_int64 (*last_insert_rowid)(sqlite3*); 111 | const char * (*libversion)(void); 112 | int (*libversion_number)(void); 113 | void *(*malloc)(int); 114 | char * (*mprintf)(const char*,...); 115 | int (*open)(const char*,sqlite3**); 116 | int (*open16)(const void*,sqlite3**); 117 | int (*prepare)(sqlite3*,const char*,int,sqlite3_stmt**,const char**); 118 | int (*prepare16)(sqlite3*,const void*,int,sqlite3_stmt**,const void**); 119 | void * (*profile)(sqlite3*,void(*)(void*,const char*,sqlite_uint64),void*); 120 | void (*progress_handler)(sqlite3*,int,int(*)(void*),void*); 121 | void *(*realloc)(void*,int); 122 | int (*reset)(sqlite3_stmt*pStmt); 123 | void (*result_blob)(sqlite3_context*,const void*,int,void(*)(void*)); 124 | void (*result_double)(sqlite3_context*,double); 125 | void (*result_error)(sqlite3_context*,const char*,int); 126 | void (*result_error16)(sqlite3_context*,const void*,int); 127 | void (*result_int)(sqlite3_context*,int); 128 | void (*result_int64)(sqlite3_context*,sqlite_int64); 129 | void (*result_null)(sqlite3_context*); 130 | void (*result_text)(sqlite3_context*,const char*,int,void(*)(void*)); 131 | void (*result_text16)(sqlite3_context*,const void*,int,void(*)(void*)); 132 | void (*result_text16be)(sqlite3_context*,const void*,int,void(*)(void*)); 133 | void (*result_text16le)(sqlite3_context*,const void*,int,void(*)(void*)); 134 | void 
(*result_value)(sqlite3_context*,sqlite3_value*); 135 | void * (*rollback_hook)(sqlite3*,void(*)(void*),void*); 136 | int (*set_authorizer)(sqlite3*,int(*)(void*,int,const char*,const char*, 137 | const char*,const char*),void*); 138 | void (*set_auxdata)(sqlite3_context*,int,void*,void (*)(void*)); 139 | char * (*snprintf)(int,char*,const char*,...); 140 | int (*step)(sqlite3_stmt*); 141 | int (*table_column_metadata)(sqlite3*,const char*,const char*,const char*, 142 | char const**,char const**,int*,int*,int*); 143 | void (*thread_cleanup)(void); 144 | int (*total_changes)(sqlite3*); 145 | void * (*trace)(sqlite3*,void(*xTrace)(void*,const char*),void*); 146 | int (*transfer_bindings)(sqlite3_stmt*,sqlite3_stmt*); 147 | void * (*update_hook)(sqlite3*,void(*)(void*,int ,char const*,char const*, 148 | sqlite_int64),void*); 149 | void * (*user_data)(sqlite3_context*); 150 | const void * (*value_blob)(sqlite3_value*); 151 | int (*value_bytes)(sqlite3_value*); 152 | int (*value_bytes16)(sqlite3_value*); 153 | double (*value_double)(sqlite3_value*); 154 | int (*value_int)(sqlite3_value*); 155 | sqlite_int64 (*value_int64)(sqlite3_value*); 156 | int (*value_numeric_type)(sqlite3_value*); 157 | const unsigned char * (*value_text)(sqlite3_value*); 158 | const void * (*value_text16)(sqlite3_value*); 159 | const void * (*value_text16be)(sqlite3_value*); 160 | const void * (*value_text16le)(sqlite3_value*); 161 | int (*value_type)(sqlite3_value*); 162 | char *(*vmprintf)(const char*,va_list); 163 | /* Added ??? 
*/ 164 | int (*overload_function)(sqlite3*, const char *zFuncName, int nArg); 165 | /* Added by 3.3.13 */ 166 | int (*prepare_v2)(sqlite3*,const char*,int,sqlite3_stmt**,const char**); 167 | int (*prepare16_v2)(sqlite3*,const void*,int,sqlite3_stmt**,const void**); 168 | int (*clear_bindings)(sqlite3_stmt*); 169 | /* Added by 3.4.1 */ 170 | int (*create_module_v2)(sqlite3*,const char*,const sqlite3_module*,void*, 171 | void (*xDestroy)(void *)); 172 | /* Added by 3.5.0 */ 173 | int (*bind_zeroblob)(sqlite3_stmt*,int,int); 174 | int (*blob_bytes)(sqlite3_blob*); 175 | int (*blob_close)(sqlite3_blob*); 176 | int (*blob_open)(sqlite3*,const char*,const char*,const char*,sqlite3_int64, 177 | int,sqlite3_blob**); 178 | int (*blob_read)(sqlite3_blob*,void*,int,int); 179 | int (*blob_write)(sqlite3_blob*,const void*,int,int); 180 | int (*create_collation_v2)(sqlite3*,const char*,int,void*, 181 | int(*)(void*,int,const void*,int,const void*), 182 | void(*)(void*)); 183 | int (*file_control)(sqlite3*,const char*,int,void*); 184 | sqlite3_int64 (*memory_highwater)(int); 185 | sqlite3_int64 (*memory_used)(void); 186 | sqlite3_mutex *(*mutex_alloc)(int); 187 | void (*mutex_enter)(sqlite3_mutex*); 188 | void (*mutex_free)(sqlite3_mutex*); 189 | void (*mutex_leave)(sqlite3_mutex*); 190 | int (*mutex_try)(sqlite3_mutex*); 191 | int (*open_v2)(const char*,sqlite3**,int,const char*); 192 | int (*release_memory)(int); 193 | void (*result_error_nomem)(sqlite3_context*); 194 | void (*result_error_toobig)(sqlite3_context*); 195 | int (*sleep)(int); 196 | void (*soft_heap_limit)(int); 197 | sqlite3_vfs *(*vfs_find)(const char*); 198 | int (*vfs_register)(sqlite3_vfs*,int); 199 | int (*vfs_unregister)(sqlite3_vfs*); 200 | int (*xthreadsafe)(void); 201 | void (*result_zeroblob)(sqlite3_context*,int); 202 | void (*result_error_code)(sqlite3_context*,int); 203 | int (*test_control)(int, ...); 204 | void (*randomness)(int,void*); 205 | sqlite3 *(*context_db_handle)(sqlite3_context*); 206 | 
int (*extended_result_codes)(sqlite3*,int); 207 | int (*limit)(sqlite3*,int,int); 208 | sqlite3_stmt *(*next_stmt)(sqlite3*,sqlite3_stmt*); 209 | const char *(*sql)(sqlite3_stmt*); 210 | int (*status)(int,int*,int*,int); 211 | int (*backup_finish)(sqlite3_backup*); 212 | sqlite3_backup *(*backup_init)(sqlite3*,const char*,sqlite3*,const char*); 213 | int (*backup_pagecount)(sqlite3_backup*); 214 | int (*backup_remaining)(sqlite3_backup*); 215 | int (*backup_step)(sqlite3_backup*,int); 216 | const char *(*compileoption_get)(int); 217 | int (*compileoption_used)(const char*); 218 | int (*create_function_v2)(sqlite3*,const char*,int,int,void*, 219 | void (*xFunc)(sqlite3_context*,int,sqlite3_value**), 220 | void (*xStep)(sqlite3_context*,int,sqlite3_value**), 221 | void (*xFinal)(sqlite3_context*), 222 | void(*xDestroy)(void*)); 223 | int (*db_config)(sqlite3*,int,...); 224 | sqlite3_mutex *(*db_mutex)(sqlite3*); 225 | int (*db_status)(sqlite3*,int,int*,int*,int); 226 | int (*extended_errcode)(sqlite3*); 227 | void (*log)(int,const char*,...); 228 | sqlite3_int64 (*soft_heap_limit64)(sqlite3_int64); 229 | const char *(*sourceid)(void); 230 | int (*stmt_status)(sqlite3_stmt*,int,int); 231 | int (*strnicmp)(const char*,const char*,int); 232 | int (*unlock_notify)(sqlite3*,void(*)(void**,int),void*); 233 | int (*wal_autocheckpoint)(sqlite3*,int); 234 | int (*wal_checkpoint)(sqlite3*,const char*); 235 | void *(*wal_hook)(sqlite3*,int(*)(void*,sqlite3*,const char*,int),void*); 236 | int (*blob_reopen)(sqlite3_blob*,sqlite3_int64); 237 | int (*vtab_config)(sqlite3*,int op,...); 238 | int (*vtab_on_conflict)(sqlite3*); 239 | /* Version 3.7.16 and later */ 240 | int (*close_v2)(sqlite3*); 241 | const char *(*db_filename)(sqlite3*,const char*); 242 | int (*db_readonly)(sqlite3*,const char*); 243 | int (*db_release_memory)(sqlite3*); 244 | const char *(*errstr)(int); 245 | int (*stmt_busy)(sqlite3_stmt*); 246 | int (*stmt_readonly)(sqlite3_stmt*); 247 | int (*stricmp)(const 
char*,const char*); 248 | int (*uri_boolean)(const char*,const char*,int); 249 | sqlite3_int64 (*uri_int64)(const char*,const char*,sqlite3_int64); 250 | const char *(*uri_parameter)(const char*,const char*); 251 | char *(*vsnprintf)(int,char*,const char*,va_list); 252 | int (*wal_checkpoint_v2)(sqlite3*,const char*,int,int*,int*); 253 | }; 254 | 255 | /* 256 | ** The following macros redefine the API routines so that they are 257 | ** redirected throught the global sqlite3_api structure. 258 | ** 259 | ** This header file is also used by the loadext.c source file 260 | ** (part of the main SQLite library - not an extension) so that 261 | ** it can get access to the sqlite3_api_routines structure 262 | ** definition. But the main library does not want to redefine 263 | ** the API. So the redefinition macros are only valid if the 264 | ** SQLITE_CORE macros is undefined. 265 | */ 266 | #ifndef SQLITE_CORE 267 | #define sqlite3_aggregate_context sqlite3_api->aggregate_context 268 | #ifndef SQLITE_OMIT_DEPRECATED 269 | #define sqlite3_aggregate_count sqlite3_api->aggregate_count 270 | #endif 271 | #define sqlite3_bind_blob sqlite3_api->bind_blob 272 | #define sqlite3_bind_double sqlite3_api->bind_double 273 | #define sqlite3_bind_int sqlite3_api->bind_int 274 | #define sqlite3_bind_int64 sqlite3_api->bind_int64 275 | #define sqlite3_bind_null sqlite3_api->bind_null 276 | #define sqlite3_bind_parameter_count sqlite3_api->bind_parameter_count 277 | #define sqlite3_bind_parameter_index sqlite3_api->bind_parameter_index 278 | #define sqlite3_bind_parameter_name sqlite3_api->bind_parameter_name 279 | #define sqlite3_bind_text sqlite3_api->bind_text 280 | #define sqlite3_bind_text16 sqlite3_api->bind_text16 281 | #define sqlite3_bind_value sqlite3_api->bind_value 282 | #define sqlite3_busy_handler sqlite3_api->busy_handler 283 | #define sqlite3_busy_timeout sqlite3_api->busy_timeout 284 | #define sqlite3_changes sqlite3_api->changes 285 | #define sqlite3_close 
sqlite3_api->close 286 | #define sqlite3_collation_needed sqlite3_api->collation_needed 287 | #define sqlite3_collation_needed16 sqlite3_api->collation_needed16 288 | #define sqlite3_column_blob sqlite3_api->column_blob 289 | #define sqlite3_column_bytes sqlite3_api->column_bytes 290 | #define sqlite3_column_bytes16 sqlite3_api->column_bytes16 291 | #define sqlite3_column_count sqlite3_api->column_count 292 | #define sqlite3_column_database_name sqlite3_api->column_database_name 293 | #define sqlite3_column_database_name16 sqlite3_api->column_database_name16 294 | #define sqlite3_column_decltype sqlite3_api->column_decltype 295 | #define sqlite3_column_decltype16 sqlite3_api->column_decltype16 296 | #define sqlite3_column_double sqlite3_api->column_double 297 | #define sqlite3_column_int sqlite3_api->column_int 298 | #define sqlite3_column_int64 sqlite3_api->column_int64 299 | #define sqlite3_column_name sqlite3_api->column_name 300 | #define sqlite3_column_name16 sqlite3_api->column_name16 301 | #define sqlite3_column_origin_name sqlite3_api->column_origin_name 302 | #define sqlite3_column_origin_name16 sqlite3_api->column_origin_name16 303 | #define sqlite3_column_table_name sqlite3_api->column_table_name 304 | #define sqlite3_column_table_name16 sqlite3_api->column_table_name16 305 | #define sqlite3_column_text sqlite3_api->column_text 306 | #define sqlite3_column_text16 sqlite3_api->column_text16 307 | #define sqlite3_column_type sqlite3_api->column_type 308 | #define sqlite3_column_value sqlite3_api->column_value 309 | #define sqlite3_commit_hook sqlite3_api->commit_hook 310 | #define sqlite3_complete sqlite3_api->complete 311 | #define sqlite3_complete16 sqlite3_api->complete16 312 | #define sqlite3_create_collation sqlite3_api->create_collation 313 | #define sqlite3_create_collation16 sqlite3_api->create_collation16 314 | #define sqlite3_create_function sqlite3_api->create_function 315 | #define sqlite3_create_function16 sqlite3_api->create_function16 316 | 
#define sqlite3_create_module sqlite3_api->create_module 317 | #define sqlite3_create_module_v2 sqlite3_api->create_module_v2 318 | #define sqlite3_data_count sqlite3_api->data_count 319 | #define sqlite3_db_handle sqlite3_api->db_handle 320 | #define sqlite3_declare_vtab sqlite3_api->declare_vtab 321 | #define sqlite3_enable_shared_cache sqlite3_api->enable_shared_cache 322 | #define sqlite3_errcode sqlite3_api->errcode 323 | #define sqlite3_errmsg sqlite3_api->errmsg 324 | #define sqlite3_errmsg16 sqlite3_api->errmsg16 325 | #define sqlite3_exec sqlite3_api->exec 326 | #ifndef SQLITE_OMIT_DEPRECATED 327 | #define sqlite3_expired sqlite3_api->expired 328 | #endif 329 | #define sqlite3_finalize sqlite3_api->finalize 330 | #define sqlite3_free sqlite3_api->free 331 | #define sqlite3_free_table sqlite3_api->free_table 332 | #define sqlite3_get_autocommit sqlite3_api->get_autocommit 333 | #define sqlite3_get_auxdata sqlite3_api->get_auxdata 334 | #define sqlite3_get_table sqlite3_api->get_table 335 | #ifndef SQLITE_OMIT_DEPRECATED 336 | #define sqlite3_global_recover sqlite3_api->global_recover 337 | #endif 338 | #define sqlite3_interrupt sqlite3_api->interruptx 339 | #define sqlite3_last_insert_rowid sqlite3_api->last_insert_rowid 340 | #define sqlite3_libversion sqlite3_api->libversion 341 | #define sqlite3_libversion_number sqlite3_api->libversion_number 342 | #define sqlite3_malloc sqlite3_api->malloc 343 | #define sqlite3_mprintf sqlite3_api->mprintf 344 | #define sqlite3_open sqlite3_api->open 345 | #define sqlite3_open16 sqlite3_api->open16 346 | #define sqlite3_prepare sqlite3_api->prepare 347 | #define sqlite3_prepare16 sqlite3_api->prepare16 348 | #define sqlite3_prepare_v2 sqlite3_api->prepare_v2 349 | #define sqlite3_prepare16_v2 sqlite3_api->prepare16_v2 350 | #define sqlite3_profile sqlite3_api->profile 351 | #define sqlite3_progress_handler sqlite3_api->progress_handler 352 | #define sqlite3_realloc sqlite3_api->realloc 353 | #define sqlite3_reset 
sqlite3_api->reset 354 | #define sqlite3_result_blob sqlite3_api->result_blob 355 | #define sqlite3_result_double sqlite3_api->result_double 356 | #define sqlite3_result_error sqlite3_api->result_error 357 | #define sqlite3_result_error16 sqlite3_api->result_error16 358 | #define sqlite3_result_int sqlite3_api->result_int 359 | #define sqlite3_result_int64 sqlite3_api->result_int64 360 | #define sqlite3_result_null sqlite3_api->result_null 361 | #define sqlite3_result_text sqlite3_api->result_text 362 | #define sqlite3_result_text16 sqlite3_api->result_text16 363 | #define sqlite3_result_text16be sqlite3_api->result_text16be 364 | #define sqlite3_result_text16le sqlite3_api->result_text16le 365 | #define sqlite3_result_value sqlite3_api->result_value 366 | #define sqlite3_rollback_hook sqlite3_api->rollback_hook 367 | #define sqlite3_set_authorizer sqlite3_api->set_authorizer 368 | #define sqlite3_set_auxdata sqlite3_api->set_auxdata 369 | #define sqlite3_snprintf sqlite3_api->snprintf 370 | #define sqlite3_step sqlite3_api->step 371 | #define sqlite3_table_column_metadata sqlite3_api->table_column_metadata 372 | #define sqlite3_thread_cleanup sqlite3_api->thread_cleanup 373 | #define sqlite3_total_changes sqlite3_api->total_changes 374 | #define sqlite3_trace sqlite3_api->trace 375 | #ifndef SQLITE_OMIT_DEPRECATED 376 | #define sqlite3_transfer_bindings sqlite3_api->transfer_bindings 377 | #endif 378 | #define sqlite3_update_hook sqlite3_api->update_hook 379 | #define sqlite3_user_data sqlite3_api->user_data 380 | #define sqlite3_value_blob sqlite3_api->value_blob 381 | #define sqlite3_value_bytes sqlite3_api->value_bytes 382 | #define sqlite3_value_bytes16 sqlite3_api->value_bytes16 383 | #define sqlite3_value_double sqlite3_api->value_double 384 | #define sqlite3_value_int sqlite3_api->value_int 385 | #define sqlite3_value_int64 sqlite3_api->value_int64 386 | #define sqlite3_value_numeric_type sqlite3_api->value_numeric_type 387 | #define sqlite3_value_text 
sqlite3_api->value_text 388 | #define sqlite3_value_text16 sqlite3_api->value_text16 389 | #define sqlite3_value_text16be sqlite3_api->value_text16be 390 | #define sqlite3_value_text16le sqlite3_api->value_text16le 391 | #define sqlite3_value_type sqlite3_api->value_type 392 | #define sqlite3_vmprintf sqlite3_api->vmprintf 393 | #define sqlite3_overload_function sqlite3_api->overload_function 394 | #define sqlite3_prepare_v2 sqlite3_api->prepare_v2 395 | #define sqlite3_prepare16_v2 sqlite3_api->prepare16_v2 396 | #define sqlite3_clear_bindings sqlite3_api->clear_bindings 397 | #define sqlite3_bind_zeroblob sqlite3_api->bind_zeroblob 398 | #define sqlite3_blob_bytes sqlite3_api->blob_bytes 399 | #define sqlite3_blob_close sqlite3_api->blob_close 400 | #define sqlite3_blob_open sqlite3_api->blob_open 401 | #define sqlite3_blob_read sqlite3_api->blob_read 402 | #define sqlite3_blob_write sqlite3_api->blob_write 403 | #define sqlite3_create_collation_v2 sqlite3_api->create_collation_v2 404 | #define sqlite3_file_control sqlite3_api->file_control 405 | #define sqlite3_memory_highwater sqlite3_api->memory_highwater 406 | #define sqlite3_memory_used sqlite3_api->memory_used 407 | #define sqlite3_mutex_alloc sqlite3_api->mutex_alloc 408 | #define sqlite3_mutex_enter sqlite3_api->mutex_enter 409 | #define sqlite3_mutex_free sqlite3_api->mutex_free 410 | #define sqlite3_mutex_leave sqlite3_api->mutex_leave 411 | #define sqlite3_mutex_try sqlite3_api->mutex_try 412 | #define sqlite3_open_v2 sqlite3_api->open_v2 413 | #define sqlite3_release_memory sqlite3_api->release_memory 414 | #define sqlite3_result_error_nomem sqlite3_api->result_error_nomem 415 | #define sqlite3_result_error_toobig sqlite3_api->result_error_toobig 416 | #define sqlite3_sleep sqlite3_api->sleep 417 | #define sqlite3_soft_heap_limit sqlite3_api->soft_heap_limit 418 | #define sqlite3_vfs_find sqlite3_api->vfs_find 419 | #define sqlite3_vfs_register sqlite3_api->vfs_register 420 | #define 
sqlite3_vfs_unregister sqlite3_api->vfs_unregister 421 | #define sqlite3_threadsafe sqlite3_api->xthreadsafe 422 | #define sqlite3_result_zeroblob sqlite3_api->result_zeroblob 423 | #define sqlite3_result_error_code sqlite3_api->result_error_code 424 | #define sqlite3_test_control sqlite3_api->test_control 425 | #define sqlite3_randomness sqlite3_api->randomness 426 | #define sqlite3_context_db_handle sqlite3_api->context_db_handle 427 | #define sqlite3_extended_result_codes sqlite3_api->extended_result_codes 428 | #define sqlite3_limit sqlite3_api->limit 429 | #define sqlite3_next_stmt sqlite3_api->next_stmt 430 | #define sqlite3_sql sqlite3_api->sql 431 | #define sqlite3_status sqlite3_api->status 432 | #define sqlite3_backup_finish sqlite3_api->backup_finish 433 | #define sqlite3_backup_init sqlite3_api->backup_init 434 | #define sqlite3_backup_pagecount sqlite3_api->backup_pagecount 435 | #define sqlite3_backup_remaining sqlite3_api->backup_remaining 436 | #define sqlite3_backup_step sqlite3_api->backup_step 437 | #define sqlite3_compileoption_get sqlite3_api->compileoption_get 438 | #define sqlite3_compileoption_used sqlite3_api->compileoption_used 439 | #define sqlite3_create_function_v2 sqlite3_api->create_function_v2 440 | #define sqlite3_db_config sqlite3_api->db_config 441 | #define sqlite3_db_mutex sqlite3_api->db_mutex 442 | #define sqlite3_db_status sqlite3_api->db_status 443 | #define sqlite3_extended_errcode sqlite3_api->extended_errcode 444 | #define sqlite3_log sqlite3_api->log 445 | #define sqlite3_soft_heap_limit64 sqlite3_api->soft_heap_limit64 446 | #define sqlite3_sourceid sqlite3_api->sourceid 447 | #define sqlite3_stmt_status sqlite3_api->stmt_status 448 | #define sqlite3_strnicmp sqlite3_api->strnicmp 449 | #define sqlite3_unlock_notify sqlite3_api->unlock_notify 450 | #define sqlite3_wal_autocheckpoint sqlite3_api->wal_autocheckpoint 451 | #define sqlite3_wal_checkpoint sqlite3_api->wal_checkpoint 452 | #define sqlite3_wal_hook 
import functools


def formatit(elems, value):
    """Format *value* using the largest unit in *elems* that fits.

    elems -- sequence of (unit_size, unit_name) pairs ordered from the
             smallest unit to the largest (e.g. seconds ... years).
    value -- numeric amount expressed in the smallest unit.

    Returns a string like "1.5 minutes".  If *value* is smaller than every
    unit size, it is formatted with the smallest unit's name as a fallback.
    """
    # Walk from the largest unit down and take the first one that fits.
    for unit_size, unit_name in elems[::-1]:
        if unit_size <= value:
            return "%.1f %s" % (value / unit_size, unit_name)
    # Nothing fits: report the raw value in the smallest unit.
    return "%.1f %s" % (value, elems[0][1])


# Rough human-readable duration formatter; input is a number of seconds.
# BUG FIX: the "years" unit was 60*60*24*356 (356-day year, a typo);
# corrected to 365 days.
crudeTimeFormat = functools.partial(formatit, (
    (1.0, "seconds"),
    (60.0, "minutes"),
    (60.0 * 60.0, "hours"),
    (60.0 * 60.0 * 24.0, "days"),
    (7.0 * 60.0 * 60.0 * 24.0, "weeks"),
    (60.0 * 60.0 * 24.0 * 365.0, "years"),
))

if __name__ == '__main__':
    # Single-argument print() is valid on both Python 2 and Python 3.
    print(crudeTimeFormat(60.0 * 60.0 * 24.0 * 35.0))
def hamming(strA, strB):
    """Return the Hamming distance between two strings.

    Counts the positions at which the characters differ.  NOTE(review):
    like the original itertools.imap version, only the overlapping prefix
    is compared when the lengths differ -- extra characters are ignored.
    """
    # zip() stops at the shorter input, matching the old imap() semantics,
    # and works on both Python 2 and 3 (imap was removed in Python 3).
    return sum(a != b for a, b in zip(strA, strB))


# http://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance#Python
def levenshtein(s1, s2):
    """Return the Levenshtein (edit) distance between s1 and s2."""
    if len(s1) < len(s2):
        return levenshtein(s2, s1)
    # len(s1) >= len(s2)
    if len(s2) == 0:
        return len(s1)
    # range() objects are indexable on Python 3 too, so xrange is not needed.
    previous_row = range(len(s2) + 1)
    for i, c1 in enumerate(s1):
        current_row = [i + 1]
        for j, c2 in enumerate(s2):
            # j+1 instead of j since previous_row and current_row are one
            # character longer than s2.
            insertions = previous_row[j + 1] + 1
            deletions = current_row[j] + 1
            substitutions = previous_row[j] + (c1 != c2)
            current_row.append(min(insertions, deletions, substitutions))
        previous_row = current_row
    return previous_row[-1]


# normalised levenshtein
def levenshtein_n(s1, s2):
    """Return the Levenshtein distance normalised to [0, 1].

    0 means identical, 1 means completely different.  Two empty strings
    compare as 0.
    """
    if len(s1) + len(s2) == 0:
        return 0
    return levenshtein(s1, s2) / float(max(len(s1), len(s2)))


if __name__ == '__main__':
    # Interactive demo.  raw_input only exists on Python 2; fall back to
    # input() on Python 3.
    try:
        _input = raw_input
    except NameError:
        _input = input
    while True:
        sa = _input("Word A:")
        sb = _input("Word B:")
        # Single-argument print() is valid on both Python 2 and Python 3.
        print("max word len %d" % max(len(sa), len(sb)))
        print("hamming: %d" % hamming(sa, sb))
        print("levenshtein: %d" % levenshtein(sa, sb))
        print("levenshtein_n: %s" % levenshtein_n(sa, sb))
        print("stars from 5: %s" % ((1 - levenshtein_n(sa, sb)) * 5))
        # round(5 * (1 - (levenshtein(t1, t2) / max(len(t1), len(t2)))))
from Imgur.Factory import Factory
import sys
import json
from urllib2 import HTTPError


def getAndSaveNewToken(factory, config, configPath):
    """Interactively run the Imgur OAuth2 PIN flow and persist the result.

    Prompts the user for a PIN (via the authorize URL built from *config*),
    swaps the PIN for access/refresh tokens, stores the new refresh token
    in *config*, and rewrites the JSON config file at *configPath*.

    Returns True on success.  Re-raises HTTPError (after printing the
    server response) when the token swap fails.
    """
    pin = raw_input('Please visit this URL to get a PIN to authorize: \n' + factory.getAPIUrl() + "oauth2/authorize?client_id=" + config['client_id'] + '&response_type=pin\n and insert that pin.')
    imgur = factory.buildAPI()
    req = factory.buildRequestOAuthTokenSwap('pin', pin)
    try:
        res = imgur.retrieveRaw(req)
    except HTTPError as e:
        print("Error %d\n%s" % (e.code, e.read().decode('utf8')))
        raise e

    print("Access Token: %s\nRefresh Token: %s\nExpires: %d seconds from now." % (
        res[1]['access_token'],
        res[1]['refresh_token'],
        res[1]['expires_in']
    ))
    config['refresh_token'] = res[1]['refresh_token']
    # was: json.dump(config, open(configPath, 'w+')) -- leaked the handle
    # and opened in read/write mode; 'w' + context manager closes reliably.
    with open(configPath, 'w') as out:
        json.dump(config, out)
    return True


if __name__ == '__main__':
    config = None
    # Narrowed from bare except: IOError covers a missing/unreadable file,
    # ValueError covers malformed JSON (json raises ValueError on Python 2;
    # JSONDecodeError subclasses it on Python 3).
    try:
        with open('../config.json', 'r') as fd:
            raw = fd.read()
    except IOError:
        print("config file [config.json] not found.")
        sys.exit(1)
    try:
        config = json.loads(raw)
    except ValueError:
        print("invalid json in config file.")
        sys.exit(1)
    factory = Factory(config)
    getAndSaveNewToken(factory, config, '../config.json')
10 | # 11 | 12 | def lognprint(*args): 13 | print 'CommentSender:', args 14 | 15 | class CommentSender(object): 16 | """ 17 | Contains a generator to send comments from a queue 18 | Job format: DEPRECATED 19 | (galleryId, message, retries, parentCommentId, (child comment)) 20 | (galleryId, message, retries, parentCommentId ) 21 | (galleryId, message, retries, -1) 22 | # This should be the correct one 23 | (TYPE_COMMENT_OR_REPLY, galleryId, message, retries, -1) 24 | (TYPE_TAG, galleryId, tag, retries, upvote (boolean)) 25 | """ 26 | 27 | TYPE_COMMENT_OR_REPLY = 1 28 | TYPE_TAG = 2 29 | 30 | def __init__(self, factory, config, commentWait=60, failWait=120): 31 | self.run = False 32 | self.lastPost = 0 33 | self.commentWait, self.failWait = commentWait, failWait 34 | self.queue = deque() 35 | self.commenter = DirtyCommenter(factory, config) 36 | 37 | def startSendLoop(self): 38 | self.run = True 39 | queue = self.queue 40 | commenter = self.commenter 41 | curWait = self.commentWait 42 | while self.run: 43 | t = time() 44 | if len(queue) == 0 or t-self.lastPost < curWait: 45 | yield False 46 | continue 47 | self.lastPost = t 48 | msg = queue.popleft() 49 | if msg[0] == self.TYPE_COMMENT_OR_REPLY: 50 | if msg[4] == -1: 51 | commentId = commenter.writeComment(msg[1], msg[2], 1) 52 | else: 53 | commentId = commenter.writeReply(msg[1], msg[4], msg[2], 1) 54 | if commentId == False: 55 | if msg[3] < 1: 56 | raise Exception("Max retries for comment send:", msg) 57 | lognprint('%i retries remaining to send comment: %s' % (msg[3]-1, msg)) 58 | msg[3] -= 1 59 | queue.appendleft(msg) 60 | curWait = self.failWait 61 | continue 62 | curWait = self.commentWait 63 | if len(msg) == 6: 64 | msg[5][4] = commentId 65 | queue.appendleft(msg[5]) 66 | elif msg[0] == self.TYPE_TAG: 67 | ret = commenter.sendAuthReqMessage(('gallery', msg[1], "vote", "tag", msg[2], ("up" if msg[4] else "down")), {}, retries=1) 68 | #lognprint(u"Tag send response from", msg[1], ret) 69 | if not ret: 70 | if 
msg[3] < 1: 71 | raise Exception("Max retries for tag send:", msg) 72 | lognprint('%i retries remaining to send tag: %s' % (msg[3]-1, msg)) 73 | msg[3] -= 1 74 | queue.appendleft(msg) 75 | curWait = self.failWait 76 | else: 77 | curWait = self.commentWait 78 | yield True 79 | 80 | 81 | # TODO: use this 1 11 82 | def appendComment(self, galleryId, message, retries, parentCommentId=-1, childComment=None): 83 | if childComment: 84 | self.queue.append([self.TYPE_COMMENT_OR_REPLY, galleryId, message, retries, parentCommentId, childComment]) 85 | else: 86 | self.queue.append([self.TYPE_COMMENT_OR_REPLY, galleryId, message, retries, parentCommentId]) 87 | 88 | def appendTag(self, gallery, tag, retries, upvote=True): 89 | self.queue.append([self.TYPE_TAG, gallery, quote(tag), retries, upvote]) 90 | 91 | if __name__ == '__main__': 92 | import sys 93 | import json 94 | from Imgur.Factory import Factory 95 | 96 | 97 | 98 | config = None 99 | try: 100 | fd = open('../config.json', 'r') 101 | except: 102 | lognprint("config file [config.json] not found.") 103 | sys.exit(1) 104 | try: 105 | config = json.loads(fd.read()) 106 | except: 107 | lognprint("invalid json in config file.") 108 | sys.exit(1) 109 | factory = Factory(config) 110 | 111 | sender = CommentSender(factory, config, 30) 112 | sender.appendComment('z2slH3u', 'message', 1) 113 | for i in sender.startSendLoop(): pass 114 | -------------------------------------------------------------------------------- /worker/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SleepProgger/RepostStatistics/c9e652d4a98e85105b543deabfb9eda2ff2a5271/worker/__init__.py --------------------------------------------------------------------------------