├── .gitignore ├── README.md ├── lib ├── Api.py ├── Chats.py ├── ChatsUI.py ├── Common │ ├── Common.py │ ├── README.md │ └── __init__.py ├── Database.py ├── Dialogs.py ├── DialogsUI.py ├── Media.py ├── Messages.py ├── MessagesUI.py ├── UIApi.py ├── Users.py ├── __init__.py └── vk_auth.py └── vk-backup.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | backup/* 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # VK-Backup 2 | 3 | This script can backup data from vk.com onto your storage. It will create backup directory, fill it by data and update it in next run time. 4 | 5 | I returned to this project and found that oh boy oh boy it's not working anymore... What a shame, 6 | so I tried my best to figure out what's up with it - and on python2 it was run well, but user 7 | messages were not working, so I found that vk.com don't give access to `messages` for some weird 8 | third-party apps (like vk-backup) anymore. I checked their backup solution - and it's a shame: no 9 | media downloading - just really bad thing they've prepared to say "oh we have backup", NO you 10 | don't! 11 | 12 | I ported it to python3 and also prepared a workaround to get the user messages through UI API. So 13 | it become a little bit less usable, but at least you can get something more than available tools... 14 | 15 | ## Features: 16 | 17 | * Store known users & friends 18 | * Dialogs 19 | * Chats 20 | * Attachments media 21 | * Wall 22 | * Photos 23 | 24 | ## Usage: 25 | 26 | To use - first you need to download the repository files - both clone or zip files will work well. 27 | ```sh 28 | git clone https://github.com/rabits/vk-backup.git 29 | ``` 30 | 31 | Run `./vk-backup/vk-backup.py --help` to see the available options. 32 | 33 | You can use config ini file to store some of your configuration... 34 | 35 | There are 2 API are used - and both needs their own way to authenticate and get required tokens: 36 | * Regular API provided by vk.com: stable - used to get non-messages data 37 | * UI API of vk.com: unstable one - is used to get messages data 38 | 39 | So in order to properly run, you need to find those UI API credentials: 40 | 1. Login to vk.com with the account you want to backup 41 | 2. Open browser dev console (press F12 or find in menu), switch to Network tab 42 | 3. Go to your VK Messanger and find in dev console link starts with URL `https://vk.com/al_im.php` 43 | 4. Copy it's value as CURL 44 | 45 | After that you need to run vk-backup like: 46 | ``` 47 | ./vk-backup/vk-backup.py --config-file cfg.ini -- 48 | ``` 49 | It will be a quite huge command, that will look like: 50 | ``` 51 | ./vk-backup/vk-backup.py --config-file cfg.ini -- curl 'https://vk.com/al_im.php?act=a_start' --compressed -X POST -H 'User-Agent: Mozilla/5.0 ... 52 | ``` 53 | 54 | It will ask you to enter the provided link into your browser and paste back the API url after login. 55 | 56 | ## Privacy 57 | 58 | The script has no intent to anyhow share your login/password or any other private information with 59 | the others. The sole purpose of the script is to download your profile data from vk.com. Script has 60 | a relatively small codebase, so you can check yourself - only vk.com is accessed and no write API 61 | functions are used in it. 62 | 63 | ## TODO 64 | 65 | * Groups 66 | * Photo albums, audio, video 67 | * Advanced configuration 68 | 69 | ## Known issues 70 | 71 | Tested on 2 profiles and works well, but if you will find some issues - please don't hesitate and 72 | create a ticket or send PR to fix it, thanks! 73 | 74 | ## Requirements 75 | * python 3.5 76 | 77 | ## Support 78 | If you like kitties or my opensource development - you can support me by a small bitcoin donation :) 79 | 80 | My bitcoin wallet: `15phQNwkVs3fXxvxzBkhuhXA2xoKikPfUy` 81 | 82 | -------------------------------------------------------------------------------- /lib/Api.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: UTF-8 -*- 3 | '''VK-Backup Api 4 | 5 | Author: Rabit 6 | License: GPL v3 7 | Description: Can interact with vk api 8 | Required: python3.5 9 | ''' 10 | 11 | import time, json 12 | import urllib.request 13 | from urllib.parse import urlencode 14 | 15 | from . import Common as c 16 | from . import vk_auth 17 | 18 | c.log('debug', 'Init Api') 19 | 20 | # Session start time 21 | _START_TIME = int(time.time()) 22 | 23 | # Vk application ID 24 | _CLIENT_ID = '4603710' 25 | 26 | # Get token & user_id by login 27 | (_TOKEN, _USER_ID) = vk_auth.auth(c.cfg('user'), c.cfg('password'), _CLIENT_ID, "messages,audio,docs,video,photos,wall,friends,stories") 28 | 29 | # Last time api call to prevent service overloading 30 | _LAST_API_CALL = 0 31 | 32 | def request(method, params): 33 | global _LAST_API_CALL 34 | diff = time.time() - _LAST_API_CALL 35 | if diff < 0.4: 36 | time.sleep(0.4) 37 | _LAST_API_CALL = time.time() 38 | 39 | data = {} 40 | for retry in range(5): 41 | try: 42 | params['access_token'] = _TOKEN 43 | params['v'] = '5.81' 44 | url = "https://api.vk.com/method/" + method 45 | req = urllib.request.Request(url) 46 | with urllib.request.urlopen(req, urlencode(params).encode()) as ret: 47 | encoding = ret.info().get_content_charset('utf-8') 48 | data = json.loads(ret.read().decode(encoding)) 49 | if 'response' not in data: 50 | if 'error' in data: 51 | c.log('warning', 'Api responded error: %s' % data['error']['error_msg']) 52 | if data['error']['error_code'] in [7, 15, 212, 801]: 53 | # 7 - No rights to execute this method 54 | # 15 - Access denied 55 | # 212 - Access to post comments denied 56 | # 801 - Comments for this video are closed 57 | return 58 | elif data['error']['error_code'] in [10]: 59 | continue 60 | else: 61 | raise Exception('unknown error code %i, "%s", data: %s' % (data['error']['error_code'], method, data)) 62 | else: 63 | raise Exception('no correct response while calling api method "%s", data: %s' % (method, data)) 64 | break 65 | except Exception as e: 66 | c.log('warning', 'Retry request %s %i (5): %s' % (method, retry, str(e))) 67 | time.sleep(2.0*(retry+1)) 68 | 69 | if 'response' not in data: 70 | c.log('error', 'Unable to process request') 71 | return None 72 | 73 | return data['response'] 74 | 75 | def getUserId(): 76 | return _USER_ID 77 | 78 | def getStartTime(): 79 | return _START_TIME 80 | 81 | -------------------------------------------------------------------------------- /lib/Chats.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: UTF-8 -*- 3 | '''VK-Backup Chats 4 | 5 | Author: Rabit 6 | License: GPL v3 7 | Description: Chats management 8 | Required: python3.5 9 | ''' 10 | 11 | from . import Common as c 12 | 13 | from .Database import Database 14 | 15 | from . import Api 16 | from . import Messages 17 | from .Users import S as Users 18 | 19 | class Chats(Database): 20 | def requestChatInfo(self, chat_id): 21 | if str(chat_id) not in self.data: 22 | self.data[str(chat_id)] = { 23 | 'id': chat_id, 24 | 'log': [] 25 | } 26 | data = Api.request('messages.getChat', {'chat_id': chat_id}) 27 | if data == None: 28 | return 29 | if len(data['users']) > 0: 30 | Users.requestUsers(data['users']) 31 | self.data[str(chat_id)]['data'] = data 32 | 33 | def requestChatMessages(self, chat_id): 34 | chat = self.getChat(chat_id) 35 | c.log('info', 'Requesting chat messages for chat %s "%s"' % (chat_id, chat['data']['title'])) 36 | 37 | Messages.requestMessages({'chat_id': chat_id}, self.data[str(chat_id)]) 38 | 39 | def getChat(self, chat_id): 40 | if str(chat_id) not in self.data: 41 | self.requestChatInfo(chat_id) 42 | return self.data[str(chat_id)] 43 | 44 | S = Chats() 45 | -------------------------------------------------------------------------------- /lib/ChatsUI.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: UTF-8 -*- 3 | '''VK-Backup ChatsUI 4 | 5 | Author: Rabit 6 | License: GPL v3 7 | Description: Chats UI API management 8 | Required: python3.5 9 | ''' 10 | 11 | from . import Common as c 12 | 13 | from .Database import Database 14 | 15 | from . import UIApi 16 | from . import MessagesUI 17 | from .Users import S as Users 18 | 19 | class Chats(Database): 20 | def requestChatInfo(self, chat_id): 21 | real_chat_id = chat_id - 2000000000 if chat_id > 2000000000 else chat_id 22 | if str(real_chat_id) not in self.data: 23 | self.data[str(real_chat_id)] = { 24 | 'id': chat_id, 25 | 'log': [] 26 | } 27 | 28 | # Getting additional data about chat 29 | # payload: 30 | # - 31 | # - - name: 32 | # memberIds: 33 | # ownerId: 34 | chat = UIApi.request('al_im.php', { 35 | "_smt": "im:1", 36 | "act": "a_start", 37 | "al": "1", 38 | "block": "true", 39 | "gid": "0", 40 | "history": "0", # We don't need history here, just chat data 41 | "im_v": "3", 42 | "msgid": "0", 43 | "peer": str(chat_id), 44 | "prevpeer": "0", 45 | }) 46 | if chat == None: 47 | return 48 | if not isinstance(chat[1][0], dict): 49 | c.log('error', 'VK returned malformed payload, please make sure provided curl session token is not expired: %s' % (chat,)) 50 | return 51 | data = { 52 | 'id': chat_id, 53 | 'admin_id': chat[1][0].get('ownerId', 0), 54 | 'title': chat[1][0].get('name', ''), 55 | 'type': 'chat', 56 | 'users': chat[1][0].get('memberIds', []) or [], 57 | } 58 | if len(data['users']) > 0: 59 | Users.requestUsers(data['users']) 60 | self.data[str(real_chat_id)]['data'] = data 61 | 62 | return self.data[str(real_chat_id)] 63 | 64 | def requestChatMessages(self, chat_id): 65 | chat = self.getChat(chat_id) 66 | if chat: 67 | c.log('info', 'Requesting chat messages for chat %s "%s"' % (chat_id, chat['data']['title'])) 68 | else: 69 | c.log('info', 'Requesting chat messages for chat %s' % (chat_id,)) 70 | 71 | real_chat_id = chat_id - 2000000000 if chat_id > 2000000000 else chat_id 72 | MessagesUI.requestMessages(chat_id, self.data[str(real_chat_id)]) 73 | 74 | def getChat(self, chat_id): 75 | real_chat_id = chat_id - 2000000000 if chat_id > 2000000000 else chat_id 76 | if str(real_chat_id) not in self.data: 77 | self.requestChatInfo(chat_id) 78 | return self.data[str(real_chat_id)] 79 | 80 | S = Chats() 81 | -------------------------------------------------------------------------------- /lib/Common/Common.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: UTF-8 -*- 3 | '''Common 1.0.1 4 | 5 | Author: Rabit 6 | License: GPL v3 7 | Description: A part of Common Library 8 | Required: python2.7 9 | ''' 10 | 11 | from sys import stderr, stdout, exit as sysexit 12 | from os import name as os_name 13 | if os_name == 'nt': 14 | geteuid = lambda: 1 15 | else: 16 | from os import geteuid 17 | from time import strftime, time, localtime 18 | 19 | from optparse import OptionParser 20 | import configparser 21 | 22 | def init_begin(doc): 23 | # Check that script not runned by root 24 | if geteuid() == 0: 25 | log('error', 'Script is running by the root user, but this is really dangerous! Please use unprivileged user.') 26 | # sysexit(1) 27 | 28 | # Get name & version from doc 29 | try: 30 | tmp = doc.split('\n', 1)[0].split(' ') 31 | global _VERSION, _NAME 32 | _VERSION = tmp.pop() 33 | _NAME = ' '.join(tmp) 34 | except: 35 | log('warning', 'Unable to get script "%s %s" from provided doc: "%s"' % (_NAME, _VERSION, doc)) 36 | 37 | # Prepare optparser 38 | global _PARSER, option 39 | _PARSER = OptionParser(usage='./%prog [options]', version=_VERSION) 40 | option = _PARSER.add_option 41 | 42 | def init_end(): 43 | # Add default options and parse cmd 44 | option('--config-file', type='string', dest='config-file', metavar='FILE', 45 | default=None, help='get configuration from ini file (replaced by command line parameters) [%default]') 46 | option('--config-example', action='callback', callback=_exampleini, 47 | default=None, help='print example ini config file to stdout') 48 | option('--log-file', type='string', dest='log-file', metavar='FILE', 49 | default=None, help='copy log output to file [%default]') 50 | option('-v', '--verbose', action='store_true', dest='verbose', 51 | help='verbose mode - moar output to stdout') 52 | option('-q', '--quiet', action='store_false', dest='verbose', 53 | help='silent mode - no output to stdout') 54 | global _PARSER, _CFG, _ARGS 55 | (_CFG, _ARGS) = _PARSER.parse_args() 56 | _CFG = vars(_CFG) 57 | 58 | # Parsing config file 59 | if _CFG['config-file'] != None: 60 | try: 61 | config = configparser.ConfigParser() 62 | config.read(_CFG['config-file']) 63 | 64 | for key in _PARSER.option_list: 65 | if None not in [key.dest, key.type]: 66 | if _CFG[key.dest] is key.default: 67 | try: 68 | if key.type in ['int', 'float', 'boolean']: 69 | val = getattr(config, 'get%s' % key.type)(_NAME, key.dest) 70 | else: 71 | val = config.get(_NAME, key.dest) 72 | _CFG[key.dest] = val 73 | except configparser.NoOptionError: 74 | continue 75 | except: 76 | _PARSER.error('Unable to parse config file. Please specify header and available options') 77 | 78 | # LOGGING 79 | if _CFG['log-file'] != None: 80 | class Tee(object): 81 | def __init__(self, *files): 82 | self.files = files 83 | def write(self, obj): 84 | for f in self.files: 85 | f.write(obj) 86 | f.flush() 87 | 88 | import codecs 89 | logfile = codecs.open(_CFG['log-file'], 'a', 'utf-8') 90 | global stdout, stderr 91 | stdout = Tee(stdout, logfile) 92 | stderr = Tee(stderr, logfile) 93 | 94 | global log 95 | if _CFG['verbose'] == True: 96 | # Debug log 97 | import inspect 98 | def newlog(logtype, message): 99 | func = inspect.currentframe().f_back 100 | log_time = time() 101 | if logtype != 'error': 102 | stdout.write('[%s.%s %s, line:%03u]:\t %s\n' % (strftime('%H:%M:%S', localtime(log_time)), str(log_time % 1)[2:8], logtype.upper(), func.f_lineno, ' ' * (len(inspect.stack()) - 1) + message)) 103 | else: 104 | stderr.write('[%s.%s %s, line:%03u]:\t %s\n' % (strftime('%H:%M:%S', localtime(log_time)), str(log_time % 1)[2:8], logtype.upper(), func.f_lineno, ' ' * (len(inspect.stack()) - 1) + message)) 105 | log = newlog 106 | elif _CFG['verbose'] == False: 107 | # Only error log 108 | def newlog(logtype, message): 109 | if logtype.lower() == 'error': 110 | stderr.write('[%s %s]:\t %s\n' % (strftime('%H:%M:%S'), logtype.upper(), message)) 111 | log = newlog 112 | 113 | def log(logtype, message): 114 | # Default non-debug log 115 | if logtype.lower() != 'debug': 116 | if logtype.lower() != 'error': 117 | stdout.write('[%s %s]:\t %s\n' % (strftime('%H:%M:%S'), logtype.upper(), message)) 118 | else: 119 | stderr.write('[%s %s]:\t %s\n' % (strftime('%H:%M:%S'), logtype.upper(), message)) 120 | 121 | def option(): 122 | log('error', 'Unable to use option before init_start(__doc__) execution.') 123 | sysexit(1) 124 | 125 | _NAME = '' 126 | _VERSION = '' 127 | _PARSER = None 128 | _CFG = {} 129 | _ARGS = [] 130 | 131 | def _exampleini(option, opt, value, parser): 132 | print('[%s]' % _NAME) 133 | for key in parser.option_list: 134 | if None not in [key.dest, key.type] and key.dest != 'config-file': 135 | print('%s: %s' % (key.dest, key.default)) 136 | sysexit() 137 | -------------------------------------------------------------------------------- /lib/Common/README.md: -------------------------------------------------------------------------------- 1 | Common Python 2 | ============= 3 | 4 | Common python tools for advanced scripts. Please, look into __init__.py to get more information about library interface and usage example. 5 | 6 | Support: 7 | -------- 8 | If you want to support my open-source development (or whatever), you can do it by a small Bitcoin donation: 9 | 10 | My bitcoin wallet: `15phQNwkVs3fXxvxzBkhuhXA2xoKikPfUy` 11 | 12 | -------------------------------------------------------------------------------- /lib/Common/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: UTF-8 -*- 3 | '''Common Library 1.0 4 | 5 | Author: Rabit 6 | License: GPL v3 7 | Description: Common script tools 8 | Required: python2.7 9 | 10 | Reserved options: 11 | --config-file 12 | --config-example 13 | --log-file 14 | -v --verbose 15 | -q --quiet 16 | 17 | Usage: 18 | #!/usr/bin/python 19 | # -*- coding: UTF-8 -*- 20 | """Script Name 0.9 21 | Some additional description 22 | """ 23 | from lib import Common as c 24 | c.init_begin(__doc__) 25 | c.option('-u', '--user', type='string', dest='user', metavar='NAME', default=None, help='Username (required)') 26 | c.init_end() 27 | 28 | c.log('info', 'Script %s v%s was started, %s!' % (c.name(), c.version(), c.cfg('user'))) 29 | ''' 30 | 31 | from . import Common as C 32 | 33 | ## init_begin(doc) 34 | # Begin block of common library init 35 | # 36 | def init_begin(doc): 37 | global option 38 | C.init_begin(doc) 39 | option = C.option 40 | 41 | ## init_end() 42 | # End block of common library init 43 | # 44 | def init_end(): 45 | global log 46 | C.init_end() 47 | log = C.log 48 | 49 | ## log(logtype, message) 50 | # Log message types: 51 | # debug - displayed only in verbose mode 52 | # ... - any other messages you want 53 | # error - displayed even if verbose set to quiet 54 | # 55 | log = C.log 56 | 57 | ## option(...) link to OptParser.add_option(...) 58 | # Set options data in init block 59 | # 60 | option = C.option 61 | 62 | ## cfg(key, val = None) 63 | # Return cmd or config option 64 | # Will replace cfg with key by val if val is set 65 | # 66 | def cfg(key, val = None): 67 | if val != None: 68 | C._CFG[key] = val 69 | return C._CFG[key] 70 | 71 | ## args() 72 | # Return script input args 73 | # 74 | def args(): 75 | return C._ARGS 76 | 77 | ## name() 78 | # Return script name from __doc__ 79 | # 80 | def name(): 81 | return C._NAME 82 | 83 | ## version() 84 | # Return script version from __doc__ 85 | # 86 | def version(): 87 | return C._VERSION 88 | 89 | -------------------------------------------------------------------------------- /lib/Database.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: UTF-8 -*- 3 | '''VK-Backup Database 4 | 5 | Author: Rabit 6 | License: GPL v3 7 | Description: Database storage class 8 | Required: python3.5 9 | ''' 10 | 11 | from . import Common as c 12 | 13 | import json, os 14 | 15 | class Database: 16 | def __init__(self): 17 | c.log('debug', 'Init %s' % self.__class__.__name__) 18 | 19 | self.data = {} 20 | 21 | self.path = os.path.join(c.cfg('backup-dir'), self.__class__.__name__) 22 | 23 | # Loading local data from the storage 24 | self.load() 25 | 26 | # ids - array of indexes to store, otherwise will store everything 27 | def store(self, ids = None): 28 | if not ids: 29 | ids = list(self.data.keys()) 30 | c.log('debug', 'Store %s (%i)' % (self.__class__.__name__, len(ids))) 31 | for i in ids: 32 | path = os.path.join(self.path, str(i) + '.json') 33 | if not os.path.isdir(os.path.dirname(path)): 34 | os.makedirs(os.path.dirname(path)) 35 | with open(path, 'w') as outfile: 36 | try: 37 | json.dump(self.data[str(i)], outfile, indent=1, ensure_ascii=False, sort_keys=True) 38 | except Exception as e: 39 | c.log('error', 'Unable to save to json, skipping: %s' % (self.data[str(i)],)) 40 | 41 | def load(self, subdir = None): 42 | path = self.path if subdir == None else os.path.join(self.path, subdir) 43 | 44 | if not os.path.isdir(path): 45 | c.log('debug', 'DB directory "%s" not found' % path) 46 | return 47 | 48 | listdir = os.listdir(path) 49 | dirs = [ d for d in listdir if d != 'storage' and os.path.isdir(os.path.join(path, d)) ] 50 | 51 | for d in dirs: 52 | if subdir == None: 53 | self.load(d) 54 | else: 55 | self.load(os.path.join(subdir, d)) 56 | 57 | files = [ f for f in listdir if f.endswith('.json') ] 58 | c.log('debug', 'Loading files %s %s (%i)' % (self.__class__.__name__, path, len(files))) 59 | 60 | for f in files: 61 | filename = os.path.join(path, f) 62 | data_path = os.path.splitext(f)[0] if subdir == None else os.path.join(subdir, os.path.splitext(f)[0]) 63 | with open(filename) as fd: 64 | try: 65 | data = json.load(fd) 66 | self.data[data_path] = data 67 | except Exception as e: 68 | c.log('error', 'Unable to load json, skipping: %s' % (filename,)) 69 | 70 | -------------------------------------------------------------------------------- /lib/Dialogs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: UTF-8 -*- 3 | '''VK-Backup Dialogs 4 | 5 | Author: Rabit 6 | License: GPL v3 7 | Description: Dialogs management 8 | Required: python3.5 9 | ''' 10 | 11 | from . import Common as c 12 | 13 | from .Database import Database 14 | 15 | from . import Api 16 | from . import Messages 17 | from .Users import S as Users 18 | from .Chats import S as Chats 19 | 20 | class Dialogs(Database): 21 | def requestDialogs(self): 22 | c.log('debug', 'Requesting dialogs') 23 | 24 | req_data = {'count': 200, 'preview_length': 1, 'offset': 0} 25 | 26 | while True: 27 | data = Api.request('messages.getDialogs', req_data) 28 | if data == None: 29 | return 30 | count = data['count'] 31 | data = data['items'] 32 | for d in data: 33 | if 'chat_id' in d['message']: 34 | Chats.requestChatMessages(d['message']['chat_id']) 35 | else: 36 | self.requestMessages(d['message']['user_id']) 37 | 38 | req_data['offset'] += 200 39 | if req_data['offset'] >= count: 40 | break 41 | 42 | def requestMessages(self, user_id): 43 | user = Users.getUser(user_id) 44 | c.log('info', 'Requesting messages for user %s %s %s' % (user_id, user['data']['first_name'], user['data']['last_name'])) 45 | 46 | if user_id not in self.data: 47 | self.data[str(user_id)] = { 48 | 'id' : user_id, 49 | 'log' : [] 50 | } 51 | 52 | Messages.requestMessages({'user_id': user_id}, self.data[str(user_id)]) 53 | 54 | def getMessages(self, user_id): 55 | if str(user_id) not in self.data: 56 | self.requestMessages(user_id) 57 | return self.data[str(user_id)] 58 | 59 | S = Dialogs() 60 | -------------------------------------------------------------------------------- /lib/DialogsUI.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: UTF-8 -*- 3 | '''VK-Backup MessagesUI 4 | 5 | Author: Rabit 6 | License: GPL v3 7 | Description: Messages UI API management 8 | Required: python3.5 9 | ''' 10 | 11 | from . import Common as c 12 | 13 | from .Database import Database 14 | 15 | from . import UIApi 16 | from . import MessagesUI 17 | from .Users import S as Users 18 | from .ChatsUI import S as ChatsUI 19 | 20 | class Dialogs(Database): 21 | def requestDialogs(self): 22 | c.log('debug', 'Requesting dialogs through UI API') 23 | 24 | # Getting the dialogs data 25 | # Processed format: 26 | # payload: 27 | # - 28 | # - - - 29 | # - 30 | dialogs = UIApi.request('al_im.php', { 31 | "act":"a_dialogs_preload", 32 | "al":"1", 33 | "gid":"0", 34 | "im_v":"3", 35 | "rs":"", 36 | }) 37 | if dialogs == None: 38 | c.log('error', 'No dialogs returned, please make sure provided curl session token is not expired: %s' % (dialogs,)) 39 | return 40 | 41 | if not isinstance(dialogs[1][0], list): 42 | c.log('error', 'VK returned malformed payload, please make sure provided curl session token is not expired: %s' % (dialogs,)) 43 | return 44 | 45 | for d in dialogs[1][0]: 46 | did = d[0] 47 | dname = d[1] 48 | c.log('debug', 'Processing messages: %s %s' % (did, dname)) 49 | 50 | if did > 2000000000: 51 | ChatsUI.requestChatMessages(did) 52 | ChatsUI.store([did - 2000000000]) 53 | else: 54 | self.requestMessages(did) 55 | self.store([did]) 56 | 57 | def requestMessages(self, dialog_id): 58 | if dialog_id > 0: 59 | user = Users.getUser(dialog_id) 60 | c.log('info', 'Requesting messages for user: %s %s %s' % (dialog_id, user['data'].get('first_name'), user['data'].get('last_name'))) 61 | else: 62 | c.log('info', 'Requesting messages for group: %s' % (dialog_id,)) 63 | 64 | if str(dialog_id) not in self.data: 65 | self.data[str(dialog_id)] = { 66 | 'id' : dialog_id, 67 | 'log' : [] 68 | } 69 | 70 | MessagesUI.requestMessages(dialog_id, self.data[str(dialog_id)]) 71 | 72 | def getMessages(self, dialog_id): 73 | if str(dialog_id) not in self.data: 74 | self.requestMessages(dialog_id) 75 | return self.data[str(dialog_id)] 76 | 77 | S = Dialogs() 78 | -------------------------------------------------------------------------------- /lib/Media.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: UTF-8 -*- 3 | '''VK-Backup Media 4 | 5 | Author: Rabit 6 | License: GPL v3 7 | Description: Media download 8 | Required: python3.5 9 | ''' 10 | 11 | import time, urllib.request, urllib.error, urllib.parse, os 12 | from urllib.parse import urlparse 13 | import threading 14 | from queue import Queue 15 | 16 | from . import Common as c 17 | 18 | from .Database import Database 19 | 20 | from . import Api 21 | 22 | _PHOTO_TYPES = {'s', 'm', 'o', 'p', 'q', 'r', 'x', 'y', 'z', 'w'} 23 | 24 | class Media(Database): 25 | class Downloader(threading.Thread): 26 | def __init__(self, queue, report): 27 | threading.Thread.__init__(self) 28 | self.queue = queue 29 | self.report = report 30 | self.waiting = True 31 | self._stop = threading.Event() 32 | 33 | def run(self): 34 | c.log('debug', 'Downloader thread started') 35 | while not self._stop.isSet(): 36 | if not self.queue.empty(): 37 | self.waiting = False 38 | url = self.queue.get() 39 | response = url.download() 40 | if response == False and url.tried < 3: 41 | self.queue.put(url) 42 | elif response == False and url.tried == 3: 43 | self.report['failure'].append(url) 44 | c.log('warning', 'Downloader thread failed: %s : %s' % (url.url, url.error)) 45 | elif response == True: 46 | self.report['success'].append(url) 47 | self.queue.task_done() 48 | else: 49 | self.waiting = True 50 | time.sleep(2) 51 | c.log('debug', 'Downloader thread stopped') 52 | 53 | def stop(self): 54 | self._stop.set() 55 | 56 | class Download(object): 57 | def __init__(self, url, destination): 58 | self.url = url 59 | self.destination = destination 60 | self.tried = 0 61 | self.success = False 62 | self.error = None 63 | 64 | def download(self): 65 | if self.tried > 0: 66 | time.sleep(self.tried * 2) 67 | self.tried += 1 68 | try: 69 | directory = os.path.dirname(self.destination) 70 | if not os.path.isdir(directory): 71 | os.makedirs(directory) 72 | 73 | with urllib.request.urlopen(self.url, None, 30) as u: 74 | with open(self.destination, 'wb') as outfile: 75 | # TODO: limit by size 76 | size = int(u.getheader('Content-Length', -1)) 77 | while True: 78 | b = u.read(8192) 79 | if not b: 80 | break 81 | outfile.write(b) 82 | 83 | self.success = True 84 | 85 | except Exception as e: 86 | self.error = e 87 | 88 | return self.success 89 | 90 | def stopDownloads(self): 91 | c.log('debug', 'Stopping download threads (%i)' % len(self.threads)) 92 | for i in self.threads: 93 | i.stop() 94 | 95 | def __init__(self): 96 | Database.__init__(self) 97 | 98 | self.total_downloads = 0 99 | self.queue = Queue(0) 100 | self.report = {'success':[], 'failure':[]} 101 | self.threads = [] 102 | 103 | for i in range(c.cfg('download-threads')): 104 | thread = self.Downloader(self.queue, self.report) 105 | thread.start() 106 | self.threads.append(thread) 107 | if self.queue.qsize() > 0: 108 | self.queue.join() 109 | 110 | def store(self): 111 | c.log('info', 'Waiting downloads complete: ~%i...' % self.queue.qsize()) 112 | while not self.queue.empty(): 113 | c.log('info', '[%s] %i left' % (''.join([str(int(not t.waiting)) for t in self.threads]), self.queue.qsize())) 114 | time.sleep(5) 115 | 116 | self.stopDownloads() 117 | 118 | c.log('info', 'Downloaded %i of %i' % (len(self.report['success']), self.total_downloads)) 119 | if len(self.report['failure']) > 0: 120 | c.log('warning', ' download failed: %i' % len(self.report['failure'])) 121 | for url in self.report['failure']: 122 | c.log('debug', ' %s' % url.url) 123 | 124 | Database.store(self) 125 | 126 | def loadAttachments(self, data): 127 | attachments = [] 128 | if 'attachments' in data: 129 | attachments.extend(data['attachments']) 130 | if 'attachment' in data: 131 | attachments.append(data['attachment']) 132 | if 'copy_history' in data: 133 | for subdata in data['copy_history']: 134 | self.loadAttachments(subdata) 135 | for attach in attachments: 136 | c.log('debug', 'Processing %s' % attach['type']) 137 | funcname = 'process' + attach['type'].title() 138 | if funcname in dir(self): 139 | getattr(self, funcname)(attach[attach['type']]) 140 | else: 141 | c.log('error', ' media processing function "Media.%s" is not implemented' % funcname) 142 | c.log('debug', str(attach)) 143 | 144 | def addDownload(self, url, path = None): 145 | if url == '': 146 | c.log('warning', 'Skipping empty url') 147 | return path 148 | 149 | if path == None: 150 | path = os.path.join(self.path, 'storage') + urlparse(url).path 151 | 152 | if os.path.isfile(path): 153 | c.log('debug', 'Skipping, file %s already exists' % path) 154 | return path 155 | 156 | c.log('debug', 'Adding media to queue "%s"' % url) 157 | self.total_downloads += 1 158 | self.queue.put(self.Download(url, path)) 159 | 160 | return path 161 | 162 | def preprocess(self, data, data_type): 163 | # TODO: limit by type 164 | mydata = data.copy() 165 | data.clear() 166 | if 'id' not in mydata: 167 | c.log('warning', 'Unable to find "id" field in data "%s"' % (mydata,)) 168 | return 169 | data['id'] = mydata['id'] 170 | 171 | if 'owner_id' in mydata: 172 | path = os.path.join(data_type, str(mydata['owner_id']), str(mydata['id'])) 173 | data['owner_id'] = mydata['owner_id'] 174 | else: 175 | path = os.path.join(data_type, str(mydata['id'])) 176 | 177 | if 'suffix' in mydata: 178 | data['suffix'] = mydata['suffix'] 179 | 180 | if path in self.data: 181 | return path 182 | 183 | self.data[path] = mydata 184 | 185 | return path 186 | 187 | def requestComments(self, data, data_type, owner_id): 188 | if str(owner_id) != Api.getUserId(): 189 | return 190 | 191 | c.log('debug', 'Requesting comments for %s %i' % (data_type, data['id'])) 192 | 193 | if data_type == 'photo': 194 | api_method = 'photos.getComments' 195 | api_id_name = 'photo_id' 196 | elif data_type == 'video': 197 | api_method = 'video.getComments' 198 | api_id_name = 'video_id' 199 | elif data_type == 'wall': 200 | api_method = 'wall.getComments' 201 | api_id_name = 'post_id' 202 | else: 203 | c.log('warning', 'Unable to request comments for %s %i - not implemented' % (data_type, data['id'])) 204 | return 205 | 206 | if 'comments' not in data: 207 | data['comments'] = {} 208 | if not isinstance(data['comments'], dict): 209 | data['comments'] = {} 210 | 211 | req_data = {'owner_id': int(owner_id), api_id_name: int(data['id']), 'count': 100, 'offset': 0} 212 | 213 | while True: 214 | subdata = Api.request(api_method, req_data) 215 | if subdata == None: 216 | return 217 | count = subdata['count'] 218 | subdata = subdata['items'] 219 | for d in subdata: 220 | data['comments'][str(d['date'])] = d 221 | self.loadAttachments(data['comments'][str(d['date'])]) 222 | 223 | req_data['offset'] += 100 224 | if req_data['offset'] >= count: 225 | break 226 | 227 | def processPhoto(self, data): 228 | c.log('debug', 'Processing photo media') 229 | path = self.preprocess(data, 'photo') 230 | if 'localpath' not in self.data[path]: 231 | url = None 232 | if 'sizes' not in self.data[path]: 233 | c.log('error', 'Unable to find photo sizes in %s' % str(self.data[path])) 234 | lastwh = 0 235 | # Selecting the biggest image out of available sizes 236 | for size_val in self.data[path].get('sizes', []): 237 | wh = size_val['width'] * size_val['height'] 238 | if wh == 0: 239 | # Using latest in sizes array 240 | url = size_val['url'] 241 | elif lastwh < wh: 242 | lastwh = wh 243 | url = size_val['url'] 244 | 245 | if url == None: 246 | c.log('warning', 'Valid url not found in %s' % str(self.data[path])) 247 | return 248 | 249 | self.data[path]['url'] = url 250 | self.data[path]['localpath'] = self.addDownload(url) 251 | self.requestComments(self.data[path], 'photo', self.data[path]['owner_id']) 252 | 253 | def processDoc(self, data): 254 | c.log('debug', 'Processing doc media') 255 | path = self.preprocess(data, 'doc') 256 | if 'localpath' not in self.data[path] and 'url' in self.data[path]: 257 | self.data[path]['localpath'] = self.addDownload(self.data[path]['url']) 258 | 259 | def processAudio(self, data): 260 | c.log('debug', 'Processing audio media') 261 | path = self.preprocess(data, 'audio') 262 | if 'localpath' not in self.data[path] and 'url' in self.data[path]: 263 | self.data[path]['localpath'] = self.addDownload(self.data[path]['url']) 264 | 265 | def processWall(self, data): 266 | c.log('debug', 'Processing wall media') 267 | if 'comments' in data: 268 | data['comments'].pop('count', None) 269 | data['comments'].pop('can_post', None) 270 | self.requestComments(data, 'wall', data['from_id']) 271 | self.loadAttachments(data) 272 | 273 | def processGeo(self, data): 274 | self.preprocess(data, 'geo') 275 | c.log('debug', 'Skipping geo media - no data to download') 276 | 277 | def processVideo(self, data): 278 | path = self.preprocess(data, 'video') 279 | self.requestComments(self.data[path], 'video', self.data[path]['owner_id']) 280 | c.log('debug', 'Skipping video media - size of the file is too big') 281 | 282 | def processSticker(self, data): 283 | self.preprocess(data, 'sticker') 284 | c.log('debug', 'Skipping sticker media') 285 | 286 | def processLink(self, data): 287 | c.log('debug', 'Skipping link media - no data to download') 288 | 289 | def processPoll(self, data): 290 | self.preprocess(data, 'poll') 291 | c.log('debug', 'Skipping poll media - no data to download') 292 | 293 | def processNote(self, data): 294 | self.preprocess(data, 'note') 295 | c.log('debug', 'Skipping note media - no data to download') 296 | 297 | def processPresent(self, data): 298 | self.preprocess(data, 'present') 299 | c.log('debug', 'Skipping present media') 300 | 301 | # Returns list of photo types sorted from worse to best quality 302 | def getPhotoTypes(self): 303 | return _PHOTO_TYPES 304 | 305 | S = Media() 306 | -------------------------------------------------------------------------------- /lib/Messages.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: UTF-8 -*- 3 | '''VK-Backup Messages 4 | 5 | Author: Rabit 6 | License: GPL v3 7 | Description: Module provided common message requests 8 | Required: python3.5 9 | ''' 10 | 11 | from . import Common as c 12 | 13 | from . import Api 14 | from .Media import S as Media 15 | 16 | def requestMessages(request, msgs_data): 17 | request['count'] = 200 18 | request['offset'] = -200 19 | if len(msgs_data['log']) == 0: 20 | request['rev'] = 1 21 | request['offset'] = 0 22 | else: 23 | request['start_message_id'] = msgs_data['log'][-1]['id'] 24 | 25 | while True: 26 | data = Api.request('messages.getHistory', request) 27 | if data == None: 28 | return 29 | count = data['count'] 30 | data = data['items'] 31 | 32 | if len(data) == 0: 33 | c.log('info', ' no new messages %i (%i)' % (len(msgs_data['log']), count)) 34 | break 35 | 36 | # Switch to get history by message id 37 | if 'start_message_id' not in request: 38 | request['offset'] = -200 39 | request.pop('rev', None) 40 | else: 41 | data.reverse() 42 | 43 | processMessages(data) 44 | msgs_data['log'].extend(data) 45 | 46 | request['start_message_id'] = data[-1]['id'] 47 | c.log('info', ' loaded %i, stored %i (%i)' % (len(data), len(msgs_data['log']), count)) 48 | if len(data) < 200: 49 | c.log('info', ' done') 50 | break 51 | 52 | def processMessages(data): 53 | for d in data: 54 | d.pop('user_id', None) 55 | d.pop('read_state', None) 56 | d.pop('chat_id', None) 57 | Media.loadAttachments(d) 58 | if 'fwd_messages' in d: 59 | processMessages(d['fwd_messages']) 60 | -------------------------------------------------------------------------------- /lib/MessagesUI.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: UTF-8 -*- 3 | '''VK-Backup Messages UI 4 | 5 | Author: Rabit 6 | License: GPL v3 7 | Description: Module provided common message requests through UI API 8 | Required: python3.5 9 | ''' 10 | 11 | from . import Common as c 12 | 13 | from . import Api 14 | from . import UIApi 15 | from .Media import S as Media 16 | 17 | from html.parser import HTMLParser 18 | 19 | # This class will find the needed attribute inside a block with specific attribute=value 20 | # The result will be stored in found_data as [(, , )] 21 | class BlockDataHTMLParser(HTMLParser): 22 | def __init__(self, to_find, to_get): 23 | HTMLParser.__init__(self) 24 | 25 | # What attribute and value to find 26 | self.to_find = dict( (k, str(v)) for k, v in to_find.items() ) 27 | 28 | # Which attributes contains data to harvest 29 | self.to_get = to_get 30 | 31 | # Contains data to track the insides of the block 32 | self.in_block = None 33 | self.in_block_tag = None 34 | self.in_block_counter = 0 35 | 36 | self.found_data = [] 37 | 38 | def _checkFindAttrs(self, tag, attrs): 39 | for attr, value in self.to_find.items(): 40 | if attr in attrs and attrs[attr] == value: 41 | c.log('debug', 'Found starting block tag: %s %s' % (tag, attrs)) 42 | self.in_block = {attr: value} 43 | self.in_block_tag = tag 44 | 45 | def _checkGetAttrs(self, attrs): 46 | for attr in self.to_get: 47 | if attr in attrs: 48 | c.log('debug', 'Found needed data attr: %s %s' % (attr, attrs[attr])) 49 | self.found_data.append((self.in_block, self.in_block_tag, attrs[attr])) 50 | 51 | def handle_starttag(self, tag, attrs): 52 | attrs = dict((name.lower(), value) for name, value in attrs) 53 | if self.in_block: 54 | self.in_block_counter += 1 55 | else: 56 | self._checkFindAttrs(tag, attrs) 57 | 58 | if self.in_block: 59 | self._checkGetAttrs(attrs) 60 | 61 | def handle_startendtag(self, tag, attrs): 62 | attrs = dict((name.lower(), value) for name, value in attrs) 63 | # TODO: In theory it could be the block start/end tag, so need to start block and finish it 64 | if self.in_block: 65 | self._checkGetAttrs(attrs) 66 | 67 | def handle_endtag(self, tag): 68 | if self.in_block: 69 | # In theory we need to validate tag as well - but you know those html devs... 70 | if self.in_block_counter == 0: 71 | c.log('debug', 'Found the end tag of the block: %s' % (tag,)) 72 | self.in_block = None 73 | self.in_block_tag = None 74 | 75 | self.in_block_counter -= 1 76 | 77 | # Getting messages for the user id 78 | # is received in batches starting from last batch 79 | # Processed format: 80 | # payload: 81 | # - 82 | # - - 83 | # - : 84 | # - 85 | # - - second bit is out 86 | # - 87 | # - 88 | # - 89 | # - attach_type: (ex: "photo", "video", "audio", "doc") 90 | # attach: (ex: "-192626588_456239023", "-192626588_457241236") 91 | def requestMessages(user_id, msgs_data): 92 | # We don't have to process everything - just till the last downloaded message ID 93 | stop_msg_id = msgs_data['log'][-1]['id'] if len(msgs_data['log']) > 0 else None 94 | 95 | new_user_messages = [] 96 | 97 | # Continue to request messages until the end 98 | continue_process = True 99 | while continue_process: 100 | msgs = UIApi.request('al_im.php', { 101 | "_smt": "im:3", 102 | "act": "a_history", 103 | "al": "1", 104 | "gid": "0", 105 | "im_v": "3", 106 | "offset": str(len(new_user_messages)), 107 | "peer": str(user_id), 108 | "toend": "0", 109 | "whole": "0", 110 | }) 111 | 112 | if not msgs or not isinstance(msgs[1][1], dict): 113 | c.log('warning', 'Messages of the user are not in dict format, skipping: %s' % (msgs,)) 114 | break 115 | 116 | # Sorting messages in reverse order to process the latest first 117 | msgs_list = list(msgs[1][1].values()) 118 | 119 | if len(msgs_list) < 1: 120 | # There is no messages available 121 | break 122 | 123 | for msg in msgs_list: 124 | d = processMessage(msg, msgs[1][0]) 125 | if d['id'] == stop_msg_id: 126 | # We reached last loaded message, so no need to proceed further 127 | continue_process = False 128 | break 129 | new_user_messages.insert(0, d) 130 | 131 | c.log('debug', ' loaded %i' % (len(new_user_messages,))) 132 | 133 | msgs_data['log'].extend(new_user_messages) 134 | c.log('info', ' total loaded %i, stored %i' % (len(new_user_messages), len(msgs_data['log']))) 135 | 136 | # Processing message from VK JSON 137 | def processMessage(msg, html): 138 | # msg[1] contains bitmap where second bit is outgoing msg (1 if current user sent, 0 if incoming from remote user) 139 | out = (msg[1] & 0b10) and 1 # Will output 0 or 1 140 | # if it's chat - user id will be in msg[5], otherwise if out == 1 then it's current user, otherwise msg[2] 141 | from_id = msg[5].get('from') or (Api.getUserId() if out == 1 else msg[2]) 142 | d = { 143 | "id": msg[0], 144 | "from_id": int(from_id), 145 | "date": msg[3], 146 | "body": msg[4], 147 | "out": out, 148 | } 149 | 150 | # Processing attachments 151 | attachments = {} 152 | # First - looking for all the data and place it in one 153 | for key, val in msg[5].items(): 154 | if key.startswith('attach'): 155 | if key == 'attach_count': 156 | continue 157 | att_index = int(key[6:].split('_', 1)[0]) 158 | if att_index not in attachments: 159 | attachments[att_index] = {'data':{}} 160 | 161 | if key.endswith('_type'): 162 | attachments[att_index]['type'] = val 163 | else: 164 | dkey = key.split('_', 1) 165 | if len(dkey) < 2: 166 | attachments[att_index]['data'][''] = val 167 | else: 168 | attachments[att_index]['data'][dkey[1]] = val 169 | 170 | if len(attachments) > 0: 171 | d['attachments'] = list(dict(sorted(attachments.items())).values()) 172 | 173 | # Processing attachments to properly format them and download if needed 174 | for ind, att in enumerate(d['attachments']): 175 | d['attachments'][ind] = processAttachment(att, d['id'], html) 176 | 177 | # We need to process attachments of the message and download available media 178 | Media.loadAttachments(d) 179 | 180 | return d 181 | 182 | # Processing normalized attachment in format: 183 | # type: 184 | # : 185 | # id: 186 | # owner_id: 187 | def processAttachment(att, msg_id, html): 188 | c.log('debug', 'Processing UI attachment for message id: %s' % (msg_id,)) 189 | try: 190 | # Most of the attachments are not send directly in the response - because UI API is returning 191 | # html to describe the messages, some of the important vars (forwarded messages for example) 192 | # are containing only in HTML, so it's kind of downside of using UI API... We need to receive 193 | # the important data via regular API call and store it in the attachment data. 194 | if att['type'] in ('photo', 'doc', 'audio', 'video', 'audio_playlist', 'story', 'article', 'wall'): 195 | (att_own, att_id) = att['data'][''].split('_', 1) 196 | suffix = '' 197 | if '_' in att_id: # Some audio id's could look like "1422450_456239108_23dd849dd82775b33e" 198 | (att_id, suffix) = att_id.split('_', 1) 199 | att[att['type']] = { 200 | 'id': int(att_id), 201 | 'owner_id': int(att_own), 202 | } 203 | if suffix != '': 204 | att[att['type']]['suffix'] = suffix 205 | 206 | if att['type'] == 'photo': 207 | photo_data = getUIPhotoData(att['data'][''], msg_id) 208 | if photo_data: 209 | att[att['type']] = photo_data 210 | else: 211 | c.log('warning', 'Received photo data contains no photo record: %s : %s' % (UIApi.dumpData(photo_data), att)) 212 | elif att['type'] == 'wall': 213 | wall_data = Api.request('wall.getById', {'posts': att['data']['']}) 214 | if wall_data and len(wall_data) == 1: 215 | att[att['type']] = wall_data[0] 216 | else: 217 | c.log('warning', 'Received wall data contains no wall record: %s : %s' % (UIApi.dumpData(wall_data), att)) 218 | elif att['type'] == 'story': 219 | story_data = Api.request('stories.getById', {'stories': att['data']['']}) 220 | if story_data and len(story_data) == 1: 221 | att[att['type']] = story_data[0] 222 | else: 223 | c.log('warning', 'Received story data contains no story record: %s : %s' % (UIApi.dumpData(story_data), att)) 224 | elif att['type'] == 'doc': 225 | if 'kind' in att['data']: 226 | if att['data']['kind'] == 'audiomsg': 227 | # Locating the mp3 data of audio msg in the provided html 228 | parser = BlockDataHTMLParser({'data-msgid': msg_id}, ['data-mp3', 'data-ogg']) 229 | parser.feed(html) 230 | parser.close() 231 | if len(parser.found_data) > 0: 232 | att[att['type']]['url'] = parser.found_data[0][2] 233 | else: 234 | c.log('warning', 'Unable to find UI doc kind processor to get URL for : %s : %s' % (att, html)) 235 | else: 236 | # Default doc have just 237 | parser = BlockDataHTMLParser({'data-msgid': msg_id}, ['href']) 238 | parser.feed(html) 239 | parser.close() 240 | for d in parser.found_data: 241 | if d[2].startswith('/doc'+att['data']['']): 242 | att[att['type']]['url'] = 'https://vk.com'+d[2] 243 | break 244 | if 'url' not in att[att['type']]: 245 | c.log('warning', 'Unable to find doc URL for : %s' % (att,)) 246 | elif att['type'] == 'audio': 247 | # Audio a bit hard to download, so just getting the additional info from data-audio 248 | # Looks like: '[115130048,48899827,"","Мать твою так","Трупный яд",198,0,0,"",0,2,"im","[]","ee7c995bc3ff6379c7\/\/aea86bbdf7be02f3ac\/\/\/fb6512a8793e3443ab\/","",{"duration":198,"content_id":"48899827_115130048","puid22":11,"account_age_type":3,"_SITEID":276,"vk_id":10170169,"ver":251116},"","","",false,"c86ec74eU35nMQdVLYSxwyMDBdn8HvHsyMFLeXCULiReSHC1eDVfdwa2vpxZLBNYeAI",0,0,true,"f0bbcf1e510c4358f1",false,"",false]' 249 | parser = BlockDataHTMLParser({'data-msgid': msg_id}, ['data-audio']) 250 | parser.feed(html) 251 | parser.close() 252 | for d in parser.found_data: 253 | if d[2].startswith('[%s,%s,' % (att[att['type']]['id'], att[att['type']]['owner_id'])): 254 | att[att['type']]['info_str'] = d[2] 255 | break 256 | elif att['type'] == 'link': 257 | att[att['type']] = { 258 | 'description': att['data'].get('desc'), 259 | 'image_src': att['data'].get('photo'), 260 | 'title': att['data'].get('title'), 261 | 'url': att['data']['url'], 262 | } 263 | elif att['type'] == 'call': 264 | att[att['type']] = { 265 | 'id': att['data'].get(''), 266 | 'initiator_id': int(att['data'].get('call_initiator_id', '0')), 267 | 'receiver_id': int(att['data'].get('call_receiver_id', '0')), 268 | 'state': att['data'].get('call_state'), 269 | 'video': att['data'].get('call_video'), 270 | } 271 | elif att['type'] == 'sticker': 272 | att[att['type']] = { 273 | 'id': int(att['data']['']), 274 | } 275 | if 'product_id' in att['data']: 276 | att[att['type']]['product_id'] = int(att['data']['product_id']) 277 | if 'kind' in att['data']: 278 | att[att['type']]['kind'] = att['data']['kind'] 279 | elif att['type'] == 'gift': 280 | att[att['type']] = { 281 | 'id': int(att['data']['']), 282 | } 283 | elif att['type'] == 'poll': 284 | id_list = att['data'][''].split('_', 1) 285 | if len(id_list) < 2: 286 | att[att['type']] = { 287 | 'id': int(id_list[0]), 288 | } 289 | else: 290 | att[att['type']] = { 291 | 'id': int(id_list[1]), 292 | 'owner_id': int(id_list[0]), 293 | } 294 | elif att['type'] == 'audio_playlist': 295 | att[att['type']] = { 296 | 'id': int(att['data']['']), 297 | } 298 | else: 299 | c.log('error', 'Unable to find attachment processor for:') 300 | c.log('error', '%s' % (UIApi.dumpData(att),)) 301 | except Exception as e: 302 | c.log('error', 'Exception happened during processing of the next attachment:') 303 | c.log('error', '%s' % (UIApi.dumpData(att),)) 304 | raise e 305 | 306 | del att['data'] 307 | 308 | return att 309 | 310 | # Getting photo info through UI for the photo - regular API will not give access usually 311 | # Processed format: 312 | # payload: 313 | # - 314 | # - - 315 | # - 316 | # - 317 | # - - id: 318 | # _src: 319 | # _: 320 | # - 321 | # - 322 | # - 323 | # ... 324 | def getUIPhotoData(photo_fullid, msg_id): 325 | c.log('debug', 'Getting UI photo data for %s of message %s' % (photo_fullid, msg_id)) 326 | 327 | photos = UIApi.request('al_photos.php', { 328 | '_smt': 'im:6', 329 | 'act': 'show', 330 | 'al': '1', 331 | 'dmcah': '', 332 | 'gid': '0', 333 | 'list': 'mail'+str(msg_id), # Requires the message id it was attached to 334 | 'module': 'im', 335 | 'photo': str(photo_fullid), 336 | }) 337 | 338 | if not photos or len(photos[1]) < 4 or not isinstance(photos[1][3], list): 339 | c.log('warning', 'Requested photos returned in bad format: %s' % (photos,)) 340 | return {} 341 | 342 | photo = None 343 | # Looking in the list of returned photos the one we need - it can return a bunch of them 344 | for p in photos[1][3]: 345 | if p['id'] == photo_fullid: 346 | photo = p 347 | break 348 | 349 | if not photo: 350 | c.log('error', 'Requested photo is not present in the returned data: %s' % (photos,)) 351 | return {} 352 | 353 | # Using UI API to reproduce the regular API data format like: 354 | # id: 162282203 355 | # owner_id: 98371283 356 | # album_id: -2 357 | # date: 1301912968 358 | # post_id: 1012 359 | # text: '' 360 | # web_view_token: '0628ebc11d1eb2dc01' 361 | # sizes: 362 | # - height: 0 363 | # type: s 364 | # width: 0 365 | # url: https://sun2-12.userapi.com/c10000/u98371283/-6/s_4992d41b.jpg 366 | # ... 367 | pid = photo_fullid.split('_', 2) 368 | out = { 369 | 'id': int(pid[1]), 370 | 'owner_id': int(pid[0]), 371 | # TODO: Parse date of photo 372 | 'text': photo['desc'], 373 | 'sizes': [], 374 | } 375 | if len(pid) > 2: 376 | out['suffix'] = pid[2] 377 | for t in Media.getPhotoTypes(): 378 | attr = (t+'_') 379 | if attr in photo: 380 | if len(photo[attr]) < 3: 381 | c.log('warning', 'Photo size definition lacks of width/height data: %s' % (photo,)) 382 | out['sizes'].append({ 383 | # Sometimes photo[attr][0] could contain partial url (only path of it, so using _src instead) 384 | 'url': photo[attr][0] if len(photo[attr][0]) > len(photo[attr+'src']) else photo[attr+'src'], 385 | 'width': photo[attr][1] if len(photo[attr]) > 1 else 0, 386 | 'height': photo[attr][2] if len(photo[attr]) > 2 else 0, 387 | }) 388 | return out 389 | -------------------------------------------------------------------------------- /lib/UIApi.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: UTF-8 -*- 3 | '''VK-Backup UIApi 4 | 5 | Author: Rabit 6 | License: GPL v3 7 | Description: Can interact with vk UI API 8 | Required: python3.5 9 | ''' 10 | 11 | import sys 12 | import time, json 13 | import urllib.request 14 | from urllib.parse import urlencode 15 | 16 | from . import Common as c 17 | from . import vk_auth 18 | 19 | c.log('debug', 'Init UIApi') 20 | 21 | # Getting curl parameters to initialize the headers to access UI API 22 | if '--' not in sys.argv: 23 | c.log('error', 'Unable to locate "--" parameter which separates vk-backup params from curl params, please read README on how to use vk-backup') 24 | sys.exit(1) 25 | 26 | curl_params_index = sys.argv.index('--') + 1 27 | if sys.argv[curl_params_index] != 'curl': 28 | c.log('error', 'Unable to locate "curl" parameter which starts curl params to access VK UI API, please read README on how to use vk-backup') 29 | sys.exit(1) 30 | 31 | _HEADERS = {} 32 | needed_headers = ['Authorization', 'Cookie'] 33 | process_arg = curl_params_index # Starting with curl_params_index to not process vk-backup params 34 | while process_arg < len(sys.argv)-1: 35 | process_arg += 1 36 | if sys.argv[process_arg] != '-H': 37 | continue 38 | 39 | process_arg += 1 40 | # This argument is a header, check with the needed headers list 41 | if any([ True for h in needed_headers if sys.argv[process_arg].startswith(h+': ') ]): 42 | (k, v) = sys.argv[process_arg].split(': ', 1) 43 | _HEADERS[k] = v 44 | 45 | if len(_HEADERS) < len(needed_headers): 46 | c.log('error', 'Not all the required headers was found in the provided curl request') 47 | sys.exit(1) 48 | 49 | # Last time api call to prevent service overloading 50 | _LAST_API_CALL = 0 51 | 52 | def _requestJson(url, data): 53 | data_en = urlencode(data).encode() 54 | 55 | req = urllib.request.Request(url) 56 | req.add_header('User-Agent', 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/116.0') 57 | for k, v in _HEADERS.items(): 58 | req.add_header(k, v) 59 | 60 | with urllib.request.urlopen(req, data_en) as ret: 61 | encoding = ret.info().get_content_charset('utf-8') 62 | ret_data = ret.read().decode(encoding) 63 | if ret_data.startswith('