├── .gitignore
├── Download.py
├── Api.py
├── README.md
├── main.py
└── PostParser.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | *.py[cod]
 2 | 
 3 | # C extensions
 4 | *.so
 5 | 
 6 | # Packages
 7 | *.egg
 8 | *.egg-info
 9 | dist
10 | build
11 | eggs
12 | parts
13 | bin
14 | var
15 | sdist
16 | develop-eggs
17 | .installed.cfg
18 | lib
19 | lib64
20 | 
21 | # Installer logs
22 | pip-log.txt
23 | 
24 | # Unit test / coverage reports
25 | .coverage
26 | .tox
27 | nosetests.xml
28 | 
29 | # Translations
30 | *.mo
31 | 
32 | # Mr Developer
33 | .mr.developer.cfg
34 | .project
35 | .pydevproject
36 | 


--------------------------------------------------------------------------------
/Download.py:
--------------------------------------------------------------------------------
 1 | __author__ = 'rast'
 2 | 
 3 | import urllib2
 4 | from os.path import join, exists, isfile, splitext
 5 | import sys
 6 | import logging
 7 | 
 8 | def download(urllist, root_dir):
 9 |     """Thanks to StackOverflow guys"""
10 |     for url, name, subdir in urllist:
11 |         if name is None:
12 |             name = url.split('/')[-1]
13 |         dir = join(root_dir, subdir)
14 |         fname = join(dir, name)
15 |         u = urllib2.urlopen(url)
16 | 
17 |         #file might exist, so add (1) or (2) etc
18 |         counter = 1
19 |         if exists(fname) and isfile(fname):
20 |             name, ext = splitext(fname)
21 |             fname = name + " ({})".format(counter) + ext
22 |         while exists(fname) and isfile(fname):
23 |             counter += 1
24 |             name, ext = splitext(fname)
25 |             fname = name[:-4] + " ({})".format(counter) + ext
26 |         logging.info(u"Start dl: {}".format(fname))
27 |         f = open(fname, 'wb')
28 |         meta = u.info()
29 |         file_size = int(meta.getheaders("Content-Length")[0])
30 |         sys.stdout.write("Downloading: %s (%s kb)\n" % (fname.encode('ascii', 'ignore'), file_size/1024))
31 | 
32 |         file_size_dl = 0
33 |         block_sz = 8192
34 |         while True:
35 |             buffer = u.read(block_sz)
36 |             if not buffer:
37 |                 break
38 | 
39 |             file_size_dl += len(buffer)
40 |             f.write(buffer)
41 |             status = r"%10d  [%3.2f%%]" % (file_size_dl, file_size_dl * 100. / file_size)
42 |             status = status + chr(8)*(len(status)+1)
43 |             sys.stdout.write(status)
44 | 
45 |         f.close()
46 |         logging.info(u" End  dl: {}".format(fname))


--------------------------------------------------------------------------------
/Api.py:
--------------------------------------------------------------------------------
 1 | __author__ = 'rast'
 2 | 
 3 | import json
 4 | import urllib2
 5 | from urllib import urlencode
 6 | import re
 7 | from time import sleep
 8 | import logging
 9 | 
def auth(args):
    """Walk the user through the OAuth flow and return the access token.

    Prints the authorization URL built from args.app_id and
    args.access_rights, reads the redirect URL the user pastes back and
    extracts the access_token value from it. Returns None when the pasted
    text contains no token.
    """
    scope = ",".join(args.access_rights)
    query = "client_id=%s&scope=%s&display=wap" % (args.app_id, scope)
    url = "https://oauth.vk.com/oauth/authorize?" + \
          "redirect_uri=https://oauth.vk.com/blank.html&response_type=token&" + \
          query

    print("Please open this url:\n\n\t{}\n".format(url))
    pasted = raw_input("Grant access to your acc and copy resulting URL here: ")
    match = re.search('access_token=([0-9A-Fa-f]+)', pasted, re.I)
    return match.groups()[0] if match is not None else None
24 | 
def captcha(data):
    """Show the captcha image URL from an API error and read the user's answer.

    data is the decoded API reply containing an 'error' member.
    Returns a (captcha_sid, solution) tuple.
    """
    logging.debug("Captcha needed..")
    print("VK thinks you're a bot - and you are ;)")
    print("They want you to solve CAPTCHA. Please open this URL, and type here a captcha solution:")
    error = data[u'error']
    print("\n\t{}\n".format(error[u'captcha_img']))
    answer = raw_input("Solution = ").strip()
    return error[u'captcha_sid'], answer
33 | 
34 | 
def call_api(method, params, args):
    """Call a VK API method, retrying on rate limiting and captcha requests.

    method -- API method name, e.g. "wall.get"
    params -- list of (key, value) pairs, a dict, or a single (key, value)
              pair; it is never modified
    args   -- object whose `token` attribute holds the access token

    Returns (response, json_stuff): the decoded 'response' member of the
    reply and the raw JSON text it came from.

    Raises RuntimeError for any API error other than "too many requests"
    (code 6) and "captcha needed" (code 14), or when the reply has no
    'response' member.
    """
    if isinstance(params, dict):
        base_params = list(params.items())
    elif isinstance(params, list):
        base_params = list(params)
    else:
        base_params = [params]

    # Latest captcha answer only; kept apart from the caller's params so that
    # retries neither mutate the argument nor pile up stale captcha pairs.
    captcha_params = []

    while True:
        query = base_params + captcha_params + [("access_token", args.token)]
        url = "https://api.vk.com/method/%s?%s" % (method, urlencode(query))

        connection = urllib2.urlopen(url)
        try:
            json_stuff = connection.read()
        finally:
            connection.close()
        result = json.loads(json_stuff)

        if u'error' not in result:
            # NOTE(review): the logged URL contains the access token.
            logging.debug("API call succeeded: {}".format(url))
            break

        error = result[u'error']
        code = error[u'error_code']
        if code == 6:  # too many requests
            logging.debug("Too many requests per second, sleeping..")
            sleep(1)
        elif code == 14:  # captcha needed :\
            sid, key = captcha(result)
            captcha_params = [(u"captcha_sid", sid), (u"captcha_key", key)]
        else:
            msg = "API call resulted in error ({}): {}".format(code, error[u'error_msg'])
            logging.error(msg)
            raise RuntimeError(msg)

    if u'response' not in result:
        msg = "API call result has no response"
        logging.error(msg)
        raise RuntimeError(msg)
    return result[u'response'], json_stuff
73 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | vkd (en)
  2 | ===
  3 | WARNING: it is dead, not supported and does not work. See https://github.com/Rast1234/VOffline instead.
  4 | ===
  5 | 
  6 | VK dumper - save stuff from your vk.com to local drive
  7 | ---
  8 | 
  9 | **Features:**
 10 | 
 11 | * Download almost anything from your wall:
 12 |     * Posts
 13 |     * Multimedia attachments
 14 |     * Comments
 15 |     * Comment attachments
 16 |     * Some extra raw info for your history
 17 | * Download audio tracks
 18 |     * Correct file naming
 19 |     * Text stored also (if available)
 20 |     * Sort by playlists (as subfolders)
 21 | * Download docs
 22 |     * Auto-change filename if exists
 23 |     * Write correct extension
 24 | * **TODO:** Downloading video 
 25 | * **TODO:** Downloading notes
 26 | 
 27 | **Known limitations, bugs and other considerations:**
 28 | 
 29 | * Unable to download videos (Anyone need this?)
 30 | * Unable to download note comments (Maybe an unneeded feature?)
 31 | * Tested on a particular user, audio tested on a group
 32 | * Can't grant access by itself, so you need to enter auth token manually (see below)
 33 | * If wall, documents or audio list has been modified during work, it **will** cause unpredictable things.
 34 | * Sometimes you need to solve CAPTCHA (interactively)
 35 | 
 36 | Usage and requirements
 37 | ---
 38 | You need **Python 2.7** only for this to work. Should work under different OSes, not tested.  
 39 | For all available list of arguments see `--help` output.
 40 | Essential options described below:
 41 | * `-i / --id`    User ID or group ID to dump. Group ID should be prefixed with `-`, e.g. `-123456`
 42 | * `-a / --app_id`	Application ID, see below.
 43 | * `-m / --mode` Working mode. Supported modes are `wall`, `docs` and `audio`, you may specify all at the same time. Script will download your wall, docs or audio tracks, respectively.
 44 | 
 45 | How-to
 46 | ---
 47 | 1. Clone this repo
 48 | 2. Register your own [vk app here](https://vk.com/editapp?act=create) to get Application ID. Be sure to select **standalone** application type.
 49 | 3. Write down your Application ID (ex. **1234567**).
 50 | 4. Find your [profile ID here](https://vk.com/settings) near the bottom of the page. For example, **1**.
 51 | 5. What do you want to dump? `audio`, `wall` or `docs`?
 52 | 6. Now specify everything to the script:
 53 | 
 54 |         main.py --app_id 1234567 --id 1 --mode wall audio docs
 55 | 
 56 | 7. Script will ask you to go to a given URL, so, do it :)
 57 | 8. Give access for this application to your profile.
 58 | 9. You will be redirected to a white page with text about security.
 59 | 10. Copy the URL of this page. Yeah, I know, VK tells you not to do that for security reasons. But this application is the app you have just registered, and this script is open-source, so go and read the sources if you don't trust me.
 60 | 11. Paste the URL into the script, it is waiting for you!
 61 | 12. Enjoy downloading process. This will take a lot of time, though.
 62 | 
 63 | You may set limits of posts and audio tracks to download, change directory to store data, etc. See `--help` output.
 64 | 
 65 | Results
 66 | ---
 67 | According to working mode and wall posts' contents, corresponding dirs and files will be created.
 68 | Everything will be stored in specified directory, say, `some_dir`:
 69 |    
 70 |       some_dir                                     (base directory)
 71 |       +---- 9876543                                (user id)
 72 |             +---- docs                             (documents stored here)
 73 |             |     +---- cat.gif
 74 |             |     +---- my_archive.zip
 75 |             |     +---- ...
 76 |             +---- audio                            (audio tracks and texts)
 77 |             |     +---- Artist1 - Track.mp3
 78 |             |     +---- Artist2 - Track.mp3
 79 |             |     +---- Artist2 - Track.mp3.txt    (text for that song)
 80 |             |     +---- Album1                     (audio album name)
 81 |             |     |     +---- Artist - track.mp3   (tracks in that album)
 82 |             |     +---- ...
 83 |             +---- post_1234                        (wall post id)
 84 |             |     +---- text.html                  (post text)
 85 |             |     +---- image.jpg                  (any multimedia attachments)
 86 |             |     +---- music.mp3                  
 87 |             |     +---- ...
 88 |             |     +---- media_urls.txt             (list of attachments' urls)
 89 |             |     +---- comments.json              (raw comments, reply from vk server)
 90 |             |     +---- raw.json                   (raw post, reply from vk server)
 91 |             |     +---- note_1234                  (note, if attached to post)
 92 |             |     |     +---- text.html            (note text)
 93 |             |     |     +---- raw.json             (raw note, reply from vk server)
 94 |             |     +---- comments                   (comments dir)
 95 |             |           +---- text.html            (all comments' text)
 96 |             |           +---- raw.json             (raw comments, reply from vk server)
 97 |             |           +---- image.jpg            (any multimedia attachments)
 98 |             |           +---- music.mp3                  
 99 |             |           +---- ...
100 |             |           +---- media_urls.txt       (list of attachments' urls)
101 |             +---- post_1235
102 |             |     +---- ...
103 |             +---- ...
104 |                         
105 | 
106 | 
107 | Bugs
108 | ---
109 | Bug reports and pull requests are appreciated. Open an issue [here](https://github.com/Rast1234/vkd/issues) or [PM me](https://vk.com/rast1234)
110 | 
111 | Credits:
112 | ---
113 | 
114 | Me :)  
115 | http://habrahabr.ru/post/143972/ (call_api function)  
116 | http://stackoverflow.com/questions/22676/how-do-i-download-a-file-over-http-using-python (fancy progressbar)  
117 | Pavel Durov for great social network and its buggy API :)  
118 |     
119 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python2
  2 | 
  3 | """Save everything from your VK wall"""
  4 | 
  5 | __author__ = "Rast"
  6 | 
  7 | import logging
  8 | import argparse
  9 | from collections import defaultdict
 10 | from PostParser import PostParser
 11 | from Api import call_api, auth
 12 | import os
 13 | 
 14 | def arg_parse():
 15 |     argparser = argparse.ArgumentParser()
 16 |     argparser.add_argument("-d", "--dir",
 17 |                         type=str,
 18 |                         help="Directory to store dumped data",
 19 |                         dest="directory",
 20 |                         required=False,
 21 |                         default=".")
 22 |     argparser.add_argument("-i", "--id",
 23 |                         type=int,
 24 |                         help="User ID to dump. To dump a group, specify its ID with '-' prefix",
 25 |                         metavar="USER_ID|-GROUP_ID",
 26 |                         dest="id",
 27 |                         required=True)
 28 |     argparser.add_argument("-t", "--token",
 29 |                         type=str,
 30 |                         help="Access token, generated by VK for session",
 31 |                         dest="token",
 32 |                         required=False)
 33 |     argparser.add_argument("-a", "--app_id",
 34 |                         type=int,
 35 |                         help="Your application ID to access VK API",
 36 |                         dest="app_id",
 37 |                         required=True)
 38 |     argparser.add_argument("-m", "--mode",
 39 |                         option_strings=['wall', 'audio', 'video', 'notes', 'docs'],
 40 |                         nargs="+",
 41 |                         help="What to dump. Possible values: "+', '.join(['wall', 'audio', 'video', 'notes']),
 42 |                         dest="mode",
 43 |                         required=True)
 44 | 
 45 |     argparser.add_argument("--wall_start",
 46 |                         type=int,
 47 |                         help="Post number to start from (first is 0)",
 48 |                         dest="wall_start",
 49 |                         required=False,
 50 |                         metavar="INT",
 51 |                         default=0)
 52 |     argparser.add_argument("--wall_end",
 53 |                         type=int,
 54 |                         help="Post number to end at (0 = all posts)",
 55 |                         dest="wall_end",
 56 |                         required=False,
 57 |                         metavar="INT",
 58 |                         default=0)
 59 | 
 60 |     argparser.add_argument("--audio_start",
 61 |                         type=int,
 62 |                         help="Audio number to start from (first is 0)",
 63 |                         dest="audio_start",
 64 |                         required=False,
 65 |                         metavar="INT",
 66 |                         default=0)
 67 |     argparser.add_argument("--audio_end",
 68 |                         type=int,
 69 |                         help="Audio number to end at (0 = all audios)",
 70 |                         dest="audio_end",
 71 |                         required=False,
 72 |                         metavar="INT",
 73 |                         default=0)
 74 | 
 75 |     argparser.add_argument("--video_start",
 76 |                         type=int,
 77 |                         help="Video number to start from (first is 0)",
 78 |                         dest="video_start",
 79 |                         required=False,
 80 |                         metavar="INT",
 81 |                         default=0)
 82 |     argparser.add_argument("--video_end",
 83 |                         type=int,
 84 |                         help="Video number to end at (0 = all videos)",
 85 |                         dest="video_end",
 86 |                         required=False,
 87 |                         metavar="INT",
 88 |                         default=0)
 89 | 
 90 |     argparser.add_argument("--notes_start",
 91 |                         type=int,
 92 |                         help="Note number to start from (first is 0)",
 93 |                         dest="notes_start",
 94 |                         required=False,
 95 |                         metavar="INT",
 96 |                         default=0)
 97 |     argparser.add_argument("--notes_end",
 98 |                         type=int,
 99 |                         help="Note number to end at (0 = all notes)",
100 |                         dest="notes_end",
101 |                         required=False,
102 |                         metavar="INT",
103 |                         default=0)
104 | 
105 |     argparser.add_argument("--docs_start",
106 |                         type=int,
107 |                         help="Document number to start from (first is 0)",
108 |                         dest="docs_start",
109 |                         required=False,
110 |                         metavar="INT",
111 |                         default=0)
112 |     argparser.add_argument("--docs_end",
113 |                         type=int,
114 |                         help="Document number to end at (0 = all docs)",
115 |                         dest="docs_end",
116 |                         required=False,
117 |                         metavar="INT",
118 |                         default=0)
119 | 
120 |     argparser.add_argument("-v", "--verbose", action="store_true",
121 |                         help="Print more info to STDOUT while processing")
122 |     argparser.add_argument("--no-download",
123 |                         action="store_true",
124 |                         help="Do not download attachments, only store links",
125 |                         dest="no_download",
126 |                         required=False)
127 |     args = argparser.parse_args()
128 |     return args
129 | 
def process_post(number, post_data, post_parser, json_stuff):
    """Hand the post found at post_data[1] to post_parser.

    The post is wrapped in a defaultdict so that missing fields read as an
    empty string.
    """
    post = defaultdict(str, post_data[1])
    post_parser(number, post, json_stuff)
134 | 
def process_audio(number, audio_data, post_parser, json_stuff):
    """Wrap a single audio track as a pseudo-post and pass it to post_parser.

    audio_data is the API response list; its first element is the track.
    An empty list is logged as a deleted track and skipped.

    Only the lookup of the track is guarded: an IndexError raised inside
    post_parser propagates instead of being reported as a deleted track.
    """
    try:
        track = audio_data[0]
    except IndexError:  # deleted :(
        logging.warning("Deleted track: {}".format(str(audio_data)))
        return

    data = {'attachments': [{'type': 'audio',
                             'audio': track,
                             }],
            'id': 'audio'
            }
    post_parser(number, data, json_stuff)
149 | 
def process_doc(number, doc_data, post_parser, json_stuff):
    """Wrap a single document as a pseudo-post with id 'doc' and pass it to post_parser."""
    attachment = {'type': 'doc', 'doc': doc_data}
    pseudo_post = {'attachments': [attachment], 'id': 'doc'}
    post_parser(number, pseudo_post, json_stuff)
158 | 
159 | 
def ranges(start, end, count):
    """Validate a start/end pair against count and return (start, end, total).

    An end of 0 means "up to count". Raises RuntimeError when start is
    outside 0..count or end is outside start..count.
    """
    stop = count if end == 0 else end
    if start < 0 or start >= count + 1:
        raise RuntimeError("Start argument not in valid range")
    if stop < start or stop > count:
        raise RuntimeError("End argument not in valid range")
    logging.info("Working range: from {} to {}".format(start, stop))
    return start, stop, stop - start
171 | 
def _print_progress(done, total):
    """Print the share of processed items; an empty range counts as complete."""
    fraction = float(done) / total if total else 1.0
    print("\nDone: {:.2%} ({})".format(fraction, int(done)))


def main():
    """Parse arguments, obtain an access token and run every requested dump mode.

    Raises NotImplementedError for the unsupported 'video' and 'notes' modes
    before any network activity, and RuntimeError when no access token could
    be obtained or a start/end range is invalid.
    """

    args = arg_parse()
    args.access_rights = ["wall", "audio", "friends", "notes", "video", "docs"]

    # Fail fast: reject unsupported modes before asking the user to
    # authenticate and before any (possibly hours-long) dump has started.
    if 'video' in args.mode:
        raise NotImplementedError("Video mode is not written yet, sorry :(")
    if 'notes' in args.mode:
        raise NotImplementedError("Notes mode is not written yet, sorry :(")

    args.token = auth(args) if args.token is None else args.token
    if args.token is None:
        raise RuntimeError("Access token not found")

    if 'wall' in args.mode:
        #determine posts count
        (response, json_stuff) = call_api("wall.get", [("owner_id", args.id), ("count", 1), ("offset", 0)], args)
        count = response[0]
        logging.info("Total posts: {}".format(count))
        print("Wall download start")
        args.wall_start, args.wall_end, total = ranges(args.wall_start, args.wall_end, count)
        post_parser = PostParser(args.directory, str(args.id), args)
        for done, offset in enumerate(xrange(args.wall_start, args.wall_end)):
            if args.verbose and done % 10 == 0:
                _print_progress(done, total)
            (post, json_stuff) = call_api("wall.get", [("owner_id", args.id), ("count", 1), ("offset", offset)], args)
            process_post(("wall post", offset), post, post_parser, json_stuff)
        if args.verbose:
            _print_progress(total, total)

    if 'audio' in args.mode:
        #determine audio count
        (response, json_stuff) = call_api("audio.getCount", [("oid", args.id)], args)
        count = response
        logging.info("Total audio tracks: {}".format(count))
        print("Audio download start")
        args.audio_start, args.audio_end, total = ranges(args.audio_start, args.audio_end, count)
        post_parser = PostParser(args.directory, str(args.id), args)
        # audio.get takes an unsigned id plus a parameter name that tells
        # users from groups. args.id itself must stay signed: the docs mode
        # below and PostParser still read it.
        id_param = "uid" if args.id > 0 else "gid"
        unsigned_id = abs(args.id)
        for done, offset in enumerate(xrange(args.audio_start, args.audio_end)):
            if args.verbose and done % 10 == 0:
                _print_progress(done, total)
            (audio, json_stuff) = call_api("audio.get", [(id_param, unsigned_id), ("count", 1), ("offset", offset)], args)
            process_audio(("audiotrack", offset), audio, post_parser, json_stuff)
        if args.verbose:
            _print_progress(total, total)

    if 'docs' in args.mode:
        # get ALL docs: the first response element is the count, the rest are documents
        (response, json_stuff) = call_api("docs.get", [("oid", args.id)], args)
        count = response[0]
        data = response[1:]
        logging.info("Total documents: {}".format(count))
        print("Docs download start")
        args.docs_start, args.docs_end, total = ranges(args.docs_start, args.docs_end, count)
        post_parser = PostParser(args.directory, str(args.id), args)
        selected = data[args.docs_start:args.docs_end]
        for done, doc in enumerate(selected):
            if args.verbose and done % 10 == 0:
                _print_progress(done, total)
            process_doc(("document", args.docs_start + done), doc, post_parser, json_stuff)
        if args.verbose:
            _print_progress(total, total)
247 | 
if __name__ == '__main__':
    # Everything is logged to report.log at DEBUG level.
    logging.basicConfig(format=u"""%(filename).6s : %(lineno)4d #%(levelname)8s [%(asctime)s] %(message)s""",
                            level=logging.DEBUG,
                            filename=u'report.log')
    ok = False
    try:
        logging.info("Start")
        main()
        logging.info("End")
        ok = True
        print("")
    except KeyboardInterrupt:
        logging.critical("Interrupted by keystroke")
        print("\nWhy, cruel world?..")
    except Exception:
        # Keep the traceback in the report before letting the error propagate.
        logging.exception("Unhandled error")
        raise
    finally:
        if not ok:
            logging.critical("Fail")
265 | 


--------------------------------------------------------------------------------
/PostParser.py:
--------------------------------------------------------------------------------
  1 | __author__ = 'rast'
  2 | 
  3 | import logging
  4 | from os import path, makedirs
  5 | import json
  6 | #from ThreadedDownload import ThreadedDownload  # buggy like hell
  7 | from Download import download
  8 | from Api import call_api
  9 | from collections import defaultdict
 10 | import re
 11 | 
 12 | def make_dir(base_dir, name):
 13 |     """Make new dir into base dir, return concatenation"""
 14 |     if path.exists(base_dir) and path.isdir(base_dir):
 15 |         directory = path.join(base_dir, name)
 16 |         if path.exists(directory) and path.isdir(directory):
 17 |             #raise RuntimeError("Directory already exists: {}".format(directory))
 18 |             return directory
 19 |         else:
 20 |             makedirs(directory)
 21 |             return directory
 22 |     else:
 23 |         raise RuntimeError("Directory does not exist: {}".format(base_dir))
 24 | 
 25 | def escape(name):
 26 |     """Escape the filename"""
 27 |     result =  unicode(re.sub('[^+=\-()$!#%&,.\w\s]', '_', name, flags=re.UNICODE).strip())
 28 |     #print("\t{}\n\t{}".format(name, result))
 29 |     return result[:250]
 30 | 
 31 | 
 32 | class PostParser(object):
 33 |     """Parses given post into data lists (text, music, photos, info, etc.)
 34 | 
 35 |         parse post - store useful data:
 36 |             id (of the post)
 37 |             to_id (always user?)
 38 |             from_id (post author)
 39 |             date (unix timestamp, convert to time)
 40 |             text (unicode)
 41 |             attachments: (multimedia!)
 42 |                 type (type name)
 43 |                 <type>:
 44 |                     ...
 45 |             comments: (obvious)
 46 |                 count
 47 |                 can_post (0|1)
 48 |             likes: (people list)
 49 |                 count
 50 |                 user_likes (if user liked it)
 51 |                 can_like
 52 |                 can_publish
 53 |             reposts: (people list)
 54 |                 count
 55 |                 user_reposted (0|1)
 56 |             signer_id (if group, and if post is signed)
 57 |             copy_owner_id (if repost, author's id)
 58 |             copy_post_id (if repost, original post id)
 59 |             copy_text (if repost, user's response)
 60 | 
 61 | """
 62 | 
 63 |     def __init__(self, base_dir, subdir, args):
 64 |         """Make directory for current user"""
 65 |         self.directory = make_dir(base_dir, subdir)
 66 |         self.args = args
 67 | 
 68 |     def __call__(self, tpl, raw_data, json_stuff):
 69 |         """Process whole post into directory"""
 70 |         keys = []
 71 |         funcs = []
 72 |         self.urls = []
 73 |         self.prefix = tpl[0]
 74 |         self.number = tpl[1]
 75 |         ignore = ['id', 'to_id', 'from_id', 'date',
 76 |                   'likes', 'reposts', 'signer_id',
 77 |                   'copy_owner_id', 'copy_post_id', 'copy_post_date',
 78 |                   'copy_post_type', 'reply_count', 'post_type',
 79 |                   'post_source', 'online', 'attachment', 'copy_text',
 80 |                   'media', 'can_edit',
 81 |                   # comments fix
 82 |                   'uid', 'cid', 'reply_to_cid', 'reply_to_uid',
 83 |                   'reply_owner_id', 'reply_post_id',
 84 |                 ]
 85 |         for k in raw_data.keys():
 86 |             if k in ignore:
 87 |                 continue
 88 |             try:
 89 |                 f = getattr(self, k)
 90 |                 keys.append(k)
 91 |                 funcs.append(f)
 92 |             except AttributeError:
 93 |                 logging.warning("Not implemented: {}".format(k))
 94 |         logging.info("Saving: {} for {}".format(', '.join(keys), raw_data['id']))
 95 |         self.post_directory = make_dir(self.directory, str(raw_data['id']))
 96 | 
 97 |         self.save_raw(json_stuff)
 98 |         for (f, k) in zip(funcs, keys):
 99 |             f(k, raw_data)
100 | 
101 |         if self.urls and not self.args.no_download:
102 |             download(self.urls,
103 |                       self.post_directory,
104 |             )
105 | 
106 |     def text(self, key, raw_data):
107 |         """Save text of the note"""
108 |         text = raw_data['text']
109 |         users_text = raw_data['copy_text']
110 |         stuff = ''
111 |         if raw_data['copy_post_id'] == '':  # user's post
112 |             if text == '':
113 |                 return
114 |             else:
115 |                 stuff = '<h1>Text:</h1>\n' + text
116 |         else:  # repost
117 |             if text == '':
118 |                 if users_text == '':
119 |                     return
120 |                 else:
121 |                     stuff = '<h1>Text:</h1>\n' + users_text
122 |             else:
123 |                 if users_text == '':
124 |                     stuff = '<h1>Original text:</h1>\n' + text
125 |                 else:
126 |                     stuff = "<h1>User's text:</h1>\n" + users_text + \
127 |                             '<h1>Original text:</h1>\n' + text
128 | 
129 | 
130 |         f_name = path.join(self.post_directory, 'text.html')
131 |         out_file = open(f_name, 'a+')
132 |         out_file.write(stuff.encode("utf-8"))
133 |         out_file.close()
134 | 
135 |     def attachments(self, key, raw_data):
136 |         """Save all attachments"""
137 |         f_args = []
138 |         funcs = []
139 |         for att in raw_data[key]:
140 |             t = att['type']
141 |             k = 'dl_' + t
142 |             try:
143 |                 f = getattr(self, k)
144 |                 f_args.append(att[t])
145 |                 funcs.append(f)
146 |             except AttributeError:
147 |                 logging.warning("Not implemented downloader: {}".format(t))
148 |         for (f, a) in zip(funcs, f_args):
149 |             f(a)
150 | 
151 |     def comments(self, key, data):
152 |         """Save all comments"""
153 |         count = data[key]['count']
154 |         if count == 0:
155 |             return
156 |         comments = [count, ]
157 |         for x in xrange(data[key]['count']):
158 |             (comment_data, json_stuff) = call_api("wall.getComments",
159 |                                                 [("owner_id", self.args.id),
160 |                                                     ("post_id", data["id"]),
161 |                                                     ("sort", "asc"),
162 |                                                     ("offset", x),
163 |                                                     ("count", 1),
164 |                                                     ("preview_length", 0),
165 |                                                     ("need_likes", 1),
166 |                                                     ("v", 4.4),
167 |                                                  ], self.args)
168 |             comments.append(comment_data[1])
169 |             cdata = defaultdict(lambda: '', comment_data[1])
170 |             pp = PostParser(self.post_directory, 'comments', self.args)
171 |             pp(('comment to ',self.number), cdata, json_stuff)
172 |         json_data = json.dumps(comments, indent=4, ensure_ascii=False)
173 |         f_name = path.join(self.post_directory, 'comments.json')
174 |         out_file = open(f_name, 'a+')
175 |         out_file.write(json_data.encode('utf-8'))
176 |         out_file.close()
177 | 
178 |     def save_raw(self, data):
179 |         """Save raw post data"""
180 |         data = json.loads(data)
181 |         data = json.dumps(data, indent=4, ensure_ascii=False)
182 | 
183 |         f_name = path.join(self.post_directory, 'raw.json')
184 |         out_file = open(f_name, 'a+')
185 |         out_file.write(data.encode('utf-8'))
186 |         out_file.close()
187 | 
188 |     def save_url(self, url, name=None, subdir=''):
189 |         if name is not None:
190 |             name = escape(name)
191 |         self.urls.append((url, name, subdir))
192 |         f_name = path.join(self.post_directory, 'media_urls.txt')
193 |         out_file = open(f_name, 'a+')
194 |         out_file.write(url)
195 |         out_file.write('\n')
196 |         out_file.close()
197 | 
198 |     def dl_photo(self, data):
199 |         """Download a photo
200 |             vk is a bit crazy, it stores photo in a bunch of sizes:
201 |             src
202 |             src_small
203 |             src_big
204 |             src_xbig
205 |             src_xxbig
206 |             src_xxxbig
207 |             (and what else?)
208 |         """
209 |         sizes = ['src_xxxbig', 'src_xxbig', 'src_xbig', 'src_big', 'src', 'src_small']
210 |         url = None
211 |         for s in sizes:
212 |             try:
213 |                 url = data[s]  # try to get biggest size
214 |                 break
215 |             except KeyError:
216 |                 pass
217 |         if url is None:
218 |             logging.error("Unable to get photo url!")
219 |         else:
220 |             self.save_url(url)
221 | 
222 |     def dl_link(self, data):
223 |         """Store links in a file"""
224 |         url = data['url']
225 |         f_name = path.join(self.post_directory, 'links.txt')
226 |         out_file = open(f_name, 'a+')
227 |         out_file.write(url)
228 |         out_file.write('\n')
229 |         out_file.close()
230 | 
231 |     def dl_photos_list(self, data):
232 |         """Download list of photos"""
233 |         for x in data:
234 |             self.dl_photo(x)
235 | 
236 |     def dl_audio(self, data):
237 |         initial_data = data
238 |         aid = data["aid"]
239 |         owner = data["owner_id"]
240 |         request = "{}_{}".format(owner, aid)
241 |         (audio_data, json_stuff) = call_api("audio.getById", [("audios", request), ], self.args)
242 |         album = 'no_album'
243 |         try:
244 |             data = audio_data[0]
245 |             artist = data['artist'][:100]
246 |             title= data['title'][:100]
247 |             name = u"{} - {}.mp3".format(artist, title)
248 |             #album = data['album']  # API changed, no time to fix
249 |             #album = get_album_name(owner, album, self.args)
250 |             #album = escape(album)
251 |             make_dir(self.post_directory, album)
252 |             self.save_url(data["url"], name, album)
253 |         except IndexError: # deleted :(
254 |             logging.warning("Deleted track: {}".format(str(initial_data)))
255 |             return
256 | 
257 |         # store lyrics if any
258 |         try:
259 |             lid = data["lyrics_id"]
260 |         except KeyError:
261 |             return
262 |         (lyrics_data, json_stuff) = call_api("audio.getLyrics", [("lyrics_id", lid), ], self.args)
263 |         text = lyrics_data["text"].encode('utf-8')
264 |         name = escape(name)
265 |         f_name = path.join(self.post_directory, album)
266 |         f_name = path.join(f_name, name+'.txt')
267 |         # escape!
268 |         out_file = open(f_name, 'a+')
269 |         out_file.write(text)
270 |         out_file.write('\n')
271 |         out_file.close()
272 | 
273 | 
274 |     """Download video
        There's a workaround:
276 |         http://habrahabr.ru/sandbox/57173/
277 |         But this requires authorization as another app
278 | 
279 |     def dl_video(self, data):
280 | 
281 |         #print data
282 |     """
283 | 
284 | 
285 |     def dl_doc(self, data):
286 |         """Download document (GIFs, etc.)"""
287 |         url = data["url"]
288 |         name = data["title"]
289 |         name, ext = path.splitext(name)
290 |         name = name + '.' + data["ext"]
291 |         self.save_url(url, name)
292 | 
293 |     def dl_note(self, data):
294 |         """Download note, not comments"""
295 |         (note_data, json_stuff) = call_api("notes.getById", [
296 |             ("owner_id", data["owner_id"]),
297 |             ("nid", data["nid"]),
298 |             ], self.args)
299 |         stuff = u"<h1>{title}</h1>\n{text}".format(**note_data)
300 |         ndir = make_dir(self.post_directory, 'note_'+note_data["id"])
301 |         f_name = path.join(ndir, 'text.html')
302 |         out_file = open(f_name, 'a+')
303 |         out_file.write(stuff.encode("utf-8"))
304 |         out_file.close()
305 | 
306 |         ndata = json.dumps(note_data, indent=4, ensure_ascii=False)
307 | 
308 |         f_name = path.join(ndir, 'raw.json')
309 |         out_file = open(f_name, 'a+')
310 |         out_file.write(ndata.encode("utf-8"))
311 |         out_file.close()
312 | 


--------------------------------------------------------------------------------