├── download
    └── 下载的文件在这里.txt
├── .gitignore
├── share-url.txt
├── v0200f230000bkmk0rimac2mte5mhai0.mp4
├── requirements.txt
├── share-url.bat
├── fuck-byted-acrawler.js
├── amemv-video-ripper.py
└── README.md


/download/下载的文件在这里.txt:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/
2 | .DS_Store


--------------------------------------------------------------------------------
/share-url.txt:
--------------------------------------------------------------------------------
1 | http://v.douyin.com/fKq7su/


--------------------------------------------------------------------------------
/v0200f230000bkmk0rimac2mte5mhai0.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yalarc/GetDouYinApplication/HEAD/v0200f230000bkmk0rimac2mte5mhai0.mp4


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | certifi==2019.3.9
2 | chardet==3.0.4
3 | idna==2.8
4 | PySocks==1.6.8
5 | requests==2.21.0
6 | six==1.12.0
7 | urllib3==1.24.3
8 | 


--------------------------------------------------------------------------------
/share-url.bat:
--------------------------------------------------------------------------------
 1 | http://v.douyin.com/64kGps/
 2 | http://v.douyin.com/6417bb/
 3 | http://v.douyin.com/64kGps/
 4 | http://v.douyin.com/64eXMG/
 5 | http://v.douyin.com/6Q4CYU/
 6 | http://v.douyin.com/6cry3g/
 7 | http://v.douyin.com/6cq8tX/
 8 | http://v.douyin.com/6whRKJ/
 9 | http://v.douyin.com/6cBdKF/
10 | http://v.douyin.com/6cY3oK/
11 | http://v.douyin.com/6cyX5A/
12 | http://v.douyin.com/6cjU2b/
13 | http://v.douyin.com/6crJtr/
14 | http://v.douyin.com/6w74Gs/
15 | http://v.douyin.com/6wKvcs/
16 | http://v.douyin.com/M125mw/
17 | http://v.douyin.com/M15wUe/
18 | http://v.douyin.com/M125ff/
19 | http://v.douyin.com/haeoGF/
20 | http://v.douyin.com/h5T63S/
21 | http://v.douyin.com/haL6Vc/
22 | http://v.douyin.com/Pp5VM9/
23 | http://v.douyin.com/PpsgV2/
24 | http://v.douyin.com/PpuLaR/
25 | http://v.douyin.com/Pp7vJV/
26 | http://v.douyin.com/PpaFb2/
27 | http://v.douyin.com/PtNSES/
28 | http://v.douyin.com/PtR5NX/
29 | http://v.douyin.com/PGKHHU/
30 | http://v.douyin.com/Pt13dX/
31 | http://v.douyin.com/PGoGHo/
32 | http://v.douyin.com/PtF5o5/
33 | http://v.douyin.com/PG7oUa/
34 | http://v.douyin.com/PGpKu8/
35 | http://v.douyin.com/PGG7oN/
36 | http://v.douyin.com/PtRtcW/
37 | http://v.douyin.com/PGtVDF/
38 | http://v.douyin.com/PtLmGs/
39 | http://v.douyin.com/PG3otm/
40 | http://v.douyin.com/PG36Yw/
41 | http://v.douyin.com/PGwPpc/
42 | http://v.douyin.com/PGGExs/
43 | http://v.douyin.com/PGEMb4/
44 | http://v.douyin.com/PtFk27/
45 | http://v.douyin.com/PGK1PY/
46 | http://v.douyin.com/PGK3VK/
47 | http://v.douyin.com/PGvufg/
48 | http://v.douyin.com/PGohgw/
49 | http://v.douyin.com/PtdhnR/
50 | 


--------------------------------------------------------------------------------
/fuck-byted-acrawler.js:
--------------------------------------------------------------------------------
/**
 * Compute the signature string for `userId`.
 *
 * The function builds a small stack-based interpreter, runs the embedded
 * program string on it with the object `e` as the program's only argument,
 * and finally returns `e.sign(userId)`.
 * NOTE(review): `sign` is presumably installed on `e` by the embedded
 * program (its text ends with "sign") - confirm before relying on it.
 *
 * @param {string} userId value forwarded unchanged to `e.sign`
 * @returns {*} whatever `e.sign(userId)` returns
 */
function generateSignature(userId) {
    // Publish a `navigator` carrying an iPhone Safari user agent on `this`.
    this.navigator = {
        userAgent: "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1"
    }
    // Object handed to the embedded program; `sign` is read from it at the end.
    var e = {}

    // The IIFE returns the interpreter entry point (inner function `r`).
    // Because no semicolon follows the IIFE, the parenthesised expression
    // further down is a call on that entry point, and the outer `r` receives
    // the result of that call.
    var r = (function () {
        // Evaluate `r <op> a`, where the operator is given as source text in
        // `e`; compiled operator functions are cached in `b`.
        function e(e, a, r) {
            return (b[e] || (b[e] = t("x,y", "return x " + e + " y")))(r, a)
        }

        // Evaluate `new e[a](e[a + 1], ..., e[a + r])`; one generated
        // function per argument count `r` is cached in `k`.
        function a(e, a, r) {
            return (k[r] || (k[r] = t("x,y", "return new x[y](" + Array(r + 1).join(",x[++y]").substr(1) + ")")))(e, a)
        }

        // Run program text `e` with arguments `a`. A fresh frame object `s`
        // records its nesting depth in `d`, references itself and the
        // enclosing frames as `$0..$d`, and holds the arguments at 0..length-1.
        function r(e, a, r) {
            var n, t, s = {}, b = s.d = r ? r.d + 1 : 0;
            for (s["$" + b] = s, t = 0; t < b; t++) s[n = "$" + t] = r[n];
            for (t = 0, b = s.length = a.length; t < b; t++) s[t] = a[t];
            return c(e, 0, s)
        }

        // Interpreter loop: executes program text `t` from offset `b` with
        // frame `k`. `v` is the operand stack and `x` the index of its top.
        // Every opcode is a character code minus 32.
        function c(t, b, k) {
            // Push a value onto the operand stack.
            function u(e) {
                v[x++] = e
            }

            // Read a length-prefixed string from the program text.
            function f() {
                return g = t.charCodeAt(b++) - 32, t.substring(b, b += g)
            }

            // Run a nested interpreter from the current offset; when it
            // throws, keep the exception in `h` and mark `y` with `l` itself.
            function l() {
                try {
                    y = c(t, b, k)
                } catch (e) {
                    h = e, y = l
                }
            }

            for (var h, y, d, g, v = [], x = 0; ;) switch (g = t.charCodeAt(b++) - 32) {
                case 1:
                    u(!v[--x]);
                    break;
                case 4:
                    v[x++] = f();
                    break;
                case 5:
                    u(function (e) {
                        var a = 0, r = e.length;
                        return function () {
                            var c = a < r;
                            return c && u(e[a++]), c
                        }
                    }(v[--x]));
                    break;
                case 6:
                    y = v[--x], u(v[--x](y));
                    break;
                case 8:
                    if (g = t.charCodeAt(b++) - 32, l(), b += g, g = t.charCodeAt(b++) - 32, y === c) b += g; else if (y !== l) return y;
                    break;
                case 9:
                    v[x++] = c;
                    break;
                case 10:
                    u(s(v[--x]));
                    break;
                case 11:
                    y = v[--x], u(v[--x] + y);
                    break;
                case 12:
                    // Decode an obfuscated string: each char code is XORed
                    // with (index + string length).
                    for (y = f(), d = [], g = 0; g < y.length; g++) d[g] = y.charCodeAt(g) ^ g + y.length;
                    u(String.fromCharCode.apply(null, d));
                    break;
                case 13:
                    y = v[--x], h = delete v[--x][y];
                    break;
                case 14:
                    v[x++] = t.charCodeAt(b++) - 32;
                    break;
                case 59:
                    u((g = t.charCodeAt(b++) - 32) ? (y = x, v.slice(x -= g, y)) : []);
                    break;
                case 61:
                    u(v[--x][t.charCodeAt(b++) - 32]);
                    break;
                case 62:
                    g = v[--x], k[0] = 65599 * k[0] + k[1].charCodeAt(g) >>> 0;
                    break;
                case 65:
                    h = v[--x], y = v[--x], v[--x][y] = h;
                    break;
                case 66:
                    u(e(t[b++], v[--x], v[--x]));
                    break;
                case 67:
                    y = v[--x], d = v[--x], u((g = v[--x]).x === c ? r(g.y, y, k) : g.apply(d, y));
                    break;
                case 68:
                    u(e((g = t[b++]) < "<" ? (b--, f()) : g + g, v[--x], v[--x]));
                    break;
                case 70:
                    u(!1);
                    break;
                case 71:
                    v[x++] = n;
                    break;
                case 72:
                    v[x++] = +f();
                    break;
                case 73:
                    u(parseInt(f(), 36));
                    break;
                case 75:
                    if (v[--x]) {
                        b++;
                        break
                    }
                // No break above: a falsy popped value falls through to the
                // relative jump of case 74.
                case 74:
                    g = t.charCodeAt(b++) - 32 << 16 >> 16, b += g;
                    break;
                case 76:
                    u(k[t.charCodeAt(b++) - 32]);
                    break;
                case 77:
                    y = v[--x], u(v[--x][y]);
                    break;
                case 78:
                    g = t.charCodeAt(b++) - 32, u(a(v, x -= g + 1, g));
                    break;
                case 79:
                    g = t.charCodeAt(b++) - 32, u(k["$" + g]);
                    break;
                case 81:
                    h = v[--x], v[--x][f()] = h;
                    break;
                case 82:
                    u(v[--x][f()]);
                    break;
                case 83:
                    h = v[--x], k[t.charCodeAt(b++) - 32] = h;
                    break;
                case 84:
                    v[x++] = !0;
                    break;
                case 85:
                    v[x++] = void 0;
                    break;
                case 86:
                    u(v[x - 1]);
                    break;
                case 88:
                    h = v[--x], y = v[--x], v[x++] = h, v[x++] = y;
                    break;
                case 89:
                    u(function () {
                        function e() {
                            return r(e.y, arguments, k)
                        }

                        return e.y = f(), e.x = c, e
                    }());
                    break;
                case 90:
                    v[x++] = null;
                    break;
                case 91:
                    v[x++] = h;
                    break;
                case 93:
                    h = v[--x];
                    break;
                case 0:
                    return v[--x];
                default:
                    // Any other opcode pushes the small integer (g - 16).
                    u((g << 16 >> 16) - 16)
            }
        }

        // n: the IIFE's `this`; t: its Function constructor; s: Object.keys
        // or a fallback; b: operator cache; k: constructor-call cache.
        var n = this, t = n.Function, s = Object.keys || function (e) {
            var a = {}, r = 0;
            for (var c in e) a[r++] = c;
            return a.length = r, a
        }, b = {}, k = {};
        return r
    })()

    // Embedded program text, executed with `e` as its single argument.
    ('gr$Daten Иb/s!l y͒yĹg,(lfi~ah`{mv,-n|jqewVxp{rvmmx,&effkx[!cs"l".Pq%widthl"@q&heightl"vr*getContextx$"2d[!cs#l#,*;?|u.|uc{uq$fontl#vr(fillTextx$$龘ฑภ경2<[#c}l#2q*shadowBlurl#1q-shadowOffsetXl#$$limeq+shadowColorl#vr#arcx88802[%c}l#vr&strokex[ c}l"v,)}eOmyoZB]mx[ cs!0s$l$Pb<k7l l!r&lengthb%^l$1+s$jl  s#i$1ek1s$gr#tack4)zgr#tac$! +0o![#cj?o ]!l$b%s"o ]!l"l$b*b^0d#>>>s!0s%yA0s"l"l!r&lengthb<k+l"^l"1+s"jl  s&l&z0l!$ +["cs\'(0l#i\'1ps9wxb&s() &{s)/s(gr&Stringr,fromCharCodes)0s*yWl ._b&s o!])l l Jb<k$.aj;l .Tb<k$.gj/l .^b<k&i"-4j!+& s+yPo!]+s!l!l Hd>&l!l Bd>&+l!l <d>&+l!l 6d>&+l!l &+ s,y=o!o!]/q"13o!l q"10o!],l 2d>& s.{s-yMo!o!]0q"13o!]*Ld<l 4d#>>>b|s!o!l q"10o!],l!& s/yIo!o!].q"13o!],o!]*Jd<l 6d#>>>b|&o!]+l &+ s0l-l!&l-l!i\'1z141z4b/@d<l"b|&+l-l(l!b^&+l-l&zl\'g,)gk}ejo{cm,)|yn~Lij~em["cl$b%@d<l&zl\'l $ +["cl$b%b|&+l-l%8d<@b|l!b^&+ q$sign ', [e])

    return e.sign(userId)
}
192 | 
// Command-line entry point: sign the first argument after the script path
// and write the signature to stdout.
var cliArguments = process.argv.splice(2)
var signature = generateSignature(cliArguments[0])

console.log(signature)
196 | 


--------------------------------------------------------------------------------
/amemv-video-ripper.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | import codecs
  3 | import copy
  4 | import getopt
  5 | import hashlib
  6 | import json
  7 | import os
  8 | import re
  9 | import sys
 10 | import time
 11 | import urllib
 12 | from threading import Thread
 13 | 
 14 | import requests
 15 | from urllib import parse
 16 | from six.moves import queue as Queue
 17 | 
 18 | 
 19 | # Setting timeout
 20 | TIMEOUT = 10
 21 | 
 22 | # Retry times
 23 | RETRY = 5
 24 | 
 25 | # Numbers of downloading threads concurrently
 26 | THREADS = 5
 27 | 
 28 | HEADERS = {
 29 |     'accept-encoding': 'gzip, deflate, br',
 30 |     'accept-language': 'zh-CN,zh;q=0.9',
 31 |     'pragma': 'no-cache',
 32 |     'cache-control': 'no-cache',
 33 |     'upgrade-insecure-requests': '1',
 34 |     'user-agent': "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1",
 35 | }
 36 | 
 37 | 
 38 | def download(medium_type, uri, medium_url, target_folder):
 39 |     requests.packages.urllib3.disable_warnings()
 40 |     headers = copy.copy(HEADERS)
 41 |     file_name = uri
 42 |     if medium_type == 'video':
 43 |         file_name += '.mp4'
 44 |         headers['user-agent'] = 'Aweme/27014 CFNetwork/974.2.1 Darwin/18.0.0'
 45 |     elif medium_type == 'image':
 46 |         file_name += '.jpg'
 47 |         file_name = file_name.replace("/", "-")
 48 |     else:
 49 |         return
 50 | 
 51 |     file_path = os.path.join(target_folder, file_name)
 52 |     if os.path.isfile(file_path) and os.path.getsize(file_path) > 0:
 53 |         print(file_name + " 已经爬取过了，文件保存在 " + file_path + " 放弃爬取")
 54 |         return
 55 | 
 56 |     # print("Downloading %s from %s.\n" % (file_name, medium_url))
 57 |     # VIDEOID_DICT[VIDEO_ID] = 1  # 记录已经下载的视频
 58 |     retry_times = 0
 59 |     while retry_times < RETRY:
 60 |         try:
 61 |             res = requests.get(medium_url, headers=headers, stream=True, timeout=TIMEOUT, allow_redirects=False,verify=False)
 62 |             # if resp.status_code == 403:
 63 |             #     retry_times = RETRY
 64 |             #     print("Access Denied when retrieve %s.\n" % medium_url)
 65 |             #     raise Exception("Access Denied")
 66 |             # if resp.status_code == 200:
 67 | 
 68 |             resp_url = res.headers['Location']
 69 |             print("Downloading_LONG %s from %s\n" % (file_name, resp_url))
 70 |             resp = requests.get(resp_url, stream=True, timeout=TIMEOUT,verify=False)
 71 |             with open(file_path, 'wb') as fh:
 72 |                 for chunk in resp.iter_content(chunk_size=1024, decode_unicode=True):
 73 |                     fh.write(chunk)
 74 |             break
 75 |         except:
 76 |             pass
 77 |         retry_times += 1
 78 |     else:
 79 |         try:
 80 |             os.remove(file_path)
 81 |         except OSError:
 82 |             pass
 83 |         print("Failed to retrieve %s from %s.\n" % (uri, medium_url))
 84 |         time.sleep(1)
 85 | 
 86 | 
 87 | def get_real_address(url):
 88 |     requests.packages.urllib3.disable_warnings()
 89 |     if url.find('v.douyin.com') < 0:
 90 |         return url
 91 |     res = requests.get(url, headers=HEADERS, allow_redirects=False,verify=False)
 92 |     return res.headers['Location'] if res.status_code == 302 else None
 93 | 
 94 | 
 95 | def get_dytk(url):
 96 |     requests.packages.urllib3.disable_warnings()
 97 |     res = requests.get(url, headers=HEADERS,verify=False)
 98 |     if not res:
 99 |         return None
100 |     dytk = re.findall("dytk: '(.*)'", res.content.decode('utf-8'))
101 |     if len(dytk):
102 |         return dytk[0]
103 |     return None
104 | 
105 | 
class DownloadWorker(Thread):
    """Thread that downloads the media jobs placed on a shared queue.

    Each queue item is a ``(medium_type, uri, download_url, target_folder)``
    tuple that is passed to ``download``.
    """

    def __init__(self, queue):
        """Remember the queue that ``run`` takes its jobs from."""
        Thread.__init__(self)
        self.queue = queue

    def run(self):
        """Process jobs forever; the loop never returns on its own."""
        while True:
            medium_type, uri, download_url, target_folder = self.queue.get()
            try:
                download(medium_type, uri, download_url, target_folder)
            except Exception as err:
                # Keep the worker alive: a dead worker would leave queued
                # jobs unprocessed.
                print("Download of %s failed: %s" % (uri, err))
            finally:
                # Always acknowledge the job, otherwise queue.join() in the
                # scheduler would block forever after a failure.
                self.queue.task_done()
116 | 
117 | 
118 | class CrawlerScheduler(object):
119 | 
120 |     def __init__(self, items):
121 |         self.numbers = []
122 |         self.challenges = []
123 |         self.musics = []
124 |         for i in range(len(items)):
125 |             url = get_real_address(items[i])
126 |             if not url:
127 |                 continue
128 |             if re.search('share/user', url):
129 |                 self.numbers.append(url)
130 |             if re.search('share/challenge', url):
131 |                 self.challenges.append(url)
132 |             if re.search('share/music', url):
133 |                 self.musics.append(url)
134 | 
135 |         self.queue = Queue.Queue()
136 |         self.scheduling()
137 | 
138 |     @staticmethod
139 |     def generateSignature(value):
140 |         p = os.popen('node fuck-byted-acrawler.js %s' % value)
141 |         return p.readlines()[0]
142 | 
143 |     @staticmethod
144 |     def calculateFileMd5(filename):
145 |         hmd5 = hashlib.md5()
146 |         fp = open(filename, "rb")
147 |         hmd5.update(fp.read())
148 |         return hmd5.hexdigest()
149 | 
150 |     def scheduling(self):
151 |         for x in range(THREADS):
152 |             worker = DownloadWorker(self.queue)
153 |             worker.daemon = True
154 |             worker.start()
155 | 
156 |         for url in self.numbers:
157 |             self.download_user_videos(url)
158 |         for url in self.challenges:
159 |             self.download_challenge_videos(url)
160 |         for url in self.musics:
161 |             self.download_music_videos(url)
162 | 
163 |     def download_user_videos(self, url):
164 |         number = re.findall(r'share/user/(\d+)', url)
165 |         if not len(number):
166 |             return
167 |         dytk = get_dytk(url)
168 |         hostname = urllib.parse.urlparse(url).hostname
169 |         if hostname != 't.tiktok.com' and not dytk:
170 |             return
171 |         user_id = number[0]
172 |         params = parse.parse_qs(parse.urlparse(url).query)
173 |         sec_uid = params['sec_uid'][0]
174 |         video_count = self._download_user_media(sec_uid, dytk, url)
175 |         self.queue.join()
176 |         print("\nAweme number %s, video number %s\n\n" %
177 |               (user_id, str(video_count)))
178 |         print("\nFinish Downloading All the videos from %s\n\n" % user_id)
179 | 
180 |     def download_challenge_videos(self, url):
181 |         challenge = re.findall('share/challenge/(\d+)', url)
182 |         if not len(challenge):
183 |             return
184 |         challenges_id = challenge[0]
185 |         video_count = self._download_challenge_media(challenges_id, url)
186 |         self.queue.join()
187 |         print("\nAweme challenge #%s, video number %d\n\n" %
188 |               (challenges_id, video_count))
189 |         print("\nFinish Downloading All the videos from #%s\n\n" % challenges_id)
190 | 
191 |     def download_music_videos(self, url):
192 |         music = re.findall('share/music/(\d+)', url)
193 |         if not len(music):
194 |             return
195 |         musics_id = music[0]
196 |         video_count = self._download_music_media(musics_id, url)
197 |         self.queue.join()
198 |         print("\nAweme music @%s, video number %d\n\n" %
199 |               (musics_id, video_count))
200 |         print("\nFinish Downloading All the videos from @%s\n\n" % musics_id)
201 | 
202 |     def _join_download_queue(self, aweme, target_folder):
203 |         try:
204 |             if aweme.get('video', None):
205 |                 uri = aweme['video']['play_addr']['uri']
206 |                 download_url = "https://aweme.snssdk.com/aweme/v1/play/?{0}"
207 |                 download_params = {
208 |                     'video_id': uri,
209 |                     'line': '0',
210 |                     'ratio': '720p',
211 |                     'media_type': '4',
212 |                     'vr_type': '0',
213 |                     'test_cdn': 'None',
214 |                     'improve_bitrate': '0',
215 |                     'iid': '35628056608',
216 |                     'device_id': '46166618998',
217 |                     'os_api': '18',
218 |                     'app_name': 'aweme',
219 |                     'channel': 'App%20Store',
220 |                     'idfa': '00000000-0000-0000-0000-000000000000',
221 |                     'device_platform': 'iphone',
222 |                     'build_number': '27014',
223 |                     'vid': '2ED380A7-F09C-6C9E-90F5-862D58F3129C',
224 |                     'openudid': '21dae85eeac1da35a69e2a0ffeaeef61c78a2e98',
225 |                     'device_type': 'iPhone8%2C2',
226 |                     'app_version': '2.7.0',
227 |                     'version_code': '2.7.0',
228 |                     'os_version': '12.0',
229 |                     'screen_width': '1242',
230 |                     'aid': '1128',
231 |                     'ac': 'WIFI',
232 |                     'is_play_url': '1'
233 | 
234 |                 }
235 |                 if aweme.get('hostname') == 't.tiktok.com':
236 |                     download_url = 'http://api.tiktokv.com/aweme/v1/play/?{0}'
237 |                     download_params = {
238 |                         'video_id': uri,
239 |                         'line': '0',
240 |                         'ratio': '720p',
241 |                         'media_type': '4',
242 |                         'vr_type': '0',
243 |                         'test_cdn': 'None',
244 |                         'improve_bitrate': '0',
245 |                         'version_code': '1.7.2',
246 |                         'language': 'en',
247 |                         'app_name': 'trill',
248 |                         'vid': 'D7B3981F-DD46-45A1-A97E-428B90096C3E',
249 |                         'app_version': '1.7.2',
250 |                         'device_id': '6619780206485964289',
251 |                         'channel': 'App Store',
252 |                         'mcc_mnc': '',
253 |                         'tz_offset': '28800'
254 |                     }
255 |                 share_info = aweme.get('share_info', {})
256 |                 url = download_url.format(
257 |                     '&'.join([key + '=' + download_params[key] for key in download_params]))
258 |                 self.queue.put(('video',
259 |                                 uri,
260 |                                 url, target_folder))
261 |             else:
262 |                 if aweme.get('image_infos', None):
263 |                     image = aweme['image_infos']['label_large']
264 |                     self.queue.put(
265 |                         ('image', image['uri'], image['url_list'][0], target_folder))
266 | 
267 |         except KeyError:
268 |             return
269 |         except UnicodeDecodeError:
270 |             print("Cannot decode response data from DESC %s" % aweme['desc'])
271 |             return
272 | 
273 |     # def __download_favorite_media(self, user_id, dytk, hostname, signature, favorite_folder, video_count):
274 |     #     if not os.path.exists(favorite_folder):
275 |     #         os.makedirs(favorite_folder)
276 |     #     # favorite_video_url = "https://%s/aweme/8v1/aweme/favorite/" % hostname
277 |     #     favorite_video_url = "https://%s/web/api/v2/aweme/like/" % hostname
278 |     #     favorite_video_params = {
279 |     #         'user_id': str(user_id),
280 |     #         'count': '21',
281 |     #         'max_cursor': '0',
282 |     #         'aid': '1128',
283 |     #         '_signature': signature,
284 |     #         'dytk': dytk
285 |     #     }
286 |     #     max_cursor = None
287 |     #     while True:
288 |     #         if max_cursor:
289 |     #             favorite_video_params['max_cursor'] = str(max_cursor)
290 |     #         res = requests.get(favorite_video_url,
291 |     #                            headers=HEADERS, params=favorite_video_params)
292 |     #         contentJson = json.loads(res.content.decode('utf-8'))
293 |     #         favorite_list = contentJson.get('aweme_list', [])
294 |     #         for aweme in favorite_list:
295 |     #             video_count += 1
296 |     #             aweme['hostname'] = hostname
297 |     #             self._join_download_queue(aweme, favorite_folder)
298 |     #         if contentJson.get('has_more'):
299 |     #             max_cursor = contentJson.get('max_cursor')
300 |     #         else:
301 |     #             break
302 |     #     return video_count
303 | 
304 |     def _download_user_media(self, user_id, dytk, url):
305 |         current_folder = os.getcwd()
306 |         target_folder = os.path.join(current_folder, 'download/%s' % user_id)
307 |         if not os.path.isdir(target_folder):
308 |             os.mkdir(target_folder)
309 | 
310 |         if not user_id:
311 |             print("Number %s does not exist" % user_id)
312 |             return
313 |         hostname = urllib.parse.urlparse(url).hostname
314 |         signature = self.generateSignature(str(user_id))
315 |         # user_video_url = "https://%s/aweme/v1/aweme/post/" % hostname
316 |         user_video_url = "https://%s/web/api/v2/aweme/post/" % hostname
317 |         # user_video_params = {
318 |         #     'user_id': str(user_id),
319 |         #     'count': '21',
320 |         #     'max_cursor': '0',
321 |         #     'aid': '1128',
322 |         #     '_signature': signature,
323 |         #     'dytk': dytk
324 |         # }
325 |         user_video_params = {
326 |             'sec_uid': str(user_id),
327 |             'count': '21',
328 |             'max_cursor': '0',
329 |             'aid': '1128',
330 |             '_signature': signature,
331 |             'dytk': dytk
332 |         }
333 |         if hostname == 't.tiktok.com':
334 |             user_video_params.pop('dytk')
335 |             user_video_params['aid'] = '1180'
336 | 
337 |         max_cursor, video_count = None, 0
338 |         while True:
339 |             if max_cursor:
340 |                 user_video_params['max_cursor'] = str(max_cursor)
341 |             res = requests.get(user_video_url, headers=HEADERS,
342 |                                params=user_video_params,verify=False)
343 |             contentJson = json.loads(res.content.decode('utf-8'))
344 |             aweme_list = contentJson.get('aweme_list', [])
345 |             for aweme in aweme_list:
346 |                 video_count += 1
347 |                 aweme['hostname'] = hostname
348 |                 self._join_download_queue(aweme, target_folder)
349 |             if contentJson.get('has_more'):
350 |                 max_cursor = contentJson.get('max_cursor')
351 |             else:
352 |                 break
353 |         # if True:
354 |         #     favorite_folder = target_folder + '/favorite'
355 |         #     video_count = self.__download_favorite_media(
356 |         #         user_id, dytk, hostname, signature, favorite_folder, video_count)
357 | 
358 |         if video_count == 0:
359 |             print("There's no video in number %s." % user_id)
360 | 
361 |         return video_count
362 | 
363 |     def _download_challenge_media(self, challenge_id, url):
364 |         if not challenge_id:
365 |             print("Challenge #%s does not exist" % challenge_id)
366 |             return
367 |         current_folder = os.getcwd()
368 |         target_folder = os.path.join(
369 |             current_folder, 'download/#%s' % challenge_id)
370 |         if not os.path.isdir(target_folder):
371 |             os.mkdir(target_folder)
372 | 
373 |         hostname = urllib.parse.urlparse(url).hostname
374 |         signature = self.generateSignature(str(challenge_id) + '9' + '0')
375 | 
376 |         challenge_video_url = "https://%s/aweme/v1/challenge/aweme/" % hostname
377 |         challenge_video_params = {
378 |             'ch_id': str(challenge_id),
379 |             'count': '9',
380 |             'cursor': '0',
381 |             'aid': '1128',
382 |             'screen_limit': '3',
383 |             'download_click_limit': '0',
384 |             '_signature': signature
385 |         }
386 | 
387 |         cursor, video_count = None, 0
388 |         while True:
389 |             if cursor:
390 |                 challenge_video_params['cursor'] = str(cursor)
391 |                 challenge_video_params['_signature'] = self.generateSignature(
392 |                     str(challenge_id) + '9' + str(cursor))
393 |             res = requests.get(challenge_video_url,
394 |                                headers=HEADERS, params=challenge_video_params,verify=False)
395 |             try:
396 |                 contentJson = json.loads(res.content.decode('utf-8'))
397 |             except:
398 |                 print(res.content)
399 |             aweme_list = contentJson.get('aweme_list', [])
400 |             if not aweme_list:
401 |                 break
402 |             for aweme in aweme_list:
403 |                 aweme['hostname'] = hostname
404 |                 video_count += 1
405 |                 self._join_download_queue(aweme, target_folder)
406 |                 print("number: ", video_count)
407 |             if contentJson.get('has_more'):
408 |                 cursor = contentJson.get('cursor')
409 |             else:
410 |                 break
411 |         if video_count == 0:
412 |             print("There's no video in challenge %s." % challenge_id)
413 |         return video_count
414 | 
415 |     def _download_music_media(self, music_id, url):
416 |         if not music_id:
417 |             print("Challenge #%s does not exist" % music_id)
418 |             return
419 |         current_folder = os.getcwd()
420 |         target_folder = os.path.join(current_folder, 'download/@%s' % music_id)
421 |         if not os.path.isdir(target_folder):
422 |             os.mkdir(target_folder)
423 | 
424 |         hostname = urllib.parse.urlparse(url).hostname
425 |         signature = self.generateSignature(str(music_id))
426 |         music_video_url = "https://%s/aweme/v1/music/aweme/?{0}" % hostname
427 |         music_video_params = {
428 |             'music_id': str(music_id),
429 |             'count': '9',
430 |             'cursor': '0',
431 |             'aid': '1128',
432 |             'screen_limit': '3',
433 |             'download_click_limit': '0',
434 |             '_signature': signature
435 |         }
436 |         if hostname == 't.tiktok.com':
437 |             for key in ['screen_limit', 'download_click_limit', '_signature']:
438 |                 music_video_params.pop(key)
439 |             music_video_params['aid'] = '1180'
440 | 
441 |         cursor, video_count = None, 0
442 |         while True:
443 |             if cursor:
444 |                 music_video_params['cursor'] = str(cursor)
445 |                 music_video_params['_signature'] = self.generateSignature(
446 |                     str(music_id) + '9' + str(cursor))
447 | 
448 |             url = music_video_url.format(
449 |                 '&'.join([key + '=' + music_video_params[key] for key in music_video_params]))
450 |             res = requests.get(url, headers=HEADERS,verify=False)
451 |             contentJson = json.loads(res.content.decode('utf-8'))
452 |             aweme_list = contentJson.get('aweme_list', [])
453 |             if not aweme_list:
454 |                 break
455 |             for aweme in aweme_list:
456 |                 aweme['hostname'] = hostname
457 |                 video_count += 1
458 |                 self._join_download_queue(aweme, target_folder)
459 |             if contentJson.get('has_more'):
460 |                 cursor = contentJson.get('cursor')
461 |             else:
462 |                 break
463 |         if video_count == 0:
464 |             print("There's no video in music %s." % music_id)
465 |         return video_count
466 | 
467 | 
def usage():
    """Print bilingual instructions for supplying share URLs.

    The English text is printed first, then the Chinese text. Both describe
    the two supported inputs: a ``share-url.txt`` file in the working
    directory, or a comma-separated URL list on the command line.

    This function only prints; it does not exit the process.
    """
    print("1. Please create file share-url.txt under this same directory.\n"
          "2. In share-url.txt, you can specify amemv share page url separated by "
          "comma/space/tab/CR. Accept multiple lines of text\n"
          "3. Save the file and retry.\n\n"
          "Sample File Content:\nurl1,url2\n\n"
          "Or use command line options:\n\n"
          "Sample:\npython amemv-video-ripper.py url1,url2\n\n\n")
    # The Chinese example used to read "url1,url12" (typo); it now matches the
    # English sample above.
    print(u"未找到share-url.txt文件，请创建.\n"
          u"请在文件中指定抖音分享页面URL，并以 逗号/空格/tab/表格鍵/回车符 分割，支持多行.\n"
          u"保存文件并重试.\n\n"
          u"例子: url1,url2\n\n"
          u"或者直接使用命令行参数指定链接\n"
          u"例子: python amemv-video-ripper.py url1,url2")
482 | 
483 | 
def parse_sites(fileName):
    """Read a URL list file and return its non-empty entries.

    The file is read as bytes and decoded as UTF-8. Entries may be separated
    by commas, spaces, tabs, carriage returns or newlines, in any mix and
    across multiple lines.

    Args:
        fileName: Path of the text file to read.

    Returns:
        A list of the entries in file order, each stripped of surrounding
        whitespace, with empty entries dropped.
    """
    with open(fileName, "rb") as f:
        raw = f.read()

    # "utf-8-sig" drops a leading byte-order mark (e.g. written by Windows
    # Notepad). Plain "utf-8" would leave U+FEFF glued to the first URL.
    text = raw.decode("utf-8-sig")

    # Normalise every supported separator to a comma so one split suffices.
    for separator in ("\t", "\r", "\n", " "):
        text = text.replace(separator, ",")

    stripped = (piece.strip() for piece in text.split(","))
    return [site for site in stripped if site]
497 | 
498 | 
# Module-level switch; set to True when --favorite is passed on the command line.
download_favorite = False

if __name__ == "__main__":
    content, opts, args = None, None, []

    # Parse command-line options; positional arguments carry the URL list.
    try:
        if len(sys.argv) >= 2:
            opts, args = getopt.getopt(sys.argv[1:], "hi:o:", ["favorite"])
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    if args:
        # Only the first positional argument is read: a comma-separated URL list.
        content = args[0].split(",")
    else:
        # No URLs on the command line: fall back to the sites file.
        filename = "share-url.txt"
        if not os.path.exists(filename):
            usage()
            sys.exit(1)
        content = parse_sites(filename)

    if not content or content[0] == "":
        usage()
        sys.exit(1)

    # Compare against the flag itself. The previous `o in ("--favorite")` was
    # a substring test, because parentheses without a comma do not make a tuple.
    if opts and any(o == "--favorite" for o, _ in opts):
        download_favorite = True

    CrawlerScheduler(content)
533 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | 文章链接：https://mp.weixin.qq.com/s?__biz=MzI4OTQxNjU2Mw==&mid=2247483973&idx=1&sn=e5f1fc8141993bedab4968cff9ac50d2&chksm=ec2e3659db59bf4f6dd2c8346980c96dacb99e01d518c233b20278c2f5de6b672e37d632bf88&token=829399530&lang=zh_CN#rd
  2 | 
  3 | ### 常见问题：1. 报 node 命令找不到：需要安装 node.js 环境；2. 执行完成后下载失败率很高：请修改 device_id 参数；3. 长视频下载问题：长视频在分享链接中爬取不到（我是没找到），app 里可以看到，但是 videoid 不一样，app 的加密不好破解，所以暂时长视频只能下载前 20 秒；4. 修改了抖音获取视频列表的接口，签名破解大概需要 20-30 秒时间，运行不报错的情况下请耐心等待哦。
  4 | 
  5 | 
  6 | 导读：直接上干货，爬取抖音小姐姐视频列表，并去水印下载（仅供学习使用，不做商业用途，如有侵权，联系作者删除）；接18年初，Python基础篇更新。
  7 | 
  8 | 比如我想获取抖音网红“惠子”小姐姐的主页列表视频，第一步在抖音上打开惠子的主页，右上角点击一下，可以看到一个分享按钮，点击分享，找到复制链接->  http://v.douyin.com/9GEGSp/ 。把链接放到浏览器中短链接被自动解析，变成长链接：
  9 | https://www.iesdouyin.com/share/user/73838190950?u_code=128dfi636&sec_uid=MS4wLjABAAAAHmQ4DqHKN8IdfWWd52sYaGS6zaZaOTghOZ4ysZ0z_YM&timestamp=1571884619&utm_source=copy&utm_campaign=client_share&utm_medium=android&share_app_name=douyin ，在长链接中就可以看到一些用户信息，有没有用我们先列出来！
 10 | 
 11 | 
 12 | key | value
 13 | ---|---
 14 | user | 73838190950
 15 | u_code |128dfi636
 16 | sec_uid |MS4wLjABAAAAHmQ4DqHKN8IdfWWd52sYaGS6zaZaOTghOZ4ysZ0z_YM
 17 | timestamp |1571884619
 18 | utm_source |copy
 19 | utm_campaign |client_share
 20 | utm_medium |android
 21 | share_app_name |douyin
 22 | 
 23 | 打开浏览器开发者工具，找到对应的视频列表请求接口，一个一个排查终于找到这个链接：https://www.iesdouyin.com/web/api/v2/aweme/post/?sec_uid=MS4wLjABAAAAHmQ4DqHKN8IdfWWd52sYaGS6zaZaOTghOZ4ysZ0z_YM&count=21&max_cursor=0&aid=1128&_signature=QOtJJBARHVwzHUNLqlT-mEDrST&dytk=593d265a74e3384e06112b423ef268da
 24 | 
 25 | key | value
 26 | ---|---
 27 | sec_uid | MS4wLjABAAAAHmQ4DqHKN8IdfWWd52sYaGS6zaZaOTghOZ4ysZ0z_YM
 28 | count |21
 29 | max_cursor |1567769380000
 30 | aid |1128
 31 | _signature |F1OCixATSudkpYjkPsX5FRdTgp
 32 | dytk |593d265a74e3384e06112b423ef268da
 33 | 
 34 | 返回的数据：
 35 | 
 36 | ```
 37 |      Json:
 38 | {
 39 | "max_cursor": 1569668211000,
 40 | "min_cursor": 1571815003000,
 41 | "has_more": true,
 42 | -"extra": {
 43 | "now": 1571888892000,
 44 | "logid": "2019102411481201001404709304158BDD"
 45 | },
 46 | "status_code": 0,
 47 | -"aweme_list": [
 48 | -{
 49 | -"statistics": {
 50 | "aweme_id": "6750893105127378180",
 51 | "comment_count": 1240,
 52 | "digg_count": 30000,
 53 | "play_count": 675000,
 54 | "share_count": 79,
 55 | "forward_count": 17
 56 | },
 57 | "image_infos": null,
 58 | "uniqid_position": null,
 59 | "long_video": null,
 60 | "aweme_id": "6750893105127378180",
 61 | +"text_extra": [ … ],
 62 | "position": null,
 63 | "geofencing": null,
 64 | "promotions": null,
 65 | "desc": "#看啥啥都缺 ，爱买女孩绝不认输。",
 66 | "aweme_type": 4,
 67 | "comment_list": null,
 68 | "video_text": null,
 69 | "cha_list": null,
 70 | -"video": {
 71 | +"cover": { … },
 72 | "width": 720,
 73 | -"origin_cover": {
 74 | -"url_list": [
 75 | "http://p3-dy.byteimg.com/large/tos-cn-p-0015/6e83730009fe4fc2a3eeddbf06b0dbbf_1571815007.jpeg",
 76 | "http://p9-dy.byteimg.com/large/tos-cn-p-0015/6e83730009fe4fc2a3eeddbf06b0dbbf_1571815007.jpeg",
 77 | "http://p1-dy.byteimg.com/large/tos-cn-p-0015/6e83730009fe4fc2a3eeddbf06b0dbbf_1571815007.jpeg"
 78 | ],
 79 | "uri": "large/tos-cn-p-0015/6e83730009fe4fc2a3eeddbf06b0dbbf_1571815007"
 80 | },
 81 | "has_watermark": false,
 82 | -"play_addr_lowbr": {
 83 | "uri": "v0200ff80000bmnvs5ignbh26fqqufbg",
 84 | -"url_list": [
 85 | "https://aweme.snssdk.com/aweme/v1/play/?video_id=v0200ff80000bmnvs5ignbh26fqqufbg&line=0&ratio=540p&media_type=4&vr_type=0&improve_bitrate=0&is_play_url=1",
 86 | "https://api.amemv.com/aweme/v1/play/?video_id=v0200ff80000bmnvs5ignbh26fqqufbg&line=1&ratio=540p&media_type=4&vr_type=0&improve_bitrate=0&is_play_url=1"
 87 | ]
 88 | },
 89 | "bit_rate": null,
 90 | "vid": "v0200ff80000bmnvs5ignbh26fqqufbg",
 91 | -"play_addr": {
 92 | "uri": "v0200ff80000bmnvs5ignbh26fqqufbg",
 93 | -"url_list": [
 94 | "https://aweme.snssdk.com/aweme/v1/play/?video_id=v0200ff80000bmnvs5ignbh26fqqufbg&line=0&ratio=540p&media_type=4&vr_type=0&improve_bitrate=0&is_play_url=1",
 95 | "https://api.amemv.com/aweme/v1/play/?video_id=v0200ff80000bmnvs5ignbh26fqqufbg&line=1&ratio=540p&media_type=4&vr_type=0&improve_bitrate=0&is_play_url=1"
 96 | ]
 97 | },
 98 | "height": 1280,
 99 | -"dynamic_cover": {
100 | -"url_list": [
101 | "https://p3-dy.byteimg.com/obj/tos-cn-p-0015/f4f71ff403574d768a87e7ef3501a7cc_1571815009",
102 | "https://p9-dy.byteimg.com/obj/tos-cn-p-0015/f4f71ff403574d768a87e7ef3501a7cc_1571815009",
103 | "https://p1-dy.byteimg.com/obj/tos-cn-p-0015/f4f71ff403574d768a87e7ef3501a7cc_1571815009"
104 | ],
105 | "uri": "tos-cn-p-0015/f4f71ff403574d768a87e7ef3501a7cc_1571815009"
106 | },
107 | "ratio": "540p",
108 | -"download_addr": {
109 | "uri": "v0200ff80000bmnvs5ignbh26fqqufbg",
110 | -"url_list": [
111 | "https://aweme.snssdk.com/aweme/v1/play/?video_id=v0200ff80000bmnvs5ignbh26fqqufbg&line=0&ratio=540p&watermark=0&media_type=4&vr_type=0&improve_bitrate=0&logo_name=aweme_self",
112 | "https://api.amemv.com/aweme/v1/play/?video_id=v0200ff80000bmnvs5ignbh26fqqufbg&line=1&ratio=540p&watermark=0&media_type=4&vr_type=0&improve_bitrate=0&logo_name=aweme_self"
113 | ]
114 | },
115 | "duration": 61824
116 | },
117 | "video_labels": null,
118 | "label_top_text": null
119 | }
120 | ]
121 | }
122 | ```
123 | 通过返回的参数可以看到我们需要的数据都在这里。这里先不着急解析数据：通过对比请求参数，别的参数都是现成的，在主页代码中都可以找到，基本可以确定`_signature`参数是加密字符串，接下来我们就跟踪这个参数的生成过程。通过搜索，确定它是在`index_10ae3b3.js`中生成的，截图如下：
124 | 
125 | 通过截图我们知道，`signature` 是通过`_bytedAcrawler`对象获取的，顺着我们查看它的生成过程：截图如下：
126 | 
127 | 它是在`base_327cc85.js`生成的,截图如下：
128 | 
129 | 通过分析，`_signature` 的生成过程比较复杂，js代码已经被混淆压缩，直接分析算法过程比较难，但是我们可以直接执行签名算法的代码，并返回对应的签名结果。把压缩过的js保存下来，执行`_bytedAcrawler.sign("")`获取参数签名。
130 | 
131 | 分析完成后，开始python模拟手机数据请求：
132 | 
133 | 1.读取主页链接：支持同时爬取多个小姐姐的主页视频列表，在`share-url.txt`中输入的每个URL通过逗号/空格/tab（制表符）/回车符分割，支持多行；也可以使用命令行参数指定链接`python amemv-video-ripper.py url1,url2...`；解析文本数据/命令行数据；
134 | 
135 | ```
136 |  content, opts, args = None, None, []
137 | 
138 |     try:
139 |         if len(sys.argv) >= 2:
140 |             opts, args = getopt.getopt(sys.argv[1:], "hi:o:", ["favorite"])
141 |     except getopt.GetoptError as err:
142 |         usage()
143 |         sys.exit(2)
144 | 
145 |     if not args:
146 |         # check the sites file
147 |         filename = "share-url.txt"
148 |         if os.path.exists(filename):
149 |             content = parse_sites(filename)
150 |         else:
151 |             usage()
152 |             sys.exit(1)
153 |     else:
154 |         content = (args[0] if args else '').split(",")
155 | 
156 |     if len(content) == 0 or content[0] == "":
157 |         usage()
158 |         sys.exit(1)
159 | 
160 |     if opts:
161 |         for o, val in opts:
162 |             if o in ("--favorite"):
163 |                 download_favorite = True
164 |                 break
165 | 
166 |     CrawlerScheduler(content)
167 | ```
168 | 2.获取列表视频：
169 | 
170 | ```
171 | class CrawlerScheduler(object):
172 | 
173 |     def __init__(self, items):
174 |         self.numbers = []
175 |         self.challenges = []
176 |         self.musics = []
177 |         for i in range(len(items)):
178 |             url = get_real_address(items[i])
179 |             if not url:
180 |                 continue
181 |             if re.search('share/user', url):
182 |                 self.numbers.append(url)
183 |             if re.search('share/challenge', url):
184 |                 self.challenges.append(url)
185 |             if re.search('share/music', url):
186 |                 self.musics.append(url)
187 | 
188 |         self.queue = Queue.Queue()
189 |         self.scheduling()
190 | 
191 |     #通过node执行fuck-byted-acrawler.js得到签名
192 |     @staticmethod
193 |     def generateSignature(value):
194 |         p = os.popen('node fuck-byted-acrawler.js %s' % value)
195 |         return p.readlines()[0]
196 | 
197 |     @staticmethod
198 |     def calculateFileMd5(filename):
199 |         hmd5 = hashlib.md5()
200 |         fp = open(filename, "rb")
201 |         hmd5.update(fp.read())
202 |         return hmd5.hexdigest()
203 | 
204 |     def scheduling(self):
205 |         for x in range(THREADS):
206 |             worker = DownloadWorker(self.queue)
207 |             worker.daemon = True
208 |             worker.start()
209 | 
210 |         for url in self.numbers:
211 |             self.download_user_videos(url)
212 |         for url in self.challenges:
213 |             self.download_challenge_videos(url)
214 |         for url in self.musics:
215 |             self.download_music_videos(url)
216 | 
217 |     def download_user_videos(self, url):
218 |         number = re.findall(r'share/user/(\d+)', url)
219 |         if not len(number):
220 |             return
221 |         dytk = get_dytk(url)
222 |         hostname = urllib.parse.urlparse(url).hostname
223 |         if hostname != 't.tiktok.com' and not dytk:
224 |             return
225 |         user_id = number[0]
226 |         video_count = self._download_user_media(user_id, dytk, url)
227 |         self.queue.join()
228 |         print("\nAweme number %s, video number %s\n\n" %
229 |               (user_id, str(video_count)))
230 |         print("\nFinish Downloading All the videos from %s\n\n" % user_id)
231 | 
232 |     def download_challenge_videos(self, url):
233 |         challenge = re.findall('share/challenge/(\d+)', url)
234 |         if not len(challenge):
235 |             return
236 |         challenges_id = challenge[0]
237 |         video_count = self._download_challenge_media(challenges_id, url)
238 |         self.queue.join()
239 |         print("\nAweme challenge #%s, video number %d\n\n" %
240 |               (challenges_id, video_count))
241 |         print("\nFinish Downloading All the videos from #%s\n\n" % challenges_id)
242 | 
243 |     def download_music_videos(self, url):
244 |         music = re.findall('share/music/(\d+)', url)
245 |         if not len(music):
246 |             return
247 |         musics_id = music[0]
248 |         video_count = self._download_music_media(musics_id, url)
249 |         self.queue.join()
250 |         print("\nAweme music @%s, video number %d\n\n" %
251 |               (musics_id, video_count))
252 |         print("\nFinish Downloading All the videos from @%s\n\n" % musics_id)
253 | 
254 |     def _join_download_queue(self, aweme, target_folder):
255 |         try:
256 |             if aweme.get('video', None):
257 |                 uri = aweme['video']['play_addr']['uri']
258 |                 download_url = "https://aweme.snssdk.com/aweme/v1/play/?{0}"
259 |                 download_params = {
260 |                     'video_id': uri,
261 |                     'line': '0',
262 |                     'ratio': '720p',
263 |                     'media_type': '4',
264 |                     'vr_type': '0',
265 |                     'test_cdn': 'None',
266 |                     'improve_bitrate': '0',
267 |                     'iid': '35628056608',
268 |                     'device_id': '46166618999',
269 |                     'os_api': '18',
270 |                     'app_name': 'aweme',
271 |                     'channel': 'App%20Store',
272 |                     'idfa': '00000000-0000-0000-0000-000000000000',
273 |                     'device_platform': 'iphone',
274 |                     'build_number': '27014',
275 |                     'vid': '2ED380A7-F09C-6C9E-90F5-862D58F3129C',
276 |                     'openudid': '21dae85eeac1da35a69e2a0ffeaeef61c78a2e98',
277 |                     'device_type': 'iPhone8%2C2',
278 |                     'app_version': '2.7.0',
279 |                     'version_code': '2.7.0',
280 |                     'os_version': '12.0',
281 |                     'screen_width': '1242',
282 |                     'aid': '1128',
283 |                     'ac': 'WIFI'
284 |                 }
285 |                 if aweme.get('hostname') == 't.tiktok.com':
286 |                     download_url = 'http://api.tiktokv.com/aweme/v1/play/?{0}'
287 |                     download_params = {
288 |                         'video_id': uri,
289 |                         'line': '0',
290 |                         'ratio': '720p',
291 |                         'media_type': '4',
292 |                         'vr_type': '0',
293 |                         'test_cdn': 'None',
294 |                         'improve_bitrate': '0',
295 |                         'version_code': '1.7.2',
296 |                         'language': 'en',
297 |                         'app_name': 'trill',
298 |                         'vid': 'D7B3981F-DD46-45A1-A97E-428B90096C3E',
299 |                         'app_version': '1.7.2',
300 |                         'device_id': '6619780206485964289',
301 |                         'channel': 'App Store',
302 |                         'mcc_mnc': '',
303 |                         'tz_offset': '28800'
304 |                     }
305 |                 share_info = aweme.get('share_info', {})
306 |                 url = download_url.format(
307 |                     '&'.join([key + '=' + download_params[key] for key in download_params]))
308 |                 self.queue.put(('video',
309 |                                 uri + "-" + share_info.get('share_desc', uri),
310 |                                 url, target_folder))
311 |             else:
312 |                 if aweme.get('image_infos', None):
313 |                     image = aweme['image_infos']['label_large']
314 |                     self.queue.put(
315 |                         ('image', image['uri'], image['url_list'][0], target_folder))
316 | 
317 |         except KeyError:
318 |             return
319 |         except UnicodeDecodeError:
320 |             print("Cannot decode response data from DESC %s" % aweme['desc'])
321 |             return
322 | 
323 | 
324 |     def _download_user_media(self, user_id, dytk, url):
325 |         current_folder = os.getcwd()
326 |         target_folder = os.path.join(current_folder, 'download/%s' % user_id)
327 |         if not os.path.isdir(target_folder):
328 |             os.mkdir(target_folder)
329 | 
330 |         if not user_id:
331 |             print("Number %s does not exist" % user_id)
332 |             return
333 |         hostname = urllib.parse.urlparse(url).hostname
334 |         signature = self.generateSignature(str(user_id))
335 |         user_video_url = "https://%s/aweme/v1/aweme/post/" % hostname
336 |         user_video_params = {
337 |             'user_id': str(user_id),
338 |             'count': '21',
339 |             'max_cursor': '0',
340 |             'aid': '1128',
341 |             '_signature': signature,
342 |             'dytk': dytk
343 |         }
344 |         if hostname == 't.tiktok.com':
345 |             user_video_params.pop('dytk')
346 |             user_video_params['aid'] = '1180'
347 | 
348 |         max_cursor, video_count = None, 0
349 |         while True:
350 |             if max_cursor:
351 |                 user_video_params['max_cursor'] = str(max_cursor)
352 |             res = requests.get(user_video_url, headers=HEADERS,
353 |                                params=user_video_params)
354 |             contentJson = json.loads(res.content.decode('utf-8'))
355 |             aweme_list = contentJson.get('aweme_list', [])
356 |             for aweme in aweme_list:
357 |                 video_count += 1
358 |                 aweme['hostname'] = hostname
359 |                 self._join_download_queue(aweme, target_folder)
360 |             if contentJson.get('has_more'):
361 |                 max_cursor = contentJson.get('max_cursor')
362 |             else:
363 |                 break
364 |         # if True:
365 |         #     favorite_folder = target_folder + '/favorite'
366 |         #     video_count = self.__download_favorite_media(
367 |         #         user_id, dytk, hostname, signature, favorite_folder, video_count)
368 | 
369 |         if video_count == 0:
370 |             print("There's no video in number %s." % user_id)
371 | 
372 |         return video_count
373 | 
374 |     def _download_challenge_media(self, challenge_id, url):
375 |         if not challenge_id:
376 |             print("Challenge #%s does not exist" % challenge_id)
377 |             return
378 |         current_folder = os.getcwd()
379 |         target_folder = os.path.join(
380 |             current_folder, 'download/#%s' % challenge_id)
381 |         if not os.path.isdir(target_folder):
382 |             os.mkdir(target_folder)
383 | 
384 |         hostname = urllib.parse.urlparse(url).hostname
385 |         signature = self.generateSignature(str(challenge_id) + '9' + '0')
386 | 
387 |         challenge_video_url = "https://%s/aweme/v1/challenge/aweme/" % hostname
388 |         challenge_video_params = {
389 |             'ch_id': str(challenge_id),
390 |             'count': '9',
391 |             'cursor': '0',
392 |             'aid': '1128',
393 |             'screen_limit': '3',
394 |             'download_click_limit': '0',
395 |             '_signature': signature
396 |         }
397 | 
398 |         cursor, video_count = None, 0
399 |         while True:
400 |             if cursor:
401 |                 challenge_video_params['cursor'] = str(cursor)
402 |                 challenge_video_params['_signature'] = self.generateSignature(
403 |                     str(challenge_id) + '9' + str(cursor))
404 |             res = requests.get(challenge_video_url,
405 |                                headers=HEADERS, params=challenge_video_params)
406 |             try:
407 |                 contentJson = json.loads(res.content.decode('utf-8'))
408 |             except:
409 |                 print(res.content)
410 |             aweme_list = contentJson.get('aweme_list', [])
411 |             if not aweme_list:
412 |                 break
413 |             for aweme in aweme_list:
414 |                 aweme['hostname'] = hostname
415 |                 video_count += 1
416 |                 self._join_download_queue(aweme, target_folder)
417 |                 print("number: ", video_count)
418 |             if contentJson.get('has_more'):
419 |                 cursor = contentJson.get('cursor')
420 |             else:
421 |                 break
422 |         if video_count == 0:
423 |             print("There's no video in challenge %s." % challenge_id)
424 |         return video_count
425 | 
426 |     def _download_music_media(self, music_id, url):
427 |         if not music_id:
428 |             print("Challenge #%s does not exist" % music_id)
429 |             return
430 |         current_folder = os.getcwd()
431 |         target_folder = os.path.join(current_folder, 'download/@%s' % music_id)
432 |         if not os.path.isdir(target_folder):
433 |             os.mkdir(target_folder)
434 | 
435 |         hostname = urllib.parse.urlparse(url).hostname
436 |         signature = self.generateSignature(str(music_id))
437 |         music_video_url = "https://%s/aweme/v1/music/aweme/?{0}" % hostname
438 |         music_video_params = {
439 |             'music_id': str(music_id),
440 |             'count': '9',
441 |             'cursor': '0',
442 |             'aid': '1128',
443 |             'screen_limit': '3',
444 |             'download_click_limit': '0',
445 |             '_signature': signature
446 |         }
447 |         if hostname == 't.tiktok.com':
448 |             for key in ['screen_limit', 'download_click_limit', '_signature']:
449 |                 music_video_params.pop(key)
450 |             music_video_params['aid'] = '1180'
451 | 
452 |         cursor, video_count = None, 0
453 |         while True:
454 |             if cursor:
455 |                 music_video_params['cursor'] = str(cursor)
456 |                 music_video_params['_signature'] = self.generateSignature(
457 |                     str(music_id) + '9' + str(cursor))
458 | 
459 |             url = music_video_url.format(
460 |                 '&'.join([key + '=' + music_video_params[key] for key in music_video_params]))
461 |             res = requests.get(url, headers=HEADERS)
462 |             contentJson = json.loads(res.content.decode('utf-8'))
463 |             aweme_list = contentJson.get('aweme_list', [])
464 |             if not aweme_list:
465 |                 break
466 |             for aweme in aweme_list:
467 |                 aweme['hostname'] = hostname
468 |                 video_count += 1
469 |                 self._join_download_queue(aweme, target_folder)
470 |             if contentJson.get('has_more'):
471 |                 cursor = contentJson.get('cursor')
472 |             else:
473 |                 break
474 |         if video_count == 0:
475 |             print("There's no video in music %s." % music_id)
476 |         return video_count
477 | ```
478 | 3.下载视频：
479 | 
480 | ```
481 | #下载相关的逻辑
482 | def download(medium_type, uri, medium_url, target_folder):
483 | 
484 |     headers = copy.copy(HEADERS)
485 |     file_name = uri
486 |     if medium_type == 'video':
487 |         file_name += '.mp4'
488 |         headers['user-agent'] = 'Aweme/27014 CFNetwork/974.2.1 Darwin/18.0.0'
489 |     elif medium_type == 'image':
490 |         file_name += '.jpg'
491 |         file_name = file_name.replace("/", "-")
492 |     else:
493 |         return
494 | 
495 |     file_path = os.path.join(target_folder, file_name)
496 |     if os.path.isfile(file_path):
497 |         print(file_name + " 已经爬取过了，文件保存在 " + file_path + " 放弃爬取")
498 |         return
499 | 
500 |     print("Downloading %s from %s.\n" % (file_name, medium_url))
501 |     # VIDEOID_DICT[VIDEO_ID] = 1  # 记录已经下载的视频
502 |     retry_times = 0
503 |     while retry_times < RETRY:
504 |         try:
505 |             resp = requests.get(medium_url, headers=headers, stream=True, timeout=TIMEOUT)
506 |             if resp.status_code == 403:
507 |                 retry_times = RETRY
508 |                 print("Access Denied when retrieve %s.\n" % medium_url)
509 |                 raise Exception("Access Denied")
510 |             with open(file_path, 'wb') as fh:
511 |                 for chunk in resp.iter_content(chunk_size=1024):
512 |                     fh.write(chunk)
513 |             break
514 |         except:
515 |             pass
516 |         retry_times += 1
517 |     else:
518 |         try:
519 |             os.remove(file_path)
520 |         except OSError:
521 |             pass
522 |         print("Failed to retrieve %s from %s.\n" % (uri, medium_url))
523 |         time.sleep(1)
524 | 
525 | ```
526 | 4.其他：
527 | 
528 | ```
529 | #通过短链接-获取长链接
530 | def get_real_address(url):
531 |     if url.find('v.douyin.com') < 0:
532 |         return url
533 |     res = requests.get(url, headers=HEADERS, allow_redirects=False)
534 |     return res.headers['Location'] if res.status_code == 302 else None
535 | 
536 | # 得到dytk参数
537 | def get_dytk(url):
538 |     res = requests.get(url, headers=HEADERS)
539 |     if not res:
540 |         return None
541 |     dytk = re.findall("dytk: '(.*)'", res.content.decode('utf-8'))
542 |     if len(dytk):
543 |         return dytk[0]
544 |     return None
545 | 
546 | # 下载管理器
547 | class DownloadWorker(Thread):
548 |     def __init__(self, queue):
549 |         Thread.__init__(self)
550 |         self.queue = queue
551 | 
552 |     def run(self):
553 |         while True:
554 |             medium_type, uri, download_url, target_folder = self.queue.get()
555 |             download(medium_type, uri, download_url, target_folder)
556 |             self.queue.task_done()
557 | ```
558 | 
559 | 5.执行截图：
560 | 
561 | 
562 | 
563 | 6.源码获取：
564 | 
565 | 
566 | 7.去水印说明：其实抖音列表返回了无水印视频链接和有水印链接，没有涉及对视频水印的处理
567 | 
568 | 
569 | 
570 | 
571 | 
572 | 
573 | 
574 | 
575 | 
576 | 
577 | 
578 | 
579 | 
580 | 
581 | 
582 | 
583 | 
584 | 
585 | 
586 | 
587 | 
588 | 
589 | 
590 | 
591 | 
592 | 
593 | 
594 | 
595 | 
596 | 
597 | 


--------------------------------------------------------------------------------