├── .gitattributes
├── LICENSE
├── README.md
├── _config.yml
├── kuaishou.py
├── run.bat
└── screenshots
    └── 1.png


/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 muyangren907
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | 快手批量下载脚本
 2 | ===========
 3 | 
 4 | ![](https://raw.githubusercontent.com/muyangren907/Kwai_download_script/master/screenshots/1.png)
 5 | 
 6 | ## 使用
 7 | 
 8 | ### ①将json文件获取到该项目同一文件夹下
 9 | ### ②确保已经安装了python3
10 | ### ③执行程序
11 | - Windows下
12 | 在cmd窗口执行如下指令
13 | ```
14 | py -3 kuaishou.py
15 | ```
16 | - Linux下
17 | 在终端执行如下指令
18 | ```
19 | python3 kuaishou.py
20 | ```
21 | 
22 | ## 说明
23 | -	若该主播正在直播，则获取到的第一个json文件第一项为直播信息（即feeds数组中的第一项），需要删去，否则程序运行会报错
24 | - 获取到的json项目分类有：视频(包含视频和图片电影)，图集(即长图)，图片
25 | - 下载的文件命名规则下面有说明，对于过长的标题，只截取前30字，并清除不能出现在文件夹或文件名中的字符
26 | - 程序编写平台为Windows 10，使用JetBrains PyCharm作为ide，python版本为3.6，文本文件均采用UTF-8编码
27 | - 默认线程数为32，源代码中有详细注释，可根据实际情况进行更改
28 | 
29 | #### 下载的文件命名规则
30 | - 首先会在该文件夹下生成以主播快手昵称为名的文件夹(假设为user_name)
31 | -	视频保存为mp4文件，命名为"作品编号_标题.mp4"，保存在user_name中
32 | - 图集会在user_name下生成一个文件夹，文件夹命名为"作品编号_标题"，文件夹下图片为webp格式，命名为"序号.webp"，序号从0开始编号
33 | - 图片保存为jpg文件，命名为"作品编号_标题.jpg"，保存在user_name中
34 | - user_name下还会生成一个user_name.txt的文件，记录了下载时间，每种类型文件的个数
35 | 


--------------------------------------------------------------------------------
/_config.yml:
--------------------------------------------------------------------------------
1 | theme: jekyll-theme-architect


--------------------------------------------------------------------------------
/kuaishou.py:
--------------------------------------------------------------------------------
  1 | #! python3
  2 | # -*-coding:utf-8-*-
  3 | # author: muyangren907
  4 | import json,os,codecs,pprint,re,time,queue,urllib,socket,threading
  5 | from urllib.request import urlretrieve
  6 | 
  7 | itemnum = 0
  8 | havedownload = 0
  9 | vq = queue.Queue()
 10 | def download(vq):
 11 |     while True:
 12 |         # vq.put([caption, photo_id, mv_urls, atlas, cover_urls],path)
 13 |         videomes = vq.get()
 14 |         caption = videomes[0]
 15 |         photo_id = videomes[1]
 16 |         mv_urls = videomes[2]
 17 |         atlas = videomes[3]
 18 |         cover_urls = videomes[4]
 19 |         path = videomes[5]
 20 |         # itemnum = videomes[6]
 21 | 
 22 |         global havedownload
 23 | 
 24 |         if mv_urls!="None" :
 25 |             downfile = os.path.join(path, str(photo_id)+"_"+caption + ".mp4")   #filename = photo_id+caption
 26 |             try:
 27 |               urlretrieve(mv_urls,downfile)
 28 |             except IOError:
 29 |                 downfile = os.path.join(path, "错误" + '%s.mp4') % photo_id
 30 |                 try:
 31 |                     urlretrieve(mv_urls, downfile)
 32 |                 except (socket.error, urllib.ContentTooShortError):
 33 |                     print("请求被断开，休眠2秒")
 34 |                     time.sleep(2)
 35 |                     urlretrieve(mv_urls,downfile)
 36 |             havedownload+=1
 37 |             print("(%d/%d)视频下载完成: %s_%s"% (havedownload,itemnum,photo_id,caption))
 38 |             vq.task_done()
 39 |         else:
 40 |             if atlas[0]!="None" :
 41 |                 caption = caption.replace(".","。")
 42 |                 if os.path.exists(path+"/"+str(photo_id) + "_" + caption) == False:
 43 |                     os.mkdir(path+"/"+str(photo_id) + "_" + caption)
 44 |                 for atlasindex in range(len(atlas)):
 45 |                     atlas_url = atlas[atlasindex]
 46 |                     downfile = os.path.join(path+"/"+str(photo_id) + "_" + caption, str(atlasindex) + ".webp")  # filename = atlasindex
 47 |                     try:
 48 |                         urlretrieve(atlas_url, downfile)
 49 |                     except IOError:
 50 |                         downfile = os.path.join(path+"/"+str(photo_id) + "_" + caption, "错误" + '%s%s.webp') %(photo_id,atlasindex)
 51 |                         try:
 52 |                             urlretrieve(atlas_url, downfile)
 53 |                         except (socket.error, urllib.ContentTooShortError):
 54 |                             print("请求被断开，休眠2秒")
 55 |                             time.sleep(2)
 56 |                             urlretrieve(atlas_url, downfile)
 57 |                 havedownload += 1
 58 |                 print("(%d/%d)图集下载完成: %s_%s" % (havedownload,itemnum,photo_id,caption))
 59 |                 vq.task_done()
 60 |             else:
 61 |                 downfile = os.path.join(path, str(photo_id) + "_" + caption + ".jpg")  # filename = photo_id+caption
 62 |                 try:
 63 |                     urlretrieve(cover_urls, downfile)
 64 |                 except IOError:
 65 |                     downfile = os.path.join(path, "错误" + '%s.mp4') % photo_id
 66 |                     try:
 67 |                         urlretrieve(cover_urls, downfile)
 68 |                     except (socket.error, urllib.ContentTooShortError):
 69 |                         print("请求被断开，休眠2秒")
 70 |                         time.sleep(2)
 71 |                         urlretrieve(cover_urls, downfile)
 72 |                 havedownload += 1
 73 |                 print("(%d/%d)图片下载完成: %s_%s" % (havedownload,itemnum,photo_id,caption))
 74 |                 vq.task_done()
 75 | 
 76 | 
 77 | 
 78 | def main():
 79 |     user_name = ""
 80 |     user_id = 0
 81 | 
 82 |     localtime = time.asctime(time.localtime(time.time())) #get time
 83 |     # count number
 84 |     global itemnum
 85 |     videonum = 0
 86 |     atlasnum = 0
 87 |     picturenum = 0
 88 | 
 89 | 
 90 |     filelist = os.listdir("./")  # get the file list
 91 | 
 92 |     jsonfilename = []
 93 |     for file_index in range(len(filelist)):
 94 |         filestr = str(filelist[file_index])
 95 |         if filestr.find(".json", 0, len(filestr)) != -1:
 96 |             jsonfilename.append(filestr)  # add json file name to jsonfilename list
 97 |     print("json文件总数为: " + str(len(jsonfilename)))
 98 | 
 99 |     for file_index in range(len(jsonfilename)):
100 |         jsonfile = open("./"+jsonfilename[file_index],"r",encoding="utf8") #open json file
101 |         jsonstr = jsonfile.read() #read file to jsonstr
102 |         jsonobj = json.loads(jsonstr)
103 | 
104 |         user_name = jsonobj['feeds'][0]['user_name'].replace("/","")    #get user_name
105 |         user_id = jsonobj['feeds'][0]['user_id']    #get user_id
106 |         # print(user_name+" "+str(user_id))
107 | 
108 |         if os.path.exists("./"+user_name) == False:
109 |             os.mkdir("./"+user_name)    #mkdir using user_name
110 | 
111 |         mv_urls = "None"
112 |         atlas = ["None"]
113 |         cover_urls = "None"
114 | 
115 |         for item in jsonobj['feeds']:
116 |             itemnum+=1
117 |             # pprint.pprint(itme)
118 |             caption = item['caption']
119 | 
120 |             notchar = ["?", "*", "/", "\\", "<", ">", ":", "\"", "|", "\n","\r"," "]  # These characters cannot appear in the file name
121 |             for chari in range(len(notchar)):
122 |                 caption = caption.replace(notchar[chari], "")
123 |             caption = caption[0:29] #file name can't be too long
124 | 
125 |             photo_id = item['photo_id']
126 |             if 'main_mv_urls' in item :
127 |                 videonum+=1
128 |                 mv_urls = item['main_mv_urls'][0]['url']
129 |             else :
130 |                 mv_urls = "None"
131 |                 # print(photo_id)
132 |                 if 'atlas' in item["ext_params"] :
133 |                     atlasnum+=1
134 |                     atlas = item["ext_params"]['atlas']['list']
135 |                     for atlas_index in range(len(atlas)):
136 |                         atlas[atlas_index]="http://"+item["ext_params"]['atlas']['cdnList'][0]['cdn']+atlas[atlas_index]  #url=cdn+relative_url
137 |                         # print(atlas[atlas_index])
138 |                 else :
139 |                     picturenum+=1
140 |                     atlas=["None"]
141 |                     cover_urls = item['cover_urls'][0]['url']
142 |                     # print(cover_urls)
143 |             # print(caption)
144 |             vq.put([caption,photo_id,mv_urls,atlas,cover_urls,"./"+user_name])
145 | 
146 |             # fp =open("./"+user_name+"/"+caption+".txt","w")
147 |             # fp.close()
148 |         # print(user_name + str(user_id))
149 | 
150 |         jsonfile.close() #close file
151 |     print("itemnum\t"+str(itemnum)+"\nvideonum\t"+str(videonum)+"\natlasnum\t"+str(atlasnum)+"\npicturenum\t"+str(picturenum))
152 |     if os.path.exists("./" + user_name + "/" + user_name + ".txt") == False:
153 |         user_mes_file = codecs.open("./" + user_name + "/" + user_name + ".txt", "w","utf-8")
154 |         user_mes_file.write("download_time\t"+localtime+"\n")
155 |         user_mes_file.write("user_name\t" + user_name + "\nuser_id\t" + str(user_id) + "\n")
156 |         user_mes_file.write("itemnum\t"+str(itemnum)+"\nvideonum\t"+str(videonum)+"\natlasnum\t"+str(atlasnum)+"\npicturenum\t"+str(picturenum))
157 |         user_mes_file.close()
158 |     threadnum = 32  # thread number
159 |     for thread_num in range(threadnum):
160 |         t = threading.Thread(target=download,args=(vq,))
161 |         t.setDaemon(True)
162 |         t.start()
163 |     vq.join()
164 |     # print(str(itemnum)+" "+str(videonum)+" "+str(atlasnum)+" "+str(picturenum))
# Run the downloader only when executed as a script, so importing this module
# does not scan the directory or start worker threads.
if __name__ == "__main__":
    main()


--------------------------------------------------------------------------------
/run.bat:
--------------------------------------------------------------------------------
@echo off
rem Launch the downloader from the folder this script lives in, so the JSON
rem files placed next to kuaishou.py are found regardless of the caller's cwd.
cd /d "%~dp0"
rem Prefer the project's virtual environment when it exists; otherwise fall
rem back to the Python launcher, as the README instructs.
if exist ".\venv\Scripts\python.exe" (
    ".\venv\Scripts\python.exe" kuaishou.py
) else (
    py -3 kuaishou.py
)
pause
3 | 


--------------------------------------------------------------------------------
/screenshots/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codenewer/Kwai_download_script/fa133947617a6e577b3e4524e440306522e602d9/screenshots/1.png


--------------------------------------------------------------------------------