├── .gitattributes ├── LICENSE ├── README.md ├── _config.yml ├── kuaishou.py ├── run.bat └── screenshots └── 1.png /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 muyangren907 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 快手批量下载脚本 2 | =========== 3 | 4 | ![](https://raw.githubusercontent.com/muyangren907/Kwai_download_script/master/screenshots/1.png) 5 | 6 | ## 使用 7 | 8 | ### ①将json文件获取到该项目同一文件夹下 9 | ### ②确保已经安装了python3 10 | ### ③执行程序 11 | - Windows下 12 | 在cmd窗口执行如下指令 13 | ``` 14 | py -3 kuaishou.py 15 | ``` 16 | - Linux下 17 | 在终端执行如下指令 18 | ``` 19 | python3 kuaishou.py 20 | ``` 21 | 22 | ## 说明 23 | - 若该主播正在直播,则获取到的第一个json文件第一项为直播信息(即feeds数组中的第一项),需要删去,否则程序运行会报错 24 | - 获取到的json项目分类有:视频(包含视频和图片电影),图集(即长图),图片 25 | - 下载的文件命名规则下面有说明,对于过长的标题,只截取前30字,并清除不能出现在文件夹或文件名中的字符 26 | - 程序编写平台为Windows 10,使用JetBrains PyCharm作为ide,python版本为3.6,文本文件均采用UTF-8编码 27 | - 默认线程数为32,源代码中有详细注释,可根据实际情况进行更改 28 | 29 | #### 下载的文件命名规则 30 | - 首先会在该文件夹下生成以主播快手昵称为名的文件夹(假设为user_name) 31 | - 视频保存为mp4文件,命名为"作品编号_标题.mp4",保存在user_name中 32 | - 图集会在user_name下生成一个文件夹,文件夹命名为"作品编号_标题",文件夹下图片为webp格式,命名为"序号.webp",序号从0开始编号 33 | - 图片保存为jpg文件,命名为"作品编号_标题.jpg",保存在user_name中 34 | - user_name下还会生成一个user_name.txt的文件,记录了下载时间,每种类型文件的个数 35 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-architect -------------------------------------------------------------------------------- /kuaishou.py: -------------------------------------------------------------------------------- 1 | #! 
#! python3
# -*-coding:utf-8-*-
# author: muyangren907
"""Batch downloader for Kuaishou feed exports.

Every file in the current directory whose name contains ``.json`` is parsed,
each entry of its ``feeds`` array is turned into a download job, and a pool of
daemon worker threads fetches the videos, atlases (image sets) and pictures
into a folder named after the user.
"""
import codecs
import json
import os
import pprint
import queue
import re
import socket
import threading
import time
import urllib
import urllib.error
from urllib.request import urlretrieve

# Number of worker threads started by main().
THREAD_COUNT = 32
# Captions are cut to this many characters before being used in file names.
MAX_CAPTION_LENGTH = 30
# Pause before the last download attempt of a file.
RETRY_DELAY_SECONDS = 2
# Deletes every character that must not appear in a file name.
_CAPTION_CLEANER = str.maketrans("", "", "?*/\\<>:\"|\n\r ")

itemnum = 0  # total number of feed items queued so far (progress denominator)
havedownload = 0  # number of items fully downloaded so far
vq = queue.Queue()
# Guards ``havedownload``: ``+=`` on a global is not atomic across threads.
_progress_lock = threading.Lock()


def _fetch(url, target, fallback):
    """Download *url* to *target*, retrying under the *fallback* file name.

    The first attempt writes to *target*.  If that raises ``IOError`` the
    download is retried as *fallback* (presumably because the caption made the
    original name unusable), and after one more failure a final attempt is
    made following a short sleep.

    Returns True when one attempt succeeded, False when all of them failed.
    """
    try:
        urlretrieve(url, target)
        return True
    except IOError:
        pass
    try:
        urlretrieve(url, fallback)
        return True
    except (socket.error, urllib.error.ContentTooShortError):
        print("请求被断开,休眠2秒")
        time.sleep(RETRY_DELAY_SECONDS)
    try:
        urlretrieve(url, fallback)
        return True
    except (socket.error, urllib.error.ContentTooShortError) as err:
        print("下载失败: %s (%s)" % (url, err))
        return False


def _report_done(label, photo_id, caption):
    """Increment the shared progress counter and print one progress line."""
    global havedownload
    with _progress_lock:
        havedownload += 1
        done = havedownload
    print("(%d/%d)%s下载完成: %s_%s" % (done, itemnum, label, photo_id, caption))


def _download_item(videomes):
    """Download a single queued job.

    *videomes* is ``[caption, photo_id, mv_urls, atlas, cover_urls, path]``.
    ``mv_urls == "None"`` means "no video" and ``atlas == ["None"]`` means
    "no atlas"; a video takes priority over an atlas, which takes priority
    over the cover picture.
    """
    caption, photo_id, mv_urls, atlas, cover_urls, path = videomes[:6]

    if mv_urls != "None":
        label = "视频"
        ok = _fetch(
            mv_urls,
            os.path.join(path, "%s_%s.mp4" % (photo_id, caption)),
            os.path.join(path, "错误%s.mp4" % photo_id),
        )
    elif atlas and atlas[0] != "None":
        label = "图集"
        # NOTE(review): dots are swapped for the ideographic full stop,
        # presumably because a folder name must not end with "." on Windows.
        caption = caption.replace(".", "。")
        folder = os.path.join(path, "%s_%s" % (photo_id, caption))
        os.makedirs(folder, exist_ok=True)
        ok = True
        for index, atlas_url in enumerate(atlas):
            fetched = _fetch(
                atlas_url,
                os.path.join(folder, "%d.webp" % index),
                os.path.join(folder, "错误%s%s.webp" % (photo_id, index)),
            )
            ok = fetched and ok
    else:
        label = "图片"
        ok = _fetch(
            cover_urls,
            os.path.join(path, "%s_%s.jpg" % (photo_id, caption)),
            os.path.join(path, "错误%s.jpg" % photo_id),
        )

    if ok:
        _report_done(label, photo_id, caption)


def download(vq):
    """Worker loop: take jobs from *vq* forever and download them.

    Meant to run in a daemon thread.  ``task_done()`` is called for every job
    even when the download fails, so ``vq.join()`` in main() cannot hang on a
    crashed worker.
    """
    while True:
        videomes = vq.get()
        try:
            _download_item(videomes)
        except Exception as err:  # worker boundary: report and keep serving
            print("下载出错: %s" % err)
        finally:
            vq.task_done()


def main():
    """Queue every feed item found in the local JSON files and download them.

    Counters accumulate over all JSON files.  After queuing, a summary file
    ``<user_name>/<user_name>.txt`` is written for the last user seen (only if
    it does not exist yet), then the worker threads are started and the
    function blocks until the queue is drained.
    """
    global itemnum
    user_name = ""
    user_id = 0
    user_dir = None  # stays None until a JSON file with feed items was read

    localtime = time.asctime(time.localtime(time.time()))
    videonum = 0
    atlasnum = 0
    picturenum = 0

    jsonfilename = [str(name) for name in os.listdir("./") if ".json" in str(name)]
    print("json文件总数为: " + str(len(jsonfilename)))

    for json_name in jsonfilename:
        with open("./" + json_name, "r", encoding="utf8") as jsonfile:
            jsonobj = json.load(jsonfile)

        feeds = jsonobj['feeds']
        if not feeds:
            continue  # nothing to download and no user info to read

        user_name = feeds[0]['user_name'].replace("/", "")
        user_id = feeds[0]['user_id']
        user_dir = "./" + user_name
        os.makedirs(user_dir, exist_ok=True)

        for item in feeds:
            itemnum += 1
            caption = item['caption'].translate(_CAPTION_CLEANER)[:MAX_CAPTION_LENGTH]
            photo_id = item['photo_id']

            mv_urls = "None"
            atlas = ["None"]
            cover_urls = "None"
            ext_params = item.get("ext_params") or {}

            # Classify each item exactly once, in the same priority order the
            # workers use, so the three counters add up to itemnum.
            if 'main_mv_urls' in item:
                videonum += 1
                mv_urls = item['main_mv_urls'][0]['url']
            elif 'atlas' in ext_params:
                atlasnum += 1
                atlas_info = ext_params['atlas']
                cdn = atlas_info['cdnList'][0]['cdn']
                # url = cdn + relative_url
                atlas = ["http://" + cdn + relative for relative in atlas_info['list']]
            else:
                picturenum += 1
                cover_urls = item['cover_urls'][0]['url']

            vq.put([caption, photo_id, mv_urls, atlas, cover_urls, user_dir])

    summary = ("itemnum\t" + str(itemnum) + "\nvideonum\t" + str(videonum)
               + "\natlasnum\t" + str(atlasnum) + "\npicturenum\t" + str(picturenum))
    print(summary)

    if user_dir is not None:
        summary_path = "./" + user_name + "/" + user_name + ".txt"
        if not os.path.exists(summary_path):
            with codecs.open(summary_path, "w", "utf-8") as user_mes_file:
                user_mes_file.write("download_time\t" + localtime + "\n")
                user_mes_file.write("user_name\t" + user_name + "\nuser_id\t" + str(user_id) + "\n")
                user_mes_file.write(summary)

    for _ in range(THREAD_COUNT):
        threading.Thread(target=download, args=(vq,), daemon=True).start()
    vq.join()


if __name__ == "__main__":
    main()

# Remaining entries of the repository dump that shared these source lines
# (they are not part of kuaishou.py), kept verbatim:
# --------------------------------------------------------------------------------
# /run.bat:
# --------------------------------------------------------------------------------
# 1 | .\venv\Scripts\python.exe kuaishou.py
# 2 | pause
# 3 |
# --------------------------------------------------------------------------------
# /screenshots/1.png:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/codenewer/Kwai_download_script/fa133947617a6e577b3e4524e440306522e602d9/screenshots/1.png
# --------------------------------------------------------------------------------