├── .gitignore ├── DataManager.py ├── Executor.py ├── FileNameParser.py ├── JAVInfoGetter.py ├── LICENSE ├── README.md ├── Setting.py ├── config-template.json ├── demo.gif ├── demo_files ├── IENE-777.mp4 ├── KAWD-777.mp4 ├── STar-777.mp4 └── nykd-54.mp4 ├── getch.py ├── main.py ├── requirements.txt ├── utils.py └── webpage_getter.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *.code-workspace 3 | config.json 4 | db-*.json 5 | *-cookie.txt 6 | renameHistory.txt 7 | dist/ 8 | build/ -------------------------------------------------------------------------------- /DataManager.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pathlib import Path 3 | import utils 4 | 5 | 6 | class DataManager: 7 | def __init__(self, setting): 8 | self.setting = setting 9 | self.dbpath = Path("db-" + self.setting.language + ".json") 10 | 11 | if not self.dbpath.exists(): 12 | self.dbpath.touch() 13 | self.dbdata = {} 14 | 15 | with open(self.dbpath, "r", encoding="utf-8") as dbfile: 16 | dbtext = dbfile.read() 17 | if not dbtext: 18 | self.dbdata = {} 19 | else: 20 | self.dbdata = json.loads(dbtext) 21 | 22 | def AddRecord(self, info): 23 | self.dbdata.update({info["bangou"]: info}) 24 | 25 | def Save(self): 26 | print(utils.whiteBackStr("save db")) 27 | json.dump(self.dbdata, open(self.dbpath, "w", 28 | encoding="utf-8"), ensure_ascii=False) 29 | 30 | def Search(self, bangou): 31 | if bangou in self.dbdata: 32 | return self.dbdata[bangou] 33 | else: 34 | return None 35 | -------------------------------------------------------------------------------- /Executor.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from getch import getch 3 | from pathlib import Path 4 | import utils 5 | import re 6 | from utils import lenInBytes 7 | import sys 8 | from datetime import date 9 | 10 | 11 | class Executor: 12 
| def __init__(self, setting): 13 | self.setting = setting 14 | day = date.today().strftime("%Y%m%d") 15 | self.renameRecords = open( 16 | f"renameHistory_{day}.txt", "a", encoding="utf-8") # TODO: filename to config 17 | 18 | def HandleFiles(self, info, bangou, fileNames): 19 | print( 20 | f"===== 2/3: handle bangou {utils.yellowStr(bangou)}") 21 | self.HandleBangou(info, fileNames[bangou][0]) 22 | 23 | if len(fileNames[bangou]) > 1: # need to rename files with index 24 | for index, fileName in enumerate(fileNames[bangou]): 25 | self.HandleFile(info, fileName, index) 26 | else: 27 | self.HandleFile(info, fileNames[bangou][0]) 28 | 29 | def HandleBangou(self, info, path): # only save one copy of album and thumb 30 | if self.setting.saveAlbum: 31 | self.SaveAlbum(info, path) 32 | if self.setting.saveThumb: 33 | self.SaveThumb(info, path) 34 | 35 | def HandleFile(self, info, path, index=-1): 36 | print( 37 | f"===== 3/3: handle file {utils.yellowStr(str(path))}") 38 | self.Rename(info, path, index) 39 | # optional TODO: fill video meta description in video file 40 | # TODO: option: new folder for all video file, for the same actor, for the same tag # create link 41 | 42 | def getValidWindowsFileName(self, fileName): 43 | """ 44 | https://docs.microsoft.com/zh-tw/windows/win32/fileio/naming-a-file?redirectedfrom=MSDN 45 | """ 46 | return re.sub(r"[><:\"/\\\|\?*]", "_", fileName) 47 | 48 | def Rename(self, info, path, index): 49 | newFileName = self.setting.fileNameFormat 50 | for key in info: 51 | infokey = "{" + key + "}" 52 | infovalue = info[key] 53 | if type(infovalue) is list: 54 | infovalue = "" 55 | for element in info[key]: 56 | infovalue = infovalue + "[" + element + "]" 57 | newFileName = newFileName.replace(infokey, infovalue) 58 | 59 | if "win" in sys.platform: 60 | newFileName = self.getValidWindowsFileName(newFileName) 61 | 62 | # handle multiple files with the same bangou 63 | numberStr = ("_" + str(index+1)) if (index != -1) else "" 64 | # handle file 
name too long error 65 | if lenInBytes(newFileName) + lenInBytes(numberStr) + lenInBytes(path.suffix) > self.setting.maxFileLength: 66 | print(utils.blueBackStr(f"File name too long: {newFileName}")) 67 | maxFileLength = self.setting.maxFileLength - \ 68 | lenInBytes(path.suffix) - lenInBytes(numberStr) 69 | while lenInBytes(newFileName) > maxFileLength: 70 | newFileName = newFileName[0:-1] 71 | print( 72 | f"After truncate file name: {utils.blueBackStr(newFileName)}") 73 | newName = newFileName + numberStr + path.suffix 74 | 75 | if path.name == newName: 76 | print( 77 | f"File {utils.grayBackStr(str(path))} no need to rename") 78 | return 79 | 80 | self.DoRename(path, newName) 81 | 82 | def DoRename(self, path, newName): 83 | newPath = path.parents[0] / newName 84 | 85 | print(f"Rename {utils.blueBackStr(str(path))}\n" + 86 | f"To {utils.greenBackStr(str(newPath))}") 87 | 88 | if self.setting.dryRun: 89 | return 90 | 91 | if self.setting.renameCheck: 92 | print(utils.blueBackStr(f"Do you want to execute rename?(Y/n)")) 93 | response = getch() 94 | print(response) 95 | if response.lower() == "n": 96 | print("User cancel rename") 97 | return 98 | 99 | try: 100 | self.renameRecords.write(f"{path} -> {newPath}\n") 101 | self.renameRecords.flush() 102 | path.rename(newPath) 103 | except Exception as e: 104 | print( 105 | utils.redBackStr(f"Rename [{str(path)}] to [{str(newPath)}] failed")) 106 | print(e) 107 | 108 | def SaveAlbum(self, info, path): 109 | if not info["album"]: 110 | print("Album link not found") 111 | return 112 | 113 | albumFileName = info["bangou"] + ".jpg" 114 | albumPath = Path(path.parents[0] / albumFileName) 115 | 116 | if albumPath.exists(): 117 | print( 118 | f"Album {utils.blueBackStr(str(albumPath))} already exists") 119 | return 120 | self.DoSaveAlbum(info["album"], albumPath) 121 | 122 | def DoSaveAlbum(self, fileURL, albumPath): 123 | print( 124 | f"Save album {utils.greenBackStr(str(albumPath))}") 125 | 126 | if self.setting.dryRun: 127 | 
return 128 | 129 | with open(albumPath, 'wb') as albumFile: 130 | fileObject = requests.get(fileURL) 131 | albumFile.write(fileObject.content) 132 | 133 | def SaveThumb(self, info, path): 134 | if not info["thumbs"]: 135 | print("Thumbnail link not found") 136 | return 137 | 138 | for index, thumb in enumerate(info["thumbs"]): 139 | fileName = info["bangou"] + "_thumb" + \ 140 | str(index).zfill(2) + ".jpg" 141 | filePath = Path(path.parents[0] / fileName) 142 | 143 | if filePath.exists(): 144 | print( 145 | f"Thumbnail {utils.blueBackStr(str(filePath))} already exists") 146 | continue 147 | 148 | self.DoSaveThumb(thumb, filePath) 149 | 150 | def DoSaveThumb(self, fileURL, filePath): 151 | print( 152 | f"Save thumbnail {utils.greenBackStr(str(filePath))}") 153 | 154 | if self.setting.dryRun: 155 | return 156 | 157 | with open(filePath, 'wb') as thumbFile: 158 | fileObject = requests.get(fileURL) 159 | thumbFile.write(fileObject.content) 160 | -------------------------------------------------------------------------------- /FileNameParser.py: -------------------------------------------------------------------------------- 1 | import mimetypes 2 | import re 3 | from pathlib import Path 4 | import utils 5 | import json 6 | 7 | 8 | class BangouHandler: # abstract 9 | def __init__(self, next): 10 | self.next = next 11 | 12 | def DoNext(self, fileName): 13 | if self.next: 14 | return self.next.Handle(fileName) 15 | else: 16 | return "" 17 | 18 | 19 | class FC2BangouHandler(BangouHandler): 20 | def __init__(self, next): 21 | BangouHandler.__init__(self, next) 22 | self.fc2BangouRE = re.compile(r"(fc2)-*(ppv)*-*(\d{4,9})") 23 | 24 | def Handle(self, fileName): 25 | result = self.fc2BangouRE.search(fileName) 26 | 27 | if result: 28 | return "fc2-ppv-" + result.group(3) 29 | else: 30 | return self.DoNext(fileName) 31 | 32 | 33 | class GeneralBangouHandler(BangouHandler): 34 | def __init__(self, next): 35 | BangouHandler.__init__(self, next) 36 | self.generalBangouRE = 
re.compile(r"([a-zA-Z]{2,5})\-+(\d{2,5})") 37 | 38 | def Handle(self, fileName): 39 | result = self.generalBangouRE.search(fileName) 40 | 41 | if result: 42 | return result.group(1) + "-" + result.group(2) 43 | else: 44 | return self.DoNext(fileName) 45 | 46 | 47 | class GeneralLooseBangouHandler(BangouHandler): 48 | def __init__(self, next): 49 | BangouHandler.__init__(self, next) 50 | self.generalLooseBangouRE = re.compile( 51 | r"([a-zA-Z]{2,5})\s*\-*\s*(\d{2,5})") 52 | 53 | def Handle(self, fileName): 54 | result = self.generalLooseBangouRE.search(fileName) 55 | 56 | if result: 57 | bangou = result.group(1) + "-" + result.group(2) 58 | if bangou == "MP-4": # special case 59 | bangou = "" 60 | if bangou: 61 | return bangou 62 | return self.DoNext(fileName) 63 | 64 | 65 | class FileNameParser: 66 | def __init__(self, minFileSizeMB, ignoreWords): 67 | self.minFileSizeMB = minFileSizeMB 68 | self.ignoreWords = ignoreWords 69 | # TODO: fit different bangou format 70 | self.bangouHandler = FC2BangouHandler( 71 | GeneralBangouHandler( 72 | GeneralLooseBangouHandler(None))) 73 | 74 | # TODO: filename to config 75 | filePath = Path("BangouToFilename.txt") 76 | self.prettyPrinterFile = utils.createPrettyPrinter( 77 | open(filePath, "w", encoding="utf-8")) 78 | self.prettyPrinter = utils.createPrettyPrinter() 79 | 80 | def GetFiles(self, fileNames, fileDir): 81 | videoFileList = [] 82 | path = Path(fileDir) 83 | 84 | mimetypes.init() 85 | # Add new unknown video file extension if needed 86 | mimetypes.add_type('video/vnd.rn-realmedia-vbr', '.rmvb') 87 | mimetypes.add_type('video/rm', '.rm') 88 | mimetypes.add_type('video/x-flv', '.flv') 89 | mimetypes.add_type('video/dcv', '.dcv') 90 | 91 | for file in path.glob("**/*"): 92 | if file.is_dir(): 93 | continue 94 | if file.suffix in mimetypes.types_map: 95 | mimetype = mimetypes.types_map[file.suffix] 96 | if "video" in mimetype: 97 | videoFileList.append(file) 98 | # else: 99 | # print("unknown file extension: " + 
file.suffix) 100 | 101 | for fileName in videoFileList: 102 | stat = fileName.stat() 103 | fileSizeMB = stat.st_size >> 20 104 | if self.minFileSizeMB > fileSizeMB: 105 | # print(f"ignore {str(fileName)} because file too small") 106 | continue 107 | 108 | bangou = self.ParseBangou(fileName.name) 109 | if not bangou: 110 | print(f"bangou not found in file {fileName.name}") 111 | continue 112 | 113 | bangou = bangou.upper() 114 | if bangou in fileNames: 115 | fileNames[bangou].append(fileName) 116 | fileNames[bangou].sort() 117 | else: 118 | fileNames[bangou] = [fileName] 119 | 120 | # print("Legal video files with bangou") 121 | self.prettyPrinterFile.pprint(fileNames) 122 | # self.prettyPrinter.pprint(fileNames) 123 | return fileNames 124 | 125 | def ParseBangou(self, fileName): 126 | fileName = fileName.lower() 127 | for ignoreWord in self.ignoreWords: 128 | fileName = fileName.replace(ignoreWord, "") 129 | fileName = fileName.replace("_", "-") 130 | 131 | return self.bangouHandler.Handle(fileName) 132 | -------------------------------------------------------------------------------- /JAVInfoGetter.py: -------------------------------------------------------------------------------- 1 | import re 2 | import json 3 | import requests 4 | from bs4 import BeautifulSoup 5 | from webpage_getter import WebPageGetter_JavLibrary, WebPageGetter_JavDB 6 | 7 | # TODO: find chinese title source website 8 | 9 | 10 | def getText(element): 11 | return element.getText() 12 | 13 | 14 | class JAVInfoGetter: 15 | def __init__(self, setting, dataManager): 16 | self.setting = setting 17 | self.dataManager = dataManager 18 | 19 | def GetInfo(self, bangou, fileName): 20 | print(f"Try to get info from {self.__class__.__name__}") 21 | info = self.dataManager.Search(bangou) 22 | if info: 23 | if "title" in info and info["title"]: 24 | print(f"Find complete info of {bangou} in db") 25 | return info, True 26 | elif not self.setting.retryFailedDB: # directly use incomplete info, no retry 27 | 
print(f"Find incomplete info of {bangou} in db") 28 | return info, False 29 | 30 | info = dict() 31 | link = self.GetWebContent(bangou) 32 | 33 | if not link: 34 | print("Get Webpage Failed") 35 | info["bangou"] = bangou 36 | return info, False 37 | 38 | # print(self.soup.prettify()) 39 | info["bangou"] = self.ParseBangou() 40 | info["title"] = self.ParseTitle(info["bangou"]) 41 | info["tags"] = self.ParseTag() 42 | info["director"] = self.ParseDirector() 43 | info["maker"] = self.ParseMaker() 44 | info["actors"] = self.ParseActor() 45 | info["album"] = self.ParseAlbum() 46 | info["duration"] = self.ParseDuration() 47 | info["date"] = self.ParseDate() 48 | info["thumbs"] = self.ParseThumbs() 49 | info["rating"] = self.ParseRating() 50 | info["link"] = link 51 | 52 | self.dataManager.AddRecord(info) 53 | 54 | if not info["title"]: 55 | info["bangou"] = bangou 56 | return info, False 57 | else: 58 | info["title"] = info["title"].replace( 59 | info["bangou"], "").strip(" ") 60 | 61 | print(json.dumps(info, indent=4, ensure_ascii=False)) 62 | 63 | # BUG: Weird, there are two bangous, maybe it's a bug 64 | if bangou != info["bangou"]: 65 | info2 = info.copy() 66 | info2["bangou"] = bangou 67 | print(f"two bangous: {bangou} {info['bangou']}") 68 | 69 | return info, True 70 | 71 | 72 | class JAVInfoGetter_javlibrary(JAVInfoGetter): 73 | def __init__(self, setting, dataManager): 74 | super().__init__(setting, dataManager) 75 | self.webPageGetter = WebPageGetter_JavLibrary( 76 | cookieFilePath=self.setting.javlibraryCookieFilePath, waitTime=self.setting.getInfoInterval) 77 | 78 | def GetWebContent(self, bangou): 79 | link = "http://www.javlibrary.com/" + self.setting.language + \ 80 | "/vl_searchbyid.php?keyword=" + bangou 81 | 82 | source = self.webPageGetter.getPage(link) 83 | self.soup = BeautifulSoup(source, "html.parser") 84 | 85 | # has multiple search result 86 | if self.soup.select_one(".videothumblist"): 87 | try: 88 | link = "http://www.javlibrary.com/" + 
self.setting.language + "/" + \ 89 | self.soup.select_one(".videothumblist").select_one( 90 | ".video").select_one("a")["href"] 91 | response = requests.get(link) 92 | self.soup = BeautifulSoup(response.text, "html.parser") 93 | except: 94 | link = "" 95 | 96 | return link 97 | 98 | def ParseBangou(self): 99 | try: 100 | return self.soup.select_one("#video_id").select_one(".text").getText() 101 | except: 102 | return "" 103 | 104 | def ParseTitle(self, bangou): 105 | try: 106 | return self.soup.select_one( 107 | "#video_title").select_one("a").getText() 108 | except: 109 | return "" 110 | 111 | def ParseTag(self): 112 | try: 113 | return list(map(getText, self.soup.select_one("#video_genres").select("a"))) 114 | except: 115 | return "" 116 | 117 | def ParseMaker(self): 118 | try: 119 | return self.soup.select_one("#video_maker").select_one("a").getText() 120 | except: 121 | return "" 122 | 123 | def ParseDirector(self): 124 | try: 125 | return self.soup.select_one("#video_director").select_one("a").getText() 126 | except: 127 | return "" 128 | 129 | def ParseActor(self): 130 | try: 131 | return list(map(getText, self.soup.select_one("#video_cast").select("a"))) 132 | except: 133 | return "" 134 | 135 | def ParseAlbum(self): 136 | try: 137 | return "http:" + self.soup.select_one("#video_jacket").select_one("img").get("src") 138 | except: 139 | return "" 140 | 141 | def ParseDuration(self): 142 | try: 143 | return self.soup.select_one("#video_length").select_one(".text").getText() 144 | except: 145 | return "" 146 | 147 | def ParseDate(self): 148 | try: 149 | return self.soup.select_one("#video_date").select_one(".text").getText() 150 | except: 151 | return "" 152 | 153 | def ParseThumbs(self): # FIXME: sometimes no thumb 154 | try: 155 | imgs = self.soup.select_one(".previewthumbs").select("img") 156 | imgs = imgs[1:] # remove "../img/player.gif" 157 | imgs = [img["src"] for img in imgs] 158 | return imgs 159 | except: 160 | return "" 161 | 162 | def 
ParseRating(self): 163 | try: 164 | text = self.soup.select_one( 165 | "#video_review").select_one(".score").getText() 166 | rate = re.search("(\d+.*\d)", text).group(0) 167 | return str(float(rate)) 168 | except: 169 | return "" 170 | 171 | 172 | class JAVInfoGetter_javdb(JAVInfoGetter): 173 | """ 174 | TODO: now only support english version 175 | """ 176 | 177 | def __init__(self, setting, dataManager): 178 | super().__init__(setting, dataManager) 179 | self.webPageGetter = WebPageGetter_JavDB( 180 | cookieFilePath=self.setting.javdbCookieFilePath, waitTime=self.setting.getInfoInterval) 181 | 182 | def GetWebContent(self, bangou): 183 | link = "http://javdb.com/search?q=" + bangou 184 | print(link) 185 | source, simpletitle = self.webPageGetter.getPage(link) 186 | if not source and not simpletitle: 187 | return "" 188 | 189 | self.soup = BeautifulSoup(source, "html.parser") 190 | try: 191 | infos = self.soup.select_one( 192 | ".movie-panel-info").select(".panel-block") 193 | except: 194 | # not found, use simple title as info 195 | print("Detail page not found, use simple title") 196 | self.infoDict = dict() 197 | self.infoDict["title"] = simpletitle 198 | self.infoDict["ID"] = bangou 199 | return link 200 | 201 | self.infoDict = dict() 202 | for info in infos: 203 | key = info.select_one("strong") 204 | if not key: 205 | continue 206 | key = key.getText().strip(":") 207 | value = info.select_one("span").getText() 208 | self.infoDict[key] = value 209 | return link 210 | 211 | def ParseBangou(self): 212 | try: 213 | return self.infoDict["ID"] 214 | except: 215 | return "" 216 | 217 | def ParseTitle(self, bangou): 218 | try: 219 | return self.infoDict["title"] 220 | except: 221 | pass 222 | try: 223 | return self.soup.select_one(".title").select_one("strong").getText() 224 | except: 225 | return "" 226 | 227 | def ParseTag(self): 228 | try: 229 | tags = self.infoDict["Tags"].split(",") 230 | tags = [tag.strip(u"\xa0").strip(" ") for tag in tags] 231 | return tags 
232 | except: 233 | return "" 234 | 235 | def ParseMaker(self): 236 | try: 237 | return self.infoDict["Maker"] 238 | except: 239 | return "" 240 | 241 | def ParseDirector(self): 242 | try: 243 | return self.infoDict["Director"] 244 | except: 245 | return "" 246 | 247 | def ParseActor(self): 248 | try: 249 | return self.infoDict["Actor(s)"] 250 | except: 251 | return "" 252 | 253 | def ParseAlbum(self): 254 | try: 255 | return self.soup.select_one(".video-cover")["src"] 256 | except: 257 | return "" 258 | 259 | def ParseDuration(self): 260 | try: 261 | duration = self.infoDict["Duration"] 262 | duration = re.search("\d+", duration).group(0) 263 | return duration 264 | except: 265 | return "" 266 | 267 | def ParseDate(self): 268 | try: 269 | return self.infoDict["Date"] 270 | except: 271 | return "" 272 | 273 | def ParseThumbs(self): 274 | try: 275 | imgs = self.soup.select_one(".preview-images").select("a") 276 | imgs = [img["href"] for img in imgs] 277 | return imgs 278 | except: 279 | return "" 280 | 281 | def ParseRating(self): 282 | try: 283 | rating = self.infoDict["Rating"] 284 | rating = re.search("\d+.\d+", rating).group(0) 285 | return rating 286 | except: 287 | return "" 288 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 gitqwerty777 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or 
substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # JAV-Info 2 | 3 | ![](https://img.shields.io/github/downloads/gitqwerty777/JAV-Info/total.svg) 4 | 5 | > A simple tool to rename local video files by their JAV unique ID (bangou); it can also download the album image and thumbnail images 6 | 7 | ## Demo 8 | 9 | - ![Demo](demo.gif) 10 | 11 | ## Usage 12 | 13 | Download the Windows (.exe) version from the releases page (deprecated❗), or run it directly with Python 14 | 15 | ### Requirement 16 | 17 | - Python 3.6 or newer 18 | - Install the packages listed in `requirements.txt` 19 | - Put the correct version of `ChromeDriver.exe` on your `PATH` 20 | - Download from <https://chromedriver.chromium.org/downloads> 21 | - Create `config.json` by copying and editing `config-template.json` 22 | 23 | Usage: `python main.py` 24 | 25 | ## Config 26 | 27 | This program reads its configuration from `config.json`. 28 | 29 | You can create it from `config-template.json`. 
30 | 31 | | Key | Description | 32 | | ------------------------ | ------------------------------------------------------------------------------------------------------------------------ | 33 | | fileDirs | Input directories; Unix-style file paths are preferred | 34 | | getInfoInterval | Time interval, in seconds, between requests to the source website; do not set it too small | 35 | | fileNameFormat | Format of the new file name; see details below | 36 | | language | `tw`, `cn`, `en`, or `ja` for javlibrary; javdb supports English only | 37 | | saveAlbum | Save the album image in the same directory as the video file | 38 | | saveThumb | Save thumbnails in the same directory as the video file | 39 | | dryRun | Only print the planned renames and image downloads without performing them | 40 | | maxFileLength | Maximum file name length in bytes; reduce this value if a "file name too long" error occurs | 41 | | minFileSizeMB | Minimum file size (in MB) for a file to be renamed | 42 | | renameCheck | Ask for confirmation before every rename operation | 43 | | ignoreWords | Words to remove from file names before parsing, to prevent bangou parse errors; e.g., "1080p-123.mp4" may otherwise be parsed as `p-123` | 44 | | retryFailedDB | Query the source website again for database records whose earlier lookup failed | 45 | | javlibraryCookieFilePath | Path to the cookie file exported from your logged-in javlibrary session | 46 | | javdbCookieFilePath | Path to the cookie file exported from your logged-in javdb session | 47 | 48 | ### Tags in fileNameFormat 49 | 50 | Including `{bangou}` in the file name is recommended so that the file can be parsed and renamed again later. 
51 | 52 | | Tags | Description | 53 | | ---------- | ---------------------------------------------------------------------- | 54 | | {bangou} | The unique ID (bangou) of the video | 55 | | {title} | Title; may include actors' names, guaranteed not to include the bangou | 56 | | {tags} | Tags from the source website | 57 | | {director} | Director of the video | 58 | | {maker} | Maker of the video, often related to the first (English-letter) part of the bangou | 59 | | {actors} | Actors in the video | 60 | | {duration} | Length of the video in minutes | 61 | | {date} | Release date | 62 | | {rating} | User rating from the source website | 63 | | {album} | Link to the album image, **not recommended** | 64 | | {thumbs} | Links to the thumbnails, **not recommended** | 65 | | {link} | Link to the information source, **not recommended** | 66 | 67 | ## Database 68 | 69 | All query results are saved in `db-{language}.json`. 70 | 71 | You can do a dry run first to preview the renames, and then run for real without retrieving the data again. 72 | 73 | Failed requests are also saved, so clear the database (or enable `retryFailedDB`) if something went wrong. 
74 | 75 | ## Note 76 | 77 | - Input **filename** should include bangou, or it cannot be renamed 78 | - If there exist multiple files that have the same bangou, they will be renamed with the suffix serial number, ordered by original file name 79 | 80 | ## Future Work 81 | 82 | - Execute 83 | - fill video metadata in file 84 | - options for new folder 85 | - FileName 86 | - fit more types of bangou 87 | - Database 88 | - use other method instead directly loading into memory 89 | - find other database which has chinese title 90 | - UI 91 | - interface to search local database 92 | 93 | 94 | ## Source Website 95 | 96 | - [javlibrary](https://www.javlibrary.com) 97 | - [javdb](https://javdb.com) -------------------------------------------------------------------------------- /Setting.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | 4 | class Setting: 5 | def __init__(self): 6 | with open("config.json", encoding="utf-8") as configFile: 7 | settingJson = json.load(configFile) 8 | 9 | try: 10 | self.fileDirs = settingJson["fileDirs"] 11 | self.getInfoInterval = settingJson["getInfoInterval"] 12 | self.fileNameFormat = settingJson["fileNameFormat"] 13 | self.language = settingJson["language"] 14 | self.saveAlbum = settingJson["saveAlbum"] 15 | self.dryRun = settingJson["dryRun"] 16 | self.maxFileLength = settingJson["maxFileLength"] 17 | self.minFileSizeMB = settingJson["minFileSizeMB"] 18 | self.renameCheck = settingJson["renameCheck"] 19 | self.saveThumb = settingJson["saveThumb"] 20 | self.ignoreWords = settingJson["ignoreWords"] 21 | self.retryFailedDB = settingJson["retryFailedDB"] 22 | self.javlibraryCookieFilePath = settingJson["javlibraryCookieFilePath"] 23 | self.javdbCookieFilePath = settingJson["javdbCookieFilePath"] 24 | # TODO: enable db or not 25 | except: 26 | print("read config file failed") 27 | exit(0) 28 | -------------------------------------------------------------------------------- 
/config-template.json: -------------------------------------------------------------------------------- 1 | { 2 | "fileDirs": [ 3 | "./demo_files" 4 | ], 5 | "ignoreWords": [ 6 | "1080p" 7 | ], 8 | "getInfoInterval": 0.5, 9 | "fileNameFormat": "[{bangou}]{title}", 10 | "language": "tw", 11 | "saveAlbum": false, 12 | "saveThumb": false, 13 | "dryRun": true, 14 | "maxFileLength": 255, 15 | "minFileSizeMB": 0, 16 | "renameCheck": false, 17 | "retryFailedDB": true, 18 | "javlibraryCookieFilePath": "./javlibrary-cookie.txt", 19 | "javdbCookieFilePath": "./javdb-cookie.txt" 20 | } -------------------------------------------------------------------------------- /demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitqwerty777/JAV-Info/2bb0fa03d4639e9463e88c2824d0d8fedb23ef04/demo.gif -------------------------------------------------------------------------------- /demo_files/IENE-777.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitqwerty777/JAV-Info/2bb0fa03d4639e9463e88c2824d0d8fedb23ef04/demo_files/IENE-777.mp4 -------------------------------------------------------------------------------- /demo_files/KAWD-777.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitqwerty777/JAV-Info/2bb0fa03d4639e9463e88c2824d0d8fedb23ef04/demo_files/KAWD-777.mp4 -------------------------------------------------------------------------------- /demo_files/STar-777.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitqwerty777/JAV-Info/2bb0fa03d4639e9463e88c2824d0d8fedb23ef04/demo_files/STar-777.mp4 -------------------------------------------------------------------------------- /demo_files/nykd-54.mp4: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gitqwerty777/JAV-Info/2bb0fa03d4639e9463e88c2824d0d8fedb23ef04/demo_files/nykd-54.mp4 -------------------------------------------------------------------------------- /getch.py: -------------------------------------------------------------------------------- 1 | # ref: https://stackoverflow.com/a/510364/2678970 2 | 3 | class _Getch: 4 | """Gets a single character from standard input. Does not echo to the screen.""" 5 | 6 | def __init__(self): 7 | try: 8 | self.impl = _GetchWindows() 9 | except ImportError: 10 | self.impl = _GetchUnix() 11 | 12 | def __call__(self): return self.impl() 13 | 14 | 15 | class _GetchUnix: 16 | def __init__(self): 17 | import tty 18 | import sys 19 | 20 | def __call__(self): 21 | import sys 22 | import tty 23 | import termios 24 | fd = sys.stdin.fileno() 25 | old_settings = termios.tcgetattr(fd) 26 | try: 27 | tty.setraw(sys.stdin.fileno()) 28 | ch = sys.stdin.read(1) 29 | finally: 30 | termios.tcsetattr(fd, termios.TCSADRAIN, old_settings) 31 | return ch 32 | 33 | 34 | class _GetchWindows: 35 | def __init__(self): 36 | import msvcrt 37 | 38 | def __call__(self): 39 | import msvcrt 40 | return msvcrt.getch().decode() 41 | 42 | 43 | getch = _Getch() 44 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import colorama 2 | import utils 3 | from Setting import Setting 4 | from FileNameParser import FileNameParser 5 | from DataManager import DataManager 6 | from JAVInfoGetter import JAVInfoGetter_javlibrary, JAVInfoGetter_javdb 7 | from Executor import Executor 8 | from getch import getch 9 | 10 | 11 | class JAVInfoGetter: 12 | def __init__(self, setting, fileNameParser, dataManager, infoGetters, executor): 13 | self.setting = setting 14 | self.fileNameParser = fileNameParser 15 | self.dataManager = dataManager 16 | self.infoGetters = infoGetters 17 | self.executor = executor 18 | 
self.renameFailedFile = open( 19 | "renameFailedHistory.txt", "a", encoding="utf-8") # TODO: filename to config 20 | 21 | def getInfo(self): 22 | try: 23 | fileNames = {} 24 | for fileDir in self.setting.fileDirs: 25 | fileNames = fileNameParser.GetFiles( 26 | fileNames, fileDir) 27 | for bangou in fileNames: 28 | self.renameByBangou(bangou, fileNames) 29 | except Exception as e: 30 | print(e) 31 | finally: 32 | self.dataManager.Save() 33 | 34 | def renameByBangou(self, bangou, fileNames): 35 | info = None 36 | success = False 37 | print( 38 | f"===== 1/3: get bangou info {utils.yellowStr(bangou)}") 39 | for infoGetter in self.infoGetters: 40 | # Get the first complete info 41 | info, success = infoGetter.GetInfo( 42 | bangou, str(fileNames[bangou])) 43 | if success: 44 | break 45 | if not success: 46 | utils.logError( 47 | f"Get Info from bangou {bangou} failed. File name {str(fileNames[bangou])}") 48 | utils.writeText(self.renameFailedFile, 49 | f"{bangou} {fileNames[bangou]}\n") 50 | self.dataManager.AddRecord(info) 51 | return 52 | assert info 53 | self.executor.HandleFiles(info, bangou, fileNames) 54 | 55 | 56 | def checkDryRun(setting): 57 | if setting.dryRun: 58 | utils.logError( 59 | f"This is dry run version.\nSet dryRun to false in config.json to execute") 60 | else: 61 | utils.logError( 62 | f"This is not dry run version.\nDry run is recommended before execution.\nDo you want to continue?(y/N)") 63 | response = getch() 64 | if response.lower() != "y": 65 | exit(0) 66 | 67 | 68 | if __name__ == "__main__": 69 | colorama.init() 70 | 71 | setting = Setting() 72 | checkDryRun(setting) 73 | 74 | fileNameParser = FileNameParser(setting.minFileSizeMB, setting.ignoreWords) 75 | dataManager = DataManager(setting) 76 | infoGetters = [JAVInfoGetter_javlibrary(setting, dataManager), JAVInfoGetter_javdb( 77 | setting, dataManager)] 78 | executor = Executor(setting) 79 | javInfoGetter = JAVInfoGetter( 80 | setting, fileNameParser, dataManager, infoGetters, executor) 
81 | 82 | javInfoGetter.getInfo() 83 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4==4.10.0 2 | colorama==0.4.4 3 | requests==2.26.0 4 | selenium==4.1.3 -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import colorama 2 | import pprint 3 | 4 | 5 | def createPrettyPrinter(stream=None): 6 | return pprint.PrettyPrinter(indent=0, width=60, stream=stream) 7 | 8 | 9 | def backColorStr(s, color): 10 | return f"{color}{s}{colorama.Back.RESET}" 11 | 12 | 13 | def foreColorStr(s, color): 14 | return f"{color}{s}{colorama.Fore.RESET}" 15 | 16 | 17 | def grayBackStr(s): 18 | return backColorStr(s, colorama.Back.LIGHTMAGENTA_EX) 19 | 20 | 21 | def whiteBackStr(s): 22 | return foreColorStr(backColorStr(s, colorama.Back.WHITE), colorama.Fore.BLACK) 23 | 24 | 25 | def yellowStr(s): 26 | return foreColorStr(s, colorama.Fore.YELLOW) 27 | 28 | 29 | def blueBackStr(s): 30 | return backColorStr(s, colorama.Back.BLUE) 31 | 32 | 33 | def greenBackStr(s): 34 | return backColorStr(s, colorama.Back.GREEN) 35 | 36 | 37 | def redBackStr(s): 38 | return backColorStr(s, colorama.Back.RED) 39 | 40 | 41 | def logError(s): 42 | print(redBackStr(s)) 43 | 44 | 45 | def lenInBytes(string): 46 | return len(string.encode("utf-8")) 47 | 48 | 49 | def writeText(file, str): 50 | file.write(str) 51 | file.flush() 52 | -------------------------------------------------------------------------------- /webpage_getter.py: -------------------------------------------------------------------------------- 1 | import time 2 | from pathlib import Path 3 | 4 | from http import cookiejar 5 | from bs4 import BeautifulSoup 6 | 7 | from selenium import webdriver 8 | from selenium.webdriver.common.by import By 9 | from 
selenium.webdriver.support.wait import WebDriverWait 10 | 11 | 12 | class WebPageGetter(object): 13 | def __init__(self, cookieFilePath, waitTime): 14 | """ 15 | Put correct version of ChromeDriver.exe at path from https://chromedriver.chromium.org/downloads 16 | """ 17 | options = webdriver.ChromeOptions() 18 | options.add_argument("--headless") 19 | options.add_argument("--ignore-certificate-errors-spki-list") 20 | self.browser = webdriver.Chrome(options=options) 21 | self.cookies = cookiejar.MozillaCookieJar(cookieFilePath) 22 | self.cookies.load() 23 | self.waitTime = waitTime 24 | 25 | def addCookies(self): 26 | # https://stackoverflow.com/questions/41906704/selenium-add-cookies-from-cookiejar 27 | for cookie in self.cookies: 28 | cookie_dict = {'domain': cookie.domain, 'name': cookie.name, 29 | 'value': cookie.value, 'secure': cookie.secure} 30 | if cookie.expires: 31 | cookie_dict['expiry'] = cookie.expires 32 | if cookie.path_specified: 33 | cookie_dict['path'] = cookie.path 34 | self.browser.add_cookie(cookie_dict) 35 | 36 | def getPage(self, url): 37 | raise NotImplementedError 38 | 39 | def simpleGetPage(self, url): 40 | print(f"Get page {url}") 41 | self.browser.get(self.baseUrl) 42 | self.addCookies() 43 | self.browser.get(url) 44 | time.sleep(self.waitTime) 45 | 46 | def __del__(self): 47 | self.browser.close() 48 | 49 | 50 | class WebPageGetter_JavLibrary(WebPageGetter): 51 | def __init__(self, cookieFilePath, waitTime): 52 | super().__init__(cookieFilePath, waitTime) 53 | self.baseUrl = "https://www.javlibrary.com/" 54 | 55 | def getPage(self, url): 56 | self.simpleGetPage(url) 57 | button = self.browser.find_elements( 58 | by=By.CLASS_NAME, value="btnAdultAgree") 59 | if button: 60 | button[0].click() 61 | time.sleep(self.waitTime) 62 | 63 | return self.browser.page_source 64 | 65 | 66 | class WebPageGetter_JavDB(WebPageGetter): 67 | def __init__(self, cookieFilePath, waitTime): 68 | super().__init__(cookieFilePath, waitTime) 69 | self.baseUrl = 
"https://javdb.com" 70 | 71 | def getPage(self, url): 72 | self.simpleGetPage(url) 73 | try: 74 | WebDriverWait(self.browser, self.waitTime).until( 75 | lambda x: x.find_element(By.ID, "videos")) 76 | except Exception as e: 77 | return "", "" 78 | 79 | videolink = "http://javdb.com/" + \ 80 | self.browser.find_element( 81 | by=By.XPATH, value='//*[@id="videos"]/div/div[1]/a').get_attribute('pathname') + "?locale=en" 82 | simpletitle = self.browser.find_element( 83 | by=By.XPATH, value='//*[@id="videos"]/div/div[1]/a/div[3]').text 84 | #print(f"videolink {videolink}") 85 | #print(f"simpletitle {simpletitle}") 86 | 87 | self.simpleGetPage(videolink) 88 | 89 | # with open(url.split("=")[-1]+".html", "w", encoding="utf-8") as f: 90 | # f.write(self.browser.page_source) 91 | 92 | return self.browser.page_source, simpletitle 93 | 94 | 95 | --------------------------------------------------------------------------------