├── .gitignore
├── DataManager.py
├── Executor.py
├── FileNameParser.py
├── JAVInfoGetter.py
├── LICENSE
├── README.md
├── Setting.py
├── config-template.json
├── demo.gif
├── demo_files
    ├── IENE-777.mp4
    ├── KAWD-777.mp4
    ├── STar-777.mp4
    └── nykd-54.mp4
├── getch.py
├── main.py
├── requirements.txt
├── utils.py
└── webpage_getter.py


/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | *.code-workspace
3 | config.json
4 | db-*.json
5 | *-cookie.txt
6 | renameHistory.txt
7 | dist/
8 | build/


--------------------------------------------------------------------------------
/DataManager.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | from pathlib import Path
 3 | import utils
 4 | 
 5 | 
 6 | class DataManager:
 7 |     def __init__(self, setting):
 8 |         self.setting = setting
 9 |         self.dbpath = Path("db-" + self.setting.language + ".json")
10 | 
11 |         if not self.dbpath.exists():
12 |             self.dbpath.touch()
13 |             self.dbdata = {}
14 | 
15 |         with open(self.dbpath, "r", encoding="utf-8") as dbfile:
16 |             dbtext = dbfile.read()
17 |             if not dbtext:
18 |                 self.dbdata = {}
19 |             else:
20 |                 self.dbdata = json.loads(dbtext)
21 | 
22 |     def AddRecord(self, info):
23 |         self.dbdata.update({info["bangou"]: info})
24 | 
25 |     def Save(self):
26 |         print(utils.whiteBackStr("save db"))
27 |         json.dump(self.dbdata, open(self.dbpath, "w",
28 |                   encoding="utf-8"), ensure_ascii=False)
29 | 
30 |     def Search(self, bangou):
31 |         if bangou in self.dbdata:
32 |             return self.dbdata[bangou]
33 |         else:
34 |             return None
35 | 


--------------------------------------------------------------------------------
/Executor.py:
--------------------------------------------------------------------------------
  1 | import requests
  2 | from getch import getch
  3 | from pathlib import Path
  4 | import utils
  5 | import re
  6 | from utils import lenInBytes
  7 | import sys
  8 | from datetime import date
  9 | 
 10 | 
 11 | class Executor:
 12 |     def __init__(self, setting):
 13 |         self.setting = setting
 14 |         day = date.today().strftime("%Y%m%d")
 15 |         self.renameRecords = open(
 16 |             f"renameHistory_{day}.txt", "a", encoding="utf-8")  # TODO: filename to config
 17 | 
 18 |     def HandleFiles(self, info, bangou, fileNames):
 19 |         print(
 20 |             f"===== 2/3: handle bangou {utils.yellowStr(bangou)}")
 21 |         self.HandleBangou(info, fileNames[bangou][0])
 22 | 
 23 |         if len(fileNames[bangou]) > 1:  # need to rename files with index
 24 |             for index, fileName in enumerate(fileNames[bangou]):
 25 |                 self.HandleFile(info, fileName, index)
 26 |         else:
 27 |             self.HandleFile(info, fileNames[bangou][0])
 28 | 
 29 |     def HandleBangou(self, info, path):  # only save one copy of album and thumb
 30 |         if self.setting.saveAlbum:
 31 |             self.SaveAlbum(info, path)
 32 |         if self.setting.saveThumb:
 33 |             self.SaveThumb(info, path)
 34 | 
 35 |     def HandleFile(self, info, path, index=-1):
 36 |         print(
 37 |             f"===== 3/3: handle file {utils.yellowStr(str(path))}")
 38 |         self.Rename(info, path, index)
 39 |         # optional TODO: fill video meta description in video file
 40 |         # TODO: option: new folder for all video file, for the same actor, for the same tag # create link
 41 | 
 42 |     def getValidWindowsFileName(self, fileName):
 43 |         """
 44 |         https://docs.microsoft.com/zh-tw/windows/win32/fileio/naming-a-file?redirectedfrom=MSDN
 45 |         """
 46 |         return re.sub(r"[><:\"/\\\|\?*]", "_", fileName)
 47 | 
 48 |     def Rename(self, info, path, index):
 49 |         newFileName = self.setting.fileNameFormat
 50 |         for key in info:
 51 |             infokey = "{" + key + "}"
 52 |             infovalue = info[key]
 53 |             if type(infovalue) is list:
 54 |                 infovalue = ""
 55 |                 for element in info[key]:
 56 |                     infovalue = infovalue + "[" + element + "]"
 57 |             newFileName = newFileName.replace(infokey, infovalue)
 58 | 
 59 |         if "win" in sys.platform:
 60 |             newFileName = self.getValidWindowsFileName(newFileName)
 61 | 
 62 |         # handle multiple files with the same bangou
 63 |         numberStr = ("_" + str(index+1)) if (index != -1) else ""
 64 |         # handle file name too long error
 65 |         if lenInBytes(newFileName) + lenInBytes(numberStr) + lenInBytes(path.suffix) > self.setting.maxFileLength:
 66 |             print(utils.blueBackStr(f"File name too long: {newFileName}"))
 67 |             maxFileLength = self.setting.maxFileLength - \
 68 |                 lenInBytes(path.suffix) - lenInBytes(numberStr)
 69 |             while lenInBytes(newFileName) > maxFileLength:
 70 |                 newFileName = newFileName[0:-1]
 71 |             print(
 72 |                 f"After truncate file name: {utils.blueBackStr(newFileName)}")
 73 |         newName = newFileName + numberStr + path.suffix
 74 | 
 75 |         if path.name == newName:
 76 |             print(
 77 |                 f"File {utils.grayBackStr(str(path))} no need to rename")
 78 |             return
 79 | 
 80 |         self.DoRename(path, newName)
 81 | 
 82 |     def DoRename(self, path, newName):
 83 |         newPath = path.parents[0] / newName
 84 | 
 85 |         print(f"Rename {utils.blueBackStr(str(path))}\n" +
 86 |               f"To     {utils.greenBackStr(str(newPath))}")
 87 | 
 88 |         if self.setting.dryRun:
 89 |             return
 90 | 
 91 |         if self.setting.renameCheck:
 92 |             print(utils.blueBackStr(f"Do you want to execute rename?(Y/n)"))
 93 |             response = getch()
 94 |             print(response)
 95 |             if response.lower() == "n":
 96 |                 print("User cancel rename")
 97 |                 return
 98 | 
 99 |         try:
100 |             self.renameRecords.write(f"{path} -> {newPath}\n")
101 |             self.renameRecords.flush()
102 |             path.rename(newPath)
103 |         except Exception as e:
104 |             print(
105 |                 utils.redBackStr(f"Rename [{str(path)}] to [{str(newPath)}] failed"))
106 |             print(e)
107 | 
108 |     def SaveAlbum(self, info, path):
109 |         if not info["album"]:
110 |             print("Album link not found")
111 |             return
112 | 
113 |         albumFileName = info["bangou"] + ".jpg"
114 |         albumPath = Path(path.parents[0] / albumFileName)
115 | 
116 |         if albumPath.exists():
117 |             print(
118 |                 f"Album {utils.blueBackStr(str(albumPath))} already exists")
119 |             return
120 |         self.DoSaveAlbum(info["album"], albumPath)
121 | 
122 |     def DoSaveAlbum(self, fileURL, albumPath):
123 |         print(
124 |             f"Save album {utils.greenBackStr(str(albumPath))}")
125 | 
126 |         if self.setting.dryRun:
127 |             return
128 | 
129 |         with open(albumPath, 'wb') as albumFile:
130 |             fileObject = requests.get(fileURL)
131 |             albumFile.write(fileObject.content)
132 | 
133 |     def SaveThumb(self, info, path):
134 |         if not info["thumbs"]:
135 |             print("Thumbnail link not found")
136 |             return
137 | 
138 |         for index, thumb in enumerate(info["thumbs"]):
139 |             fileName = info["bangou"] + "_thumb" + \
140 |                 str(index).zfill(2) + ".jpg"
141 |             filePath = Path(path.parents[0] / fileName)
142 | 
143 |             if filePath.exists():
144 |                 print(
145 |                     f"Thumbnail {utils.blueBackStr(str(filePath))} already exists")
146 |                 continue
147 | 
148 |             self.DoSaveThumb(thumb, filePath)
149 | 
150 |     def DoSaveThumb(self, fileURL, filePath):
151 |         print(
152 |             f"Save thumbnail {utils.greenBackStr(str(filePath))}")
153 | 
154 |         if self.setting.dryRun:
155 |             return
156 | 
157 |         with open(filePath, 'wb') as thumbFile:
158 |             fileObject = requests.get(fileURL)
159 |             thumbFile.write(fileObject.content)
160 | 


--------------------------------------------------------------------------------
/FileNameParser.py:
--------------------------------------------------------------------------------
  1 | import mimetypes
  2 | import re
  3 | from pathlib import Path
  4 | import utils
  5 | import json
  6 | 
  7 | 
  8 | class BangouHandler:  # abstract
  9 |     def __init__(self, next):
 10 |         self.next = next
 11 | 
 12 |     def DoNext(self, fileName):
 13 |         if self.next:
 14 |             return self.next.Handle(fileName)
 15 |         else:
 16 |             return ""
 17 | 
 18 | 
 19 | class FC2BangouHandler(BangouHandler):
 20 |     def __init__(self, next):
 21 |         BangouHandler.__init__(self, next)
 22 |         self.fc2BangouRE = re.compile(r"(fc2)-*(ppv)*-*(\d{4,9})")
 23 | 
 24 |     def Handle(self, fileName):
 25 |         result = self.fc2BangouRE.search(fileName)
 26 | 
 27 |         if result:
 28 |             return "fc2-ppv-" + result.group(3)
 29 |         else:
 30 |             return self.DoNext(fileName)
 31 | 
 32 | 
 33 | class GeneralBangouHandler(BangouHandler):
 34 |     def __init__(self, next):
 35 |         BangouHandler.__init__(self, next)
 36 |         self.generalBangouRE = re.compile(r"([a-zA-Z]{2,5})\-+(\d{2,5})")
 37 | 
 38 |     def Handle(self, fileName):
 39 |         result = self.generalBangouRE.search(fileName)
 40 | 
 41 |         if result:
 42 |             return result.group(1) + "-" + result.group(2)
 43 |         else:
 44 |             return self.DoNext(fileName)
 45 | 
 46 | 
 47 | class GeneralLooseBangouHandler(BangouHandler):
 48 |     def __init__(self, next):
 49 |         BangouHandler.__init__(self, next)
 50 |         self.generalLooseBangouRE = re.compile(
 51 |             r"([a-zA-Z]{2,5})\s*\-*\s*(\d{2,5})")
 52 | 
 53 |     def Handle(self, fileName):
 54 |         result = self.generalLooseBangouRE.search(fileName)
 55 | 
 56 |         if result:
 57 |             bangou = result.group(1) + "-" + result.group(2)
 58 |             if bangou == "MP-4":  # special case
 59 |                 bangou = ""
 60 |             if bangou:
 61 |                 return bangou
 62 |         return self.DoNext(fileName)
 63 | 
 64 | 
 65 | class FileNameParser:
 66 |     def __init__(self, minFileSizeMB, ignoreWords):
 67 |         self.minFileSizeMB = minFileSizeMB
 68 |         self.ignoreWords = ignoreWords
 69 |         # TODO: fit different bangou format
 70 |         self.bangouHandler = FC2BangouHandler(
 71 |             GeneralBangouHandler(
 72 |                 GeneralLooseBangouHandler(None)))
 73 | 
 74 |         # TODO: filename to config
 75 |         filePath = Path("BangouToFilename.txt")
 76 |         self.prettyPrinterFile = utils.createPrettyPrinter(
 77 |             open(filePath, "w", encoding="utf-8"))
 78 |         self.prettyPrinter = utils.createPrettyPrinter()
 79 | 
 80 |     def GetFiles(self, fileNames, fileDir):
 81 |         videoFileList = []
 82 |         path = Path(fileDir)
 83 | 
 84 |         mimetypes.init()
 85 |         # Add new unknown video file extension if needed
 86 |         mimetypes.add_type('video/vnd.rn-realmedia-vbr', '.rmvb')
 87 |         mimetypes.add_type('video/rm', '.rm')
 88 |         mimetypes.add_type('video/x-flv', '.flv')
 89 |         mimetypes.add_type('video/dcv', '.dcv')
 90 | 
 91 |         for file in path.glob("**/*"):
 92 |             if file.is_dir():
 93 |                 continue
 94 |             if file.suffix in mimetypes.types_map:
 95 |                 mimetype = mimetypes.types_map[file.suffix]
 96 |                 if "video" in mimetype:
 97 |                     videoFileList.append(file)
 98 |             # else:
 99 |                 # print("unknown file extension: " + file.suffix)
100 | 
101 |         for fileName in videoFileList:
102 |             stat = fileName.stat()
103 |             fileSizeMB = stat.st_size >> 20
104 |             if self.minFileSizeMB > fileSizeMB:
105 |                 # print(f"ignore {str(fileName)} because file too small")
106 |                 continue
107 | 
108 |             bangou = self.ParseBangou(fileName.name)
109 |             if not bangou:
110 |                 print(f"bangou not found in file {fileName.name}")
111 |                 continue
112 | 
113 |             bangou = bangou.upper()
114 |             if bangou in fileNames:
115 |                 fileNames[bangou].append(fileName)
116 |                 fileNames[bangou].sort()
117 |             else:
118 |                 fileNames[bangou] = [fileName]
119 | 
120 |         # print("Legal video files with bangou")
121 |         self.prettyPrinterFile.pprint(fileNames)
122 |         # self.prettyPrinter.pprint(fileNames)
123 |         return fileNames
124 | 
125 |     def ParseBangou(self, fileName):
126 |         fileName = fileName.lower()
127 |         for ignoreWord in self.ignoreWords:
128 |             fileName = fileName.replace(ignoreWord, "")
129 |         fileName = fileName.replace("_", "-")
130 | 
131 |         return self.bangouHandler.Handle(fileName)
132 | 


--------------------------------------------------------------------------------
/JAVInfoGetter.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | import json
  3 | import requests
  4 | from bs4 import BeautifulSoup
  5 | from webpage_getter import WebPageGetter_JavLibrary, WebPageGetter_JavDB
  6 | 
  7 | # TODO: find chinese title source website
  8 | 
  9 | 
 10 | def getText(element):
 11 |     return element.getText()
 12 | 
 13 | 
 14 | class JAVInfoGetter:
 15 |     def __init__(self, setting, dataManager):
 16 |         self.setting = setting
 17 |         self.dataManager = dataManager
 18 | 
 19 |     def GetInfo(self, bangou, fileName):
 20 |         print(f"Try to get info from {self.__class__.__name__}")
 21 |         info = self.dataManager.Search(bangou)
 22 |         if info:
 23 |             if "title" in info and info["title"]:
 24 |                 print(f"Find complete info of {bangou} in db")
 25 |                 return info, True
 26 |             elif not self.setting.retryFailedDB:  # directly use incomplete info, no retry
 27 |                 print(f"Find incomplete info of {bangou} in db")
 28 |                 return info, False
 29 | 
 30 |         info = dict()
 31 |         link = self.GetWebContent(bangou)
 32 | 
 33 |         if not link:
 34 |             print("Get Webpage Failed")
 35 |             info["bangou"] = bangou
 36 |             return info, False
 37 | 
 38 |         # print(self.soup.prettify())
 39 |         info["bangou"] = self.ParseBangou()
 40 |         info["title"] = self.ParseTitle(info["bangou"])
 41 |         info["tags"] = self.ParseTag()
 42 |         info["director"] = self.ParseDirector()
 43 |         info["maker"] = self.ParseMaker()
 44 |         info["actors"] = self.ParseActor()
 45 |         info["album"] = self.ParseAlbum()
 46 |         info["duration"] = self.ParseDuration()
 47 |         info["date"] = self.ParseDate()
 48 |         info["thumbs"] = self.ParseThumbs()
 49 |         info["rating"] = self.ParseRating()
 50 |         info["link"] = link
 51 | 
 52 |         self.dataManager.AddRecord(info)
 53 | 
 54 |         if not info["title"]:
 55 |             info["bangou"] = bangou
 56 |             return info, False
 57 |         else:
 58 |             info["title"] = info["title"].replace(
 59 |                 info["bangou"], "").strip(" ")
 60 | 
 61 |         print(json.dumps(info, indent=4, ensure_ascii=False))
 62 | 
 63 |         # BUG: Weird, there are two bangous, maybe it's a bug
 64 |         if bangou != info["bangou"]:
 65 |             info2 = info.copy()
 66 |             info2["bangou"] = bangou
 67 |             print(f"two bangous: {bangou} {info['bangou']}")
 68 | 
 69 |         return info, True
 70 | 
 71 | 
 72 | class JAVInfoGetter_javlibrary(JAVInfoGetter):
 73 |     def __init__(self, setting, dataManager):
 74 |         super().__init__(setting, dataManager)
 75 |         self.webPageGetter = WebPageGetter_JavLibrary(
 76 |             cookieFilePath=self.setting.javlibraryCookieFilePath, waitTime=self.setting.getInfoInterval)
 77 | 
 78 |     def GetWebContent(self, bangou):
 79 |         link = "http://www.javlibrary.com/" + self.setting.language + \
 80 |             "/vl_searchbyid.php?keyword=" + bangou
 81 | 
 82 |         source = self.webPageGetter.getPage(link)
 83 |         self.soup = BeautifulSoup(source, "html.parser")
 84 | 
 85 |         # has multiple search result
 86 |         if self.soup.select_one(".videothumblist"):
 87 |             try:
 88 |                 link = "http://www.javlibrary.com/" + self.setting.language + "/" + \
 89 |                     self.soup.select_one(".videothumblist").select_one(
 90 |                         ".video").select_one("a")["href"]
 91 |                 response = requests.get(link)
 92 |                 self.soup = BeautifulSoup(response.text, "html.parser")
 93 |             except:
 94 |                 link = ""
 95 | 
 96 |         return link
 97 | 
 98 |     def ParseBangou(self):
 99 |         try:
100 |             return self.soup.select_one("#video_id").select_one(".text").getText()
101 |         except:
102 |             return ""
103 | 
104 |     def ParseTitle(self, bangou):
105 |         try:
106 |             return self.soup.select_one(
107 |                 "#video_title").select_one("a").getText()
108 |         except:
109 |             return ""
110 | 
111 |     def ParseTag(self):
112 |         try:
113 |             return list(map(getText, self.soup.select_one("#video_genres").select("a")))
114 |         except:
115 |             return ""
116 | 
117 |     def ParseMaker(self):
118 |         try:
119 |             return self.soup.select_one("#video_maker").select_one("a").getText()
120 |         except:
121 |             return ""
122 | 
123 |     def ParseDirector(self):
124 |         try:
125 |             return self.soup.select_one("#video_director").select_one("a").getText()
126 |         except:
127 |             return ""
128 | 
129 |     def ParseActor(self):
130 |         try:
131 |             return list(map(getText, self.soup.select_one("#video_cast").select("a")))
132 |         except:
133 |             return ""
134 | 
135 |     def ParseAlbum(self):
136 |         try:
137 |             return "http:" + self.soup.select_one("#video_jacket").select_one("img").get("src")
138 |         except:
139 |             return ""
140 | 
141 |     def ParseDuration(self):
142 |         try:
143 |             return self.soup.select_one("#video_length").select_one(".text").getText()
144 |         except:
145 |             return ""
146 | 
147 |     def ParseDate(self):
148 |         try:
149 |             return self.soup.select_one("#video_date").select_one(".text").getText()
150 |         except:
151 |             return ""
152 | 
153 |     def ParseThumbs(self):  # FIXME: sometimes no thumb
154 |         try:
155 |             imgs = self.soup.select_one(".previewthumbs").select("img")
156 |             imgs = imgs[1:]  # remove "../img/player.gif"
157 |             imgs = [img["src"] for img in imgs]
158 |             return imgs
159 |         except:
160 |             return ""
161 | 
162 |     def ParseRating(self):
163 |         try:
164 |             text = self.soup.select_one(
165 |                 "#video_review").select_one(".score").getText()
166 |             rate = re.search("(\d+.*\d)", text).group(0)
167 |             return str(float(rate))
168 |         except:
169 |             return ""
170 | 
171 | 
class JAVInfoGetter_javdb(JAVInfoGetter):
    """Info getter backed by javdb.com.

    ``GetWebContent`` fills ``self.soup`` and ``self.infoDict`` (label ->
    text taken from the detail panel); the ``Parse*`` methods read from them
    and return "" when a value is missing.

    TODO: now only support english version
    """

    def __init__(self, setting, dataManager):
        super().__init__(setting, dataManager)
        self.webPageGetter = WebPageGetter_JavDB(
            cookieFilePath=self.setting.javdbCookieFilePath, waitTime=self.setting.getInfoInterval)

    def GetWebContent(self, bangou):
        """Load the page for ``bangou``; return its URL, or "" on failure."""
        link = "http://javdb.com/search?q=" + bangou
        print(link)
        source, simpletitle = self.webPageGetter.getPage(link)
        if not source and not simpletitle:
            return ""

        # ``source`` may be empty while a simple title is available.
        self.soup = BeautifulSoup(source or "", "html.parser")
        try:
            infos = self.soup.select_one(
                ".movie-panel-info").select(".panel-block")
        except Exception:
            # not found, use simple title as info
            print("Detail page not found, use simple title")
            self.infoDict = {"title": simpletitle, "ID": bangou}
            return link

        self.infoDict = dict()
        for info in infos:
            key = info.select_one("strong")
            value = info.select_one("span")
            # Skip rows that lack either the label or the value element.
            if key is None or value is None:
                continue
            self.infoDict[key.getText().strip(":")] = value.getText()
        return link

    def _field(self, key):
        """Return ``self.infoDict[key]``, or "" when unavailable."""
        return getattr(self, "infoDict", {}).get(key, "")

    def ParseBangou(self):
        return self._field("ID")

    def ParseTitle(self, bangou):
        # ``bangou`` is unused; kept for interface compatibility.
        infoDict = getattr(self, "infoDict", {})
        if "title" in infoDict:
            return infoDict["title"]
        try:
            return self.soup.select_one(".title").select_one("strong").getText()
        except Exception:
            return ""

    def ParseTag(self):
        """Return the tag list, or "" when the page has no Tags row."""
        tags = self._field("Tags")
        if not tags:
            return ""
        return [tag.strip(u"\xa0").strip(" ") for tag in tags.split(",")]

    def ParseMaker(self):
        return self._field("Maker")

    def ParseDirector(self):
        return self._field("Director")

    def ParseActor(self):
        # NOTE(review): returns the raw string, whereas the javlibrary getter
        # returns a list - confirm whether this should be split.
        return self._field("Actor(s)")

    def ParseAlbum(self):
        try:
            return self.soup.select_one(".video-cover")["src"]
        except Exception:
            return ""

    def ParseDuration(self):
        """Return the first run of digits in the Duration row, or ""."""
        match = re.search(r"\d+", self._field("Duration"))
        return match.group(0) if match else ""

    def ParseDate(self):
        return self._field("Date")

    def ParseThumbs(self):
        try:
            imgs = self.soup.select_one(".preview-images").select("a")
            return [img["href"] for img in imgs]
        except Exception:
            return ""

    def ParseRating(self):
        """Return the first decimal number in the Rating row, or ""."""
        # The dot is escaped and the fraction optional, so an integer score
        # is not skipped in favour of a later number in the text.
        match = re.search(r"\d+(?:\.\d+)?", self._field("Rating"))
        return match.group(0) if match else ""
288 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 gitqwerty777
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # JAV-Info
 2 | 
 3 | ![](https://img.shields.io/github/downloads/gitqwerty777/JAV-Info/total.svg)
 4 | 
 5 | > A simple tool that renames local video files by their JAV unique ID (bangou); it can also download the album image and thumbnail images.
 6 | 
 7 | ## Demo
 8 | 
 9 | - ![Demo](demo.gif)
10 | 
11 | ## Usage
12 | 
13 | Download Windows(.exe) version at release(Deprecated❗) or directly use Python
14 | 
15 | ### Requirement
16 | 
17 | - Python3.6 or newer
18 | - Install packages in `requirements.txt`
19 | - Put the correct version of `ChromeDriver.exe` on your `PATH`
20 |   - Download from <https://chromedriver.chromium.org/downloads>
21 | - Modify `config.json` from `config-template.json`
22 | 
23 | Usage: `python main.py`
24 | 
25 | ## Config
26 | 
27 | This program will read config from `config.json`.
28 | 
29 | You can modify from `config-template.json`.
30 | 
31 | | Key                      | Description                                                                                                              |
32 | | ------------------------ | ------------------------------------------------------------------------------------------------------------------------ |
33 | | fileDirs                 | Input directories, Unix-like file path is preferred                                                                      |
34 | | getInfoInterval          | Time interval to retrieve data from source website in second, do not set too small                                       |
35 | | fileNameFormat           | Format of new file name, see detail below                                                                                |
36 | | language                 | `tw`, `cn`, `en`, `ja` for javlibrary, english only in javdb                                                             |
37 | | saveAlbum                | Save album image in the same directory of video file                                                                     |
38 | | saveThumb                | Save thumbnails in the same directory of video file                                                                      |
39 | | dryRun                   | Run without real execution                                                                                               |
40 | | maxFileLength            | Maximum file name length in bytes, reduce this value if "file name too long" error happens                               |
41 | | minFileSizeMB            | Minimum file size(in MB) to rename                                                                                       |
42 | | renameCheck              | Ask before every rename operation                                                                                        |
43 | | ignoreWords              | Ignore list of words in filename to prevent parse bangou error, e.g., "1080p-123.mp4" will possibly be parsed as `p-123` |
44 | | retryFailedDB            | Retrieve failed data in database from source website again                                                               |
45 | | javlibraryCookieFilePath | Your logined cookie files from website <https://www.javlibrary.com>                                                      |
46 | | javdbCookieFilePath      | Your logined cookie files from website <https://javdb.com>                                                               |
47 | 
48 | ### Tags in fileNameFormat
49 | 
50 | It is recommended to include `{bangou}` in the file name so that the file can be renamed again later.
51 | 
52 | | Tags       | Description                                                            |
53 | | ---------- | ---------------------------------------------------------------------- |
54 | | {bangou}   | The unique ID of jav                                                   |
55 | | {title}    | Title may include actors' name, guarantee not include bangou           |
56 | | {tags}     | Tags in source website                                                 |
57 | | {director} |                                                                        |
58 | | {maker}    | Maker of the video, often related to the first(english) part of bangou |
59 | | {actors}   |                                                                        |
60 | | {duration} | The length of video in minutes                                         |
61 | | {date}     | Release date                                                           |
62 | | {rating}   | User rating from source website                                        |
63 | | {album}    | Link of album image, **not recommended**                               |
64 | | {thumbs}   | Link of thumbnails, **not recommended**                                |
65 | | {link}     | Link of information source, **not recommended**                        |
66 | 
67 | ## Database
68 | 
69 | All queries will be saved in `db-{language}.json`.
70 | 
71 | You can do dry run to check the rename progress and then execute without retrieving data again.
72 | 
73 | Failed requests will also be saved, so clean the database if something went wrong.
74 | 
75 | ## Note
76 | 
77 | - Input **filename** should include bangou, or it cannot be renamed
78 | - If there exist multiple files that have the same bangou, they will be renamed with the suffix serial number, ordered by original file name
79 | 
80 | ## Future Work
81 | 
82 | - Execute
83 |   - fill video metadata in file
84 |   - options for new folder
85 | - FileName
86 |   - fit more types of bangou
87 | - Database
88 |   - use another method instead of loading the whole database into memory
89 |   - find other database which has chinese title
90 | - UI
91 |   - interface to search local database
92 | 
93 | 
94 | ## Source Website
95 | 
96 | - [javlibrary](https://www.javlibrary.com)
97 | - [javdb](https://javdb.com)


--------------------------------------------------------------------------------
/Setting.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | 
 3 | 
 4 | class Setting:
 5 |     def __init__(self):
 6 |         with open("config.json", encoding="utf-8") as configFile:
 7 |             settingJson = json.load(configFile)
 8 | 
 9 |         try:
10 |             self.fileDirs = settingJson["fileDirs"]
11 |             self.getInfoInterval = settingJson["getInfoInterval"]
12 |             self.fileNameFormat = settingJson["fileNameFormat"]
13 |             self.language = settingJson["language"]
14 |             self.saveAlbum = settingJson["saveAlbum"]
15 |             self.dryRun = settingJson["dryRun"]
16 |             self.maxFileLength = settingJson["maxFileLength"]
17 |             self.minFileSizeMB = settingJson["minFileSizeMB"]
18 |             self.renameCheck = settingJson["renameCheck"]
19 |             self.saveThumb = settingJson["saveThumb"]
20 |             self.ignoreWords = settingJson["ignoreWords"]
21 |             self.retryFailedDB = settingJson["retryFailedDB"]
22 |             self.javlibraryCookieFilePath = settingJson["javlibraryCookieFilePath"]
23 |             self.javdbCookieFilePath = settingJson["javdbCookieFilePath"]
24 |             # TODO: enable db or not
25 |         except:
26 |             print("read config file failed")
27 |             exit(0)
28 | 


--------------------------------------------------------------------------------
/config-template.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "fileDirs": [
 3 |         "./demo_files"
 4 |     ],
 5 |     "ignoreWords": [
 6 |         "1080p"
 7 |     ],
 8 |     "getInfoInterval": 0.5,
 9 |     "fileNameFormat": "[{bangou}]{title}",
10 |     "language": "tw",
11 |     "saveAlbum": false,
12 |     "saveThumb": false,
13 |     "dryRun": true,
14 |     "maxFileLength": 255,
15 |     "minFileSizeMB": 0,
16 |     "renameCheck": false,
17 |     "retryFailedDB": true,
18 |     "javlibraryCookieFilePath": "./javlibrary-cookie.txt",
19 |     "javdbCookieFilePath": "./javdb-cookie.txt"
20 | }


--------------------------------------------------------------------------------
/demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gitqwerty777/JAV-Info/2bb0fa03d4639e9463e88c2824d0d8fedb23ef04/demo.gif


--------------------------------------------------------------------------------
/demo_files/IENE-777.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gitqwerty777/JAV-Info/2bb0fa03d4639e9463e88c2824d0d8fedb23ef04/demo_files/IENE-777.mp4


--------------------------------------------------------------------------------
/demo_files/KAWD-777.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gitqwerty777/JAV-Info/2bb0fa03d4639e9463e88c2824d0d8fedb23ef04/demo_files/KAWD-777.mp4


--------------------------------------------------------------------------------
/demo_files/STar-777.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gitqwerty777/JAV-Info/2bb0fa03d4639e9463e88c2824d0d8fedb23ef04/demo_files/STar-777.mp4


--------------------------------------------------------------------------------
/demo_files/nykd-54.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gitqwerty777/JAV-Info/2bb0fa03d4639e9463e88c2824d0d8fedb23ef04/demo_files/nykd-54.mp4


--------------------------------------------------------------------------------
/getch.py:
--------------------------------------------------------------------------------
 1 | # ref: https://stackoverflow.com/a/510364/2678970
 2 | 
 3 | class _Getch:
 4 |     """Gets a single character from standard input.  Does not echo to the screen."""
 5 | 
 6 |     def __init__(self):
 7 |         try:
 8 |             self.impl = _GetchWindows()
 9 |         except ImportError:
10 |             self.impl = _GetchUnix()
11 | 
12 |     def __call__(self): return self.impl()
13 | 
14 | 
class _GetchUnix:
    """Unix backend: reads one character with the terminal in raw mode."""

    def __init__(self):
        # Imported only so construction fails with ImportError where the
        # modules are unavailable; the names themselves are not used here.
        import sys
        import tty

    def __call__(self):
        import sys
        import termios
        import tty
        stdinFd = sys.stdin.fileno()
        # Remember the terminal attributes so they can be restored afterwards.
        savedAttrs = termios.tcgetattr(stdinFd)
        try:
            tty.setraw(stdinFd)
            key = sys.stdin.read(1)
        finally:
            # Always restore the terminal, even if the read is interrupted.
            termios.tcsetattr(stdinFd, termios.TCSADRAIN, savedAttrs)
        return key
32 | 
33 | 
class _GetchWindows:
    """Windows backend: reads one keypress through ``msvcrt``."""

    def __init__(self):
        # Imported only so construction raises ImportError when msvcrt is
        # unavailable, which lets the caller fall back to another backend.
        import msvcrt

    def __call__(self):
        """Return the pressed key as a one-character ``str``."""
        import msvcrt
        # getwch() already returns str. The previous getch().decode() raised
        # UnicodeDecodeError on bytes that are not valid UTF-8 on their own,
        # e.g. the 0xE0 prefix sent before arrow/function keys.
        return msvcrt.getwch()
41 | 
42 | 
# Module-level reader instance: call ``getch()`` to read a single character.
getch = _Getch()
44 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
 1 | import colorama
 2 | import utils
 3 | from Setting import Setting
 4 | from FileNameParser import FileNameParser
 5 | from DataManager import DataManager
 6 | from JAVInfoGetter import JAVInfoGetter_javlibrary, JAVInfoGetter_javdb
 7 | from Executor import Executor
 8 | from getch import getch
 9 | 
10 | 
class JAVInfoGetter:
    """Coordinate one rename run.

    Collects files per bangou, asks the info getters for metadata and hands
    successful results to the executor.

    Args:
        setting: object exposing ``fileDirs`` (directories to scan).
        fileNameParser: object exposing ``GetFiles(fileNames, fileDir)``.
        dataManager: object exposing ``AddRecord(info)`` and ``Save()``.
        infoGetters: sequence of objects exposing
            ``GetInfo(bangou, fileName) -> (info, success)``, tried in order.
        executor: object exposing ``HandleFiles(info, bangou, fileNames)``.
    """

    def __init__(self, setting, fileNameParser, dataManager, infoGetters, executor):
        self.setting = setting
        self.fileNameParser = fileNameParser
        self.dataManager = dataManager
        self.infoGetters = infoGetters
        self.executor = executor
        # Append-only log of the files whose lookup failed.
        self.renameFailedFile = open(
            "renameFailedHistory.txt", "a", encoding="utf-8")  # TODO: filename to config

    def getInfo(self):
        """Scan every configured directory and process each bangou found.

        Any exception is printed rather than propagated, and the database is
        saved in all cases.
        """
        try:
            fileNames = {}
            for fileDir in self.setting.fileDirs:
                # Use the injected parser; relying on a module-level global of
                # the same name only works when this file runs as a script.
                fileNames = self.fileNameParser.GetFiles(
                    fileNames, fileDir)
            for bangou in fileNames:
                self.renameByBangou(bangou, fileNames)
        except Exception as e:
            print(e)
        finally:
            self.dataManager.Save()

    def renameByBangou(self, bangou, fileNames):
        """Look up *bangou* and rename its files, or log the failure.

        Getters are tried in order and the first successful result is used.
        On failure the bangou is written to the failure log and the (failed)
        record is stored through the data manager.
        """
        info = None
        success = False
        print(
            f"===== 1/3: get bangou info {utils.yellowStr(bangou)}")
        for infoGetter in self.infoGetters:
            # Get the first complete info
            info, success = infoGetter.GetInfo(
                bangou, str(fileNames[bangou]))
            if success:
                break
        if not success:
            utils.logError(
                f"Get Info from bangou {bangou} failed. File name {str(fileNames[bangou])}")
            utils.writeText(self.renameFailedFile,
                            f"{bangou} {fileNames[bangou]}\n")
            # info is still None when no getter is configured; there is
            # nothing to record then, and AddRecord would fail indexing None.
            if info is not None:
                self.dataManager.AddRecord(info)
            return
        # Internal invariant: a successful getter must return non-empty info.
        assert info
        self.executor.HandleFiles(info, bangou, fileNames)
54 | 
55 | 
def checkDryRun(setting):
    """Announce the run mode and ask for confirmation before a real run.

    In dry-run mode only a notice is printed. Otherwise the user is prompted
    and one character is read with ``getch``; anything other than ``y``/``Y``
    ends the program.

    Args:
        setting: object exposing the boolean ``dryRun``.

    Raises:
        SystemExit: with status 0 when the user declines to continue.
    """
    if setting.dryRun:
        utils.logError(
            "This is dry run version.\nSet dryRun to false in config.json to execute")
        return

    utils.logError(
        "This is not dry run version.\nDry run is recommended before execution.\nDo you want to continue?(y/N)")
    response = getch()
    if response.lower() != "y":
        # Raise SystemExit directly: the builtin exit() is injected by the
        # site module and may be missing (python -S, frozen executables).
        raise SystemExit(0)
66 | 
67 | 
if __name__ == "__main__":
    # Script entry point: build every collaborator, then run one rename pass.
    colorama.init()

    setting = Setting()
    # Prints the run mode; on a non-dry run the user has to confirm with "y".
    checkDryRun(setting)

    # NOTE: these are module-level names (this block is not inside a function).
    fileNameParser = FileNameParser(setting.minFileSizeMB, setting.ignoreWords)
    dataManager = DataManager(setting)
    # Getters are tried in list order: javlibrary first, then javdb.
    infoGetters = [JAVInfoGetter_javlibrary(setting, dataManager), JAVInfoGetter_javdb(
        setting, dataManager)]
    executor = Executor(setting)
    javInfoGetter = JAVInfoGetter(
        setting, fileNameParser, dataManager, infoGetters, executor)

    javInfoGetter.getInfo()
83 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | beautifulsoup4==4.10.0
2 | colorama==0.4.4
3 | requests==2.26.0
4 | selenium==4.1.3


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
 1 | import colorama
 2 | import pprint
 3 | 
 4 | 
 5 | def createPrettyPrinter(stream=None):
 6 |     return pprint.PrettyPrinter(indent=0, width=60, stream=stream)
 7 | 
 8 | 
 9 | def backColorStr(s, color):
10 |     return f"{color}{s}{colorama.Back.RESET}"
11 | 
12 | 
def foreColorStr(s, color):
    """Return *s* prefixed with *color* and followed by the foreground reset code."""
    reset = colorama.Fore.RESET
    return f"{color}{s}{reset}"
15 | 
16 | 
def grayBackStr(s):
    """Return *s* on a ``LIGHTMAGENTA_EX`` background.

    NOTE(review): the name says gray but the colour used is light magenta;
    confirm which one is intended.
    """
    background = colorama.Back.LIGHTMAGENTA_EX
    return backColorStr(s, background)
19 | 
20 | 
def whiteBackStr(s):
    """Return *s* as black text on a white background."""
    # Apply the background first, then wrap the result in the text colour.
    onWhite = backColorStr(s, colorama.Back.WHITE)
    return foreColorStr(onWhite, colorama.Fore.BLACK)
23 | 
24 | 
def yellowStr(s):
    """Return *s* as yellow text."""
    textColor = colorama.Fore.YELLOW
    return foreColorStr(s, textColor)
27 | 
28 | 
def blueBackStr(s):
    """Return *s* on a blue background."""
    background = colorama.Back.BLUE
    return backColorStr(s, background)
31 | 
32 | 
def greenBackStr(s):
    """Return *s* on a green background."""
    background = colorama.Back.GREEN
    return backColorStr(s, background)
35 | 
36 | 
def redBackStr(s):
    """Return *s* on a red background."""
    background = colorama.Back.RED
    return backColorStr(s, background)
39 | 
40 | 
def logError(s):
    """Print *s* to standard output on a red background."""
    highlighted = redBackStr(s)
    print(highlighted)
43 | 
44 | 
def lenInBytes(string):
    """Return the number of bytes *string* occupies when encoded as UTF-8."""
    encoded = string.encode("utf-8")
    return len(encoded)
47 | 
48 | 
def writeText(file, str):
    """Write *str* to *file* and flush it straight away.

    Args:
        file: object with ``write`` and ``flush`` methods.
        str: text to write. The name shadows the builtin but is part of the
            signature, so it is kept.
    """
    file.write(str)
    # Flush after every write so the text is not left sitting in the buffer.
    file.flush()
52 | 


--------------------------------------------------------------------------------
/webpage_getter.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | from pathlib import Path
 3 | 
 4 | from http import cookiejar
 5 | from bs4 import BeautifulSoup
 6 | 
 7 | from selenium import webdriver
 8 | from selenium.webdriver.common.by import By
 9 | from selenium.webdriver.support.wait import WebDriverWait
10 | 
11 | 
class WebPageGetter(object):
    """Base class for fetching pages through a headless Chrome session.

    Subclasses are expected to set ``self.baseUrl`` (read by
    ``simpleGetPage``) and to implement ``getPage``.
    """

    def __init__(self, cookieFilePath, waitTime):
        """
        Put correct version of ChromeDriver.exe at path from https://chromedriver.chromium.org/downloads

        Args:
            cookieFilePath: path of a Mozilla/Netscape-format cookie file.
            waitTime: seconds to sleep after each page load.
        """
        options = webdriver.ChromeOptions()
        options.add_argument("--headless")
        options.add_argument("--ignore-certificate-errors-spki-list")
        self.browser = webdriver.Chrome(options=options)
        self.cookies = cookiejar.MozillaCookieJar(cookieFilePath)
        self.cookies.load()
        self.waitTime = waitTime

    def addCookies(self):
        """Copy every loaded cookie into the browser session."""
        # https://stackoverflow.com/questions/41906704/selenium-add-cookies-from-cookiejar
        for cookie in self.cookies:
            cookie_dict = {'domain': cookie.domain, 'name': cookie.name,
                           'value': cookie.value, 'secure': cookie.secure}
            # Optional fields are only sent when the cookie file provided them.
            if cookie.expires:
                cookie_dict['expiry'] = cookie.expires
            if cookie.path_specified:
                cookie_dict['path'] = cookie.path
            self.browser.add_cookie(cookie_dict)

    def getPage(self, url):
        """Fetch *url*; must be implemented by subclasses."""
        raise NotImplementedError

    def simpleGetPage(self, url):
        """Open *url* with the cookies applied, then sleep ``waitTime`` seconds."""
        print(f"Get page {url}")
        # Visit the base URL first, before the cookies are added.
        self.browser.get(self.baseUrl)
        self.addCookies()
        self.browser.get(url)
        time.sleep(self.waitTime)

    def __del__(self):
        # browser is missing when __init__ failed before creating it.
        browser = getattr(self, "browser", None)
        if browser is not None:
            # quit() ends the whole driver session; close() only closes the
            # current window and can leave the driver process running.
            browser.quit()
48 | 
49 | 
class WebPageGetter_JavLibrary(WebPageGetter):
    """Page getter for javlibrary.com."""

    def __init__(self, cookieFilePath, waitTime):
        super().__init__(cookieFilePath, waitTime)
        self.baseUrl = "https://www.javlibrary.com/"

    def getPage(self, url):
        """Load *url* and return the page source.

        If an element with class ``btnAdultAgree`` is present, the first one
        is clicked and the getter waits ``waitTime`` seconds before reading
        the page source.
        """
        self.simpleGetPage(url)
        agreeButtons = self.browser.find_elements(
            by=By.CLASS_NAME, value="btnAdultAgree")
        # The slice yields at most one element: only the first match is clicked.
        for agreeButton in agreeButtons[:1]:
            agreeButton.click()
            time.sleep(self.waitTime)

        return self.browser.page_source
64 | 
65 | 
class WebPageGetter_JavDB(WebPageGetter):
    """Page getter for javdb.com: opens the first search result's detail page."""

    def __init__(self, cookieFilePath, waitTime):
        super().__init__(cookieFilePath, waitTime)
        self.baseUrl = "https://javdb.com"

    def getPage(self, url):
        """Load the search page *url* and follow its first result.

        Returns:
            ``(page_source, simpletitle)`` for the first result's detail page
            (requested with ``?locale=en``), or ``("", "")`` when no element
            with id ``videos`` appears within ``waitTime`` seconds.
        """
        self.simpleGetPage(url)
        try:
            WebDriverWait(self.browser, self.waitTime).until(
                lambda x: x.find_element(By.ID, "videos"))
        except Exception:
            # The result list never showed up: report "nothing found".
            return "", ""

        firstResult = self.browser.find_element(
            by=By.XPATH, value='//*[@id="videos"]/div/div[1]/a')
        pathname = firstResult.get_attribute('pathname')
        # Build the link from baseUrl so the scheme matches the rest of the
        # session, and join with exactly one "/" (the pathname normally
        # starts with "/" already, which used to produce "javdb.com//...").
        videolink = self.baseUrl.rstrip("/") + "/" + \
            pathname.lstrip("/") + "?locale=en"
        simpletitle = self.browser.find_element(
            by=By.XPATH, value='//*[@id="videos"]/div/div[1]/a/div[3]').text

        self.simpleGetPage(videolink)

        return self.browser.page_source, simpletitle
93 | 
94 | 
95 | 


--------------------------------------------------------------------------------