├── .gitignore ├── AnimeRenamer ├── renamer.py └── title.txt ├── JAV ├── JAV.py ├── JAVAutoSorted.S.Local.py ├── README.md ├── config.py ├── javdb.txt ├── keyword.txt ├── search.py └── sql.py ├── LICENSE ├── Movie ├── MVAutoSort.py ├── README.md ├── config_sample.py ├── folder.txt ├── gen.py ├── get.py ├── region.txt ├── requirements.txt ├── search.py ├── sites │ ├── __init__.py │ ├── frds.py │ ├── ourbits.py │ ├── pter.py │ ├── ssd.py │ ├── tccf.py │ └── tjupt.py └── sql.py └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | Movie/__pycache__/ 4 | Movie/Test/ 5 | Movie/test.py 6 | Movie/folder.txt 7 | Movie/api.py 8 | Movie/AutoSort.db 9 | Movie/Movie.tsv 10 | Movie/.cookies 11 | Movie/sites/.cookies 12 | *.py[cod] 13 | *$py.class 14 | 15 | # C extensions 16 | *.so 17 | 18 | # Distribution / packaging 19 | .Python 20 | build/ 21 | develop-eggs/ 22 | dist/ 23 | downloads/ 24 | eggs/ 25 | .eggs/ 26 | lib/ 27 | lib64/ 28 | parts/ 29 | sdist/ 30 | var/ 31 | wheels/ 32 | *.egg-info/ 33 | .installed.cfg 34 | *.egg 35 | MANIFEST 36 | 37 | # PyInstaller 38 | # Usually these files are written by a python script from a template 39 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 40 | *.manifest 41 | *.spec 42 | 43 | # Installer logs 44 | pip-log.txt 45 | pip-delete-this-directory.txt 46 | 47 | # Unit test / coverage reports 48 | htmlcov/ 49 | .tox/ 50 | .coverage 51 | .coverage.* 52 | .cache 53 | nosetests.xml 54 | coverage.xml 55 | *.cover 56 | .hypothesis/ 57 | .pytest_cache/ 58 | 59 | # Translations 60 | *.mo 61 | *.pot 62 | 63 | # Django stuff: 64 | *.log 65 | local_settings.py 66 | db.sqlite3 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # celery beat schedule file 88 | celerybeat-schedule 89 | 90 | # SageMath parsed files 91 | *.sage.py 92 | 93 | # Environments 94 | .env 95 | .venv 96 | env/ 97 | venv/ 98 | ENV/ 99 | env.bak/ 100 | venv.bak/ 101 | 102 | # Spyder project settings 103 | .spyderproject 104 | .spyproject 105 | 106 | # Rope project settings 107 | .ropeproject 108 | 109 | # mkdocs documentation 110 | /site 111 | 112 | # mypy 113 | .mypy_cache/ 114 | 115 | #JAV 116 | JAV/Test/ 117 | JAV/@~Sorted/ 118 | JAV/@CodeList.txt 119 | JAV/@FileList.txt 120 | JAV/Key_nyaa.txt 121 | JAV/data.txt 122 | JAV/sign.png 123 | JAV/sign2.png 124 | 125 | #Movie 126 | Movie/config.py -------------------------------------------------------------------------------- /AnimeRenamer/renamer.py: -------------------------------------------------------------------------------- 1 | # coding=UTF-8 2 | import os , time 3 | 4 | mypath = os.getcwd() #執行目錄 5 | 6 | def logNprint(text,path=mypath,pr=True): 7 | logpath=path+"\\"+"rename.log" 8 | #if not os.path.isfile(logpath): 9 | #text="#Renamer Programed By GDST/LMI\n"+ text #整理訊息 10 | if pr : 11 | print(text) 12 | with open(logpath,"a", encoding = 'utf-8-sig') as data: 13 | data.write(str(text)+"\n") 14 | 15 | with open("title.txt" , "r", encoding = 'utf-8-sig') as data: 16 | List = [l.strip().split("\t",1) for l in data ] 17 | Dic ={} 18 | for i in List: 19 | Dic[i[0]] = i[1] 20 | KeyList = [ i[0] for i in List ] 21 | 22 | name = mypath[mypath.rfind("]")+1:] #作品名稱 23 | for 
root, dirs, files in os.walk(mypath): 24 | if mypath == root or mypath+"\\劇場版" in root : #略過特定資料夾 25 | continue 26 | 27 | currenttime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) #執行時間 28 | runtimetext="\n執行時間 : "+currenttime 29 | logNprint(runtimetext,pr=False,path=root) 30 | 31 | logNprint("\nPath : "+root.replace(mypath,".")+"\n",path=root) 32 | block = root.replace(mypath+"\\","").split("-") #字幕組&語言&畫質 33 | lang = block[1] if block[1] == block[-2] else "CHT" #語言(默認CHT) 34 | 35 | for file in sorted(files) : 36 | print(file) 37 | if ".txt" in file or ".py" in file or ".part" in file or ".log" in file: #略過 38 | continue 39 | if ".ass" in file or ".srt" in file: #略過字幕 40 | continue 41 | filepath1 = root + "\\" +file 42 | #logNprint("File : "+file #原檔案名稱 43 | file2 = file 44 | replaceList = ["1080","720","2160","1280","1920","BS11","2019","2018","S01","S02","S03"] #去除會被誤判的數字 45 | replaceList += [str(year) for year in range(2000,2020)] 46 | for rep in replaceList: 47 | file2=file2.replace(rep,"") 48 | 49 | infopos1 = file2.rfind("[") 50 | infopos2 = file2.rfind("]") 51 | file2 = file2.replace(file2[infopos1:infopos2+1],"") 52 | key = 0 53 | for i in KeyList: 54 | if file2.find(i) != -1 : 55 | key = i 56 | break 57 | if not key: 58 | continue 59 | key2 = key+" END" if "END" in file2 else key 60 | dotpos = file.rfind(".") #副檔名 61 | 62 | try: 63 | filename2 = ( "%s (%s)-%s[%s][%s][%s]%s" % (name,block[0],key2,Dic[key],block[-1],lang,file[dotpos:].lower())) 64 | filepath2 = root + "\\"+ filename2 65 | except: 66 | logNprint("*Error : "+file) 67 | try: 68 | logNprint(Dic[key]) 69 | except: 70 | pass 71 | logNprint(filepath1) 72 | continue 73 | if filepath1 == filepath2: #如果已經改名完成 74 | print("Exist : "+filename2) 75 | continue 76 | if not os.path.isfile(filepath2): 77 | os.rename(filepath1,filepath2) 78 | logNprint("File : "+file,path=root) 79 | logNprint("Rename : "+filename2,path=root) 80 | input("\n整理完成,請按Enter離開") -------------------------------------------------------------------------------- /AnimeRenamer/title.txt: -------------------------------------------------------------------------------- 1 | 01 第七個精靈 2 | 02 你能找到我嗎? 
3 | 03 七罪就是妳 4 | 04 變身 5 | 05 陷入絕望之中 6 | 06 分道揚鑣 7 | 07 給予的力量 8 | 08 降下黑暗的夜之魔王 9 | 09 五年前的天宮市 10 | 10 另一個世界另一個她 11 | 11 星辰墜落的夜之天使 12 | -------------------------------------------------------------------------------- /JAV/JAV.py: -------------------------------------------------------------------------------- 1 | #-*- coding: utf-8 -*- 2 | #v4.0 20190710 重新整理函數、加入預覽圖下載合併 3 | #v4.1 20190807 資料庫輸出、調整目錄結構 4 | #v4.2 未完成 相同檔案去重(檢查檔案大小) 5 | 6 | import os, requests, urllib, time, re 7 | from bs4 import BeautifulSoup 8 | #from fake_useragent import UserAgent 9 | from user_agent import generate_user_agent 10 | import config, search, sql 11 | 12 | ua = generate_user_agent() 13 | db_name = "%s\\%s" % (config.LogPath,config.LogName) if config.LogPath else config.LogName #SQL 14 | 15 | class Log: 16 | def NPrint(text): 17 | os.chdir(mypath) 18 | print(text) 19 | with open("error.log","a", encoding = 'utf8') as data: 20 | data.write(str(text)+"\n") 21 | def Text(text): 22 | with open("error.log","a", encoding = 'utf8') as data: 23 | data.write(str(text)+"\n") 24 | def SaveList(key,Title): 25 | fname = ("@FileList.txt" if Title else "@CodeList.txt") 26 | new = (title if Title else code) 27 | 28 | os.chdir(mypath+"\\@~Sorted\\"+key) 29 | try: #讀取先前的清單 30 | with open(fname , "r", encoding = 'utf8') as clog: 31 | SaveList = [l.strip() for l in clog ] 32 | except: 33 | SaveList = [] 34 | if new not in SaveList : 35 | SaveList += [new] 36 | else: 37 | return 38 | if len(SaveList) != 0: #如果非空目錄的話 39 | with open(fname,"w", encoding = 'utf8') as sdata: #寫檔 40 | for i in sorted(SaveList): 41 | sdata.write(i+"\n") 42 | def convert_bytes(num): 43 | for x in ['bytes', 'KB', 'MB', 'GB', 'TB']: 44 | if num < 1024.0: 45 | return "%3.1f %s" % (num, x) 46 | num /= 1024.0 47 | def file_size(file_path): 48 | if os.path.isfile(file_path): 49 | file_info = os.stat(file_path) 50 | return convert_bytes(file_info.st_size) 51 | def GetCode(filename): 52 | c = key.upper()+"-" 53 | if c in filename.upper(): 54 | cpos = filename.upper().find(c) 55 | elif key.upper() in filename.upper(): 56 | c = key.upper() 57 | cpos = filename.upper().find(c) 58 | filename = filename.upper().replace(c,c+"-") 59 | c = c+"-" 60 | else: 61 | return None 62 | for i in range(len(filename[cpos+len(c):])): 63 | if not filename[cpos+len(c)+i].isdigit(): 64 | code = filename[cpos:cpos+len(c)+i] 65 | code = code.upper() 66 | break 67 | if len(code) == len(c) : #如果找不到番號(番號跟關鍵字長度一樣) 68 | return None 69 | return code 70 | 71 | #要處理的番號清單 72 | with open("keyword.txt" , "r", encoding = 'utf-8-sig') as keydata: 73 | KeyList = [l.strip() for l in keydata if l[0]!="@"] 74 | #KeyList = list(set(KeyList)) #番號去重 75 | if not os.path.isdir(config.tempfolder): #如果不是資料夾 76 | os.mkdir(config.tempfolder) 77 | '''with open("keyword2.txt" , "r", encoding = 'utf-8-sig') as keydata: #找不到資料庫的特殊番號 (!待新增) 78 | Key2List = [l.strip().split(",") for l in keydata ] 79 | Key2Dic = {} 80 | for i in Key2List: 81 | Key2Dic[i[0]]=i[1]''' 82 | 83 | mypath = os.getcwd() #執行目錄 84 | for lsdir in sorted(os.listdir(mypath)): 85 | if not os.path.isdir(mypath+"\\"+lsdir): #如果不是資料夾 86 | continue 87 | if lsdir[0]=="@" or lsdir == "__pycache__" or "新作" in lsdir or "合集" in lsdir: #略過根目錄下帶有@的資料夾 (個人化) 88 | continue 89 | if not os.path.isdir(mypath+"\\@~Sorted\\"): 90 | os.mkdir(mypath+"\\@~Sorted\\") 91 | for root, dirs, files in os.walk(mypath+"\\"+lsdir): 92 | print("\nPath : "+root) 93 | for i in files: 94 | if "padding_file" in i: #跳過冗贅檔案 95 | continue 96 | if not 
re.search(r'.+?\.(mkv|mp4|ts|wmv|avi|flv|rmvb|iso|mov|m2ts|ass|srt)', i.lower()) \ 97 | and not re.match(r'.+?(_|-)?(s|screen|screenshot)\.(jpg|jpeg|png)', i.lower()): 98 | #and not re.match(r'.+?\.(jpg|jpeg|png)', i.lower()) : #跳過非影像檔和非截圖 99 | continue 100 | '''for key2 in Key2Dic.keys(): #對於無資料庫的番號進行處理 (!待新增) 101 | key2 = key2''' 102 | for key in KeyList: 103 | dirpath = mypath 104 | code = GetCode(i) #從檔名找番號 105 | if key=="FC2" and "FC2" in i.upper() and re.search(r'\d{6,7}',i): #特殊番號 106 | code = "FC2-" + re.search(r'\d{6,7}',i).group(0) 107 | if not code : #如果不能夠從檔案名稱找出番號 108 | continue 109 | if len(code[code.find("-")+1:]) >= 4: #例外處理:部分番號會用4.5位數字,但搜尋時必須為3位 110 | code = code.replace("-00","-") 111 | code = code.replace("-0","-") 112 | #if key[0].isdigit() or key =="SIRO" or key =="KIRAY": 113 | #continue 114 | print("Code :",code) 115 | 116 | query = sql.query(db_name,'JAV',code) #查詢舊有資料 117 | if query == None: #若不存在舊有資料→到網路查詢 118 | if not os.path.isdir(mypath+"\\@~Sorted\\"+key): 119 | os.mkdir(mypath+"\\@~Sorted\\"+key) 120 | #result = search.Database1(key,code,mypath) 121 | if key == "T28": #特殊番號例外處理 122 | result = search.Database3(key,code.replace("T28-","T-28"),mypath) 123 | if result['success']: 124 | result['code'] = result['code'].replace("T-28","T28-") 125 | result['save'][0] = result['save'][0].replace("T-28","T28-") 126 | elif key[0].isdigit() or key =="SIRO" or key =="KIRAY": 127 | result = search.Database2(key,code,mypath) 128 | elif key=="FC2" and "FC2" in i.upper(): 129 | result = search.Database3(key,code,mypath) 130 | time.sleep(2) 131 | else: 132 | result = search.Database1(key,code,mypath) 133 | if not result['success']: #如果不存在對應的資料 134 | print("*Error :",result['error']) 135 | result = search.Database1(key,code,mypath) if key[0].isdigit() or key =="SIRO" or key =="KIRAY" else search.Database2(key,code,mypath) #調換 136 | if not result['success']: 137 | if key not in ["FC2","T28"]: 138 | result = search.Database3(key,code,mypath) 139 | if not result['success']: 140 | print("*Error :",result['error']) 141 | continue 142 | else: 143 | print("*Error :",result['error']) 144 | continue 145 | 146 | save = result['save'] 147 | sql.input(db_name,'JAV', save) 148 | dirpath = result['dirpath'] 149 | else: 150 | if key=="FC2": 151 | dirpath = mypath+"\\@~Sorted\\@"+key+"\\"+query[7]+"\\"+code 152 | else: 153 | number = int(code[code.find("-")+1:]) 154 | order = "%03d~%03d" % (number-100+1,number) if number%100 == 0 else "%03d~%03d" % ((number//100)*100+1,(number//100+1)*100) 155 | dirpath = mypath+"\\@~Sorted\\"+key+"\\"+order+"\\"+code 156 | 157 | print("File : "+i) 158 | i2=i #檔案移動處理 159 | i2=i2.replace("_hhd000.com_免翻_墙免费访问全球最大情_色网站P_ornhub_可看收费内容","") 160 | i2=i2.replace("@hhd000.com_免翻#墙免费访问全球最大情#色网站P#ornhub,可看收费内容","") 161 | 162 | if not os.path.isfile(dirpath+"\\"+i2): #若檔案不存在 163 | if not os.path.isdir(dirpath): 164 | os.makedirs(dirpath) 165 | try: 166 | os.rename(root+"\\"+i,dirpath+"\\"+i2) 167 | print("Move : "+dirpath) 168 | except FileNotFoundError as e: 169 | print("*Error : FileNotFound "+i) 170 | continue 171 | except PermissionError as e: 172 | print("*Error : PermissionError "+i) 173 | continue 174 | else: #若檔案存在 175 | file1 = root+"\\"+i 176 | file2 = dirpath+"\\"+i2 177 | if config.CheckFile and file_size(file1) == file_size(file2) : #若需要比對檔案,且存在的檔案相同 178 | os.remove(file1) 179 | print("*Error : Exist same file \n *Remove : "+file1) 180 | else: #若存在的檔案不同 181 | for j in range(1,10): 182 | dotpos = i2.rfind(".") 183 | i3 = i2[:dotpos]+"~"+str(j)+i2[dotpos:] 
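# If duplicate checking (config.CheckFile) is enabled and an existing "~j" candidate has the same reported size, the source file is removed below as a duplicate; otherwise it is renamed to the first free ~1..~9 suffix.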
184 | if config.CheckFile and file_size(file1) == file_size(dirpath+"\\"+i3) : #若需要比對檔案,且存在的檔案相同 185 | os.remove(file1) 186 | print("*Error : Exist same file \n *Remove : "+file1) 187 | break 188 | if not os.path.isfile(dirpath+"\\"+i3): 189 | try: 190 | os.rename(root+"\\"+i,dirpath+"\\"+i3) 191 | except FileNotFoundError: 192 | print("*Error : FileNotFound "+file1) 193 | break 194 | print("*Exist : "+i+"\n *Rename : "+i3) 195 | print("Move : "+dirpath) 196 | break 197 | #sql.input(db_name,'JAV', save) 198 | break 199 | input("\n整理完成,請按Enter離開") -------------------------------------------------------------------------------- /JAV/JAVAutoSorted.S.Local.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | ##Local Ver 3 | #使用本地資料,不爬取 4 | 5 | import os , time ,filecmp ,hashlib 6 | 7 | CheckFile = True #是否進行重複檔案比對 8 | 9 | class Log: 10 | def NPrint(text): 11 | os.chdir(mypath) 12 | print(text) 13 | with open("error.log","a", encoding = 'utf8') as data: 14 | data.write(str(text)+"\n") 15 | def Text(text): 16 | with open("error.log","a", encoding = 'utf8') as data: 17 | data.write(str(text)+"\n") 18 | def SaveList(key,Title): 19 | fname = ("@FileList.txt" if Title else "@CodeList.txt") 20 | new = (title if Title else code) 21 | 22 | os.chdir(mypath+"\\@~Sorted\\"+key) 23 | try: #讀取先前的清單 24 | with open(fname , "r", encoding = 'utf8') as clog: 25 | SaveList = [l.strip() for l in clog ] 26 | except: 27 | SaveList = [] 28 | if new not in SaveList : 29 | SaveList += [new] 30 | else: 31 | return 32 | if len(SaveList) != 0: #如果非空目錄的話 33 | with open(fname,"w", encoding = 'utf8') as sdata: #寫檔 34 | for i in sorted(SaveList): 35 | sdata.write(i+"\n") 36 | 37 | def convert_bytes(num): 38 | for x in ['bytes', 'KB', 'MB', 'GB', 'TB']: 39 | if num < 1024.0: 40 | return "%3.1f %s" % (num, x) 41 | num /= 1024.0 42 | def file_size(file_path): 43 | if os.path.isfile(file_path): 44 | file_info = os.stat(file_path) 45 | return convert_bytes(file_info.st_size) 46 | def hashs(fileName, type="md5", block_size=128 * 1024): 47 | """ Support md5(), sha1(), sha224(), sha256(), sha384(), sha512(), blake2b(), blake2s(), 48 | sha3_224, sha3_256, sha3_384, sha3_512, shake_128, and shake_256 49 | """ 50 | with open(fileName, 'rb') as file: 51 | hash = hashlib.new(type, b"") 52 | while True: 53 | data = file.read(block_size) 54 | if not data: 55 | break 56 | hash.update(data) 57 | return hash.hexdigest() 58 | 59 | #要處理的番號清單 60 | with open("data.txt" , "r", encoding = 'utf_8_sig') as keydata: 61 | DataList = [l.strip().split(" ",1) for l in keydata ] 62 | Dic = {} 63 | for i in DataList: 64 | Dic[i[0]] = i[1] 65 | 66 | mypath = os.getcwd() #執行目錄 67 | 68 | for root, dirs, files in os.walk(mypath): 69 | if mypath+"\\@~Sorted" in root or mypath+"\\@" in root : #略過根目錄下帶有@的資料夾 (特製) 70 | continue 71 | if not os.path.isdir(mypath+"\\@~Sorted\\"): 72 | os.mkdir(mypath+"\\@~Sorted\\") 73 | os.chdir(root) #更改到當前目錄 74 | print("\nPath : "+root) 75 | 76 | for key in Dic.keys(): 77 | for i in files: 78 | if ".part" in i: #略過下載中檔案 79 | continue 80 | if key.upper() in i.upper() or key.upper().replace("-","",1) in i.upper(): #如果能夠從檔案名稱找出番號 81 | print("Code :",key) 82 | dirpath = mypath+"\\@~Sorted\\"+key+" "+Dic[key] 83 | if not os.path.isdir(dirpath): 84 | try: 85 | os.mkdir(dirpath) 86 | except: #若無法建立資料夾(名稱太長) 87 | dirpath = mypath+"\\@~Sorted\\"+key 88 | if not os.path.isdir(dirpath): 89 | os.mkdir(dirpath) 90 | print("File : "+i) 91 | fsize = file_size(root+"\\"+i).split(" ") 
#檢查檔案大小 92 | 93 | if not os.path.isfile(dirpath+"\\"+i): #若檔案不存在 94 | os.rename(root+"\\"+i,dirpath+"\\"+i) 95 | print("Move : "+dirpath) 96 | else: #若檔案存在 97 | file1 = root+"\\"+i 98 | file2 = dirpath+"\\"+i 99 | if CheckFile and file_size(file1) == file_size(file2) : #若需要比對檔案,且存在的檔案相同 100 | #if CheckFile and file_size(file1) == file_size(file2) and hashs(file1) == hashs(file2) : #若需要比對檔案,且存在的檔案相同 101 | os.remove(file1) 102 | Log.NPrint("*Error : Exist same file \n *Remove : "+file1) 103 | else: #若存在的檔案不同 104 | for j in range(1,10): 105 | dotpos = i.rfind(".") 106 | i3 = i[:dotpos]+"~"+str(j)+i[dotpos:] 107 | if not os.path.isfile(dirpath+"\\"+i3): 108 | os.rename(root+"\\"+i,dirpath+"\\"+i3) 109 | Log.NPrint("*Exist : "+i+"\n *Rename : "+i3) 110 | print("Move : "+dirpath) 111 | break 112 | input("\n整理完成,請按Enter離開") -------------------------------------------------------------------------------- /JAV/README.md: -------------------------------------------------------------------------------- 1 | # JAVAutoSort 2 | 3 | ![執行示例](https://i.imgur.com/zQCvrT1.png) 4 | 5 | ## 需求套件 6 | pip install requests bs4 lxml 7 | 8 | ## 不同版本簡介&使用說明 9 | * v1.7 20180929 10 | 11 | 最初版本,根據輸入番號(e.g.:ABP)尋找符合之檔案,遇到重複檔案會跳過並將資訊儲存在error.log,會將整理過的檔案儲存在執行目錄下@Sorted資料夾 12 | 13 | 邏輯為:識別檔名內是否存在番號→取得番號→搜尋標題與封面→下載到以標題為名的資料夾→將搜尋到的檔案搬移過去 14 | 15 | * v2.0 20180929 16 | 17 | 相比v1.7增加了檔案比對跟重新命名,若同檔名的檔案比對相同仍舊會跳過儲存在error.log 18 | 19 | 問題在比對檔案似乎仍需要一段時間,可能是演算問題或因為檔案是掛載在雲端的關係 20 | 21 | * v3.x 22 | 23 | > v3.0 20180930 24 | 25 | > 增加批次處理功能,可將欲處理之番號儲存在keyword.txt(每行一個,注意不要有空行) 26 | 27 | > 會將整理過的檔案儲存在執行目錄下 /@Sorted/番號/ 28 | 29 | > v3.1 20181006 30 | 31 | > 更新運算邏輯、重新整理架構、新增素人片番號的比對及封面下載、 32 | 33 | > 比對檔案改為直接比對檔案大小、以及些許細部調整 34 | 35 | ## 注意事項 36 | 37 | * 200GANA 300NTK等番號會下載影片截圖,需時較長為正常現象!會考慮加入自訂參數,可以設定成只下載封面 38 | 39 | ## 待更新&研究 40 | 41 | 透過MultiTasK增加下載速度(分布式爬蟲) 42 | -------------------------------------------------------------------------------- /JAV/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | #Main.py 4 | CheckFile = True #是否進行重複檔案比對 5 | 6 | #search.py 7 | tempfolder = "D:\\Cache" #圖片快取存放 8 | MergeAllPreview = True 9 | javdb = "D:\\GoogleDrive\\AutoSort\\javdb.txt" 10 | #signpic = "D:\\GoogleDrive\\AutoSort\\sign.png" #浮水印,留空為不使用 11 | signpic = "" 12 | 13 | #sql.py 14 | LogPath = "D:\\GoogleDrive\\AutoSort" 15 | LogName = "JAV.db" 16 | 17 | if not os.path.isdir(tempfolder): 18 | os.mkdir(tempfolder) 19 | if not os.path.isdir(LogPath): 20 | os.mkdir(LogPath) 21 | -------------------------------------------------------------------------------- /JAV/javdb.txt: -------------------------------------------------------------------------------- 1 | # Netscape HTTP Cookie File 2 | # http://curl.haxx.se/rfc/cookie_spec.html 3 | # This file was generated by EditThisCookie 4 | .javdb.com TRUE / TRUE 1600974431 __cf_bm a96ad3d3e703c36c91abfb450baed2ebe4613f13-1600972631-1800-AelddG/CeL5phqIEE2vT6tRciYstxoAaiDKHTmN8CKku587MRQJggA1PjBbVZQJlQvmjjrqYmbEInjUlyDmqDWhaoDQ6NbSTkXJN+5IWZhvsA22FmPPW0vi3pj0WggNrBunmjLwFT3lNjQnxNlujfTE= 5 | .javdb.com TRUE / FALSE 1603557617 __cfduid dd916250d1c00444c3d001052518fd7ef1600965617 6 | .javdb.com TRUE / FALSE 1601052019 __extfc 1 7 | .javdb.com TRUE / FALSE 1664044680 _ga GA1.2.566478709.1600965619 8 | .javdb.com TRUE / FALSE 1600972691 _gat_gtag_UA_160995389_2 1 9 | .javdb.com TRUE / FALSE 1601059080 _gid GA1.2.1921757634.1600965619 10 | javdb.com FALSE / FALSE 1602268680 _jdb_session 
%2Fu7l%2FnWabtkFOBhWTcWb05QaQOS%2F2VdoKNL2n9g3SKBlbdQwDRwRdFRMpb8nEml1Pfz7ArgyVMInlrAb8VYrYUzi4YSK93gh3sqDg8S%2FT5%2FNzbofjUntpYsVqX3CT4blE1WgVbdvK0f4RWSrpmkZpjkBeCGd4FIgqtxNhQm9OA88cFuBML3lgz5H1d07UEIH0RHl6gjyiJAmRq563lu83b5qP9AakYu7cjnbylvK4CKocvtJ4T9Fu79Wq4tz4%2FSbMIkbHbGmGmnOG0gGAB87Slkpc8jWPtVOX3PkeLunzTnFPxaNGLSbjR85Oce5Ltn%2BSFBjm3pgJ957ljHvLm%2Bmgnnv--UQd8cqTz4oXUHinj--IFf%2FXqP0EUmin4l271UxRw%3D%3D 11 | javdb.com FALSE / FALSE 0 locale zh 12 | javdb.com FALSE / FALSE 1632501620 over18 1 13 | javdb.com FALSE / FALSE 0 redirect_to %2Fv%2FWVP8g 14 | javdb.com FALSE / FALSE 1602268680 remember_me_token eyJfcmFpbHMiOnsibWVzc2FnZSI6IklsODNUVlJOWTJWNGR6TnpjMVF0VEd0ellYbFJJZz09IiwiZXhwIjoiMjAyMC0xMC0wOVQxODozNzo1OS4wMDBaIiwicHVyIjoiY29va2llLnJlbWVtYmVyX21lX3Rva2VuIn19--815a7e457118d9f641d0deea23b953590d6f7b80 -------------------------------------------------------------------------------- /JAV/keyword.txt: -------------------------------------------------------------------------------- 1 | 200GANA 2 | 259LUXU 3 | 261ARA 4 | 277DCV 5 | 300MAAN 6 | 300MIUM 7 | 300NTK 8 | 152EKO 9 | 332NAMA 10 | 274ETQT 11 | 302GERK 12 | 279UTSU 13 | 336KNB 14 | 253KAKU 15 | 326EVA 16 | 326JKK 17 | 326MTP 18 | 326NKR 19 | 326AID 20 | 326SCP 21 | 336DTT 22 | 236GAH 23 | 328HMDN 24 | KIRAY 25 | SIRO 26 | ABP 27 | AVOP 28 | AVOD 29 | EBOD 30 | HUSR 31 | IPX 32 | IPZ 33 | KAWD 34 | MDTM 35 | MIAE 36 | MXGS 37 | OAE 38 | OFJE 39 | SNIS 40 | SSNI 41 | STARS 42 | STAR 43 | SVDVD 44 | SVOMN 45 | WANZ 46 | ADN 47 | ARM 48 | AVSA 49 | CJOD 50 | CLUB 51 | DASD 52 | DAZD 53 | DVAJ 54 | EKDV 55 | HUNTA 56 | IBW 57 | JUY 58 | MGT 59 | RBD 60 | REAL 61 | SDAB 62 | SDDE 63 | SDHS 64 | SDMU 65 | SDNM 66 | SGA 67 | SHKD 68 | SOAN 69 | SORA 70 | SQTE 71 | TKI 72 | URE 73 | XRW 74 | XVSR 75 | YRH 76 | AMA 77 | BAZX 78 | BBAN 79 | DAVK 80 | DDK 81 | DDT 82 | DMOW 83 | DOCP 84 | DSVR 85 | HOMA 86 | MANE 87 | NHDTB 88 | ONEZ 89 | PKPD 90 | PPPD 91 | PRED 92 | PRTD 93 | ZEX 94 | ADVO 95 | AGEMIX 96 | APKH 97 | APNS 98 | BCDP 99 | BDA 100 | BF 101 | CMV 102 | CPDE 103 | CUT 104 | ETQR 105 | FONE 106 | FSKT 107 | FSRE 108 | GDJU 109 | GENT 110 | GETS 111 | GVG 112 | HFD 113 | HND 114 | HODV 115 | HONB 116 | IENE 117 | INCT 118 | IRCP 119 | JKSR 120 | JUFD 121 | JUKF 122 | JUTN 123 | KAGP 124 | KDKJ 125 | KTB 126 | KTKC 127 | KTKL 128 | KTKX 129 | KTKY 130 | KTKZ 131 | KTRA 132 | TIKB 133 | TIKC 134 | TIKF 135 | TIKM 136 | TIKJ 137 | TIKP 138 | LOL 139 | MDB 140 | MEKI 141 | MEYD 142 | MIDE 143 | MIFD 144 | MIRD 145 | MISM 146 | MIZD 147 | MUCD 148 | MUDR 149 | MVG 150 | NBD 151 | NNPJ 152 | NSPS 153 | PIYO 154 | PTS 155 | PXH 156 | RBB 157 | TAAK 158 | TPPN 159 | TRUM 160 | VECR 161 | KRI 162 | T28 163 | KBI 164 | MIAA 165 | MAKT 166 | KANE 167 | MVSD 168 | TMDI 169 | TMCY 170 | MKON 171 | SUPA 172 | MMGH 173 | OKB 174 | OKP 175 | OKS 176 | IANF 177 | AOZ 178 | DIC 179 | JUFE 180 | DVDMS 181 | TRE 182 | AMBI 183 | AMBS 184 | CESD 185 | MMND 186 | MIGD 187 | NACR 188 | HIKR 189 | DOKS 190 | MDBK 191 | YST 192 | YTR 193 | MIST 194 | MGMQ 195 | MOPP 196 | AKA 197 | EIKI 198 | NSM 199 | KRU 200 | HMDN 201 | SIMM 202 | MKMP 203 | PPT 204 | PPS 205 | KKJ 206 | GDTM 207 | EVIS 208 | EMLB 209 | BGN 210 | NZK 211 | GEKI 212 | GNE 213 | HNDB 214 | RKI 215 | ORETD 216 | ORE 217 | HARU 218 | DANDY 219 | SUJI 220 | SABA 221 | FINH 222 | FNEO 223 | FSKI 224 | HDKA 225 | JBD 226 | ODVHJ 227 | MUKD 228 | MUKC 229 | TUE 230 | DBER 231 | DAKH 232 | DBVB 233 | DBEB 234 | DYIB 235 | CACA 236 | SCR 237 | ATOM 238 | MILK 239 | MADM 240 | JYA 241 | TXCD 
242 | ASI 243 | HOKS 244 | SIM 245 | ATID 246 | MDYD 247 | TYOD 248 | LID 249 | GENS 250 | GENM 251 | NUBI 252 | CHRV 253 | ENDX 254 | OPPW 255 | LZPL 256 | 27ID 257 | DNW 258 | YMDD 259 | HNDS 260 | KWBD 261 | BLOR 262 | CHN 263 | BAK 264 | GMEN 265 | LLDV 266 | POST 267 | MDS 268 | SDMM 269 | SDJS 270 | KMHR 271 | SHYN 272 | SHIC 273 | SIMGE 274 | CHAE 275 | MIMK 276 | BDSR 277 | ISJ 278 | ANX 279 | MRSS 280 | DOA 281 | VRTM 282 | UMSO 283 | BCPV 284 | EYAN 285 | HSL 286 | WZEN 287 | SOJU 288 | QRDA 289 | USBA 290 | MMUS 291 | SLAP 292 | CMN 293 | SKSK 294 | BLK 295 | EKW 296 | DNJR 297 | BOBB 298 | SKMJ 299 | OYC 300 | PAKO 301 | WAVR 302 | KAVR 303 | AVOPVR 304 | MAXVR 305 | YAL 306 | MBM 307 | CAWD 308 | EYS 309 | URHJ 310 | URLH 311 | URKH 312 | URKK 313 | URPW 314 | URVRSP 315 | OME 316 | MERD 317 | SUPD 318 | KFNE 319 | NITR 320 | SDAM 321 | SDMF 322 | SDNT 323 | TIK 324 | BF 325 | CUT 326 | MONE 327 | GENT 328 | IENF 329 | JUNY 330 | KTDS 331 | LOL 332 | SALO 333 | JRZD 334 | HMNF 335 | APOD 336 | IESM 337 | BUZ -------------------------------------------------------------------------------- /JAV/search.py: -------------------------------------------------------------------------------- 1 | #-*- coding: utf-8 -*- 2 | import os, re, requests, math, shutil 3 | from bs4 import BeautifulSoup 4 | from time import sleep 5 | from user_agent import generate_user_agent 6 | import http.cookiejar 7 | from PIL import Image 8 | import config, sql 9 | 10 | #UA = UserAgent().random 11 | UA = generate_user_agent() 12 | mypath = os.getcwd() 13 | 14 | def ImageDL(imgurl,filename): #圖片下載 15 | r = requests.get(imgurl,headers = {'User-Agent':UA},stream=True) 16 | with open(filename, "wb") as imgdata: 17 | imgdata.write(r.content) 18 | if 'Content-Length' in r.headers.keys() and os.stat(filename).st_size != int(r.headers['Content-Length']): #檢查檔案大小,避免下載失敗 19 | ImageDL(imgurl,filename) 20 | def Merge(code,allpreview,tempfolder="Cache",signpic=False): #下載並合併預覽圖 21 | execdir = tempfolder+"\\"+code 22 | 23 | if not os.path.isdir(execdir): 24 | os.mkdir(execdir) 25 | os.chdir(execdir) 26 | for preview,prenum in zip(allpreview,range(len(allpreview))): 27 | preview = preview[:preview.rfind("?")] 28 | filename = "pre%02d_%s" % (prenum,preview[preview.rfind("/")+1:]) 29 | if not os.path.isfile(filename): 30 | ImageDL(preview,filename) 31 | try: 32 | imgs = [Image.open(fn) for fn in sorted(os.listdir()) 33 | if re.match(r'.+?\.(jpg|jpeg|png)', fn) and not fn.endswith("_preview.jpg") and os.stat(fn).st_size > 0] #打開所有預覽圖 34 | except OSError: 35 | try: 36 | shutil.rmtree(execdir) 37 | except PermissionError: 38 | return False 39 | res = Merge(code,allpreview,tempfolder,signpic) 40 | return res 41 | width,height=0,0 42 | for img in imgs: #獲取長寬(避免尺寸不一) 43 | width2 , height2 = img.size 44 | width = width2 if width2 > width else width 45 | height = height2 if height2 > height else height 46 | 47 | if len(imgs) <= 5: Column =len(imgs) 48 | elif len(imgs)%5 == 0: Column =5 49 | elif len(imgs)%4 == 0: Column =4 50 | elif len(imgs)%3 == 0: Column =3 51 | else: Column =5 52 | result = Image.new(imgs[0].mode,(width*Column,height*(math.ceil(len(imgs)/Column)) )) 53 | for order,img in enumerate(imgs): #貼上圖片 54 | width2 , height2 = img.size #尺寸不符的做置中 55 | result.paste(img, box=(width*(order%Column)+(width-width2)//2 ,height*(order//Column)+(height-height2)//2 )) 56 | if signpic : #浮水印 57 | signimg = Image.open(signpic) 58 | signimg = signimg.convert('RGBA') 59 | width3 , height3 = signimg.size 60 | result.paste(signimg, 
box=(width*Column-width3,height*(math.ceil(len(imgs)/Column))-height3 ) ,mask=signimg ) 61 | result.save(code+"_preview.jpg") #儲存 62 | return True 63 | 64 | def Sort2Dir(key,code,mypath,mode=1,sub=''): 65 | global dirpath 66 | if key == "T28": #特殊番號例外處理 67 | code = code.replace("T-28","T28-") 68 | number = int(code[code.find("-")+1:]) 69 | if mode ==1: 70 | order = "%03d~%03d" % (number-100+1,number) if number%100 == 0 else "%03d~%03d" % ((number//100)*100+1,(number//100+1)*100) 71 | dirpath = mypath+"\\@~Sorted\\"+key+"\\"+order+"\\"+code 72 | elif mode ==2: #FC2 73 | sub = sub.replace(":",":") 74 | dirpath = mypath+"\\@~Sorted\\@"+key+"\\"+sub+"\\"+code 75 | if not os.path.isdir(dirpath): 76 | os.makedirs(dirpath) 77 | os.chdir(dirpath) 78 | if coverurl==None or not coverurl: 79 | print("*Error : No Cover.") 80 | return 81 | 82 | coverfile = code+"_cover.jpg" 83 | r = requests.get(coverurl,headers = {'User-Agent':UA}) 84 | if not os.path.isfile(coverfile) or os.stat(coverfile).st_size == 0: 85 | with open(coverfile, "wb") as imgdata: 86 | imgdata.write(r.content) 87 | os.chdir(dirpath[:dirpath.rfind("\\")]) 88 | coverfile2 = title+".jpg" 89 | if not os.path.isfile(coverfile) and not os.path.isfile(coverfile2): 90 | try: 91 | with open(coverfile2, "wb") as imgdata: 92 | imgdata.write(r.content) 93 | except: 94 | with open(code+".jpg", "wb") as imgdata: 95 | imgdata.write(r.content) 96 | print("CoverDL : "+title) 97 | 98 | def Database1(key,code,mypath): #搜尋JAVBUS 99 | global dirpath,title,coverurl 100 | url = "https://www.javbus.com/"+code 101 | response = requests.get(url,headers = {'User-Agent':UA}) 102 | response.encoding = 'UTF-8' 103 | soup = BeautifulSoup(response.text, 'lxml') 104 | 105 | if soup.find("title").getText() == "404 Not Found" or soup.find("title").getText() == "404 Page Not Found! 
- JavBus": 106 | return {'success':False,'error':code+" 404 Not Found"} 107 | elif soup.find("h3") == None: 108 | return {'success':False,'error':code+" Unknown Error"} 109 | 110 | article = soup.find("div", {"class": "container"}) 111 | if article == None: 112 | return {'success':False,'error':code+" Unknown Error"} 113 | 114 | title = article.find("h3").getText() 115 | coverurl = article.find("a", {"class": "bigImage"}).get("href") 116 | allinfo = article.find("div",{"class":"col-md-3 info"}).find_all("p") 117 | code,date,time,dierector,producer,pulisher,series,genre,actress,allpreview="","","","","","","","","",[] 118 | if article.find("div",{"id":"sample-waterfall"}): 119 | waterfall = article.find("div",{"id":"sample-waterfall"}).find_all("a",{"class":"sample-box"}) 120 | allpreview = [prev.get("href").strip() for prev in waterfall] 121 | for nfo in range(len(allinfo)): 122 | if allinfo[nfo].getText().split(" ")[0] == "識別碼:": 123 | code = allinfo[nfo].getText().split(" ")[1].strip() 124 | elif allinfo[nfo].getText().split(" ")[0] == "發行日期:": 125 | date = allinfo[nfo].getText().split(" ")[1].strip() 126 | elif allinfo[nfo].getText().split(" ")[0] == "長度:": 127 | time = allinfo[nfo].getText().split(" ")[1].strip() 128 | elif allinfo[nfo].getText().split(" ")[0] == "導演:": 129 | dierector = allinfo[nfo].getText().split(" ")[1].strip() 130 | elif allinfo[nfo].getText().split(" ")[0] == "製作商:": 131 | producer = allinfo[nfo].getText().split(" ")[1].strip() 132 | elif allinfo[nfo].getText().split(" ")[0] == "發行商:": 133 | pulisher = allinfo[nfo].getText().split(" ")[1].strip() 134 | elif allinfo[nfo].getText().split(" ")[0] == "系列:": 135 | series = allinfo[nfo].getText().split(" ")[1].strip() 136 | elif allinfo[nfo].getText() == "類別:": 137 | genre = [g.getText().strip() for g in allinfo[nfo+1].find_all("span",{"class":"genre"}) ] 138 | elif allinfo[nfo].getText() == "演員:": 139 | if nfo+1 < len(allinfo): 140 | actress = [g.getText().strip() for g in allinfo[nfo+1].find_all("span",{"class":"genre"}) ] 141 | Sort2Dir(key,code,mypath) 142 | os.chdir(mypath) 143 | mergename = code+"_preview.jpg" 144 | mergepath = config.tempfolder+"\\"+code 145 | if not os.path.isfile(dirpath+"\\"+mergename) and len(allpreview)>0: 146 | mergeres = Merge(code,allpreview,tempfolder=config.tempfolder,signpic=config.signpic) 147 | if mergeres: 148 | shutil.move(mergepath+"\\"+mergename,dirpath+"\\"+mergename) #Move 149 | #shutil.rmtree(mergepath) #清除Cache 150 | save = [code,title.replace(code,'').strip(),series,",".join(actress),",".join(genre),date,time,dierector,producer,pulisher] 151 | return {'success':True,'dirpath':dirpath,'code':code,'save':save,'title':title.replace(code,'').strip()} 152 | 153 | def Database2(key,code,mypath): #搜尋JAV321 154 | global dirpath,title,coverurl 155 | surl = "https://www.jav321.com/search" 156 | payload = {'sn': code} 157 | response = requests.post(url=surl, data=payload, headers={'User-Agent':UA}) 158 | response.encoding = 'UTF-8' 159 | soup = BeautifulSoup(response.text, 'lxml') 160 | if soup.find("div", {"class": "alert"}): 161 | return {'success':False,'error':soup.find("div", {"class": "alert"}).getText()} 162 | elif soup.find("h3") == None: 163 | return {'success':False,'error':'Unknown Error'} 164 | 165 | t1 = soup.find("h3").getText() 166 | t2 = soup.find("h3").find("small").getText() 167 | title = code + " " +t1.replace(t2,"").strip() 168 | imgs = soup.find_all("div","col-xs-12 col-md-12")[:-1] 169 | imglist = [i.find("img").get("src") for i in imgs] 170 | if len(imglist) == 
0: 171 | return {'success':False,'error':"No Cover."} 172 | coverurl = imglist[0] 173 | allpreview = imglist[1:] 174 | 175 | allinfo = soup.find("div",{"class":"col-md-9"}) 176 | allinfo = str(allinfo).split("<br/>
") 177 | 178 | actress,producer,genre,code,date,time,series="","","","","","","" 179 | for nfo in allinfo: 180 | nfo2 = BeautifulSoup(nfo, 'lxml').getText() 181 | if "女优:" in nfo2: 182 | actress = nfo2.replace("女优:","").strip().split("   ") 183 | elif "片商:" in nfo2: 184 | producer = nfo2.replace("片商:","").strip() 185 | elif "标签:" in nfo2: 186 | genre = nfo2.replace("标签:","").strip().split(" ") 187 | elif "番号:" in nfo2: 188 | code = nfo2.replace("番号:","").strip().upper() 189 | code = key+code[code.find("-"):] 190 | elif "发行日期:" in nfo2: 191 | date = nfo2.replace("发行日期:","").strip() 192 | elif "播放时长:" in nfo2: 193 | time = nfo2.replace("播放时长:","").strip().replace("分钟","分鐘") 194 | elif "系列:" in nfo2: 195 | series = nfo2.replace("系列:","").strip() 196 | 197 | Sort2Dir(key,code,mypath) 198 | os.chdir(mypath) 199 | mergename = code+"_preview.jpg" 200 | mergepath = config.tempfolder+"\\"+code 201 | if not os.path.isfile(dirpath+"\\"+mergename) and len(allpreview)>0: 202 | mergeres = Merge(code,allpreview,tempfolder=config.tempfolder,signpic=config.signpic) 203 | if mergeres: 204 | shutil.move(mergepath+"\\"+mergename,dirpath+"\\"+mergename) #Move 205 | #shutil.rmtree(mergepath) #清除Cache 206 | save = [code,title.replace(code,'').strip(),series,",".join(actress),",".join(genre),date,time,'',producer,''] 207 | return {'success':True,'dirpath':dirpath,'code':code,'save':save,'title':title.replace(code,'').strip()} 208 | 209 | def Database3(key,code,mypath,cookies=config.javdb): #搜尋JAVDB 210 | global dirpath,title,coverurl 211 | 212 | re_code = re.search(r"(\d+)([a-zA-Z]+-?.+)",code) 213 | url = "https://javdb.com/videos/search_autocomplete.json?q="+ (re_code.group(2) if re_code else code) 214 | vurl = "" 215 | 216 | cookies = http.cookiejar.MozillaCookieJar(cookies) 217 | cookies.load() 218 | 219 | res = requests.get(url,headers = {'User-Agent':UA},cookies=cookies) 220 | res.encoding = 'UTF-8' 221 | res = res.json() # return dict 222 | 223 | if len(res)==0: 224 | return {'success':False,'error':code+" not found, return empty json."} 225 | for r in res: 226 | if r['number'] == (re_code.group(2) if re_code else code): 227 | vurl = "https://javdb.com/v/" + r['uid'] 228 | if not vurl: 229 | return {'success':False,'error':code+" not found, can't find this video."} 230 | 231 | res = requests.get(vurl,headers = {'User-Agent':UA},cookies=cookies) 232 | res.encoding = 'UTF-8' 233 | soup = BeautifulSoup(res.text, 'lxml') 234 | 235 | title = soup.find("h2").getText().strip() 236 | try: 237 | if soup.find("img",{"class":"video-cover"}): 238 | coverurl = soup.find("img",{"class":"video-cover"}).get("src") 239 | 240 | elif soup.find("video",{"id":"preview-video"}): 241 | coverurl = soup.find("video",{"id":"preview-video"}).get("poster") 242 | else: 243 | coverurl = soup.find("div",{"class":"column is-three-fifths column-video-cover"}).find("a").get("href") 244 | except AttributeError: 245 | sleep(1) 246 | return {'success':False,'error':code+' AttributeError'} 247 | allinfo = soup.find_all("div",{"class":"panel-block"})[:-1] #去除最後一行 248 | 249 | date,time,dierector,producer,pulisher,seller,series,genre,actress,allpreview="","","","","","","","","",[] 250 | if soup.find("div",{"class":"tile-images preview-images"}): 251 | waterfall = soup.find("div",{"class":"tile-images preview-images"}).find_all("a",{"class":"tile-item"}) 252 | allpreview = [prev.get("href").strip() for prev in waterfall] 253 | for nfo in allinfo: 254 | if nfo.find("strong").getText() == "番號": 255 | code = 
nfo.find("span",{"class":"value"}).getText() 256 | elif nfo.find("strong").getText() == "時間:": 257 | date = nfo.find("span",{"class":"value"}).getText() 258 | elif nfo.find("strong").getText() == "時長:": 259 | time = nfo.find("span",{"class":"value"}).getText() 260 | elif nfo.find("strong").getText() == "導演:": 261 | dierector = nfo.find("span",{"class":"value"}).getText() 262 | elif nfo.find("strong").getText() == "片商:": 263 | producer = nfo.find("span",{"class":"value"}).getText() 264 | elif nfo.find("strong").getText() == "發行:": 265 | pulisher = nfo.find("span",{"class":"value"}).getText() 266 | elif nfo.find("strong").getText() == "系列:": 267 | series = nfo.find("span",{"class":"value"}).getText() 268 | elif nfo.find("strong").getText() == "賣家:": 269 | seller = nfo.find("span",{"class":"value"}).getText() 270 | dierector = seller 271 | elif nfo.find("strong").getText() == "類別:": 272 | genre = [g.getText().strip() for g in nfo.find("span",{"class":"value"}).find_all("a") ] 273 | elif nfo.find("strong").getText() == "演員:": 274 | actress = [g.getText().strip() for g in nfo.find("span",{"class":"value"}).find_all("a") ] 275 | 276 | if key in ["FC2"]: 277 | Sort2Dir(key,code,mypath,mode=2,sub=seller) 278 | sleep(1) 279 | else: 280 | Sort2Dir(key,code,mypath) 281 | os.chdir(mypath) 282 | mergename = code+"_preview.jpg" 283 | mergepath = config.tempfolder+"\\"+code 284 | if not os.path.isfile(dirpath+"\\"+mergename) and len(allpreview)>0: 285 | mergeres = Merge(code,allpreview,tempfolder=config.tempfolder,signpic=config.signpic) 286 | if mergeres: 287 | shutil.move(mergepath+"\\"+mergename,dirpath+"\\"+mergename) #Move 288 | #shutil.rmtree(mergepath) #清除Cache 289 | save = [code,title.replace(code,'').strip(),series,",".join(actress),",".join(genre),date,time,dierector,producer,pulisher] 290 | return {'success':True,'dirpath':dirpath,'code':code,'save':save,'title':title.replace(code,'').strip()} 291 | 292 | 293 | def Database4(key,code,mypath): #搜尋JAVDB 294 | global dirpath,title,coverurl 295 | 296 | re_code = re.search(r"(\d+)(.+-?.+)",code) 297 | url = "https://javdb.com/videos/search_autocomplete.json?q="+re_code.group(2) if re_code else code 298 | vurl = "" 299 | 300 | #shutil.rmtree(config.tempfolder) #清除Cache 301 | ''' 302 | #Test 303 | res = Database3("300MIUM","300MIUM-670",mypath) 304 | #res = Database3("ORE","ORE-670",mypath) 305 | print(res) 306 | 307 | db_name = "%s\\%s" % (config.LogPath,config.LogName) if config.LogPath else config.LogName 308 | sql.input(db_name, 'JAV', save,replace=True)''' -------------------------------------------------------------------------------- /JAV/sql.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import sqlite3, config 3 | 4 | def init(db_name,table_name): 5 | conn = sqlite3.connect(db_name) 6 | cursor = conn.cursor() 7 | execute = '''CREATE TABLE IF NOT EXISTS %s 8 | (Code INT PRIMARY KEY NOT NULL, 9 | 標題 TEXT NOT NULL, 10 | 系列 TEXT, 11 | 女優 TEXT, 12 | 類別 TEXT, 13 | 日期 VARCHAR(10) , 14 | 時長 VARCHAR(10) , 15 | 導演 TEXT, 16 | 製作商 TEXT, 17 | 發行商 TEXT, 18 | UNIQUE(Code) 19 | )''' % (table_name) 20 | cursor.execute(execute) 21 | cursor.close() 22 | conn.close() 23 | 24 | def input(db_name,table_name,List,many=False,replace=False): 25 | num = len(List[0]) if many else len(List) 26 | conn = sqlite3.connect(db_name) 27 | cursor = conn.cursor() 28 | pattern = "IGNORE" if not replace else "REPLACE" 29 | execute = 'INSERT OR %s INTO %s VALUES (?%s)' % (pattern,table_name,",?"*(num-1)) 30 | if many : 
#如果是批量資料(蜂巢迴圈) 31 | cursor.executemany(execute,List) 32 | else: 33 | cursor.execute(execute,List) 34 | conn.commit() 35 | cursor.close() 36 | conn.close() 37 | 38 | def output(db_name,table_name,file_name): 39 | with open(file_name, "w", encoding = 'utf-8-sig') as write_file: 40 | conn = sqlite3.connect(db_name) 41 | cursor = conn.cursor() 42 | execute = "SELECT * FROM %s" % (table_name) 43 | for row in cursor.execute(execute): 44 | writeRow = "\t".join('%s' % r for r in row)+"\n" 45 | write_file.write(writeRow) 46 | def query(db_name,table_name,sid): 47 | conn = sqlite3.connect(db_name) 48 | cursor = conn.cursor() 49 | execute = "SELECT * FROM %s WHERE Code = ?" % (table_name) 50 | result = cursor.execute(execute, [sid]).fetchone() 51 | cursor.close() 52 | conn.close() 53 | return result 54 | 55 | #Init 56 | db_name = "%s\\%s" % (config.LogPath,config.LogName) if config.LogPath else config.LogName 57 | init(db_name,"JAV") -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Gdist 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
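A minimal usage sketch of the JAV/sql.py helpers above, with illustrative values only ("test.db", "ABP-123" and the row contents are hypothetical); it assumes the script is run from the JAV folder so that `import sql` can load config.py, and that query() returns the fetched row:

    import sql
    db = "test.db"                                  # hypothetical database path
    sql.init(db, "JAV")                             # create the JAV table if it does not exist
    row = ["ABP-123", "Some Title", "", "Actress A", "tag1,tag2",
           "2019-01-01", "120分鐘", "", "Studio X", ""]   # one value per table column
    sql.input(db, "JAV", row)                       # INSERT OR IGNORE by default; replace=True to overwrite
    print(sql.query(db, "JAV", "ABP-123"))          # -> the stored tuple, or None if absent
    sql.output(db, "JAV", "JAV.tsv")                # dump the whole table as tab-separated text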
22 | -------------------------------------------------------------------------------- /Movie/MVAutoSort.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | # Copyright (c) 2019-2020 GDST 4 | 5 | import json, requests, random, os, re, time 6 | from opencc import OpenCC 7 | from bs4 import BeautifulSoup 8 | #from fake_useragent import UserAgent 9 | from user_agent import generate_user_agent 10 | import gen as Gen 11 | import get as Get 12 | import sql, search,config 13 | 14 | #Initialize 15 | #ua = UserAgent().random 16 | ua = generate_user_agent() 17 | regDic = {} 18 | 19 | #SQL 20 | db_name = "%s\\%s.db" % (config.LogPath,config.LogName) if config.LogPath else config.LogName+".db" 21 | sql.init(db_name,"Movie") 22 | sql.init(db_name,"TV") 23 | 24 | with open("folder.txt" , "r", encoding = 'utf-8-sig') as data: #只在這些子資料夾執行 25 | folderList = [l.strip() for l in data ] 26 | with open("region.txt" , "r", encoding = 'utf-8-sig') as regdata: #地區縮寫對應 27 | regList = [l.strip().split(',') for l in regdata ] 28 | for reg in regList: 29 | regDic[reg[0]]=reg[1:] 30 | 31 | #Function 32 | def resjson(url): 33 | r = requests.get(url,headers={'User-Agent':ua}) 34 | res = r.json() # return dict 35 | return res 36 | def move(src,dst): 37 | for root, dirs, files in os.walk(src): 38 | for d in dirs: 39 | src_dir = root+"\\"+d 40 | dst_dir = src_dir.replace(src,dst) 41 | if not os.path.exists(dst_dir): 42 | os.mkdir(dst_dir) 43 | for file in files: 44 | src_path = root+"\\"+file 45 | dst_path = src_path.replace(src,dst) 46 | if not os.path.exists(dst_path): 47 | os.rename(src_path,dst_path) 48 | def SaveLog(save): 49 | if subtype == "movie": 50 | #config.LogName2 = "%s_Movie_%s.csv" % (config.LogName,year) 51 | config.LogName2 = "%s_Movie.tsv" % (config.LogName) 52 | elif subtype == "tv": 53 | #config.LogName2 = "%s_TV_%s_%s.csv" % (config.LogName,reg1,year) 54 | config.LogName2 = "%s_TV_%s.tsv" % (config.LogName,reg1) 55 | fname = config.LogPath+"\\"+config.LogName2 56 | if not os.path.isfile(fname): 57 | save = "Folder\tSID\tRename\n"+save 58 | with open(fname,"a", encoding = 'utf-8-sig') as sdata: #寫檔 59 | sdata.write(save+"\n") 60 | def LogNPrint(text): 61 | print(text) 62 | with open(config.LogPath+"//AutoSort.log","a", encoding = 'utf8') as data: 63 | data.write(str(text)+"\n") 64 | 65 | class Search: 66 | def get_year(key1): #搜尋年份 67 | if re.search(r"(19|20\d{2})",key1): 68 | return re.search(r"(19|20\d{2})",key1).group(1) 69 | else: 70 | return False 71 | def DB(key1,mod=1,year_check=True): 72 | global subtype , dblink 73 | key2 = key1 74 | year0 = Search.get_year(key1) 75 | if mod == 1 : #搜尋年份或畫質之前的名稱 76 | if year0 : 77 | key2 = key1[:key1.find(year0)] if key1[:key1.find(year0)] != "" else key1 78 | else: 79 | key2 = re.search(r"(.+)\d{4}",key1).group(1) if re.search(r"(.+)\d{4}",key1) else key1 80 | if mod == 2 : #搜尋第一個.之前的中文名稱 或 包含季數(SXX)的英文名稱 81 | key2 = re.search(r"([\u4e00-\u9fa5]+)\.",key1).group(1) if re.search(r"([\u4e00-\u9fa5]+)\.",key1) else key1 82 | key2 = re.search(r"(.+\.S\d{2})",key1).group(1) if key2 == key1 and re.search(r"(.+\.S\d{2})",key1) else key1 83 | Bracket = re.search(r"\[(.+?)\]",key2) #搜尋中括弧 84 | if Bracket: 85 | key2 = Bracket.group(1) 86 | LogNPrint("Change : "+key2) 87 | url = config.dbapi+key2 88 | res = resjson(url) 89 | if res['msg'] == 'invalid_credencial2': #API失效應急 90 | return False 91 | if int(res['total']) == 1 and len(res['subjects'])==1: #Only 1 Result 92 | subtype = 
res['subjects'][0]['subtype'] 93 | dblink = res['subjects'][0]['alt'] 94 | year = res['subjects'][0]['year'] 95 | if year in key1 or not year0: #當名稱包含搜尋到年份 或 名稱內不含年份 96 | return dblink 97 | else: 98 | print("*Error : Year doesn't match.") 99 | elif int(res['total']) == 0 or len(res['subjects'])==0: #找不到結果trf 100 | return "" 101 | elif int(res['total']) > 1 : #過多結果 102 | if not year_check: #如果不檢查年份,直接返回第一個搜尋結果 103 | return res['subjects'][0]['alt'] 104 | for subject in res['subjects']: #例外處理-過多資料-年份比對 105 | if subject['year'] != "" and subject['year'] in key1: #去除年份空白的情況 106 | #if subject['year'] in key1 or (not year0 and subject['title'] in key1): 107 | subtype = subject['subtype'] 108 | dblink = subject['alt'] 109 | return dblink 110 | return False #Error : No results found. 111 | def GetInfo(dblink,switch=0): 112 | global year,subtype,reg1,reg2,reg3,save 113 | res = Gen.gen_douban(dblink) 114 | if not res['success']: # Success 115 | print("*Error :",res['error']) 116 | if 'exit' in res.keys(): exit() 117 | return "" 118 | else: 119 | subtype = res['subtype'] if not subtype else subtype 120 | year = year2 = res['year'] 121 | if not year: 122 | year = res['playdate'][0][:4] if res['playdate'] else 0 123 | '''if int(len(res['seasons_list'])) > 1 and subtype == "tv": #多季的電視劇 124 | year = 999 #多季 125 | year2 = "多季"''' 126 | titleZH = res['chinese_title'].replace(" "," ") #中文標題 127 | this_title = res['this_title'][0] #原始標題 128 | trans_title = res['trans_title'] #List 用來取台灣譯名 129 | aka = res['aka'] 130 | 131 | try: 132 | imdb_id = res['imdb_id'] 133 | except KeyError: 134 | imdb_id = "" 135 | db_id,db_rating = "db_"+res['sid'],res['douban_rating_average'] 136 | if not imdb_id: 137 | imdb_rating = "" 138 | else: 139 | try: 140 | imdb_rating = res['imdb_rating'][:res['imdb_rating'].find('/')] 141 | except: 142 | imdb_rating = 0 143 | mvid,rating = (imdb_id,imdb_rating )if imdb_id and imdb_rating else (db_id,db_rating) 144 | 145 | genre = "|".join(res['genre']) #List→str 類型 146 | region = res['region'] if type(res['region']) == type("str") else res['region'][0] 147 | region = region.split(" ")[0] #解決中英混合的地區 148 | reg1,reg2,reg3 = region,region,region 149 | for reg in regDic.keys(): #地區 150 | if reg == region: 151 | if "台湾" in region or "香港" in region: reg1 = reg.replace("中国","") 152 | elif region == "中国": reg1 = "中国大陆" 153 | else: reg1 = reg 154 | reg2 = regDic[reg][0] 155 | reg3 = regDic[reg][1] 156 | break 157 | 158 | AllTitle1 = [titleZH]+[this_title]+aka+trans_title 159 | AllTitle2 = list(set(AllTitle1)) 160 | AllTitle2.sort(key=AllTitle1.index) 161 | 162 | if config.CHT_TW: #繁體、台灣譯名 163 | if this_title != "" and reg1 == "台湾": #原始標題為中文地區是台灣) 164 | titleZH = this_title 165 | breakcheck = False 166 | zhtwList = ["(台)","(港/台)","(台/港)","(台)","(港/台)","(台/港)"] 167 | for trans in AllTitle2: 168 | for zhtw in zhtwList: 169 | if zhtw in trans: 170 | if trans in AllTitle2: 171 | AllTitle2.remove(trans) 172 | breakcheck = True 173 | titleZH = trans.replace(zhtw,"") 174 | break 175 | if breakcheck: 176 | break 177 | titleZH = OpenCC('s2tw').convert(titleZH) 178 | genre = OpenCC('s2tw').convert(genre) 179 | reg1 = OpenCC('s2tw').convert(reg1) 180 | for i in range(len(AllTitle2)): 181 | AllTitle2[i] = OpenCC('s2tw').convert(AllTitle2[i]) 182 | if reg2 == reg3: 183 | reg2 = OpenCC('s2tw').convert(reg2) 184 | if config.ZH_ENG: #中英標題 185 | titleEN = "" 186 | for tt in AllTitle2: 187 | if not Get.checkzh(tt): 188 | if tt in AllTitle2: 189 | AllTitle2.remove(tt) 190 | titleEN = tt.replace(" : ",":").replace(": 
",":").replace("/","/").replace("\\","\") 191 | break 192 | for tt in [titleZH]+aka: 193 | if Get.checkzh(tt): 194 | if tt in AllTitle2: 195 | AllTitle2.remove(tt) 196 | titleZH = tt.replace(" : ",":").replace(": ",":").replace("/","/").replace("\\","\") 197 | break 198 | title = (titleZH+" "+titleEN) if titleEN and len(titleEN) <= config.ENGlen and titleZH != titleEN else titleZH 199 | titleOT = AllTitle2 200 | 201 | region = reg2 if config.regSt else reg1 202 | titleOT = [] if not titleOT else titleOT 203 | save = "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s" % (mvid,year,reg1,imdb_rating,db_rating,titleZH,titleEN,"/".join(titleOT),genre,imdb_id,db_id) 204 | if rating == '' or float(rating): 205 | return "[%s][%s]%s(%s)(%s)(%s)" % (year2,region,title,genre.replace("|","_"),rating,mvid) 206 | else: 207 | return "[%s][%s]%s(%s)(%s)" % (year2,region,title,genre.replace("|","_"),mvid) 208 | 209 | mypath = os.getcwd() if not config.UseRemote else config.remotepath #執行目錄 210 | Logfile = config.LogPath+"\\move.log" if config.LogPath else "move.log" 211 | 212 | for folder in folderList: 213 | if os.path.isdir(folder): #如果指定的資料夾存在 214 | for d in sorted(os.listdir(folder)): 215 | subtype,IMDbID,dblink= "","","" 216 | folderpath = "%s/%s" % (folder,d) 217 | SubD = False if re.match(r'.+?\.(mkv|mp4|ts).?', d) else config.SubFolder #若資料夾為檔案名稱,則不使用config.SubFolder 218 | LogNPrint("\nFolder : "+d) 219 | if re.search(r"\(db_(.+?)\)",d): #如果能從資料夾名稱找到dbID 220 | SubD = False 221 | dblink = "https://movie.douban.com/subject/%s/" % (re.search(r"\(db_(.+?)\)",d).group(1)) 222 | elif re.search(r"\(tt(.+?)\)",d): #如果能從資料夾名稱找到IMDbID 223 | SubD = False 224 | IMDbID = re.search(r"\((tt\d+)\)",d).group(1) 225 | LogNPrint("IMDbID : "+IMDbID) 226 | dblink = search.imdb2db2(IMDbID) 227 | elif Get.findnfo(folderpath): #如果能從資料夾內的.nfo找到IMDb或douban鏈接 228 | get_nfo = Get.findnfo(folderpath) 229 | if 'imdb' in get_nfo.keys(): 230 | IMDbID = get_nfo['imdb'] 231 | LogNPrint("IMDbID : "+IMDbID) 232 | dblink = search.imdb2db2(get_nfo['imdb']) 233 | elif 'douban' in get_nfo.keys(): 234 | dblink = get_nfo['douban'] 235 | else: 236 | ptsearch = search.PT(d) 237 | if not (IMDbID or dblink) and ptsearch: 238 | IMDbID = ptsearch['imdb'] if ptsearch['imdb'] else "" 239 | dblink = ptsearch['douban'] if ptsearch['douban'] else "" 240 | LogNPrint("Search : from {}".format(ptsearch['source'])) 241 | if not (IMDbID or dblink) and re.search(r"WiKi|DoA|JuJuYuJu|NGB|ARiN|ExREN|NTb|NTG|CHD",d) and search.TTG(d): #如果能從TTG找到IMDbID或dblink 242 | ptsearch = search.TTG(d) 243 | if ptsearch['imdb'] or ptsearch['douban']: 244 | LogNPrint("Search : from TTG") 245 | IMDbID = ptsearch['imdb'] if ptsearch['imdb'] else "" 246 | dblink = ptsearch['douban'] if ptsearch['douban'] else search.imdb2db2(IMDbID) 247 | if ptsearch['title'] and not dblink: 248 | LogNPrint("UseThis: "+ptsearch['title']) 249 | dblink = Search.DB(ptsearch['title'],year_check=config.year_check) 250 | if not dblink: 251 | dblink = Search.DB(ptsearch['title'],year_check=False) 252 | if not (IMDbID or dblink) and re.search(r"PuTao",d) and search.PuTao(d): #如果能從PuTao找到IMDbID或dblink 253 | ptsearch = search.PuTao(d) 254 | LogNPrint("Search : from PuTao") 255 | IMDbID = ptsearch['imdb'] if ptsearch['imdb'] else "" 256 | dblink = ptsearch['douban'] if ptsearch['douban'] else search.imdb2db2(IMDbID) 257 | if not (IMDbID or dblink) and re.search(r"MTeam|MPAD|OneHD|StBOX|beAst|CHD",d) and search.MTeam(d): #如果能從MTeam找到IMDbID或dblink 258 | ptsearch = search.MTeam(d) 259 | if 'imdb' in ptsearch.keys() 
or 'douban' in ptsearch.keys(): 260 | LogNPrint("Search : from MTeam") 261 | IMDbID = ptsearch['imdb'] if ptsearch['imdb'] else "" 262 | dblink = ptsearch['douban'] if ptsearch['douban'] else search.imdb2db2(IMDbID) 263 | elif 'title' in ptsearch.keys(): 264 | dblink = Search.DB(ptsearch['title'],year_check=config.year_check) 265 | if not dblink: 266 | dblink = Search.DB(ptsearch['title'],year_check=False) 267 | if not (dblink or IMDbID): 268 | dblink = Search.DB(d,year_check=config.year_check) 269 | '''if not dblink: 270 | dblink = Search.DB(d,mod=2,year_check=config.year_check)''' 271 | if dblink: #如果能返回豆瓣鏈接 272 | LogNPrint("dbLink : "+dblink) 273 | '''LogNPrint("*Debug : Pass.") 274 | continue''' 275 | try: 276 | name = Search.GetInfo(dblink) 277 | except: 278 | LogNPrint("*Error : Name error. Pass.") 279 | continue 280 | if not name and IMDbID: #如果無法從dblink找到資料,但存在IMDbID 281 | #LogNPrint("Change : IMDb&TMDb") # 待辦:研究TMDB回傳錯誤訊息 282 | GetTMDb = Get.IMDb2TMDb(IMDbID) 283 | if GetTMDb: 284 | subtype,year,reg1,name,save = GetTMDb[0],GetTMDb[1],GetTMDb[2],GetTMDb[3],GetTMDb[4] 285 | LogNPrint("Change : IMDb&TMDb") 286 | else: 287 | subtype,year,reg1,name,save = "","","","","" 288 | elif not dblink and IMDbID: #如果無法返回dbLink,但有IMDbID→改用TMDB跟IMDb搜尋資訊 289 | LogNPrint("IMDbID : "+IMDbID) 290 | GetTMDb = Get.IMDb2TMDb(IMDbID) 291 | 292 | if GetTMDb: 293 | subtype,year,reg1,name,save = GetTMDb[0],GetTMDb[1],GetTMDb[2],GetTMDb[3],GetTMDb[4] 294 | LogNPrint("Change : IMDb&TMDb") 295 | else: 296 | LogNPrint("*Error : Can't find info from IMDb&TMDb.") 297 | subtype,year,reg1,name,save = "","","","","" 298 | else: 299 | LogNPrint("*Error : Can't find dbLink.") 300 | continue 301 | 302 | if name: 303 | name = name.replace("\"","") 304 | LogNPrint("Subtype: "+subtype) 305 | else: 306 | continue 307 | if config.YearSort: 308 | if int(year[:4]) > 2000: 309 | year = year[:4] 310 | #elif int(year) == 999: 311 | # year = "多季" 312 | elif 1991<=int(year[:4]) and int(year[:4])<=2000: 313 | year = "1991-2000" 314 | elif 1981<=int(year[:4]) and int(year[:4])<=1990: 315 | year = "1981-1990" 316 | elif int(year[:4])<=1980: 317 | year = "1980以前" 318 | if subtype == "movie": 319 | table_name = "Movie" 320 | Path = ("Movie\\%s\\%s" % (year,name)) 321 | elif subtype == "tv": 322 | table_name = "TV" 323 | Path = ("TVSeries\\%s\\%s\\%s" % (reg1,year,name)) 324 | 325 | query = sql.query(db_name, table_name,save.split("\t")[0]) #查詢舊有資料 326 | print 327 | if query != None and (not config.DataUpdate or query[-1]==Path): #若存在舊有資料且不須更新現有資料 328 | LogNPrint("Ignore : "+query[-1]) 329 | Path = query[-1] 330 | name = Path[Path.rfind("\\")+1:] 331 | elif query != None and config.DataUpdate and query[-1]!=Path : #若存在舊有資料且與新的資料不相符(數據更新)且更新資料參數為True 332 | sql.input(db_name, table_name, save.split("\t")+[Path],replace=True) 333 | command2 = ("rclone move -v \"%s\" \"%s\" --log-file=%s" %(mypath+"\\"+query[-1],mypath+"\\"+Path,Logfile)) 334 | os.system(command2) 335 | if config.syncpath: 336 | for syncpath in config.syncpath: 337 | command2 = ("rclone move -v \"%s\" \"%s\" --log-file=%s" %(syncpath+"\\"+query[-1],syncpath+"\\"+Path,Logfile)) 338 | os.system(command2) 339 | LogNPrint("Update : "+query[-1]) 340 | else: #資料庫內無對應資料 341 | sql.input(db_name, table_name, save.split("\t")+[Path]) 342 | LogNPrint("Rename : "+name) 343 | 344 | folder = folder.replace(config.mountpath,"") if config.UseRemote and config.mountpath else folder 345 | path1 = mypath+"\\"+folder+"\\"+d 346 | path2 = mypath+"\\"+Path+"\\"+d if SubD else mypath+"\\"+Path 347 
| if len(path2) > config.pathlen and not subtype == "tv" : #路徑長度(但對TV類型不啟用) 348 | path2 = mypath+"\\"+Path 349 | command = ("rclone move -v \"%s\" \"%s\" --stats 30s --log-file=%s" %(path1,path2,Logfile)) 350 | os.system(command) 351 | #os.popen(command) 352 | LogNPrint("MoveTo : "+path2) 353 | command = ("rclone rmdirs -v \"%s\"" % (mypath+"\\"+folder)) 354 | os.system(command) -------------------------------------------------------------------------------- /Movie/README.md: -------------------------------------------------------------------------------- 1 | # MVAutoSort 2 | 3 | ![執行示例](https://i.imgur.com/whiajFm.png) 4 | 5 | ## 需求套件 6 | pip install -r requirements.txt 7 | 8 | ## 說明 9 | 未完成待續 10 | 11 | ### 搜尋模式 12 | 13 | - 若資料夾名稱含有IMDbID(tt:d)或DoubanID(db_:d),則使用ID搜尋對應資料 14 | 15 | - 若資料夾內存在.nfo檔,則在其中尋找IMDbID,並以此做搜尋 16 | 17 | - 若可在PT站搜尋到IMDbID或DoubanID,則使用此資料。 18 | 19 | - 若以上皆非,則解析文件夾名稱調用豆瓣API做搜尋 20 | 21 | - 通常文件夾名稱由3個部分組成 $電影名稱.$年份.$壓制參數,目前採用以$年份為錨點解析出$電影名稱的方式 22 | 23 | ### 資料採集 24 | 25 | - 原採用PT-Gen的API,但受其穩定性&API調用次數限制,故後來棄用。 26 | 27 | - 改採參照其代碼簡化成gen.py,以此採集資料。感謝@Rhilip大佬、BFDZ大佬的PT-Gen。 28 | 29 | - 如果無法從豆瓣找到資料,則透過IMDbID在IMDb&TMDb分別搜尋資料,並合併 30 | 31 | ### 自動分類 32 | 33 | #### 標題 34 | 35 | - 中文標題優先採取台灣地區翻譯標題,若為標記則採用豆瓣中文標題,並使用OpenCC翻譯成繁體中文 36 | 37 | - 英文標題為在所有標題清單中找到由純英文組成之標題,若無英文標題則只使用中文標題 38 | 39 | ## 待加入功能 40 | 41 | - [ ] 手動模式(全手動、錯誤時手動、不使用手動),手動輸入IMDbID 42 | 43 | - [ ] 搜尋模式切換 44 | 45 | - [ ] 根據TMDB資料搜尋各季資訊(因IMDb不會返回各季資料),並存放到 合集 資料夾 46 | 47 | - [x] 引用資料庫已有的資料 48 | -------------------------------------------------------------------------------- /Movie/config_sample.py: -------------------------------------------------------------------------------- 1 | #API 2 | TMDbAPI = "" 3 | Rapid_IMDb = "" 4 | dbapi = "https://api.douban.com/v2/movie/search?apikey=0dad551ec0f84ed02907ff5c42e8ec70&q=" 5 | 6 | #Cookies 7 | ##格式為:{'cookie':cookie},留空則不使用PT搜尋 8 | ourbits = "" 9 | SSD = "" 10 | TJUPT = "" 11 | FRDS = "" 12 | MTeam = "" 13 | PuTao = "" 14 | TTG = "" 15 | 16 | #Search 17 | year_check = True #是否在使用豆瓣搜索時檢查年份(若名稱無包含年份則會搜尋不到結果),建議啟用避免搜尋錯誤 18 | 19 | #Rename 20 | CHT_TW = True #優先取台灣譯名,且轉為繁體;若為False則為豆瓣上簡體中文標題 21 | ZH_ENG = True #標題採中英混合;若為False則為僅中文標題 (當觸發ENGlen限制時則不保留英文標題) 22 | ENGlen = 65 #英文標題長度限制,若過長則僅保留中文標題。若不想啟用輸入極大值即可 23 | regSt = True #地區縮寫,使用region.txt文件 24 | 25 | #Move 26 | UseRemote = True #將路徑替換為遠端路徑 (讀取掛載信息,但在遠端上操作) 27 | remotepath = "remotepath:" #承上,遠端路徑 28 | pathlen = 200 #路徑長度限制(僅計算資料夾)。若不想啟用輸入極大值即可,觸發後將不建立子資料夾 29 | SubFolder = True #是否保留原始資料夾名稱,將其設為子資料夾 (當觸發config.pathlen限制時則不保留 30 | YearSort = True #老舊電影合併存放 31 | 32 | #Log 33 | LogPath = "D:\\AutoSort\\Movie" #默認為執行目錄 34 | LogName = "AutoSort" 35 | DataUpdate = False #資料是否更新,True為會將舊資料更新為新資料且移動資料夾,False會依據資料庫內現有資料做資料夾命名 -------------------------------------------------------------------------------- /Movie/folder.txt: -------------------------------------------------------------------------------- 1 | G:/Upload/ATS/@TJUPT/~TVSeries 2 | G:/Upload/ATS/@TJUPT/~TVShow 3 | G:/Upload/ATS/@Ourbits/Ao 4 | G:/Upload/ATS/@Ourbits/FLTTH 5 | G:/Upload/ATS/@Ourbits/OurTV 6 | G:/Upload/ATS/@PTer/~TVShow 7 | G:/Upload/ATS/@PTer/~Movie 8 | G:/Upload/ATS/@PTer/~Movie2 -------------------------------------------------------------------------------- /Movie/gen.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | # Copyright (c) 2017-2020 Rhilip 4 | 5 | import re,time 6 | import json 7 | import random 8 | import requests 9 | from bs4 import BeautifulSoup 10 | 
--------------------------------------------------------------------------------
/Movie/gen.py:
--------------------------------------------------------------------------------
# !/usr/bin/python3
# -*- coding: utf-8 -*-
# Copyright (c) 2017-2020 Rhilip

import re, time
import json
import random
import requests
from bs4 import BeautifulSoup
from html2bbcode.parser import HTML2BBCode
import http.cookiejar

__version__ = "0.4.5"
__author__ = "Rhilip"

douban_apikey_list = [
    "02646d3fb69a52ff072d47bf23cef8fd",
    "0b2bdeda43b5688921839c8ecb20399b",
    "0dad551ec0f84ed02907ff5c42e8ec70",
    "0df993c66c0c636e29ecbb5344252a4a"
]
# "07c78782db00a121175696889101e363"
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/61.0.3163.100 Safari/537.36 ',
    "Accept-Language": "en,zh-CN;q=0.9,zh;q=0.8"
}

cookies = http.cookiejar.MozillaCookieJar('sites\\.cookies\\douban.txt')
cookies.load()

def get_db_apikey() -> str:
    return random.choice(douban_apikey_list)

def get_page(url: str, json_=False, jsonp_=False, bs_=False, text_=False, **kwargs):
    kwargs.setdefault("headers", headers)
    page = requests.get(url, **kwargs, cookies=cookies)

    page.encoding = "utf-8"
    page_text = page.text
    if json_:
        try:
            return page.json()
        except ValueError:  # malformed/empty JSON: wait briefly and retry with the SAME arguments
            time.sleep(0.5)
            return get_page(url, json_=True, **kwargs)  # was retrying without kwargs, dropping the apikey params
    elif jsonp_:
        start_idx = page_text.find('(')
        end_idx = page_text.rfind(')')
        return json.loads(page_text[start_idx + 1:end_idx])
    elif bs_:
        return BeautifulSoup(page.text, "lxml")
    elif text_:
        return page_text
    else:
        return page


def html2ubb(html: str) -> str:
    return str(HTML2BBCode().feed(html))

def get_num_from_string(raw):
    return int(re.search(r'[\d,]+', raw).group(0).replace(',', ''))
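# Usage sketch for the helpers above (illustrative subject ID, requires network access):
#   api_json = get_page('https://api.douban.com/v2/movie/1292052',
#                       params={'apikey': get_db_apikey()}, json_=True)
#   page_soup = get_page('https://movie.douban.com/subject/1292052/', bs_=True)
# json_ returns a dict, jsonp_ unwraps a JSONP callback, bs_ returns a BeautifulSoup
# tree, text_ returns the decoded body; otherwise the raw Response is returned.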
def gen_douban(dblink):
    data = {}
    sid = re.search(r"https:\/\/(movie\.)?douban\.com\/(subject|movie)\/(\d+)", dblink).group(3)
    data['sid'] = sid
    douban_page = get_page(dblink, bs_=True)
    douban_api_json = get_page(
        'https://api.douban.com/v2/movie/{}'.format(sid),
        params={'apikey': get_db_apikey()},
        json_=True
    )
    douban_abstract_json = get_page('https://movie.douban.com/j/subject_abstract?subject_id={}'.format(sid), json_=True)
    data['success'] = False

    if "msg" in douban_api_json and (douban_api_json["msg"] != 'invalid_credencial2'):  # fallback for a dead API key
        data["error"] = douban_api_json["msg"]
    elif str(douban_page).find("检测到有异常请求") > -1:
        data["error"] = "GenHelp was banned by Douban."
        data['exit'] = True
    elif douban_page.title.text == "页面不存在":
        print(douban_page)
        data["error"] = "The corresponding resource does not exist."
    else:
        data["douban_link"] = dblink
        data['success'] = True
        def fetch(node):
            return node.next_element.next_element.strip()
        # parse the main page
        data["chinese_title"] = (douban_page.title.text.replace("(豆瓣)", "").strip())
        data["foreign_title"] = (douban_page.find("span", property="v:itemreviewed").text
                                 .replace(data["chinese_title"], '').strip()) if douban_page.find("span", property="v:itemreviewed") else ""

        aka_anchor = douban_page.find("span", class_="pl", text=re.compile("又名"))
        data["aka"] = sorted(fetch(aka_anchor).split(' / ')) if aka_anchor else []

        if data["foreign_title"]:
            trans_title = data["chinese_title"] + (('/' + "/".join(data["aka"])) if data["aka"] else "")
            this_title = data["foreign_title"]
        else:
            trans_title = "/".join(data["aka"]) if data["aka"] else ""
            this_title = data["chinese_title"]

        data["trans_title"] = trans_title.split("/")
        data["this_title"] = this_title.split("/")

        region_anchor = douban_page.find("span", class_="pl", text=re.compile("制片国家/地区"))
        language_anchor = douban_page.find("span", class_="pl", text=re.compile("语言"))
        seasons_anchor = douban_page.find("span", class_="pl", text=re.compile("季数"))
        episodes_anchor = douban_page.find("span", class_="pl", text=re.compile("集数"))
        imdb_link_anchor = douban_page.find("a", text=re.compile(r"tt\d+"))
        year_anchor = douban_page.find("span", class_="year")

        data["year"] = year_anchor.text[1:-1] if year_anchor else ""  # year
        data["region"] = fetch(region_anchor).split(" / ") if region_anchor else []  # region
        data["genre"] = list(map(lambda l: l.text.strip(), douban_page.find_all("span", property="v:genre")))  # genres
        data["language"] = fetch(language_anchor).split(" / ") if language_anchor else []  # languages
        data["playdate"] = sorted(map(lambda l: l.text.strip(),  # release dates
                                      douban_page.find_all("span", property="v:initialReleaseDate")))
        data["imdb_link"] = imdb_link_anchor.attrs["href"] if imdb_link_anchor else ""  # IMDb link
        data["imdb_id"] = imdb_link_anchor.text if imdb_link_anchor else ""  # IMDb ID
        data["episodes"] = fetch(episodes_anchor) if episodes_anchor else ""  # episode count
        season_check = douban_page.find("select", id="season")
        data["seasons_list"] = [option.get("value") for option in season_check.find_all("option")] if seasons_anchor and season_check else []  # season list
        data["seasons"] = season_check.find_all("option")[-1].getText() if seasons_anchor and season_check else ""  # season count

        duration_anchor = douban_page.find("span", class_="pl", text=re.compile("单集片长"))
        runtime_anchor = douban_page.find("span", property="v:runtime")

        duration = ""  # runtime
        if duration_anchor:
            duration = fetch(duration_anchor)
        elif runtime_anchor:
            duration = runtime_anchor.text.strip()
        data["duration"] = duration
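        # How fetch() works on Douban's info block: for
        #   <span class="pl">语言:</span> 英语 / 法语<br/>
        # the label's first next_element is its own text node "语言:", and the second is
        # the text node after the closing tag, so fetch(label) -> "英语 / 法语".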
data["imdb_rating"] = "{}/10 from {} users".format(imdb_average_rating, imdb_votes) 149 | except Exception as err: 150 | pass 151 | 152 | # 豆瓣评分,简介,海报,导演,编剧,演员,标签 153 | '''data["douban_rating_average"] = douban_average_rating = douban_api_json["rating"]["average"] or 0 154 | data["douban_votes"] = douban_votes = douban_api_json["rating"]["numRaters"] or 0 155 | data["douban_rating"] = "{}/10 from {} users".format(douban_average_rating, douban_votes) 156 | data["tags"] = list(map(lambda member: member["name"], douban_api_json["tags"]))''' 157 | 158 | abstract_subject = douban_abstract_json["subject"] 159 | try: 160 | data["douban_rating_average"] = douban_average_rating = douban_page.find("strong", property="v:average").text or 0 161 | data["douban_votes"] = douban_votes = douban_page.find("span", property="v:votes").text or 0 162 | except: 163 | data["douban_rating_average"] = douban_average_rating = abstract_subject["rate"] or 0 164 | data["year"] = abstract_subject["release_year"] if not data["year"] else data["year"] 165 | data["subtype"] = 'tv' if abstract_subject['is_tv'] or data["episodes"] or abstract_subject['subtype'].lower() == 'tv' else 'movie' 166 | 167 | # 将清洗的数据一并发出 168 | return data 169 | if __name__ == '__main__': 170 | x = "https://movie.douban.com/subject/27200642" 171 | gen_douban(x) 172 | -------------------------------------------------------------------------------- /Movie/get.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | # Copyright (c) 2019-2020 GDST 4 | import os ,re ,requests, time 5 | from opencc import OpenCC 6 | from fake_useragent import UserAgent 7 | import config 8 | from bs4 import BeautifulSoup 9 | import http.cookiejar 10 | 11 | #UA = UserAgent() 12 | UA = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.125 Safari/537.36' 13 | regDicEN = {} 14 | 15 | with open("region.txt" , "r", encoding = 'utf-8-sig') as regdataEN: #地區縮寫對應 16 | regListEN = [l.strip().split(',') for l in regdataEN ] 17 | for regEN in regListEN: 18 | regDicEN[regEN[-1]]=regEN[:-1] 19 | 20 | def resjson(url,cookies=''): 21 | r = requests.get(url,headers={'User-Agent':UA},cookies=cookies) 22 | res = r.json() # return dict 23 | return res 24 | 25 | def checkzh(text): 26 | for t in text: 27 | if ord(t) > 255: 28 | return True 29 | 30 | def findnfo(path): 31 | if not os.path.isdir(path): 32 | return False 33 | for file in sorted(os.listdir(path)): 34 | filepath = "%s\\%s" % (path,file) 35 | if os.path.isfile(filepath) and ( re.match(r'.+?\.nfo', file) or re.match(r'.+?\.txt', file) ): 36 | with open(filepath, "r", encoding="latin-1") as data: 37 | for line in data: 38 | imdb_search = re.search(r"(http|https)://www.imdb.com/title/(tt\d+)",line) 39 | if imdb_search: 40 | return {'imdb':imdb_search.group(2)} 41 | db_search = re.search(r"https:\/\/movie\.douban\.com\/(subject|movie)\/(\d+)",line) 42 | if db_search: 43 | return {'douban':db_search.group()} 44 | 45 | 46 | def imdb2db(IMDbID): 47 | imdb2db = "https://api.douban.com/v2/movie/imdb/%s?apikey=0df993c66c0c636e29ecbb5344252a4a" % (IMDbID) 48 | res = resjson(imdb2db) 49 | dblink = res['alt'].replace("/movie/","/subject/")+"/" if 'alt' in res.keys() else "" 50 | return dblink 51 | 52 | def imdb2db2(IMDbID): 53 | url = "https://movie.douban.com/j/subject_suggest?q={}".format(IMDbID) 54 | cookies = http.cookiejar.MozillaCookieJar('sites\\.cookies\\douban.txt') 55 | cookies.load() 56 | res = 
def imdb2db(IMDbID):
    imdb2db = "https://api.douban.com/v2/movie/imdb/%s?apikey=0df993c66c0c636e29ecbb5344252a4a" % (IMDbID)
    res = resjson(imdb2db)
    dblink = res['alt'].replace("/movie/", "/subject/") + "/" if 'alt' in res.keys() else ""
    return dblink

def imdb2db2(IMDbID):
    url = "https://movie.douban.com/j/subject_suggest?q={}".format(IMDbID)
    cookies = http.cookiejar.MozillaCookieJar('sites\\.cookies\\douban.txt')
    cookies.load()
    res = resjson(url, cookies=cookies)
    time.sleep(0.5)
    try:
        dblink = re.search(r"https:\/\/(movie\.)?douban\.com\/(subject|movie)\/(\d+)", res[0]['url']).group(0)
        return dblink
    except (IndexError, KeyError, AttributeError):  # empty suggestion list or no link in it
        return False

def IMDbInfo(IMDbID):
    rapidapi_imdb = "https://movie-database-imdb-alternative.p.rapidapi.com/?i=%s&r=json" % (IMDbID)
    payload = {"X-RapidAPI-Host": "movie-database-imdb-alternative.p.rapidapi.com",
               "X-RapidAPI-Key": config.Rapid_IMDb}
    try:
        res = requests.get(rapidapi_imdb, headers=payload).json()
        return res
    except Exception:
        return False

def IMDb2TMDb(IMDbID, lan="zh-TW"):
    global year, subtype, reg1, reg2, reg3, save
    imdb2tmdb = "https://api.themoviedb.org/3/find/%s?api_key=%s&language=%s&external_source=imdb_id" % (IMDbID, config.TMDbAPI, lan)

    res = resjson(imdb2tmdb)
    if "status_message" not in res.keys():
        if len(res["movie_results"]) != 0 or len(res["tv_results"]) != 0:  # movies + TV series
            IMDb = IMDbInfo(IMDbID)  # single RapidAPI lookup (the original fetched this twice)
            if not IMDb or "Error" in IMDb.keys():
                return False
            year = IMDb['Year'][0:4]
            titleIMDb = IMDb['Title']
            IMDbRating = IMDb['imdbRating'] if IMDb['imdbRating'] != "N/A" else "0"
            region = IMDb['Country'].replace(" ", "").split(",")[0]
            subtype = IMDb['Type'] if IMDb['Type'] == "movie" else "tv"

            results = res['movie_results'][0] if subtype == "movie" else res['tv_results'][0]
            titleZH = results['title'] if subtype == "movie" else results['name']  # movies use 'title', TV uses 'name'
            titleEN = results['original_title'] if subtype == "movie" else results['original_name']
            if titleZH == titleEN and lan != "zh-CN":
                return IMDb2TMDb(IMDbID, lan="zh-CN")  # no zh-TW translation: fall back to the zh-CN one
            genre_ids = results['genre_ids']

            genres = "|".join([MVgenres[genre_id] if subtype == "movie" else TVgenres[genre_id] for genre_id in genre_ids])
            TMDbID = "TMDbMV_%s" % (results['id'])
            TMDbRating = results['vote_average']

            reg1 = reg2 = reg3 = "None"
            for reg in regDicEN.keys():  # region
                if reg == region:
                    reg1 = regDicEN[reg][0]
                    reg2 = regDicEN[reg][1]
                    reg3 = reg
                    break

            AllTitle1 = [titleEN] + [titleIMDb]
            AllTitle2 = list(set(AllTitle1))
            AllTitle2.sort(key=AllTitle1.index)

            if config.CHT_TW:  # Traditional Chinese / Taiwanese titles
                titleZH = OpenCC('s2twp').convert(titleZH)
                genres = OpenCC('s2twp').convert(genres)
                reg1 = OpenCC('s2twp').convert(reg1)
            if config.ZH_ENG:  # mixed Chinese/English title
                titleEN = ""
                for tt in list(AllTitle2):  # iterate over a copy: the list is mutated below
                    if not checkzh(tt):
                        AllTitle2.remove(tt)
                        titleEN = tt.replace(" : ", ":").replace(": ", ":")
                        break
            title = (titleZH + " " + titleEN) if titleIMDb and len(titleEN) <= config.ENGlen and titleZH != titleEN else titleZH
            save = "%s\t%s\t%s\t%s\t\t%s\t%s\t%s\t%s\t%s\t%s" % (IMDbID, year, reg1, IMDbRating, titleZH, titleEN, "/".join(AllTitle2), genres, IMDbID, TMDbID)
            name = "[%s][%s]%s(%s)(%s)(%s)" % (year, reg2, title, genres.replace("|", "_"), IMDbRating, IMDbID)
            return [subtype, year, reg1, name, save]
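# Return contract sketch for IMDb2TMDb() (requires config.TMDbAPI and config.Rapid_IMDb;
# the ID and values below are illustrative): on success it returns
#   [subtype, year, reg1, name, save]
# e.g. ['movie', '2010', '美國', '[2010][美]全面啟動 Inception(動作_科幻)(8.8)(tt1375666)', '<tab-separated row>'],
# and False (or None, when TMDb has no match) otherwise.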
def IMDbInt():  # fetch the TMDb genre-id tables once at import time
    global MVgenres, TVgenres
    MVgenresAPI = "https://api.themoviedb.org/3/genre/movie/list?api_key=%s&language=zh-TW" % (config.TMDbAPI)  # was zh_TW; TMDb expects the hyphenated tag
    genres = resjson(MVgenresAPI)['genres']
    MVgenres = {}
    for genre in genres:
        MVgenres[genre['id']] = genre['name']
    TVgenresAPI = "https://api.themoviedb.org/3/genre/tv/list?api_key=%s&language=zh-TW" % (config.TMDbAPI)
    genres = resjson(TVgenresAPI)['genres']
    TVgenres = {}
    for genre in genres:
        TVgenres[genre['id']] = genre['name']

IMDbInt()
--------------------------------------------------------------------------------
/Movie/region.txt:
--------------------------------------------------------------------------------
香港,港,HongKong
中国香港,港,HongKong
台湾,台,Taiwan
中国台湾,台,Taiwan
中国大陆,陸,China
中国,陸,China
美国,美,USA
英国,英,UK
日本,日,Japan
韩国,韓,SouthKorea
法国,法,France
德国,德,Germany
意大利,義,Italy
西班牙,西,Spain
印度,印,India
泰国,泰,Thailand
俄罗斯,俄,Russia
伊朗,伊朗,Iran
加拿大,加,Canada
加拿大 Canada,加,Canada
澳大利亚,澳,Australia
爱尔兰,愛,Ireland
瑞典,瑞典,Sweden
巴西,巴西,Brazil
丹麦,丹,Denmark
墨西哥,墨,Mexico
土耳其,土,Turkey
土耳其 Turkey,土,Turkey
柬埔寨,柬,Cambodia
匈牙利,匈,Hungary
挪威,挪威,Norway
冰岛,冰島,Iceland
印度尼西亚,印尼,Indonesia
比利时,比,Belgium
--------------------------------------------------------------------------------
/Movie/requirements.txt:
--------------------------------------------------------------------------------
requests
bs4
lxml
fake-useragent
user-agent  # needed by search.py (from user_agent import generate_user_agent); missing from the original list
opencc-python-reimplemented
html2bbcode
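For reference, a minimal sketch of how get.py turns region.txt above into its lookup table; each "中文名,縮寫,English" line becomes an English-keyed entry, and later duplicate keys overwrite earlier ones:

with open("region.txt", "r", encoding="utf-8-sig") as f:
    regDicEN = {row[-1]: row[:-1] for row in (l.strip().split(',') for l in f)}

print(regDicEN["Japan"])   # -> ['日本', '日']
print(regDicEN["Taiwan"])  # -> ['中国台湾', '台']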
re.search(r"TJUPT|AOA|QAQ|PBO|DGF|NigulaSi|VCB-Studio",dirname): #From TJUPT 40 | ptsearch = tjupt.search(dirname, config.headers, "sites\\.cookies\\tjupt.txt") 41 | if ptsearch: 42 | IMDbID = ptsearch['imdb'] if ptsearch['imdb'] else "" 43 | dblink = ptsearch['douban'] if ptsearch['douban'] else (imdb2db2(IMDbID) if IMDbID else "") 44 | return {'douban':dblink,'imdb':IMDbID,'source':'TJUPT'} 45 | if not (IMDbID or dblink) and re.search(r"PTer|AREY|NTb|NTG|ExREN|FRDS|beAst|CHD|RBOF|recked89",dirname): #From PTer 46 | ptsearch = pter.search(dirname, config.headers, "sites\\.cookies\\pter.txt") 47 | if ptsearch: 48 | IMDbID = ptsearch['imdb'] if ptsearch['imdb'] else "" 49 | dblink = ptsearch['douban'] if ptsearch['douban'] else (imdb2db2(IMDbID) if IMDbID else "") 50 | return {'douban':dblink,'imdb':IMDbID,'source':'PTer'} 51 | return False 52 | 53 | def imdb2db2(IMDbID,count=3): 54 | if count < 0: 55 | return '' 56 | url = "https://movie.douban.com/j/subject_suggest?q={}".format(IMDbID) 57 | cookies = http.cookiejar.MozillaCookieJar('sites\\.cookies\\douban.txt') 58 | cookies.load() 59 | res = requests.get(url,headers=config.headers,cookies=cookies) 60 | if '检测到有异常请求从你的 IP 发出' in res.text: 61 | print("*Error : IP banned by douban.") 62 | exit() 63 | return False 64 | res = res.json() # return dict 65 | try: 66 | dblink = re.search(r"https:\/\/(movie\.)?douban\.com\/(subject|movie)\/(\d+)",res[0]['url']).group(0) 67 | time.sleep(0.5) 68 | return dblink 69 | except: 70 | imdb2db2(IMDbID,count-1) 71 | return '' 72 | def MTeam(keyword,cookies=config.MTeam,headers=config.headers): #未知錯誤,疑似cookies無法使用 73 | if not config.MTeam: 74 | return False 75 | key2 = re.search(r'\.?([A-Za-z0-9.\']+\.S\d+)', keyword).group(1) if re.search(r'\.?([A-Za-z0-9.\']+\.S\d+)', keyword) else keyword 76 | url="https://pt.m-team.cc/torrents.php?search="+ key2 77 | response=requests.get(url,headers=headers,cookies=cookies) 78 | response.encoding = 'UTF-8' 79 | soup = BeautifulSoup(response.text, 'lxml') 80 | results = soup.find_all("table",{"class":"torrentname"}) 81 | reslinks = ["https://pt.m-team.cc/"+result.find("a").get("href") for result in results] #取得搜尋結果鏈接 82 | for reslink in reslinks: 83 | res=requests.get(reslink,headers={'User-Agent':UA},cookies=cookies) 84 | res.encoding = 'UTF-8' 85 | soup = BeautifulSoup(res.text, 'lxml') 86 | try: 87 | title = soup.find("a",{"class":"index"}).getText().replace(".torrent","").replace("[M-TEAM].","") 88 | subtitle = soup.find("td",{"class":"rowfollow","valign":"top"}).getText() 89 | except: 90 | print(soup) 91 | continue 92 | if title == keyword: 93 | imdb_search = re.search(r"(http|https)://www.imdb.com/title/(tt\d+)",res.text) 94 | db_search = re.search(r"https:\/\/(movie\.)?douban\.com\/(subject|movie)\/(\d+)",res.text) 95 | dblink = db_search.group() if db_search else "" 96 | imdbid = imdb_search.group(2) if imdb_search else "" 97 | if dblink or imdbid: 98 | return {'douban':dblink,'imdb':imdbid} 99 | elif re.search(r"(:|:)(.+)\(",subtitle): 100 | return {'title':re.search(r"(:|:)(.+)\(",subtitle).group(2).strip()} 101 | return False 102 | def PuTao(keyword,cookies=config.PuTao): 103 | if not config.PuTao: 104 | return False 105 | key2 = keyword if not re.match(r'(.+?)\.(mkv|mp4|ts)', keyword) else re.match(r'(.+?)\.(mkv|mp4|ts)', keyword).group(1) 106 | url="https://pt.sjtu.edu.cn/torrents.php?search="+key2 107 | response=requests.get(url,headers={'User-Agent':UA},cookies=cookies) 108 | response.encoding = 'UTF-8' 109 | soup = BeautifulSoup(response.text, 'lxml') 110 
def MTeam(keyword, cookies=config.MTeam, headers=config.headers):  # unknown failure, the cookies appear to be rejected
    if not config.MTeam:
        return False
    key2 = re.search(r'\.?([A-Za-z0-9.\']+\.S\d+)', keyword).group(1) if re.search(r'\.?([A-Za-z0-9.\']+\.S\d+)', keyword) else keyword
    url = "https://pt.m-team.cc/torrents.php?search=" + key2
    response = requests.get(url, headers=headers, cookies=cookies)
    response.encoding = 'UTF-8'
    soup = BeautifulSoup(response.text, 'lxml')
    results = soup.find_all("table", {"class": "torrentname"})
    reslinks = ["https://pt.m-team.cc/" + result.find("a").get("href") for result in results]  # collect result links
    for reslink in reslinks:
        res = requests.get(reslink, headers={'User-Agent': UA}, cookies=cookies)
        res.encoding = 'UTF-8'
        soup = BeautifulSoup(res.text, 'lxml')
        try:
            title = soup.find("a", {"class": "index"}).getText().replace(".torrent", "").replace("[M-TEAM].", "")
            subtitle = soup.find("td", {"class": "rowfollow", "valign": "top"}).getText()
        except AttributeError:
            print(soup)
            continue
        if title == keyword:
            imdb_search = re.search(r"(http|https)://www.imdb.com/title/(tt\d+)", res.text)
            db_search = re.search(r"https:\/\/(movie\.)?douban\.com\/(subject|movie)\/(\d+)", res.text)
            dblink = db_search.group() if db_search else ""
            imdbid = imdb_search.group(2) if imdb_search else ""
            if dblink or imdbid:
                return {'douban': dblink, 'imdb': imdbid}
            elif re.search(r"(:|:)(.+)\(", subtitle):
                return {'title': re.search(r"(:|:)(.+)\(", subtitle).group(2).strip()}
    return False

def PuTao(keyword, cookies=config.PuTao):
    if not config.PuTao:
        return False
    key2 = keyword if not re.match(r'(.+?)\.(mkv|mp4|ts)', keyword) else re.match(r'(.+?)\.(mkv|mp4|ts)', keyword).group(1)
    url = "https://pt.sjtu.edu.cn/torrents.php?search=" + key2
    response = requests.get(url, headers={'User-Agent': UA}, cookies=cookies)
    response.encoding = 'UTF-8'
    soup = BeautifulSoup(response.text, 'lxml')
    results = soup.find_all("table", {"class": "torrentname"})
    reslinks = ["https://pt.sjtu.edu.cn/" + result.find("a").get("href") for result in results]  # collect result links
    for reslink in reslinks:
        res = requests.get(reslink, headers={'User-Agent': UA}, cookies=cookies)
        res.encoding = 'UTF-8'
        soup = BeautifulSoup(res.text, 'lxml')
        title = soup.find("a", {"class": "index"}).getText().replace(".torrent", "").replace("[PT].", "")
        if title == keyword:
            imdb_search = re.search(r"(http|https)://www.imdb.com/title/(tt\d+)", res.text)
            db_search = re.search(r"https:\/\/(movie\.)?douban\.com\/(subject|movie)\/(\d+)", res.text)
            dblink = db_search.group() if db_search else ""
            imdbid = imdb_search.group(2) if imdb_search else ""
            if dblink or imdbid:
                return {'douban': dblink, 'imdb': imdbid}
    return False

def TTG(keyword, cookies=config.TTG):
    if not config.TTG:
        return False
    key2 = keyword if not re.match(r'(.+?)\.(mkv|mp4|ts)', keyword) else re.match(r'(.+?)\.(mkv|mp4|ts)', keyword).group(1)
    url = "https://totheglory.im/browse.php?c=M&search_field=" + key2
    cookies = http.cookiejar.MozillaCookieJar('sites\\.cookies\\ttg.txt')  # note: overrides the cookies argument with the Netscape cookie file
    cookies.load()
    response = requests.get(url, headers={'User-Agent': UA}, cookies=cookies)
    response.encoding = 'UTF-8'
    soup = BeautifulSoup(response.text, 'lxml')
    results = soup.find_all("div", {"class": "name_left"})
    reslinks = ["https://totheglory.im/" + result.find("a").get("href") for result in results]  # collect result links
    for reslink in reslinks:
        res = requests.get(reslink, headers={'User-Agent': UA}, cookies=cookies)
        res.encoding = 'UTF-8'
        soup = BeautifulSoup(res.text, 'lxml')
        title = soup.find("a", {"class": "index"}).getText().replace(".torrent", "").replace("[TTG] ", "")
        ftitle = soup.find("h1").getText().replace("[email protected]", "")
        subtitle = ftitle[ftitle.find("[") + 1:ftitle.find("]")]
        if title == keyword or title == key2:
            imdb_search = re.search(r"(http|https)://www.imdb.com/title/(tt\d+)", res.text)
            db_search = re.search(r"https:\/\/(movie\.)?douban\.com\/(subject|movie)\/(\d+)", res.text)
            dblink = db_search.group() if db_search else ""
            imdbid = imdb_search.group(2) if imdb_search else ""
            # title extraction from the subtitle
            search_name = ''
            title_search_1 = re.search(r"(.+) 全集", subtitle)
            title_search_2 = re.search(r"(\d{2})年( )?(\d{1,2}月|.季)( )?(.+劇) (.+) 全\d+(話|集)", subtitle)
            title_search_3 = re.search(r"(.+劇) (.+) 主演", subtitle)
            if title_search_1:
                search_name = title_search_1.group(1)
            elif title_search_2:
                year = title_search_2.group(1)
                search_name = title_search_2.group(6) + " " + ("20" + year if int(year) < 30 else "19" + year)
            elif title_search_3:
                search_name = title_search_3.group(2)
            if dblink or imdbid or search_name:
                return {'douban': dblink, 'imdb': imdbid, 'title': search_name}
    return False

if __name__ == "__main__":
    x = imdb2db2("tt10027990")
    print(x)
--------------------------------------------------------------------------------
/Movie/sites/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cxyfer/AutoSort/83f43f50a8d36d74442f92121b631bf482282d39/Movie/sites/__init__.py
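All of the sites/* modules below authenticate with Netscape-format cookie files kept under sites\.cookies\ (for example, exported with a browser "cookies.txt" extension). A quick sanity check that a file loads, with an assumed path:

import http.cookiejar

cj = http.cookiejar.MozillaCookieJar("sites\\.cookies\\frds.txt")
cj.load()  # raises FileNotFoundError or LoadError if missing or not in Netscape format
print(len(cj), "cookies loaded")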
--------------------------------------------------------------------------------
/Movie/sites/frds.py:
--------------------------------------------------------------------------------
import requests, re, os, time
from bs4 import BeautifulSoup
import http.cookiejar

def search(keyword, headers, cookies='.cookies\\frds.txt'):
    if not os.path.exists(cookies):
        return False
    re_subname = re.match(r'(.+?)\.(mkv|mp4|ts|avi)', keyword)  # strip the file extension
    key1 = key2 = re_subname.group(1) if re_subname else keyword
    re_brackets = re.search(r'\[(.+?)\d{0,2}(\(.+\))?\].+(\d{4})', key2)  # strip square brackets
    key2 = "{} {}".format(re_brackets.group(1), re_brackets.group(3)) if re_brackets else key2
    key2 = key2.replace("@", " ")
    key2 = key2.replace(".Complete.", " ")
    url = "https://pt.keepfrds.com/torrents.php?search=" + key2

    cookies = http.cookiejar.MozillaCookieJar(cookies)
    cookies.load()
    response = requests.get(url, headers=headers, cookies=cookies)
    response.encoding = 'UTF-8'
    if response.status_code != 200:
        print(response.status_code)

    soup = BeautifulSoup(response.text, 'lxml')
    results = soup.find_all("table", {"class": "torrentname"})
    reslinks = ["https://pt.keepfrds.com/" + result.find("a").get("href") for result in results]
    for reslink in reslinks:
        res = requests.get(reslink, headers=headers, cookies=cookies)
        res.encoding = 'UTF-8'
        if res.status_code != 200:  # was checking the search page's status here, not the detail page's
            print(res.status_code)
        soup = BeautifulSoup(res.text, 'lxml')

        title = soup.find("a", {"class": "index"}).getText().replace(".torrent", "").replace("[FRDS].", "")
        if title == keyword:
            imdb_search = re.search(r"(http|https)://www\.imdb\.com/title/(tt\d+)", res.text)
            db_search = re.search(r"https:\/\/(movie\.|www\.)?douban\.com\/(subject|movie)\/(\d+)", res.text)
            dblink = db_search.group() if db_search else ""
            imdbid = imdb_search.group(2) if imdb_search else ""
            if dblink or imdbid:
                return {'douban': dblink, 'imdb': imdbid}
    if len(reslinks) == 0:
        print(url)  # no results: print the search URL, useful for tuning the keyword logic
    return False

if __name__ == '__main__':
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36'}
    x = search("天气之子.Weathering.With.You.2019.Bluray.1080p.HDR.x265.10bit.MNHD-FRDS", headers)
    print(x)
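Each module in sites/ follows the same contract, which search.py's PT() relies on. A sketch of the shared interface, with illustrative keyword and paths (run from the Movie/ directory):

from sites import frds

# search(keyword, headers, cookies_path) -> {'douban': <link or ''>, 'imdb': <ttID or ''>}
# on a confirmed title match, or False when cookies are missing / nothing matches exactly.
result = frds.search("Some.Movie.2019.1080p.BluRay.x264-FRDS",
                     {"User-Agent": "Mozilla/5.0"}, "sites\\.cookies\\frds.txt")
if result:
    print(result.get("imdb"), result.get("douban"))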
--------------------------------------------------------------------------------
/Movie/sites/ourbits.py:
--------------------------------------------------------------------------------
import requests, re, os, time
from bs4 import BeautifulSoup
import http.cookiejar

def search(keyword, headers, cookies='.cookies\\ourbits.txt'):
    if not os.path.exists(cookies):
        return False
    re_subname = re.match(r'(.+?)\.(mkv|mp4|ts|avi)', keyword)  # strip the file extension
    key2 = re_subname.group(1) if re_subname else keyword
    key2 = key2.replace(".Complete.", " ").replace(".SUBBED.", " ")
    key2 = key2.replace("第", " 第 ").replace("季", " 季 ")  # pad Chinese season markers with spaces
    url = "https://ourbits.club/torrents.php?search=" + key2

    cookies = http.cookiejar.MozillaCookieJar(cookies)
    cookies.load()
    response = requests.get(url, headers=headers, cookies=cookies)
    response.encoding = 'UTF-8'
    if response.status_code != 200:
        print(response.status_code)

    soup = BeautifulSoup(response.text, 'lxml')
    results = soup.find_all("table", {"class": "torrentname"})
    reslinks = ["https://ourbits.club/" + result.find("a").get("href") for result in results]  # collect result links
    for reslink in reslinks:
        res = requests.get(reslink, headers=headers, cookies=cookies)
        res.encoding = 'UTF-8'
        if res.status_code != 200:
            print(res.status_code)
        soup = BeautifulSoup(res.text, 'lxml')
        title = soup.find("a", {"class": "index"}).getText().replace(".torrent", "").replace("[OurBits].", "")

        if title == keyword or title == key2:
            imdb_search = re.search(r"(http|https)://www.imdb.com/title/(tt\d+)", res.text)
            douban_search = re.search(r"https:\/\/(movie\.|www\.)?douban\.com\/(subject|movie)\/(\d+)", res.text)
            imdbid = imdb_search.group(2) if imdb_search else ""
            dblink = douban_search.group() if douban_search else ""
            try:
                dblink = 'https://movie.douban.com/subject/' + soup.find('div', {'id': 'kdouban'}).get('data-doubanid') if not dblink else dblink
            except AttributeError:  # no kdouban widget on the page
                pass
            if dblink or imdbid:
                return {'douban': dblink, 'imdb': imdbid}
    if len(reslinks) == 0:
        print(url)  # useful for tuning the keyword logic
    return False

if __name__ == '__main__':
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36'}
    x = search("All.Together.Now.2020.1080p.NF.WEB-DL.DDP5.1.H264-Ao", headers)
    print(x)
--------------------------------------------------------------------------------
/Movie/sites/pter.py:
--------------------------------------------------------------------------------
import requests, re, os, time
from bs4 import BeautifulSoup
import http.cookiejar

def decode(cfemail):  # undo Cloudflare's email obfuscation
    enc = bytes.fromhex(cfemail)
    return bytes([c ^ enc[0] for c in enc[1:]]).decode('utf8')
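# Worked example for decode() above: the first byte of the hex string is an XOR key
# for all remaining bytes, so decode("2a68666f") unpacks as
#   0x68 ^ 0x2a = 'B', 0x66 ^ 0x2a = 'L', 0x6f ^ 0x2a = 'E'  ->  "BLE".
# Cloudflare hides strings that look like emails this way, which is why the torrent
# title has to be reassembled from the decoded fragment below.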
soup.find("a",{"class":"index"}).getText().replace(".torrent","").replace("[PTer].","").replace("[email protected]",decrypted) 44 | 45 | if title == keyword or title == key1: 46 | imdb_search = re.search(r"(http|https)://www\.imdb\.com/title/(tt\d+)",res.text) 47 | db_search = re.search(r"https:\/\/(movie\.|www\.)?douban\.com\/(subject|movie)\/(\d+)",res.text) 48 | dblink = db_search.group() if db_search else "" 49 | imdbid = imdb_search.group(2) if imdb_search else "" 50 | if dblink or imdbid: 51 | return {'douban':dblink,'imdb':imdbid} 52 | print(url) if len(reslinks) == 0 else print("",end="") #無結果時顯示搜尋關鍵字,搜尋邏輯優化用 53 | return False 54 | 55 | if __name__ == '__main__': 56 | headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36'} 57 | x = search("鬼马智多星.All.the.Wrong.Clues.1981.BluRay.1080p.HEVC.10bit.2Audio.MiniFHD-XPcl@PTer.mkv", headers) 58 | print(x) -------------------------------------------------------------------------------- /Movie/sites/ssd.py: -------------------------------------------------------------------------------- 1 | import requests, re, os, time 2 | from bs4 import BeautifulSoup 3 | import http.cookiejar 4 | 5 | def search(keyword, headers, cookies=".cookies\\ssd.txt"): 6 | if not os.path.exists(cookies): 7 | return False 8 | re_subname = re.match(r'(.+?)\.(mkv|mp4|ts|avi)', keyword) #去除副檔名 9 | key1 = key2 = re_subname.group(1) if re_subname else keyword 10 | re_brackets = re.search(r'\[(.+?)(\(.+\))?\].+(\d{4})', key2) #去除中括弧 11 | key2 = "{} {}".format(re_brackets.group(1), re_brackets.group(3)) if re_brackets else key2 12 | ssd_movie = re.search(r'(.+?)\d{0,2}(\(.+\))?\.(\d{4})(\..+)?.?£.+', key2) #SSD-Movie 13 | key2 = "{} {} CMCT".format(ssd_movie.group(1),ssd_movie.group(3)) if ssd_movie else key2 14 | ssd_tv = re.search(r'(.+)\.全\d+集|话\.(\d{4})\..+£.+', key1) #SSD-TV 15 | key2 = "{} CMCT".format(ssd_tv.group(1)) if ssd_tv else key2 16 | ssd_version = re.search(r'(.+)( |\.)(.+版)', key2) #SSD-Version 17 | key2 = key2.replace(ssd_version.group(3),"") if ssd_version else key2 18 | key2 = key2.replace("!"," ").replace("!"," ").replace("-"," ").replace("\'"," ") 19 | url="https://springsunday.net/torrents.php?search="+key2 20 | 21 | cookies = http.cookiejar.MozillaCookieJar(cookies) 22 | cookies.load() 23 | response=requests.get(url,headers=headers,cookies=cookies) 24 | response.encoding = 'UTF-8' 25 | print(response.status_code) if response.status_code != 200 else print("",end="") 26 | 27 | soup = BeautifulSoup(response.text, 'lxml') 28 | results = soup.find_all("table",{"class":"torrentname"}) 29 | reslinks = ["https://springsunday.net/"+result.find("a").get("href") for result in results] #取得搜尋結果鏈接 30 | for reslink in reslinks: 31 | res=requests.get(reslink,headers=headers,cookies=cookies) 32 | res.encoding = 'UTF-8' 33 | print(res.status_code) if res.status_code != 200 else print("",end="") 34 | soup = BeautifulSoup(res.text, 'lxml') 35 | title = soup.find("a",{"class":"index"}).getText().replace(".torrent","").replace("[SSD].","") 36 | if title == keyword: 37 | imdb_search = re.search(r"(http|https)://(www|us)\.imdb\.com/title/(tt\d+)",res.text) 38 | imdb_search2 = re.search(r'tt\d{6,}',res.text) 39 | db_search = re.search(r"https:\/\/(movie\.|www\.)?douban\.com\/(subject|movie)\/(\d+)",res.text) 40 | dblink = db_search.group() if db_search else "" 41 | imdbid = imdb_search.group(3) if imdb_search else "" 42 | imdbid = imdb_search2.group() if not imdbid and imdb_search2 else 
--------------------------------------------------------------------------------
/Movie/sites/tccf.py:
--------------------------------------------------------------------------------
import requests, re, os, time
from bs4 import BeautifulSoup
import http.cookiejar

def search(keyword, headers, cookies='.cookies\\tccf.txt'):
    if not os.path.exists(cookies):
        return False
    re_subname = re.match(r'(.+?)\.(mkv|mp4|ts|avi)', keyword)  # strip the file extension
    key1 = key2 = re_subname.group(1) if re_subname else keyword
    re_brackets = re.search(r'\[(.+?)\d{0,2}(\(.+\))?\].+(\d{4})', key2)  # strip square brackets
    key2 = "{} {}".format(re_brackets.group(1), re_brackets.group(3)) if re_brackets else key2
    key2 = key2.replace("@", " ")
    key2 = key2.replace(".Complete.", " ")
    url = "https://et8.org/torrents.php?search=" + key2

    cookies = http.cookiejar.MozillaCookieJar(cookies)
    cookies.load()
    response = requests.get(url, headers=headers, cookies=cookies)
    response.encoding = 'UTF-8'
    if response.status_code != 200:
        print(response.status_code)

    soup = BeautifulSoup(response.text, 'lxml')
    results = soup.find_all("table", {"class": "torrentname"})
    reslinks = ["https://et8.org/" + result.find("a").get("href") for result in results]  # collect result links
    for reslink in reslinks:
        res = requests.get(reslink, headers=headers, cookies=cookies)
        res.encoding = 'UTF-8'
        if res.status_code != 200:  # was checking the search page's status here, not the detail page's
            print(res.status_code)
        soup = BeautifulSoup(res.text, 'lxml')

        title = soup.find("a", {"class": "index"}).getText().replace(".torrent", "").replace("[TCCF].", "")
        if title == keyword:
            imdb_search = re.search(r"(http|https)://www\.imdb\.com/title/(tt\d+)", res.text)
            db_search = re.search(r"https:\/\/(movie\.|www\.)?douban\.com\/(subject|movie)\/(\d+)", res.text)
            dblink = db_search.group() if db_search else ""
            imdbid = imdb_search.group(2) if imdb_search else ""
            if dblink or imdbid:
                return {'douban': dblink, 'imdb': imdbid}
    if len(reslinks) == 0:
        print(url)  # no results: print the search URL, useful for tuning the keyword logic
    return False

if __name__ == '__main__':
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36'}
    x = search("Tokyo.Olympiad.1965.Criterion.Collection.720p.BluRay.DD1.0.x264-BMDru", headers)
    print(x)
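tjupt.py below only splits release-group tags on "@"; a hand-traced sketch of what its keyword normalization produces for the sample in its own __main__ block:

key = "Liu.Lao.Gen.S02.1080p.WEB-DL.H264.AAC-RushB@TJUPT"
key2 = key.replace("@", " ").replace(".Complete.", " ")
print(key2)  # -> Liu.Lao.Gen.S02.1080p.WEB-DL.H264.AAC-RushB TJUPT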
--------------------------------------------------------------------------------
/Movie/sites/tjupt.py:
--------------------------------------------------------------------------------
import requests, re, os, time
from bs4 import BeautifulSoup
import http.cookiejar

def decode(cfemail):  # undo Cloudflare's email obfuscation (same scheme as pter.py)
    enc = bytes.fromhex(cfemail)
    return bytes([c ^ enc[0] for c in enc[1:]]).decode('utf8')

def search(keyword, headers, cookies='.cookies\\tjupt.txt'):
    if not os.path.exists(cookies):
        return False
    re_subname = re.match(r'(.+?)\.(mkv|mp4|ts|avi)', keyword)  # strip the file extension
    key1 = key2 = re_subname.group(1) if re_subname else keyword
    key2 = key2.replace("@", " ")
    key2 = key2.replace(".Complete.", " ")
    url = "https://www.tjupt.org/torrents.php?search=" + key2

    cookies = http.cookiejar.MozillaCookieJar(cookies)
    cookies.load()
    response = requests.get(url, headers=headers, cookies=cookies)
    response.encoding = 'UTF-8'
    if response.status_code != 200:
        print(response.status_code)

    soup = BeautifulSoup(response.text, 'lxml')
    results = soup.find_all("table", {"class": "torrentname"})
    reslinks = ["https://www.tjupt.org/" + result.find("a").get("href") for result in results]  # collect result links
    for reslink in reslinks:
        res = requests.get(reslink, headers=headers, cookies=cookies)
        res.encoding = 'UTF-8'
        if res.status_code != 200:
            print(res.status_code)

        soup = BeautifulSoup(res.text, 'lxml')
        cf_email = soup.find("span", {"class": "__cf_email__"})
        decrypted = decode(cf_email.get("data-cfemail")) if cf_email else ""
        title = soup.find("a", {"class": "index"}).getText().replace("[email protected]", decrypted).replace("[TJUPT].", "").replace(".torrent", "")

        if title == keyword or title == key1:
            imdb_search = re.search(r"(http|https)://www\.imdb\.com/title/(tt\d+)", res.text)
            db_search = re.search(r"https:\/\/(movie\.|www\.)?douban\.com\/(subject|movie)\/(\d+)", res.text)
            dblink = db_search.group() if db_search else ""
            imdbid = imdb_search.group(2) if imdb_search else ""
            if dblink or imdbid:
                return {'douban': dblink, 'imdb': imdbid}
    if len(reslinks) == 0:
        print(url)  # no results: print the search URL, useful for tuning the keyword logic
    return False

if __name__ == '__main__':
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36'}
    x = search("Liu.Lao.Gen.S02.1080p.WEB-DL.H264.AAC-RushB@TJUPT", headers)
    print(x)
--------------------------------------------------------------------------------
/Movie/sql.py:
--------------------------------------------------------------------------------
#!/usr/bin/python
import sqlite3, re

def init(db_name, table_name):
    conn = sqlite3.connect(db_name)
    cursor = conn.cursor()
    execute = '''CREATE TABLE IF NOT EXISTS %s
        (SID VARCHAR(15) PRIMARY KEY NOT NULL,
        Year INT NOT NULL,
        地區 VARCHAR(10) NOT NULL,
        IMDb REAL,
        豆瓣 REAL,
        中文標題 VARCHAR(100) NOT NULL,
        英文標題 TEXT,
        其他標題 TEXT,
        類型 VARCHAR(20) NOT NULL,
        IMDbID VARCHAR(15),
        DBID VARCHAR(15),
        FolderPath TEXT,
        UNIQUE(SID)
        )''' % (table_name)  # SID holds "tt..."/"db_..." strings, so it is declared as text (was INT)
    cursor.execute(execute)
    cursor.close()
    conn.close()
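# Usage sketch for this module (hypothetical values; 12 columns, matching the schema above):
#   init("AutoSort.db", "Movie")
#   input("AutoSort.db", "Movie",
#         ["tt0133093", 1999, "美", 8.7, 9.1, "駭客任務", "The Matrix", "",
#          "科幻|動作", "tt0133093", "db_1291843", "G:/Movie/The.Matrix.1999"], replace=True)
#   query("AutoSort.db", "Movie", "tt0133093")  # -> the row as a tuple, or None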
def build_tsv(tsvname):  # import the old TSV records as a list, rewritten to the new format
    with open(tsvname, "r", encoding='utf-8-sig') as data:
        List = []
        for line in data:
            part1 = line.strip().split("\t")[0:4]
            part2 = line.strip().split("\t")[4:10]
            part3 = line.strip().split("\t")[10]
            if re.search(r"\((db_\d+)\)", line):  # a Douban ID found in the folder name
                MainID = re.search(r"\((db_\d+)\)", line).group(1)
            elif re.search(r"\((tt\d+)\)", line):  # an IMDb ID found in the folder name
                MainID = re.search(r"\((tt\d+)\)", line).group(1)
            else:
                print(line)
                continue
            reList = [MainID] + part1 + part2 + [part3]
            List += [reList]
        return List

def input(db_name, table_name, List, many=False, replace=False):  # note: shadows the built-in input()
    num = len(List[0]) if many else len(List)
    conn = sqlite3.connect(db_name)
    cursor = conn.cursor()
    pattern = "IGNORE" if not replace else "REPLACE"
    execute = 'INSERT OR %s INTO %s VALUES (?%s)' % (pattern, table_name, ",?" * (num - 1))
    if many:  # a batch: List is a list of rows
        cursor.executemany(execute, List)
    else:
        cursor.execute(execute, List)
    conn.commit()
    cursor.close()
    conn.close()

def output(db_name, table_name, file_name):
    with open(file_name, "w", encoding='utf-8-sig') as write_file:
        conn = sqlite3.connect(db_name)
        cursor = conn.cursor()
        execute = "SELECT * FROM %s" % (table_name)
        for row in cursor.execute(execute):
            writeRow = "\t".join('%s' % r for r in row) + "\n"
            write_file.write(writeRow)

def query(db_name, table_name, sid):
    conn = sqlite3.connect(db_name)
    cursor = conn.cursor()
    execute = "SELECT * From %s WHERE SID = ?" % (table_name)
    result = cursor.execute(execute, [sid]).fetchone()
    cursor.close()
    conn.close()
    return result
# init(db_name, table_name)

'''
db_name = "AutoSort.db"
table_name = "Movie"

# export
# output(db_name, table_name, "Movie.tsv")

# import
List = build_tsv("IN.tsv")
input(db_name, table_name, List, many=True, replace=True)'''
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# AutoSortTools

#### AnimeRenamer

* Anime episode renaming

  Only renames individual episode files, and the episode-title list has to be compiled by hand; if a suitable anime metadata database is found, automatic lookup will be added.

#### JAVAutoSort

* JAVAutoSort
--------------------------------------------------------------------------------