├── .gitignore
├── AnimeRenamer
│   ├── renamer.py
│   └── title.txt
├── JAV
│   ├── JAV.py
│   ├── JAVAutoSorted.S.Local.py
│   ├── README.md
│   ├── config.py
│   ├── javdb.txt
│   ├── keyword.txt
│   ├── search.py
│   └── sql.py
├── LICENSE
├── Movie
│   ├── MVAutoSort.py
│   ├── README.md
│   ├── config_sample.py
│   ├── folder.txt
│   ├── gen.py
│   ├── get.py
│   ├── region.txt
│   ├── requirements.txt
│   ├── search.py
│   ├── sites
│   │   ├── __init__.py
│   │   ├── frds.py
│   │   ├── ourbits.py
│   │   ├── pter.py
│   │   ├── ssd.py
│   │   ├── tccf.py
│   │   └── tjupt.py
│   └── sql.py
└── README.md
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | Movie/__pycache__/
4 | Movie/Test/
5 | Movie/test.py
6 | Movie/folder.txt
7 | Movie/api.py
8 | Movie/AutoSort.db
9 | Movie/Movie.tsv
10 | Movie/.cookies
11 | Movie/sites/.cookies
12 | *.py[cod]
13 | *$py.class
14 |
15 | # C extensions
16 | *.so
17 |
18 | # Distribution / packaging
19 | .Python
20 | build/
21 | develop-eggs/
22 | dist/
23 | downloads/
24 | eggs/
25 | .eggs/
26 | lib/
27 | lib64/
28 | parts/
29 | sdist/
30 | var/
31 | wheels/
32 | *.egg-info/
33 | .installed.cfg
34 | *.egg
35 | MANIFEST
36 |
37 | # PyInstaller
38 | # Usually these files are written by a python script from a template
39 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
40 | *.manifest
41 | *.spec
42 |
43 | # Installer logs
44 | pip-log.txt
45 | pip-delete-this-directory.txt
46 |
47 | # Unit test / coverage reports
48 | htmlcov/
49 | .tox/
50 | .coverage
51 | .coverage.*
52 | .cache
53 | nosetests.xml
54 | coverage.xml
55 | *.cover
56 | .hypothesis/
57 | .pytest_cache/
58 |
59 | # Translations
60 | *.mo
61 | *.pot
62 |
63 | # Django stuff:
64 | *.log
65 | local_settings.py
66 | db.sqlite3
67 |
68 | # Flask stuff:
69 | instance/
70 | .webassets-cache
71 |
72 | # Scrapy stuff:
73 | .scrapy
74 |
75 | # Sphinx documentation
76 | docs/_build/
77 |
78 | # PyBuilder
79 | target/
80 |
81 | # Jupyter Notebook
82 | .ipynb_checkpoints
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # celery beat schedule file
88 | celerybeat-schedule
89 |
90 | # SageMath parsed files
91 | *.sage.py
92 |
93 | # Environments
94 | .env
95 | .venv
96 | env/
97 | venv/
98 | ENV/
99 | env.bak/
100 | venv.bak/
101 |
102 | # Spyder project settings
103 | .spyderproject
104 | .spyproject
105 |
106 | # Rope project settings
107 | .ropeproject
108 |
109 | # mkdocs documentation
110 | /site
111 |
112 | # mypy
113 | .mypy_cache/
114 |
115 | # JAV
116 | JAV/Test/
117 | JAV/@~Sorted/
118 | JAV/@CodeList.txt
119 | JAV/@FileList.txt
120 | JAV/Key_nyaa.txt
121 | JAV/data.txt
122 | JAV/sign.png
123 | JAV/sign2.png
124 |
125 | # Movie
126 | Movie/config.py
--------------------------------------------------------------------------------
/AnimeRenamer/renamer.py:
--------------------------------------------------------------------------------
1 | # coding=UTF-8
2 | import os, time
3 |
4 | mypath = os.getcwd()  # working directory
5 |
6 | def logNprint(text, path=mypath, pr=True):
7 |     logpath = path + "\\" + "rename.log"
8 |     #if not os.path.isfile(logpath):
9 |     #    text = "#Renamer Programed By GDST/LMI\n" + text  # prepend a header to a fresh log
10 |     if pr:
11 |         print(text)
12 |     with open(logpath, "a", encoding='utf-8-sig') as data:
13 |         data.write(str(text) + "\n")
14 |
15 | with open("title.txt", "r", encoding='utf-8-sig') as data:
16 |     List = [l.strip().split("\t", 1) for l in data]
17 | Dic = {}
18 | for i in List:
19 |     Dic[i[0]] = i[1]
20 | KeyList = [i[0] for i in List]
21 |
22 | name = mypath[mypath.rfind("]")+1:]  # series name (text after the last "]")
23 | for root, dirs, files in os.walk(mypath):
24 |     if mypath == root or mypath+"\\劇場版" in root:  # skip the root and the "劇場版" (theatrical) folder
25 |         continue
26 |
27 |     currenttime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())  # run time
28 |     runtimetext = "\nRun time : " + currenttime
29 |     logNprint(runtimetext, pr=False, path=root)
30 |
31 |     logNprint("\nPath : "+root.replace(mypath, ".")+"\n", path=root)
32 |     block = root.replace(mypath+"\\", "").split("-")  # fansub group, language and resolution
33 |     lang = block[1] if block[1] == block[-2] else "CHT"  # language (defaults to CHT)
34 |
35 |     for file in sorted(files):
36 |         print(file)
37 |         if ".txt" in file or ".py" in file or ".part" in file or ".log" in file:  # skip non-video files
38 |             continue
39 |         if ".ass" in file or ".srt" in file:  # skip subtitles
40 |             continue
41 |         filepath1 = root + "\\" + file
42 |         #logNprint("File : "+file)  # original file name
43 |         file2 = file
44 |         replaceList = ["1080","720","2160","1280","1920","BS11","2019","2018","S01","S02","S03"]  # strip numbers that could be mistaken for episode numbers
45 |         replaceList += [str(year) for year in range(2000, 2020)]
46 |         for rep in replaceList:
47 |             file2 = file2.replace(rep, "")
48 |
49 |         infopos1 = file2.rfind("[")
50 |         infopos2 = file2.rfind("]")
51 |         file2 = file2.replace(file2[infopos1:infopos2+1], "")
52 |         key = 0
53 |         for i in KeyList:
54 |             if file2.find(i) != -1:
55 |                 key = i
56 |                 break
57 |         if not key:
58 |             continue
59 |         key2 = key+" END" if "END" in file2 else key
60 |         dotpos = file.rfind(".")  # file extension
61 |
62 |         try:
63 |             filename2 = ("%s (%s)-%s[%s][%s][%s]%s" % (name, block[0], key2, Dic[key], block[-1], lang, file[dotpos:].lower()))
64 |             filepath2 = root + "\\" + filename2
65 |         except:
66 |             logNprint("*Error : "+file)
67 |             try:
68 |                 logNprint(Dic[key])
69 |             except:
70 |                 pass
71 |             logNprint(filepath1)
72 |             continue
73 |         if filepath1 == filepath2:  # already renamed
74 |             print("Exist : "+filename2)
75 |             continue
76 |         if not os.path.isfile(filepath2):
77 |             os.rename(filepath1, filepath2)
78 |             logNprint("File : "+file, path=root)
79 |             logNprint("Rename : "+filename2, path=root)
80 | input("\nAll done. Press Enter to exit.")
--------------------------------------------------------------------------------
/AnimeRenamer/title.txt:
--------------------------------------------------------------------------------
1 | 01 第七個精靈
2 | 02 你能找到我嗎?
3 | 03 七罪就是妳
4 | 04 變身
5 | 05 陷入絕望之中
6 | 06 分道揚鑣
7 | 07 給予的力量
8 | 08 降下黑暗的夜之魔王
9 | 09 五年前的天宮市
10 | 10 另一個世界另一個她
11 | 11 星辰墜落的夜之天使
12 |
--------------------------------------------------------------------------------
/JAV/JAV.py:
--------------------------------------------------------------------------------
1 | #-*- coding: utf-8 -*-
2 | #v4.0 20190710 reorganized functions, added preview-image download & merge
3 | #v4.1 20190807 database export, adjusted directory layout
4 | #v4.2 WIP      deduplicate identical files (compare file sizes)
5 |
6 | import os, requests, urllib, time, re
7 | from bs4 import BeautifulSoup
8 | #from fake_useragent import UserAgent
9 | from user_agent import generate_user_agent
10 | import config, search, sql
11 |
12 | ua = generate_user_agent()
13 | db_name = "%s\\%s" % (config.LogPath, config.LogName) if config.LogPath else config.LogName  #SQL
14 |
15 | class Log:
16 |     def NPrint(text):
17 |         os.chdir(mypath)
18 |         print(text)
19 |         with open("error.log", "a", encoding='utf8') as data:
20 |             data.write(str(text)+"\n")
21 |     def Text(text):
22 |         with open("error.log", "a", encoding='utf8') as data:
23 |             data.write(str(text)+"\n")
24 |     def SaveList(key, Title):
25 |         fname = ("@FileList.txt" if Title else "@CodeList.txt")
26 |         new = (title if Title else code)
27 |
28 |         os.chdir(mypath+"\\@~Sorted\\"+key)
29 |         try:  # load the previous list
30 |             with open(fname, "r", encoding='utf8') as clog:
31 |                 SaveList = [l.strip() for l in clog]
32 |         except:
33 |             SaveList = []
34 |         if new not in SaveList:
35 |             SaveList += [new]
36 |         else:
37 |             return
38 |         if len(SaveList) != 0:  # only write non-empty lists
39 |             with open(fname, "w", encoding='utf8') as sdata:  # write back
40 |                 for i in sorted(SaveList):
41 |                     sdata.write(i+"\n")
42 | def convert_bytes(num):
43 |     for x in ['bytes', 'KB', 'MB', 'GB', 'TB']:
44 |         if num < 1024.0:
45 |             return "%3.1f %s" % (num, x)
46 |         num /= 1024.0
47 | def file_size(file_path):
48 |     if os.path.isfile(file_path):
49 |         file_info = os.stat(file_path)
50 |         return convert_bytes(file_info.st_size)
51 | def GetCode(filename):  # uses the global `key` set by the main loop below
52 |     c = key.upper()+"-"
53 |     if c in filename.upper():
54 |         cpos = filename.upper().find(c)
55 |     elif key.upper() in filename.upper():
56 |         c = key.upper()
57 |         cpos = filename.upper().find(c)
58 |         filename = filename.upper().replace(c, c+"-")
59 |         c = c+"-"
60 |     else:
61 |         return None
62 |     code = filename[cpos:].upper()  # fallback when the name ends in digits (keeps `code` bound)
63 |     for i in range(len(filename[cpos+len(c):])):
64 |         if not filename[cpos+len(c)+i].isdigit():
65 |             code = filename[cpos:cpos+len(c)+i].upper()
66 |             break
67 |     if len(code) == len(c):  # no digits followed the keyword → no release code found
68 |         return None
69 |     return code
70 |
71 | # list of code prefixes to process
72 | with open("keyword.txt", "r", encoding='utf-8-sig') as keydata:
73 |     KeyList = [l.strip() for l in keydata if l[0] != "@"]
74 | #KeyList = list(set(KeyList))  # deduplicate the prefixes
75 | if not os.path.isdir(config.tempfolder):  # create the cache folder if it is missing
76 |     os.mkdir(config.tempfolder)
77 | '''with open("keyword2.txt", "r", encoding='utf-8-sig') as keydata:  # special codes absent from the databases (TODO)
78 |     Key2List = [l.strip().split(",") for l in keydata]
79 |     Key2Dic = {}
80 |     for i in Key2List:
81 |         Key2Dic[i[0]] = i[1]'''
82 |
83 | mypath = os.getcwd()  # working directory
84 | for lsdir in sorted(os.listdir(mypath)):
85 |     if not os.path.isdir(mypath+"\\"+lsdir):  # skip anything that is not a folder
86 |         continue
87 |     if lsdir[0] == "@" or lsdir == "__pycache__" or "新作" in lsdir or "合集" in lsdir:  # skip @-prefixed and special folders under the root (personal layout)
88 |         continue
89 |     if not os.path.isdir(mypath+"\\@~Sorted\\"):
90 |         os.mkdir(mypath+"\\@~Sorted\\")
91 |     for root, dirs, files in os.walk(mypath+"\\"+lsdir):
92 |         print("\nPath : "+root)
93 |         for i in files:
94 |             if "padding_file" in i:  # skip filler files
95 |                 continue
96 |             if not re.search(r'.+?\.(mkv|mp4|ts|wmv|avi|flv|rmvb|iso|mov|m2ts|ass|srt)', i.lower()) \
97 |             and not re.match(r'.+?(_|-)?(s|screen|screenshot)\.(jpg|jpeg|png)', i.lower()):
98 |                 #and not re.match(r'.+?\.(jpg|jpeg|png)', i.lower()):  # skip non-video files and non-screenshots
99 |                 continue
100 |             '''for key2 in Key2Dic.keys():  # handle codes absent from the databases (TODO)
101 |                 key2 = key2'''
102 |             for key in KeyList:
103 |                 dirpath = mypath
104 |                 code = GetCode(i)  # extract the release code from the file name
105 |                 if key == "FC2" and "FC2" in i.upper() and re.search(r'\d{6,7}', i):  # special code format
106 |                     code = "FC2-" + re.search(r'\d{6,7}', i).group(0)
107 |                 if not code:  # no code could be extracted from the file name
108 |                     continue
109 |                 if len(code[code.find("-")+1:]) >= 4:  # some codes carry 4-5 digits, but searches need 3
110 |                     code = code.replace("-00", "-")
111 |                     code = code.replace("-0", "-")
112 |                 #if key[0].isdigit() or key == "SIRO" or key == "KIRAY":
113 |                     #continue
114 |                 print("Code :", code)
115 |
116 |                 query = sql.query(db_name, 'JAV', code)  # look up previously saved data
117 |                 if query == None:  # nothing saved → query the web
118 |                     if not os.path.isdir(mypath+"\\@~Sorted\\"+key):
119 |                         os.mkdir(mypath+"\\@~Sorted\\"+key)
120 |                     #result = search.Database1(key, code, mypath)
121 |                     if key == "T28":  # special-case code
122 |                         result = search.Database3(key, code.replace("T28-", "T-28"), mypath)
123 |                         if result['success']:
124 |                             result['code'] = result['code'].replace("T-28", "T28-")
125 |                             result['save'][0] = result['save'][0].replace("T-28", "T28-")
126 |                     elif key[0].isdigit() or key == "SIRO" or key == "KIRAY":
127 |                         result = search.Database2(key, code, mypath)
128 |                     elif key == "FC2" and "FC2" in i.upper():
129 |                         result = search.Database3(key, code, mypath)
130 |                         time.sleep(2)
131 |                     else:
132 |                         result = search.Database1(key, code, mypath)
133 |                     if not result['success']:  # no matching record found
134 |                         print("*Error :", result['error'])
135 |                         result = search.Database1(key, code, mypath) if key[0].isdigit() or key == "SIRO" or key == "KIRAY" else search.Database2(key, code, mypath)  # swap the databases and retry
136 |                         if not result['success']:
137 |                             if key not in ["FC2", "T28"]:
138 |                                 result = search.Database3(key, code, mypath)
139 |                                 if not result['success']:
140 |                                     print("*Error :", result['error'])
141 |                                     continue
142 |                             else:
143 |                                 print("*Error :", result['error'])
144 |                                 continue
145 |
146 |                     save = result['save']
147 |                     sql.input(db_name, 'JAV', save)
148 |                     dirpath = result['dirpath']
149 |                 else:
150 |                     if key == "FC2":
151 |                         dirpath = mypath+"\\@~Sorted\\@"+key+"\\"+query[7]+"\\"+code
152 |                     else:
153 |                         number = int(code[code.find("-")+1:])
154 |                         order = "%03d~%03d" % (number-100+1, number) if number%100 == 0 else "%03d~%03d" % ((number//100)*100+1, (number//100+1)*100)  # hundred-bucket, e.g. ABP-672 → "601~700"
155 |                         dirpath = mypath+"\\@~Sorted\\"+key+"\\"+order+"\\"+code
156 |
157 |                 print("File : "+i)
158 |                 i2 = i  # clean the file name before moving
159 |                 i2 = i2.replace("_hhd000.com_免翻_墙免费访问全球最大情_色网站P_ornhub_可看收费内容", "")
160 |                 i2 = i2.replace("@hhd000.com_免翻#墙免费访问全球最大情#色网站P#ornhub,可看收费内容", "")
161 |
162 |                 if not os.path.isfile(dirpath+"\\"+i2):  # the destination file does not exist
163 |                     if not os.path.isdir(dirpath):
164 |                         os.makedirs(dirpath)
165 |                     try:
166 |                         os.rename(root+"\\"+i, dirpath+"\\"+i2)
167 |                         print("Move : "+dirpath)
168 |                     except FileNotFoundError as e:
169 |                         print("*Error : FileNotFound "+i)
170 |                         continue
171 |                     except PermissionError as e:
172 |                         print("*Error : PermissionError "+i)
173 |                         continue
174 |                 else:  # the destination file already exists
175 |                     file1 = root+"\\"+i
176 |                     file2 = dirpath+"\\"+i2
177 |                     if config.CheckFile and file_size(file1) == file_size(file2):  # comparison enabled and the existing file matches
178 |                         os.remove(file1)
179 |                         print("*Error : Exist same file \n *Remove : "+file1)
180 |                     else:  # the existing file differs
181 |                         for j in range(1, 10):
182 |                             dotpos = i2.rfind(".")
183 |                             i3 = i2[:dotpos]+"~"+str(j)+i2[dotpos:]
184 |                             if config.CheckFile and file_size(file1) == file_size(dirpath+"\\"+i3):  # comparison enabled and the existing file matches
185 |                                 os.remove(file1)
186 |                                 print("*Error : Exist same file \n *Remove : "+file1)
187 |                                 break
188 |                             if not os.path.isfile(dirpath+"\\"+i3):
189 |                                 try:
190 |                                     os.rename(root+"\\"+i, dirpath+"\\"+i3)
191 |                                 except FileNotFoundError:
192 |                                     print("*Error : FileNotFound "+file1)
193 |                                     break
194 |                                 print("*Exist : "+i+"\n *Rename : "+i3)
195 |                                 print("Move : "+dirpath)
196 |                                 break
197 |                 #sql.input(db_name, 'JAV', save)
198 |                 break
199 | input("\nAll done. Press Enter to exit.")
--------------------------------------------------------------------------------
/JAV/JAVAutoSorted.S.Local.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | ##Local Ver
3 | #Uses local data only; no web scraping
4 |
5 | import os, time, filecmp, hashlib
6 |
7 | CheckFile = True  # compare duplicate files
8 |
9 | class Log:
10 |     def NPrint(text):
11 |         os.chdir(mypath)
12 |         print(text)
13 |         with open("error.log", "a", encoding='utf8') as data:
14 |             data.write(str(text)+"\n")
15 |     def Text(text):
16 |         with open("error.log", "a", encoding='utf8') as data:
17 |             data.write(str(text)+"\n")
18 |     def SaveList(key, Title):
19 |         fname = ("@FileList.txt" if Title else "@CodeList.txt")
20 |         new = (title if Title else code)
21 |
22 |         os.chdir(mypath+"\\@~Sorted\\"+key)
23 |         try:  # load the previous list
24 |             with open(fname, "r", encoding='utf8') as clog:
25 |                 SaveList = [l.strip() for l in clog]
26 |         except:
27 |             SaveList = []
28 |         if new not in SaveList:
29 |             SaveList += [new]
30 |         else:
31 |             return
32 |         if len(SaveList) != 0:  # only write non-empty lists
33 |             with open(fname, "w", encoding='utf8') as sdata:  # write back
34 |                 for i in sorted(SaveList):
35 |                     sdata.write(i+"\n")
36 |
37 | def convert_bytes(num):
38 |     for x in ['bytes', 'KB', 'MB', 'GB', 'TB']:
39 |         if num < 1024.0:
40 |             return "%3.1f %s" % (num, x)
41 |         num /= 1024.0
42 | def file_size(file_path):
43 |     if os.path.isfile(file_path):
44 |         file_info = os.stat(file_path)
45 |         return convert_bytes(file_info.st_size)
46 | def hashs(fileName, type="md5", block_size=128 * 1024):
47 |     """ Supports md5(), sha1(), sha224(), sha256(), sha384(), sha512(), blake2b(), blake2s(),
48 |     sha3_224, sha3_256, sha3_384, sha3_512, shake_128, and shake_256
49 |     """
50 |     with open(fileName, 'rb') as file:
51 |         hash = hashlib.new(type, b"")
52 |         while True:
53 |             data = file.read(block_size)
54 |             if not data:
55 |                 break
56 |             hash.update(data)
57 |     return hash.hexdigest()
58 |
59 | # list of codes to process
60 | with open("data.txt", "r", encoding='utf_8_sig') as keydata:
61 |     DataList = [l.strip().split(" ", 1) for l in keydata]
62 | Dic = {}
63 | for i in DataList:
64 |     Dic[i[0]] = i[1]
65 |
66 | mypath = os.getcwd()  # working directory
67 |
68 | for root, dirs, files in os.walk(mypath):
69 |     if mypath+"\\@~Sorted" in root or mypath+"\\@" in root:  # skip @-prefixed folders under the root (custom layout)
70 |         continue
71 |     if not os.path.isdir(mypath+"\\@~Sorted\\"):
72 |         os.mkdir(mypath+"\\@~Sorted\\")
73 |     os.chdir(root)  # switch to the current folder
74 |     print("\nPath : "+root)
75 |
76 |     for key in Dic.keys():
77 |         for i in files:
78 |             if ".part" in i:  # skip files still downloading
79 |                 continue
80 |             if key.upper() in i.upper() or key.upper().replace("-", "", 1) in i.upper():  # the code appears in the file name
81 |                 print("Code :", key)
82 |                 dirpath = mypath+"\\@~Sorted\\"+key+" "+Dic[key]
83 |                 if not os.path.isdir(dirpath):
84 |                     try:
85 |                         os.mkdir(dirpath)
86 |                     except:  # folder name too long to create
87 |                         dirpath = mypath+"\\@~Sorted\\"+key
88 |                         if not os.path.isdir(dirpath):
89 |                             os.mkdir(dirpath)
90 |                 print("File : "+i)
91 |                 fsize = file_size(root+"\\"+i).split(" ")  # check the file size
92 |
93 |                 if not os.path.isfile(dirpath+"\\"+i):  # the destination file does not exist
94 |                     os.rename(root+"\\"+i, dirpath+"\\"+i)
95 |                     print("Move : "+dirpath)
96 |                 else:  # the destination file already exists
97 |                     file1 = root+"\\"+i
98 |                     file2 = dirpath+"\\"+i
99 |                     if CheckFile and file_size(file1) == file_size(file2):  # comparison enabled and the existing file matches
100 |                         #if CheckFile and file_size(file1) == file_size(file2) and hashs(file1) == hashs(file2):  # stricter: also compare hashes
101 |                         os.remove(file1)
102 |                         Log.NPrint("*Error : Exist same file \n *Remove : "+file1)
103 |                     else:  # the existing file differs
104 |                         for j in range(1, 10):
105 |                             dotpos = i.rfind(".")
106 |                             i3 = i[:dotpos]+"~"+str(j)+i[dotpos:]
107 |                             if not os.path.isfile(dirpath+"\\"+i3):
108 |                                 os.rename(root+"\\"+i, dirpath+"\\"+i3)
109 |                                 Log.NPrint("*Exist : "+i+"\n *Rename : "+i3)
110 |                                 print("Move : "+dirpath)
111 |                                 break
112 | input("\nAll done. Press Enter to exit.")
--------------------------------------------------------------------------------
/JAV/README.md:
--------------------------------------------------------------------------------
1 | # JAVAutoSort
2 |
3 | 
4 |
5 | ## Requirements
6 | pip install requests bs4 lxml pillow user_agent
7 |
8 | ## Version history & usage
9 | * v1.7 20180929
10 |
11 | Initial version. Looks for files matching the given code prefixes (e.g. ABP); duplicate files are skipped and logged to error.log, and sorted files are stored in the @Sorted folder under the working directory.
12 |
13 | The pipeline: detect whether the file name contains a release code → extract the code → look up the title and cover → download them into a folder named after the title → move the matching files there (see the sketch at the end of this README).
14 |
15 | * v2.0 20180929
16 |
17 | Adds file comparison and renaming on top of v1.7; same-named files that compare identical are still skipped and logged to error.log.
18 |
19 | Comparing files still takes a while; this may be the algorithm, or because the files are mounted from cloud storage.
20 |
21 | * v3.x
22 |
23 | > v3.0 20180930
24 |
25 | > Adds batch processing: list the code prefixes to process in keyword.txt (one per line, no blank lines).
26 |
27 | > Sorted files are stored under /@Sorted/<code>/ in the working directory.
28 |
29 | > v3.1 20181006
30 |
31 | > Reworked the matching logic, restructured the code, added matching and cover download for amateur codes,
32 |
33 | > switched file comparison to a plain size check, plus minor tweaks.
34 |
35 | ## Notes
36 |
37 | * Codes such as 200GANA and 300NTK also download video screenshots, so they take longer; this is normal! A config option to download covers only is under consideration.
38 |
39 | ## TODO & research
40 |
41 | Speed up downloads with multitasking (distributed crawling).
42 |
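43 | ## Appendix: code-extraction sketch
44 |
45 | A minimal sketch of the filename → code step described above. The real logic lives in GetCode in JAV.py; the regex and the function name here are illustrative assumptions, not the exact implementation:
46 |
47 | ```python
48 | import re
49 |
50 | def extract_code(filename, prefix):
51 |     """Find a release code such as 'ABP-123', tolerating a missing hyphen."""
52 |     m = re.search(r'(%s)-?(\d+)' % re.escape(prefix), filename, re.IGNORECASE)
53 |     return "%s-%s" % (m.group(1).upper(), m.group(2)) if m else None
54 |
55 | print(extract_code("ABP123.HD.mp4", "ABP"))  # -> ABP-123
56 | ```
57 |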
--------------------------------------------------------------------------------
/JAV/config.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | #Main.py
4 | CheckFile = True  # compare duplicate files
5 |
6 | #search.py
7 | tempfolder = "D:\\Cache"  # image cache location
8 | MergeAllPreview = True
9 | javdb = "D:\\GoogleDrive\\AutoSort\\javdb.txt"
10 | #signpic = "D:\\GoogleDrive\\AutoSort\\sign.png"  # watermark; leave empty to disable
11 | signpic = ""
12 |
13 | #sql.py
14 | LogPath = "D:\\GoogleDrive\\AutoSort"
15 | LogName = "JAV.db"
16 |
17 | if not os.path.isdir(tempfolder):
18 |     os.mkdir(tempfolder)
19 | if not os.path.isdir(LogPath):
20 |     os.mkdir(LogPath)
21 |
--------------------------------------------------------------------------------
/JAV/javdb.txt:
--------------------------------------------------------------------------------
1 | # Netscape HTTP Cookie File
2 | # http://curl.haxx.se/rfc/cookie_spec.html
3 | # This file was generated by EditThisCookie
4 | .javdb.com TRUE / TRUE 1600974431 __cf_bm a96ad3d3e703c36c91abfb450baed2ebe4613f13-1600972631-1800-AelddG/CeL5phqIEE2vT6tRciYstxoAaiDKHTmN8CKku587MRQJggA1PjBbVZQJlQvmjjrqYmbEInjUlyDmqDWhaoDQ6NbSTkXJN+5IWZhvsA22FmPPW0vi3pj0WggNrBunmjLwFT3lNjQnxNlujfTE=
5 | .javdb.com TRUE / FALSE 1603557617 __cfduid dd916250d1c00444c3d001052518fd7ef1600965617
6 | .javdb.com TRUE / FALSE 1601052019 __extfc 1
7 | .javdb.com TRUE / FALSE 1664044680 _ga GA1.2.566478709.1600965619
8 | .javdb.com TRUE / FALSE 1600972691 _gat_gtag_UA_160995389_2 1
9 | .javdb.com TRUE / FALSE 1601059080 _gid GA1.2.1921757634.1600965619
10 | javdb.com FALSE / FALSE 1602268680 _jdb_session %2Fu7l%2FnWabtkFOBhWTcWb05QaQOS%2F2VdoKNL2n9g3SKBlbdQwDRwRdFRMpb8nEml1Pfz7ArgyVMInlrAb8VYrYUzi4YSK93gh3sqDg8S%2FT5%2FNzbofjUntpYsVqX3CT4blE1WgVbdvK0f4RWSrpmkZpjkBeCGd4FIgqtxNhQm9OA88cFuBML3lgz5H1d07UEIH0RHl6gjyiJAmRq563lu83b5qP9AakYu7cjnbylvK4CKocvtJ4T9Fu79Wq4tz4%2FSbMIkbHbGmGmnOG0gGAB87Slkpc8jWPtVOX3PkeLunzTnFPxaNGLSbjR85Oce5Ltn%2BSFBjm3pgJ957ljHvLm%2Bmgnnv--UQd8cqTz4oXUHinj--IFf%2FXqP0EUmin4l271UxRw%3D%3D
11 | javdb.com FALSE / FALSE 0 locale zh
12 | javdb.com FALSE / FALSE 1632501620 over18 1
13 | javdb.com FALSE / FALSE 0 redirect_to %2Fv%2FWVP8g
14 | javdb.com FALSE / FALSE 1602268680 remember_me_token eyJfcmFpbHMiOnsibWVzc2FnZSI6IklsODNUVlJOWTJWNGR6TnpjMVF0VEd0ellYbFJJZz09IiwiZXhwIjoiMjAyMC0xMC0wOVQxODozNzo1OS4wMDBaIiwicHVyIjoiY29va2llLnJlbWVtYmVyX21lX3Rva2VuIn19--815a7e457118d9f641d0deea23b953590d6f7b80
--------------------------------------------------------------------------------
/JAV/keyword.txt:
--------------------------------------------------------------------------------
1 | 200GANA
2 | 259LUXU
3 | 261ARA
4 | 277DCV
5 | 300MAAN
6 | 300MIUM
7 | 300NTK
8 | 152EKO
9 | 332NAMA
10 | 274ETQT
11 | 302GERK
12 | 279UTSU
13 | 336KNB
14 | 253KAKU
15 | 326EVA
16 | 326JKK
17 | 326MTP
18 | 326NKR
19 | 326AID
20 | 326SCP
21 | 336DTT
22 | 236GAH
23 | 328HMDN
24 | KIRAY
25 | SIRO
26 | ABP
27 | AVOP
28 | AVOD
29 | EBOD
30 | HUSR
31 | IPX
32 | IPZ
33 | KAWD
34 | MDTM
35 | MIAE
36 | MXGS
37 | OAE
38 | OFJE
39 | SNIS
40 | SSNI
41 | STARS
42 | STAR
43 | SVDVD
44 | SVOMN
45 | WANZ
46 | ADN
47 | ARM
48 | AVSA
49 | CJOD
50 | CLUB
51 | DASD
52 | DAZD
53 | DVAJ
54 | EKDV
55 | HUNTA
56 | IBW
57 | JUY
58 | MGT
59 | RBD
60 | REAL
61 | SDAB
62 | SDDE
63 | SDHS
64 | SDMU
65 | SDNM
66 | SGA
67 | SHKD
68 | SOAN
69 | SORA
70 | SQTE
71 | TKI
72 | URE
73 | XRW
74 | XVSR
75 | YRH
76 | AMA
77 | BAZX
78 | BBAN
79 | DAVK
80 | DDK
81 | DDT
82 | DMOW
83 | DOCP
84 | DSVR
85 | HOMA
86 | MANE
87 | NHDTB
88 | ONEZ
89 | PKPD
90 | PPPD
91 | PRED
92 | PRTD
93 | ZEX
94 | ADVO
95 | AGEMIX
96 | APKH
97 | APNS
98 | BCDP
99 | BDA
100 | BF
101 | CMV
102 | CPDE
103 | CUT
104 | ETQR
105 | FONE
106 | FSKT
107 | FSRE
108 | GDJU
109 | GENT
110 | GETS
111 | GVG
112 | HFD
113 | HND
114 | HODV
115 | HONB
116 | IENE
117 | INCT
118 | IRCP
119 | JKSR
120 | JUFD
121 | JUKF
122 | JUTN
123 | KAGP
124 | KDKJ
125 | KTB
126 | KTKC
127 | KTKL
128 | KTKX
129 | KTKY
130 | KTKZ
131 | KTRA
132 | TIKB
133 | TIKC
134 | TIKF
135 | TIKM
136 | TIKJ
137 | TIKP
138 | LOL
139 | MDB
140 | MEKI
141 | MEYD
142 | MIDE
143 | MIFD
144 | MIRD
145 | MISM
146 | MIZD
147 | MUCD
148 | MUDR
149 | MVG
150 | NBD
151 | NNPJ
152 | NSPS
153 | PIYO
154 | PTS
155 | PXH
156 | RBB
157 | TAAK
158 | TPPN
159 | TRUM
160 | VECR
161 | KRI
162 | T28
163 | KBI
164 | MIAA
165 | MAKT
166 | KANE
167 | MVSD
168 | TMDI
169 | TMCY
170 | MKON
171 | SUPA
172 | MMGH
173 | OKB
174 | OKP
175 | OKS
176 | IANF
177 | AOZ
178 | DIC
179 | JUFE
180 | DVDMS
181 | TRE
182 | AMBI
183 | AMBS
184 | CESD
185 | MMND
186 | MIGD
187 | NACR
188 | HIKR
189 | DOKS
190 | MDBK
191 | YST
192 | YTR
193 | MIST
194 | MGMQ
195 | MOPP
196 | AKA
197 | EIKI
198 | NSM
199 | KRU
200 | HMDN
201 | SIMM
202 | MKMP
203 | PPT
204 | PPS
205 | KKJ
206 | GDTM
207 | EVIS
208 | EMLB
209 | BGN
210 | NZK
211 | GEKI
212 | GNE
213 | HNDB
214 | RKI
215 | ORETD
216 | ORE
217 | HARU
218 | DANDY
219 | SUJI
220 | SABA
221 | FINH
222 | FNEO
223 | FSKI
224 | HDKA
225 | JBD
226 | ODVHJ
227 | MUKD
228 | MUKC
229 | TUE
230 | DBER
231 | DAKH
232 | DBVB
233 | DBEB
234 | DYIB
235 | CACA
236 | SCR
237 | ATOM
238 | MILK
239 | MADM
240 | JYA
241 | TXCD
242 | ASI
243 | HOKS
244 | SIM
245 | ATID
246 | MDYD
247 | TYOD
248 | LID
249 | GENS
250 | GENM
251 | NUBI
252 | CHRV
253 | ENDX
254 | OPPW
255 | LZPL
256 | 27ID
257 | DNW
258 | YMDD
259 | HNDS
260 | KWBD
261 | BLOR
262 | CHN
263 | BAK
264 | GMEN
265 | LLDV
266 | POST
267 | MDS
268 | SDMM
269 | SDJS
270 | KMHR
271 | SHYN
272 | SHIC
273 | SIMGE
274 | CHAE
275 | MIMK
276 | BDSR
277 | ISJ
278 | ANX
279 | MRSS
280 | DOA
281 | VRTM
282 | UMSO
283 | BCPV
284 | EYAN
285 | HSL
286 | WZEN
287 | SOJU
288 | QRDA
289 | USBA
290 | MMUS
291 | SLAP
292 | CMN
293 | SKSK
294 | BLK
295 | EKW
296 | DNJR
297 | BOBB
298 | SKMJ
299 | OYC
300 | PAKO
301 | WAVR
302 | KAVR
303 | AVOPVR
304 | MAXVR
305 | YAL
306 | MBM
307 | CAWD
308 | EYS
309 | URHJ
310 | URLH
311 | URKH
312 | URKK
313 | URPW
314 | URVRSP
315 | OME
316 | MERD
317 | SUPD
318 | KFNE
319 | NITR
320 | SDAM
321 | SDMF
322 | SDNT
323 | TIK
324 | BF
325 | CUT
326 | MONE
327 | GENT
328 | IENF
329 | JUNY
330 | KTDS
331 | LOL
332 | SALO
333 | JRZD
334 | HMNF
335 | APOD
336 | IESM
337 | BUZ
--------------------------------------------------------------------------------
/JAV/search.py:
--------------------------------------------------------------------------------
1 | #-*- coding: utf-8 -*-
2 | import os, re, requests, math, shutil
3 | from bs4 import BeautifulSoup
4 | from time import sleep
5 | from user_agent import generate_user_agent
6 | import http.cookiejar
7 | from PIL import Image
8 | import config, sql
9 |
10 | #UA = UserAgent().random
11 | UA = generate_user_agent()
12 | mypath = os.getcwd()
13 |
14 | def ImageDL(imgurl, filename):  # download one image
15 |     r = requests.get(imgurl, headers={'User-Agent': UA}, stream=True)
16 |     with open(filename, "wb") as imgdata:
17 |         imgdata.write(r.content)
18 |     if 'Content-Length' in r.headers.keys() and os.stat(filename).st_size != int(r.headers['Content-Length']):  # size check to catch failed downloads
19 |         ImageDL(imgurl, filename)  # retry into the same file
20 | def Merge(code, allpreview, tempfolder="Cache", signpic=False):  # download and merge the preview images
21 |     execdir = tempfolder+"\\"+code
22 |
23 |     if not os.path.isdir(execdir):
24 |         os.mkdir(execdir)
25 |     os.chdir(execdir)
26 |     for preview, prenum in zip(allpreview, range(len(allpreview))):
27 |         preview = preview[:preview.rfind("?")]
28 |         filename = "pre%02d_%s" % (prenum, preview[preview.rfind("/")+1:])
29 |         if not os.path.isfile(filename):
30 |             ImageDL(preview, filename)
31 |     try:
32 |         imgs = [Image.open(fn) for fn in sorted(os.listdir())
33 |                 if re.match(r'.+?\.(jpg|jpeg|png)', fn) and not fn.endswith("_preview.jpg") and os.stat(fn).st_size > 0]  # open every preview image
34 |     except OSError:
35 |         try:
36 |             shutil.rmtree(execdir)
37 |         except PermissionError:
38 |             return False
39 |         Merge(code, allpreview, tempfolder, signpic)
40 |         return True
41 |     width, height = 0, 0
42 |     for img in imgs:  # take the maximum dimensions (sizes may differ)
43 |         width2, height2 = img.size
44 |         width = width2 if width2 > width else width
45 |         height = height2 if height2 > height else height
46 |
47 |     if len(imgs) <= 5: Column = len(imgs)
48 |     elif len(imgs)%5 == 0: Column = 5
49 |     elif len(imgs)%4 == 0: Column = 4
50 |     elif len(imgs)%3 == 0: Column = 3
51 |     else: Column = 5
52 |     result = Image.new(imgs[0].mode, (width*Column, height*(math.ceil(len(imgs)/Column))))
53 |     for order, img in enumerate(imgs):  # paste the images
54 |         width2, height2 = img.size  # center images whose size differs
55 |         result.paste(img, box=(width*(order%Column)+(width-width2)//2, height*(order//Column)+(height-height2)//2))
56 |     if signpic:  # watermark
57 |         signimg = Image.open(signpic)
58 |         signimg = signimg.convert('RGBA')
59 |         width3, height3 = signimg.size
60 |         result.paste(signimg, box=(width*Column-width3, height*(math.ceil(len(imgs)/Column))-height3), mask=signimg)
61 |     result.save(code+"_preview.jpg")  # save the merged sheet
62 |     return True
63 |
64 | def Sort2Dir(key, code, mypath, mode=1, sub=''):
65 |     global dirpath
66 |     if key == "T28":  # special-case code
67 |         code = code.replace("T-28", "T28-")
68 |     number = int(code[code.find("-")+1:])
69 |     if mode == 1:
70 |         order = "%03d~%03d" % (number-100+1, number) if number%100 == 0 else "%03d~%03d" % ((number//100)*100+1, (number//100+1)*100)  # hundred-bucket, e.g. ABP-672 → "601~700"
71 |         dirpath = mypath+"\\@~Sorted\\"+key+"\\"+order+"\\"+code
72 |     elif mode == 2:  #FC2
73 |         sub = sub.replace(":", ":")  # ":" is illegal in Windows paths
74 |         dirpath = mypath+"\\@~Sorted\\@"+key+"\\"+sub+"\\"+code
75 |     if not os.path.isdir(dirpath):
76 |         os.makedirs(dirpath)
77 |     os.chdir(dirpath)
78 |     if coverurl == None or not coverurl:
79 |         print("*Error : No Cover.")
80 |         return
81 |
82 |     coverfile = code+"_cover.jpg"
83 |     r = requests.get(coverurl, headers={'User-Agent': UA})
84 |     if not os.path.isfile(coverfile) or os.stat(coverfile).st_size == 0:
85 |         with open(coverfile, "wb") as imgdata:
86 |             imgdata.write(r.content)
87 |     os.chdir(dirpath[:dirpath.rfind("\\")])
88 |     coverfile2 = title+".jpg"
89 |     if not os.path.isfile(coverfile) and not os.path.isfile(coverfile2):
90 |         try:
91 |             with open(coverfile2, "wb") as imgdata:
92 |                 imgdata.write(r.content)
93 |         except:
94 |             with open(code+".jpg", "wb") as imgdata:
95 |                 imgdata.write(r.content)
96 |         print("CoverDL : "+title)
97 |
98 | def Database1(key, code, mypath):  # search JavBus
99 |     global dirpath, title, coverurl
100 |     url = "https://www.javbus.com/"+code
101 |     response = requests.get(url, headers={'User-Agent': UA})
102 |     response.encoding = 'UTF-8'
103 |     soup = BeautifulSoup(response.text, 'lxml')
104 |
105 |     if soup.find("title").getText() == "404 Not Found" or soup.find("title").getText() == "404 Page Not Found! - JavBus":
106 |         return {'success': False, 'error': code+" 404 Not Found"}
107 |     elif soup.find("h3") == None:
108 |         return {'success': False, 'error': code+" Unknown Error"}
109 |
110 |     article = soup.find("div", {"class": "container"})
111 |     if article == None:
112 |         return {'success': False, 'error': code+" Unknown Error"}
113 |
114 |     title = article.find("h3").getText()
115 |     coverurl = article.find("a", {"class": "bigImage"}).get("href")
116 |     allinfo = article.find("div", {"class": "col-md-3 info"}).find_all("p")
117 |     code, date, time, dierector, producer, pulisher, series, genre, actress, allpreview = "", "", "", "", "", "", "", "", "", []
118 |     if article.find("div", {"id": "sample-waterfall"}):
119 |         waterfall = article.find("div", {"id": "sample-waterfall"}).find_all("a", {"class": "sample-box"})
120 |         allpreview = [prev.get("href").strip() for prev in waterfall]
121 |     for nfo in range(len(allinfo)):
122 |         if allinfo[nfo].getText().split(" ")[0] == "識別碼:":
123 |             code = allinfo[nfo].getText().split(" ")[1].strip()
124 |         elif allinfo[nfo].getText().split(" ")[0] == "發行日期:":
125 |             date = allinfo[nfo].getText().split(" ")[1].strip()
126 |         elif allinfo[nfo].getText().split(" ")[0] == "長度:":
127 |             time = allinfo[nfo].getText().split(" ")[1].strip()
128 |         elif allinfo[nfo].getText().split(" ")[0] == "導演:":
129 |             dierector = allinfo[nfo].getText().split(" ")[1].strip()
130 |         elif allinfo[nfo].getText().split(" ")[0] == "製作商:":
131 |             producer = allinfo[nfo].getText().split(" ")[1].strip()
132 |         elif allinfo[nfo].getText().split(" ")[0] == "發行商:":
133 |             pulisher = allinfo[nfo].getText().split(" ")[1].strip()
134 |         elif allinfo[nfo].getText().split(" ")[0] == "系列:":
135 |             series = allinfo[nfo].getText().split(" ")[1].strip()
136 |         elif allinfo[nfo].getText() == "類別:":
137 |             genre = [g.getText().strip() for g in allinfo[nfo+1].find_all("span", {"class": "genre"})]
138 |         elif allinfo[nfo].getText() == "演員:":
139 |             if nfo+1 < len(allinfo):
140 |                 actress = [g.getText().strip() for g in allinfo[nfo+1].find_all("span", {"class": "genre"})]
141 |     Sort2Dir(key, code, mypath)
142 |     os.chdir(mypath)
143 |     mergename = code+"_preview.jpg"
144 |     mergepath = config.tempfolder+"\\"+code
145 |     if not os.path.isfile(dirpath+"\\"+mergename) and len(allpreview) > 0:
146 |         mergeres = Merge(code, allpreview, tempfolder=config.tempfolder, signpic=config.signpic)
147 |         if mergeres:
148 |             shutil.move(mergepath+"\\"+mergename, dirpath+"\\"+mergename)  #Move
149 |             #shutil.rmtree(mergepath)  # clear the cache
150 |     save = [code, title.replace(code, '').strip(), series, ",".join(actress), ",".join(genre), date, time, dierector, producer, pulisher]
151 |     return {'success': True, 'dirpath': dirpath, 'code': code, 'save': save, 'title': title.replace(code, '').strip()}
152 |
153 | def Database2(key, code, mypath):  # search JAV321
154 |     global dirpath, title, coverurl
155 |     surl = "https://www.jav321.com/search"
156 |     payload = {'sn': code}
157 |     response = requests.post(url=surl, data=payload, headers={'User-Agent': UA})
158 |     response.encoding = 'UTF-8'
159 |     soup = BeautifulSoup(response.text, 'lxml')
160 |     if soup.find("div", {"class": "alert"}):
161 |         return {'success': False, 'error': soup.find("div", {"class": "alert"}).getText()}
162 |     elif soup.find("h3") == None:
163 |         return {'success': False, 'error': 'Unknown Error'}
164 |
165 |     t1 = soup.find("h3").getText()
166 |     t2 = soup.find("h3").find("small").getText()
167 |     title = code + " " + t1.replace(t2, "").strip()
168 |     imgs = soup.find_all("div", "col-xs-12 col-md-12")[:-1]
169 |     imglist = [i.find("img").get("src") for i in imgs]
170 |     if len(imglist) == 0:
171 |         return {'success': False, 'error': "No Cover."}
172 |     coverurl = imglist[0]
173 |     allpreview = imglist[1:]
174 |
175 |     allinfo = soup.find("div", {"class": "col-md-9"})
176 |     allinfo = str(allinfo).split("\n")  # the info block is split line by line
177 |
178 |     actress, producer, genre, code, date, time, series = "", "", "", "", "", "", ""
179 |     for nfo in allinfo:
180 |         nfo2 = BeautifulSoup(nfo, 'lxml').getText()
181 |         if "女优:" in nfo2:
182 |             actress = nfo2.replace("女优:", "").strip().split(" ")
183 |         elif "片商:" in nfo2:
184 |             producer = nfo2.replace("片商:", "").strip()
185 |         elif "标签:" in nfo2:
186 |             genre = nfo2.replace("标签:", "").strip().split(" ")
187 |         elif "番号:" in nfo2:
188 |             code = nfo2.replace("番号:", "").strip().upper()
189 |             code = key+code[code.find("-"):]
190 |         elif "发行日期:" in nfo2:
191 |             date = nfo2.replace("发行日期:", "").strip()
192 |         elif "播放时长:" in nfo2:
193 |             time = nfo2.replace("播放时长:", "").strip().replace("分钟", "分鐘")
194 |         elif "系列:" in nfo2:
195 |             series = nfo2.replace("系列:", "").strip()
196 |
197 |     Sort2Dir(key, code, mypath)
198 |     os.chdir(mypath)
199 |     mergename = code+"_preview.jpg"
200 |     mergepath = config.tempfolder+"\\"+code
201 |     if not os.path.isfile(dirpath+"\\"+mergename) and len(allpreview) > 0:
202 |         mergeres = Merge(code, allpreview, tempfolder=config.tempfolder, signpic=config.signpic)
203 |         if mergeres:
204 |             shutil.move(mergepath+"\\"+mergename, dirpath+"\\"+mergename)  #Move
205 |             #shutil.rmtree(mergepath)  # clear the cache
206 |     save = [code, title.replace(code, '').strip(), series, ",".join(actress), ",".join(genre), date, time, '', producer, '']
207 |     return {'success': True, 'dirpath': dirpath, 'code': code, 'save': save, 'title': title.replace(code, '').strip()}
208 |
209 | def Database3(key, code, mypath, cookies=config.javdb):  # search JAVDB
210 |     global dirpath, title, coverurl
211 |
212 |     re_code = re.search(r"(\d+)([a-zA-Z]+-?.+)", code)
213 |     url = "https://javdb.com/videos/search_autocomplete.json?q=" + (re_code.group(2) if re_code else code)
214 |     vurl = ""
215 |
216 |     cookies = http.cookiejar.MozillaCookieJar(cookies)
217 |     cookies.load()
218 |
219 |     res = requests.get(url, headers={'User-Agent': UA}, cookies=cookies)
220 |     res.encoding = 'UTF-8'
221 |     res = res.json()  # returns a list of candidates
222 |
223 |     if len(res) == 0:
224 |         return {'success': False, 'error': code+" not found, return empty json."}
225 |     for r in res:
226 |         if r['number'] == (re_code.group(2) if re_code else code):
227 |             vurl = "https://javdb.com/v/" + r['uid']
228 |     if not vurl:
229 |         return {'success': False, 'error': code+" not found, can't find this video."}
230 |
231 |     res = requests.get(vurl, headers={'User-Agent': UA}, cookies=cookies)
232 |     res.encoding = 'UTF-8'
233 |     soup = BeautifulSoup(res.text, 'lxml')
234 |
235 |     title = soup.find("h2").getText().strip()
236 |     try:
237 |         if soup.find("img", {"class": "video-cover"}):
238 |             coverurl = soup.find("img", {"class": "video-cover"}).get("src")
239 |
240 |         elif soup.find("video", {"id": "preview-video"}):
241 |             coverurl = soup.find("video", {"id": "preview-video"}).get("poster")
242 |         else:
243 |             coverurl = soup.find("div", {"class": "column is-three-fifths column-video-cover"}).find("a").get("href")
244 |     except AttributeError:
245 |         sleep(1)
246 |         return {'success': False, 'error': code+' AttributeError'}
247 |     allinfo = soup.find_all("div", {"class": "panel-block"})[:-1]  # drop the last row
248 |
249 |     date, time, dierector, producer, pulisher, seller, series, genre, actress, allpreview = "", "", "", "", "", "", "", "", "", []
250 |     if soup.find("div", {"class": "tile-images preview-images"}):
251 |         waterfall = soup.find("div", {"class": "tile-images preview-images"}).find_all("a", {"class": "tile-item"})
252 |         allpreview = [prev.get("href").strip() for prev in waterfall]
253 |     for nfo in allinfo:
254 |         if nfo.find("strong").getText() == "番號":
255 |             code = nfo.find("span", {"class": "value"}).getText()
256 |         elif nfo.find("strong").getText() == "時間:":
257 |             date = nfo.find("span", {"class": "value"}).getText()
258 |         elif nfo.find("strong").getText() == "時長:":
259 |             time = nfo.find("span", {"class": "value"}).getText()
260 |         elif nfo.find("strong").getText() == "導演:":
261 |             dierector = nfo.find("span", {"class": "value"}).getText()
262 |         elif nfo.find("strong").getText() == "片商:":
263 |             producer = nfo.find("span", {"class": "value"}).getText()
264 |         elif nfo.find("strong").getText() == "發行:":
265 |             pulisher = nfo.find("span", {"class": "value"}).getText()
266 |         elif nfo.find("strong").getText() == "系列:":
267 |             series = nfo.find("span", {"class": "value"}).getText()
268 |         elif nfo.find("strong").getText() == "賣家:":
269 |             seller = nfo.find("span", {"class": "value"}).getText()
270 |             dierector = seller
271 |         elif nfo.find("strong").getText() == "類別:":
272 |             genre = [g.getText().strip() for g in nfo.find("span", {"class": "value"}).find_all("a")]
273 |         elif nfo.find("strong").getText() == "演員:":
274 |             actress = [g.getText().strip() for g in nfo.find("span", {"class": "value"}).find_all("a")]
275 |
276 |     if key in ["FC2"]:
277 |         Sort2Dir(key, code, mypath, mode=2, sub=seller)
278 |         sleep(1)
279 |     else:
280 |         Sort2Dir(key, code, mypath)
281 |     os.chdir(mypath)
282 |     mergename = code+"_preview.jpg"
283 |     mergepath = config.tempfolder+"\\"+code
284 |     if not os.path.isfile(dirpath+"\\"+mergename) and len(allpreview) > 0:
285 |         mergeres = Merge(code, allpreview, tempfolder=config.tempfolder, signpic=config.signpic)
286 |         if mergeres:
287 |             shutil.move(mergepath+"\\"+mergename, dirpath+"\\"+mergename)  #Move
288 |             #shutil.rmtree(mergepath)  # clear the cache
289 |     save = [code, title.replace(code, '').strip(), series, ",".join(actress), ",".join(genre), date, time, dierector, producer, pulisher]
290 |     return {'success': True, 'dirpath': dirpath, 'code': code, 'save': save, 'title': title.replace(code, '').strip()}
291 |
292 |
293 | def Database4(key, code, mypath):  # search JAVDB (unfinished stub)
294 |     global dirpath, title, coverurl
295 |
296 |     re_code = re.search(r"(\d+)(.+-?.+)", code)
297 |     url = "https://javdb.com/videos/search_autocomplete.json?q=" + (re_code.group(2) if re_code else code)  # parentheses needed, or the conditional swallows the whole assignment
298 |     vurl = ""
299 |
300 | #shutil.rmtree(config.tempfolder)  # clear the cache
301 | '''
302 | #Test
303 | res = Database3("300MIUM","300MIUM-670",mypath)
304 | #res = Database3("ORE","ORE-670",mypath)
305 | print(res)
306 |
307 | db_name = "%s\\%s" % (config.LogPath,config.LogName) if config.LogPath else config.LogName
308 | sql.input(db_name, 'JAV', save,replace=True)'''
/JAV/sql.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | import sqlite3, config
3 |
4 | def init(db_name, table_name):
5 |     conn = sqlite3.connect(db_name)
6 |     cursor = conn.cursor()
7 |     # Code holds strings such as "ABP-123", so it is declared TEXT rather than INT
8 |     execute = '''CREATE TABLE IF NOT EXISTS %s
9 |     (Code TEXT PRIMARY KEY NOT NULL,
10 |     標題 TEXT NOT NULL,
11 |     系列 TEXT,
12 |     女優 TEXT,
13 |     類別 TEXT,
14 |     日期 VARCHAR(10),
15 |     時長 VARCHAR(10),
16 |     導演 TEXT,
17 |     製作商 TEXT,
18 |     發行商 TEXT,
19 |     UNIQUE(Code)
20 |     )''' % (table_name)
21 |     cursor.execute(execute)
22 |     cursor.close()
23 |     conn.close()
24 |
25 | def input(db_name, table_name, List, many=False, replace=False):
26 |     num = len(List[0]) if many else len(List)
27 |     conn = sqlite3.connect(db_name)
28 |     cursor = conn.cursor()
29 |     pattern = "IGNORE" if not replace else "REPLACE"
30 |     execute = 'INSERT OR %s INTO %s VALUES (?%s)' % (pattern, table_name, ",?"*(num-1))
31 |     if many:  # bulk insert
32 |         cursor.executemany(execute, List)
33 |     else:
34 |         cursor.execute(execute, List)
35 |     conn.commit()
36 |     cursor.close()
37 |     conn.close()
38 |
39 | def output(db_name, table_name, file_name):
40 |     with open(file_name, "w", encoding='utf-8-sig') as write_file:
41 |         conn = sqlite3.connect(db_name)
42 |         cursor = conn.cursor()
43 |         execute = "SELECT * FROM %s" % (table_name)
44 |         for row in cursor.execute(execute):
45 |             writeRow = "\t".join('%s' % r for r in row)+"\n"
46 |             write_file.write(writeRow)
47 | def query(db_name, table_name, sid):
48 |     conn = sqlite3.connect(db_name)
49 |     cursor = conn.cursor()
50 |     execute = "SELECT * FROM %s WHERE Code = ?" % (table_name)  # the table keys on Code; there is no SID column
51 |     result = cursor.execute(execute, [sid]).fetchone()
52 |     cursor.close()
53 |     conn.close()
54 |     return result  # callers expect the matching row, or None
55 |
56 | #Init
57 | db_name = "%s\\%s" % (config.LogPath, config.LogName) if config.LogPath else config.LogName
58 | init(db_name, "JAV")
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 Gdist
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Movie/MVAutoSort.py:
--------------------------------------------------------------------------------
1 | # !/usr/bin/python3
2 | # -*- coding: utf-8 -*-
3 | # Copyright (c) 2019-2020 GDST
4 |
5 | import json, requests, random, os, re, time
6 | from opencc import OpenCC
7 | from bs4 import BeautifulSoup
8 | #from fake_useragent import UserAgent
9 | from user_agent import generate_user_agent
10 | import gen as Gen
11 | import get as Get
12 | import sql, search, config
13 |
14 | #Initialize
15 | #ua = UserAgent().random
16 | ua = generate_user_agent()
17 | regDic = {}
18 |
19 | #SQL
20 | db_name = "%s\\%s.db" % (config.LogPath, config.LogName) if config.LogPath else config.LogName+".db"
21 | sql.init(db_name, "Movie")
22 | sql.init(db_name, "TV")
23 |
24 | with open("folder.txt", "r", encoding='utf-8-sig') as data:  # only run inside these subfolders
25 |     folderList = [l.strip() for l in data]
26 | with open("region.txt", "r", encoding='utf-8-sig') as regdata:  # region abbreviation mapping
27 |     regList = [l.strip().split(',') for l in regdata]
28 | for reg in regList:
29 |     regDic[reg[0]] = reg[1:]
30 |
31 | #Function
32 | def resjson(url):
33 |     r = requests.get(url, headers={'User-Agent': ua})
34 |     res = r.json()  # returns a dict
35 |     return res
36 | def move(src, dst):
37 |     for root, dirs, files in os.walk(src):
38 |         for d in dirs:
39 |             src_dir = root+"\\"+d
40 |             dst_dir = src_dir.replace(src, dst)
41 |             if not os.path.exists(dst_dir):
42 |                 os.mkdir(dst_dir)
43 |         for file in files:
44 |             src_path = root+"\\"+file
45 |             dst_path = src_path.replace(src, dst)
46 |             if not os.path.exists(dst_path):
47 |                 os.rename(src_path, dst_path)
48 | def SaveLog(save):
49 |     if subtype == "movie":
50 |         #config.LogName2 = "%s_Movie_%s.csv" % (config.LogName, year)
51 |         config.LogName2 = "%s_Movie.tsv" % (config.LogName)
52 |     elif subtype == "tv":
53 |         #config.LogName2 = "%s_TV_%s_%s.csv" % (config.LogName, reg1, year)
54 |         config.LogName2 = "%s_TV_%s.tsv" % (config.LogName, reg1)
55 |     fname = config.LogPath+"\\"+config.LogName2
56 |     if not os.path.isfile(fname):
57 |         save = "Folder\tSID\tRename\n"+save
58 |     with open(fname, "a", encoding='utf-8-sig') as sdata:  # write the log
59 |         sdata.write(save+"\n")
60 | def LogNPrint(text):
61 |     print(text)
62 |     with open(config.LogPath+"//AutoSort.log", "a", encoding='utf8') as data:
63 |         data.write(str(text)+"\n")
64 |
65 | class Search:
66 |     def get_year(key1):  # find the year
67 |         if re.search(r"((?:19|20)\d{2})", key1):  # the original pattern (19|20\d{2}) also matched a bare "19"
68 |             return re.search(r"((?:19|20)\d{2})", key1).group(1)
69 |         else:
70 |             return False
71 |     def DB(key1, mod=1, year_check=True):
72 |         global subtype, dblink
73 |         key2 = key1
74 |         year0 = Search.get_year(key1)
75 |         if mod == 1:  # use the name before the year or resolution
76 |             if year0:
77 |                 key2 = key1[:key1.find(year0)] if key1[:key1.find(year0)] != "" else key1
78 |             else:
79 |                 key2 = re.search(r"(.+)\d{4}", key1).group(1) if re.search(r"(.+)\d{4}", key1) else key1
80 |         if mod == 2:  # use the Chinese name before the first dot, or the English name up to the season tag (SXX)
81 |             key2 = re.search(r"([\u4e00-\u9fa5]+)\.", key1).group(1) if re.search(r"([\u4e00-\u9fa5]+)\.", key1) else key1
82 |             key2 = re.search(r"(.+\.S\d{2})", key1).group(1) if key2 == key1 and re.search(r"(.+\.S\d{2})", key1) else key1
83 |         Bracket = re.search(r"\[(.+?)\]", key2)  # look inside square brackets
84 |         if Bracket:
85 |             key2 = Bracket.group(1)
86 |         LogNPrint("Change : "+key2)
87 |         url = config.dbapi+key2
88 |         res = resjson(url)
89 |         if res.get('msg') == 'invalid_credencial2':  # bail out when the API key is dead (.get: successful responses carry no 'msg')
90 |             return False
91 |         if int(res['total']) == 1 and len(res['subjects']) == 1:  # only one result
92 |             subtype = res['subjects'][0]['subtype']
93 |             dblink = res['subjects'][0]['alt']
94 |             year = res['subjects'][0]['year']
95 |             if year in key1 or not year0:  # the name contains the found year, or contains no year at all
96 |                 return dblink
97 |             else:
98 |                 print("*Error : Year doesn't match.")
99 |         elif int(res['total']) == 0 or len(res['subjects']) == 0:  # no results
100 |             return ""
101 |         elif int(res['total']) > 1:  # too many results
102 |             if not year_check:  # when the year is not checked, return the first result directly
103 |                 return res['subjects'][0]['alt']
104 |             for subject in res['subjects']:  # too many results → match by year
105 |                 if subject['year'] != "" and subject['year'] in key1:  # skip entries whose year is blank
106 |                     #if subject['year'] in key1 or (not year0 and subject['title'] in key1):
107 |                     subtype = subject['subtype']
108 |                     dblink = subject['alt']
109 |                     return dblink
110 |         return False  # Error: no results found.
111 |     def GetInfo(dblink, switch=0):
112 |         global year, subtype, reg1, reg2, reg3, save
113 |         res = Gen.gen_douban(dblink)
114 |         if not res['success']:  # Success
115 |             print("*Error :", res['error'])
116 |             if 'exit' in res.keys(): exit()
117 |             return ""
118 |         else:
119 |             subtype = res['subtype'] if not subtype else subtype
120 |             year = year2 = res['year']
121 |             if not year:
122 |                 year = res['playdate'][0][:4] if res['playdate'] else 0
123 |             '''if int(len(res['seasons_list'])) > 1 and subtype == "tv":  # multi-season TV series
124 |                 year = 999  # multi-season
125 |                 year2 = "多季"'''
126 |             titleZH = res['chinese_title'].replace("　", " ")  # Chinese title (normalize full-width spaces)
127 |             this_title = res['this_title'][0]  # original title
128 |             trans_title = res['trans_title']  # list, used to pick the Taiwanese translation
129 |             aka = res['aka']
130 |
131 |             try:
132 |                 imdb_id = res['imdb_id']
133 |             except KeyError:
134 |                 imdb_id = ""
135 |             db_id, db_rating = "db_"+res['sid'], res['douban_rating_average']
136 |             if not imdb_id:
137 |                 imdb_rating = ""
138 |             else:
139 |                 try:
140 |                     imdb_rating = res['imdb_rating'][:res['imdb_rating'].find('/')]
141 |                 except:
142 |                     imdb_rating = 0
143 |             mvid, rating = (imdb_id, imdb_rating) if imdb_id and imdb_rating else (db_id, db_rating)
144 |
145 |             genre = "|".join(res['genre'])  # list → str, genres
146 |             region = res['region'] if type(res['region']) == type("str") else res['region'][0]
147 |             region = region.split(" ")[0]  # handle mixed Chinese/English regions
148 |             reg1, reg2, reg3 = region, region, region
149 |             for reg in regDic.keys():  # region
150 |                 if reg == region:
151 |                     if "台湾" in region or "香港" in region: reg1 = reg.replace("中国", "")
152 |                     elif region == "中国": reg1 = "中国大陆"
153 |                     else: reg1 = reg
154 |                     reg2 = regDic[reg][0]
155 |                     reg3 = regDic[reg][1]
156 |                     break
157 |
158 |             AllTitle1 = [titleZH]+[this_title]+aka+trans_title
159 |             AllTitle2 = list(set(AllTitle1))
160 |             AllTitle2.sort(key=AllTitle1.index)
161 |
162 |             if config.CHT_TW:  # Traditional Chinese, Taiwanese translation
163 |                 if this_title != "" and reg1 == "台湾":  # the original title is Chinese and the region is Taiwan
164 |                     titleZH = this_title
165 |                 breakcheck = False
166 |                 zhtwList = ["(台)","(港/台)","(台/港)","(台)","(港/台)","(台/港)"]
167 |                 for trans in AllTitle2:
168 |                     for zhtw in zhtwList:
169 |                         if zhtw in trans:
170 |                             if trans in AllTitle2:
171 |                                 AllTitle2.remove(trans)
172 |                             breakcheck = True
173 |                             titleZH = trans.replace(zhtw, "")
174 |                             break
175 |                     if breakcheck:
176 |                         break
177 |                 titleZH = OpenCC('s2tw').convert(titleZH)
178 |                 genre = OpenCC('s2tw').convert(genre)
179 |                 reg1 = OpenCC('s2tw').convert(reg1)
180 |                 for i in range(len(AllTitle2)):
181 |                     AllTitle2[i] = OpenCC('s2tw').convert(AllTitle2[i])
182 |                 if reg2 == reg3:
183 |                     reg2 = OpenCC('s2tw').convert(reg2)
184 |             titleEN = ""  # initialized here so the save line below also works when ZH_ENG is off
185 |             if config.ZH_ENG:  # bilingual Chinese/English title
186 |                 for tt in AllTitle2:
187 |                     if not Get.checkzh(tt):
188 |                         if tt in AllTitle2:
189 |                             AllTitle2.remove(tt)
190 |                         titleEN = tt.replace(" : ", ":").replace(": ", ":").replace("/", "/").replace("\\", "＼")  # swap characters that are illegal in Windows names for full-width forms
191 |                         break
192 |                 for tt in [titleZH]+aka:
193 |                     if Get.checkzh(tt):
194 |                         if tt in AllTitle2:
195 |                             AllTitle2.remove(tt)
196 |                         titleZH = tt.replace(" : ", ":").replace(": ", ":").replace("/", "/").replace("\\", "＼")
197 |                         break
198 |                 title = (titleZH+" "+titleEN) if titleEN and len(titleEN) <= config.ENGlen and titleZH != titleEN else titleZH
199 |             else:  # Chinese-only title
200 |                 title = titleZH
201 |             titleOT = AllTitle2
202 |
203 |             region = reg2 if config.regSt else reg1
204 |             titleOT = [] if not titleOT else titleOT
205 |             save = "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s" % (mvid, year, reg1, imdb_rating, db_rating, titleZH, titleEN, "/".join(titleOT), genre, imdb_id, db_id)
206 |             if rating == '' or float(rating):
207 |                 return "[%s][%s]%s(%s)(%s)(%s)" % (year2, region, title, genre.replace("|", "_"), rating, mvid)
208 |             else:
209 |                 return "[%s][%s]%s(%s)(%s)" % (year2, region, title, genre.replace("|", "_"), mvid)
210 |
211 | mypath = os.getcwd() if not config.UseRemote else config.remotepath  # working directory
212 | Logfile = config.LogPath+"\\move.log" if config.LogPath else "move.log"
213 |
214 | for folder in folderList:
215 |     if os.path.isdir(folder):  # the listed folder exists
216 |         for d in sorted(os.listdir(folder)):
217 |             subtype, IMDbID, dblink = "", "", ""
218 |             folderpath = "%s/%s" % (folder, d)
219 |             SubD = False if re.match(r'.+?\.(mkv|mp4|ts).?', d) else config.SubFolder  # if the "folder" is actually a file, don't use config.SubFolder
220 |             LogNPrint("\nFolder : "+d)
221 |             if re.search(r"\(db_(.+?)\)", d):  # the folder name contains a Douban ID
222 |                 SubD = False
223 |                 dblink = "https://movie.douban.com/subject/%s/" % (re.search(r"\(db_(.+?)\)", d).group(1))
224 |             elif re.search(r"\(tt(.+?)\)", d):  # the folder name contains an IMDb ID
225 |                 SubD = False
226 |                 IMDbID = re.search(r"\((tt\d+)\)", d).group(1)
227 |                 LogNPrint("IMDbID : "+IMDbID)
228 |                 dblink = search.imdb2db2(IMDbID)
229 |             elif Get.findnfo(folderpath):  # a .nfo inside the folder carries an IMDb or Douban link
230 |                 get_nfo = Get.findnfo(folderpath)
231 |                 if 'imdb' in get_nfo.keys():
232 |                     IMDbID = get_nfo['imdb']
233 |                     LogNPrint("IMDbID : "+IMDbID)
234 |                     dblink = search.imdb2db2(get_nfo['imdb'])
235 |                 elif 'douban' in get_nfo.keys():
236 |                     dblink = get_nfo['douban']
237 |             else:
238 |                 ptsearch = search.PT(d)
239 |                 if not (IMDbID or dblink) and ptsearch:
240 |                     IMDbID = ptsearch['imdb'] if ptsearch['imdb'] else ""
241 |                     dblink = ptsearch['douban'] if ptsearch['douban'] else ""
242 |                     LogNPrint("Search : from {}".format(ptsearch['source']))
243 |                 if not (IMDbID or dblink) and re.search(r"WiKi|DoA|JuJuYuJu|NGB|ARiN|ExREN|NTb|NTG|CHD", d) and search.TTG(d):  # TTG can provide an IMDb ID or Douban link
244 |                     ptsearch = search.TTG(d)
245 |                     if ptsearch['imdb'] or ptsearch['douban']:
246 |                         LogNPrint("Search : from TTG")
247 |                         IMDbID = ptsearch['imdb'] if ptsearch['imdb'] else ""
248 |                         dblink = ptsearch['douban'] if ptsearch['douban'] else search.imdb2db2(IMDbID)
249 |                     if ptsearch['title'] and not dblink:
250 |                         LogNPrint("UseThis: "+ptsearch['title'])
251 |                         dblink = Search.DB(ptsearch['title'], year_check=config.year_check)
252 |                         if not dblink:
253 |                             dblink = Search.DB(ptsearch['title'], year_check=False)
254 |                 if not (IMDbID or dblink) and re.search(r"PuTao", d) and search.PuTao(d):  # PuTao can provide an IMDb ID or Douban link
255 |                     ptsearch = search.PuTao(d)
256 |                     LogNPrint("Search : from PuTao")
257 |                     IMDbID = ptsearch['imdb'] if ptsearch['imdb'] else ""
258 |                     dblink = ptsearch['douban'] if ptsearch['douban'] else search.imdb2db2(IMDbID)
259 |                 if not (IMDbID or dblink) and re.search(r"MTeam|MPAD|OneHD|StBOX|beAst|CHD", d) and search.MTeam(d):  # MTeam can provide an IMDb ID or Douban link
260 |                     ptsearch = search.MTeam(d)
261 |                     if 'imdb' in ptsearch.keys() or 'douban' in ptsearch.keys():
262 |                         LogNPrint("Search : from MTeam")
263 |                         IMDbID = ptsearch['imdb'] if ptsearch['imdb'] else ""
264 |                         dblink = ptsearch['douban'] if ptsearch['douban'] else search.imdb2db2(IMDbID)
265 |                     elif 'title' in ptsearch.keys():
266 |                         dblink = Search.DB(ptsearch['title'], year_check=config.year_check)
267 |                         if not dblink:
268 |                             dblink = Search.DB(ptsearch['title'], year_check=False)
269 |                 if not (dblink or IMDbID):
270 |                     dblink = Search.DB(d, year_check=config.year_check)
271 |                     '''if not dblink:
272 |                         dblink = Search.DB(d, mod=2, year_check=config.year_check)'''
273 |             if dblink:  # a Douban link was found
274 |                 LogNPrint("dbLink : "+dblink)
275 |                 '''LogNPrint("*Debug : Pass.")
276 |                 continue'''
277 |                 try:
278 |                     name = Search.GetInfo(dblink)
279 |                 except:
280 |                     LogNPrint("*Error : Name error. Pass.")
281 |                     continue
282 |                 if not name and IMDbID:  # no data behind the Douban link, but an IMDb ID exists
283 |                     #LogNPrint("Change : IMDb&TMDb")  # TODO: study TMDb error responses
284 |                     GetTMDb = Get.IMDb2TMDb(IMDbID)
285 |                     if GetTMDb:
286 |                         subtype, year, reg1, name, save = GetTMDb[0], GetTMDb[1], GetTMDb[2], GetTMDb[3], GetTMDb[4]
287 |                         LogNPrint("Change : IMDb&TMDb")
288 |                     else:
289 |                         subtype, year, reg1, name, save = "", "", "", "", ""
290 |             elif not dblink and IMDbID:  # no Douban link, but an IMDb ID → fall back to TMDb and IMDb
291 |                 LogNPrint("IMDbID : "+IMDbID)
292 |                 GetTMDb = Get.IMDb2TMDb(IMDbID)
293 |
294 |                 if GetTMDb:
295 |                     subtype, year, reg1, name, save = GetTMDb[0], GetTMDb[1], GetTMDb[2], GetTMDb[3], GetTMDb[4]
296 |                     LogNPrint("Change : IMDb&TMDb")
297 |                 else:
298 |                     LogNPrint("*Error : Can't find info from IMDb&TMDb.")
299 |                     subtype, year, reg1, name, save = "", "", "", "", ""
300 |             else:
301 |                 LogNPrint("*Error : Can't find dbLink.")
302 |                 continue
303 |
304 |             if name:
305 |                 name = name.replace("\"", "")
306 |                 LogNPrint("Subtype: "+subtype)
307 |             else:
308 |                 continue
309 |             if config.YearSort:
310 |                 if int(year[:4]) > 2000:
311 |                     year = year[:4]
312 |                 #elif int(year) == 999:
313 |                 #    year = "多季"
314 |                 elif 1991 <= int(year[:4]) and int(year[:4]) <= 2000:
315 |                     year = "1991-2000"
316 |                 elif 1981 <= int(year[:4]) and int(year[:4]) <= 1990:
317 |                     year = "1981-1990"
318 |                 elif int(year[:4]) <= 1980:
319 |                     year = "1980以前"
320 |             if subtype == "movie":
321 |                 table_name = "Movie"
322 |                 Path = ("Movie\\%s\\%s" % (year, name))
323 |             elif subtype == "tv":
324 |                 table_name = "TV"
325 |                 Path = ("TVSeries\\%s\\%s\\%s" % (reg1, year, name))
326 |
327 |             query = sql.query(db_name, table_name, save.split("\t")[0])  # look up existing data
328 |             if query != None and (not config.DataUpdate or query[-1] == Path):  # existing data, and no update is needed
329 |                 LogNPrint("Ignore : "+query[-1])
330 |                 Path = query[-1]
331 |                 name = Path[Path.rfind("\\")+1:]
332 |             elif query != None and config.DataUpdate and query[-1] != Path:  # existing data differs and updating is enabled
333 |                 sql.input(db_name, table_name, save.split("\t")+[Path], replace=True)
334 |                 command2 = ("rclone move -v \"%s\" \"%s\" --log-file=%s" % (mypath+"\\"+query[-1], mypath+"\\"+Path, Logfile))
335 |                 os.system(command2)
336 |                 if config.syncpath:
337 |                     for syncpath in config.syncpath:
338 |                         command2 = ("rclone move -v \"%s\" \"%s\" --log-file=%s" % (syncpath+"\\"+query[-1], syncpath+"\\"+Path, Logfile))
339 |                         os.system(command2)
340 |                 LogNPrint("Update : "+query[-1])
341 |             else:  # no matching record in the database
342 |                 sql.input(db_name, table_name, save.split("\t")+[Path])
343 |                 LogNPrint("Rename : "+name)
344 |
345 |             folder = folder.replace(config.mountpath, "") if config.UseRemote and config.mountpath else folder
346 |             path1 = mypath+"\\"+folder+"\\"+d
347 |             path2 = mypath+"\\"+Path+"\\"+d if SubD else mypath+"\\"+Path
348 |             if len(path2) > config.pathlen and not subtype == "tv":  # path-length limit (not applied to TV)
349 |                 path2 = mypath+"\\"+Path
350 |             command = ("rclone move -v \"%s\" \"%s\" --stats 30s --log-file=%s" % (path1, path2, Logfile))
351 |             os.system(command)
352 |             #os.popen(command)
353 |             LogNPrint("MoveTo : "+path2)
354 |         command = ("rclone rmdirs -v \"%s\"" % (mypath+"\\"+folder))
355 |         os.system(command)
--------------------------------------------------------------------------------
/Movie/README.md:
--------------------------------------------------------------------------------
1 | # MVAutoSort
2 |
3 | 
4 |
5 | ## 需求套件
6 | pip install -r requirements.txt
7 |
8 | ## 說明
9 | 未完成待續
10 |
11 | ### 搜尋模式
12 |
13 | - 若資料夾名稱含有IMDbID(tt:d)或DoubanID(db_:d),則使用ID搜尋對應資料
14 |
15 | - 若資料夾內存在.nfo檔,則在其中尋找IMDbID,並以此做搜尋
16 |
17 | - 若可在PT站搜尋到IMDbID或DoubanID,則使用此資料。
18 |
19 | - 若以上皆非,則解析文件夾名稱調用豆瓣API做搜尋
20 |
21 | - 通常文件夾名稱由3個部分組成 $電影名稱.$年份.$壓制參數,目前採用以$年份為錨點解析出$電影名稱的方式
22 |
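For illustration, a minimal sketch of the ID detection above; the regexes mirror the `\((tt\d+)\)` / `\((db_\d+)\)` patterns used in sql.py, and the folder name is hypothetical:

```python
import re

def detect_ids(dirname):
    """Return an IMDb or Douban ID embedded in a folder name, if any."""
    douban = re.search(r"\(db_(\d+)\)", dirname)  # e.g. "(db_1234567)"
    imdb = re.search(r"\((tt\d+)\)", dirname)     # e.g. "(tt6751668)"
    if douban:
        return {"douban": "https://movie.douban.com/subject/%s/" % douban.group(1)}
    if imdb:
        return {"imdb": imdb.group(1)}
    return None

print(detect_ids("[2019][韓]Parasite(tt6751668)"))  # -> {'imdb': 'tt6751668'}
```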
23 | ### Data collection
24 |
25 | - Originally used the PT-Gen API, but dropped it because of its stability and call-rate limits.
26 |
27 | - Its code was instead simplified into gen.py, which now does the scraping. Thanks to @Rhilip and BFDZ for PT-Gen.
28 |
29 | - If nothing can be found on Douban, the IMDbID is used to query IMDb and TMDb separately, and the results are merged
30 |
31 | ### Auto-classification
32 |
33 | #### Titles
34 |
35 | - The Chinese title prefers the Taiwan translation; otherwise the Douban Chinese title is used, converted to Traditional Chinese with OpenCC
36 |
37 | - The English title is the first title in the list composed of pure English; if there is none, only the Chinese title is used (see the sketch below)
38 |
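A minimal sketch of these title rules; the OpenCC call and the all-ASCII check mirror get.py (checkzh), and the titles are examples:

```python
from opencc import OpenCC  # opencc-python-reimplemented

def pick_title(title_zh, all_titles, eng_len=65):
    title_zh = OpenCC('s2twp').convert(title_zh)  # Simplified -> Traditional (Taiwan wording)
    # first title with no character beyond Latin-1, i.e. "pure English"
    title_en = next((t for t in all_titles if all(ord(c) <= 255 for c in t)), "")
    if title_en and len(title_en) <= eng_len and title_en != title_zh:
        return "%s %s" % (title_zh, title_en)
    return title_zh  # no usable English title: Chinese only

print(pick_title("天气之子", ["天氣之子", "Weathering with You"]))
# -> 天氣之子 Weathering with You
```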
39 | ## Planned features
40 |
41 | - [ ] Manual mode (fully manual / manual on error / no manual input), with manual IMDbID entry
42 |
43 | - [ ] Switchable search modes
44 |
45 | - [ ] Fetch per-season info from TMDb (IMDb does not return per-season data) and store it in a 合集 (collection) folder
46 |
47 | - [x] Reuse data already in the database
48 |
--------------------------------------------------------------------------------
/Movie/config_sample.py:
--------------------------------------------------------------------------------
1 | #API
2 | TMDbAPI = ""
3 | Rapid_IMDb = ""
4 | dbapi = "https://api.douban.com/v2/movie/search?apikey=0dad551ec0f84ed02907ff5c42e8ec70&q="
5 |
6 | #Cookies
7 | ##Format: {'cookie': cookie}; leave empty to skip searching that PT site (example below)
8 | ourbits = ""
9 | SSD = ""
10 | TJUPT = ""
11 | FRDS = ""
12 | MTeam = ""
13 | PuTao = ""
14 | TTG = ""
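##Example (hypothetical cookie value copied from a logged-in browser session):
##ourbits = {'cookie': 'c_secure_uid=xxxx; c_secure_pass=xxxx'}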
15 |
16 | #Search
17 | year_check = True #check the year when searching Douban (names without a year return no results); recommended, to avoid wrong matches
18 |
19 | #Rename
20 | CHT_TW = True #prefer the Taiwan translation and convert to Traditional Chinese; False keeps the Simplified Chinese title from Douban
21 | ZH_ENG = True #mixed Chinese/English title; False keeps the Chinese title only (the English part is also dropped when the ENGlen limit triggers)
22 | ENGlen = 65 #length limit for the English title; if exceeded, only the Chinese title is kept. Set a huge value to disable
23 | regSt = True #region abbreviations, from the region.txt file
24 |
25 | #Move
26 | UseRemote = True #replace paths with the remote path (read via the local mount, but operate on the remote)
27 | remotepath = "remotepath:" #the remote path used above
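mountpath = "" #local mount point of the remote path (MVAutoSort.py reads config.mountpath; assumed setting, not in the original sample)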
28 | pathlen = 200 #path-length limit (folders only). Set a huge value to disable; when triggered, no subfolder is created
29 | SubFolder = True #keep the original folder name as a subfolder (not kept when the config.pathlen limit triggers)
30 | YearSort = True #group older movies into shared decade folders
31 |
32 | #Log
33 | LogPath = "D:\\AutoSort\\Movie" #defaults to the working directory
34 | LogName = "AutoSort"
35 | DataUpdate = False #True updates old records with the new data and moves the folders; False names folders from the existing database records
--------------------------------------------------------------------------------
/Movie/folder.txt:
--------------------------------------------------------------------------------
1 | G:/Upload/ATS/@TJUPT/~TVSeries
2 | G:/Upload/ATS/@TJUPT/~TVShow
3 | G:/Upload/ATS/@Ourbits/Ao
4 | G:/Upload/ATS/@Ourbits/FLTTH
5 | G:/Upload/ATS/@Ourbits/OurTV
6 | G:/Upload/ATS/@PTer/~TVShow
7 | G:/Upload/ATS/@PTer/~Movie
8 | G:/Upload/ATS/@PTer/~Movie2
--------------------------------------------------------------------------------
/Movie/gen.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 | # -*- coding: utf-8 -*-
3 | # Copyright (c) 2017-2020 Rhilip
4 |
5 | import re,time
6 | import json
7 | import random
8 | import requests
9 | from bs4 import BeautifulSoup
10 | from html2bbcode.parser import HTML2BBCode
11 | import http.cookiejar
12 |
13 | __version__ = "0.4.5"
14 | __author__ = "Rhilip"
15 |
16 | douban_apikey_list = [
17 | "02646d3fb69a52ff072d47bf23cef8fd",
18 | "0b2bdeda43b5688921839c8ecb20399b",
19 | "0dad551ec0f84ed02907ff5c42e8ec70",
20 | "0df993c66c0c636e29ecbb5344252a4a"
21 | ]
22 | #"07c78782db00a121175696889101e363"
23 | headers = {
24 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
25 | 'Chrome/61.0.3163.100 Safari/537.36 ',
26 | "Accept-Language": "en,zh-CN;q=0.9,zh;q=0.8"
27 | }
28 |
29 | cookies = http.cookiejar.MozillaCookieJar('sites\\.cookies\\douban.txt')
30 | cookies.load()
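# Netscape-format cookie file exported from a logged-in Douban session (the path is assumed relative to the working directory)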
31 |
32 | def get_db_apikey() -> str:
33 | return random.choice(douban_apikey_list)
34 |
35 | def get_page(url: str, json_=False, jsonp_=False, bs_=False, text_=False, **kwargs):
36 | kwargs.setdefault("headers", headers)
37 | page = requests.get(url, **kwargs,cookies=cookies)
38 |
39 | page.encoding = "utf-8"
40 | page_text = page.text
41 | if json_:
42 | try:
43 | return page.json()
44 |         except ValueError: # invalid JSON -> back off briefly and retry with the same arguments
45 |             time.sleep(0.5)
46 |             return get_page(url, json_=True, **kwargs)
47 | elif jsonp_:
48 | start_idx = page_text.find('(')
49 | end_idx = page_text.rfind(')')
50 | return json.loads(page_text[start_idx + 1:end_idx])
51 | elif bs_:
52 | return BeautifulSoup(page.text, "lxml")
53 | elif text_:
54 | return page_text
55 | else:
56 | return page
57 |
58 |
59 | def html2ubb(html: str) -> str:
60 | return str(HTML2BBCode().feed(html))
61 |
62 | def get_num_from_string(raw):
63 |     return int(re.search(r'[\d,]+', raw).group(0).replace(',', ''))
64 |
65 | def gen_douban(dblink):
66 | data = {}
67 | sid = re.search(r"https:\/\/(movie\.)?douban\.com\/(subject|movie)\/(\d+)",dblink).group(3)
68 | data['sid'] = sid
69 | douban_page = get_page(dblink, bs_=True)
70 | douban_api_json = get_page(
71 | 'https://api.douban.com/v2/movie/{}'.format(sid),
72 | params={'apikey': get_db_apikey()},
73 | json_=True
74 | )
75 | douban_abstract_json = get_page('https://movie.douban.com/j/subject_abstract?subject_id={}'.format(sid), json_=True)
76 | data['success'] = False
77 |
78 |     if "msg" in douban_api_json and (douban_api_json["msg"] != 'invalid_credencial2'): # fallback when an API key has gone invalid
79 | data["error"] = douban_api_json["msg"]
80 | elif str(douban_page).find("检测到有异常请求") > -1:
81 | data["error"] = "GenHelp was banned by Douban."
82 | data['exit'] = True
83 |     elif douban_page.title.text == "页面不存在": # Douban's "page does not exist"
84 |         #print(douban_page) # debug only
85 |         data["error"] = "The corresponding resource does not exist."
86 | else:
87 | data["douban_link"] = dblink
88 | data['success'] = True
89 |         def fetch(node): # text node that follows a "span.pl" label
90 |             return node.next_element.next_element.strip()
91 |         # parse the main page
92 | data["chinese_title"] = (douban_page.title.text.replace("(豆瓣)", "").strip())
93 | data["foreign_title"] = (douban_page.find("span", property="v:itemreviewed").text
94 | .replace(data["chinese_title"], '').strip()) if douban_page.find("span", property="v:itemreviewed") else ""
95 |
96 | aka_anchor = douban_page.find("span", class_="pl", text=re.compile("又名"))
97 | data["aka"] = sorted(fetch(aka_anchor).split(' / ')) if aka_anchor else []
98 |
99 | if data["foreign_title"]:
100 | trans_title = data["chinese_title"] + (('/' + "/".join(data["aka"])) if data["aka"] else "")
101 | this_title = data["foreign_title"]
102 | else:
103 | trans_title = "/".join(data["aka"]) if data["aka"] else ""
104 | this_title = data["chinese_title"]
105 |
106 | data["trans_title"] = trans_title.split("/")
107 | data["this_title"] = this_title.split("/")
108 |
109 | region_anchor = douban_page.find("span", class_="pl", text=re.compile("制片国家/地区"))
110 | language_anchor = douban_page.find("span", class_="pl", text=re.compile("语言"))
111 | seasons_anchor = douban_page.find("span", class_="pl", text=re.compile("季数"))
112 | episodes_anchor = douban_page.find("span", class_="pl", text=re.compile("集数"))
113 |         imdb_link_anchor = douban_page.find("a", text=re.compile(r"tt\d+"))
114 | year_anchor = douban_page.find("span", class_="year")
115 |
116 |         data["year"] = douban_page.find("span", class_="year").text[1:-1] if year_anchor else ""  # year
117 |         data["region"] = fetch(region_anchor).split(" / ") if region_anchor else []  # region of production
118 |         data["genre"] = list(map(lambda l: l.text.strip(), douban_page.find_all("span", property="v:genre")))  # genres
119 |         data["language"] = fetch(language_anchor).split(" / ") if language_anchor else []  # languages
120 |         data["playdate"] = sorted(map(lambda l: l.text.strip(),  # release dates
121 |                                       douban_page.find_all("span", property="v:initialReleaseDate")))
122 |         data["imdb_link"] = imdb_link_anchor.attrs["href"] if imdb_link_anchor else ""  # IMDb link
123 |         data["imdb_id"] = imdb_link_anchor.text if imdb_link_anchor else ""  # IMDb ID
124 |         data["episodes"] = fetch(episodes_anchor) if episodes_anchor else ""  # episode count
125 |         season_check = douban_page.find("select", id="season")
126 |         data["seasons_list"] = [option.get("value") for option in season_check.find_all("option")] if seasons_anchor and season_check else []  # season list
127 |         data["seasons"] = season_check.find_all("option")[-1].getText() if seasons_anchor and season_check else ""  # season count
128 |
129 | duration_anchor = douban_page.find("span", class_="pl", text=re.compile("单集片长"))
130 | runtime_anchor = douban_page.find("span", property="v:runtime")
131 |
132 |         duration = ""  # runtime
133 | if duration_anchor:
134 | duration = fetch(duration_anchor)
135 | elif runtime_anchor:
136 | duration = runtime_anchor.text.strip()
137 | data["duration"] = duration
138 |
139 |         # fetch additional resources
140 |         if data["imdb_link"]:  # the Douban page carries an IMDb link
141 | imdb_source = ("https://p.media-imdb.com/static-content/documents/v1/title/{}/ratings%3Fjsonp="
142 | "imdb.rating.run:imdb.api.title.ratings/data.json".format(data["imdb_id"]))
143 | try:
144 |                 imdb_json = get_page(imdb_source, jsonp_=True)  # fetch ratings via IMDb's API (times out frequently)
145 | imdb_average_rating = imdb_json["resource"]["rating"]
146 | imdb_votes = imdb_json["resource"]["ratingCount"]
147 | if imdb_average_rating and imdb_votes:
148 | data["imdb_rating"] = "{}/10 from {} users".format(imdb_average_rating, imdb_votes)
149 | except Exception as err:
150 | pass
151 |
152 |         # Douban rating, summary, poster, director, writers, cast, tags
153 | '''data["douban_rating_average"] = douban_average_rating = douban_api_json["rating"]["average"] or 0
154 | data["douban_votes"] = douban_votes = douban_api_json["rating"]["numRaters"] or 0
155 | data["douban_rating"] = "{}/10 from {} users".format(douban_average_rating, douban_votes)
156 | data["tags"] = list(map(lambda member: member["name"], douban_api_json["tags"]))'''
157 |
158 | abstract_subject = douban_abstract_json["subject"]
159 | try:
160 | data["douban_rating_average"] = douban_average_rating = douban_page.find("strong", property="v:average").text or 0
161 | data["douban_votes"] = douban_votes = douban_page.find("span", property="v:votes").text or 0
162 |         except AttributeError: # rating nodes missing from the page -> fall back to the abstract API
163 |             data["douban_rating_average"] = douban_average_rating = abstract_subject["rate"] or 0; data["douban_votes"] = douban_votes = 0
164 | data["year"] = abstract_subject["release_year"] if not data["year"] else data["year"]
165 | data["subtype"] = 'tv' if abstract_subject['is_tv'] or data["episodes"] or abstract_subject['subtype'].lower() == 'tv' else 'movie'
166 |
167 |     # return the cleaned data in one payload
168 |         return data
169 | if __name__ == '__main__':
170 |     x = "https://movie.douban.com/subject/27200642"
171 |     print(gen_douban(x))
172 |
--------------------------------------------------------------------------------
/Movie/get.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 | # -*- coding: utf-8 -*-
3 | # Copyright (c) 2019-2020 GDST
4 | import os ,re ,requests, time
5 | from opencc import OpenCC
6 | from fake_useragent import UserAgent
7 | import config
8 | from bs4 import BeautifulSoup
9 | import http.cookiejar
10 |
11 | #UA = UserAgent()
12 | UA = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.125 Safari/537.36'
13 | regDicEN = {}
14 |
15 | with open("region.txt" , "r", encoding = 'utf-8-sig') as regdataEN: # region-abbreviation mapping
16 | regListEN = [l.strip().split(',') for l in regdataEN ]
17 | for regEN in regListEN:
18 | regDicEN[regEN[-1]]=regEN[:-1]
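# e.g. the "美国,美,USA" row in region.txt yields regDicEN["USA"] == ["美国", "美"]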
19 |
20 | def resjson(url,cookies=''):
21 | r = requests.get(url,headers={'User-Agent':UA},cookies=cookies)
22 | res = r.json() # return dict
23 | return res
24 |
25 | def checkzh(text):
26 | for t in text:
27 | if ord(t) > 255:
28 | return True
29 |
30 | def findnfo(path):
31 | if not os.path.isdir(path):
32 | return False
33 | for file in sorted(os.listdir(path)):
34 | filepath = "%s\\%s" % (path,file)
35 | if os.path.isfile(filepath) and ( re.match(r'.+?\.nfo', file) or re.match(r'.+?\.txt', file) ):
36 | with open(filepath, "r", encoding="latin-1") as data:
37 | for line in data:
38 | imdb_search = re.search(r"(http|https)://www.imdb.com/title/(tt\d+)",line)
39 | if imdb_search:
40 | return {'imdb':imdb_search.group(2)}
41 | db_search = re.search(r"https:\/\/movie\.douban\.com\/(subject|movie)\/(\d+)",line)
42 | if db_search:
43 | return {'douban':db_search.group()}
44 |
45 |
46 | def imdb2db(IMDbID):
47 | imdb2db = "https://api.douban.com/v2/movie/imdb/%s?apikey=0df993c66c0c636e29ecbb5344252a4a" % (IMDbID)
48 | res = resjson(imdb2db)
49 | dblink = res['alt'].replace("/movie/","/subject/")+"/" if 'alt' in res.keys() else ""
50 | return dblink
51 |
52 | def imdb2db2(IMDbID):
53 | url = "https://movie.douban.com/j/subject_suggest?q={}".format(IMDbID)
54 | cookies = http.cookiejar.MozillaCookieJar('sites\\.cookies\\douban.txt')
55 | cookies.load()
56 | res = resjson(url,cookies=cookies)
57 | time.sleep(0.5)
58 | try:
59 | dblink = re.search(r"https:\/\/(movie\.)?douban\.com\/(subject|movie)\/(\d+)",res[0]['url']).group(0)
60 | return dblink
61 |     except (IndexError, AttributeError): # no suggestion, or no recognizable Douban URL
62 |         return False
63 |
64 | def IMDbInfo(IMDbID):
65 | rapidapi_imdb = "https://movie-database-imdb-alternative.p.rapidapi.com/?i=%s&r=json" % (IMDbID)
66 | payload = {"X-RapidAPI-Host": "movie-database-imdb-alternative.p.rapidapi.com",
67 | "X-RapidAPI-Key": config.Rapid_IMDb}
68 | try:
69 | res = requests.get(rapidapi_imdb,headers=payload).json()
70 | return res
71 | except Exception as e:
72 | return False
73 |
74 | def IMDb2TMDb(IMDbID,lan="zh-TW"):
75 | global year,subtype,reg1,reg2,reg3,save
76 |     # IMDbInfo is queried below once TMDb returns a match (avoids a redundant early API call)
77 | imdb2tmdb = "https://api.themoviedb.org/3/find/%s?api_key=%s&language=%s&external_source=imdb_id" % (IMDbID ,config.TMDbAPI,lan)
78 |
79 | res = resjson(imdb2tmdb)
80 |     if "status_message" not in res.keys():
81 | if len(res["movie_results"]) != 0 or len(res["tv_results"]) != 0: #Movie+TVS
82 | IMDb =IMDbInfo(IMDbID)
83 | if not IMDb or "Error" in IMDb.keys():
84 | return False
85 | year = IMDb['Year'][0:4]
86 | titleIMDb = IMDb['Title']
87 | IMDbRating = IMDb['imdbRating'] if IMDb['imdbRating'] != "N/A" else "0"
88 | region = IMDb['Country'].replace(" ","").split(",")[0]
89 | subtype = IMDb['Type'] if IMDb['Type'] == "movie" else "tv"
90 |
91 | results = res['movie_results'][0] if subtype == "movie" else res['tv_results'][0]
92 |             titleZH = results['title'] if subtype == "movie" else results['name'] # movies use 'title', TV series use 'name'
93 | titleEN = results['original_title'] if subtype == "movie" else results['original_name']
94 | if titleZH == titleEN and lan != "zh-CN":
95 |                 return IMDb2TMDb(IMDbID,lan="zh-CN") # no Taiwan translation available -> fall back to the zh-CN one
96 | genre_ids = results['genre_ids']
97 |
98 | genres = "|".join([MVgenres[genre_id] if subtype == "movie" else TVgenres[genre_id] for genre_id in genre_ids])
99 | TMDbID = "TMDbMV_%s" % (results['id'])
100 | TMDbRating = results['vote_average']
101 |
102 | reg1 = reg2 = reg3 = "None"
103 |             for reg in regDicEN.keys(): # map the region to its abbreviations
104 | if reg == region:
105 | reg1 = regDicEN[reg][0]
106 | reg2 = regDicEN[reg][1]
107 | reg3 = reg
108 | break
109 |
110 | AllTitle1 = [titleEN]+[titleIMDb]
111 | AllTitle2 = list(set(AllTitle1))
112 | AllTitle2.sort(key=AllTitle1.index)
113 |
114 |             if config.CHT_TW: # Traditional Chinese / Taiwan wording
115 | titleZH = OpenCC('s2twp').convert(titleZH)
116 | genres = OpenCC('s2twp').convert(genres)
117 | reg1 = OpenCC('s2twp').convert(reg1)
118 |             if config.ZH_ENG: # mixed Chinese/English title
119 | titleEN = ""
120 | for tt in AllTitle2:
121 | if not checkzh(tt):
122 | if tt in AllTitle2:
123 | AllTitle2.remove(tt)
124 | titleEN = tt.replace(" : ",":").replace(": ",":")
125 | break
126 | title = (titleZH+" "+ titleEN) if titleIMDb and len(titleEN) <= config.ENGlen and titleZH != titleEN else titleZH
127 | save = "%s\t%s\t%s\t%s\t\t%s\t%s\t%s\t%s\t%s\t%s" % (IMDbID,year,reg1,IMDbRating,titleZH,titleEN,"/".join(AllTitle2),genres,IMDbID,TMDbID)
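            # 'save' is a tab-separated row in the column order of sql.py's schema:
            # SID, Year, 地區, IMDb rating, 豆瓣 rating (left empty here), 中文標題,
            # 英文標題, 其他標題, 類型, IMDbID, DBID; the caller appends FolderPath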
128 | name = "[%s][%s]%s(%s)(%s)(%s)" % (year,reg2,title,genres.replace("|","_"),IMDbRating,IMDbID)
129 | return [subtype,year,reg1,name,save]
130 |
131 | def IMDbInt(): # initialise the TMDb genre lookup tables (MVgenres / TVgenres)
132 |     global MVgenres,TVgenres
133 |     MVgenresAPI = "https://api.themoviedb.org/3/genre/movie/list?api_key=%s&language=zh-TW" % (config.TMDbAPI)
134 |     genres = resjson(MVgenresAPI)['genres']
135 |     MVgenres = {}
136 |     for genre in genres:
137 |         MVgenres[genre['id']] = genre['name']
138 |     TVgenresAPI = "https://api.themoviedb.org/3/genre/tv/list?api_key=%s&language=zh-TW" % (config.TMDbAPI)
139 | genres = resjson(TVgenresAPI)['genres']
140 | TVgenres = {}
141 | for genre in genres:
142 | TVgenres[genre['id']] = genre['name']
143 |
144 | IMDbInt()
145 |
146 |
--------------------------------------------------------------------------------
/Movie/region.txt:
--------------------------------------------------------------------------------
1 | 香港,港,HongKong
2 | 中国香港,港,HongKong
3 | 台湾,台,Taiwan
4 | 中国台湾,台,Taiwan
5 | 中国大陆,陸,China
6 | 中国,陸,China
7 | 美国,美,USA
8 | 英国,英,UK
9 | 日本,日,Japan
10 | 韩国,韓,SouthKorea
11 | 法国,法,France
12 | 德国,德,Germany
13 | 意大利,義,Italy
14 | 西班牙,西,Spain
15 | 印度,印,India
16 | 泰国,泰,Thailand
17 | 俄罗斯,俄,Russia
18 | 伊朗,伊朗,Iran
19 | 加拿大,加,Canada
20 | 加拿大 Canada,加,Canada
21 | 澳大利亚,澳,Australia
22 | 爱尔兰,愛,Ireland
23 | 瑞典,瑞典,Sweden
24 | 巴西,巴西,Brazil
25 | 丹麦,丹,Denmark
26 | 墨西哥,墨,Mexico
27 | 土耳其,土,Turkey
28 | 土耳其 Turkey,土,Turkey
29 | 柬埔寨,柬,Cambodia
30 | 匈牙利,匈,Hungary
31 | 挪威,挪威,Norway
32 | 冰岛,冰島,Iceland
33 | 印度尼西亚,印尼,Indonesia
34 | 比利时,比,Belgium
--------------------------------------------------------------------------------
/Movie/requirements.txt:
--------------------------------------------------------------------------------
1 | requests
2 | bs4
3 | lxml
4 | fake-useragent
5 | opencc-python-reimplemented
6 | html2bbcode
7 | user_agent
--------------------------------------------------------------------------------
/Movie/search.py:
--------------------------------------------------------------------------------
1 | import requests,re,time
2 | from bs4 import BeautifulSoup
3 | #from fake_useragent import UserAgent
4 | from user_agent import generate_user_agent
5 | import http.cookiejar
6 |
7 | import config
8 | from sites import ourbits, ssd, tjupt, pter, frds, tccf
9 |
10 | #UA = UserAgent().random
11 | UA = generate_user_agent()
12 |
13 | def PT(dirname):
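    # Dispatch on the release-group / site tags in the folder name; the first site that matches and returns an ID wins.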
14 | IMDbID, dblink, source = '', '', ''
15 | if not (IMDbID or dblink) and re.search(r"FRDS|Yumi",dirname): #From FRDS
16 | ptsearch = frds.search(dirname, config.headers, "sites\\.cookies\\frds.txt")
17 | if ptsearch:
18 | IMDbID = ptsearch['imdb'] if ptsearch['imdb'] else ""
19 | dblink = ptsearch['douban'] if ptsearch['douban'] else (imdb2db2(IMDbID) if IMDbID else "")
20 | return {'douban':dblink,'imdb':IMDbID,'source':'FRDS'}
21 | if not (IMDbID or dblink) and re.search(r"BMDru",dirname): #From TCCF
22 | ptsearch = tccf.search(dirname, config.headers, "sites\\.cookies\\tccf.txt")
23 | if ptsearch:
24 | IMDbID = ptsearch['imdb'] if ptsearch['imdb'] else ""
25 | dblink = ptsearch['douban'] if ptsearch['douban'] else (imdb2db2(IMDbID) if IMDbID else "")
26 | return {'douban':dblink,'imdb':IMDbID,'source':'TCCF'}
27 | if not (IMDbID or dblink) and re.search(r"(Ao|FLTTH|iLoveHD|iLoveTV|MGs|OurPad|OurTV|PbK|NTb|NTG)",dirname): #From Ourbits
28 | ptsearch = ourbits.search(dirname, config.headers, "sites\\.cookies\\ourbits.txt")
29 | if ptsearch:
30 | IMDbID = ptsearch['imdb'] if ptsearch['imdb'] else ""
31 | dblink = ptsearch['douban'] if ptsearch['douban'] else (imdb2db2(IMDbID) if IMDbID else "")
32 | return {'douban':dblink,'imdb':IMDbID,'source':'Ourbits'}
33 | if not (IMDbID or dblink) and re.search(r"CMCT|NTb|NTG",dirname): #From SSD
34 | ptsearch = ssd.search(dirname, config.headers, "sites\\.cookies\\ssd.txt")
35 | if ptsearch:
36 | IMDbID = ptsearch['imdb'] if ptsearch['imdb'] else ""
37 | dblink = ptsearch['douban'] if ptsearch['douban'] else (imdb2db2(IMDbID) if IMDbID else "")
38 | return {'douban':dblink,'imdb':IMDbID,'source':'SSD'}
39 | if not (IMDbID or dblink) and re.search(r"TJUPT|AOA|QAQ|PBO|DGF|NigulaSi|VCB-Studio",dirname): #From TJUPT
40 | ptsearch = tjupt.search(dirname, config.headers, "sites\\.cookies\\tjupt.txt")
41 | if ptsearch:
42 | IMDbID = ptsearch['imdb'] if ptsearch['imdb'] else ""
43 | dblink = ptsearch['douban'] if ptsearch['douban'] else (imdb2db2(IMDbID) if IMDbID else "")
44 | return {'douban':dblink,'imdb':IMDbID,'source':'TJUPT'}
45 | if not (IMDbID or dblink) and re.search(r"PTer|AREY|NTb|NTG|ExREN|FRDS|beAst|CHD|RBOF|recked89",dirname): #From PTer
46 | ptsearch = pter.search(dirname, config.headers, "sites\\.cookies\\pter.txt")
47 | if ptsearch:
48 | IMDbID = ptsearch['imdb'] if ptsearch['imdb'] else ""
49 | dblink = ptsearch['douban'] if ptsearch['douban'] else (imdb2db2(IMDbID) if IMDbID else "")
50 | return {'douban':dblink,'imdb':IMDbID,'source':'PTer'}
51 | return False
52 |
53 | def imdb2db2(IMDbID,count=3):
54 | if count < 0:
55 | return ''
56 | url = "https://movie.douban.com/j/subject_suggest?q={}".format(IMDbID)
57 | cookies = http.cookiejar.MozillaCookieJar('sites\\.cookies\\douban.txt')
58 | cookies.load()
59 | res = requests.get(url,headers=config.headers,cookies=cookies)
60 |     if '检测到有异常请求从你的 IP 发出' in res.text: # Douban's "abnormal requests detected from your IP" page
61 |         print("*Error : IP banned by douban.")
62 |         exit()
63 |
64 | res = res.json() # return dict
65 | try:
66 | dblink = re.search(r"https:\/\/(movie\.)?douban\.com\/(subject|movie)\/(\d+)",res[0]['url']).group(0)
67 | time.sleep(0.5)
68 | return dblink
69 |     except (IndexError, AttributeError): # empty suggestion list or unparsable URL -> retry
70 |         return imdb2db2(IMDbID,count-1)
71 |
72 | def MTeam(keyword,cookies=config.MTeam,headers=config.headers): # known issue: fails for unknown reasons, cookies suspected unusable
73 | if not config.MTeam:
74 | return False
75 | key2 = re.search(r'\.?([A-Za-z0-9.\']+\.S\d+)', keyword).group(1) if re.search(r'\.?([A-Za-z0-9.\']+\.S\d+)', keyword) else keyword
76 | url="https://pt.m-team.cc/torrents.php?search="+ key2
77 | response=requests.get(url,headers=headers,cookies=cookies)
78 | response.encoding = 'UTF-8'
79 | soup = BeautifulSoup(response.text, 'lxml')
80 | results = soup.find_all("table",{"class":"torrentname"})
81 |     reslinks = ["https://pt.m-team.cc/"+result.find("a").get("href") for result in results] # collect detail-page links from the search results
82 | for reslink in reslinks:
83 | res=requests.get(reslink,headers={'User-Agent':UA},cookies=cookies)
84 | res.encoding = 'UTF-8'
85 | soup = BeautifulSoup(res.text, 'lxml')
86 | try:
87 | title = soup.find("a",{"class":"index"}).getText().replace(".torrent","").replace("[M-TEAM].","")
88 | subtitle = soup.find("td",{"class":"rowfollow","valign":"top"}).getText()
89 | except:
90 | print(soup)
91 | continue
92 | if title == keyword:
93 | imdb_search = re.search(r"(http|https)://www.imdb.com/title/(tt\d+)",res.text)
94 | db_search = re.search(r"https:\/\/(movie\.)?douban\.com\/(subject|movie)\/(\d+)",res.text)
95 | dblink = db_search.group() if db_search else ""
96 | imdbid = imdb_search.group(2) if imdb_search else ""
97 | if dblink or imdbid:
98 | return {'douban':dblink,'imdb':imdbid}
99 | elif re.search(r"(:|:)(.+)\(",subtitle):
100 | return {'title':re.search(r"(:|:)(.+)\(",subtitle).group(2).strip()}
101 | return False
102 | def PuTao(keyword,cookies=config.PuTao):
103 | if not config.PuTao:
104 | return False
105 | key2 = keyword if not re.match(r'(.+?)\.(mkv|mp4|ts)', keyword) else re.match(r'(.+?)\.(mkv|mp4|ts)', keyword).group(1)
106 | url="https://pt.sjtu.edu.cn/torrents.php?search="+key2
107 | response=requests.get(url,headers={'User-Agent':UA},cookies=cookies)
108 | response.encoding = 'UTF-8'
109 | soup = BeautifulSoup(response.text, 'lxml')
110 | results = soup.find_all("table",{"class":"torrentname"})
111 |     reslinks = ["https://pt.sjtu.edu.cn/"+result.find("a").get("href") for result in results] # collect detail-page links from the search results
112 | for reslink in reslinks:
113 | res=requests.get(reslink,headers={'User-Agent':UA},cookies=cookies)
114 | res.encoding = 'UTF-8'
115 | soup = BeautifulSoup(res.text, 'lxml')
116 | title = soup.find("a",{"class":"index"}).getText().replace(".torrent","").replace("[PT].","")
117 | if title == keyword:
118 | imdb_search = re.search(r"(http|https)://www.imdb.com/title/(tt\d+)",res.text)
119 | db_search = re.search(r"https:\/\/(movie\.)?douban\.com\/(subject|movie)\/(\d+)",res.text)
120 | dblink = db_search.group() if db_search else ""
121 | imdbid = imdb_search.group(2) if imdb_search else ""
122 | if dblink or imdbid:
123 | return {'douban':dblink,'imdb':imdbid}
124 | return False
125 | def TTG(keyword,cookies=config.TTG):
126 | if not config.TTG:
127 | return False
128 | key2 = keyword if not re.match(r'(.+?)\.(mkv|mp4|ts)', keyword) else re.match(r'(.+?)\.(mkv|mp4|ts)', keyword).group(1)
129 | url="https://totheglory.im/browse.php?c=M&search_field="+key2
130 | cookies = http.cookiejar.MozillaCookieJar('sites\\.cookies\\ttg.txt')
131 | cookies.load()
132 | response=requests.get(url,headers={'User-Agent':UA},cookies=cookies)
133 | response.encoding = 'UTF-8'
134 | soup = BeautifulSoup(response.text, 'lxml')
135 | results = soup.find_all("div",{"class":"name_left"})
136 |     reslinks = ["https://totheglory.im/"+result.find("a").get("href") for result in results] # collect detail-page links from the search results
137 | for reslink in reslinks:
138 | res=requests.get(reslink,headers={'User-Agent':UA},cookies=cookies)
139 | res.encoding = 'UTF-8'
140 | soup = BeautifulSoup(res.text, 'lxml')
141 | title = soup.find("a",{"class":"index"}).getText().replace(".torrent","").replace("[TTG] ","")
142 | ftitle = soup.find("h1").getText().replace("[email protected]","")
143 | subtitle = ftitle[ftitle.find("[")+1:ftitle.find("]")]
144 | if title == keyword or title == key2:
145 | imdb_search = re.search(r"(http|https)://www.imdb.com/title/(tt\d+)",res.text)
146 | db_search = re.search(r"https:\/\/(movie\.)?douban\.com\/(subject|movie)\/(\d+)",res.text)
147 | dblink = db_search.group() if db_search else ""
148 | imdbid = imdb_search.group(2) if imdb_search else ""
149 |             # derive a search title from the subtitle
150 | search_name = ''
151 | title_search_1 = re.search(r"(.+) 全集",subtitle)
152 | title_search_2 = re.search(r"(\d{2})年( )?(\d{1,2}月|.季)( )?(.+劇) (.+) 全\d+(話|集)",subtitle)
153 | title_search_3 = re.search(r"(.+劇) (.+) 主演",subtitle)
154 | if title_search_1:
155 | search_name = title_search_1.group(1)
156 | elif title_search_2:
157 | year = title_search_2.group(1)
158 | search_name = title_search_2.group(6)+ " " + ("20"+year if int(year) < 30 else "19"+year)
159 | elif title_search_3:
160 | search_name = title_search_3.group(2)
161 | if dblink or imdbid or search_name:
162 | return {'douban':dblink,'imdb':imdbid,'title':search_name}
163 | return False
164 | if __name__ == "__main__":
165 | x = imdb2db2("tt10027990")
166 | print(x)
--------------------------------------------------------------------------------
/Movie/sites/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cxyfer/AutoSort/83f43f50a8d36d74442f92121b631bf482282d39/Movie/sites/__init__.py
--------------------------------------------------------------------------------
/Movie/sites/frds.py:
--------------------------------------------------------------------------------
1 | import requests, re, os, time
2 | from bs4 import BeautifulSoup
3 | import http.cookiejar
4 |
5 | def search(keyword, headers, cookies='.cookies\\frds.txt'):
6 | if not os.path.exists(cookies):
7 | return False
8 |     re_subname = re.match(r'(.+?)\.(mkv|mp4|ts|avi)', keyword) # strip the file extension
9 |     key1 = key2 = re_subname.group(1) if re_subname else keyword
10 |     re_brackets = re.search(r'\[(.+?)\d{0,2}(\(.+\))?\].+(\d{4})', key2) # strip bracketed group tags, keeping name + year
11 | key2 = "{} {}".format(re_brackets.group(1), re_brackets.group(3)) if re_brackets else key2
12 | key2 = key2.replace("@"," ")
13 | key2 = key2.replace(".Complete."," ")
14 | url = "https://pt.keepfrds.com/torrents.php?search=" + key2
15 |
16 | cookies = http.cookiejar.MozillaCookieJar(cookies)
17 | cookies.load()
18 | response=requests.get(url,headers=headers,cookies=cookies)
19 | response.encoding = 'UTF-8'
20 |     if response.status_code != 200: print(response.status_code)
21 |
22 | soup = BeautifulSoup(response.text, 'lxml')
23 | results = soup.find_all("table",{"class":"torrentname"})
24 | reslinks = ["https://pt.keepfrds.com/"+result.find("a").get("href") for result in results]
25 | for reslink in reslinks:
26 | res=requests.get(reslink,headers=headers,cookies=cookies)
27 | res.encoding = 'UTF-8'
28 |         if res.status_code != 200: print(res.status_code)
29 | soup = BeautifulSoup(res.text, 'lxml')
30 |
31 | title = soup.find("a",{"class":"index"}).getText().replace(".torrent","").replace("[FRDS].","")
32 | if title == keyword:
33 | imdb_search = re.search(r"(http|https)://www\.imdb\.com/title/(tt\d+)",res.text)
34 | db_search = re.search(r"https:\/\/(movie\.|www\.)?douban\.com\/(subject|movie)\/(\d+)",res.text)
35 | dblink = db_search.group() if db_search else ""
36 | imdbid = imdb_search.group(2) if imdb_search else ""
37 | if dblink or imdbid:
38 | return {'douban':dblink,'imdb':imdbid}
39 |     if len(reslinks) == 0: print(url) # no results: print the search URL, for tuning the search keywords
40 | return False
41 |
42 | if __name__ == '__main__':
43 | headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36'}
44 | x = search("天气之子.Weathering.With.You.2019.Bluray.1080p.HDR.x265.10bit.MNHD-FRDS", headers)
45 | print(x)
--------------------------------------------------------------------------------
/Movie/sites/ourbits.py:
--------------------------------------------------------------------------------
1 | import requests, re, os, time
2 | from bs4 import BeautifulSoup
3 | import http.cookiejar
4 |
5 | def search(keyword, headers, cookies='.cookies\\ourbits.txt'):
6 | if not os.path.exists(cookies):
7 | return False
8 |     re_subname = re.match(r'(.+?)\.(mkv|mp4|ts|avi)', keyword) # strip the file extension
9 | key2 = re_subname.group(1) if re_subname else keyword
10 | key2 = key2.replace(".Complete."," ").replace(".SUBBED."," ")
11 | key2 = key2.replace("第"," 第 ").replace("季"," 季 ")
12 | url="https://ourbits.club/torrents.php?search="+key2
13 |
14 | cookies = http.cookiejar.MozillaCookieJar(cookies)
15 | cookies.load()
16 | response=requests.get(url,headers=headers,cookies=cookies)
17 | response.encoding = 'UTF-8'
18 |     if response.status_code != 200: print(response.status_code)
19 |
20 | soup = BeautifulSoup(response.text, 'lxml')
21 | results = soup.find_all("table",{"class":"torrentname"})
22 |     reslinks = ["https://ourbits.club/"+result.find("a").get("href") for result in results] # collect detail-page links from the search results
23 | for reslink in reslinks:
24 | res=requests.get(reslink,headers=headers,cookies=cookies)
25 | res.encoding = 'UTF-8'
26 |         if res.status_code != 200: print(res.status_code)
27 | soup = BeautifulSoup(res.text, 'lxml')
28 | title = soup.find("a",{"class":"index"}).getText().replace(".torrent","").replace("[OurBits].","")
29 |
30 | if title == keyword or title == key2:
31 | imdb_search = re.search(r"(http|https)://www.imdb.com/title/(tt\d+)",res.text)
32 | douban_search = re.search(r"https:\/\/(movie\.|www\.)?douban\.com\/(subject|movie)\/(\d+)",res.text)
33 | imdbid = imdb_search.group(2) if imdb_search else ""
34 | dblink = douban_search.group() if douban_search else ""
35 | try:
36 | dblink = 'https://movie.douban.com/subject/' + soup.find('div',{'id':'kdouban'}).get('data-doubanid') if not dblink else dblink
37 | except:
38 | pass
39 | if dblink or imdbid:
40 | return {'douban':dblink,'imdb':imdbid}
41 |     if len(reslinks) == 0: print(url) # no results: print the search URL, for tuning the search logic
42 | return False
43 |
44 | if __name__ == '__main__':
45 | headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36'}
46 | x = search("All.Together.Now.2020.1080p.NF.WEB-DL.DDP5.1.H264-Ao", headers)
47 | print(x)
--------------------------------------------------------------------------------
/Movie/sites/pter.py:
--------------------------------------------------------------------------------
1 | import requests, re, os, time
2 | from bs4 import BeautifulSoup
3 | import http.cookiejar
4 |
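# Decodes Cloudflare's email obfuscation (data-cfemail): the first byte is the XOR key for the rest; tjupt.py uses the same helper.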
5 | def decode(cfemail):
6 | enc = bytes.fromhex(cfemail)
7 | return bytes([c ^ enc[0] for c in enc[1:]]).decode('utf8')
8 |
9 | def search(keyword, headers, cookies='.cookies\\pter.txt'):
10 | if not os.path.exists(cookies):
11 | return False
12 |     re_subname = re.match(r'(.+?)\.(mkv|mp4|ts|avi)', keyword) # strip the file extension
13 |     key1 = key2 = re_subname.group(1) if re_subname else keyword
14 |     re_brackets = re.search(r'\[(.+?)\d{0,2}(\(.+\))?\].+(\d{4})', key2) # strip bracketed group tags
15 |     key2 = "{} {}".format(re_brackets.group(1), re_brackets.group(3)) if re_brackets else key2
16 |     re_year = re.search(r'(.+?)\.+(\d{4})', key2) #NAME.YEAR
17 |     key2 = "{} {}".format(re_year.group(1), re_year.group(2)) if re_year else key2
18 |     re_CNseason = re.search(r'第\w季', key2) # remove the Chinese season marker (第N季)
19 |     key2 = key2.replace(re_CNseason.group(0)," ") if re_CNseason else key2
20 | key2 = key2.replace("!"," ").replace("!"," ").replace("-"," ").replace("\'"," ")
21 |
22 | key2 = key2.replace("@"," ")
23 | key2 = key2.replace(".Complete."," ")
24 | url = "https://pterclub.com/torrents.php?search=" + key2
25 |
26 | cookies = http.cookiejar.MozillaCookieJar(cookies)
27 | cookies.load()
28 | response=requests.get(url,headers=headers,cookies=cookies)
29 | response.encoding = 'UTF-8'
30 |     if response.status_code != 200: print(response.status_code)
31 |
32 | soup = BeautifulSoup(response.text, 'lxml')
33 | results = soup.find_all("table",{"class":"torrentname"})
34 |     reslinks = ["https://pterclub.com/"+result.find("a").get("href") for result in results] # collect detail-page links from the search results
35 | for reslink in reslinks:
36 | res=requests.get(reslink,headers=headers,cookies=cookies)
37 | res.encoding = 'UTF-8'
38 |         if res.status_code != 200: print(res.status_code)
39 |
40 | soup = BeautifulSoup(res.text, 'lxml')
41 | cf_email = soup.find("span",{"class":"__cf_email__"})
42 | decrypted = decode(cf_email.get("data-cfemail")) if cf_email else ""
43 | title = soup.find("a",{"class":"index"}).getText().replace(".torrent","").replace("[PTer].","").replace("[email protected]",decrypted)
44 |
45 | if title == keyword or title == key1:
46 | imdb_search = re.search(r"(http|https)://www\.imdb\.com/title/(tt\d+)",res.text)
47 | db_search = re.search(r"https:\/\/(movie\.|www\.)?douban\.com\/(subject|movie)\/(\d+)",res.text)
48 | dblink = db_search.group() if db_search else ""
49 | imdbid = imdb_search.group(2) if imdb_search else ""
50 | if dblink or imdbid:
51 | return {'douban':dblink,'imdb':imdbid}
52 |     if len(reslinks) == 0: print(url) # no results: print the search URL, for tuning the search keywords
53 | return False
54 |
55 | if __name__ == '__main__':
56 | headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36'}
57 | x = search("鬼马智多星.All.the.Wrong.Clues.1981.BluRay.1080p.HEVC.10bit.2Audio.MiniFHD-XPcl@PTer.mkv", headers)
58 | print(x)
--------------------------------------------------------------------------------
/Movie/sites/ssd.py:
--------------------------------------------------------------------------------
1 | import requests, re, os, time
2 | from bs4 import BeautifulSoup
3 | import http.cookiejar
4 |
5 | def search(keyword, headers, cookies=".cookies\\ssd.txt"):
6 | if not os.path.exists(cookies):
7 | return False
8 |     re_subname = re.match(r'(.+?)\.(mkv|mp4|ts|avi)', keyword) # strip the file extension
9 |     key1 = key2 = re_subname.group(1) if re_subname else keyword
10 |     re_brackets = re.search(r'\[(.+?)(\(.+\))?\].+(\d{4})', key2) # strip bracketed group tags
11 |     key2 = "{} {}".format(re_brackets.group(1), re_brackets.group(3)) if re_brackets else key2
12 |     ssd_movie = re.search(r'(.+?)\d{0,2}(\(.+\))?\.(\d{4})(\..+)?.?£.+', key2) #SSD-Movie
13 |     key2 = "{} {} CMCT".format(ssd_movie.group(1),ssd_movie.group(3)) if ssd_movie else key2
14 |     ssd_tv = re.search(r'(.+)\.全\d+(集|话)\.(\d{4})\..+£.+', key1) #SSD-TV (the 集/话 alternation needs its own group)
15 |     key2 = "{} CMCT".format(ssd_tv.group(1)) if ssd_tv else key2
16 |     ssd_version = re.search(r'(.+)( |\.)(.+版)', key2) #SSD-Version (strip "…版" edition suffixes)
17 | key2 = key2.replace(ssd_version.group(3),"") if ssd_version else key2
18 | key2 = key2.replace("!"," ").replace("!"," ").replace("-"," ").replace("\'"," ")
19 | url="https://springsunday.net/torrents.php?search="+key2
20 |
21 | cookies = http.cookiejar.MozillaCookieJar(cookies)
22 | cookies.load()
23 | response=requests.get(url,headers=headers,cookies=cookies)
24 | response.encoding = 'UTF-8'
25 |     if response.status_code != 200: print(response.status_code)
26 |
27 | soup = BeautifulSoup(response.text, 'lxml')
28 | results = soup.find_all("table",{"class":"torrentname"})
29 |     reslinks = ["https://springsunday.net/"+result.find("a").get("href") for result in results] # collect detail-page links from the search results
30 | for reslink in reslinks:
31 | res=requests.get(reslink,headers=headers,cookies=cookies)
32 | res.encoding = 'UTF-8'
33 |         if res.status_code != 200: print(res.status_code)
34 | soup = BeautifulSoup(res.text, 'lxml')
35 | title = soup.find("a",{"class":"index"}).getText().replace(".torrent","").replace("[SSD].","")
36 | if title == keyword:
37 | imdb_search = re.search(r"(http|https)://(www|us)\.imdb\.com/title/(tt\d+)",res.text)
38 | imdb_search2 = re.search(r'tt\d{6,}',res.text)
39 | db_search = re.search(r"https:\/\/(movie\.|www\.)?douban\.com\/(subject|movie)\/(\d+)",res.text)
40 | dblink = db_search.group() if db_search else ""
41 | imdbid = imdb_search.group(3) if imdb_search else ""
42 | imdbid = imdb_search2.group() if not imdbid and imdb_search2 else imdbid
43 | if dblink or imdbid:
44 | return {'douban':dblink,'imdb':imdbid}
45 | time.sleep(0.5)
46 |     if len(reslinks) == 0: print(url) # no results: print the search URL, for tuning the search keywords
47 | return False
48 |
49 | if __name__ == '__main__':
50 | headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36'}
51 | x = search("[1922].1922.2017.1080p.NF.WEB-DL.DDP.5.1.x264-CMCTV.mkv", headers)
52 | print(x)
--------------------------------------------------------------------------------
/Movie/sites/tccf.py:
--------------------------------------------------------------------------------
1 | import requests, re, os, time
2 | from bs4 import BeautifulSoup
3 | import http.cookiejar
4 |
5 | def search(keyword, headers, cookies='.cookies\\tccf.txt'):
6 | if not os.path.exists(cookies):
7 | return False
8 |     re_subname = re.match(r'(.+?)\.(mkv|mp4|ts|avi)', keyword) # strip the file extension
9 |     key1 = key2 = re_subname.group(1) if re_subname else keyword
10 |     re_brackets = re.search(r'\[(.+?)\d{0,2}(\(.+\))?\].+(\d{4})', key2) # strip bracketed group tags
11 | key2 = "{} {}".format(re_brackets.group(1), re_brackets.group(3)) if re_brackets else key2
12 | key2 = key2.replace("@"," ")
13 | key2 = key2.replace(".Complete."," ")
14 | url = "https://et8.org/torrents.php?search=" + key2
15 |
16 | cookies = http.cookiejar.MozillaCookieJar(cookies)
17 | cookies.load()
18 | response=requests.get(url,headers=headers,cookies=cookies)
19 | response.encoding = 'UTF-8'
20 |     if response.status_code != 200: print(response.status_code)
21 |
22 | soup = BeautifulSoup(response.text, 'lxml')
23 | results = soup.find_all("table",{"class":"torrentname"})
24 |     reslinks = ["https://et8.org/"+result.find("a").get("href") for result in results] # collect detail-page links from the search results
25 | for reslink in reslinks:
26 | res=requests.get(reslink,headers=headers,cookies=cookies)
27 | res.encoding = 'UTF-8'
28 |         if res.status_code != 200: print(res.status_code)
29 | soup = BeautifulSoup(res.text, 'lxml')
30 |
31 | title = soup.find("a",{"class":"index"}).getText().replace(".torrent","").replace("[TCCF].","")
32 | if title == keyword:
33 | imdb_search = re.search(r"(http|https)://www\.imdb\.com/title/(tt\d+)",res.text)
34 | db_search = re.search(r"https:\/\/(movie\.|www\.)?douban\.com\/(subject|movie)\/(\d+)",res.text)
35 | dblink = db_search.group() if db_search else ""
36 | imdbid = imdb_search.group(2) if imdb_search else ""
37 | if dblink or imdbid:
38 | return {'douban':dblink,'imdb':imdbid}
39 |     if len(reslinks) == 0: print(url) # no results: print the search URL, for tuning the search keywords
40 | return False
41 |
42 | if __name__ == '__main__':
43 | headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36'}
44 | x = search("Tokyo.Olympiad.1965.Criterion.Collection.720p.BluRay.DD1.0.x264-BMDru", headers)
45 | print(x)
--------------------------------------------------------------------------------
/Movie/sites/tjupt.py:
--------------------------------------------------------------------------------
1 | import requests, re, os, time
2 | from bs4 import BeautifulSoup
3 | import http.cookiejar
4 |
5 | def decode(cfemail):
6 | enc = bytes.fromhex(cfemail)
7 | return bytes([c ^ enc[0] for c in enc[1:]]).decode('utf8')
8 |
9 | def search(keyword, headers, cookies='.cookies\\tjupt.txt'):
10 | if not os.path.exists(cookies):
11 | return False
12 |     re_subname = re.match(r'(.+?)\.(mkv|mp4|ts|avi)', keyword) # strip the file extension
13 | key1 = key2 = re_subname.group(1) if re_subname else keyword
14 | key2 = key2.replace("@"," ")
15 | key2 = key2.replace(".Complete."," ")
16 | url="https://www.tjupt.org/torrents.php?search="+key2
17 |
18 | cookies = http.cookiejar.MozillaCookieJar(cookies)
19 | cookies.load()
20 | response=requests.get(url,headers=headers,cookies=cookies)
21 | response.encoding = 'UTF-8'
22 |     if response.status_code != 200: print(response.status_code)
23 |
24 | soup = BeautifulSoup(response.text, 'lxml')
25 | results = soup.find_all("table",{"class":"torrentname"})
26 |     reslinks = ["https://www.tjupt.org/"+result.find("a").get("href") for result in results] # collect detail-page links from the search results
27 | for reslink in reslinks:
28 | res=requests.get(reslink,headers=headers,cookies=cookies)
29 | res.encoding = 'UTF-8'
30 |         if res.status_code != 200: print(res.status_code)
31 |
32 | soup = BeautifulSoup(res.text, 'lxml')
33 | cf_email = soup.find("span",{"class":"__cf_email__"})
34 | decrypted = decode(cf_email.get("data-cfemail")) if cf_email else ""
35 | title = soup.find("a",{"class":"index"}).getText().replace("[email protected]",decrypted).replace("[TJUPT].","").replace(".torrent","")
36 |
37 | if title == keyword or title == key1:
38 | imdb_search = re.search(r"(http|https)://www\.imdb\.com/title/(tt\d+)",res.text)
39 | db_search = re.search(r"https:\/\/(movie\.|www\.)?douban\.com\/(subject|movie)\/(\d+)",res.text)
40 | dblink = db_search.group() if db_search else ""
41 | imdbid = imdb_search.group(2) if imdb_search else ""
42 | if dblink or imdbid:
43 | return {'douban':dblink,'imdb':imdbid}
44 |     if len(reslinks) == 0: print(url) # no results: print the search URL, for tuning the search keywords
45 | return False
46 |
47 | if __name__ == '__main__':
48 | headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36'}
49 | x = search("Liu.Lao.Gen.S02.1080p.WEB-DL.H264.AAC-RushB@TJUPT", headers)
50 | print(x)
--------------------------------------------------------------------------------
/Movie/sql.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | import sqlite3 ,re
3 |
4 | def init(db_name,table_name):
5 | conn = sqlite3.connect(db_name)
6 | cursor = conn.cursor()
7 | execute = '''CREATE TABLE IF NOT EXISTS %s
8 | (SID INT PRIMARY KEY NOT NULL,
9 | Year INT NOT NULL,
10 | 地區 VARCHAR(10) NOT NULL,
11 | IMDb REAL ,
12 | 豆瓣 REAL ,
13 | 中文標題 VARCHAR(100) NOT NULL,
14 | 英文標題 TEXT ,
15 | 其他標題 TEXT ,
16 | 類型 VARCHAR(20) NOT NULL,
17 | IMDbID VARCHAR(15) ,
18 | DBID VARCHAR(15) ,
19 | FolderPath TEXT,
20 | UNIQUE(SID)
21 | )''' % (table_name)
22 | cursor.execute(execute)
23 | cursor.close()
24 | conn.close()
25 |
26 | def build_tsv(tsvname): # import legacy TSV rows as a list, rewritten to the new format
27 | with open(tsvname , "r", encoding = 'utf-8-sig') as data:
28 | List = []
29 | for line in data:
30 | part1 = line.strip().split("\t")[0:4]
31 | part2 = line.strip().split("\t")[4:10]
32 | part3 = line.strip().split("\t")[10]
33 |         if re.search(r"\((db_\d+)\)",line): # dbID found in the folder name
34 |             MainID = re.search(r"\((db_\d+)\)",line).group(1)
35 |         elif re.search(r"\((tt\d+)\)",line): # IMDbID found in the folder name
36 |             MainID = re.search(r"\((tt\d+)\)",line).group(1)
37 | else:
38 | print(line)
39 | continue
40 | reList = [MainID] + part1 + part2 + [part3]
41 | List += [reList]
42 | return List
43 |
44 | def input(db_name,table_name,List,many=False,replace=False):
45 | num = len(List[0]) if many else len(List)
46 | conn = sqlite3.connect(db_name)
47 | cursor = conn.cursor()
48 | pattern = "IGNORE" if not replace else "REPLACE"
49 | execute = 'INSERT OR %s INTO %s VALUES (?%s)' % (pattern,table_name,",?"*(num-1))
50 |     if many : # batch input (a list of rows)
51 | cursor.executemany(execute,List)
52 | else:
53 | cursor.execute(execute,List)
54 | conn.commit()
55 | cursor.close()
56 | conn.close()
57 |
58 | def output(db_name,table_name,file_name):
59 | with open(file_name, "w", encoding = 'utf-8-sig') as write_file:
60 | conn = sqlite3.connect(db_name)
61 | cursor = conn.cursor()
62 | execute = "SELECT * FROM %s" % (table_name)
63 | for row in cursor.execute(execute):
64 | writeRow = "\t".join('%s' % r for r in row)+"\n"
65 | write_file.write(writeRow)
66 | def query(db_name,table_name,sid):
67 | conn = sqlite3.connect(db_name)
68 | cursor = conn.cursor()
69 | execute = "SELECT * From %s WHERE SID = ?" % (table_name)
70 | result = cursor.execute(execute, [sid]).fetchone()
71 | cursor.close()
72 | conn.close()
73 | return result
74 | #init(db_name,table_name)
75 |
76 | '''
77 | db_name = "AutoSort.db"
78 | table_name = "Movie"
79 |
80 | #Export:
81 | #output(db_name,table_name,"Movie.tsv")
82 |
83 | #Import:
84 | List = build_tsv("IN.tsv")
85 | input(db_name,table_name,List,many=True,replace=True)'''
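#Query example (illustrative SID):
#print(query(db_name, table_name, "tt6751668"))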
86 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # AutoSortTools
2 |
3 | #### AnimeRenamer
4 |
5 | * Anime episode-title renamer
6 |
7 | Renames individual episode titles only; the matching title list must be supplied manually. If a suitable anime-title database is found, automatic lookup will be added
8 |
9 | #### JAVAutoSort
10 |
11 | * JAVAutoSort
12 |
13 |
14 |
--------------------------------------------------------------------------------