├── AVDC_Main.py
├── Function
    ├── Function.py
    └── getHtml.py
├── Getter
    ├── avsox.py
    ├── dmm.py
    ├── jav321.py
    ├── javbus.py
    ├── javdb.py
    ├── mgstage.py
    └── xcity.py
├── Img
    ├── AVDC-ico.png
    ├── AVDC.ico
    ├── LEAK.png
    ├── SUB.png
    └── UNCENSORED.png
├── LICENSE
├── README.md
├── Ui
    ├── AVDC.py
    └── AVDC.ui
├── config.ini
├── py-require.txt
├── readme
    ├── about.png
    ├── emby.png
    ├── emby_each.png
    ├── main_window.png
    ├── setting.gif
    ├── tool.png
    ├── tree-jav-output.png
    └── 主页面.gif
└── update_check.json


/Function/Function.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | import re
  4 | import os
  5 | import json
  6 | from PIL import Image
  7 | from configparser import ConfigParser
  8 | from Getter import avsox, javbus, javdb, mgstage, dmm, jav321, xcity
  9 | 
 10 | 
 11 | # ========================================================================获取config
 12 | def get_config():
 13 |     config_file = ''
 14 |     if os.path.exists('../config.ini'):
 15 |         config_file = '../config.ini'
 16 |     elif os.path.exists('config.ini'):
 17 |         config_file = 'config.ini'
 18 |     config = ConfigParser()
 19 |     config.read(config_file, encoding='UTF-8')
 20 |     return config
 21 | 
 22 | 
 23 | # ========================================================================是否为无码
 24 | def is_uncensored(number):
 25 |     if re.match('^\d{4,}', number) or re.match('n\d{4}', number) or 'HEYZO' in number.upper():
 26 |         return True
 27 |     config = get_config()
 28 |     prefix_list = str(config['uncensored']['uncensored_prefix']).split('|')
 29 |     for pre in prefix_list:
 30 |         if pre.upper() in number.upper():
 31 |             return True
 32 |     return False
 33 | 
 34 | 
 35 | # ========================================================================元数据获取失败检测
 36 | def getDataState(json_data):
 37 |     if json_data['title'] == '' or json_data['title'] == 'None' or json_data['title'] == 'null':
 38 |         return 0
 39 |     else:
 40 |         return 1
 41 | 
 42 | 
 43 | # ========================================================================去掉异常字符
 44 | def escapePath(path, Config):  # Remove escape literals
 45 |     escapeLiterals = Config['escape']['literals']
 46 |     backslash = '\\'
 47 |     for literal in escapeLiterals:
 48 |         path = path.replace(backslash + literal, '')
 49 |     return path
 50 | 
 51 | 
 52 | # ========================================================================获取视频列表
 53 | def movie_lists(escape_folder, movie_type, movie_path):
 54 |     if escape_folder != '':
 55 |         escape_folder = re.split('[,，]', escape_folder)
 56 |     total = []
 57 |     file_type = movie_type.split('|')
 58 |     file_root = movie_path.replace('\\', '/')
 59 |     for root, dirs, files in os.walk(file_root):
 60 |         if escape_folder != '':
 61 |             flag_escape = 0
 62 |             for folder in escape_folder:
 63 |                 if folder in root:
 64 |                     flag_escape = 1
 65 |                     break
 66 |             if flag_escape == 1:
 67 |                 continue
 68 |         for f in files:
 69 |             file_type_current = os.path.splitext(f)[1]
 70 |             file_name = os.path.splitext(f)[0]
 71 |             if re.search(r'^\..+', file_name):
 72 |                 continue
 73 |             if file_type_current in file_type:
 74 |                 path = root + '/' + f
 75 |                 # path = path.replace(file_root, '.')
 76 |                 path = path.replace("\\\\", "/").replace("\\", "/")
 77 |                 total.append(path)
 78 |     return total
 79 | 
 80 | 
 81 | # ========================================================================获取番号
 82 | def getNumber(filepath, escape_string):
 83 |     filepath = filepath.replace('-C.', '.').replace('-c.', '.')
 84 |     filename = os.path.splitext(filepath.split('/')[-1])[0]
 85 |     escape_string_list = re.split('[,，]', escape_string)
 86 |     for string in escape_string_list:
 87 |         if string in filename:
 88 |             filename = filename.replace(string, '')
 89 |     part = ''
 90 |     if re.search('-CD\d+', filename):
 91 |         part = re.findall('-CD\d+', filename)[0]
 92 |     if re.search('-cd\d+', filename):
 93 |         part = re.findall('-cd\d+', filename)[0]
 94 |     filename = filename.replace(part, '')
 95 |     filename = str(re.sub("-\d{4}-\d{1,2}-\d{1,2}", "", filename))  # 去除文件名中时间
 96 |     filename = str(re.sub("\d{4}-\d{1,2}-\d{1,2}-", "", filename))  # 去除文件名中时间
 97 |     if re.search('^\D+\.\d{2}\.\d{2}\.\d{2}', filename):  # 提取欧美番号 sexart.11.11.11
 98 |         try:
 99 |             file_number = re.search('\D+\.\d{2}\.\d{2}\.\d{2}', filename).group()
100 |             return file_number
101 |         except:
102 |             return os.path.splitext(filepath.split('/')[-1])[0]
103 |     elif re.search('XXX-AV-\d{4,}', filename.upper()):  # 提取xxx-av-11111
104 |         file_number = re.search('XXX-AV-\d{4,}', filename.upper()).group()
105 |         return file_number
106 |     elif '-' in filename or '_' in filename:  # 普通提取番号 主要处理包含减号-和_的番号
107 |         if 'FC2' or 'fc2' in filename:
108 |             filename = filename.upper().replace('PPV', '').replace('--', '-')
109 |         if re.search('FC2-\d{5,}', filename):  # 提取类似fc2-111111番号
110 |             file_number = re.search('FC2-\d{5,}', filename).group()
111 |         elif re.search('[a-zA-Z]+-\d+', filename):  # 提取类似mkbd-120番号
112 |             file_number = re.search('\w+-\d+', filename).group()
113 |         elif re.search('\d+[a-zA-Z]+-\d+', filename):  # 提取类似259luxu-1111番号
114 |             file_number = re.search('\d+[a-zA-Z]+-\d+', filename).group()
115 |         elif re.search('[a-zA-Z]+-[a-zA-Z]\d+', filename):  # 提取类似mkbd-s120番号
116 |             file_number = re.search('[a-zA-Z]+-[a-zA-Z]\d+', filename).group()
117 |         elif re.search('\d+-[a-zA-Z]+', filename):  # 提取类似 111111-MMMM 番号
118 |             file_number = re.search('\d+-[a-zA-Z]+', filename).group()
119 |         elif re.search('\d+-\d+', filename):  # 提取类似 111111-000 番号
120 |             file_number = re.search('\d+-\d+', filename).group()
121 |         elif re.search('\d+_\d+', filename):  # 提取类似 111111_000 番号
122 |             file_number = re.search('\d+_\d+', filename).group()
123 |         else:
124 |             file_number = filename
125 |         return file_number
126 |     else:  # 提取不含减号-的番号，FANZA CID 保留ssni00644，将MIDE139改成MIDE-139
127 |         try:
128 |             file_number = os.path.splitext(filename.split('/')[-1])[0]
129 |             find_num = re.findall(r'\d+', file_number)[0]
130 |             find_char = re.findall(r'\D+', file_number)[0]
131 |             if len(find_num) <= 4 and len(find_char) > 1:
132 |                 file_number = find_char + '-' + find_num
133 |             return file_number
134 |         except:
135 |             return os.path.splitext(filepath.split('/')[-1])[0]
136 | 
137 | 
138 | # ========================================================================根据番号获取数据
def getDataFromJSON(file_number, config, mode, appoint_url):  # Return metadata parsed from the scrapers' JSON
    """Scrape metadata for ``file_number`` and normalise it for naming.

    Args:
        file_number: release number extracted from the file name.
        config: mapping providing ``config['Name_Rule']`` with the options
            ``naming_media``, ``naming_file`` and ``folder_name``.
        mode: 1 = try all sites in a number-dependent order, 2 = mgstage,
            3 = javbus, 4 = jav321, 5 = javdb, 6 = avsox, 7 = xcity, 8 = dmm.
            Any other value yields an empty result.
        appoint_url: explicit detail-page URL handed to the scraper, or ``''``.

    Returns:
        The scraper's dict. On timeout (``website == 'timeout'``) or when no
        title was found it is returned unprocessed; otherwise ``title``,
        ``actor``, ``release``, ``cover_small`` and ``tag`` are cleaned and the
        three naming rules from ``config`` are added.
    """
    us_pattern = r'\D+\.\d{2}\.\d{2}\.\d{2}'   # sexart.15.06.14
    cid_pattern = r'\D{2,}00\d{3,}'            # ssni00321
    # ================================================ site rules start
    isuncensored = is_uncensored(file_number)
    # Same shape as the scrapers' "not found" result, so an unknown mode
    # falls through to the early return below instead of raising KeyError.
    json_data = {'title': '', 'actor': '', 'website': ''}
    if mode == 1:  # scrape from all sites
        # ---- uncensored: 111111-111, n1111, HEYZO-1111, SMD-115
        if isuncensored:
            json_data = json.loads(javbus.main_uncensored(file_number, appoint_url))
            if getDataState(json_data) == 0:
                json_data = json.loads(javdb.main(file_number, appoint_url, True))
            if getDataState(json_data) == 0 and 'HEYZO' in file_number.upper():
                json_data = json.loads(jav321.main(file_number, appoint_url, True))
            if getDataState(json_data) == 0:
                json_data = json.loads(avsox.main(file_number, appoint_url))
        # ---- 259LUXU-1111
        elif re.match(r'\d+[a-zA-Z]+-\d+', file_number) or 'SIRO' in file_number.upper():
            json_data = json.loads(mgstage.main(file_number, appoint_url))
            # The fallback sites are queried without the numeric prefix. A
            # SIRO number without a dash has no such short form; keep it
            # as-is instead of calling .group() on None.
            short_form = re.search(r'[a-zA-Z]+-\d+', file_number)
            if short_form:
                file_number = short_form.group()
            if getDataState(json_data) == 0:
                json_data = json.loads(jav321.main(file_number, appoint_url))
            if getDataState(json_data) == 0:
                json_data = json.loads(javdb.main(file_number, appoint_url))
            if getDataState(json_data) == 0:
                json_data = json.loads(javbus.main(file_number, appoint_url))
        # ---- FC2-111111
        elif 'FC2' in file_number.upper():
            json_data = json.loads(javdb.main(file_number, appoint_url))
        # ---- ssni00321
        elif re.match(cid_pattern, file_number) and '-' not in file_number and '_' not in file_number:
            json_data = json.loads(dmm.main(file_number, appoint_url))
        # ---- sexart.15.06.14
        elif re.search(us_pattern, file_number):
            json_data = json.loads(javdb.main_us(file_number, appoint_url))
            if getDataState(json_data) == 0:
                json_data = json.loads(javbus.main_us(file_number, appoint_url))
        # ---- MIDE-139
        else:
            json_data = json.loads(javbus.main(file_number, appoint_url))
            if getDataState(json_data) == 0:
                json_data = json.loads(jav321.main(file_number, appoint_url))
            if getDataState(json_data) == 0:
                json_data = json.loads(xcity.main(file_number, appoint_url))
            if getDataState(json_data) == 0:
                json_data = json.loads(javdb.main(file_number, appoint_url))
            if getDataState(json_data) == 0:
                json_data = json.loads(avsox.main(file_number, appoint_url))
    elif re.match(cid_pattern, file_number) and mode not in (7, 8):
        # CID-style numbers are not looked up on the single-site modes
        # below. dmm (mode 8) is exempt: it is the site the all-sites mode
        # uses for exactly this format, so dmm-only mode must not be
        # short-circuited here.
        json_data = {
            'title': '',
            'actor': '',
            'website': '',
        }
    elif mode == 2:  # mgstage only
        json_data = json.loads(mgstage.main(file_number, appoint_url))
    elif mode == 3:  # javbus only
        if isuncensored:
            json_data = json.loads(javbus.main_uncensored(file_number, appoint_url))
        elif re.search(us_pattern, file_number):
            json_data = json.loads(javbus.main_us(file_number, appoint_url))
        else:
            json_data = json.loads(javbus.main(file_number, appoint_url))
    elif mode == 4:  # jav321 only
        # Argument order matches the other jav321.main calls in this
        # function (number, url, uncensored flag); it used to be swapped.
        json_data = json.loads(jav321.main(file_number, appoint_url, isuncensored))
    elif mode == 5:  # javdb only
        if re.search(us_pattern, file_number):
            json_data = json.loads(javdb.main_us(file_number, appoint_url))
        else:
            json_data = json.loads(javdb.main(file_number, appoint_url, isuncensored))
    elif mode == 6:  # avsox only
        json_data = json.loads(avsox.main(file_number, appoint_url))
    elif mode == 7:  # xcity only
        json_data = json.loads(xcity.main(file_number, appoint_url))
    elif mode == 8:  # dmm only
        json_data = json.loads(dmm.main(file_number, appoint_url))
    # ================================================ site rules end

    # ---- timeout or nothing found: hand the raw result back
    if json_data['website'] == 'timeout' or json_data['title'] == '':
        return json_data

    # ---- normalise the scraped fields
    title = json_data['title']
    number = json_data['number']
    # Lists arrive either as real lists or as their string form; flatten both.
    actor_list = str(json_data['actor']).strip("[ ]").replace("'", '').split(',')
    release = json_data['release']
    cover_small = json_data.get('cover_small', '')
    tag = str(json_data['tag']).strip("[ ]").replace("'", '').replace(" ", '').split(',')
    actor = str(actor_list).strip("[ ]").replace("'", '').replace(" ", '')
    if actor == '':
        actor = 'Unknown'

    # ---- remove characters that are illegal in file names: \/:*?"<>| and 【】;
    # spaces become dots
    title = title.translate(str.maketrans(' ', '.', '\\/:*?"<>|【】'))
    release = release.replace('/', '-')
    # Only the first entry of a comma-separated cover list is used.
    cover_small = cover_small.split(',')[0].strip('\"').strip('\'')
    for key in ('title', 'studio', 'director', 'series', 'publisher'):
        if key in json_data:
            json_data[key] = str(json_data[key]).replace('/', '')

    # ---- write the processed values back
    name_rule = config['Name_Rule']
    naming_media = name_rule['naming_media']
    naming_file = name_rule['naming_file']
    folder_name = name_rule['folder_name']
    json_data['title'] = title
    json_data['number'] = number
    json_data['actor'] = actor
    json_data['release'] = release
    json_data['cover_small'] = cover_small
    json_data['tag'] = tag
    json_data['naming_media'] = naming_media
    json_data['naming_file'] = naming_file
    json_data['folder_name'] = folder_name
    return json_data
272 | 
273 | 
274 | # ========================================================================返回json里的数据
def get_info(json_data):
    """Return the metadata fields of ``json_data`` as a fixed-order tuple.

    Every value equal to ``''`` or ``'N/A'`` is first replaced in place by
    ``'unknown'``. The tuple order is: title, studio, publisher, year,
    outline, runtime, director, actor_photo, actor, release, tag, number,
    cover, website, series.
    """
    for field in list(json_data):
        current = json_data[field]
        if current == '' or current == 'N/A':
            json_data[field] = 'unknown'
    ordered_fields = (
        'title', 'studio', 'publisher', 'year', 'outline', 'runtime',
        'director', 'actor_photo', 'actor', 'release', 'tag', 'number',
        'cover', 'website', 'series',
    )
    return tuple(json_data[field] for field in ordered_fields)
295 | 
296 | 
297 | # ========================================================================保存配置到config.ini
def save_config(json_config):
    """Write the settings in ``json_config`` to ``config.ini``.

    ``../config.ini`` is preferred over ``config.ini`` in the current working
    directory; when neither exists, ``config.ini`` is created in the current
    working directory.

    The whole file content is assembled before the file is opened, so a
    missing key raises ``KeyError`` without truncating the existing file.

    Args:
        json_config: mapping holding every setting referenced in the layout
            below; values are converted with ``str()``.

    Raises:
        KeyError: if ``json_config`` lacks one of the required settings.
    """
    # Plain strings are written verbatim (section headers, comments, blank
    # lines); tuples are (ini option, json_config key).
    layout = (
        "[common]",
        ("main_mode", "main_mode"),
        ("failed_output_folder", "failed_output_folder"),
        ("success_output_folder", "success_output_folder"),
        ("failed_file_move", "failed_file_move"),
        ("soft_link", "soft_link"),
        ("show_poster", "show_poster"),
        ("website", "website"),
        "# all or mgstage or fc2club or javbus or jav321 or javdb or avsox or xcity or dmm",
        "",
        "[proxy]",
        ("type", "type"),
        ("proxy", "proxy"),
        ("timeout", "timeout"),
        ("retry", "retry"),
        "# type: no, http, socks5",
        "",
        "[Name_Rule]",
        ("folder_name", "folder_name"),
        ("naming_media", "naming_media"),
        ("naming_file", "naming_file"),
        "",
        "[update]",
        ("update_check", "update_check"),
        "",
        "[log]",
        ("save_log", "save_log"),
        "",
        "[media]",
        ("media_type", "media_type"),
        ("sub_type", "sub_type"),
        ("media_path", "media_path"),
        "",
        "[escape]",
        ("literals", "literals"),
        ("folders", "folders"),
        ("string", "string"),
        "",
        "[debug_mode]",
        ("switch", "switch_debug"),
        "",
        "[emby]",
        ("emby_url", "emby_url"),
        ("api_key", "api_key"),
        "",
        "[mark]",
        ("poster_mark", "poster_mark"),
        ("thumb_mark", "thumb_mark"),
        ("mark_size", "mark_size"),
        ("mark_type", "mark_type"),
        ("mark_pos", "mark_pos"),
        "# mark_size : range 1-5",
        "# mark_type : sub, leak, uncensored",
        "# mark_pos  : bottom_right or bottom_left or top_right or top_left",
        "",
        "[uncensored]",
        ("uncensored_prefix", "uncensored_prefix"),
        ("uncensored_poster", "uncensored_poster"),
        "# 0 : official, 1 : cut",
        "",
        "[file_download]",
        ("nfo", "nfo_download"),
        ("poster", "poster_download"),
        ("fanart", "fanart_download"),
        ("thumb", "thumb_download"),
        "",
        "[extrafanart]",
        ("extrafanart_download", "extrafanart_download"),
        ("extrafanart_folder", "extrafanart_folder"),
    )
    lines = []
    for entry in layout:
        if isinstance(entry, str):
            lines.append(entry)
        else:
            option, key = entry
            lines.append(option + " = " + str(json_config[key]))
    content = "\n".join(lines) + "\n"

    config_file = 'config.ini'
    if os.path.exists('../config.ini'):
        config_file = '../config.ini'
    with open(config_file, "wt", encoding='UTF-8') as code:
        code.write(content)
377 | 
378 | 
def check_pic(path_pic):
    """Return True when ``path_pic`` can be opened and fully decoded as an image.

    Returns False when the file is missing or decoding raises ``OSError``
    (for example a truncated or corrupt download).
    """
    try:
        # The context manager closes the underlying file even when load()
        # fails part-way through a damaged image.
        with Image.open(path_pic) as img:
            img.load()
        return True
    except (FileNotFoundError, OSError):
        return False


--------------------------------------------------------------------------------
/Function/getHtml.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | import os
 3 | from configparser import ConfigParser
 4 | import cloudscraper
 5 | 
 6 | 
 7 | # ========================================================================获取config
 8 | def get_config():
 9 |     config_file = ''
10 |     if os.path.exists('../config.ini'):
11 |         config_file = '../config.ini'
12 |     elif os.path.exists('config.ini'):
13 |         config_file = 'config.ini'
14 |     config = ConfigParser()
15 |     config.read(config_file, encoding='UTF-8')
16 |     proxy_type = str(config['proxy']['type'])
17 |     proxy = str(config['proxy']['proxy'])
18 |     timeout = int(config['proxy']['timeout'])
19 |     retry_count = int(config['proxy']['retry'])
20 |     return proxy_type, proxy, timeout, retry_count
21 | 
22 | 
23 | # ========================================================================获取proxies
def get_proxies(proxy_type, proxy):
    """Build the ``proxies`` mapping passed to ``requests``.

    Args:
        proxy_type: ``'http'``, ``'socks5'``, or ``''`` / ``'no'`` for a
            direct connection.
        proxy: proxy address as ``host:port``.

    Returns:
        A dict with ``'http'`` and ``'https'`` entries, or an empty dict when
        no proxy is configured or the type is not recognised.
    """
    if proxy == '' or proxy_type == '' or proxy_type == 'no':
        return {}
    if proxy_type == 'http':
        # The dict key selects which traffic is proxied; the URL scheme says
        # how to talk to the proxy itself. A plain HTTP proxy is addressed
        # with http:// for HTTPS traffic too - an https:// proxy URL makes
        # recent urllib3 versions attempt TLS with the proxy.
        return {"http": "http://" + proxy, "https": "http://" + proxy}
    if proxy_type == 'socks5':
        return {"http": "socks5://" + proxy, "https": "socks5://" + proxy}
    return {}
33 | 
34 | 
35 | # ========================================================================网页请求
36 | # 破解cf5秒盾
def get_html_javdb(url):
    """Fetch ``url`` through a ``cloudscraper`` session and return the page source."""
    session = cloudscraper.create_scraper()
    return session.get(url).text
44 | 
45 | 
def get_html(url, cookies=None):
    """GET ``url`` with the configured proxy, timeout and retry count.

    Returns the response text decoded as UTF-8, or the string
    ``'ProxyError'`` once every attempt has failed. When the proxy
    configuration cannot be read, the retry count stays at 0 and no request
    is made.
    """
    proxy_type, proxy, timeout, retry_count = '', '', 0, 0
    try:
        proxy_type, proxy, timeout, retry_count = get_config()
    except Exception as error_info:
        print('Error in get_html :' + str(error_info))
        print('[-]Proxy config error! Please check the config.')
    proxies = get_proxies(proxy_type, proxy)
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/60.0.3100.0 Safari/537.36'}
    for attempt in range(1, retry_count + 1):
        try:
            getweb = requests.get(str(url), headers=headers, timeout=timeout, proxies=proxies, cookies=cookies)
            getweb.encoding = 'utf-8'
            return getweb.text
        except Exception as error_info:
            print('Error in get_html :' + str(error_info))
            print('[-]Connect retry ' + str(attempt) + '/' + str(retry_count))
    print('[-]Connect Failed! Please check your Proxy or Network!')
    return 'ProxyError'
72 | 
73 | 
def post_html(url: str, query: dict):
    """POST ``query`` as form data to ``url`` with the configured proxy settings.

    Returns the response text decoded as UTF-8, or the string
    ``'ProxyError'`` once every attempt has failed.
    """
    proxy_type, proxy, timeout, retry_count = '', '', 0, 0
    try:
        proxy_type, proxy, timeout, retry_count = get_config()
    except Exception as error_info:
        print('Error in post_html :' + str(error_info))
        print('[-]Proxy config error! Please check the config.')
    proxies = get_proxies(proxy_type, proxy)
    attempt = 0
    while attempt < retry_count:
        attempt += 1
        try:
            response = requests.post(url, data=query, proxies=proxies, timeout=timeout)
            response.encoding = 'utf-8'
            return response.text
        except Exception as error_info:
            print('Error in post_html :' + str(error_info))
            print("[-]Connect retry {}/{}".format(attempt, retry_count))
    print("[-]Connect Failed! Please check your Proxy or Network!")
    return 'ProxyError'
96 | 


--------------------------------------------------------------------------------
/Getter/avsox.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import re
  3 | from bs4 import BeautifulSoup
  4 | from lxml import etree
  5 | from Function.getHtml import get_html
  6 | 
  7 | 
  8 | def getActorPhoto(htmlcode):  # //*[@id="star_qdt"]/li/a/img
  9 |     soup = BeautifulSoup(htmlcode, 'lxml')
 10 |     a = soup.find_all(attrs={'class': 'avatar-box'})
 11 |     d = {}
 12 |     for i in a:
 13 |         l = i.img['src']
 14 |         t = i.span.get_text()
 15 |         p2 = {t: l}
 16 |         d.update(p2)
 17 |     return d
 18 | 
 19 | 
 20 | def getTitle(a):
 21 |     try:
 22 |         html = etree.fromstring(a, etree.HTMLParser())
 23 |         result = str(html.xpath('/html/body/div[2]/h3/text()')).strip(" ['']")  # [0]
 24 |         return result.replace('/', '')
 25 |     except:
 26 |         return ''
 27 | 
 28 | 
 29 | def getActor(a):  # //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
 30 |     soup = BeautifulSoup(a, 'lxml')
 31 |     a = soup.find_all(attrs={'class': 'avatar-box'})
 32 |     d = []
 33 |     for i in a:
 34 |         d.append(i.span.get_text())
 35 |     return d
 36 | 
 37 | 
 38 | def getStudio(a):
 39 |     html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
 40 |     result1 = str(html.xpath('//p[contains(text(),"制作商: ")]/following-sibling::p[1]/a/text()')).strip(" ['']").replace(
 41 |         "', '", ' ')
 42 |     return result1
 43 | 
 44 | 
 45 | def getRuntime(a):
 46 |     html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
 47 |     result1 = str(html.xpath('//span[contains(text(),"长度:")]/../text()')).strip(" ['分钟']")
 48 |     return result1
 49 | 
 50 | 
 51 | def getSeries(a):
 52 |     html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
 53 |     result1 = str(html.xpath('//p[contains(text(),"系列:")]/following-sibling::p[1]/a/text()')).strip(" ['']")
 54 |     return result1
 55 | 
 56 | 
 57 | def getNum(a):
 58 |     html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
 59 |     result1 = str(html.xpath('//span[contains(text(),"识别码:")]/../span[2]/text()')).strip(" ['']")
 60 |     return result1
 61 | 
 62 | 
 63 | def getYear(release):
 64 |     try:
 65 |         result = str(re.search('\d{4}', release).group())
 66 |         return result
 67 |     except:
 68 |         return release
 69 | 
 70 | 
 71 | def getRelease(a):
 72 |     html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
 73 |     result1 = str(html.xpath('//span[contains(text(),"发行时间:")]/../text()')).strip(" ['']")
 74 |     return result1
 75 | 
 76 | 
 77 | def getCover(htmlcode):
 78 |     html = etree.fromstring(htmlcode, etree.HTMLParser())
 79 |     result = str(html.xpath('/html/body/div[2]/div[1]/div[1]/a/img/@src')).strip(" ['']")
 80 |     return result
 81 | 
 82 | 
 83 | def getCover_small(htmlcode, count):
 84 |     html = etree.fromstring(htmlcode, etree.HTMLParser())
 85 |     cover_small = html.xpath("//div[@id='waterfall']/div[" + str(count) + "]/a/div[@class='photo-frame']/img/@src")[0]
 86 |     return cover_small
 87 | 
 88 | 
 89 | def getTag(a):  # 获取演员
 90 |     soup = BeautifulSoup(a, 'lxml')
 91 |     a = soup.find_all(attrs={'class': 'genre'})
 92 |     d = []
 93 |     for i in a:
 94 |         d.append(i.get_text())
 95 |     return d
 96 | 
 97 | 
 98 | def getUrl(number):
 99 |     response = get_html('https://avsox.website/cn/search/' + number)
100 |     html = etree.fromstring(response, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
101 |     url_list = html.xpath('//*[@id="waterfall"]/div/a/@href')
102 |     if len(url_list) > 0:
103 |         for i in range(1, len(url_list) + 1):
104 |             number_get = str(html.xpath(
105 |                 '//*[@id="waterfall"]/div[' + str(i) + ']/a/div[@class="photo-info"]/span/date[1]/text()')).strip(
106 |                 " ['']")
107 |             if number.upper() == number_get.upper():
108 |                 page_url = 'https:' + url_list[i-1]
109 |                 return i, response, page_url
110 |     return 0, response, ''
111 | 
112 | 
def main(number, appoint_url=''):
    """Scrape avsox for ``number`` and return the metadata as a JSON string.

    Args:
        number: release number to search for.
        appoint_url: detail page to use instead of the search result.

    Returns:
        A JSON object string. On success it holds the full metadata set; on a
        connection failure ``{"title": "", "website": "timeout"}``; on any
        other error ``{"title": "", "website": ""}``.
    """
    try:
        count, response, url = getUrl(number)
        if str(response) == 'ProxyError':
            raise TimeoutError
        if appoint_url != '':
            url = appoint_url
        elif url == '':
            raise Exception('Movie Data not found in avsox!')
        web = get_html(url)
        # The detail page can fail to load just like the search page.
        if str(web) == 'ProxyError':
            raise TimeoutError
        soup = BeautifulSoup(web, 'lxml')
        info = str(soup.find(attrs={'class': 'row movie'}))
        number = getNum(web)
        title = getTitle(web)
        # Drop the leading number from the heading. str.strip(number) would
        # treat the number as a character set and also eat matching
        # characters from the end of the title.
        if number and title.startswith(number):
            title = title[len(number):]
        release = getRelease(info)
        # count is 0 when an appointed URL is used and the search did not
        # find the number; there is no thumbnail to pick then, and asking for
        # position 0 would raise and discard the whole result.
        cover_small = getCover_small(response, count) if count else ''
        dic = {
            'actor': getActor(web),
            'title': title.strip().replace(' ', '-'),
            'studio': getStudio(info),
            'runtime': getRuntime(info),
            'release': release,
            'number': getNum(info),
            'tag': getTag(web),
            'series': getSeries(info),
            'year': getYear(release),
            'actor_photo': getActorPhoto(web),
            'cover': getCover(web),
            'cover_small': cover_small,
            'extrafanart': '',
            'imagecut': 3,
            'director': '',
            'publisher': '',
            'outline': '',
            'score': '',
            'website': url,
            'source': 'avsox.website',
        }
    except TimeoutError:
        dic = {
            'title': '',
            'website': 'timeout',
        }
    except Exception as error_info:
        print('Error in avsox.main : ' + str(error_info))
        dic = {
            'title': '',
            'website': '',
        }
    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
    return js
161 | 
162 | 
163 | # print(main('051119-917'))
164 | # print(main('032620_001'))
165 | # print(main('032620_001', 'https://avsox.website/cn/movie/cb8d28437cff4e90'))


--------------------------------------------------------------------------------
/Getter/dmm.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | # -*- coding: utf-8 -*-
  3 | import re
  4 | from lxml import etree
  5 | import json
  6 | from Function.getHtml import get_html
  7 | from urllib.parse import urlencode
  8 | 
  9 | 
 10 | def getTitle(text):
 11 |     html = etree.fromstring(text, etree.HTMLParser())
 12 |     result = html.xpath('//*[starts-with(@id, "title")]/text()')[0]
 13 |     return result
 14 | 
 15 | 
 16 | def getActor(text):
 17 |     # //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
 18 |     html = etree.fromstring(text, etree.HTMLParser())
 19 |     result = (
 20 |         str(
 21 |             html.xpath(
 22 |                 "//td[contains(text(),'出演者')]/following-sibling::td/span/a/text()"
 23 |             )
 24 |         )
 25 |             .strip(" ['']")
 26 |             .replace("', '", ",")
 27 |     )
 28 |     return result
 29 | 
 30 | 
 31 | def getStudio(text):
 32 |     html = etree.fromstring(text, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
 33 |     try:
 34 |         result = html.xpath(
 35 |             "//td[contains(text(),'メーカー')]/following-sibling::td/a/text()"
 36 |         )[0]
 37 |     except:
 38 |         result = html.xpath(
 39 |             "//td[contains(text(),'メーカー')]/following-sibling::td/text()"
 40 |         )[0]
 41 |     return result
 42 | 
 43 | 
 44 | def getRuntime(text):
 45 |     html = etree.fromstring(text, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
 46 |     result = html.xpath("//td[contains(text(),'収録時間')]/following-sibling::td/text()")[0]
 47 |     return re.search(r"\d+", str(result)).group()
 48 | 
 49 | 
 50 | def getLabel(text):
 51 |     html = etree.fromstring(text, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
 52 |     try:
 53 |         result = html.xpath(
 54 |             "//td[contains(text(),'レーベル：')]/following-sibling::td/a/text()"
 55 |         )[0]
 56 |     except:
 57 |         result = html.xpath(
 58 |             "//td[contains(text(),'レーベル：')]/following-sibling::td/text()"
 59 |         )[0]
 60 |     return result
 61 | 
 62 | 
 63 | def getNum(text):
 64 |     html = etree.fromstring(text, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
 65 |     try:
 66 |         result = html.xpath(
 67 |             "//td[contains(text(),'品番：')]/following-sibling::td/a/text()"
 68 |         )[0]
 69 |     except:
 70 |         result = html.xpath(
 71 |             "//td[contains(text(),'品番：')]/following-sibling::td/text()"
 72 |         )[0]
 73 |     return result
 74 | 
 75 | 
 76 | def getYear(getRelease):
 77 |     try:
 78 |         result = str(re.search(r"\d{4}", getRelease).group())
 79 |         return result
 80 |     except:
 81 |         return getRelease
 82 | 
 83 | 
 84 | def getRelease(text):
 85 |     html = etree.fromstring(text, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
 86 |     try:
 87 |         result = html.xpath(
 88 |             "//td[contains(text(),'発売日：')]/following-sibling::td/a/text()"
 89 |         )[0].lstrip("\n")
 90 |     except:
 91 |         try:
 92 |             result = html.xpath(
 93 |                 "//td[contains(text(),'発売日：')]/following-sibling::td/text()"
 94 |             )[0].lstrip("\n")
 95 |         except:
 96 |             result = "----"
 97 |     if result == "----":
 98 |         try:
 99 |             result = html.xpath(
100 |                 "//td[contains(text(),'配信開始日：')]/following-sibling::td/a/text()"
101 |             )[0].lstrip("\n")
102 |         except:
103 |             try:
104 |                 result = html.xpath(
105 |                     "//td[contains(text(),'配信開始日：')]/following-sibling::td/text()"
106 |                 )[0].lstrip("\n")
107 |             except:
108 |                 pass
109 |     return result.replace("/", "-")
110 | 
111 | 
def getTag(text):
    """Return the list of genre tags next to the 'ジャンル：' label cell.

    The linked tag texts are preferred. The original wrapped the first
    query in ``try/except``, but ``xpath()`` returns an empty list instead
    of raising, so the plain-text fallback could never run; it is now
    taken explicitly when no linked tag is found.

    :param text: HTML source of the detail page.
    :return: list of tag strings (possibly empty).
    """
    html = etree.fromstring(text, etree.HTMLParser())
    result = html.xpath(
        "//td[contains(text(),'ジャンル：')]/following-sibling::td/a/text()"
    )
    if not result:
        result = html.xpath(
            "//td[contains(text(),'ジャンル：')]/following-sibling::td/text()"
        )
    return result
123 | 
124 | 
def getCover(text, number):
    """Return the href of the element whose id equals the product number.

    :param text: HTML source of the detail page.
    :param number: product number used as the element id.
    :raises ValueError: when no matching element with an href is found.
    """
    page = etree.fromstring(text, etree.HTMLParser())
    try:
        return page.xpath('//*[@id="' + number + '"]/@href')[0]
    except Exception:
        pass
    # fanza sometimes writes "_" as the literal text \u005f in the image id
    escaped = number.replace("_", r"\u005f") if "_" in number else number
    try:
        return page.xpath('//*[@id="' + escaped + '"]/@href')[0]
    except Exception:
        pass
    # (TODO) handle more edge cases; fetching the picture is the main
    # requirement, so a missing cover is reported as an error.
    raise ValueError("can not find image")
143 | 
144 | 
def getDirector(text):
    """Return the value next to the '監督：' label cell.

    The linked text is preferred; the plain cell text is the fallback.

    :param text: HTML source of the detail page.
    :raises IndexError: when neither form is present.
    """
    page = etree.fromstring(text, etree.HTMLParser())
    linked = page.xpath(
        "//td[contains(text(),'監督：')]/following-sibling::td/a/text()"
    )
    if linked:
        return linked[0]
    plain = page.xpath(
        "//td[contains(text(),'監督：')]/following-sibling::td/text()"
    )
    return plain[0]
156 | 
157 | 
def getOutline(text):
    """Return the outline text of the page with newlines removed.

    The direct text of the ``mg-b20 lh4`` div is used; when that is empty
    the first ``<p>`` text inside the div is used instead. Any missing
    node yields an empty string.

    :param text: HTML source of the detail page.
    """
    page = etree.fromstring(text, etree.HTMLParser())
    outline = ""
    for query in ("//div[@class='mg-b20 lh4']/text()",
                  "//div[@class='mg-b20 lh4']//p/text()"):
        nodes = page.xpath(query)
        if not nodes:
            # (TODO) handle more edge cases
            return ""
        outline = str(nodes[0]).replace("\n", "")
        if outline != "":
            break
    return outline
173 | 
174 | 
def getSeries(text):
    """Return the value next to the 'シリーズ：' label cell, or "" when unavailable.

    The linked text is preferred over the plain cell text. Parsing errors
    and missing cells both result in an empty string.

    :param text: HTML source of the detail page.
    """
    try:
        page = etree.fromstring(text, etree.HTMLParser())
        for query in (
            "//td[contains(text(),'シリーズ：')]/following-sibling::td/a/text()",
            "//td[contains(text(),'シリーズ：')]/following-sibling::td/text()",
        ):
            found = page.xpath(query)
            if found:
                return found[0]
    except Exception:
        pass
    return ""
189 | 
190 | 
def getExtrafanart(htmlcode):
    """Return the sample-image URLs (stills) found in the page.

    Every ``<img src>`` inside the ``sample-image-block`` div is rewritten
    by inserting ``jp`` before the last ``-`` of the URL. A URL without any
    hyphen is kept unchanged; previously it raised ``IndexError``.

    :param htmlcode: HTML source of the detail page.
    :return: list of URLs, or ``''`` when the block or its images are missing.
    """
    block = re.search(r'<div id=\"sample-image-block\"[\s\S]*?<br></div></div>', htmlcode)
    if not block:
        return ''
    thumbnails = re.findall(r'<img.*?src=\"(.*?)\"', block.group())
    if not thumbnails:
        return ''
    images = []
    for thumbnail in thumbnails:
        head, hyphen, tail = thumbnail.rpartition('-')
        # Without a hyphen there is nothing to rewrite; keep the URL as found.
        images.append(head + 'jp-' + tail if hyphen else thumbnail)
    return images
206 | 
207 | 
def getScore(htmlcode):
    """Return the review average shown on the page, without newlines or '点'.

    :param htmlcode: HTML source of the detail page.
    :raises IndexError: when the page has no review average element.
    """
    page = etree.fromstring(htmlcode, etree.HTMLParser())
    average = str(page.xpath("//p[@class='d-review__average']/strong/text()")[0])
    # Remove escaped newlines, real newlines and the unit suffix, in that order.
    for unwanted in ('\\n', '\n', '点'):
        average = average.replace(unwanted, '')
    return average
214 | 
215 | 
def getPublisher(htmlcode):
    """Return the value next to the 'レーベル' label cell.

    The linked text is preferred; the plain cell text is the fallback.

    :param htmlcode: HTML source of the detail page.
    :raises IndexError: when neither form is present.
    """
    page = etree.fromstring(htmlcode, etree.HTMLParser())
    linked = page.xpath("//td[contains(text(),'レーベル')]/following-sibling::td/a/text()")
    if linked:
        return linked[0]
    plain = page.xpath("//td[contains(text(),'レーベル')]/following-sibling::td/text()")
    return plain[0]
223 | 
224 | 
def main(number, appoint_url=''):
    """Scrape one title from dmm.co.jp and return its metadata as a JSON string.

    :param number: product number to look up.
    :param appoint_url: detail-page URL to use instead of probing the known
        category URLs; empty means "probe".
    :return: JSON text. On success it holds the full metadata dict; when every
        candidate page is a 404 it is ``{"title": "", "website": ""}``; on a
        timeout ``website`` is ``"timeout"``; on any other error both
        ``title`` and ``website`` are empty.

    The page fetch now happens inside the ``try`` block and a ``'ProxyError'``
    response is turned into ``TimeoutError``, matching how the sibling getters
    treat the value returned by ``get_html``. Before, a proxy failure was
    reported as a generic scrape error and the timeout branch was unreachable.
    """
    # fanza allows letter + number + underscore, normalize the input here
    # @note: the only underscore usage seen so far is h_test123456789
    fanza_search_number = number
    # The caller over-formats the input; restore the h_ prefix. Only the
    # prefix is rewritten, a later "h-" inside the number is left alone.
    if fanza_search_number.startswith("h-"):
        fanza_search_number = "h_" + fanza_search_number[len("h-"):]

    fanza_search_number = re.sub(r"[^0-9a-zA-Z_]", "", fanza_search_number).lower()

    fanza_urls = [
        "https://www.dmm.co.jp/digital/videoa/-/detail/=/cid=",
        "https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=",
        "https://www.dmm.co.jp/digital/anime/-/detail/=/cid=",
        "https://www.dmm.co.jp/mono/anime/-/detail/=/cid=",
        "https://www.dmm.co.jp/digital/videoc/-/detail/=/cid=",
        "https://www.dmm.co.jp/digital/nikkatsu/-/detail/=/cid=",
        "https://www.dmm.co.jp/rental/-/detail/=/cid=",
    ]
    if appoint_url:
        candidate_urls = [appoint_url]
    else:
        candidate_urls = [url + fanza_search_number for url in fanza_urls]
    chosen_url = ""
    htmlcode = ''
    try:
        # Every request goes through the age-check redirect; stop at the
        # first candidate that is not a 404 page.
        for chosen_url in candidate_urls:
            htmlcode = get_html(
                "https://www.dmm.co.jp/age_check/=/declared=yes/?{}".format(
                    urlencode({"rurl": chosen_url})
                ))
            if str(htmlcode) == 'ProxyError':
                raise TimeoutError
            if "404 Not Found" not in htmlcode:
                break
        if "404 Not Found" in htmlcode:
            return json.dumps({"title": "", 'website': ''})
        # for some old page, the input number does not match the page
        # for example, the url will be cid=test012
        # but the hinban on the page is test00012
        # so get the hinban first, and then pass it to following functions
        fanza_hinban = getNum(htmlcode)
        release = getRelease(htmlcode)
        dic = {
            "title": getTitle(htmlcode).strip(),
            'publisher': getPublisher(htmlcode),
            'score': getScore(htmlcode),
            "studio": getStudio(htmlcode),
            "outline": getOutline(htmlcode),
            "runtime": getRuntime(htmlcode),
            "director": getDirector(htmlcode) if "anime" not in chosen_url else "",
            "actor": getActor(htmlcode) if "anime" not in chosen_url else "",
            "release": release,
            "number": fanza_hinban,
            "cover": getCover(htmlcode, fanza_hinban),
            "imagecut": 1,
            "tag": getTag(htmlcode),
            "extrafanart": getExtrafanart(htmlcode),
            "label": getLabel(htmlcode),
            "year": getYear(release),
            "actor_photo": "",
            "website": chosen_url,
            "source": "fanza.py",
            "series": getSeries(htmlcode),
        }
    except TimeoutError:
        dic = {
            'title': '',
            'website': 'timeout',
        }
    except Exception as error_info:
        print('Error in dmm.main : ' + str(error_info))
        dic = {
            'title': '',
            'website': '',
        }
    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'))  # .encode('UTF-8')
    return js
305 | 
306 | 
307 | # main('DV-1562')
308 | # print(main('mide00139', "https://www.dmm.co.jp/digital/videoa/-/detail/=/cid=mide00139"))
309 | # print(main('mide00139', ""))
310 | # print(main('kawd00969'))
311 | 


--------------------------------------------------------------------------------
/Getter/jav321.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | from lxml import etree
  3 | import json
  4 | from Function.getHtml import post_html
  5 | 
  6 | 
  7 | def getActorPhoto(actor):
  8 |     data = {}
  9 |     for i in actor:
 10 |         actor_photo = {i: ''}
 11 |         data.update(actor_photo)
 12 |     return data
 13 | 
 14 | 
 15 | def getTitle(response):
 16 |     return str(re.findall(r'<h3>(.+) <small>', response)).strip(" ['']")
 17 | 
 18 | 
 19 | def getActor(response):
 20 |     if re.search(r'<a href="/star/\S+">(\S+)</a> &nbsp;', response):
 21 |         return str(re.findall(r'<a href="/star/\S+">(\S+)</a> &nbsp;', response)).strip(" [',']").replace('\'', '')
 22 |     elif re.search(r'<a href="/heyzo_star/\S+">(\S+)</a> &nbsp;', response):
 23 |         return str(re.findall(r'<a href="/heyzo_star/\S+">(\S+)</a> &nbsp;', response)).strip(" [',']").replace('\'',
 24 |                                                                                                                 '')
 25 |     else:
 26 |         return str(re.findall(r'<b>出演者</b>: ([^<]+) &nbsp; <br>', response)).strip(" [',']").replace('\'', '')
 27 | 
 28 | 
 29 | def getStudio(response):
 30 |     return str(re.findall(r'<a href="/company/\S+">(\S+)</a>', response)).strip(" ['']")
 31 | 
 32 | 
 33 | def getRuntime(response):
 34 |     return str(re.findall(r'<b>収録時間</b>: (\d+) \S+<br>', response)).strip(" ['']")
 35 | 
 36 | 
 37 | def getSeries(response):
 38 |     return str(re.findall(r'<b>系列</b>: <a href="/series/\S+">(\S+)</a>', response)).strip(" ['']")
 39 | 
 40 | 
 41 | def getWebsite(detail_page):
 42 |     return 'https:' + detail_page.xpath('//a[contains(text(),"简体中文")]/@href')[0]
 43 | 
 44 | 
 45 | def getNum(response):
 46 |     return str(re.findall(r'<b>品番</b>: (\S+)<br>', response)).strip(" ['']").upper()
 47 | 
 48 | 
 49 | def getScore(response):
 50 |     if re.search(r'<b>平均評価</b>: <img data-original="/img/(\d+).gif" />', response):
 51 |         score = re.findall(r'<b>平均評価</b>: <img data-original="/img/(\d+).gif" />', response)[0]
 52 |         return str(float(score) / 10.0)
 53 |     else:
 54 |         return str(re.findall(r'<b>平均評価</b>: ([^<]+)<br>', response)).strip(" [',']").replace('\'', '')
 55 | 
 56 | 
 57 | def getYear(release):
 58 |     try:
 59 |         result = str(re.search('\d{4}', release).group())
 60 |         return result
 61 |     except:
 62 |         return release
 63 | 
 64 | 
 65 | def getRelease(response):
 66 |     return str(re.findall(r'<b>配信開始日</b>: (\d+-\d+-\d+)<br>', response)).strip(" ['']").replace('0000-00-00', '')
 67 | 
 68 | 
 69 | def getCover(detail_page):
 70 |     cover_url = str(detail_page.xpath("/html/body/div[@class='row'][2]/div[@class='col-md-3']/div[@class='col-xs-12 "
 71 |                                       "col-md-12'][1]/p/a/img[@class='img-responsive']/@src")).strip(" ['']")
 72 |     if cover_url == '':
 73 |         cover_url = str(
 74 |             detail_page.xpath("//*[@id='vjs_sample_player']/@poster")).strip(" ['']")
 75 |     return cover_url
 76 | 
 77 | 
 78 | def getExtraFanart(htmlcode):
 79 |     extrafanart_list = htmlcode.xpath(
 80 |         "/html/body/div[@class='row'][2]/div[@class='col-md-3']/div[@class='col-xs-12 col-md-12']/p/a/img[@class='img-responsive']/@src")
 81 |     return extrafanart_list
 82 | 
 83 | 
 84 | def getCoverSmall(detail_page):
 85 |     return str(detail_page.xpath("//div[@class='panel-body']/div[@class='row'][1]/div[@class='col-md-3']/img["
 86 |                                  "@class='img-responsive']/@src")).strip(" ['']")
 87 | 
 88 | 
 89 | def getTag(response):  # 获取标签
 90 |     return re.findall(r'<a href="/genre/\S+">(\S+)</a>', response)
 91 | 
 92 | 
 93 | def getOutline(detail_page):
 94 |     return str(detail_page.xpath('/html/body/div[2]/div[1]/div[1]/div[2]/div[3]/div/text()')).strip(" ['']")
 95 | 
 96 | 
 97 | def main(number, appoint_url, isuncensored=False):
 98 |     try:
 99 |         result_url = "https://www.jav321.com/search"
100 |         if appoint_url != '':
101 |             result_url = appoint_url
102 |         response = post_html(result_url, query={"sn": number})
103 |         if str(response) == 'ProxyError':
104 |             raise TimeoutError
105 |         if '未找到您要找的AV' in response:
106 |             raise Exception('Movie Data not found in jav321!')
107 |         detail_page = etree.fromstring(response, etree.HTMLParser())
108 |         release = getRelease(response)
109 |         actor = getActor(response)
110 |         imagecut = 1
111 |         cover_small = ''
112 |         if 'HEYZO' in number.upper() or isuncensored:
113 |             imagecut = 3
114 |             cover_small = getCoverSmall(detail_page)
115 |             if cover_small == '':
116 |                 imagecut = 0
117 |         dic = {
118 |             'actor': actor,
119 |             'title': getTitle(response),
120 |             'studio': getStudio(response),
121 |             'outline': getOutline(detail_page),
122 |             'runtime': getRuntime(response),
123 |             'release': release,
124 |             'number': getNum(response),
125 |             'score': getScore(response),
126 |             'tag': getTag(response),
127 |             'series': getSeries(response),
128 |             'year': getYear(release),
129 |             'actor_photo': getActorPhoto(actor.split(',')),
130 |             'cover': getCover(detail_page),
131 |             'extrafanart': getExtraFanart(detail_page),
132 |             'cover_small': cover_small,
133 |             'imagecut': imagecut,
134 |             'director': '',
135 |             'publisher': '',
136 |             'website': getWebsite(detail_page),
137 |             'source': 'jav321.py',
138 |         }
139 |     except TimeoutError:
140 |         dic = {
141 |             'title': '',
142 |             'website': 'timeout',
143 |         }
144 |     except Exception as error_info:
145 |         print('Error in jav321.main : ' + str(error_info))
146 |         dic = {
147 |             'title': '',
148 |             'website': '',
149 |         }
150 |     js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
151 |     return js
152 | 
153 | 
154 | '''
155 | print(main('msfh-010'))
156 | print(main('kavr-065'))
157 | print(main('ssni-645'))
158 | print(main('sivr-038'))
159 | print(main('ara-415'))
160 | print(main('luxu-1257'))
161 | print(main('heyzo-1031'))
162 | print(main('ABP-905'))
163 | '''
164 | # print(main('heyzo-1031', ''))
165 | # print(main('ssni-645', ''))
166 | # print(main('ymdd-173', 'https://www.jav321.com/video/ymdd00173'))
167 | 


--------------------------------------------------------------------------------
/Getter/javbus.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | from pyquery import PyQuery as pq
  3 | from lxml import etree
  4 | from bs4 import BeautifulSoup
  5 | import json
  6 | from Function.getHtml import get_html
  7 | from Function.getHtml import post_html
  8 | 
  9 | 
 10 | def getActorPhoto(htmlcode):
 11 |     soup = BeautifulSoup(htmlcode, 'lxml')
 12 |     a = soup.find_all(attrs={'class': 'star-name'})
 13 |     d = {}
 14 |     for i in a:
 15 |         l = i.a['href']
 16 |         t = i.get_text()
 17 |         html = etree.fromstring(get_html(l), etree.HTMLParser())
 18 |         p = 'https://javbus.com' + str(html.xpath('//*[@id="waterfall"]/div[1]/div/div[1]/img/@src')).strip(" ['']")
 19 |         p2 = {t: p}
 20 |         d.update(p2)
 21 |     return d
 22 | 
 23 | 
 24 | def getTitle(htmlcode):  # 获取标题
 25 |     doc = pq(htmlcode)
 26 |     title = str(doc('div.container h3').text())
 27 |     try:
 28 |         title2 = re.sub('n\d+-', '', title)
 29 |         return title2
 30 |     except:
 31 |         return title
 32 | 
 33 | 
 34 | def getStudio(htmlcode):  # 获取厂商
 35 |     html = etree.fromstring(htmlcode, etree.HTMLParser())
 36 |     result = str(html.xpath('//span[contains(text(),"製作商")]/following-sibling::a/text()')).strip(" ['']")
 37 |     return result
 38 | 
 39 | 
 40 | def getPublisher(htmlcode):  # 获取发行商
 41 |     html = etree.fromstring(htmlcode, etree.HTMLParser())
 42 |     result = str(html.xpath('//span[contains(text(),"發行商")]/following-sibling::a/text()')).strip(" ['']")
 43 |     return result
 44 | 
 45 | 
 46 | def getYear(getRelease):  # 获取年份
 47 |     try:
 48 |         result = str(re.search('\d{4}', getRelease).group())
 49 |         return result
 50 |     except:
 51 |         return getRelease
 52 | 
 53 | 
 54 | def getCover(htmlcode):  # 获取封面链接
 55 |     doc = pq(htmlcode)
 56 |     image = doc('a.bigImage')
 57 |     return 'https://javbus.com' + image.attr('href')
 58 | 
 59 | 
 60 | def getExtraFanart(htmlcode):  # 获取封面链接
 61 |     html = etree.fromstring(htmlcode, etree.HTMLParser())
 62 |     extrafanart_list = html.xpath("//div[@id='sample-waterfall']/a/@href")
 63 |     return extrafanart_list
 64 | 
 65 | 
 66 | def getRelease(htmlcode):  # 获取出版日期
 67 |     html = etree.fromstring(htmlcode, etree.HTMLParser())
 68 |     result = str(html.xpath('//span[contains(text(),"發行日期")]/../text()')).strip(" ['']")
 69 |     return result
 70 | 
 71 | 
 72 | def getRuntime(htmlcode):  # 获取分钟
 73 |     html = etree.fromstring(htmlcode, etree.HTMLParser())
 74 |     result = str(html.xpath('//span[contains(text(),"長度")]/../text()')).strip(" ['']")
 75 |     return result
 76 | 
 77 | 
 78 | def getActor(htmlcode):  # 获取女优
 79 |     b = []
 80 |     soup = BeautifulSoup(htmlcode, 'lxml')
 81 |     a = soup.find_all(attrs={'class': 'star-name'})
 82 |     for i in a:
 83 |         b.append(i.get_text())
 84 |     return b
 85 | 
 86 | 
 87 | def getNum(htmlcode):  # 获取番号
 88 |     html = etree.fromstring(htmlcode, etree.HTMLParser())
 89 |     result = str(html.xpath('//span[contains(text(),"識別碼")]/following-sibling::span/text()')).strip(" ['']")
 90 |     return result
 91 | 
 92 | 
 93 | def getDirector(htmlcode):  # 获取导演
 94 |     html = etree.fromstring(htmlcode, etree.HTMLParser())
 95 |     result = str(html.xpath('//span[contains(text(),"導演")]/following-sibling::a/text()')).strip(" ['']")
 96 |     return result
 97 | 
 98 | 
 99 | def getOutlineScore(number):  # 获取简介
100 |     outline = ''
101 |     score = ''
102 |     try:
103 |         response = post_html("https://www.jav321.com/search", query={"sn": number})
104 |         detail_page = etree.fromstring(response, etree.HTMLParser())
105 |         outline = str(detail_page.xpath('/html/body/div[2]/div[1]/div[1]/div[2]/div[3]/div/text()')).strip(" ['']")
106 |         if re.search(r'<b>平均評価</b>: <img data-original="/img/(\d+).gif" />', response):
107 |             score = re.findall(r'<b>平均評価</b>: <img data-original="/img/(\d+).gif" />', response)[0]
108 |             score = str(float(score) / 10.0)
109 |         else:
110 |             score = str(re.findall(r'<b>平均評価</b>: ([^<]+)<br>', response)).strip(" [',']").replace('\'', '')
111 |         if outline == '':
112 |             dmm_htmlcode = get_html(
113 |                 "https://www.dmm.co.jp/search/=/searchstr=" + number.replace('-', '') + "/sort=ranking/")
114 |             if 'に一致する商品は見つかりませんでした' not in dmm_htmlcode:
115 |                 dmm_page = etree.fromstring(dmm_htmlcode, etree.HTMLParser())
116 |                 url_detail = str(dmm_page.xpath('//*[@id="list"]/li[1]/div/p[2]/a/@href')).split(',', 1)[0].strip(
117 |                     " ['']")
118 |                 if url_detail != '':
119 |                     dmm_detail = get_html(url_detail)
120 |                     html = etree.fromstring(dmm_detail, etree.HTMLParser())
121 |                     outline = str(html.xpath('//*[@class="mg-t0 mg-b20"]/text()')).strip(" ['']").replace('\\n',
122 |                                                                                                           '').replace(
123 |                         '\n', '')
124 |     except Exception as error_info:
125 |         print('Error in javbus.getOutlineScore : ' + str(error_info))
126 |     return outline, score
127 | 
128 | 
def getSeries(htmlcode):
    """Return the link text following the "系列" label, or ``''`` when absent.

    :param htmlcode: HTML source of the movie page.
    """
    page = etree.fromstring(htmlcode, etree.HTMLParser())
    series = page.xpath('//span[contains(text(),"系列")]/following-sibling::a/text()')
    return str(series).strip(" ['']")
133 | 
134 | 
def getCover_small(number):  # get the small cover image from avsox
    """Return the thumbnail URL of the avsox search result matching ``number``.

    The search results are scanned in order and the first one whose
    displayed number equals ``number`` (case-insensitively) wins.

    :param number: product number to search for.
    :return: the image ``src``, or ``''`` when nothing matches or an error
        occurs (the error is printed).
    """
    try:
        page = etree.fromstring(get_html('https://avsox.website/cn/search/' + number), etree.HTMLParser())
        total = len(page.xpath("//div[@id='waterfall']/div/a/div"))
        wanted = number.upper()
        # XPath positions are 1-based; walk the results to find the number.
        for position in range(1, total + 1):
            item = "//div[@id='waterfall']/div[" + str(position) + "]"
            shown_number = page.xpath(item + "/a/div[@class='photo-info']/span/date[1]/text()")
            if shown_number and shown_number[0].upper() == wanted:
                return page.xpath(item + "/a/div[@class='photo-frame']/img/@src")[0]
    except Exception as error_info:
        print('Error in javbus.getCover_small : ' + str(error_info))
    return ''
152 | 
153 | 
def getTag(htmlcode):  # get the tags
    """Return the text of every ``genre`` element whose markup lacks ``onmouseout``.

    :param htmlcode: HTML source of the movie page.
    :return: list of tag strings.
    """
    genres = BeautifulSoup(htmlcode, 'lxml').find_all(attrs={'class': 'genre'})
    return [genre.get_text() for genre in genres if 'onmouseout' not in str(genre)]
163 | 
164 | 
def _find_in_search_results(search_url, number, swap_separators=False):
    """Return the movie URL of the search result matching ``number``, or None.

    A result matches when its displayed number equals ``number`` (compared
    upper-cased), optionally ignoring ``-`` or ``_``. With
    ``swap_separators`` a result that uses ``_`` where ``number`` has ``-``
    (or the other way round) matches as well.
    """
    page = etree.fromstring(get_html(search_url), etree.HTMLParser())
    wanted = number.upper()
    accepted = {wanted, wanted.replace('-', ''), wanted.replace('_', '')}
    if swap_separators:
        accepted.update((wanted.replace('-', '_'), wanted.replace('_', '-')))
    for item in page.xpath("//div[@id='waterfall']/div[@id='waterfall']/div"):
        shown = item.xpath("a[@class='movie-box']/div[@class='photo-info']/span/date[1]/text()")
        # Tiles without a number cannot match; skipping them keeps one odd
        # tile from aborting the whole search with an IndexError.
        if not shown or shown[0].upper() not in accepted:
            continue
        links = item.xpath("a[@class='movie-box']/@href")
        if links:
            return links[0]
    return None


def find_number(number):
    """Search javbus for ``number`` and return the URL of its movie page.

    Numbers that do not look uncensored (not starting with four or more
    digits, not starting with ``n`` plus four digits, not containing HEYZO)
    are looked up in the censored search first. The uncensored search is
    the fallback and the only search for the other numbers.

    :param number: product number to search for.
    :return: the movie page URL, or the string ``'not found'``.
    """
    # ======================================================================= censored search
    looks_uncensored = (re.match(r'^\d{4,}', number) or re.match(r'n\d{4}', number)
                        or 'HEYZO' in number.upper())
    if not looks_uncensored:
        result_url = _find_in_search_results('https://www.javbus.com/search/' + number + '&type=1', number)
        if result_url is not None:
            return result_url
    # ======================================================================= uncensored search
    result_url = _find_in_search_results(
        'https://www.javbus.com/uncensored/search/' + number + '&type=1', number, swap_separators=True)
    if result_url is not None:
        return result_url
    return 'not found'
203 | 
204 | 
def main(number, appoint_url=''):
    """Scrape one censored title from javbus and return its metadata as a JSON string.

    :param number: product number to search for.
    :param appoint_url: movie page URL to use instead of searching; it now
        defaults to ``''`` (search), matching ``main_us``.
    :return: JSON text. On a timeout ``website`` is ``"timeout"``; on any
        other error both ``title`` and ``website`` are empty.
    """
    try:
        result_url = appoint_url if appoint_url else find_number(number)
        if result_url == 'not found':
            raise Exception('Movie Data not found in javbus.main!')
        htmlcode = get_html(result_url)
        if str(htmlcode) == 'ProxyError':
            raise TimeoutError
        # Outline and score are looked up with the number as given by the
        # caller; afterwards the number shown on the page takes over.
        outline, score = getOutlineScore(number)
        number = getNum(htmlcode)
        # Parse the release date once and reuse it for 'year' and 'release'.
        release = getRelease(htmlcode)
        dic = {
            'title': str(getTitle(htmlcode)).replace(number, '').strip().replace(' ', '-'),
            'studio': getStudio(htmlcode),
            'publisher': getPublisher(htmlcode),
            'year': getYear(release),
            'outline': outline,
            'score': score,
            'runtime': getRuntime(htmlcode).replace('分鐘', '').strip(),
            'director': getDirector(htmlcode),
            'actor': getActor(htmlcode),
            'release': release,
            'number': number,
            'cover': getCover(htmlcode),
            'extrafanart': getExtraFanart(htmlcode),
            'imagecut': 1,
            'tag': getTag(htmlcode),
            'series': getSeries(htmlcode),
            'actor_photo': getActorPhoto(htmlcode),
            'website': result_url,
            'source': 'javbus.py',
        }
    except TimeoutError:
        dic = {
            'title': '',
            'website': 'timeout',
        }
    except Exception as error_info:
        print('Error in javbus.main : ' + str(error_info))
        dic = {
            'title': '',
            'website': '',
        }
    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
    return js
252 | 
253 | 
def main_uncensored(number, appoint_url=''):
    """Scrape one uncensored title from javbus and return its metadata as a JSON string.

    :param number: product number to search for.
    :param appoint_url: movie page URL to use instead of searching; it now
        defaults to ``''`` (search), matching ``main_us``.
    :return: JSON text. ``imagecut`` is 3, or 0 when no small cover could be
        found. On a timeout ``website`` is ``"timeout"``; on any other error
        both ``title`` and ``website`` are empty.
    """
    try:
        result_url = find_number(number) if appoint_url == '' else appoint_url
        if result_url == 'not found':
            raise Exception('Movie Data not found in javbus.main_uncensored!')
        htmlcode = get_html(result_url)
        if str(htmlcode) == 'ProxyError':
            raise TimeoutError
        # From here on the number shown on the page is used.
        number = getNum(htmlcode)
        outline = ''
        score = ''
        # Outline and score are only looked up for HEYZO titles.
        if 'HEYZO' in number.upper():
            outline, score = getOutlineScore(number)
        # Parse the release date once and reuse it for 'year' and 'release'.
        release = getRelease(htmlcode)
        dic = {
            'title': getTitle(htmlcode).replace(number, '').strip().replace(' ', '-'),
            'studio': getStudio(htmlcode),
            'publisher': '',
            'year': getYear(release),
            'outline': outline,
            'score': score,
            'runtime': getRuntime(htmlcode).replace('分鐘', '').strip(),
            'director': getDirector(htmlcode),
            'actor': getActor(htmlcode),
            'release': release,
            'number': number,
            'cover': getCover(htmlcode),
            'extrafanart': getExtraFanart(htmlcode),
            'tag': getTag(htmlcode),
            'series': getSeries(htmlcode),
            'imagecut': 3,
            'cover_small': getCover_small(number),  # small cover image fetched from avsox
            'actor_photo': getActorPhoto(htmlcode),
            'website': result_url,
            'source': 'javbus.py',
        }
        if dic['cover_small'] == '':
            dic['imagecut'] = 0
    except TimeoutError:
        dic = {
            'title': '',
            'website': 'timeout',
        }
    except Exception as error_info:
        print('Error in javbus.main_uncensored : ' + str(error_info))
        dic = {
            'title': '',
            'website': '',
        }
    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
    return js
308 | 
309 | 
def main_us(number, appoint_url=''):
    """Scrape javbus.one metadata for a western title and return it as JSON.

    Args:
        number: Movie number to search for.  It is compared
            case-insensitively against each search result, with and without
            dashes in the result's number.
        appoint_url: Detail-page URL to use directly; when empty the URL is
            taken from the first matching search result.

    Returns:
        A JSON string.  On success it contains the full metadata dict; on
        failure it contains only ``title`` (empty) and ``website``
        (``'timeout'`` when a fetch returned ``'ProxyError'``, else empty).
    """
    try:
        if appoint_url:
            result_url = appoint_url
        else:
            htmlcode = get_html('https://www.javbus.one/search/' + number)
            if str(htmlcode) == 'ProxyError':
                raise TimeoutError
            html = etree.fromstring(htmlcode, etree.HTMLParser())
            counts = len(html.xpath("//div[@class='row']/div[@id='waterfall']/div"))
            if counts == 0:
                raise Exception('Movie Data not found in javbus.main_us!')
            result_url = ''
            wanted = number.upper()
            # Walk the search results until one carries the requested number.
            for count in range(1, counts + 1):
                box = "//div[@id='waterfall']/div[" + str(count) + "]/a[@class='movie-box']"
                number_get = html.xpath(box + "/div[@class='photo-info']/span/date[1]/text()")[0]
                if number_get.upper() == wanted or number_get.replace('-', '').upper() == wanted:
                    # The thumbnail was previously looked up here too, but it
                    # was never used ('cover_small' is always '' below), and a
                    # missing <img> would abort the whole scrape.
                    result_url = html.xpath(box + "/@href")[0]
                    break
            if result_url == '':
                raise Exception('Movie Data not found in javbus.main_us!')
        htmlcode = get_html(result_url)
        if str(htmlcode) == 'ProxyError':
            raise TimeoutError
        # Parse the number and release date once and reuse them below.
        number = getNum(htmlcode)
        release = getRelease(htmlcode)
        dic = {
            'title': getTitle(htmlcode).replace(number, '').strip(),
            'studio': getStudio(htmlcode),
            'year': getYear(release),
            'runtime': getRuntime(htmlcode).replace('分鐘', '').strip(),
            'director': getDirector(htmlcode),
            'actor': getActor(htmlcode),
            'release': release,
            'number': number,
            'tag': getTag(htmlcode),
            'series': getSeries(htmlcode),
            'cover': getCover(htmlcode),
            'extrafanart': getExtraFanart(htmlcode),
            'cover_small': '',
            'imagecut': 0,
            'actor_photo': getActorPhoto(htmlcode),
            'publisher': '',
            'outline': '',
            'score': '',
            'website': result_url,
            'source': 'javbus.py',
        }
    except TimeoutError:
        dic = {
            'title': '',
            'website': 'timeout',
        }
    except Exception as error_info:
        print('Error in javbus.main_us : ' + str(error_info))
        dic = {
            'title': '',
            'website': '',
        }
    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )
    return js
375 | 
376 | 
377 | '''
378 | print(find_number('KA-001'))
379 | print(main_uncensored('010115-001'))
380 | print(main('ssni-644'))
381 | print(main_uncensored('012715-793'))
382 | print(main_us('sexart.15.06.10'))
383 | print(main_uncensored('heyzo-1031'))
384 | '''
385 | 
386 | # print(main('ssni-644', "https://www.javbus.com/SSNI-644"))
387 | # print(main('ssni-802', ""))
388 | # print(main_us('DirtyMasseur.20.07.26', "https://www.javbus.one/DirtyMasseur-20-07-26"))
389 | 


--------------------------------------------------------------------------------
/Getter/javdb.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | from bs4 import BeautifulSoup, SoupStrainer
  3 | from lxml import etree
  4 | import json
  5 | from Function.getHtml import get_html_javdb
  6 | from Function.getHtml import post_html
  7 | 
  8 | 
  9 | def getTitle(htmlcode):
 10 |     try:
 11 |         html = etree.fromstring(htmlcode, etree.HTMLParser())
 12 |         result = str(html.xpath('/html/body/section/div/h2/strong/text()')).strip(" ['']")
 13 |         return re.sub('.*\] ', '', result.replace('/', ',').replace('\\xa0', '').replace(' : ', ''))
 14 |     except:
 15 |         return re.sub('.*\] ', '', result.replace('/', ',').replace('\\xa0', ''))
 16 | 
 17 | 
 18 | def getActor(htmlcode):  # //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
 19 |     html = etree.fromstring(htmlcode, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
 20 |     result1 = html.xpath(
 21 |         '//strong[text()="演員:"]/../span/strong[@class="symbol female"][last()]/preceding-sibling::a/text()')
 22 |     result2 = html.xpath(
 23 |         '//strong[text()="Actor(s):"]/../span/strong[@class="symbol female"][last()]/preceding-sibling::a/text()')
 24 |     return result1 + result2
 25 | 
 26 | 
 27 | def getActorPhoto(actor):  # //*[@id="star_qdt"]/li/a/img
 28 |     d = {}
 29 |     for i in actor:
 30 |         if ',' not in i or ')' in i:
 31 |             p = {i: ''}
 32 |             d.update(p)
 33 |     return d
 34 | 
 35 | 
 36 | def getStudio(htmlcode):
 37 |     html = etree.fromstring(htmlcode, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
 38 |     result1 = str(html.xpath('//strong[contains(text(),"片商:")]/../span/a/text()')).strip(" ['']")
 39 |     result2 = str(html.xpath('//strong[contains(text(),"Maker:")]/../span/a/text()')).strip(" ['']")
 40 |     return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
 41 | 
 42 | 
 43 | def getPublisher(htmlcode):
 44 |     html = etree.fromstring(htmlcode, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
 45 |     result1 = str(html.xpath('//strong[contains(text(),"發行:")]/../span/a/text()')).strip(" ['']")
 46 |     result2 = str(html.xpath('//strong[contains(text(),"Publisher:")]/../span/a/text()')).strip(" ['']")
 47 |     return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
 48 | 
 49 | 
 50 | def getRuntime(htmlcode):
 51 |     html = etree.fromstring(htmlcode, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
 52 |     result1 = str(html.xpath('//strong[contains(text(),"時長")]/../span/text()')).strip(" ['']")
 53 |     result2 = str(html.xpath('//strong[contains(text(),"Duration:")]/../span/text()')).strip(" ['']")
 54 |     return str(result1 + result2).strip('+').rstrip('mi').rstrip(' minute(s)')
 55 | 
 56 | 
 57 | def getSeries(htmlcode):
 58 |     html = etree.fromstring(htmlcode, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
 59 |     result1 = str(html.xpath('//strong[contains(text(),"系列:")]/../span/a/text()')).strip(" ['']")
 60 |     result2 = str(html.xpath('//strong[contains(text(),"Series:")]/../span/a/text()')).strip(" ['']")
 61 |     return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
 62 | 
 63 | 
 64 | def getNumber(htmlcode):
 65 |     html = etree.fromstring(htmlcode, etree.HTMLParser())
 66 |     result1 = str(html.xpath('//strong[contains(text(),"番號:")]/../span/a/text()')).strip(
 67 |         " ['']").replace('_', '-')
 68 |     result2 = str(html.xpath('//strong[contains(text(),"ID:")]/../span/a/text()')).strip(
 69 |         " ['']").replace('_', '-')
 70 |     return str(result2 + result1).strip('+')
 71 | 
 72 | 
 73 | def getYear(release):
 74 |     try:
 75 |         result = str(re.search('\d{4}', release).group())
 76 |         return result
 77 |     except:
 78 |         return release
 79 | 
 80 | 
 81 | def getRelease(htmlcode):
 82 |     html = etree.fromstring(htmlcode, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
 83 |     result1 = str(html.xpath('//strong[contains(text(),"日期:")]/../span/text()')).strip(" ['']")
 84 |     result2 = str(html.xpath('//strong[contains(text(),"Released Date:")]/../span/text()')).strip(" ['']")
 85 |     return str(result1 + result2).strip('+')
 86 | 
 87 | 
 88 | def getTag(htmlcode):
 89 |     html = etree.fromstring(htmlcode, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
 90 |     result1 = str(html.xpath('//strong[contains(text(),"類別:")]/../span/a/text()')).strip(" ['']")
 91 |     result2 = str(html.xpath('//strong[contains(text(),"Tags:")]/../span/a/text()')).strip(" ['']")
 92 |     return str(result1 + result2).strip('+').replace(",\\xa0", "").replace("'", "").replace(' ', '').replace(',,',
 93 |                                                                                                              '').lstrip(
 94 |         ',')
 95 | 
 96 | 
 97 | # def getCover_small(htmlcode, count):
 98 | #     html = etree.fromstring(htmlcode, etree.HTMLParser())
 99 | #     result = html.xpath("//div[@class='grid-item column']/a[@class='box']/div/img/@data-src")[count]
100 | #     if 'thumbs' not in result:
101 | #         result = html.xpath("//div[@class='grid-item column']/a[@class='box']/div/img/@src")[count]
102 | #     if not 'https' in result:
103 | #         result = 'https:' + result
104 | #     return result
105 | 
106 | 
def getCover(htmlcode):
    """Return the cover image URL from a javdb detail page.

    The ``img.video-cover`` ``src`` is read through lxml first; when that
    yields nothing, BeautifulSoup is tried (the original author noted that
    xpath sometimes fails to find the element).

    Args:
        htmlcode: HTML source of the detail page.

    Returns:
        The ``src`` value, or ``''`` when no cover image is found.  The
        empty case used to leak the empty xpath result list ``[]``.
    """
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    covers = html.xpath("//img[@class='video-cover']/@src")
    if covers and covers[0]:
        return covers[0]
    soup = BeautifulSoup(htmlcode, 'lxml', parse_only=SoupStrainer('img', {'class': 'video-cover'}))
    if soup.img is not None:
        return soup.img.get('src', '')
    return ''
118 | 
119 | 
def getExtraFanart(htmlcode):
    """Return the preview-image links from a javdb detail page.

    The first ``'#preview-video'`` entry, if any, is removed from the list
    of hrefs before it is returned.
    """
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    links = tree.xpath("//div[@class='message-body']/div[@class='tile-images preview-images']/a/@href")
    try:
        links.remove('#preview-video')
    except ValueError:
        pass  # no video placeholder among the links
    return links
126 | 
127 | 
def getDirector(htmlcode):
    """Return the director ("導演:" / "Director:") link text from a javdb page.

    Both label variants are queried and their stringified results joined;
    leftover list punctuation from ``str(list)`` is then removed.
    """
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    queries = (
        '//strong[contains(text(),"導演:")]/../span/a/text()',
        '//strong[contains(text(),"Director:")]/../span/a/text()',
    )
    merged = ''.join(str(tree.xpath(query)).strip(" ['']") for query in queries)
    return merged.strip('+').replace("', '", '').replace('"', '')
133 | 
134 | 
def getScore(htmlcode):
    """Return the score from a javdb page, formatted to one decimal place.

    The ``score-label`` span text is searched for a ``(<value>分)`` group;
    when none is present the score is 0, giving ``'0.0'``.
    """
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    label = str(tree.xpath("//span[@class='score-label']/text()")).strip(" ['']")
    found = re.search(r'\((.+)分\)', label)
    raw_score = found.group(1) if found else 0
    return format(float(raw_score), '0.1f')
142 | 
143 | 
def getOutlineScore(number):
    """Fetch the outline and score for *number* from jav321.

    Args:
        number: Movie number posted as the ``sn`` search field.

    Returns:
        ``(outline, score)`` as strings.  Any error is printed and whatever
        has been collected so far (possibly two empty strings) is returned.
    """
    outline, score = '', ''
    try:
        response = post_html("https://www.jav321.com/search", query={"sn": number})
        page = etree.fromstring(response, etree.HTMLParser())
        outline = str(page.xpath('/html/body/div[2]/div[1]/div[1]/div[2]/div[3]/div/text()')).strip(" ['']")
        # The score is either a rating image named after the value times
        # ten, or plain text following the label.
        rated = re.search(r'<b>平均評価</b>: <img data-original="/img/(\d+).gif" />', response)
        if rated:
            score = str(float(rated.group(1)) / 10.0)
        else:
            plain = re.findall(r'<b>平均評価</b>: ([^<]+)<br>', response)
            score = str(plain).strip(" [',']").replace('\'', '')
    except Exception as error_info:
        print('Error in javdb.getOutlineScore : ' + str(error_info))
    return outline, score
159 | 
160 | 
def main(number, appoint_url, isuncensored=False):
    """Scrape javdb metadata for *number* and return it as a JSON string.

    Args:
        number: Movie number; matched case-insensitively against the
            ``uid`` of each search result.
        appoint_url: Detail-page URL to use directly; when ``''`` the URL is
            resolved through the javdb search page.
        isuncensored: When true and *number* looks like ``dddd…`` or
            ``ndddd``, the score comes from javdb itself and imagecut is 0.

    Returns:
        A JSON string.  On success it contains the full metadata dict; on
        failure it contains only ``title`` (empty) and ``website``
        (``'timeout'`` when a fetch returned ``'ProxyError'``, else empty).
    """
    try:
        if appoint_url != '':
            result_url = appoint_url
        else:
            # ---- search for the number
            search_page = get_html_javdb('https://javdb.com/search?q=' + number + '&f=all').replace('\xa0', ' ')
            if str(search_page) == 'ProxyError':
                raise TimeoutError
            tree = etree.fromstring(search_page, etree.HTMLParser())
            item_xpath = "//div[@id='videos']/div[@class='grid columns']/div[@class='grid-item column']"
            total = len(tree.xpath(item_xpath))
            if total == 0:
                raise Exception('Movie Data not found in javdb.main!')
            # ---- walk the results until one carries the requested number
            for position in range(1, total + 1):
                uid = tree.xpath(
                    item_xpath + '[' + str(position) + "]/a[@class='box']/div[@class='uid']/text()")[0]
                if uid.upper() == number.upper():
                    break
            else:
                raise Exception('Movie Data not found in javdb.main!')
            result_url = 'https://javdb.com' + tree.xpath('//*[@id="videos"]/div/div/a/@href')[position - 1]
        # ---- fetch the detail page
        html_info = get_html_javdb(result_url + '?locale=zh').replace('\xa0', ' ')
        if str(html_info) == 'ProxyError':
            raise TimeoutError
        # ---- score / outline, depending on the kind of number
        imagecut = 1
        outline = ''
        numeric_uncensored = re.match(r'^\d{4,}', number) or re.match(r'n\d{4}', number)
        if isuncensored and numeric_uncensored:
            imagecut = 0
            score = getScore(html_info)
        else:
            if 'HEYZO' in number.upper():
                imagecut = 0
            outline, score = getOutlineScore(number)
        # ---- collect the fields
        actor = getActor(html_info)
        if len(actor) == 0 and 'FC2-' in number.upper():
            actor.append('FC2-NoActor')
        actor_text = str(actor).strip(" [',']").replace('\'', '')
        title = getTitle(html_info)
        for old, new in (('中文字幕', ''), ('無碼', ''), ("\\n", ''), ('_', '-'),
                         (number.upper(), ''), (number, '')):
            title = title.replace(old, new)
        title = title.strip().replace(' ', '-').replace('--', '-')
        dic = {
            'actor': actor_text,
            'title': title,
            'studio': getStudio(html_info),
            'publisher': getPublisher(html_info),
            'outline': outline,
            'score': score,
            'runtime': getRuntime(html_info).replace(' 分鍾', ''),
            'director': getDirector(html_info),
            'release': getRelease(html_info),
            'number': number.upper(),
            'cover': getCover(html_info),
            'cover_small': '',
            'extrafanart': getExtraFanart(html_info),
            'imagecut': imagecut,
            'tag': getTag(html_info),
            'series': getSeries(html_info),
            'year': getYear(getRelease(html_info)),
            'actor_photo': getActorPhoto(actor),
            'website': result_url,
            'source': 'javdb.py',
        }
    except TimeoutError:
        dic = {'title': '', 'website': 'timeout'}
    except Exception as error_info:
        print('Error in javdb.main : ' + str(error_info))
        dic = {'title': '', 'website': ''}
    return json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )
244 | 
245 | 
def main_us(number, appoint_url=''):
    """Scrape javdb metadata for a ``series.yy.mm.dd`` style title.

    Args:
        number: Number such as ``'x-art.19.11.03'``.  The part before the
            first dot is the series; the rest, prefixed with ``'20'`` and
            with dots turned into dashes, is the expected release date.
        appoint_url: Detail-page URL to use directly; when empty the URL is
            resolved through the javdb search page.

    Returns:
        A JSON string.  On success it contains the full metadata dict; on
        failure it contains only ``title`` (empty) and ``website``
        (``'timeout'`` when a fetch returned ``'ProxyError'``, else empty).
    """

    def first_or_empty(nodes):
        # xpath() yields [] for a missing node.  Indexing it directly used to
        # raise IndexError, which made the 'horz-cover' fallbacks below
        # unreachable and aborted the whole search.
        return nodes[0] if nodes else ''

    try:
        if appoint_url:
            result_url = appoint_url
        else:
            # ---- search for the number
            htmlcode = get_html_javdb('https://javdb.com/search?q=' + number + '&f=all').replace(u'\xa0', u' ')
            if str(htmlcode) == 'ProxyError':
                raise TimeoutError
            html = etree.fromstring(htmlcode, etree.HTMLParser())
            item_xpath = "//div[@id='videos']/div[@class='grid columns']/div[@class='grid-item column']"
            horz_xpath = "//div[@id='videos']/div[@class='grid columns']/div[@class='grid-item column horz-cover']"
            counts = len(html.xpath(item_xpath))
            if counts == 0:
                raise Exception('Movie Data not found in javdb.main_us!')
            # ---- walk the results, matching on series and release date
            number_series = number.split('.')[0]
            number_date = '20' + number.replace(number_series, '').strip('.')
            number_date = number_date.replace('.', '-')
            count = 1
            movie_found = 0
            for count in range(1, counts + 1):
                box = '[' + str(count) + "]/a[@class='box']"
                # NOTE(review): the fallback reuses the same index within the
                # 'horz-cover' items, as the original code did - confirm the
                # two item kinds line up on the live page.
                series_get = first_or_empty(html.xpath(item_xpath + box + "/div[@class='uid2']/text()"))
                if not series_get:
                    series_get = first_or_empty(html.xpath(horz_xpath + box + "/div[@class='uid2']/text()"))
                date_get = first_or_empty(html.xpath(item_xpath + box + "/div[@class='meta']/text()"))
                if not date_get:
                    date_get = first_or_empty(html.xpath(horz_xpath + box + "/div[@class='meta']/text()"))
                # Normalise "YYYY-M-D" or "M/D/YYYY" to a dash-separated date.
                iso_date = re.search(r'\d{4}-\d{1,2}-\d{1,2}', date_get)
                if iso_date:
                    date_get = iso_date.group()
                elif re.search(r'\d{1,2}/\d{1,2}/\d{4}', date_get):
                    month_day = re.findall(r'\d{1,2}/\d{1,2}', date_get)[0]
                    year_get = re.findall(r'\d{4}', date_get)[0]
                    date_get = year_get + '-' + month_day
                series_get = series_get.replace(' ', '')
                date_get = date_get.replace('/', '-')
                if (series_get.upper() == number_series.upper()
                        or series_get.replace('-', '').upper() == number_series.upper()) \
                        and number_date == date_get:
                    movie_found = 1
                    break
            if movie_found == 0:
                raise Exception('Movie Data not found in javdb.main_us!')
            result_url = 'https://javdb.com' + html.xpath('//*[@id="videos"]/div/div/a/@href')[count - 1]
        # ---- fetch the detail page
        html_info = get_html_javdb(result_url + '?locale=zh').replace(u'\xa0', u' ')
        if str(html_info) == 'ProxyError':
            raise TimeoutError
        # ---- collect the fields
        actor = getActor(html_info)
        number = getNumber(html_info)
        dic = {
            'actor': str(actor).strip(" [',']").replace('\'', ''),
            'title': getTitle(html_info).replace('中文字幕', '').replace("\\n", '').replace('_', '-').replace(
                number, '').strip(),
            'studio': getStudio(html_info),
            'publisher': getPublisher(html_info),
            'outline': '',
            'score': getScore(html_info),
            'runtime': getRuntime(html_info).replace(' 分鍾', ''),
            'director': getDirector(html_info),
            'release': getRelease(html_info),
            'number': number,
            'cover': getCover(html_info),
            'cover_small': '',
            'extrafanart': getExtraFanart(html_info),
            'imagecut': 0,
            'tag': getTag(html_info),
            'series': getSeries(html_info),
            'year': getYear(getRelease(html_info)),
            'actor_photo': getActorPhoto(actor),
            'website': result_url,
            'source': 'javdb.py',
        }
    except TimeoutError:
        dic = {
            'title': '',
            'website': 'timeout',
        }
    except Exception as error_info:
        print('Error in javdb.main_us : ' + str(error_info))
        dic = {
            'title': '',
            'website': '',
        }
    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )
    return js
340 | 
341 | 
342 | '''
343 | print(main('abs-141'))
344 | print(main('HYSD-00083'))
345 | print(main('IESP-660'))
346 | print(main('n1403'))
347 | print(main('GANA-1910'))
348 | print(main('heyzo-1031'))
349 | print(main_us('x-art.19.11.03'))
350 | print(main('032020-001'))
351 | print(main('S2M-055'))
352 | print(main('LUXU-1217'))
353 | '''
354 | # print(main('IPX-604', ''))
355 | # print(main('SSIS-084', ''))
356 | # print(main('abs-141', ''))
357 | # print(main('HYSD-00083', ''))
358 | # print(main('IESP-660', ''))
359 | # print(main('n1403', ''))
360 | # print(main('GANA-1910', ''))
361 | # print(main('heyzo-1031', ''))
362 | # print(main_us('x-art.19.11.03', ''))
363 | # print(main('032020-001', ''))
364 | # print(main('S2M-055', ''))
365 | # print(main('LUXU-1217', ''))
366 | # print(main_us('x-art.19.11.03', ''))
367 | 


--------------------------------------------------------------------------------
/Getter/mgstage.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | from lxml import etree
  3 | import json
  4 | from Function.getHtml import get_html
  5 | 
  6 | 
  7 | def getTitle(htmlcode):
  8 |     try:
  9 |         html = etree.fromstring(htmlcode, etree.HTMLParser())
 10 |         result = str(html.xpath('//*[@id="center_column"]/div[1]/h1/text()')).strip(" ['']")
 11 |         return result.replace('/', ',')
 12 |     except:
 13 |         return ''
 14 | 
 15 | 
 16 | def getActor(htmlcode):
 17 |     html = etree.fromstring(htmlcode, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
 18 |     result1 = str(html.xpath('//th[contains(text(),"出演")]/../td/a/text()')).strip(" ['']")
 19 |     result2 = str(html.xpath('//th[contains(text(),"出演")]/../td/text()')).strip(" ['']")
 20 |     return str(result1 + result2).replace('/', ',').replace('\'', '').replace(' ', '').replace('\\n', '')
 21 | 
 22 | 
 23 | def getActorPhoto(actor):
 24 |     d = {}
 25 |     for i in actor:
 26 |         if i and ',' not in i or ')' in i:
 27 |             p = {i: ''}
 28 |             d.update(p)
 29 |     return d
 30 | 
 31 | 
 32 | def getStudio(htmlcode):
 33 |     html = etree.fromstring(htmlcode, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
 34 |     result1 = str(html.xpath('//th[contains(text(),"メーカー：")]/../td/a/text()')).strip(" ['']")
 35 |     result2 = str(html.xpath('//th[contains(text(),"メーカー：")]/../td/text()')).strip(" ['']")
 36 |     return str(result1 + result2).replace('\'', '').replace(' ', '').replace('\\n', '')
 37 | 
 38 | 
 39 | def getPublisher(htmlcode):
 40 |     html = etree.fromstring(htmlcode, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
 41 |     result1 = str(html.xpath('//th[contains(text(),"レーベル：")]/../td/a/text()')).strip(" ['']")
 42 |     result2 = str(html.xpath('//th[contains(text(),"レーベル：")]/../td/text()')).strip(" ['']")
 43 |     return str(result1 + result2).replace('\'', '').replace(' ', '').replace('\\n', '')
 44 | 
 45 | 
 46 | def getRuntime(htmlcode):
 47 |     html = etree.fromstring(htmlcode, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
 48 |     result1 = str(html.xpath('//th[contains(text(),"収録時間：")]/../td/a/text()')).strip(" ['']")
 49 |     result2 = str(html.xpath('//th[contains(text(),"収録時間：")]/../td/text()')).strip(" ['']")
 50 |     return str(result1 + result2).rstrip('min').replace('\'', '').replace(' ', '').replace('\\n', '')
 51 | 
 52 | 
 53 | def getSeries(htmlcode):
 54 |     html = etree.fromstring(htmlcode, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
 55 |     result1 = str(html.xpath('//th[contains(text(),"シリーズ：")]/../td/a/text()')).strip(" ['']")
 56 |     result2 = str(html.xpath('//th[contains(text(),"シリーズ：")]/../td/text()')).strip(" ['']")
 57 |     return str(result1 + result2).replace('\'', '').replace(' ', '').replace('\\n', '')
 58 | 
 59 | 
 60 | def getNum(htmlcode):
 61 |     html = etree.fromstring(htmlcode, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
 62 |     result1 = str(html.xpath('//th[contains(text(),"品番：")]/../td/a/text()')).strip(" ['']")
 63 |     result2 = str(html.xpath('//th[contains(text(),"品番：")]/../td/text()')).strip(" ['']")
 64 |     return str(result1 + result2).replace('\'', '').replace(' ', '').replace('\\n', '')
 65 | 
 66 | 
 67 | def getYear(getRelease):
 68 |     try:
 69 |         result = str(re.search('\d{4}', getRelease).group())
 70 |         return result
 71 |     except:
 72 |         return getRelease
 73 | 
 74 | 
 75 | def getRelease(htmlcode):
 76 |     html = etree.fromstring(htmlcode, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
 77 |     result1 = str(html.xpath('//th[contains(text(),"配信開始日：")]/../td/a/text()')).strip(" ['']")
 78 |     result2 = str(html.xpath('//th[contains(text(),"配信開始日：")]/../td/text()')).strip(" ['']")
 79 |     return str(result1 + result2).replace('\'', '').replace(' ', '').replace('\\n', '')
 80 | 
 81 | 
 82 | def getTag(htmlcode):
 83 |     html = etree.fromstring(htmlcode, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
 84 |     result1 = str(html.xpath('//th[contains(text(),"ジャンル：")]/../td/a/text()')).strip(" ['']")
 85 |     result2 = str(html.xpath('//th[contains(text(),"ジャンル：")]/../td/text()')).strip(" ['']")
 86 |     return str(result1 + result2).replace('\'', '').replace(' ', '').replace('\\n', '')
 87 | 
 88 | 
 89 | def getCover(htmlcode):
 90 |     html = etree.fromstring(htmlcode, etree.HTMLParser())
 91 |     result = str(html.xpath('//*[@id="center_column"]/div[1]/div[1]/div/div/h2/img/@src')).strip(" ['']")
 92 |     return result
 93 | 
 94 | 
 95 | def getExtraFanart(htmlcode):  # 获取封面链接
 96 |     html = etree.fromstring(htmlcode, etree.HTMLParser())
 97 |     extrafanart_list = html.xpath("//dl[@id='sample-photo']/dd/ul/li/a[@class='sample_image']/@href")
 98 |     return extrafanart_list
 99 | 
100 | 
def getOutline(htmlcode):
    """Return the first introduction paragraph from an mgstage page.

    The xpath result list is stringified and its list punctuation trimmed.
    """
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    paragraphs = tree.xpath('//*[@id="introduction"]/dd/p[1]/text()')
    return str(paragraphs).strip(" ['']")
105 | 
106 | 
def getScore(htmlcode):
    """Return the "out of 5" score text found in an mgstage page.

    Every ``5点満点中 <value>点`` occurrence is collected; the list is then
    stringified and its outer punctuation trimmed, so no match yields ``''``.
    """
    score_pattern = re.compile(r'5点満点中 (\S+)点')
    scores = score_pattern.findall(htmlcode)
    return str(scores).strip(" ['']")
109 | 
110 | 
def main(number, appoint_url):
    """Scrape mgstage metadata for *number* and return it as a JSON string.

    Args:
        number: Product number; upper-cased and used to build the product
            URL when no explicit URL is given.
        appoint_url: Product-page URL to use directly; ``''`` means build
            it from *number*.

    Returns:
        A JSON string.  On success it contains the full metadata dict; on
        failure it contains only ``title`` (empty) and ``website``
        (``'timeout'`` when the fetch returned ``'ProxyError'``, else empty).
    """
    try:
        number = number.upper()
        if appoint_url != '':
            url = appoint_url
        else:
            url = 'https://www.mgstage.com/product/product_detail/' + str(number) + '/'
        page = str(get_html(url, cookies={'adc': '1'}))
        # Repair anchors whose tag name and attribute have run together.
        page = page.replace('ahref', 'a href')
        if str(page) == 'ProxyError':
            raise TimeoutError
        performers = getActor(page).replace(' ', '')
        released = getRelease(page)
        title = getTitle(page).replace("\\n", '').replace('        ', '')
        dic = {
            'title': title.strip(','),
            'studio': getStudio(page).strip(','),
            'publisher': getPublisher(page).strip(','),
            'outline': getOutline(page).replace('\n', '').strip(','),
            'score': getScore(page).strip(','),
            'runtime': getRuntime(page).strip(','),
            'actor': performers.strip(','),
            'release': released.strip(',').replace('/', '-'),
            'number': getNum(page).strip(','),
            'cover': getCover(page).strip(','),
            'extrafanart': getExtraFanart(page),
            'imagecut': 0,
            'tag': getTag(page).strip(','),
            'series': getSeries(page).strip(','),
            'year': getYear(released).strip(','),
            'actor_photo': getActorPhoto(performers.split(',')),
            'director': '',
            'website': url,
            'source': 'mgstage.py',
        }
    except TimeoutError:
        dic = {'title': '', 'website': 'timeout'}
    except Exception as error_info:
        print('Error in mgstage.main : ' + str(error_info))
        dic = {'title': '', 'website': ''}
    return json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )
157 | '''
158 | print(main('200GANA-2240'))
159 | print(main('SIRO-4042'))
160 | print(main('300MIUM-382'))
161 | '''
162 | # print(main('300MIUM-382', ''))
163 | # print(main('300MIUM-382', 'https://www.mgstage.com/product/product_detail/300MIUM-382/'))
164 | 


--------------------------------------------------------------------------------
/Getter/xcity.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | # -*- coding: utf-8 -*-
  3 | import re
  4 | from lxml import etree
  5 | import json
  6 | from Function.getHtml import get_html
  7 | 
  8 | 
  9 | def getTitle(a):
 10 |     html = etree.fromstring(a, etree.HTMLParser())
 11 |     result = str(html.xpath("//span[@id='program_detail_title']/text()")).strip(" ['']").replace("'", '')
 12 |     return result
 13 | 
 14 | 
 15 | def getActor(a):  # //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
 16 |     html = etree.fromstring(a, etree.HTMLParser())
 17 |     result = str(html.xpath("//li[@class='credit-links']/a/text()")).strip(" ['']").replace("'", '')
 18 |     return result
 19 | 
 20 | 
 21 | def getActorPhoto(actor):  # //*[@id="star_qdt"]/li/a/img
 22 |     actor = actor.split(',')
 23 |     d = {}
 24 |     for i in actor:
 25 |         if ',' not in i:
 26 |             p = {i: ''}
 27 |             d.update(p)
 28 |     return d
 29 | 
 30 | 
 31 | def getStudio(a):
 32 |     html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
 33 |     result = str(html.xpath("//span[@id='program_detail_maker_name']/text()")).strip(" ['']")
 34 |     return result
 35 | 
 36 | 
 37 | def getRuntime(a):
 38 |     html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
 39 |     try:
 40 |         result = str(html.xpath("//span[contains(text(),'収録時間')]/parent::li/text()"))
 41 |         if re.search(r'\d+', result):
 42 |             result = re.findall(r'\d+', result)[0].replace('/', '-')
 43 |     except:
 44 |         result = ''
 45 |     return result
 46 | 
 47 | 
 48 | def getSeries(a):
 49 |     html = etree.fromstring(a, etree.HTMLParser())
 50 |     result = str(html.xpath("//span[contains(text(),'シリーズ')]/following-sibling::a/span/text()")).strip(" ['']").replace('\'', '')
 51 |     return result
 52 | 
 53 | 
 54 | def getNum(a):
 55 |     html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
 56 |     result = str(html.xpath("//span[@id='hinban']/text()")).strip(" ['']")
 57 |     return result
 58 | 
 59 | 
 60 | def getYear(getRelease):
 61 |     try:
 62 |         result = str(re.search('\d{4}', getRelease).group())
 63 |         return result
 64 |     except:
 65 |         return getRelease
 66 | 
 67 | 
 68 | def getRelease(a):
 69 |     html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
 70 |     try:
 71 |         result = str(html.xpath("//span[contains(text(),'発売日')]/parent::li/text()"))
 72 |         if re.search(r'\d{4}/\d{2}/\d{2}', result):
 73 |             result = re.findall(r'\d{4}/\d{2}/\d{2}', result)[0].replace('/', '-')
 74 |     except:
 75 |         result = ''
 76 |     return result
 77 | 
 78 | 
 79 | def getTag(a):
 80 |     html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
 81 |     result1 = str(html.xpath("//a[@class='genre']/text()")).strip(" ['']").replace('\'', '').replace('\\t', '').replace('\\n', '')
 82 |     return result1.replace("', '", ",")
 83 | 
 84 | 
 85 | def getCover(htmlcode):
 86 |     html = etree.fromstring(htmlcode, etree.HTMLParser())
 87 |     result = 'https:' + str(html.xpath("//div[@class='photo']/p[@class='tn']/a/@href")).strip(" ['']").replace('\'', '')
 88 |     return result
 89 | 
 90 | 
 91 | def getExtraFanart(htmlcode):  # 获取封面链接
 92 |     html = etree.fromstring(htmlcode, etree.HTMLParser())
 93 |     old_list = html.xpath("//div[@id='sample_images']/div/a/@href")
 94 |     new_list = []
 95 |     for extrafanart in old_list:
 96 |         new_list.append('https:' + extrafanart.replace('scene/small/', ''))
 97 |     return new_list
 98 | 
 99 | 
def getDirector(a):
    """Return the director from ``<span id="program_detail_director">``.

    :param a: HTML source of the detail page.
    :return: string form of the matched text nodes with surrounding
        brackets, quotes and spaces stripped and the escaped tab / newline
        sequences of the list repr removed; ``''`` when nothing matched.
    """
    tree = etree.fromstring(a, etree.HTMLParser())
    director_nodes = tree.xpath("//span[@id='program_detail_director']/text()")
    flattened = str(director_nodes).strip(" ['']")
    # The list repr shows tabs and newlines as backslash escapes.
    without_tabs = flattened.replace('\\t', '')
    return without_tabs.replace('\\n', '')
104 | 
105 | 
def getOutline(htmlcode):
    """Return the synopsis text from ``<p class="lead">``.

    Text nodes are cleaned individually (newlines removed, surrounding
    whitespace trimmed, empty nodes dropped) and joined with ``', '``.
    Going through ``str(list)`` as before escaped non-printable characters
    such as the ideographic space U+3000 into literal ``\\u3000`` sequences
    and stripped legitimate brackets and quotes from the ends of the text.

    :param htmlcode: HTML source of the detail page.
    :return: the synopsis, or ``''`` when the page has none.
    """
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    texts = html.xpath("//p[@class='lead']/text()")
    parts = (str(text).replace('\n', '').strip() for text in texts)
    return ', '.join(part for part in parts if part)
110 | 
111 | 
def getScore(htmlcode):
    """Return the review score from ``<p class="d-review__average">``.

    :param htmlcode: HTML source of the detail page.
    :return: text of the first ``<strong>`` child with newlines and the
        '点' suffix removed, or ``''`` when the page has no such element
        (previously an ``IndexError`` was raised in that case).
    """
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    scores = html.xpath("//p[@class='d-review__average']/strong/text()")
    if not scores:
        return ''
    return str(scores[0]).replace('\\n', '').replace('\n', '').replace('点', '')
116 | 
117 | 
def _fetch_xcity_page(url):
    """Fetch ``url`` with get_html, raising TimeoutError on a proxy failure."""
    page = get_html(url)
    # Same check the mgstage getter applies to get_html's result; it lets
    # main()'s ``except TimeoutError`` branch report a timeout.
    if str(page) == 'ProxyError':
        raise TimeoutError
    return page


def find_number(number, appoint_url):
    """Locate the xcity detail page for ``number``.

    When ``appoint_url`` is given it is fetched directly.  Otherwise the
    site search is queried with the hyphens removed from ``number`` and
    every result row after the first is opened until one whose ``hinban``
    equals the wanted number (case-insensitive, hyphens ignored).

    :param number: product number, e.g. ``'xc-1280'``.
    :param appoint_url: explicit detail-page URL, or a falsy value to search.
    :return: ``(url, html)`` of the detail page, or ``('not found', '')``.
    :raises TimeoutError: when get_html reports ``'ProxyError'``.
    """
    if appoint_url:
        return appoint_url, _fetch_xcity_page(appoint_url)
    compact_number = number.replace('-', '')
    htmlcode = _fetch_xcity_page('https://xcity.jp/result_published/?q=' + compact_number)
    if '該当する作品はみつかりませんでした' in htmlcode:
        return 'not found', ''
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    rows = html.xpath("//div[@id='searchResult']/table[@class='resultList']/tr")
    # The first row is skipped, as the original did by starting at tr[2].
    for row in rows[1:]:
        hrefs = row.xpath("td[1]/a/@href")
        if not hrefs:
            # A row without a link must not abort the whole search.
            continue
        result_url = 'https://xcity.jp' + hrefs[0]
        detail_page = _fetch_xcity_page(result_url)
        detail_page_html = etree.fromstring(detail_page, etree.HTMLParser())
        hinban = detail_page_html.xpath("//span[@id='hinban']/text()")
        if hinban and str(hinban[0]).upper() == compact_number.upper():
            return result_url, detail_page
    return 'not found', ''
135 | 
136 | 
def main(number, appoint_url):
    """Scrape xcity for ``number`` and return the metadata as a JSON string.

    :param number: product number to look up.
    :param appoint_url: explicit detail-page URL; a falsy value triggers a
        site search instead.
    :return: JSON text (sorted keys, 4-space indent).  On success it holds
        the full metadata record; on ``TimeoutError`` only an empty
        ``title`` with ``website`` set to ``'timeout'``; on any other error
        an empty ``title`` and empty ``website`` (the error is printed).
    """
    try:
        page_url, page_html = find_number(number, appoint_url)
        if page_url == 'not found':
            raise Exception('Movie Data not found in xcity!')
        performers = getActor(page_html)
        released = getRelease(page_html)
        dic = {}
        dic['title'] = getTitle(page_html)
        dic['release'] = released
        dic['year'] = getYear(released)
        dic['actor'] = performers
        dic['actor_photo'] = getActorPhoto(performers)
        dic['number'] = getNum(page_html)
        dic['outline'] = getOutline(page_html)
        dic['director'] = getDirector(page_html)
        dic['tag'] = getTag(page_html)
        dic['runtime'] = getRuntime(page_html)
        dic['studio'] = getStudio(page_html)
        dic['series'] = getSeries(page_html)
        dic['cover'] = getCover(page_html)
        dic['extrafanart'] = getExtraFanart(page_html)
        dic['imagecut'] = 1
        # This getter leaves score and publisher blank.
        dic['score'] = ''
        dic['publisher'] = ''
        dic['website'] = page_url
        dic['source'] = 'xcity.py'
    except TimeoutError:
        dic = dict(title='', website='timeout')
    except Exception as error_info:
        print('Error in xcity.main : ' + str(error_info))
        dic = dict(title='', website='')
    return json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'))
178 | 
179 | 
180 | '''
181 | print(main('xc-1280'))
182 | print(main('xv-163'))
183 | print(main('sea-081'))
184 | print(main('IA-28'))
185 | print(main('xc-1298'))
186 | print(main('DMOW185'))
187 | print(main('EMOT007'))
188 | '''
189 | # print(main('EMOT007', "https://xcity.jp/avod/detail/?id=147036"))
190 | 


--------------------------------------------------------------------------------
/Img/AVDC-ico.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yq8519/AVDC/42ea8c08b26ac956f08e6cd0e88ffd83819c8b62/Img/AVDC-ico.png


--------------------------------------------------------------------------------
/Img/AVDC.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yq8519/AVDC/42ea8c08b26ac956f08e6cd0e88ffd83819c8b62/Img/AVDC.ico


--------------------------------------------------------------------------------
/Img/LEAK.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yq8519/AVDC/42ea8c08b26ac956f08e6cd0e88ffd83819c8b62/Img/LEAK.png


--------------------------------------------------------------------------------
/Img/SUB.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yq8519/AVDC/42ea8c08b26ac956f08e6cd0e88ffd83819c8b62/Img/SUB.png


--------------------------------------------------------------------------------
/Img/UNCENSORED.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yq8519/AVDC/42ea8c08b26ac956f08e6cd0e88ffd83819c8b62/Img/UNCENSORED.png


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                     GNU GENERAL PUBLIC LICENSE
  2 |                        Version 3, 29 June 2007
  3 | 
  4 |  Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
  5 |  Everyone is permitted to copy and distribute verbatim copies
  6 |  of this license document, but changing it is not allowed.
  7 | 
  8 |                             Preamble
  9 | 
 10 |   The GNU General Public License is a free, copyleft license for
 11 | software and other kinds of works.
 12 | 
 13 |   The licenses for most software and other practical works are designed
 14 | to take away your freedom to share and change the works.  By contrast,
 15 | the GNU General Public License is intended to guarantee your freedom to
 16 | share and change all versions of a program--to make sure it remains free
 17 | software for all its users.  We, the Free Software Foundation, use the
 18 | GNU General Public License for most of our software; it applies also to
 19 | any other work released this way by its authors.  You can apply it to
 20 | your programs, too.
 21 | 
 22 |   When we speak of free software, we are referring to freedom, not
 23 | price.  Our General Public Licenses are designed to make sure that you
 24 | have the freedom to distribute copies of free software (and charge for
 25 | them if you wish), that you receive source code or can get it if you
 26 | want it, that you can change the software or use pieces of it in new
 27 | free programs, and that you know you can do these things.
 28 | 
 29 |   To protect your rights, we need to prevent others from denying you
 30 | these rights or asking you to surrender the rights.  Therefore, you have
 31 | certain responsibilities if you distribute copies of the software, or if
 32 | you modify it: responsibilities to respect the freedom of others.
 33 | 
 34 |   For example, if you distribute copies of such a program, whether
 35 | gratis or for a fee, you must pass on to the recipients the same
 36 | freedoms that you received.  You must make sure that they, too, receive
 37 | or can get the source code.  And you must show them these terms so they
 38 | know their rights.
 39 | 
 40 |   Developers that use the GNU GPL protect your rights with two steps:
 41 | (1) assert copyright on the software, and (2) offer you this License
 42 | giving you legal permission to copy, distribute and/or modify it.
 43 | 
 44 |   For the developers' and authors' protection, the GPL clearly explains
 45 | that there is no warranty for this free software.  For both users' and
 46 | authors' sake, the GPL requires that modified versions be marked as
 47 | changed, so that their problems will not be attributed erroneously to
 48 | authors of previous versions.
 49 | 
 50 |   Some devices are designed to deny users access to install or run
 51 | modified versions of the software inside them, although the manufacturer
 52 | can do so.  This is fundamentally incompatible with the aim of
 53 | protecting users' freedom to change the software.  The systematic
 54 | pattern of such abuse occurs in the area of products for individuals to
 55 | use, which is precisely where it is most unacceptable.  Therefore, we
 56 | have designed this version of the GPL to prohibit the practice for those
 57 | products.  If such problems arise substantially in other domains, we
 58 | stand ready to extend this provision to those domains in future versions
 59 | of the GPL, as needed to protect the freedom of users.
 60 | 
 61 |   Finally, every program is threatened constantly by software patents.
 62 | States should not allow patents to restrict development and use of
 63 | software on general-purpose computers, but in those that do, we wish to
 64 | avoid the special danger that patents applied to a free program could
 65 | make it effectively proprietary.  To prevent this, the GPL assures that
 66 | patents cannot be used to render the program non-free.
 67 | 
 68 |   The precise terms and conditions for copying, distribution and
 69 | modification follow.
 70 | 
 71 |                        TERMS AND CONDITIONS
 72 | 
 73 |   0. Definitions.
 74 | 
 75 |   "This License" refers to version 3 of the GNU General Public License.
 76 | 
 77 |   "Copyright" also means copyright-like laws that apply to other kinds of
 78 | works, such as semiconductor masks.
 79 | 
 80 |   "The Program" refers to any copyrightable work licensed under this
 81 | License.  Each licensee is addressed as "you".  "Licensees" and
 82 | "recipients" may be individuals or organizations.
 83 | 
 84 |   To "modify" a work means to copy from or adapt all or part of the work
 85 | in a fashion requiring copyright permission, other than the making of an
 86 | exact copy.  The resulting work is called a "modified version" of the
 87 | earlier work or a work "based on" the earlier work.
 88 | 
 89 |   A "covered work" means either the unmodified Program or a work based
 90 | on the Program.
 91 | 
 92 |   To "propagate" a work means to do anything with it that, without
 93 | permission, would make you directly or secondarily liable for
 94 | infringement under applicable copyright law, except executing it on a
 95 | computer or modifying a private copy.  Propagation includes copying,
 96 | distribution (with or without modification), making available to the
 97 | public, and in some countries other activities as well.
 98 | 
 99 |   To "convey" a work means any kind of propagation that enables other
100 | parties to make or receive copies.  Mere interaction with a user through
101 | a computer network, with no transfer of a copy, is not conveying.
102 | 
103 |   An interactive user interface displays "Appropriate Legal Notices"
104 | to the extent that it includes a convenient and prominently visible
105 | feature that (1) displays an appropriate copyright notice, and (2)
106 | tells the user that there is no warranty for the work (except to the
107 | extent that warranties are provided), that licensees may convey the
108 | work under this License, and how to view a copy of this License.  If
109 | the interface presents a list of user commands or options, such as a
110 | menu, a prominent item in the list meets this criterion.
111 | 
112 |   1. Source Code.
113 | 
114 |   The "source code" for a work means the preferred form of the work
115 | for making modifications to it.  "Object code" means any non-source
116 | form of a work.
117 | 
118 |   A "Standard Interface" means an interface that either is an official
119 | standard defined by a recognized standards body, or, in the case of
120 | interfaces specified for a particular programming language, one that
121 | is widely used among developers working in that language.
122 | 
123 |   The "System Libraries" of an executable work include anything, other
124 | than the work as a whole, that (a) is included in the normal form of
125 | packaging a Major Component, but which is not part of that Major
126 | Component, and (b) serves only to enable use of the work with that
127 | Major Component, or to implement a Standard Interface for which an
128 | implementation is available to the public in source code form.  A
129 | "Major Component", in this context, means a major essential component
130 | (kernel, window system, and so on) of the specific operating system
131 | (if any) on which the executable work runs, or a compiler used to
132 | produce the work, or an object code interpreter used to run it.
133 | 
134 |   The "Corresponding Source" for a work in object code form means all
135 | the source code needed to generate, install, and (for an executable
136 | work) run the object code and to modify the work, including scripts to
137 | control those activities.  However, it does not include the work's
138 | System Libraries, or general-purpose tools or generally available free
139 | programs which are used unmodified in performing those activities but
140 | which are not part of the work.  For example, Corresponding Source
141 | includes interface definition files associated with source files for
142 | the work, and the source code for shared libraries and dynamically
143 | linked subprograms that the work is specifically designed to require,
144 | such as by intimate data communication or control flow between those
145 | subprograms and other parts of the work.
146 | 
147 |   The Corresponding Source need not include anything that users
148 | can regenerate automatically from other parts of the Corresponding
149 | Source.
150 | 
151 |   The Corresponding Source for a work in source code form is that
152 | same work.
153 | 
154 |   2. Basic Permissions.
155 | 
156 |   All rights granted under this License are granted for the term of
157 | copyright on the Program, and are irrevocable provided the stated
158 | conditions are met.  This License explicitly affirms your unlimited
159 | permission to run the unmodified Program.  The output from running a
160 | covered work is covered by this License only if the output, given its
161 | content, constitutes a covered work.  This License acknowledges your
162 | rights of fair use or other equivalent, as provided by copyright law.
163 | 
164 |   You may make, run and propagate covered works that you do not
165 | convey, without conditions so long as your license otherwise remains
166 | in force.  You may convey covered works to others for the sole purpose
167 | of having them make modifications exclusively for you, or provide you
168 | with facilities for running those works, provided that you comply with
169 | the terms of this License in conveying all material for which you do
170 | not control copyright.  Those thus making or running the covered works
171 | for you must do so exclusively on your behalf, under your direction
172 | and control, on terms that prohibit them from making any copies of
173 | your copyrighted material outside their relationship with you.
174 | 
175 |   Conveying under any other circumstances is permitted solely under
176 | the conditions stated below.  Sublicensing is not allowed; section 10
177 | makes it unnecessary.
178 | 
179 |   3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180 | 
181 |   No covered work shall be deemed part of an effective technological
182 | measure under any applicable law fulfilling obligations under article
183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184 | similar laws prohibiting or restricting circumvention of such
185 | measures.
186 | 
187 |   When you convey a covered work, you waive any legal power to forbid
188 | circumvention of technological measures to the extent such circumvention
189 | is effected by exercising rights under this License with respect to
190 | the covered work, and you disclaim any intention to limit operation or
191 | modification of the work as a means of enforcing, against the work's
192 | users, your or third parties' legal rights to forbid circumvention of
193 | technological measures.
194 | 
195 |   4. Conveying Verbatim Copies.
196 | 
197 |   You may convey verbatim copies of the Program's source code as you
198 | receive it, in any medium, provided that you conspicuously and
199 | appropriately publish on each copy an appropriate copyright notice;
200 | keep intact all notices stating that this License and any
201 | non-permissive terms added in accord with section 7 apply to the code;
202 | keep intact all notices of the absence of any warranty; and give all
203 | recipients a copy of this License along with the Program.
204 | 
205 |   You may charge any price or no price for each copy that you convey,
206 | and you may offer support or warranty protection for a fee.
207 | 
208 |   5. Conveying Modified Source Versions.
209 | 
210 |   You may convey a work based on the Program, or the modifications to
211 | produce it from the Program, in the form of source code under the
212 | terms of section 4, provided that you also meet all of these conditions:
213 | 
214 |     a) The work must carry prominent notices stating that you modified
215 |     it, and giving a relevant date.
216 | 
217 |     b) The work must carry prominent notices stating that it is
218 |     released under this License and any conditions added under section
219 |     7.  This requirement modifies the requirement in section 4 to
220 |     "keep intact all notices".
221 | 
222 |     c) You must license the entire work, as a whole, under this
223 |     License to anyone who comes into possession of a copy.  This
224 |     License will therefore apply, along with any applicable section 7
225 |     additional terms, to the whole of the work, and all its parts,
226 |     regardless of how they are packaged.  This License gives no
227 |     permission to license the work in any other way, but it does not
228 |     invalidate such permission if you have separately received it.
229 | 
230 |     d) If the work has interactive user interfaces, each must display
231 |     Appropriate Legal Notices; however, if the Program has interactive
232 |     interfaces that do not display Appropriate Legal Notices, your
233 |     work need not make them do so.
234 | 
235 |   A compilation of a covered work with other separate and independent
236 | works, which are not by their nature extensions of the covered work,
237 | and which are not combined with it such as to form a larger program,
238 | in or on a volume of a storage or distribution medium, is called an
239 | "aggregate" if the compilation and its resulting copyright are not
240 | used to limit the access or legal rights of the compilation's users
241 | beyond what the individual works permit.  Inclusion of a covered work
242 | in an aggregate does not cause this License to apply to the other
243 | parts of the aggregate.
244 | 
245 |   6. Conveying Non-Source Forms.
246 | 
247 |   You may convey a covered work in object code form under the terms
248 | of sections 4 and 5, provided that you also convey the
249 | machine-readable Corresponding Source under the terms of this License,
250 | in one of these ways:
251 | 
252 |     a) Convey the object code in, or embodied in, a physical product
253 |     (including a physical distribution medium), accompanied by the
254 |     Corresponding Source fixed on a durable physical medium
255 |     customarily used for software interchange.
256 | 
257 |     b) Convey the object code in, or embodied in, a physical product
258 |     (including a physical distribution medium), accompanied by a
259 |     written offer, valid for at least three years and valid for as
260 |     long as you offer spare parts or customer support for that product
261 |     model, to give anyone who possesses the object code either (1) a
262 |     copy of the Corresponding Source for all the software in the
263 |     product that is covered by this License, on a durable physical
264 |     medium customarily used for software interchange, for a price no
265 |     more than your reasonable cost of physically performing this
266 |     conveying of source, or (2) access to copy the
267 |     Corresponding Source from a network server at no charge.
268 | 
269 |     c) Convey individual copies of the object code with a copy of the
270 |     written offer to provide the Corresponding Source.  This
271 |     alternative is allowed only occasionally and noncommercially, and
272 |     only if you received the object code with such an offer, in accord
273 |     with subsection 6b.
274 | 
275 |     d) Convey the object code by offering access from a designated
276 |     place (gratis or for a charge), and offer equivalent access to the
277 |     Corresponding Source in the same way through the same place at no
278 |     further charge.  You need not require recipients to copy the
279 |     Corresponding Source along with the object code.  If the place to
280 |     copy the object code is a network server, the Corresponding Source
281 |     may be on a different server (operated by you or a third party)
282 |     that supports equivalent copying facilities, provided you maintain
283 |     clear directions next to the object code saying where to find the
284 |     Corresponding Source.  Regardless of what server hosts the
285 |     Corresponding Source, you remain obligated to ensure that it is
286 |     available for as long as needed to satisfy these requirements.
287 | 
288 |     e) Convey the object code using peer-to-peer transmission, provided
289 |     you inform other peers where the object code and Corresponding
290 |     Source of the work are being offered to the general public at no
291 |     charge under subsection 6d.
292 | 
293 |   A separable portion of the object code, whose source code is excluded
294 | from the Corresponding Source as a System Library, need not be
295 | included in conveying the object code work.
296 | 
297 |   A "User Product" is either (1) a "consumer product", which means any
298 | tangible personal property which is normally used for personal, family,
299 | or household purposes, or (2) anything designed or sold for incorporation
300 | into a dwelling.  In determining whether a product is a consumer product,
301 | doubtful cases shall be resolved in favor of coverage.  For a particular
302 | product received by a particular user, "normally used" refers to a
303 | typical or common use of that class of product, regardless of the status
304 | of the particular user or of the way in which the particular user
305 | actually uses, or expects or is expected to use, the product.  A product
306 | is a consumer product regardless of whether the product has substantial
307 | commercial, industrial or non-consumer uses, unless such uses represent
308 | the only significant mode of use of the product.
309 | 
310 |   "Installation Information" for a User Product means any methods,
311 | procedures, authorization keys, or other information required to install
312 | and execute modified versions of a covered work in that User Product from
313 | a modified version of its Corresponding Source.  The information must
314 | suffice to ensure that the continued functioning of the modified object
315 | code is in no case prevented or interfered with solely because
316 | modification has been made.
317 | 
318 |   If you convey an object code work under this section in, or with, or
319 | specifically for use in, a User Product, and the conveying occurs as
320 | part of a transaction in which the right of possession and use of the
321 | User Product is transferred to the recipient in perpetuity or for a
322 | fixed term (regardless of how the transaction is characterized), the
323 | Corresponding Source conveyed under this section must be accompanied
324 | by the Installation Information.  But this requirement does not apply
325 | if neither you nor any third party retains the ability to install
326 | modified object code on the User Product (for example, the work has
327 | been installed in ROM).
328 | 
329 |   The requirement to provide Installation Information does not include a
330 | requirement to continue to provide support service, warranty, or updates
331 | for a work that has been modified or installed by the recipient, or for
332 | the User Product in which it has been modified or installed.  Access to a
333 | network may be denied when the modification itself materially and
334 | adversely affects the operation of the network or violates the rules and
335 | protocols for communication across the network.
336 | 
337 |   Corresponding Source conveyed, and Installation Information provided,
338 | in accord with this section must be in a format that is publicly
339 | documented (and with an implementation available to the public in
340 | source code form), and must require no special password or key for
341 | unpacking, reading or copying.
342 | 
343 |   7. Additional Terms.
344 | 
345 |   "Additional permissions" are terms that supplement the terms of this
346 | License by making exceptions from one or more of its conditions.
347 | Additional permissions that are applicable to the entire Program shall
348 | be treated as though they were included in this License, to the extent
349 | that they are valid under applicable law.  If additional permissions
350 | apply only to part of the Program, that part may be used separately
351 | under those permissions, but the entire Program remains governed by
352 | this License without regard to the additional permissions.
353 | 
354 |   When you convey a copy of a covered work, you may at your option
355 | remove any additional permissions from that copy, or from any part of
356 | it.  (Additional permissions may be written to require their own
357 | removal in certain cases when you modify the work.)  You may place
358 | additional permissions on material, added by you to a covered work,
359 | for which you have or can give appropriate copyright permission.
360 | 
361 |   Notwithstanding any other provision of this License, for material you
362 | add to a covered work, you may (if authorized by the copyright holders of
363 | that material) supplement the terms of this License with terms:
364 | 
365 |     a) Disclaiming warranty or limiting liability differently from the
366 |     terms of sections 15 and 16 of this License; or
367 | 
368 |     b) Requiring preservation of specified reasonable legal notices or
369 |     author attributions in that material or in the Appropriate Legal
370 |     Notices displayed by works containing it; or
371 | 
372 |     c) Prohibiting misrepresentation of the origin of that material, or
373 |     requiring that modified versions of such material be marked in
374 |     reasonable ways as different from the original version; or
375 | 
376 |     d) Limiting the use for publicity purposes of names of licensors or
377 |     authors of the material; or
378 | 
379 |     e) Declining to grant rights under trademark law for use of some
380 |     trade names, trademarks, or service marks; or
381 | 
382 |     f) Requiring indemnification of licensors and authors of that
383 |     material by anyone who conveys the material (or modified versions of
384 |     it) with contractual assumptions of liability to the recipient, for
385 |     any liability that these contractual assumptions directly impose on
386 |     those licensors and authors.
387 | 
388 |   All other non-permissive additional terms are considered "further
389 | restrictions" within the meaning of section 10.  If the Program as you
390 | received it, or any part of it, contains a notice stating that it is
391 | governed by this License along with a term that is a further
392 | restriction, you may remove that term.  If a license document contains
393 | a further restriction but permits relicensing or conveying under this
394 | License, you may add to a covered work material governed by the terms
395 | of that license document, provided that the further restriction does
396 | not survive such relicensing or conveying.
397 | 
398 |   If you add terms to a covered work in accord with this section, you
399 | must place, in the relevant source files, a statement of the
400 | additional terms that apply to those files, or a notice indicating
401 | where to find the applicable terms.
402 | 
403 |   Additional terms, permissive or non-permissive, may be stated in the
404 | form of a separately written license, or stated as exceptions;
405 | the above requirements apply either way.
406 | 
407 |   8. Termination.
408 | 
409 |   You may not propagate or modify a covered work except as expressly
410 | provided under this License.  Any attempt otherwise to propagate or
411 | modify it is void, and will automatically terminate your rights under
412 | this License (including any patent licenses granted under the third
413 | paragraph of section 11).
414 | 
415 |   However, if you cease all violation of this License, then your
416 | license from a particular copyright holder is reinstated (a)
417 | provisionally, unless and until the copyright holder explicitly and
418 | finally terminates your license, and (b) permanently, if the copyright
419 | holder fails to notify you of the violation by some reasonable means
420 | prior to 60 days after the cessation.
421 | 
422 |   Moreover, your license from a particular copyright holder is
423 | reinstated permanently if the copyright holder notifies you of the
424 | violation by some reasonable means, this is the first time you have
425 | received notice of violation of this License (for any work) from that
426 | copyright holder, and you cure the violation prior to 30 days after
427 | your receipt of the notice.
428 | 
429 |   Termination of your rights under this section does not terminate the
430 | licenses of parties who have received copies or rights from you under
431 | this License.  If your rights have been terminated and not permanently
432 | reinstated, you do not qualify to receive new licenses for the same
433 | material under section 10.
434 | 
435 |   9. Acceptance Not Required for Having Copies.
436 | 
437 |   You are not required to accept this License in order to receive or
438 | run a copy of the Program.  Ancillary propagation of a covered work
439 | occurring solely as a consequence of using peer-to-peer transmission
440 | to receive a copy likewise does not require acceptance.  However,
441 | nothing other than this License grants you permission to propagate or
442 | modify any covered work.  These actions infringe copyright if you do
443 | not accept this License.  Therefore, by modifying or propagating a
444 | covered work, you indicate your acceptance of this License to do so.
445 | 
446 |   10. Automatic Licensing of Downstream Recipients.
447 | 
448 |   Each time you convey a covered work, the recipient automatically
449 | receives a license from the original licensors, to run, modify and
450 | propagate that work, subject to this License.  You are not responsible
451 | for enforcing compliance by third parties with this License.
452 | 
453 |   An "entity transaction" is a transaction transferring control of an
454 | organization, or substantially all assets of one, or subdividing an
455 | organization, or merging organizations.  If propagation of a covered
456 | work results from an entity transaction, each party to that
457 | transaction who receives a copy of the work also receives whatever
458 | licenses to the work the party's predecessor in interest had or could
459 | give under the previous paragraph, plus a right to possession of the
460 | Corresponding Source of the work from the predecessor in interest, if
461 | the predecessor has it or can get it with reasonable efforts.
462 | 
463 |   You may not impose any further restrictions on the exercise of the
464 | rights granted or affirmed under this License.  For example, you may
465 | not impose a license fee, royalty, or other charge for exercise of
466 | rights granted under this License, and you may not initiate litigation
467 | (including a cross-claim or counterclaim in a lawsuit) alleging that
468 | any patent claim is infringed by making, using, selling, offering for
469 | sale, or importing the Program or any portion of it.
470 | 
471 |   11. Patents.
472 | 
473 |   A "contributor" is a copyright holder who authorizes use under this
474 | License of the Program or a work on which the Program is based.  The
475 | work thus licensed is called the contributor's "contributor version".
476 | 
477 |   A contributor's "essential patent claims" are all patent claims
478 | owned or controlled by the contributor, whether already acquired or
479 | hereafter acquired, that would be infringed by some manner, permitted
480 | by this License, of making, using, or selling its contributor version,
481 | but do not include claims that would be infringed only as a
482 | consequence of further modification of the contributor version.  For
483 | purposes of this definition, "control" includes the right to grant
484 | patent sublicenses in a manner consistent with the requirements of
485 | this License.
486 | 
487 |   Each contributor grants you a non-exclusive, worldwide, royalty-free
488 | patent license under the contributor's essential patent claims, to
489 | make, use, sell, offer for sale, import and otherwise run, modify and
490 | propagate the contents of its contributor version.
491 | 
492 |   In the following three paragraphs, a "patent license" is any express
493 | agreement or commitment, however denominated, not to enforce a patent
494 | (such as an express permission to practice a patent or covenant not to
495 | sue for patent infringement).  To "grant" such a patent license to a
496 | party means to make such an agreement or commitment not to enforce a
497 | patent against the party.
498 | 
499 |   If you convey a covered work, knowingly relying on a patent license,
500 | and the Corresponding Source of the work is not available for anyone
501 | to copy, free of charge and under the terms of this License, through a
502 | publicly available network server or other readily accessible means,
503 | then you must either (1) cause the Corresponding Source to be so
504 | available, or (2) arrange to deprive yourself of the benefit of the
505 | patent license for this particular work, or (3) arrange, in a manner
506 | consistent with the requirements of this License, to extend the patent
507 | license to downstream recipients.  "Knowingly relying" means you have
508 | actual knowledge that, but for the patent license, your conveying the
509 | covered work in a country, or your recipient's use of the covered work
510 | in a country, would infringe one or more identifiable patents in that
511 | country that you have reason to believe are valid.
512 | 
513 |   If, pursuant to or in connection with a single transaction or
514 | arrangement, you convey, or propagate by procuring conveyance of, a
515 | covered work, and grant a patent license to some of the parties
516 | receiving the covered work authorizing them to use, propagate, modify
517 | or convey a specific copy of the covered work, then the patent license
518 | you grant is automatically extended to all recipients of the covered
519 | work and works based on it.
520 | 
521 |   A patent license is "discriminatory" if it does not include within
522 | the scope of its coverage, prohibits the exercise of, or is
523 | conditioned on the non-exercise of one or more of the rights that are
524 | specifically granted under this License.  You may not convey a covered
525 | work if you are a party to an arrangement with a third party that is
526 | in the business of distributing software, under which you make payment
527 | to the third party based on the extent of your activity of conveying
528 | the work, and under which the third party grants, to any of the
529 | parties who would receive the covered work from you, a discriminatory
530 | patent license (a) in connection with copies of the covered work
531 | conveyed by you (or copies made from those copies), or (b) primarily
532 | for and in connection with specific products or compilations that
533 | contain the covered work, unless you entered into that arrangement,
534 | or that patent license was granted, prior to 28 March 2007.
535 | 
536 |   Nothing in this License shall be construed as excluding or limiting
537 | any implied license or other defenses to infringement that may
538 | otherwise be available to you under applicable patent law.
539 | 
540 |   12. No Surrender of Others' Freedom.
541 | 
542 |   If conditions are imposed on you (whether by court order, agreement or
543 | otherwise) that contradict the conditions of this License, they do not
544 | excuse you from the conditions of this License.  If you cannot convey a
545 | covered work so as to satisfy simultaneously your obligations under this
546 | License and any other pertinent obligations, then as a consequence you may
547 | not convey it at all.  For example, if you agree to terms that obligate you
548 | to collect a royalty for further conveying from those to whom you convey
549 | the Program, the only way you could satisfy both those terms and this
550 | License would be to refrain entirely from conveying the Program.
551 | 
552 |   13. Use with the GNU Affero General Public License.
553 | 
554 |   Notwithstanding any other provision of this License, you have
555 | permission to link or combine any covered work with a work licensed
556 | under version 3 of the GNU Affero General Public License into a single
557 | combined work, and to convey the resulting work.  The terms of this
558 | License will continue to apply to the part which is the covered work,
559 | but the special requirements of the GNU Affero General Public License,
560 | section 13, concerning interaction through a network will apply to the
561 | combination as such.
562 | 
563 |   14. Revised Versions of this License.
564 | 
565 |   The Free Software Foundation may publish revised and/or new versions of
566 | the GNU General Public License from time to time.  Such new versions will
567 | be similar in spirit to the present version, but may differ in detail to
568 | address new problems or concerns.
569 | 
570 |   Each version is given a distinguishing version number.  If the
571 | Program specifies that a certain numbered version of the GNU General
572 | Public License "or any later version" applies to it, you have the
573 | option of following the terms and conditions either of that numbered
574 | version or of any later version published by the Free Software
575 | Foundation.  If the Program does not specify a version number of the
576 | GNU General Public License, you may choose any version ever published
577 | by the Free Software Foundation.
578 | 
579 |   If the Program specifies that a proxy can decide which future
580 | versions of the GNU General Public License can be used, that proxy's
581 | public statement of acceptance of a version permanently authorizes you
582 | to choose that version for the Program.
583 | 
584 |   Later license versions may give you additional or different
585 | permissions.  However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 | 
589 |   15. Disclaimer of Warranty.
590 | 
591 |   THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 | 
600 |   16. Limitation of Liability.
601 | 
602 |   IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 | 
612 |   17. Interpretation of Sections 15 and 16.
613 | 
614 |   If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 | 
621 |                      END OF TERMS AND CONDITIONS
622 | 
623 |             How to Apply These Terms to Your New Programs
624 | 
625 |   If you develop a new program, and you want it to be of the greatest
626 | possible use to the public, the best way to achieve this is to make it
627 | free software which everyone can redistribute and change under these terms.
628 | 
629 |   To do so, attach the following notices to the program.  It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 | 
634 |     <one line to give the program's name and a brief idea of what it does.>
635 |     Copyright (C) <year>  <name of author>
636 | 
637 |     This program is free software: you can redistribute it and/or modify
638 |     it under the terms of the GNU General Public License as published by
639 |     the Free Software Foundation, either version 3 of the License, or
640 |     (at your option) any later version.
641 | 
642 |     This program is distributed in the hope that it will be useful,
643 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
644 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
645 |     GNU General Public License for more details.
646 | 
647 |     You should have received a copy of the GNU General Public License
648 |     along with this program.  If not, see <https://www.gnu.org/licenses/>.
649 | 
650 | Also add information on how to contact you by electronic and paper mail.
651 | 
652 |   If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 | 
655 |     <program>  Copyright (C) <year>  <name of author>
656 |     This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 |     This is free software, and you are welcome to redistribute it
658 |     under certain conditions; type `show c' for details.
659 | 
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License.  Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 | 
664 |   You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | <https://www.gnu.org/licenses/>.
668 | 
669 |   The GNU General Public License does not permit incorporating your program
670 | into proprietary programs.  If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library.  If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License.  But first, please read
674 | <https://www.gnu.org/licenses/why-not-lgpl.html>.
675 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # AVDC
  2 | 
  3 | 
  4 | # 目录
  5 | * [1.简介](#1简介)<br>
  6 | * [2.反馈](#2反馈)
  7 | * [3.常见番号命名规范(必看!!!!!!!!!)](#3常见番号命名规范)
  8 | * [4.效果图](#4效果图)
  9 |     * [界面截图](#41界面截图)
 10 |     * [文件结构](#43文件结构)
 11 |     * [媒体库](#44媒体库)
 12 | * [5.如何使用](#5如何使用)
 13 |     * [下载](#51下载)
 14 |     * [简要教程](#52简要教程)
 15 |     * [模块安装](#53模块安装)
 16 |     * [配置设置](#54配置设置)
 17 | * [6.工具](#6工具)
 18 | * [7.异常处理（重要）](#7异常处理重要)
 19 | * [8.关于群晖NAS](#8关于群晖NAS)
 20 | * [9.FAQ](#9FAQ)
 21 | * [10.故事](#10故事)
 22 | * [11.申明](#11申明)
 23 | * [12.写在后面](#12写在后面)
 24 | 
 25 | 
 26 | # 1.简介
 27 | **[命令行版](https://github.com/yoshiko2/AV_Data_Capture)(原作者)**：<br>
 28 | <a title="Hits" target="_blank" href="https://github.com/yoshiko2/AV_Data_Capture"><img src="https://hits.b3log.org/yoshiko2/AV_Data_Capture.svg"></a>
 29 | ![](https://img.shields.io/badge/build-passing-brightgreen.svg?style=flat-square)
 30 | ![](https://img.shields.io/github/downloads/yoshiko2/av_data_capture/total.svg?style=flat-square)
 31 | ![](https://img.shields.io/github/license/yoshiko2/av_data_capture.svg?style=flat-square)
 32 | ![](https://img.shields.io/github/release/yoshiko2/av_data_capture.svg?style=flat-square)
 33 | ![](https://img.shields.io/badge/Python-3.7-yellow.svg?style=flat-square&logo=python)<br>
 34 | **GUI版(本项目)**：<br>
 35 | <a title="Hits" target="_blank" href="https://github.com/moyy996/avdc"><img src="https://hits.b3log.org/moyy996/AVDC.svg"></a>
 36 | ![](https://img.shields.io/badge/build-passing-brightgreen.svg?style=flat-square)
 37 | ![](https://img.shields.io/github/downloads/moyy996/avdc/total.svg?style=flat-square)
 38 | ![](https://img.shields.io/github/license/moyy996/avdc.svg?style=flat-square)
 39 | ![](https://img.shields.io/github/release/moyy996/avdc.svg?style=flat-square)
 40 | ![](https://img.shields.io/badge/Python-3.7-yellow.svg?style=flat-square&logo=python)
 41 | ![](https://img.shields.io/badge/Pyqt-5-blue.svg?style=flat-square)<br>
 42 | 
 43 | ## 主要功能
 44 | * **日本电影元数据 抓取工具 | 刮削器**，配合本地影片管理软件EMBY,KODI，PLEX等管理本地影片，该软件起到分类与元数据抓取作用，利用元数据信息来分类，供本地影片分类整理使用。<br>
 45 | * 可**批量抓取**，也可**单个抓取**。可抓取**多集视频**（-cd1/-cd2）,带**字幕**作品（-c., -C.）。<br>
 46 | * 可抓取**子目录下视频**：遍历**视频目录及子目录**（除指定的**排除目录**），对遍历到的所有视频进行刮削，成功则同**元数据、封面图**一起输出到**JAV_output**目录，失败移入**failed**目录。
 47 | * 目前可抓取网站：**jav321,javbus,javdb,avsox,fc2club,dmm,mgstage**。<br>
 48 | * 批量添加Emby演员头像。<br>
 49 | * 封面可添加无码、字幕、流出水印。<br>
 50 | 
 51 | # 2.反馈
 52 | * 欢迎使用体验,有**程序BUG问题（带截图提问）、功能建议**,可进**电报群**反馈    [点击进群](https://t.me/joinchat/J54y1g3-a7nxJ_-WS4-KFQ)<br>
 53 | 
 54 | # 3.常见番号命名规范
 55 | **刮削前尽量命名规范！！！！**
 56 | **不区分大小写**<br>
 57 | 
 58 | ### 1、标准有码
 59 | * **Javdb、Javbus、Jav321**:  SSNI-111
 60 | * **Dmm**:  ssni00111
 61 | ### 2、无码
 62 | * **Javdb、Javbus、Avsox**:  111111-1111、111111_111、HEYZO-1111、n1111
 63 | * **Jav321**: HEYZO-1111
 64 | ### 3、素人
 65 | * **Jav321、Mgstage**:  259LUXU-1111
 66 | * **Jav321、Javdb**:  LUXU-1111
 67 | * **Fc2club**:  FC2-111111、FC2-PPV-111111
 68 | ### 4、欧美
 69 | * **Javdb、Javbus**:  sexart.11.11.11(系列.年.月.日)
 70 | ### 5、自带字幕影片
 71 | 可以把电影命名为类似**ssni-xxx-c.mp4,ssni-xxx-C.mp4，abp-xxx-CD1-C.mp4**的规则。
 72 | ### 6、多集影片
 73 | 可以把多集电影按照集数后缀命名为类似**ssni-xxx-cd1.mp4,ssni-xxx-cd2.mp4，abp-xxx-CD1-C.mp4**的规则，只要含有```-CDn/-cdn```类似命名规则，即可使用分集功能.**不支持-A -B -1 -2,容易跟字幕的-C混淆**.
 74 | ### 7、多集、字幕顺序
 75 | **abp-xxx-CD1-C.mp4**，**分集在前，字幕在后，字幕必须与拓展名靠近，-C.mp4**.
 76 | ### 8、外挂字幕文件
 77 | **字幕文件名**必须与**影片文件名**一致，才可以一起移动到新目录，目前支持**srt ass sub**类型的字幕文件。
 78 | ### 9、流出影片
 79 | **影片文件名**包含**流出**即可。
 80 | 
 81 | # 4.效果图
 82 | ## 4.1.界面截图
 83 | **主界面，设置，工具，关于**
 84 | 
 85 | <div align="center">
 86 | <img src="https://github.com/moyy996/AVDC/blob/master/readme/main_window.png" height="300">
 87 | <img src="https://github.com/moyy996/AVDC/blob/master/readme/setting.gif" height="300">
 88 | </div>
 89 | <div align="center">
 90 | <img src="https://github.com/moyy996/AVDC/blob/master/readme/tool.png" height="300">
 91 | <img src="https://github.com/moyy996/AVDC/blob/master/readme/about.png" height="300">
 92 | </div>
 93 | 
 94 | ## 4.2.**查看成功番号的信息(GIF演示)**
 95 | <div>
 96 | <img src="https://github.com/moyy996/AVDC/blob/master/readme/主页面.gif" height="500">
 97 | </div>
 98 | 
 99 | 
100 | ## 4.3.**文件结构**<br>
101 | 
102 | <div>
103 | <img src="https://github.com/moyy996/AVDC/blob/master/readme/tree-jav-output.png" height="700">
104 | </div>
105 | 
106 | ## 4.4.媒体库
107 | **以下为刮削、导入后的EMBY**<br>
108 | 
109 | <div>
110 | <img src="https://github.com/moyy996/AVDC/blob/master/readme/emby.png" height="400">
111 | <img src="https://github.com/moyy996/AVDC/blob/master/readme/emby_each.png" height="400">
112 | </div>
113 | 
114 | # 5.如何使用
115 | ## 5.1.下载
116 | * **Release** 的程序可脱离**python环境**运行，源码包需要 [安装模块](#53模块安装)<br>
117 | * **Release** 下载地址(**仅限Windows**): [点击下载](https://github.com/moyy996/AVDC/releases)<br>
118 | * **源码包** 下载地址(**Windows,Linux,MacOS**): [点击下载](https://github.com/moyy996/AVDC/archive/master.zip)<br>
119 | 
120 | * Windows Python环境: [点击前往](https://www.python.org/downloads/windows/) 选中executable installer下载
121 | * MacOS Python环境： [点击前往](https://www.python.org/downloads/mac-osx/)
122 | * Linux Python环境：Linux用户懂的吧，不解释下载地址
123 | 
124 | ## 5.2.简要教程:<br>
125 | * **(1).运行AVDC.exe/AVDC_Main.py，配置设置页各项（配置方法请看以下[教程](#54配置设置)）**<br>
126 | * **(2).把视频所在目录填在设置->目录设置->视频目录。**<br>
127 | * **(3).在主页面点击开始等待完成(出错请开调试模式后截图)**<br>
128 | * **(4).软件会自动把元数据获取成功的电影移动到```成功输出目录```中，根据演员分类，失败的电影移动到```失败输出目录```中（可选不移动）。**<br>
129 | * **(5).把JAV_output导入至KODI,EMBY,PLEX中。**<br>
130 | 
131 | ## 5.3.模块安装
132 | 如果运行**源码**版，运行前请安装**Python环境**和安装以下**模块**<br>  
133 | 在终端/cmd/Powershell中输入以下代码来安装模块,两种方法任选其一。<br>
134 | * **5.3.1、批量**从py-require.txt安装<br>
135 | >pip install -r py-require.txt<br>
136 | 
137 | * **5.3.2、单个**按需安装<br>
138 | >pip install requests<br>
139 | >pip install pyquery<br>
140 | >pip install lxml<br>
141 | >pip install Beautifulsoup4<br>
142 | >pip install pillow<br>
143 | >pip install pyqt5<br>
144 | 
145 | ## 5.4.配置设置
146 | **设置界面**
147 | ![](https://github.com/moyy996/AVDC/blob/master/readme/setting.gif)
148 | 
149 | ---
150 | ### 普通设置
151 | ### 5.4.1.模式
152 |   **1、刮削模式**：通过番号刮削数据，包括元数据、封面图、缩略图、背景图。<br>
153 |   **2、整理模式**：仅根据女优把电影命名为番号并分类到女优名称的文件夹下。<br>
154 | 
155 | ### 5.4.2.软链接模式
156 |   使用此模式，要以```管理员身份```运行。<br>
157 |   刮削完**不移动视频**，而是在相应目录创建**软链接**（类似于快捷方式），方便PT下载完既想刮削又想继续上传的仓鼠党同志。<br>
158 |   但是，只能在媒体库展示，**不能在媒体库播放**。<br>
159 | 
160 | ### 5.4.3.调试模式
161 |   输出番号的**元数据**，包括封面，导演，演员，简介等。
162 | 
163 | ### 5.4.4.检测更新
164 | 点击**开始**后，检测是否有新版本。<br>
165 | 
166 | ### 5.4.5.保存日志
167 | 开启后日志保存在程序目录的**Log**目录下的**txt文件**内，每次运行会产生一个txt文件，**txt文件可以删除**，不影响程序运行。<br>
168 | 
169 | ### 5.4.6.失败后移动文件
170 | 如果刮削不到影片信息，可选择不移动视频，或者自动移动到**失败输出目录**中。<br>
171 | 
172 | ### 5.4.7.网站选择
173 | 可以使用**所有网站**，或者指定网站（**jav321,avsox,javbus,dmm,javdb,fc2club，mgstage**）进行刮削。<br>
174 | **仅使用javdb进行刮削**，尽量不要用，刮削30个左右会被JAVDB封IP一段时间。<br>
175 | 
176 | ---
177 | ### 目录设置
178 | ### 5.4.8.命名规则
179 |   **1、目录命名**：存放视频数据的目录名，支持**多层目录**，支持**自定义符号**，例：[actor]/studio/number-【title】。<br>
180 |   **2、视频标题（媒体库中）**：nfo中的标题命名。例：number-[title]。可以自定义符号。<br>
181 |   **3、视频标题（本地文件）**：本地视频、图片的命名。例：number-[title]。可以自定义符号。<br>
182 |   **4、可选项**为title（片名）、actor（演员）、studio（制作商）、director（导演）、release（发售日）、year（发行年份）、number（番号）、runtime（时长）、series（系列）、publisher（发行商）<br>
183 |   
184 | ### 5.4.9.目录设置
185 |   **1、视频目录**：要整理的视频的目录，**带盘符的绝对路径**，会遍历此目录下的**所有视频**，包括**子目录**中。<br>
186 |   **2、排除目录**：在多层目录刮削时，**排除所填目录**。<br>
187 |   **3、视频、字幕类型**：程序搜索不到想要的文件类型，可自行按格式添加。<br>
188 |   **4、失败输出目录**：开启失败移动视频后，失败的视频会移动到此目录。<br>
189 |   **5、成功输出目录**：刮削成功的视频，会在此目录创建文件夹，并移动视频、下载图片、写入nfo到此目录。<br>
190 |   
191 | ---
192 | ### 水印设置
193 | ### 5.4.10.水印设置
194 |   **1、封面图、缩略图添加水印**：可选择封面图、缩略图是否添加水印。<br>
195 |   **2、水印类型**：可选择添加无码、字幕、流出三种水印。<br>
196 |   **3、首个水印位置**：可选择添加左上、左下、右上、右下四个位置。<br>
197 |   **4、水印大小**：有五个等级可调节。<br>
198 |   **5、说明**：**多个水印**时，从首个水印开始**顺时针**添加。**水印文件**可**自定义**，要求长宽500x300、背景透明、png格式。  <br>
199 | 
200 | ---
201 | ### 其它设置
202 | ### 5.4.11.代理设置 
203 |   **1、代理**：设置本地代理地址和端口。代理软件开**全局模式**  ,**使用DMM网站时需要使用日本代理**。<br>
204 |   **2、超时重试设置**：单位：秒，**可选范围3-10**。<br>
205 |   **3、连接重试次数**：**可选范围2-5**。<br>
206 | 
207 | ### 5.4.12.排除设置
208 | **1、排除字符**:指定字符删除，例如```排除字符： \()```，删除创建文件夹时的```\()```字符。<br>
209 | **2、排除字符串**:提取番号时，先删除指定字符串，提高成功率，字符串之间用','隔开。<br>
210 | 
211 | ### 5.4.13.无码封面
212 | **1、封面类型**:可选官方(完整、不清晰)、裁剪(清晰、不完整)<br>
213 | **2、说明**:官方无图，会自动使用缩略图裁剪。<br>
214 | 
215 | ### 5.4.14.无码番号
216 | 添加HEYZO、n1111、111111-111、111111_111以外的无码番号**前缀**。例如S2M、SMD、LAF。<br>
217 | 
218 | ---
219 | # 6.工具
220 | **工具界面**
221 | ![](https://github.com/moyy996/AVDC/blob/master/readme/tool.png)
222 | **1、视频移动**：可将**视频目录**下除排除目录下的所有视频以及同名字幕，移动到**视频目录**下的**Movie_moved**目录下。<br><br>
223 | **2、单文件刮削**：偶尔有失败情况时，选择这个视频文件，使用文件名当番号进行刮削。<br>
224 | &emsp;&ensp;**建议**的使用流程：到某网站找到这个番号,把番号改成网站上的规范番号,选用对应的网站刮削。<br>
225 | &emsp;&ensp;**条件**：文件名至少与一个网站上的番号相同，没有多余的内容只有番号为最佳，可以让软件更好获取元数据。<br>
226 | 对于多影片重命名，可以用[ReNamer](http://www.den4b.com/products/renamer)来批量重命名<br><br>
227 | **3、Emby批量添加头像**：头像文件放在程序所在目录的Actor目录下，填写emby网址、api密钥即可使用。[头像包下载](https://github.com/moyy996/AVDC/releases/tag/%E5%A4%B4%E5%83%8F%E5%8C%85-2)<br>
228 | 可查看有头像，无头像女优，可往emby添加头像的女优。<br><br>
229 | **功能更强大、头像更丰富的头像仓库及上传工具 ===>>> [GFriend头像库](https://github.com/xinxin8816/gfriends)**<br><br>
230 | **4、裁剪封面**：针对封面图比例错误，分辨率低的情况，判断人脸位置，裁剪缩略图(thumb)为封面图(poster)。<br><br>
231 | 
232 | # 7.异常处理（重要）
233 | 
234 | ---
235 | ## 7.1.关于软件打不开
236 | * 请确保软件是完整的！**AVDC.exe、AVDC-ico.png、config.ini**需要在同一目录下，并确保ini文件的内容与下载包提供的ini文件内容一致！<br>
237 | 
238 | ---
239 | ## 7.2.关于软件闪退
240 | * 尝试**重新运行**<br>
241 | * 还解决不了，查看**log**日志，尝试以下**7.3、7.4**解决<br>
242 | 
243 | ---
244 | ## 7.3.网络错误
245 | ##### * (1).报```Connect Failed! Please check your Proxy or Network!```错误<br>
246 | ##### * (2).报```Updata_check``` 和 ```JSON``` 相关的错误<br>
247 | ##### * (3).关于```Nonetype,xpath```报错<br>
248 | ##### * (4).关于```KeyError```报错<br>
249 | * 上述错误都可能是**代理问题**，尝试以下办法解决:
250 |     * 使用DMM，如不是日本代理，**请更换日本代理**，确保可以打开[这个网址](https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=ssni518/?dmmref=aMonoDvd_List/)<br>
251 |     * 把代理设置中的**代理：后面的地址和端口删除**<br>
252 |     * 开启代理软件**全局模式**<br>
253 | 
254 | ---
255 | ## 7.4.关于番号提取失败或者异常
256 | * 查看命名是否符合[常见番号命名规范](#3常见番号命名规范)。<br>
257 | * 目前可以提取信息的网址:**JAV321、JAVBUS、JAVDB、AVSOX、dmm、FC2CLUB、mgstage**，请确保视频名能在这些网站找到<br>
258 | * 使用**工具页里的单个视频刮削**，选择**刮削网站**，进行刮削。<br>
259 | 
260 | ---
261 | ## 7.5.PLEX不显示封面
262 | 请安装插件：[**XBMCnfoMoviesImporter**](https://github.com/gboudreau/XBMCnfoMoviesImporter.bundle)
263 | 
264 | 
265 | # 8.关于群晖NAS
266 | 开启SMB在Windows上映射为本地磁盘(要分配盘符)即可使用本软件，也适用于其他NAS
267 | 
268 | # 9.FAQ
269 | ## 9.1.这软件能下片吗？
270 | * 该软件不提供任何影片下载地址，仅供本地影片分类整理使用。
271 | ## 9.2.什么是元数据？
272 | * 元数据包括了影片的：封面，导演，演员，简介，类型......
273 | ## 9.3.软件收费吗？
274 | * 软件永久免费。**除了作者钦点以外**
275 | ## 9.4.软件运行异常怎么办？
276 | * 认真看 [异常处理（重要）](#7异常处理重要)
277 | 
278 | # 10.故事
279 | [点击跳转至原作者博客文章](https://yoshiko2.github.io/2019/10/18/AVDC/)
280 | 
281 | # 11.申明
282 | 当你查阅、下载了本项目源代码或二进制程序，即代表你接受了以下条款
283 | 
284 | * 本软件仅供技术交流，学术交流使用
285 | * **请勿在热门的社交平台上宣传此项目**
286 | * 本软件作者编写出该软件旨在学习 Python ，提高编程水平
287 | * 本软件不提供任何影片下载的线索
288 | * 用户在使用本软件前，请用户了解并遵守当地法律法规，如果本软件使用过程中存在违反当地法律法规的行为，请勿使用该软件
289 | * 用户在使用本软件时，若用户在当地产生一切违法行为由用户承担
290 | * 严禁用户将本软件使用于商业和个人其他意图
291 | * 源代码和二进制程序请在下载后24小时内删除
292 | * 本软件作者yoshiko2保留最终决定权和最终解释权
293 | * 若用户不同意上述条款任意一条，请勿使用本软件
294 | ---
295 | When you run the software, you accept the following terms
296 | 
297 | * This software is only for technical exchange and academic exchange
298 | * **Please do not promote this project on popular social platforms**
299 | * The software author wrote this software to learn Python and improve programming
300 | * This software does not provide any clues for video download
301 | * Before using this software, please understand and abide by local laws and regulations. If there is any violation of local laws and regulations during the use of this software, please do not use this software
302 | * When the user uses this software, if the user commits any illegal acts in the local area, the user shall bear all responsibility
303 | * It is strictly forbidden for users to use this software for commercial and personal intentions
304 | * Please delete the source code and binary program within 24 hours after downloading
305 | * The author of this software yoshiko2 reserves the right of final decision and final interpretation
306 | * If the user does not agree with any of the above terms, please do not use this software
307 | ---
308 | このソフトウェアを実行すると、次の条件に同意したことになります
309 | 
310 | * このソフトウェアは、技術交換、学術交換専用です。
311 | * **人気のソーシャルプラットフォームでこのプロジェクトを宣伝しないでください**
312 | * ソフトウェアの作成者は、Pythonを学習してプログラミングを改善するためにこのソフトウェアを作成しました
313 | * このソフトウェアは、ビデオダウンロードの手がかりを提供しません
314 | * 本ソフトウェアを使用する前に、現地の法令を理解し、遵守してください。本ソフトウェアの使用中に現地の法令に違反する場合は、本ソフトウェアを使用しないでください
315 | * 本ソフトウェアをご利用の際、地域で違法行為を行った場合は、お客様の負担となります。
316 | * ユーザーがこのソフトウェアを商業的および個人的な目的で使用することは固く禁じられています
317 | * ダウンロード後24時間以内にソースコードとバイナリプログラムを削除してください
318 | * このソフトウェアの作者yoshiko2は、最終決定および最終解釈の権利を留保します。
319 | * ユーザーが上記の条件のいずれかに同意しない場合は、このソフトウェアを使用しないでください
320 | 
321 | # 12.写在后面
322 | 怎么样，看着自己的日本电影被这样完美地管理，是不是感觉成就感爆棚呢?<br>
323 | 
324 | 
325 | 
326 | 


--------------------------------------------------------------------------------
/config.ini:
--------------------------------------------------------------------------------
 1 | [common]
 2 | main_mode = 1
 3 | failed_output_folder = failed
 4 | success_output_folder = JAV_output
 5 | failed_file_move = 1
 6 | soft_link = 0
 7 | show_poster = 1
 8 | website = all
 9 | # all or mgstage or fc2club or javbus or jav321 or javdb or avsox or xcity or dmm
10 | 
11 | [proxy]
12 | type = http
13 | proxy = 127.0.0.1:10809
14 | timeout = 7
15 | retry = 3
16 | # type: no, http, socks5
17 | 
18 | [Name_Rule]
19 | folder_name = actor/number-title-release
20 | naming_media = number-title
21 | naming_file = number
22 | 
23 | [update]
24 | update_check = 1
25 | 
26 | [log]
27 | save_log = 1
28 | 
29 | [media]
30 | media_type = .mp4|.avi|.rmvb|.wmv|.mov|.mkv|.flv|.ts|.webm|.MP4|.AVI|.RMVB|.WMV|.MOV|.MKV|.FLV|.TS|.WEBM
31 | sub_type = .smi|.srt|.idx|.sub|.sup|.psb|.ssa|.ass|.txt|.usf|.xss|.ssf|.rt|.lrc|.sbv|.vtt|.ttml
32 | media_path = E:/TEMP
33 | 
34 | [escape]
35 | literals = \()
36 | folders = failed,JAV_output
37 | string = 1080p,720p,22-sht.me,-HD
38 | 
39 | [debug_mode]
40 | switch = 1
41 | 
42 | [emby]
43 | emby_url = localhost:8096
44 | api_key = 7c3529128b0542b081d32991b536fb9d
45 | 
46 | [mark]
47 | poster_mark = 1
48 | thumb_mark = 1
49 | mark_size = 3
50 | mark_type = SUB,LEAK,UNCENSORED
51 | mark_pos = top_left
52 | # mark_size : range 1-5
53 | # mark_type : any of SUB, LEAK, UNCENSORED (comma-separated)
54 | # mark_pos  : bottom_right or bottom_left or top_right or top_left
55 | 
56 | [uncensored]
57 | uncensored_prefix = S2M|BT|LAF|SMD
58 | uncensored_poster = 0
59 | # 0 : official, 1 : cut
60 | 
61 | [file_download]
62 | nfo = 1
63 | poster = 1
64 | fanart = 1
65 | thumb = 1
66 | 
67 | [extrafanart]
68 | extrafanart_download = 0
69 | extrafanart_folder = extrafanart
70 | 


--------------------------------------------------------------------------------
/py-require.txt:
--------------------------------------------------------------------------------
1 | lxml
2 | pyqt5
3 | pyqt5-tools
4 | pillow
5 | bs4
6 | pyquery
7 | requests
8 | baidu-aip


--------------------------------------------------------------------------------
/readme/about.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yq8519/AVDC/42ea8c08b26ac956f08e6cd0e88ffd83819c8b62/readme/about.png


--------------------------------------------------------------------------------
/readme/emby.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yq8519/AVDC/42ea8c08b26ac956f08e6cd0e88ffd83819c8b62/readme/emby.png


--------------------------------------------------------------------------------
/readme/emby_each.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yq8519/AVDC/42ea8c08b26ac956f08e6cd0e88ffd83819c8b62/readme/emby_each.png


--------------------------------------------------------------------------------
/readme/main_window.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yq8519/AVDC/42ea8c08b26ac956f08e6cd0e88ffd83819c8b62/readme/main_window.png


--------------------------------------------------------------------------------
/readme/setting.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yq8519/AVDC/42ea8c08b26ac956f08e6cd0e88ffd83819c8b62/readme/setting.gif


--------------------------------------------------------------------------------
/readme/tool.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yq8519/AVDC/42ea8c08b26ac956f08e6cd0e88ffd83819c8b62/readme/tool.png


--------------------------------------------------------------------------------
/readme/tree-jav-output.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yq8519/AVDC/42ea8c08b26ac956f08e6cd0e88ffd83819c8b62/readme/tree-jav-output.png


--------------------------------------------------------------------------------
/readme/主页面.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yq8519/AVDC/42ea8c08b26ac956f08e6cd0e88ffd83819c8b62/readme/主页面.gif


--------------------------------------------------------------------------------
/update_check.json:
--------------------------------------------------------------------------------
1 | {
2 | 	"version": "3.964",
3 | 	"version_show":"3.964",
4 | 	"download": "https://github.com/moyy996/AVDC/releases"
5 | }
6 | 


--------------------------------------------------------------------------------