├── Client.py ├── DMM ├── ActressSpider.py ├── CommonTools.py ├── DataBase.py ├── IdolRank.py ├── IdolSpider.py ├── MediaDownloader.py ├── TagSpider.py ├── WorkInfoSpider.py ├── WorkSpider.py └── __init__.py ├── README.md └── preview ├── app.PNG ├── cover.PNG ├── idol_info.PNG ├── proxy.PNG ├── work_info.PNG └── 立花里子.jpg /Client.py: -------------------------------------------------------------------------------- 1 | 2 | import DMM; 3 | import requests; 4 | import os; 5 | from functools import reduce; 6 | import threadpool; 7 | 8 | MAX_THREAD_NUM = 3; 9 | 10 | API_KEY = "q8IbzsBPD4N1xtz2fe2aYhg0e4v0JOLq"; 11 | API_SECRET = "ZHTD0ESfmeu8OOaP_6SyhKYS7h4nLC5X"; 12 | FACESET_TAG = "CoverFace"; 13 | 14 | DETECT_FACE_API = "https://api-cn.faceplusplus.com/facepp/v3/detect" 15 | ADD_FACE_API = "https://api-cn.faceplusplus.com/facepp/v3/faceset/addface" 16 | SEARCH_FACE_API = "https://api-cn.faceplusplus.com/facepp/v3/search"; 17 | 18 | DETAIL_FACESET_API = "https://api-cn.faceplusplus.com/facepp/v3/faceset/getdetail"; 19 | CREATE_FACESET_API = "https://api-cn.faceplusplus.com/facepp/v3/faceset/create"; 20 | GET_FACESETS_API = "https://api-cn.faceplusplus.com/facepp/v3/faceset/getfacesets"; 21 | 22 | def fetchCoverFace(dmm_code): 23 | work = getWorkInfo(dmm_code); 24 | return _fetchCoverFace(work); 25 | 26 | def _getBiggestFace(faceA,faceB): 27 | sizeA = int(faceA["face_rectangle"]["width"])*int(faceA["face_rectangle"]["height"]); 28 | sizeB = int(faceB["face_rectangle"]["width"])*int(faceB["face_rectangle"]["height"]); 29 | if sizeB>sizeA: 30 | return faceB; 31 | else: 32 | return faceA; 33 | 34 | def _fetchCoverFace(work): 35 | if not work["cover"]: 36 | return None; 37 | if "coverFace" in work: 38 | return True; 39 | print("-"+work["dmmCode"]); 40 | try: 41 | response = requests.post(DETECT_FACE_API,{"api_key":API_KEY,"api_secret":API_SECRET,"image_url":work["cover"]},timeout=10).json(); 42 | work["coverFace"] = reduce(_getBiggestFace,response["faces"])["face_token"]; 43 
| DMM.saveWorkInfo(work);#获取到的面部为空或者请求超时 直接跳过此步骤 44 | print("+"+work["dmmCode"]); 45 | return True; 46 | except: 47 | print("?"+work["dmmCode"]); 48 | print(response); 49 | return False; 50 | 51 | def fetchCoverFaces(): 52 | works = DMM.getWorkList({"performer":{"$size":1}},{"dmmCode":1,"cover":1,"coverFace":1}); 53 | pool = threadpool.ThreadPool(MAX_THREAD_NUM); 54 | requests = threadpool.makeRequests(_fetchCoverFace,works); 55 | [pool.putRequest(req) for req in requests]; 56 | pool.wait(); 57 | 58 | def getFaceSets(): 59 | response = requests.post(GET_FACESETS_API,{"api_key":API_KEY,"api_secret":API_SECRET},timeout=10).json(); 60 | if "facesets" not in response: 61 | return []; 62 | else: 63 | return response["facesets"]; 64 | 65 | 66 | def getCoverFaceSets(): 67 | response = requests.post(GET_FACESETS_API,{"api_key":API_KEY,"api_secret":API_SECRET,"tags":FACESET_TAG},timeout=10).json(); 68 | if "facesets" not in response: 69 | return []; 70 | else: 71 | def _getOuterId(faceset): 72 | return faceset["outer_id"]; 73 | return list(map(_getOuterId,response["facesets"])); 74 | 75 | def createCoverFaceSet(id): 76 | name = FACESET_TAG+"_"+str(id); 77 | response = requests.post(CREATE_FACESET_API,{"api_key":API_KEY,"api_secret":API_SECRET,"tags":FACESET_TAG,"outer_id":name,"display_name":name},timeout=10).json(); 78 | if "error_message" in response: 79 | print(response["error_message"]); 80 | if response["error_message"]=="FACESET_EXIST": 81 | return createCoverFaceSet(id+1); 82 | else: 83 | return False; 84 | else: 85 | print("+FACESET:"+str(id)); 86 | return id; 87 | 88 | 89 | def getCoverFaceSet(id): 90 | name = FACESET_TAG+"_"+str(id); 91 | response = requests.post(DETAIL_FACESET_API,{"api_key":API_KEY,"api_secret":API_SECRET,"tags":FACESET_TAG,"outer_id":name},timeout=10).json(); 92 | if "error_message" in response: 93 | print(response["error_message"]); 94 | return None; 95 | else: 96 | return response; 97 | 98 | 99 | class FaceSetNotExitsError(Exception): 100 | 
pass 101 | 102 | class FaceSetFullFilledError(Exception): 103 | pass 104 | 105 | 106 | def uploadCoverFaces(): 107 | faceset_id = 1; 108 | works = list(DMM.getWorkList({"coverFace":{"$exists":True},"coverFaceSet":{"$exists":False}},{"dmmCode":1,"coverFace":1}).limit(5)); 109 | def _combine(workA,workB): 110 | temp = workA; 111 | if type(temp) is not str: 112 | temp = workA["coverFace"]; 113 | return temp+","+workB["coverFace"]; 114 | def _save(work): 115 | work["coverFaceSet"] = outer_id; 116 | DMM.saveWorkInfo(work); 117 | 118 | while len(works)!=0: 119 | tokens = reduce(_combine,works); 120 | outer_id = FACESET_TAG+"_"+str(faceset_id); 121 | print(outer_id+":"+tokens); 122 | try: 123 | response = requests.post(ADD_FACE_API,{"api_key":API_KEY,"api_secret":API_SECRET,"tags":FACESET_TAG,"outer_id":outer_id,"face_tokens":tokens},timeout=10).json(); 124 | if "failure_detail" in response and len(response["failure_detail"])!=0: 125 | print(response["failure_detail"]); 126 | if response["failure_detail"][0]["reason"]=="QUOTA_EXCEEDED":#FaceSet容量不够 创建新的 127 | raise FaceSetFullFilledError; 128 | else: 129 | raise Exception; 130 | elif "error_message" in response: 131 | print(response["error_message"]); 132 | if response["error_message"]=="INVALID_OUTER_ID":#初始id不存在 133 | raise FaceSetNotExitsError; 134 | else: 135 | raise Exception; 136 | else: 137 | list(map(_save,works));#添加成功 本地保存信息 138 | print(outer_id+"+"+tokens); 139 | except FaceSetNotExitsError: 140 | faceset_id = createCoverFaceSet(faceset_id+1); 141 | except FaceSetFullFilledError: 142 | faceset_id = createCoverFaceSet(faceset_id); 143 | except Exception as e: 144 | print(e); 145 | finally: 146 | works = list(DMM.getWorkList({"coverFace":{"$exists":True},"coverFaceSet":{"$exists":False}},{"dmmCode":1,"coverFace":1}).limit(5));#继续 147 | 148 | 149 | def _searchFace(face_token): 150 | face_sets = getCoverFaceSets(); 151 | results = []; 152 | dic = {}; 153 | def _searchFaceSet(face_set): 154 | nonlocal results; 155 | 
response = requests.post(SEARCH_FACE_API,{"api_key":API_KEY,"api_secret":API_SECRET,"face_token":face_token,"outer_id":face_set,"return_result_count":5},timeout=10).json(); 156 | if "error_message" not in response: 157 | results += response["results"]; 158 | def _getWorkInfo(result): 159 | result["work"] = DMM.getWorkInfoBy("coverFace",result["face_token"],{"performer":1}); 160 | performer = result["work"]["performer"][0]["id"]; 161 | dic.setdefault(performer,{}); 162 | dic[performer]["name"] = result["work"]["performer"][0]["name"]; 163 | #sum count avg 164 | dic[performer].setdefault("sum",0); 165 | dic[performer]["sum"] += result["confidence"]; 166 | dic[performer].setdefault("count",0); 167 | dic[performer]["count"] += 1; 168 | dic[performer]["avg"] = dic[performer]["sum"] / dic[performer]["count"]; 169 | list(map(_searchFaceSet,face_sets)); 170 | list(map(_getWorkInfo,results)); 171 | return dic; 172 | 173 | def searchFace(path): 174 | response = requests.post(DETECT_FACE_API,{"api_key":API_KEY,"api_secret":API_SECRET},files={"image_file":open(path,'rb')},timeout=10).json(); 175 | if "error_message" in response: 176 | print(response["error_messagee"]); 177 | else: 178 | face = reduce(_getBiggestFace,response["faces"])["face_token"]; 179 | return _searchFace(face); 180 | 181 | 182 | 183 | #妈卖批 写完才发现python没有尾递归优化 184 | print(searchFace("preview/立花里子.jpg")); 185 | 186 | #DMM.updateProfies("profile"); 187 | -------------------------------------------------------------------------------- /DMM/ActressSpider.py: -------------------------------------------------------------------------------- 1 | from .DataBase import saveActressInfo,clearActressCommendedInfo,setActressInfo; 2 | from .CommonTools import *; 3 | 4 | 5 | 6 | def _fetchActressList(base_url):#获取单个首字母对应的多个页面中所有演员信息 7 | print(base_url); 8 | def _getPageUrl(page_num=1): 9 | print(page_num); 10 | return base_url+"/page="+str(page_num); 11 | def _findPagenation(bf): 12 | return 
bf.find("div",class_="d-boxcaptside d-boxpagenation"); 13 | def _findPageActress(bf):#获取一页中所有的演员信息 14 | return bf.find("div",class_="act-box").find_all("a"); 15 | def _getActressInfo(a):#获取每个演员的信息 16 | actress_id = a.get("href").split("id=")[1][:-1]; 17 | profile_url = a.img.get("src"); 18 | name = a.text; 19 | actress = {"name":name,"alias":getAlias(name),"id":actress_id,"profile":profile_url}; 20 | saveActressInfo(actress); 21 | 22 | return fetchInfoFromAllPage(_getPageUrl,_findPagenation,_findPageActress,_getActressInfo); 23 | 24 | def _markHotActress(table): 25 | actress_list = table.find_all("a"); 26 | def _mark(a): 27 | name = a.text; 28 | setActressInfo(name,{"hot":True}); 29 | list(map(_mark,actress_list)); 30 | 31 | def _markNewActress(table): 32 | actress_list = table.find_all("a"); 33 | def _mark(a): 34 | name = a.text; 35 | setActressInfo(name,{"new":True}); 36 | list(map(_mark,actress_list)); 37 | 38 | def updateActressCommended():#更新首页的新人女优、推荐女优信息 39 | clearActressCommendedInfo(); 40 | front_page = getPage("https://www.dmm.co.jp/mono/dvd/-/actress/"); 41 | tables = front_page.find_all("div",class_="act-box"); 42 | _markNewActress(tables[0]); 43 | _markHotActress(tables[1]); 44 | 45 | def updateActressList(): 46 | 47 | URL_GENERATOR = ['a','i','u','e','o',"ka","ki","ku","ke","ko","sa","si","su","se","so","ta","ti","tu","te","to","na","ni","nu","ne","no","ha","hi","hu","he","ho","ma","mi","mu","me","mo","ya","yu","yo","ra","ri","ru","re","ro","wa","wo","nn"]; 48 | #DMM的演员信息列表是按五十音图(首字母)再分页的 49 | url_list = getUrlList("https://www.dmm.co.jp/mono/dvd/-/actress/=/keyword=",URL_GENERATOR); 50 | list(map(_fetchActressList,url_list)); 51 | 52 | updateActressCommended(); 53 | 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /DMM/CommonTools.py: -------------------------------------------------------------------------------- 1 | import requests; 2 | from bs4 import BeautifulSoup; 3 | from functools import *; 4 
import os
# Utility functions shared by the DMM modules.


# HTTPS proxy inside Japan — DMM blocks all non-Japanese IPs (see README).
PROXY = {"https": "140.227.201.218:60088"}


def getAlias(name):
    """Split a DMM name string "name(alias1、alias2)" into a tuple of
    names; a plain name without parentheses is returned unchanged."""
    parts = name.split("(")
    if len(parts) == 1:
        return (name)
    first = parts[0]
    others = parts[1].split("、")
    others[-1] = others[-1][:-1]  # strip the trailing ")"
    return (first, *others)


def getPageList(pagenation):
    """Derive the list of page numbers from a pagination nav element."""
    nav_box = pagenation.find("ul")
    children = len(nav_box.find_all("li"))
    if children == 1:  # a single page
        return [1]
    last = nav_box.find("li", class_="terminal")
    if last:
        # layout: 1 2 3 4 5 next ... last — read the last page from its href
        href = last.a.get("href")
        page_num = int(href.split("page=")[1][:-1])
        return list(range(1, page_num + 1))
    return list(range(1, children))  # layout: 1 2 3 4 next


def combine(pageA, pageB):
    # reduce() helper that concatenates per-page result lists
    return pageA + pageB


def getResponse(url):
    return requests.get(url, proxies=PROXY, timeout=10)


def getHtml(url):
    """GET *url* through the proxy and return the response text.

    Raises requests.HTTPError for 4xx/5xx responses.
    """
    response = getResponse(url)
    # BUG FIX: the original tested `"status_code" not in response`, which
    # iterates the response *body* (never matching) instead of checking an
    # attribute.  raise_for_status() is a no-op on success and raises on
    # any error status, which is exactly the intended behavior.
    response.raise_for_status()
    return response.text


def getPage(url):
    return BeautifulSoup(getHtml(url), features="html.parser")


def getUrlList(base_url, generators):
    return [base_url + generator for generator in generators]


def fetchInfoFromAllPage(_getPageUrl, _findPagenation, _findPageUnits, _getUnitInfo,
                         _remoteCounter=None, _localCounter=None):
    """Crawl every page of a paginated listing and return the flattened
    list of parsed units.

    When both counters are supplied and the remote total (parsed from the
    page) does not exceed the local total (from the database), crawling is
    skipped and [] is returned.
    """
    front_page = getPage(_getPageUrl())       # load the first page
    pagenation = _findPagenation(front_page)  # pagination nav bar
    # BUG FIX: the counters default to None but were called unconditionally,
    # raising TypeError for every caller that omitted them
    # (e.g. _fetchWorkListByActress in WorkSpider.py).
    if _remoteCounter and _localCounter and _remoteCounter(pagenation) <= _localCounter():
        return []
    page_list = getPageList(pagenation)  # parse the first page into page numbers

    def _getPage(page_num):
        if page_num == 1:
            return front_page  # reuse the already-loaded first page
        return getPage(_getPageUrl(page_num))

    def _getPageUnits(page_num):
        page = _getPage(page_num)
        units = _findPageUnits(page)
        return list(map(_getUnitInfo, units))

    page_units = map(_getPageUnits, page_list)
    return reduce(combine, page_units)  # flatten the per-page lists into one
_findPageUnits(page); 72 | return list(map(_getUnitInfo,units)); 73 | page_units = map(_getPageUnits,page_list);#获取页面列表中每个页面的信息单元列表 74 | return reduce(combine,page_units);#二维数组合并成一维数组 75 | 76 | 77 | def getFileNameFromPath(path): 78 | return path.split("/")[-1]; 79 | 80 | 81 | def createPath(path): 82 | try: 83 | os.mkdir(path); 84 | except FileNotFoundError: 85 | joiner = "\\"; 86 | parent = joiner.join(path.split("\\")[:-1]); 87 | createPath(parent); 88 | os.mkdir(path); 89 | 90 | 91 | 92 | def download(url,save_path,name): 93 | path = save_path+"/"+name; 94 | def _save(content): 95 | try: 96 | file = open(path, 'wb'); 97 | except FileNotFoundError: 98 | createPath(save_path); 99 | file = open(path,'wb'); 100 | try: 101 | file.write(content); 102 | except: 103 | pass;#不能写入就空文件直接保存 104 | file.close(); 105 | try: 106 | res = getResponse(url); 107 | _save(res.content); 108 | except: 109 | _save(None);#下载失败 保存空文件 110 | -------------------------------------------------------------------------------- /DMM/DataBase.py: -------------------------------------------------------------------------------- 1 | import pymongo; 2 | from datetime import datetime; 3 | 4 | 5 | #数据库配置 6 | DB_URL = "mongodb://localhost:27017/"; 7 | DB_DOC = "DMM"; 8 | IDOL_RANK = "idol_rank"; 9 | IDOL_INFO = "idol_info"; 10 | WORK_INFO = "work_info"; 11 | TAG_INFO = "tag_info"; 12 | ACTRESS_INFO = "actress_info"; 13 | 14 | 15 | ########## 16 | 17 | def _getDB(): 18 | client = pymongo.MongoClient(DB_URL); 19 | return client[DB_DOC]; 20 | 21 | ########## 22 | 23 | def _getIdolRankCollection(): 24 | return _getDB()[IDOL_RANK]; 25 | 26 | def _getIdolInfoCollection(): 27 | return _getDB()[IDOL_INFO]; 28 | 29 | def _getWorkInfoCollection(): 30 | return _getDB()[WORK_INFO]; 31 | 32 | def _getTagInfoCollection(): 33 | return _getDB()[TAG_INFO]; 34 | 35 | def _getActressInfoCollection(): 36 | return _getDB()[ACTRESS_INFO]; 37 | 38 | ########## 39 | 40 | def saveIdolRank(year,setter): 41 | return 
_getIdolRankCollection().update_one({"year":year},{"$set":setter},True); 42 | 43 | def getIdolRankList(): 44 | return _getIdolRankCollection().find({}); 45 | 46 | ########## 47 | 48 | def getIdolInfo(name): 49 | return _getIdolInfoCollection().find_one({"name":name}); 50 | 51 | def setIdolInfo(name,setter): 52 | return _getIdolInfoCollection().update_one({"name":name},{"$set":setter}); 53 | 54 | def saveIdolInfo(setter,inc={}): 55 | return _getIdolInfoCollection().update_one({"name":setter["name"]},{"$set":setter,"$inc":inc},True); 56 | 57 | def getIdolList(search={},fields=None): 58 | return _getIdolInfoCollection().find(search,fields,no_cursor_timeout =True); 59 | 60 | def clearIdolInfo(): 61 | _getIdolInfoCollection().delete_many({}); 62 | 63 | def updateIdolWorkSuccess(name): 64 | setIdolInfo(name,{"lastUpdated":datetime.now()}); 65 | 66 | def updateIdolWorkFailed(name): 67 | setIdolInfo(name,{"lastUpdated":False}); 68 | 69 | ########## 70 | def saveActressInfo(actress): 71 | _getActressInfoCollection().update_one({"name":actress["name"]},{"$set":actress},True); 72 | 73 | def setActressInfo(name,setter): 74 | _getActressInfoCollection().update_one({"name":name},{"$set":setter}); 75 | 76 | def getActressInfo(name): 77 | return _getActressInfoCollection().find_one({"name":name}); 78 | 79 | def getActressID(name): 80 | actress = _getActressInfoCollection().find_one({"name":name}); 81 | if actress: 82 | return actress["id"]; 83 | else: 84 | return None; 85 | 86 | def clearActressCommendedInfo():#清除以前的热门、新人信息 87 | _getActressInfoCollection().update_many({"$or":[{"hot":True},{"new":True}]},{"$unset":{"hot":1,"new":1}}); 88 | 89 | 90 | ########## 91 | 92 | def saveWorkInfo(work): 93 | if work: 94 | _getWorkInfoCollection().update_one({"dmmCode":work["dmmCode"]},{"$set":work,"$unset":{"failed":1}},True); 95 | 96 | def fetchWorkFailed(dmm_code): 97 | _getWorkInfoCollection().update_one({"dmmCode":dmm_code},{"$set":{"failed":True}},True); 98 | 99 | def 
# (DataBase.py, continued)
def getWorkList(search={}, fields=None):
    # NOTE: the default dict is shared across calls; callers must not mutate it.
    return _getWorkInfoCollection().find(search, fields, no_cursor_timeout=True)


def getWorkInfo(dmm_code):
    return _getWorkInfoCollection().find_one({"dmmCode": dmm_code})


def getWorkInfoBy(field, value, fields):
    return _getWorkInfoCollection().find_one({field: value}, fields)


def clearWorkInfo(selector={}):
    _getWorkInfoCollection().delete_many(selector)


# Fetch a work list from the local database by actress name/alias
def getWorkListByActress(alias):
    return getWorkList({"performer.name": alias})


# Fetch a work list from the local database by tag
def getWorkListByTag(tag_name):
    return getWorkList({"tags": tag_name})


def getLatestWork(limit, page=1):
    """Return the `page`-th page of `limit` works, newest first."""
    sorted_list = getWorkList().sort([("publishedTime.year", pymongo.DESCENDING),
                                      ("publishedTime.month", pymongo.DESCENDING),
                                      ("publishedTime.day", pymongo.DESCENDING)])
    # BUG FIX: the original used .skip(page-1), offsetting by documents
    # rather than pages, so page 2 overlapped page 1 except for one item.
    final_list = sorted_list.skip((page - 1) * limit).limit(limit)
    return list(final_list)

##########

def saveTagInfo(tag):
    _getTagInfoCollection().update_one({"id": tag["id"]}, {"$set": tag}, True)
# -------------------------------------------------------------------------
# /DMM/IdolRank.py
import requests
from bs4 import BeautifulSoup
from functools import *
from .DataBase import saveIdolRank
from .CommonTools import *
import datetime

# URL generator — DMM splits each ranking into five pages of 20
RANK_GENERATOR = ("1_20", "21_40", "41_60", "61_80", "81_100")


def _getUrl(year, type_name, rank):
    # URL of one ranking page
    return ("https://www.dmm.co.jp/mono/dvd/-/ranking/=/mode=actress/term="
            + type_name + "_" + str(year) + "/rank=" + rank)


def _getRank(td):
    # Parse one cell of the ranking table into {rank: {name, id}}
    anchor = td.find_next_sibling("a")
    name = anchor.img.get("alt")
    actress_id = anchor.get("href").split("id=")[1][:-1]
    rank = td.string
    print(rank.zfill(3) + ":" + name)
    return {rank: {"name": name, "id": actress_id}}
print(rank.zfill(3)+":"+name); 23 | return {rank:{"name":name,"id":actress_id}}; 24 | 25 | def _combineRank(a,b):#将每个方块中的内容合并成一个排名 26 | a.update(b) 27 | return a; 28 | 29 | 30 | def _getRankListOfYear(year): 31 | print("["+str(year)+"]========================="); 32 | def _getRankListOfType(type_name): 33 | def _getRankRange(rank):#获取某个排名区间 34 | bf = getPage(_getUrl(year,type_name,rank)); 35 | td = bf.find_all("span",class_="rank");#空页面bug 原网页2017/2018下半年排名丢失了 36 | if len(td)==0: 37 | return {}; 38 | else: 39 | rank_list = map(_getRank,td); 40 | return reduce(_combineRank,rank_list); 41 | print("["+type_name+"]-------------------------"); 42 | range_list = map(_getRankRange,RANK_GENERATOR); 43 | return reduce(_combineRank,range_list); 44 | 45 | return {"year":year,"rankH1":_getRankListOfType("first"),"rankH2":_getRankListOfType("last"),"rankY":_getRankListOfType("year")}; 46 | 47 | 48 | def _getAllRank(last_year): 49 | year_range = list(range(2005,last_year+1)); 50 | return list(map(_getRankListOfYear,year_range)); 51 | 52 | def updateIdolRankList(): 53 | t = datetime.datetime.now(); 54 | year = t.year; 55 | idol_ranks = _getAllRank(year); 56 | 57 | def _saveIdolRank(rank): 58 | saveIdolRank(rank["year"],rank); 59 | 60 | list(map(_saveIdolRank,idol_ranks)); 61 | 62 | 63 | -------------------------------------------------------------------------------- /DMM/IdolSpider.py: -------------------------------------------------------------------------------- 1 | from .DataBase import clearIdolInfo,getIdolRankList,getIdolInfo,setIdolInfo,saveIdolInfo; 2 | from .CommonTools import *; 3 | from datetime import datetime; 4 | from .IdolRank import updateIdolRankList; 5 | 6 | CURRENT_YEAR = datetime.now().year; 7 | 8 | ''' 9 | sum:按每年名次的累加分 10 | avg:按照参与评分的次数平均 11 | weight:加权分 3年内系数2 ,3-6系数1.5 6-9系数1 超过9年系数0.5 12 | ''' 13 | 14 | def _getWeight(year): 15 | delta = CURRENT_YEAR - year; 16 | if delta > 9: 17 | return 0.5; 18 | elif delta > 6: 19 | return 1; 20 | elif delta >3: 21 | 
return 1.25; 22 | else: 23 | return 1.5; 24 | 25 | 26 | def _countEachYear(ranks): 27 | year = ranks["year"]; 28 | def _countRank(rank_type,rank): 29 | def _countEachIdol(num): 30 | actress_id = rank[num]["id"]; 31 | name = rank[num]["name"]; 32 | point = 101 - int(num); 33 | point_weighted = point * _getWeight(year); 34 | 35 | info_set = {"id":actress_id,"name":name,"alias":getAlias(name),"rank."+str(year)+"_"+rank_type:num}; 36 | points_inc = {"points.sum":point,"points.weight":point_weighted}; 37 | saveIdolInfo(info_set,points_inc); 38 | 39 | idol = getIdolInfo(name); 40 | avg = idol["points"]["sum"] / len(idol["rank"]); 41 | setIdolInfo(name,{"points.avg":avg}); 42 | 43 | list(map(_countEachIdol,rank)); 44 | 45 | _countRank("H1",ranks["rankH1"]); 46 | _countRank("H2",ranks["rankH2"]); 47 | _countRank("Y",ranks["rankY"]); 48 | 49 | 50 | 51 | def updateIdolList(): 52 | updateIdolRankList(); 53 | clearIdolInfo(); 54 | ranks = getIdolRankList(); 55 | list(map(_countEachYear,ranks)); 56 | 57 | -------------------------------------------------------------------------------- /DMM/MediaDownloader.py: -------------------------------------------------------------------------------- 1 | import os; 2 | from .DataBase import getActressInfo,getWorkInfo,getWorkList,getIdolList; 3 | from .CommonTools import *; 4 | import threadpool; 5 | 6 | 7 | MAX_THREAD_NUM = 100; 8 | 9 | def _getFilePath(save_path,dmm_code): 10 | return save_path + "\\"+dmm_code+".jpg"; 11 | 12 | def downloadWorkCover(dmm_code,save_path): 13 | print("-"+dmm_code); 14 | work = getWorkInfo(dmm_code); 15 | url = work["cover"]; 16 | if os.path.exists(_getFilePath(save_path,dmm_code)):#文件已存在 跳过 17 | return; 18 | if not url:#无封面 跳过 19 | return; 20 | download(url,save_path,dmm_code+".jpg"); 21 | print("+"+dmm_code); 22 | 23 | def downloadWorkSnapShots(dmm_code,save_path): 24 | work = getWorkInfo(dmm_code); 25 | def _download(url): 26 | download(url,_getSavePath(dmm_code),dmm_code+".jpg"); 27 | 
list(map(_download,work["snapShots"])); 28 | 29 | def downloadWorkPreview(dmm_code,save_path): 30 | work = getWorkInfo(dmm_code); 31 | url = work["preview"]; 32 | if not url: 33 | return; 34 | download(url,save_path,dmm_code+".jpg"); 35 | 36 | def updateWorkCovers(save_path): 37 | def _getCode(work): 38 | return work["dmmCode"]; 39 | def _download(dmm_code): 40 | downloadWorkCover(dmm_code,save_path); 41 | works = getWorkList(); 42 | #多线程 43 | pool = threadpool.ThreadPool(MAX_THREAD_NUM); 44 | code_list = list(map(_getCode,works)); 45 | requests = threadpool.makeRequests(_download,code_list); 46 | [pool.putRequest(req) for req in requests]; 47 | pool.wait(); 48 | 49 | 50 | def _getProfileDownloader(save_path): 51 | def _downloadProfile(actress): 52 | print(actress); 53 | url = actress["profile"]; 54 | if os.path.exists(_getFilePath(save_path,name)):#文件已存在 跳过 55 | return; 56 | if not url:#无封面 跳过 57 | return; 58 | download(url,save_path,name+".jpg"); 59 | return _downloadProfile; 60 | 61 | def downloadProfile(name,save_path): 62 | print("-"+name); 63 | actress = getActressInfo(name); 64 | _downloader = _getProfileDownloader(save_path); 65 | _downloader(actress); 66 | print("+"+name); 67 | 68 | def updateProfies(save_path): 69 | idols = getIdolList(); 70 | _downloader = _getProfileDownloader(save_path); 71 | #多线程 72 | pool = threadpool.ThreadPool(MAX_THREAD_NUM); 73 | requests = threadpool.makeRequests(_downloader,idols); 74 | [pool.putRequest(req) for req in requests]; 75 | pool.wait(); 76 | -------------------------------------------------------------------------------- /DMM/TagSpider.py: -------------------------------------------------------------------------------- 1 | from .DataBase import saveTagInfo; 2 | from .CommonTools import *; 3 | #从DMM上爬取Tag列表 4 | 5 | 6 | def _fetchAllTags(bf): 7 | return bf.find("div",class_="area-category").find_all("a"); 8 | 9 | def _fetchTagInfo(a): 10 | tag_id = a.get("href").split("id=")[1][:-1]; 11 | return 
{"name":a.string,"id":tag_id}; 12 | 13 | def _saveTag(a): 14 | saveTagInfo(_getTagInfo(a)); 15 | 16 | #刷新DMM的Tag列表 17 | def updateTagList(): 18 | bf = getPage("https://www.dmm.co.jp/mono/dvd/-/genre/"); 19 | list(map(_saveTag,_getAllTags(bf))); 20 | 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /DMM/WorkInfoSpider.py: -------------------------------------------------------------------------------- 1 | from .CommonTools import *; 2 | import re; 3 | 4 | 5 | def _getInfoAnchor(bf,key): 6 | anchor = bf.find("td",string=key+":"); 7 | return anchor.find_next_sibling("td"); 8 | 9 | def _getNormalizedInfo(td):#DMM当丢失导演或发行商等信息时会显示 ---- 10 | if td.string=="----": 11 | return None; 12 | else: 13 | return td.string;#实际结构是td.a.string 14 | 15 | def _getMetaContent(anchor): 16 | if not anchor: 17 | return None; 18 | else: 19 | return anchor.get("content"); 20 | 21 | ######元信息 22 | 23 | def _getDmmCode(bf):#不知道为啥DMM会整出118abp001这种作为URL 可能是多表横向切割? 
24 | #还有url中的番号和页面中的番号不一样的情况...干你妈的日本人 25 | url = bf.find("meta",property="og:url").get("content"); 26 | code = url.split("cid=")[1][:-1]; 27 | return code; 28 | 29 | def _getCode(bf):#蓝光版和典藏版还有不同的番号比如abp120dod 还有dvaj358so这种莫名加个后缀的 53pbdvaj333这种53pb奇怪前缀的出口版 h_479gne114这种不知所云的前缀 30 | return _getDmmCode(bf); 31 | 32 | def _getTitle(bf): 33 | anchor = bf.find("meta",property="og:title"); 34 | return _getMetaContent(anchor); 35 | 36 | def _getDescription(bf): 37 | anchor = bf.find("meta",property="og:description"); 38 | return _getMetaContent(anchor); 39 | 40 | def _getLength(bf): 41 | anchor = _getInfoAnchor(bf,"収録時間"); 42 | val = _getNormalizedInfo(anchor); 43 | if not val: 44 | return val; 45 | else: 46 | return int(val[:-1]); 47 | 48 | def _getPublishedTime(bf): 49 | anchor = _getInfoAnchor(bf,"発売日"); 50 | time = anchor.string.split("/"); 51 | return {"year":int(time[0]),"month":int(time[1]),"day":int(time[2])}; 52 | 53 | ######制作信息 54 | 55 | def _getDirector(bf): 56 | #和出演者等 ----的情况 57 | anchor = _getInfoAnchor(bf,"監督"); 58 | return _getNormalizedInfo(anchor); 59 | 60 | def _getProducer(bf): 61 | anchor = _getInfoAnchor(bf,"メーカー") 62 | return _getNormalizedInfo(anchor); 63 | 64 | def _getPublisher(bf): 65 | anchor = _getInfoAnchor(bf,"レーベル"); 66 | return _getNormalizedInfo(anchor); 67 | 68 | ######作品内容信息 69 | 70 | def _getSeries(bf): 71 | anchor = _getInfoAnchor(bf,"シリーズ"); 72 | return _getNormalizedInfo(anchor); 73 | 74 | def _getScore(bf): 75 | anchor = _getInfoAnchor(bf,"平均評価"); 76 | score = anchor.img.get("alt")[:-1]; 77 | if score=="0": 78 | return {"num":0,"score":0};#还没有人评分 79 | else: 80 | num = int(bf.find("p",class_="d-review__evaluates").strong.string);#总共多少条评分 81 | avg = float(bf.find("p",class_="d-review__average").strong.string[:-1]);#平均分 82 | return {"num":num,"score":avg} 83 | 84 | def _getTags(bf): 85 | anchor = _getInfoAnchor(bf,"ジャンル"); 86 | def _getTag(a): 87 | tag_id = a.get("href").split("id=")[1][:-1]; 88 | return 
{"name":a.string,"id":tag_id}; 89 | tags = anchor.find_all("a"); 90 | return list(map(_getTag,tags)); 91 | 92 | def _getPerformer(bf): 93 | anchor = _getInfoAnchor(bf,"出演者"); 94 | 95 | def _getPerformer(performer): 96 | actress_id = performer.get("href").split("id=")[1][:-1]; 97 | return {"name":performer.string,"id":actress_id}; 98 | def _getPerformerAjax(code): 99 | return "https://www.dmm.co.jp/mono/dvd/-/detail/performer/=/cid="+code; 100 | 101 | if anchor.span.find("a",string="▼すべて表示する"):#说明有多个演员而且无法一次性显示 102 | ajax_url = _getPerformerAjax(_getDmmCode(bf)); 103 | response = requests.get(ajax_url,proxies=PROXY);#访问ajax获取a标签列表 104 | ajax = BeautifulSoup(response.text,features="html.parser"); 105 | performers = ajax.find_all("a"); 106 | return list(map(_getPerformer,performers)); 107 | else: 108 | if anchor.string=="----":#丢失出演者信息 109 | return []; 110 | else: 111 | return list(map(_getPerformer,anchor.span.find_all("a"))); 112 | 113 | ######预览信息 114 | 115 | def _getCoverHref(bf):#封面 116 | anchor = bf.find("div",id="sample-video").find("a",string="イメージを拡大"); 117 | if not anchor: 118 | return None; 119 | else: 120 | return anchor.get("href"); 121 | 122 | def _getSnapShots(bf):#截图 123 | def _getSnapShotHref(a):#通过缩略图url获取大图url 124 | url = a.img.get("src"); 125 | if("name" not in a):#无大图 126 | return url; 127 | url = url.split("-");#有大图 128 | return url[0]+"jp-"+url[1]; 129 | anchor = bf.find("div",id="sample-image-block"); 130 | if not anchor:#没有截图的情况 131 | return []; 132 | else:#分有大图和没大图的情况 133 | snap_shots = anchor.find_all("a"); 134 | return list(map(_getSnapShotHref,snap_shots)); 135 | 136 | #DMM是点击按钮后用jQuery加载一个视频的iframe 里面有video js加载视频 137 | #不能直接根据规律预测URL 总有视频地址和番号不匹配的情况 138 | def _getPreview(bf):#预览视频 139 | def _getPreviewAjax(code): 140 | return "https://www.dmm.co.jp/service/-/html5_player/=/cid="+code+"/mtype=AhRVShI_/service=mono/floor=dvd/mode=/"; 141 | anchor = bf.find("div",id="detail-sample-movie"); 142 | if not anchor: 143 | return None; 144 | 
ajax_url = _getPreviewAjax(_getDmmCode(bf)); 145 | try: 146 | html = getHtml(ajax_url);#访问ajax获取iframe 147 | except: 148 | return False; 149 | result = re.search('''("bitrate":1500,"src":"){1}.*?("}]){1}''',html);#正则匹配从js代码里获取视频地址 150 | if result: 151 | return "http://"+result.group()[26:-3].replace("\/","/");#去除匹配到的头尾 得到video_url 152 | else: 153 | return None;#视频不存在的情形 154 | 155 | 156 | ######汇总接口 解析作品信息 157 | def _parseWorkInfo(bf): 158 | return { 159 | "title":_getTitle(bf), 160 | "description":_getDescription(bf), 161 | "code":_getCode(bf), 162 | "dmmCode":_getDmmCode(bf), 163 | "length":_getLength(bf), 164 | "publishedTime":_getPublishedTime(bf), 165 | "director":_getDirector(bf), 166 | "performer":_getPerformer(bf), 167 | "producer":_getProducer(bf), 168 | "publisher":_getPublisher(bf), 169 | "series":_getSeries(bf), 170 | "tags":_getTags(bf), 171 | "score":_getScore(bf), 172 | "cover":_getCoverHref(bf), 173 | "snapShots":_getSnapShots(bf), 174 | "preview":_getPreview(bf) 175 | }; 176 | 177 | 178 | def _getWorkUrl(dmm_code): 179 | return "https://www.dmm.co.jp/mono/dvd/-/detail/=/cid="+dmm_code; 180 | 181 | def fetchWorkInfo(dmm_code):#本函数完成了下载器和解析器的功能) 182 | bf = getPage(_getWorkUrl(dmm_code)); 183 | return _parseWorkInfo(bf); 184 | -------------------------------------------------------------------------------- /DMM/WorkSpider.py: -------------------------------------------------------------------------------- 1 | from functools import *; 2 | from .DataBase import getActressID,getIdolInfo,getIdolList,getWorkList,getWorkInfo,saveWorkInfo,fetchWorkFailed,updateIdolWorkSuccess,updateIdolWorkFailed,getLatestWork; 3 | from .WorkInfoSpider import fetchWorkInfo; 4 | import threadpool; 5 | from .CommonTools import *; 6 | from datetime import datetime; 7 | 8 | MAX_THREAD_NUM = 10; 9 | 10 | 11 | def updateWorkInfo(dmm_code):#更新作品信息 12 | try: 13 | print("-"+dmm_code); 14 | work_info = fetchWorkInfo(dmm_code); 15 | saveWorkInfo(work_info); 16 | print("+"+dmm_code); 17 
| except Exception as e: 18 | print("?"+dmm_code); 19 | print(e); 20 | fetchWorkFailed(dmm_code); 21 | 22 | def updateFailedWorks():#更新加载失败了的作品信息 23 | works = getWorkList({ "$or": [ { "failed":True },{"preview":False}]},{"dmmCode":1}); 24 | pool = threadpool.ThreadPool(MAX_THREAD_NUM); 25 | def _getCode(work): 26 | return work["dmmCode"]; 27 | 28 | code_list = list(map(_getCode,works)); 29 | requests = threadpool.makeRequests(updateWorkInfo,code_list); 30 | [pool.putRequest(req) for req in requests]; 31 | pool.wait(); 32 | 33 | 34 | def updateFailedIdolWorks(): 35 | idols = getIdolList({"lastUpdated":False},{"name":1}) 36 | def _update(idol): 37 | updateIdolWorks(idol["name"]); 38 | list(map(_update,idols)); 39 | 40 | 41 | def updateIdolWorks(name=None,past=2592000):#更新演员的作品信息(数据库中已有作品会跳过) 42 | def _saveWorkInfo(dmm_code): 43 | work = getWorkInfo(dmm_code); 44 | if not work or "failed" in work:#判断是否存在failed 以及unset掉failed 45 | updateWorkInfo(dmm_code); 46 | 47 | def _fetchWorks(name): 48 | idol = getIdolInfo(name); 49 | if "lastUpdated" not in idol or not idol["lastUpdated"] or (datetime.now()-idol["lastUpdated"]).total_seconds()>past: 50 | try: 51 | works = _fetchWorkListByIdol(name); 52 | requests = threadpool.makeRequests(_saveWorkInfo,works); 53 | [pool.putRequest(req) for req in requests] 54 | pool.wait(); 55 | updateIdolWorkSuccess(name); 56 | except: 57 | updateIdolWorkFailed(name); 58 | print("?"+idol["name"]); 59 | 60 | pool = threadpool.ThreadPool(MAX_THREAD_NUM); 61 | if name: 62 | print("*"+name); 63 | _fetchWorks(name); 64 | else: 65 | idols = getIdolList({},fields={"name":1}); 66 | count = 0; 67 | def _fetchWorkList(idol): 68 | nonlocal count; 69 | count+=1; 70 | print("*("+str(count)+")"+idol["name"]); 71 | _fetchWorks(idol["name"]); 72 | list(map(_fetchWorkList,idols)); 73 | 74 | 75 | 76 | def _findPagenation(bf): 77 | return bf.find("div",class_="list-boxpagenation"); 78 | 79 | def _findPageWorks(bf): 80 | return bf.find_all("p",class_="ttl"); 81 | 
82 | def _getWorkDmmCode(p): 83 | return p.a.get("href").split("cid=")[1][:-1]; 84 | 85 | def _remoteWorkCounter(pagenation): 86 | return int(pagenation.find("p").string.split("タイトル中")[0]); 87 | 88 | 89 | #根据Actress从DMM获取作品列表 90 | def _fetchWorkListByActress(name): 91 | actress_id = getActressID(name); 92 | def _getPageUrl(page_num=1): 93 | return "https://www.dmm.co.jp/mono/dvd/-/list/=/article=actress/format=dvd/id="+actress_id+"/limit=120/view=text/page="+str(page_num); 94 | if actress_id: 95 | return fetchInfoFromAllPage(_getPageUrl,_findPagenation,_findPageWorks,_getWorkDmmCode); 96 | else: 97 | return []; 98 | 99 | 100 | #根据Actress从DMM获取作品列表 但只获取单体作品 101 | def _fetchWorkListByIdol(name):#format(dvd/bd) 102 | actress_id = getActressID(name); 103 | def _getPageUrl(page_num=1): 104 | return "https://www.dmm.co.jp/mono/dvd/-/list/=/article=actress/format=dvd/id="+actress_id+"/limit=120/n1=DgRJTglEBQ4GpoD6,YyI,qs_/view=text/page="+str(page_num); 105 | def _localWorkCounter(): 106 | return getWorkList({"performer.name":name},{"_id":1}).count(); 107 | if actress_id: 108 | return fetchInfoFromAllPage(_getPageUrl,_findPagenation,_findPageWorks,_getWorkDmmCode,_remoteWorkCounter,_localWorkCounter); 109 | else: 110 | return []; 111 | 112 | #根据Tag从DMM获取作品列表 113 | def _fetchWorkListByTag(tag_name): 114 | tag_id = getActressID(name); 115 | def _getPageUrl(page_num=1): 116 | return "https://www.dmm.co.jp/mono/dvd/-/list/=/article=keyword/format=dvd/id="+tag_id+"/limit=120/view=text/page="+str(page_num); 117 | if tag_id: 118 | return fetchInfoFromAllPage(_getPageUrl,_findPagenation,_findPageWorks,_getWorkDmmCode); 119 | else: 120 | return []; 121 | 122 | def _updateWorkInfo(dmm_code): 123 | work = fetchWorkInfo(dmm_code); 124 | saveWorkInfo(work); 125 | 126 | #根据爬取到的作品列表获取作品信息 127 | def fetchWorkListInfo(): 128 | pass; 129 | 130 | 131 | #新片更新 132 | def _fetchWorkListRecently(): 133 | #获取列表到数据库中已存在时中断 134 | 
#https://www.dmm.co.jp/mono/dvd/-/list/=/format=dvd/limit=120/sort=date/view=text/ 135 | pass; 136 | 137 | 138 | #####################从alias转换为name 139 | -------------------------------------------------------------------------------- /DMM/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | if __name__ == "__main__": 3 | from DataBase import *;#数据层 4 | from IdolRank import updateIdolRankList;#爬取2005年以来DMM的TOP100女优排行榜 rankY全年年榜 rankH1上半年榜 rankH2下半年榜 存入IDOL_RANK表 5 | from IdolSpider import updateIdolList;#从女优排行榜中统计得到知名女优表 总共700余位 存入IDOL_INFO表 并根据排行进行计分 6 | 7 | from TagSpider import updateTagList;#Tag爬虫 爬取DMM的Tag列表 8 | from ActressSpider import updateActressCommended,updateActressList;#爬取女优列表和更新推荐女优信息 9 | 10 | from WorkInfoSpider import fetchWorkInfo;#作品信息抓取/解析器 11 | from WorkSpider import updateWorkInfo,updateFailedWorks,updateFailedIdolWorks,updateIdolWorks;#作品爬虫 12 | from MediaDownloader import downloadProfile,updateProfies,downloadWorkCover,downloadWorkSnapShots,downloadWorkPreview,updateWorkCovers #预览信息下载器 下载封面、下载截图、下载宣传片、批量更新封面 13 | else: 14 | from .DataBase import *; 15 | from .IdolRank import updateIdolRankList; 16 | from .IdolSpider import updateIdolList; 17 | 18 | from .TagSpider import updateTagList; 19 | from .ActressSpider import updateActressCommended,updateActressList; 20 | 21 | from .WorkInfoSpider import fetchWorkInfo; 22 | from .WorkSpider import *; 23 | from .MediaDownloader import downloadProfile,updateProfies,downloadWorkCover,downloadWorkSnapShots,downloadWorkPreview,updateWorkCovers 24 | 25 | 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AV-Hub 2 | AV女优/小电影爬虫 3 | 自动爬取日本最大的成人网站DMM 获取最新的女优/小电影情报 4 | 5 | # 使用说明 6 | 本爬虫基于**Python3**及**MongoDB**数据库写成,请先配置好基本环境 7 | //爬虫会读取到大量"脏数据"(比如因为404导致的字段缺失) 使用MySQL等数据库会很难处理 若使用其它Non-SQL数据库,可以在DataBase.py中自行修改数据层 8 | 
项目所依赖的第三方库:**requests**(网络请求) **bs4**(html解析) **pymongo**(数据库) **threadpool**(线程池) 运行前请自行使用pip命令下载安装 9 | 以上完成之后,在DataBase.py中配置好数据库,将DMM文件夹复制到你的项目中,然后import DMM; 之后即可使用__init__.py中所导出的函数 10 | 11 | # 使用示范 12 | ``` 13 | import DMM; 14 | DMM.updateIdolRankList();#将DMM自2005年以来的TOP100女优榜单更新进入数据库 得到idol_rank表 15 | DMM.updateIdolList();#根据榜单统计著名女优表 得到idol_info表 目前统计得到731人 16 | DMM.updateIdolWorks();#根据著名女优表爬取单体作品信息(即排除合集作品) 截止2019-03-07共计50864条 17 | DMM.updateWorkCovers("covers");#将这些作品的封面全部下载至covers文件夹 共计8.23GB 18 | #顺带一提,直接从女优列表而不是排行榜单爬取到的数量是6753位而不是731位,数据量太大爬起来有点麻烦,所以只爬了上过榜的 19 | ``` 20 | ![](https://github.com/XiaYaoShiXin/AV-Hub/blob/master/preview/idol_info.PNG) 21 | 爬取到的女优信息 22 | ![](https://github.com/XiaYaoShiXin/AV-Hub/blob/master/preview/work_info.PNG) 23 | 爬取到的作品信息 24 | ![](https://github.com/XiaYaoShiXin/AV-Hub/blob/master/preview/cover.PNG) 25 | 爬取到的封面图片 26 | 27 | # 注意事项 28 | 由于是成人网站,DMM自己墙掉了日本以外全世界的ip,所以爬虫使用时会用到代理,你可以在CommonTools.py中修改它 29 | 你可以在[free-proxy-list.net](https://free-proxy-list.net/)找到一个稳定的、使用HTTPS协议的日本代理 30 | ![](https://github.com/XiaYaoShiXin/AV-Hub/blob/master/preview/proxy.PNG) 31 | //当然,这网站也是不出所料的被墙掉了2333 所以首先你需要能够科学上网:D 32 | 33 | 因为我电脑有点菜的原因,线程池最大上限设成了10,可以自行修改 34 | 35 | DMM这个网站不知道为啥给番号加了各种莫名其妙的前缀和后缀,毫无规律可循。本来想用正则匹配一下,放弃治疗了,反正去掉tk dod re h_123这些乱七八糟的前后缀你大概能看懂番号是啥就行 36 | 37 | 38 | # 应用展示 39 | Client.py是调用人脸识别平台Face++接口实现的一个【女优识图】应用 40 | 随便从数据库里找了个叫立花里子的女优,谷歌找一张她的不是作品封面的照片 41 | emmm,效果还不错,今后大概会做个网页版的吧:D 42 | ![](https://github.com/XiaYaoShiXin/AV-Hub/blob/master/preview/立花里子.jpg) 43 | ![](https://github.com/XiaYaoShiXin/AV-Hub/blob/master/preview/app.PNG) 44 | 45 | 46 | # 吐槽 47 | 第一次尝试函数式编程,有点原教旨的几乎连一个for循环都没用wwww闭包警告!
48 | 还有 Python的import机制好蠢啊,还是喜欢es6 49 | 50 | -------------------------------------------------------------------------------- /preview/app.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhikun-hou/AV-Hub/fc80f24ea87ac3bf1d568c08fc932f9e1c07b495/preview/app.PNG -------------------------------------------------------------------------------- /preview/cover.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhikun-hou/AV-Hub/fc80f24ea87ac3bf1d568c08fc932f9e1c07b495/preview/cover.PNG -------------------------------------------------------------------------------- /preview/idol_info.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhikun-hou/AV-Hub/fc80f24ea87ac3bf1d568c08fc932f9e1c07b495/preview/idol_info.PNG -------------------------------------------------------------------------------- /preview/proxy.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhikun-hou/AV-Hub/fc80f24ea87ac3bf1d568c08fc932f9e1c07b495/preview/proxy.PNG -------------------------------------------------------------------------------- /preview/work_info.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhikun-hou/AV-Hub/fc80f24ea87ac3bf1d568c08fc932f9e1c07b495/preview/work_info.PNG -------------------------------------------------------------------------------- /preview/立花里子.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhikun-hou/AV-Hub/fc80f24ea87ac3bf1d568c08fc932f9e1c07b495/preview/立花里子.jpg --------------------------------------------------------------------------------