# ---------------------------------------------------------------------------
# The listing this file was taken from carried the repository tree and the
# .gitignore on the same physical line as this script; both are kept here as
# comments so that no content is lost.
#
# Repository layout:
#   ├── .gitignore
#   ├── 1. sort_jav.py
#   ├── 2. Set-JAVNfo.ps1
#   ├── 3. Sort-7mmtv.ps1
#   ├── Get-R18ThumbUrls.ps1
#   ├── LICENSE.md
#   ├── R18-Oct-09-2019-first-last.csv
#   ├── R18-Oct-09-2019-last-first.csv
#   ├── README.md
#   ├── emby_actor_thumbs
#   │   ├── 1. Get-EmbyActorThumbs.ps1
#   │   └── 2. Set-EmbyActorThumbs.ps1
#   ├── extras
#   │   ├── demo.gif
#   │   ├── edit_covers.py
#   │   └── jf-example.png
#   └── settings_sort_jav.ini
#
# /.gitignore:
#   # Ignore internal releases
#   /releases
#   /db
#   /emby_actor_thumbs/db
#
# /1. sort_jav.py:
# ---------------------------------------------------------------------------
"""Sort, rename and decorate video files using metadata scraped from javlibrary.

Settings are read from ``settings_sort_jav.ini`` into a plain dict that is
passed to most functions as ``s``.  ``sort_jav`` additionally stores the
part number of the file currently being processed in ``s['video-number']``.
"""
import os
import re
import urllib.request
from shutil import move

# Shared cloudscraper session.  Keeping a single session lets the Cloudflare
# cookie be cached, which is significantly faster than a session per request.
# It is created on first use by _get_scraper() so that importing this module
# needs neither the network nor the third-party package.
scraper = None

_DIGITS = '0123456789'
# Upper bound on "search page -> exact result" hops for a single lookup.
_MAX_SEARCH_HOPS = 5
_MAX_PATH_LENGTH = 255

# javlibrary spellings that should be replaced by a preferred spelling.
# To fix another name, add an entry:
#     'name returned from javlibrary': 'different name'
_ACTRESS_NAME_FIXES = {
    'Kitagawa Eria': 'Kitagawa Erika',
    'Oshikawa Yuuri': 'Oshikawa Yuri',
}


def _get_scraper():
    """Return the shared cloudscraper session, creating it on first use."""
    global scraper
    if scraper is None:
        # Third-party dependency, imported here so the pure helpers in this
        # module stay importable without it.
        import cloudscraper
        session = cloudscraper.create_scraper()
        # Run cloudscraper on base URL to resolve loop error
        session.get('http://javlibrary.com/en/')
        scraper = session
    return scraper


# FancyURLopener was deprecated in Python 3.3 and removed in 3.14; fall back to
# ``object`` so the class statement does not break the import on new versions.
class AppUrlopener(getattr(urllib.request, 'FancyURLopener', object)):
    """URL opener with a browser-like user agent (not used within this file)."""
    version = "Mozilla/5.0"


def read_file(path):
    """Return a dictionary containing a map of name of setting -> value.

    Lines are ``name=value``.  Blank lines, lines starting with ``#`` and lines
    without ``=`` are ignored.  Only the first ``=`` separates name from value,
    so values may themselves contain ``=``.  The values ``true``/``false``
    (any case) become booleans; everything else stays a string and is NOT
    stripped of surrounding spaces (delimiters may legitimately be spaces).
    Characters that are invalid in file names are removed from every line
    whose name does not start with ``path``.
    """
    settings = {}
    # so we can strip invalid characters for filenames
    translator = str.maketrans({key: None for key in '<>/\\|*:?'})
    with open(path, 'r') as content_file:
        for line in content_file:
            line = line.strip('\n')
            if not line.startswith('path'):
                line = line.translate(translator)
            if not line or line.startswith('#'):
                continue
            name, separator, value = line.partition('=')
            if not separator:
                continue
            lowered = value.lower()
            if lowered == 'true':
                settings[name] = True
            elif lowered == 'false':
                settings[name] = False
            else:
                settings[name] = value
    return settings


def strip_id_from_video(path, s):
    """Get the id of the video from a video's path."""
    name = path.split(os.sep)[-1].split('.')[0]
    delimiter = s['delimiter-between-multiple-videos']
    if delimiter in name:
        name = name.split(delimiter)[0]
    return strip_definition_from_video(name)


def strip_definition_from_video(vid_id):
    """Strip any sort of HD tag from the video."""
    for tag in ('[HD]', '[FHD]', '[SD]', '(SD)', '(HD)', '(FHD)'):
        vid_id = vid_id.replace(tag, '')
    return vid_id


def check_vid_id_has_dash(vid_id):
    """Return *vid_id* with a dash inserted before its first digit if it has no dash."""
    if '-' not in vid_id:
        for i, char in enumerate(vid_id):
            if char in _DIGITS:
                return vid_id[:i] + '-' + vid_id[i:]
    return vid_id


def getNumExtension(vid_id):
    """Given a video id return the part after the first dash.

    Assumes the id has already been fixed to contain a dash and raises
    IndexError otherwise; ``sort_jav`` relies on that error to skip files
    whose names do not look like an id.
    """
    return str(vid_id.split('-')[1])


def strip_video_number_from_video(path, vid_id, s):
    """Return the portion that specifies the video number, or None if none exists."""
    delimiter = s['delimiter-between-multiple-videos']
    qualified = path.split(os.sep)[-1]
    parts = qualified.split(delimiter)
    if len(parts) == 1:
        # check for videos of the form XXX-123A.HD.mp4, XXX-123B.HD.mp4
        # they won't match the delimiter but still need to be caught
        nums = getNumExtension(vid_id)
        chars = 'ABCDEFGHIJ'
        index = qualified.find(nums)
        suffix_pos = index + len(nums)
        # the bounds check covers names that end right after the number
        if index > -1 and suffix_pos < len(qualified):
            c = qualified[suffix_pos].upper()
            if c in chars:
                return str(chars.find(c) + 1)
        return None
    candidate = parts[1].rsplit('.')[0]
    # anything longer than two characters is a straggling name, not a number
    return None if len(candidate) > 2 else candidate


def get_javlibrary_url(vid_id):
    """Return the html of the video's page on javlibrary.

    Returns None if the page could not be found or the request failed.
    """
    vid_id = check_vid_id_has_dash(vid_id.upper())
    search_url = "http://www.javlibrary.com/en/vl_searchbyid.php?keyword=" + vid_id
    try:
        return get_url_response(search_url, vid_id)
    except Exception:
        # best effort: any scraping failure means "not found" for the caller
        return None


def get_url_response(url, vid_id):
    """Get the response from a given URL.

    Includes the video id to verify the URL is correct.  When the page is not
    the wanted one, the link of the exact result is followed instead, for at
    most ``_MAX_SEARCH_HOPS`` requests.  Returns the page contents, or None if
    the correct URL does not exist.
    """
    session = _get_scraper()
    for _ in range(_MAX_SEARCH_HOPS):
        if url is None:
            return None
        contents = session.get(url).content.decode()
        if check_valid_response(contents, vid_id):
            return contents  # the URL was good
        url = get_correct_url(contents, vid_id)
    return None


def check_valid_response(html, vid_id):
    """Check if the html was the page we wanted."""
    # NOTE(review): the prefix literal is empty in the listing this file was
    # restored from.  If an HTML marker was lost during extraction, restore it
    # from the repository; as written, any page containing the id is accepted.
    marker = "" + vid_id
    return marker in html


def get_correct_url(html, vid_id):
    """Get the url of the exact video we want from a page with multiple results.

    Looks for the link whose ``title`` starts with *vid_id* followed by a
    space and returns the href immediately preceding that title.  Returns None
    if there is no such link.
    """
    if not isinstance(html, str) or not isinstance(vid_id, str):
        return None
    before, found, _ = html.partition('" title="' + vid_id + ' ')
    if not found:
        return None
    # the href belonging to the title is the LAST one opened before it
    _, link_found, url_portion = before.rpartition('><a href=".')
    if not link_found:
        return None
    return "http://www.javlibrary.com/en" + url_portion


def _rename_no_clobber(src, dst):
    """Rename *src* to *dst*, raising FileExistsError instead of overwriting.

    ``os.rename`` silently replaces an existing file on POSIX; a case-only
    rename of the same file is still allowed.
    """
    if os.path.exists(dst) and not os.path.samefile(src, dst):
        raise FileExistsError(dst)
    os.rename(src, dst)


def rename_file(path, html, s, vid_id):
    """Rename the file per our settings.

    Returns the name of the file regardless of whether it has been renamed.
    """
    if s['do-not-rename-file']:
        return path

    number = s['video-number']
    if s['include-actress-in-video-name']:
        actress_string = get_actress_string(html, s)
        name_sep = s['delimiter-between-video-name-actress']
        if not number:
            stem = vid_id + name_sep + actress_string
        elif s['actress-before-video-number']:
            stem = (vid_id + name_sep + actress_string
                    + s['delimiter-between-multiple-videos'] + number)
        else:  # actress after
            stem = (vid_id + s['delimiter-between-multiple-videos'] + number
                    + name_sep + actress_string)
    elif strip_file_name(path) != vid_id:
        stem = vid_id
        if number:
            stem += s['delimiter-between-multiple-videos'] + number
    else:
        # easier to always return the file name so we can treat both cases the same
        # because when we move it (potentially) we don't care if it has been renamed
        return path

    base = strip_partial_path_from_file(path)
    # splitext keeps the dot and yields '' for names without an extension
    new_path = os.path.join(base, stem + os.path.splitext(path)[1])
    try:
        _rename_no_clobber(path, new_path)
    except OSError:
        # this happens on dupe or some other failure such as an invalid
        # filename (shouldn't happen), drive full, or filename too long
        return path
    return new_path


def _collect_actress_names(html, s, delimiter_key):
    """Return the actress names found in *html*, formatted per the settings.

    Two-word names are taken as "last first" and re-ordered according to
    ``s['name-order']`` ('first' or 'last'), joined by ``s[delimiter_key]``
    (a single space when *delimiter_key* is None).

    NOTE(review): the original nesting of the final ``else`` was lost with the
    indentation, so it is unclear whether names that are not exactly two words,
    or names seen with an unrecognised name-order, were dropped.  Here every
    such name is kept as javlibrary returned it, so no actress disappears.
    """
    names = []
    # 1 to end because first will have nothing
    for section in html.split('<span class="star">')[1:]:
        # fname is the full name
        fname = fix_actress_name(section.split('rel="tag">')[1].split('<')[0])
        if fname.count(' ') == 1:
            last, first = fname.split(' ')
            order = s['name-order'].lower()
            joiner = ' ' if delimiter_key is None else s[delimiter_key]
            if order == 'first':
                fname = first + joiner + last
            elif order == 'last':
                fname = last + joiner + first
        names.append(fname)
    return names


def get_actress_string(html, s):
    """Return the string of the actress names as per the naming convention specified.

    Takes in the html contents to filter out the actress names.  Names are
    joined with ``s['delimiter-between-multiple-actresses']``; if javlibrary
    lists no actresses, ``s['name-for-actress-if-blank']`` is returned.
    """
    a_list = get_actress_from_html(html, s)
    if not a_list:
        return s['name-for-actress-if-blank']
    return s['delimiter-between-multiple-actresses'].join(a_list)


def get_actress_string_txt(html, s):
    """Return the actress names joined with ``|`` for the metadata txt file.

    If javlibrary lists no actresses, ``s['name-for-actress-if-blank']`` is
    returned.
    """
    a_list = get_actress_from_html_txt(html, s)
    if not a_list:
        return s['name-for-actress-if-blank']
    return '|'.join(a_list)


def fix_actress_name(name):
    """Return the updated name for any actress based on our replacement scheme."""
    return _ACTRESS_NAME_FIXES.get(name, name)


def get_actress_from_html(html, s):
    """Return a list of actresses from the html.

    Actresses are strings formatted so they can be put straight in a file
    name, using ``s['delimiter-between-actress-names']`` between name parts.
    """
    return _collect_actress_names(html, s, 'delimiter-between-actress-names')


def get_actress_from_html_txt(html, s):
    """Return a list of actresses from the html, name parts separated by a space."""
    return _collect_actress_names(html, s, None)


def _report_path_too_long(vid_id):
    """Tell the user a file was not moved because the path was too long."""
    try:
        print("Path was too long so did not move " + vid_id)
    except Exception:
        print("Path was too long so did not move one file with unknown characters")


def create_and_move_video_into_folder(path, s, vid_id, html):
    """Create a folder and then move the given video into it.

    Path is the fullpath of the video.
    Returns the new path of the video.
    Returns the old path if the video could not be moved.
    """
    folder_name = strip_definition_from_video(vid_id)
    if s['include-actress-name-in-folder']:
        folder_name += s['delimiter-between-video-name-actress'] + get_actress_string(html, s)

    fullpath = os.path.join(strip_partial_path_from_file(path), folder_name)
    new_path = os.path.join(fullpath, path.split(os.sep)[-1])

    if os.path.isdir(fullpath):  # folder already exists so just try moving us there
        try:
            _rename_no_clobber(path, new_path)
        except FileExistsError:
            print('File already exists, could not sort. This might be a multiple file issue')
            return path
        return new_path

    try:
        os.makedirs(fullpath)
    except OSError:
        # this can occur if the folder already exists, name is too long, drive is full
        # since we still want to move files if the folder exists then we'll keep going if that's the case
        if not os.path.isdir(fullpath) and len(fullpath) > _MAX_PATH_LENGTH:
            _report_path_too_long(vid_id)
            return path

    if os.path.exists(new_path):
        print('Path exists')
    try:
        _rename_no_clobber(path, new_path)
    except OSError:  # this only happens if it exists or there's a catastrophic failure
        if len(new_path) > _MAX_PATH_LENGTH:
            _report_path_too_long(vid_id)
        return path
    return new_path


def strip_partial_path_from_file(path):
    """Given a file, return the path of the directory that contains it.

    Returns None if that directory does not exist (which includes a bare file
    name with no directory part).
    """
    partial_path = os.path.dirname(path)
    if os.path.exists(partial_path):
        return partial_path
    return None


def _strip_partial_path_helper(path, delimiter):
    """Return everything before the last *delimiter* in *path*.

    Legacy helper, no longer used by this module.
    """
    return path.rpartition(delimiter)[0]


def _cover_file_stem(vid_id, s, html):
    """Build the cover file name (without extension) from the settings.

    NOTE(review): the nesting of these conditions was reconstructed from a
    listing that had lost its indentation.  The second branch re-tests
    'include-cover-all', which only makes sense if it belongs to the
    'include-actress-name-in-cover' test, so that pairing is used here.
    Confirm against the repository.
    """
    fname = strip_definition_from_video(vid_id)
    if s['include-actress-name-in-cover']:
        if s['include-cover-all'] and s['include-actress-in-video-name']:
            if s['video-number']:
                fname += s['delimiter-between-multiple-videos'] + s['video-number']
            fname += s['delimiter-between-video-name-actress'] + get_actress_string(html, s)
    elif s['include-cover-all'] and s['video-number']:
        fname += s['delimiter-between-multiple-videos'] + s['video-number']
    return fname


def _crop_cover_to_poster(fullpath, keep_original):
    """Crop ``fullpath + '.jpg'`` in place so that only the front cover remains.

    When *keep_original* is true the uncropped image is first saved as
    ``fullpath + '-thumb.jpg'``.
    """
    # Pillow is only needed when cropping, so it is imported on demand.
    from PIL import Image

    cover_path = fullpath + ".jpg"
    original_cover = Image.open(cover_path)
    width, height = original_cover.size
    # Crop full cover to 378x539 to show only front cover: keep the right part
    left = width / 1.895734597
    cropped_cover = original_cover.crop((left, 0, width, height))
    if keep_original:
        original_cover.save(fullpath + "-thumb.jpg")
    cropped_cover.save(cover_path)


def get_cover_for_video(path, vid_id, s, html):
    """Get the cover for the video denoted by path from the specified html.

    The image is stored next to the video; its name follows the video's own
    name when files are not renamed, and the settings otherwise.
    """
    img_link = get_image_url_from_html(html)
    if s['do-not-rename-file']:
        fullpath = os.path.splitext(path)[0]
    else:
        base = strip_partial_path_from_file(path)
        fullpath = os.path.join(base, _cover_file_stem(vid_id, s, html))
    save_image_from_url_to_path(fullpath, img_link)

    if s['crop-cover-to-poster']:
        _crop_cover_to_poster(fullpath, s['keep-original-cover'])


def save_image_from_url_to_path(path, url):
    """Save the image denoted by *url* to ``path + '.jpg'``."""
    urllib.request.urlretrieve(url, path + ".jpg")
    # Original note: "if we move the file it should fix itself".
    # NOTE(review): what this move-and-move-back is meant to fix is not stated;
    # it is kept as a best-effort step and failures are ignored.
    try:
        drive = path.split(os.sep)[0]
        temp_location = drive + os.sep + path + '.jpg'
        os.rename(path + ".jpg", temp_location)
        os.rename(temp_location, path + ".jpg")
    except OSError:
        pass


def get_image_url_from_html(html):
    """Get the url of the cover image from the supplied html for the page.

    A ``src`` that already carries an http(s) scheme is returned unchanged;
    anything else (such as a protocol-relative ``//host/...``) gets ``http:``
    put in front of it.  Raises IndexError if the page has no cover image tag.
    """
    src = html.split('<img id="video_jacket_img" src="')[1].split('" width')[0]
    if src.startswith(('http://', 'https://')):
        return src
    return "http:" + src


def rename_start_quotation(path):
    """Rename the file so its name no longer contains apostrophes.

    Returns the new path, or the unchanged path when there was nothing to
    rename or the rename failed.
    """
    directory, fname = os.path.split(path)
    cleaned = fname.replace("'", '')  # replace ' with nothing
    if cleaned == fname:
        return path
    new_path = os.path.join(directory, cleaned)
    try:
        os.rename(path, new_path)
    except OSError:
        return path
    return new_path


def find_id(s):
    """Given a string s, try to find an id within it and return it (or None)."""
    match = re.search(r"[a-zA-Z]{2,8}[-]?[0-9]{2,5}", s)
    if match:
        return correct_vid_id(match.group())
    return None


def correct_vid_id(vid_id):
    """Return *vid_id* with a dash between its letters and its number.

    Also fixes ids that have their digits first (``123abc456`` becomes
    ``abc-123456``).  An id made of digits only is returned unchanged.
    """
    if vid_id[0] in _DIGITS:
        # position of the first non-digit, i.e. where the letters start
        findex = next((i for i, char in enumerate(vid_id) if char not in _DIGITS), None)
        if findex is None:
            return vid_id
        for i in range(findex, len(vid_id)):
            if vid_id[i] in _DIGITS:
                # move the leading digits behind the letters
                if '-' not in vid_id:
                    return vid_id[findex:i] + '-' + vid_id[:findex] + vid_id[i:]
                return vid_id[findex:i] + vid_id[:findex] + vid_id[i:]

    if '-' not in vid_id:
        if 'R18' in vid_id or 'T28' in vid_id:
            return vid_id[0:3] + '-' + vid_id[3:]
        for i, char in enumerate(vid_id):
            if char in _DIGITS:
                return vid_id[:i] + '-' + vid_id[i:]
    return vid_id


def strip_file_name(path):
    """Given a filepath, return the name of the file without its extension."""
    return path.split(os.sep)[-1].rpartition('.')[0]


def strip_full_file_name(path):
    """Given a filepath, return the full name with the filetype."""
    return path.split(os.sep)[-1]


def strip_bad_data(path):
    """Remove any data from the path that might conflict."""
    for str_to_remove in ('hjd2048.com', 'h264', 'play999'):
        path = path.replace(str_to_remove, '')
    return path


def sort_jav(s):
    """Sort all our unsorted jav as per the specified settings."""
    # store all the files to rename in a list so we don't mess with looping over the files
    root = s['path']
    candidates = [os.path.join(root, f) for f in os.listdir(root)]
    candidates = [p for p in candidates if not os.path.isdir(p)]
    total = len(candidates)

    for count, path in enumerate(candidates, start=1):
        file_stem = strip_file_name(path)
        # names without a recognisable id (e.g. r18/t28 files) fall back to the file name
        vid_id = find_id(strip_bad_data(file_stem)) or file_stem
        print("Sorting {0}: {1} of {2}".format(vid_id, count, total))
        try:
            s['video-number'] = strip_video_number_from_video(path, vid_id, s)
            if s['make-video-id-all-uppercase']:
                vid_id = vid_id.upper()
        except Exception:
            # the helpers raise for names that cannot be split like an id
            print("Not sorting {} as it is does not look like a JAV ID".format(vid_id))
            continue

        html = get_javlibrary_url(vid_id)
        if not html:
            try:
                print("Could not find video on javlibrary so skipping " + vid_id)
            except Exception:
                print("Skipping one file with unknown characters in the file name")
            continue

        # rename the file according to our convention
        new_fname = rename_file(path, html, s, vid_id)

        # write a txt file containing html metadata for parsing with Set-JAVNfo.ps1 script
        txt_path = None
        if s['include-html-txt']:
            txt_path = os.path.splitext(new_fname)[0] + '.txt'
            with open(txt_path, "w", encoding="utf-8") as text_file:
                text_file.write(html)
                # write actresses to html metadata file
                text_file.write("\n<ActressSorted>")
                text_file.write(get_actress_string_txt(html, s))
                text_file.write("</ActressSorted>")

        # move the file into a folder (if we say to)
        path = new_fname
        if s['move-video-to-new-folder']:
            path = create_and_move_video_into_folder(new_fname, s, vid_id, html)
            if s['include-html-txt']:
                move(txt_path, os.path.splitext(path)[0] + '.txt')

        # get the cover (if we say to)
        if s['include-cover']:
            get_cover_for_video(path, vid_id, s, html)


if __name__ == '__main__':
    try:
        print("Sorting your JAV, please wait...")
        settings = read_file('settings_sort_jav.ini')
        # create and warm up the shared session inside the error handler
        _get_scraper()
        sort_jav(settings)
        input("Press Enter to finish.")
    except Exception as e:
        print(e)
        print("Panic! Go find help.")

# --------------------------------------------------------------------------------
# Next file in this listing: /2.
Set-JAVNfo.ps1: -------------------------------------------------------------------------------- 1 | # Write nfo metadata file if html .txt from sort_jav.py exists 2 | function Set-JAVNfo { 3 | [CmdletBinding()] 4 | param( 5 | [Parameter()] 6 | [System.IO.FileInfo]$FilePath = ((Get-Content -Path (Join-Path -Path $PSScriptRoot -ChildPath 'settings_sort_jav.ini')) -match '^path').Split('=')[1], 7 | [Parameter()] 8 | [Switch]$Prompt 9 | ) 10 | 11 | function Show-FileChanges { 12 | # Display file changes to host 13 | $Table = @{Expression = { $_.Index }; Label = "#"; Width = 4 }, 14 | @{Expression = { $_.Name }; Label = "Name"; Width = 25 }, 15 | @{Expression = { $_.Path }; Label = "Directory" } 16 | $FileObject | Sort-Object Index | Format-Table -Property $Table | Out-Host 17 | } 18 | 19 | # Remove progress bar to speed up REST requests 20 | $ProgressPreference = 'SilentlyContinue' 21 | 22 | # Options from settings_sort_jav.ini 23 | $SettingsPath = Resolve-Path -Path (Join-Path -Path $PSScriptRoot -ChildPath 'settings_sort_jav.ini') 24 | $KeepMetadataTxt = ((Get-Content $SettingsPath) -match '^keep-metadata-txt').Split('=')[1] 25 | $AddGenres = ((Get-Content $SettingsPath) -match '^include-genre-metadata').Split('=')[1] 26 | $AddTags = ((Get-Content $SettingsPath) -match '^include-tag-metadata').Split('=')[1] 27 | $AddTitle = ((Get-Content $SettingsPath) -match '^include-video-title').Split('=')[1] 28 | $PartDelimiter = ((Get-Content $SettingsPath) -match '^delimiter-between-multiple-videos').Split('=')[1] 29 | $NameSetting = ((Get-Content $SettingsPath) -match '^actress-before-video-number').Split('=')[1] 30 | $R18TitleCheck = ((Get-Content $SettingsPath) -match '^prefer-r18-title').Split('=')[1] 31 | $R18MetadataCheck = ((Get-Content $SettingsPath) -match '^scrape-r18-other-metadata').Split('=')[1] 32 | $RenameCheck = ((Get-Content $SettingsPath) -match '^do-not-rename-file').Split('=')[1] 33 | $R18ThumbPath = ((Get-Content $SettingsPath) -match 
'^r18-export-csv-path').Split('=')[1] 34 | 35 | Write-Host "Metadata to be written:" 36 | # Write txt metadata file paths to $HtmlMetadata 37 | if ($RenameCheck -like 'true') { 38 | # Match all .txt files if you are not renaming files 39 | $HtmlMetadata = Get-ChildItem -LiteralPath $FilePath -Recurse | Where-Object { $_.Name -match '(.*).txt' } 40 | } 41 | else { 42 | $HtmlMetadata = Get-ChildItem -LiteralPath $FilePath -Recurse | Where-Object { $_.Name -match '[a-zA-Z]{1,8}-[0-9]{1,8}(.*.txt)' -or $_.Name -match 't28(.*).txt' -or $_.Name -match 'r18(.*).txt' } | Select-Object Name, BaseName, FullName, Directory 43 | } 44 | if ($null -eq $HtmlMetadata) { 45 | Write-Warning 'No metadata files found! Exiting...' 46 | pause 47 | } 48 | else { 49 | # Create table to show files being written 50 | $Index = 1 51 | $FileObject = @() 52 | foreach ($File in $HtmlMetadata) { 53 | $FileObject += New-Object -TypeName psobject -Property @{ 54 | Index = $Index 55 | Name = $File.BaseName 56 | Path = $File.Directory 57 | } 58 | $Index++ 59 | } 60 | # Default prompt yes 61 | $Input = 'y' 62 | if ($Prompt) { 63 | Show-FileChanges 64 | Write-Host 'Do you want to write nfo metadata for these files?' 65 | Write-Host 'Confirm changes?' 66 | $Input = Read-Host -Prompt '[Y] Yes [N] No (default is "N")' 67 | } 68 | if ($Input -like 'y' -or $Input -like 'yes') { 69 | Write-Host "Writing metadata .nfo files in path: $FilePath ..." 
70 | # Write each nfo file 71 | $Count = 1 72 | $Total = $HtmlMetadata.Count 73 | if ($R18ThumbPath) { 74 | $ActorThumbs = Import-Csv -Path $R18ThumbPath 75 | } 76 | foreach ($MetadataFile in $HtmlMetadata) { 77 | # Read html txt 78 | $FileLocation = $MetadataFile.FullName 79 | # Read and encode html in UTF8 for better reading of asian characters and symbols 80 | $HtmlContent = Get-Content -LiteralPath $FileLocation -Encoding UTF8 81 | $FileName = $MetadataFile.BaseName 82 | $NfoName = $MetadataFile.BaseName + '.nfo' 83 | $VideoId = ($FileName -split "$PartDelimiter")[0] 84 | $NfoPath = Join-Path -Path $MetadataFile.Directory -ChildPath $NfoName 85 | # Check if the video has multiple parts 86 | # If it does, write the part number to a variable 87 | if ($NameSetting -like 'true') { 88 | $PartNumber = $FileName[-1] 89 | } 90 | else { 91 | if ($PartDelimiter -like '-') { 92 | $PartNumber = ($FileName -split ($PartDelimiter))[2] 93 | } 94 | else { 95 | $PartNumber = ($FileName -split ($PartDelimiter))[1] 96 | } 97 | } 98 | # Get video title name from html with regex 99 | $Title = $HtmlContent -match '<title>(.*) - JAVLibrary<\/title>' 100 | # Remove broken HTML causing title not to write correctly 101 | $TitleFixHTML = ($Title -replace '"', '') -replace '#39;s', '' 102 | $TitleFixed = ((($TitleFixHTML -replace '<title>', '') -replace '- JAVLibrary', '').Trim()) 103 | if ($R18TitleCheck -like 'true' -or $R18MetadataCheck -like 'true') { 104 | # Perform a search on R18.com for the video ID 105 | $R18Search = Invoke-WebRequest "https://www.r18.com/common/search/searchword=$VideoId/" 106 | $R18Url = (($R18Search.Links | Where-Object {$_.href -like "*/videos/vod/movies/detail/-/id=*"}).href) 107 | } 108 | if ($R18TitleCheck -like 'true') { 109 | if ($R18Search.Content -match "data-product-page-url=`"https://www.r18.com/videos/vod/movies/detail") { 110 | $R18Title = (((($R18Search.Content -split "data-title=`"")[1] -split "data-title=`"")[0] -split 
"data-description")[0].Trim()) -replace ".$" 111 | } 112 | else { 113 | $R18Title = $null 114 | } 115 | if ($null -like $R18Title -or '' -like $R18Title) { 116 | $TitleFixed = ((($TitleFixHTML -replace '', '') -replace '- JAVLibrary', '').Trim()) 117 | } 118 | else { 119 | $TitleFixed = "$VideoId $R18Title" 120 | } 121 | } 122 | # Since the above does a split to find if it's a part 123 | # Match if the part number found is a one digit number 124 | if ($PartNumber -match '^\d$') { 125 | $Temp = $TitleFixed.Split(' ')[0] + ' ' + "($PartNumber) " 126 | $Temp2 = $TitleFixed.Split(' ')[1..$TitleFixed.Length] -join ' ' 127 | # If html file is detected as multi-part, create a new title as "VidID (Part#) Title" 128 | $TitleFixed = ($Temp + $Temp2) 129 | } 130 | # Scrape series title from R18 131 | $R18SeriesTitle = $null 132 | $R18DirectorName = $null 133 | if ($R18MetadataCheck -like 'true') { 134 | if ($R18Url) { 135 | if ($R18Url.Count -gt 1) { 136 | $R18Search = Invoke-WebRequest -Uri $R18Url[0] -Method Get 137 | } 138 | else { 139 | $R18Search = Invoke-WebRequest -Uri $R18Url -Method Get 140 | } 141 | # Scrape series title from R18 142 | $R18SeriesUrl = $R18Search.Links.href | Where-Object { $_ -match "Type=series\/" } 143 | if ($null -ne $R18SeriesUrl) { 144 | $R18SeriesSearch = Invoke-WebRequest -Uri $R18SeriesUrl -Method Get 145 | $R18SeriesTitle = (((((($R18SeriesSearch.Content -split "
")[1]) -split "
")[1]) -split "")[1]) -split "
")[0]) 148 | if ($R18DirectorString -notmatch '----') { 149 | $R18DirectorName = $R18DirectorString.Trim() 150 | } 151 | } 152 | } 153 | $VideoTitle = $TitleFixed 154 | $ReleaseDate = ((($HtmlContent -match '\d{4}-\d{2}-\d{2}<\/td>') -split '') -split '(.*)<\/a>')) -split 'rel="tag">') -split '  ')[1] 157 | $Genres = (($HtmlContent -match 'rel="category tag">(.*)<\/a><\/span><\/td>') -Split 'rel="category tag">') 158 | # Write metadata to file 159 | Set-Content -LiteralPath $NfoPath -Value '' -Force 160 | Add-Content -LiteralPath $NfoPath -Value '' 161 | if ($AddTitle -like 'true') { Add-Content -LiteralPath $NfoPath -Value " $VideoTitle" } 162 | Add-Content -LiteralPath $NfoPath -Value " $ReleaseYear" 163 | Add-Content -LiteralPath $NfoPath -Value " $ReleaseDate" 164 | if ($R18DirectorName) { Add-Content -LiteralPath $NfoPath -Value " $R18DirectorName" } 165 | Add-Content -LiteralPath $NfoPath -Value " $Studio" 166 | if ($AddGenres -like 'true') { 167 | foreach ($Genre in $Genres[1..($Genres.Length - 1)]) { 168 | $GenreString = (($Genre.Split('<'))[0]).Trim() 169 | Add-Content -LiteralPath $NfoPath -Value " $GenreString" 170 | } 171 | } 172 | if ($AddTags -like 'true') { 173 | foreach ($Genre in $Genres[1..($Genres.Length - 1)]) { 174 | $GenreString = (($Genre.Split('<'))[0]).Trim() 175 | Add-Content -LiteralPath $NfoPath -Value " $GenreString" 176 | } 177 | } 178 | if ($R18SeriesTitle) { Add-Content -LiteralPath $NfoPath -Value " Series: $R18SeriesTitle"} 179 | # Add actress metadata 180 | $ActorSplitString = '' 181 | $ActorSplitHtml = $HtmlContent -split $ActorSplitString 182 | $Actors = @() 183 | foreach ($Section in $ActorSplitHtml) { 184 | $FullName = (($Section -split "rel=`"tag`">")[1] -split "<\/a><\/span>")[0] 185 | if ($FullName -ne '') { 186 | if ($FullName.Length -lt 25) { 187 | $Actors += $FullName 188 | } 189 | } 190 | } 191 | foreach ($Actor in $Actors) { 192 | if ($R18ThumbPath) { 193 | $Index = [array]::indexof(($ActorThumbs.Name).ToLower(), 
$Actor.ToLower()) 194 | if ($Index -eq '-1' -or $null -eq $Index) { 195 | $Content = @( 196 | " " 197 | " $Actor" 198 | " " 199 | ) 200 | } 201 | else { 202 | $ActorThumb = $ActorThumbs.ThumbUrl[$Index] 203 | $Content = @( 204 | " " 205 | " $Actor" 206 | " $ActorThumb" 207 | " " 208 | ) 209 | } 210 | } 211 | else { 212 | $Content = @( 213 | " " 214 | " $Actor" 215 | " " 216 | ) 217 | } 218 | Add-Content -LiteralPath $NfoPath -Value $Content 219 | } 220 | # End file 221 | Add-Content -LiteralPath $NfoPath -Value '' 222 | Write-Output "($Count of $Total) $FileName .nfo processed..." 223 | # Remove html txt file 224 | if ($KeepMetadataTxt -eq 'false') { 225 | Remove-Item -LiteralPath $MetadataFile.FullName 226 | } 227 | $Count++ 228 | } 229 | pause 230 | } 231 | else { 232 | Write-Warning 'Cancelled by user input. Exiting...' 233 | pause 234 | } 235 | } 236 | } 237 | 238 | 239 | Set-JAVNfo -Prompt -------------------------------------------------------------------------------- /3. Sort-7mmtv.ps1: -------------------------------------------------------------------------------- 1 | # This script is very basic in functionality and not well-tested 2 | # If you encounter a bug, please open an issue or send me a message directly 3 | # Use at your own risk 4 | 5 | $FilePath = ((Get-Content -Path (Join-Path -Path $PSScriptRoot -ChildPath 'settings_sort_jav.ini')) -match '^7mm-files-path').Split('=')[1] 6 | $Videos = Get-ChildItem -Path $FilePath | Where-Object {$_.Extension -like ".mp4" ` 7 | -or $_.Extension -like ".mkv"` 8 | -or $_.Extension -like ".wmv"` 9 | -or $_.Extension -like '.avi'` 10 | -or $_.Extension -like '.flv'} 11 | $Count = 1 12 | $Total = $Videos.Count 13 | Write-Host "Starting scrape for directory $FilePath..." 
14 | foreach ($Video in $Videos) { 15 | $Result = $true 16 | $VideoId = ($Video.BaseName).ToUpper() 17 | $r = Invoke-WebRequest ' https://7mmtv.tv/en/amateurjav_random/all/index.html' -SessionVariable my_session 18 | $form = $r.Forms[0] 19 | $form.fields['search_keyword'] = $VideoId 20 | $GoogleScrape = Invoke-WebRequest -Uri ('https://7mmtv.tv/en/searchform_search/all/index.html' + $form.Action) -WebSession $my_session -Method POST -Body $form.Fields 21 | $7mmLink = (((((($GoogleScrape.Links.href -match '7mmtv.tv/../amateurjav_content')) -replace '7mmtv.tv/..', '7mmtv.tv/ja') -replace '\/url\?q=', '') -split "&")[0]) 22 | if ($7mmLink -notmatch $VideoId) { 23 | $r = Invoke-WebRequest 'https://7mmtv.tv/en/uncensored_random/all/index.html' -SessionVariable my_session 24 | $form = $r.Forms[0] 25 | $form.fields['search_keyword'] = $VideoId 26 | $GoogleScrape = Invoke-WebRequest -Uri ('https://7mmtv.tv/en/searchform_search/all/index.html' + $form.Action) -WebSession $my_session -Method POST -Body $form.Fields 27 | $7mmLink = (((((($GoogleScrape.Links.href -match '7mmtv.tv/../uncensored_content')) -replace '7mmtv.tv/..', '7mmtv.tv/ja') -replace '\/url\?q=', '') -split "&")[0]) 28 | if (($7mmLink -replace "%2520", " ") -notmatch $VideoId -or $null -like $7mmLink -or $7mmLink -like '') { 29 | "$VideoId not found on 7mmtv. Skipping..." 
30 | $Result = $false 31 | } 32 | } 33 | if ($Result -eq $true) { 34 | New-Item -ItemType Directory -Path (Join-Path -Path $Video.DirectoryName -ChildPath $VideoId) 35 | Move-Item -Path $Video.FullName -Destination (Join-Path -Path (Join-Path -Path $Video.DirectoryName -ChildPath $VideoId) -Childpath $Video.Name) 36 | $NfoPath = (Join-Path -Path (Join-Path -Path $Video.DirectoryName -ChildPath $VideoId) -Childpath "$VideoId.txt") 37 | $7mmScrape = Invoke-WebRequest -Uri $7mmLink 38 | $Cover = ($7mmScrape.images | Where-Object {$null -ne $_.title -and $_.title.length -gt 1} | Select-Object src) 39 | Invoke-WebRequest $Cover.src -OutFile (Join-Path -Path (Join-Path -Path $Video.DirectoryName -ChildPath $VideoId) -Childpath "$VideoId.jpg") 40 | $ScrapedTitle = ((($7mmScrape.Content -split "")[1]) -split " -")[0] 41 | $Studio = ((((($7mmScrape.Content -split "<li class='posts-message'><a target=`"_top`"")[1]) -split ".html'>")[1]) -split "<\/a>")[0] 42 | if ($Studio -like "----") { 43 | $Studio = ((((($7mmScrape.Content -split "<li class='posts-message'><a target=`"_top`" href='https:\/\/7mmtv.tv\/ja\/amateurjav_makersr")[1]) -split ".html'>")[1]) -split "<\/a>")[0] 44 | } 45 | $ReleaseDate = ((($7mmScrape.Content -split "<li class='posts-message'>")[2]) -split "<\/li>")[0] 46 | #$ReleaseDate = ((((($7mmScrape.Content -split "配信開始日:<\/li>")[1]) -split ">")[1]) -split "<")[0] 47 | $ReleaseYear = ($ReleaseDate.Split('-'))[0] 48 | $Genres = $7mmScrape.Links | Where-Object { $_.outerHTML -match '_category\/' } 49 | $GenreObject = @() 50 | foreach ($Genre in $Genres) { 51 | $GenreObject += (((($Genre -split "_category\/\d{1,6}\/")[1]) -split "\/")[0]) 52 | } 53 | $Actors = $7mmScrape.Links.href | Where-Object {$_ -like "*avperformer*"} 54 | $ActorObject = @() 55 | foreach ($Actor in $Actors) { 56 | $ActorObject += ((($Actor -split "\d{1,6}\/")[1]) -split "\/")[0] 57 | } 58 | <# 59 | Write-Host "Link is: $7mmLink" 60 | Write-Host "Title is: $ScrapedTitle" 61 | Write-Host 
"Studio is: $Studio" 62 | Write-Host "Genres is: $GenreObject" 63 | Write-Host "Actor is: $ActorObject" 64 | Write-Host "Release date: $ReleaseDate" 65 | Write-Host "Release year: $ReleaseYear" 66 | #> 67 | # Write metadata to file 68 | Set-Content -LiteralPath $NfoPath -Value '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>' 69 | Add-Content -LiteralPath $NfoPath -Value '<movie>' 70 | Add-Content -LiteralPath $NfoPath -Value " <title>$ScrapedTitle" 71 | Add-Content -LiteralPath $NfoPath -Value " $ReleaseYear" 72 | Add-Content -LiteralPath $NfoPath -Value " $ReleaseDate" 73 | Add-Content -LiteralPath $NfoPath -Value " $Studio" 74 | foreach ($Genre in $GenreObject) { 75 | Add-Content -LiteralPath $NfoPath -Value " $Genre" 76 | } 77 | foreach ($Actor in $ActorObject) { 78 | $Content = @( 79 | " " 80 | " $Actor" 81 | " " 82 | ) 83 | Add-Content -LiteralPath $NfoPath -Value $Content 84 | } 85 | # End file 86 | Add-Content -LiteralPath $NfoPath -Value '' 87 | $Content = Get-Content $NfoPath 88 | $NfoFile = Join-Path -Path (Join-Path -Path $Video.DirectoryName -ChildPath $VideoId) -Childpath "$VideoId.nfo" 89 | $Content | Out-File -FilePath $NfoFile -Encoding utf8 90 | Remove-Item -Path $NfoPath 91 | Write-Output "($Count of $Total) $VideoId .nfo processed..." 
92 | $Count++ 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /Get-R18ThumbUrls.ps1: --------------------------------------------------------------------------------
function Get-R18ThumbUrl {
    <#
    .SYNOPSIS
        Requests R18 actress listing pages and appends the actor thumbnails found on them to a csv.
    .PARAMETER StartPage
        First listing page number to request (inclusive).
    .PARAMETER EndPage
        Last listing page number to request (inclusive).
    .PARAMETER ExportPath
        Csv file that receives one row (src, alt) per matching image. Rows are appended.
    #>
    [CmdletBinding()]
    param(
        [Parameter(Mandatory = $true)]
        [int]$StartPage,
        [Parameter(Mandatory = $true)]
        [int]$EndPage,
        [Parameter(Mandatory = $true)]
        [System.IO.FileInfo]$ExportPath
    )

    for ($Counter = $StartPage; $Counter -le $EndPage; $Counter++) {
        $Page = Invoke-WebRequest -Uri "https://www.r18.com/videos/vod/movies/actress/letter=a/sort=popular/page=$Counter/"

        # Keep only images whose src is under /actjpgs/ and whose alt text is set
        $Results = $Page.Images | Select-Object src, alt | Where-Object {
            $_.src -like '*/actjpgs/*' -and $_.alt -notlike $null
        }

        # Export-Csv rejects a null InputObject, so skip pages that yielded no thumbnails
        if ($Results) {
            $Results | Export-Csv -Path $ExportPath -Force -Append -NoTypeInformation
        }
        Write-Host "Page $Counter added to $ExportPath"
    }
}

function Set-NameOrder {
    <#
    .SYNOPSIS
        Backs up the scraped csv and outputs de-duplicated actor objects (Name, ThumbUrl).
    .DESCRIPTION
        Copies the csv in Path to '<script root>/db/r18thumb_original.csv', strips '...' from every
        'alt' value and, when the caller-scope variable $NameOrder equals 'true', swaps the first
        word of the name with the remainder. Any other $NameOrder value leaves the word order
        unchanged. Only the first row for each name (case-insensitive) is kept.
    .PARAMETER Path
        Csv file with 'src' and 'alt' columns, as written by Get-R18ThumbUrl.
    .OUTPUTS
        One object per unique name with the properties Name and ThumbUrl.
    #>
    [CmdletBinding()]
    param(
        [Parameter(Mandatory = $true)]
        [System.IO.FileInfo]$Path
    )

    # Create backup directory in scriptroot
    $BackupPath = Join-Path -Path $PSScriptRoot -ChildPath "db"
    if (!(Test-Path $BackupPath)) {
        # Out-Null: New-Item emits the created directory object, which would otherwise become part
        # of this function's output and be exported as an extra, empty csv row by the caller
        New-Item -ItemType Directory -Path $BackupPath -ErrorAction SilentlyContinue | Out-Null
    }

    # Copy original scraped thumbs to backup directory
    Write-Host "Backing up original scraped csv file to $BackupPath"
    Copy-Item -Path $Path -Destination (Join-Path $BackupPath -ChildPath "r18thumb_original.csv")
    Write-Host "Writing to cleaned names to csv... please wait"

    # @() keeps a one-row csv an array, so each row is handled as a row rather than as characters
    $R18Thumbs = @(Import-Csv -Path $Path)
    $SwapOrder = ($NameOrder -eq 'true')

    # Case-insensitive set of names already emitted; replaces the quadratic "-in $Temp.Name" scan
    $SeenNames = [System.Collections.Generic.HashSet[string]]::new([System.StringComparer]::InvariantCultureIgnoreCase)

    $RowIndex = 0
    foreach ($Row in $R18Thumbs) {
        # Remove periods from R18 scrape
        $Name = ([string]$Row.alt).Replace('...', '')

        $Parts = $Name.Split(' ')
        if ($SwapOrder -and $Parts.Count -gt 1 -and $Parts[1].Length -ne 0) {
            # $Last receives every word after the first; string interpolation joins them with spaces
            $First, $Last = $Parts
            $CleanName = "$Last $First"
        }
        else {
            $CleanName = $Name.TrimEnd()
        }

        # Add() returns $false for a name that was already seen, so only the first row is emitted
        if ($SeenNames.Add($CleanName)) {
            [pscustomobject]@{
                Name     = $CleanName
                ThumbUrl = $Row.src
            }
        }

        if (($RowIndex % 20) -eq 0) { Write-Host '.' -NoNewline }
        $RowIndex++
    }
}

# Removes PowerShell progress bar which speeds up Invoke-WebRequest calls
$ProgressPreference = 'SilentlyContinue'

# Check settings file for config options (the file is read once and reused for every key)
$SettingsPath = Resolve-Path -Path (Join-Path -Path $PSScriptRoot -ChildPath 'settings_sort_jav.ini')
$Settings = @(Get-Content $SettingsPath)
$NameOrder = ($Settings -match '^swap-name-order').Split('=')[1]
$StartPage = ($Settings -match '^r18-start-page').Split('=')[1]
$EndPage = ($Settings -match '^r18-end-page').Split('=')[1]
$CsvExportPath = ($Settings -match '^r18-export-csv-path').Split('=')[1]

# Write thumb links csv file
if (!(Test-Path -Path $CsvExportPath)) {
    Get-R18ThumbUrl -StartPage $StartPage -EndPage $EndPage -ExportPath $CsvExportPath
}

else {
    Write-Host "File specified in r18-export-csv-path already exists. Replace?"
    # $Input is a PowerShell automatic variable, so the answer is kept in its own variable
    $Answer = Read-Host -Prompt '[Y] Yes [N] No (default is "N")'
    if ($Answer -like 'y') {
        # Create backup directory in scriptroot
        $BackupPath = Join-Path -Path $PSScriptRoot -ChildPath "db"
        if (!(Test-Path $BackupPath)) {
            New-Item -ItemType Directory -Path $BackupPath -ErrorAction SilentlyContinue | Out-Null
        }

        # Copy original scraped thumbs to backup directory
        Copy-Item -Path $CsvExportPath -Destination (Join-Path $BackupPath -ChildPath "r18thumb_original.csv")

        # Get-R18ThumbUrl only ever appends. Without removing the old file the new rows would be
        # added to the previous contents (whose columns may already be Name/ThumbUrl) instead of
        # replacing them.
        Remove-Item -Path $CsvExportPath
        Get-R18ThumbUrl -StartPage $StartPage -EndPage $EndPage -ExportPath $CsvExportPath
    }

    else {
        Write-Host "Are you trying to rewrite the original scraped csv?"
        $Answer = Read-Host -Prompt '[Y] Yes [N] No (default is "N")'
        if ($Answer -notlike 'y') {
            Write-Warning "Cancelled by user input. Exiting..."
            return
        }
    }
}

# Write fixed names to original csv file while backing up original to 'db' directory
$ActorCsv = Set-NameOrder -Path $CsvExportPath

# First csv rewrite - names only
$ActorCsv | Select-Object Name, ThumbUrl | Export-Csv $CsvExportPath -Force -NoTypeInformation
Write-Host "R18 actor thumb urls written to $CsvExportPath"
pause
-------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Ohura, Marc Runkel, jvlflame 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # JAV-Sort-Scrape-javlibrary 2 | 3 | [![GitHub release](https://img.shields.io/github/release/jvlflame/JAV-Sort-Scrape-javlibrary?style=flat-square)](https://github.com/jvlflame/JAV-Sort-Scrape-javlibrary/releases) 4 | [![Commits since latest release](https://img.shields.io/github/commits-since/jvlflame/JAV-Sort-Scrape-javlibrary/latest/master?style=flat-square)](#) 5 | [![Last commit](https://img.shields.io/github/last-commit/jvlflame/JAV-Sort-Scrape-javlibrary?style=flat-square)](https://github.com/jvlflame/JAV-Sort-Scrape-javlibrary/commits/master) 6 | [![Discord](https://img.shields.io/discord/608449512352120834?style=flat-square)](https://discord.gg/K2Yjevk) 7 | 8 | ## [Dec 16, 2019] Final Update 9 | **The `JAV-Sort-Scrape-javlibrary` repository is officially in maintenance mode and will not receive any future updates. Please check out the successor to this project, [Javinizer](https://github.com/jvlflame/Javinizer).** 10 | 11 | --- 12 | 13 | The JAV-Sort-Scrape-javlibrary repository is a series of scripts used to manage your local JAV (Japanese Adult Video) library. It automatically scrapes content from JavLibrary, R18, and 7mmtv to create an easily usable content library within Emby or Jellyfin. My goal in maintaining this project is for it to function as a simple and lightweight alternative to [JAVMovieScraper](https://github.com/DoctorD1501/JAVMovieScraper). If you have any questions, criticisms, or requests, or want to be kept up-to-date on any new features and releases, feel free to hop into my [discord channel](https://discord.gg/K2Yjevk). 14 | 15 | Big thanks to the original author of the sort_jav.py script 16 | [/u/Oppaira](https://reddit.com/user/Oppaira). 
17 | 18 | ## Demo 19 | 20 | [**Emby thumbnail demo**](https://gfycat.com/kindheartedethicalgerenuk) 21 | 22 | ![Demo](extras/demo.gif) ![jf-example](extras/jf-example.png) 23 | 24 | ## Table of Contents: 25 | 26 | - [Changelog](#Change-Notes) 27 | - [Getting Started](#Getting-Started) 28 | - [Usage](#Usage) 29 | - [Notes](#notes) 30 | - [FAQ](#FAQ) 31 | - [Feature Ideas](#Feature-Ideas) 32 | 33 | ## Changelog 34 | 35 | **Older changes have been moved to the [wiki.](https://github.com/jvlflame/JAV-Sort-Scrape-javlibrary/wiki)** 36 | 37 | ### v 1.5.4 (Current release) 38 | 39 | - Changes 40 | - Add actor thumbnail URL directly in the .nfo metadata within `Set-JAVNfo.ps1` to work with 41 | Plex 42 | - Change some default video naming settings in `settings_sort_jav.ini` 43 | - Change directory structure for `Get-R18ThumbUrls.ps1` 44 | - Add updated actor .csv files 45 | - Fixes 46 | - Fix `Sort-7mmtv.ps1` not sorting some videos 47 | - Fix `$SettingsPath` on `Get-R18ThumbUrls.ps1` 48 | 49 | ### v 1.5.3 50 | 51 | - Additions 52 | - Add setting `scrape-r18-other-metadata` to scrape and set R18.com metadata for series and 53 | video director 54 | - **Experimental** - Add script `Sort-7mmtv.ps1` to scrape and set 7mmtv.tv metadata for 55 | uncensored and amateur JAV 56 | - Fixes 57 | - Fix setting `do-not-rename-file` causing script to break when setting is true 58 | 59 | ### v 1.5.2 60 | 61 | - Additions 62 | - Add setting `prefer-r18-title` to prefer R18.com title for video metadata 63 | - ~~Add setting `do-not-rename-file` to run sort_jav.py without renaming video files~~ 64 | **Removed Sep 10, 2019 due to breaking bug** 65 | - Fixes 66 | - Fix html causing inconsistencies on video titles in Emby 67 | - Fix Set-JAVNfo.ps1 not writing metadata for videos with naming starting with "R18" 68 | - Fix R18 titles not scraping with PowerShell 6.0+ 69 | 70 | ### v.1.5.1 71 | 72 | - Fixes 73 | - Fix error causing nfo metadata file not being read by Emby 74 | - Fix Set-JAVNfo.ps1 
file compatibility with PowerShell Core 6.0+ 75 | 76 | ## Getting Started 77 | 78 | ### Prerequisites 79 | 80 | - [Python 3.5+](https://www.python.org/downloads/) 81 | - [Pillow](https://pypi.org/project/Pillow/) 82 | - [cfscrape](https://pypi.org/project/cfscrape/) - requires Node.js 83 | - [cloudscraper](https://github.com/VeNoMouS/cloudscraper/tree/master/cloudscraper) 84 | - [PowerShell 5.0 or higher (6.0+ recommended)](https://github.com/PowerShell/PowerShell) 85 | 86 | ### Installing 87 | 88 | [Clone the repository](https://github.com/jvlflame/JAV-Sort-Scrape-javlibrary/archive/master.zip) or 89 | [download the latest release](https://github.com/jvlflame/JAV-Sort-Scrape-javlibrary/releases). 90 | 91 | #### Install Pillow module on Python 92 | 93 | ```python 94 | # Required to crop cover images 95 | > pip install Pillow 96 | ``` 97 | 98 | #### Install cfscrape module on Python 99 | 100 | ```python 101 | # Required to scrape JavLibrary 102 | # If you are using the latest release, install cfscrape 103 | # If you cloned the repository, install cloudscraper 104 | 105 | # Used in latest commit in master 106 | > pip install cloudscraper 107 | 108 | # Used in latest release 109 | > pip install cfscrape 110 | ``` 111 | 112 | #### Install Node.js 113 | 114 | Download and install from [here](https://nodejs.org/en/download/). 115 | 116 | #### You will need PowerShell v5.0 or higher installed to run any of the .ps1 scripts (PowerShell 5.0 is installed on Windows 10 by default). If you get a Remote-ExecutionPolicy error when running, open an **administrator** PowerShell prompt, and run the following to unrestrict the scripts: 117 | 118 | ```powershell 119 | PS> Set-ExecutionPolicy Unrestricted 120 | ``` 121 | 122 | ## Usage 123 | 124 | **Before running any of the scripts, configure your settings in `settings_sort_jav.ini`**. 125 | Documentation for each option is listed in the settings file, with defaults set to my best practice 126 | guideline. 
Most notably, you will need to change each of the path settings to match your local 127 | directory structure. Completely read through this section, the [Notes](#Notes), and [FAQ](#FAQ) 128 | section before using. 129 | 130 | If you are running this for the first time and you have many directories of videos, you can run the 131 | `sort_jav.py` or `Set-JAVNfo.ps1` scripts concurrently with many different paths. Simply edit the 132 | `path` in your settings file, save, and run the script. Edit `path` and run again, etc. 133 | 134 | **_To run PowerShell (.ps1) scripts, right click the file and select "Run with PowerShell". To run 135 | Python (.py) scripts, double click to run._** You can also invoke the scripts from a 136 | **non-administrator** shell like shown in the demo. 137 | 138 | ### Run order 139 | 140 | The scripts are numbered in the order that they should be run. They were written with ease-of-use in 141 | mind, so they are a one-click solution once your settings are configured properly. 142 | 143 | 1. Run `sort_jav.py` to sort your JAV files. - **_Stop here if you don't use any media servers that 144 | use .nfo metadata_** 145 | 146 | 2. Run `Get-R18ThumbUrls.ps1` to scrape R18 and get actor thumbnail urls and write to .csv 147 | spreadsheet specified in `r18-export-csv-path` (you only need to do this once every so often when 148 | new actors are added to R18). 149 | 150 | - I have provided two recently scraped spreadsheets in the repository as 151 | `emby_actor_thumbs/R18-Aug-30-2019.csv` Use last-first if your `name-order` in your settings 152 | is set to _last_, and vice-versa. 153 | 154 | 3. Run `Set-JAVNfo.ps1` to create .nfo metadata files for each video. - **_Stop here if you are 155 | using Plex or you don't want actor images in Emby/Jellyfin_** 156 | 157 | - You can re-run this script on existing directories to replace old metadata files if you make 158 | changes to your settings 159 | 160 | 4. 
Go to your Emby server and make sure all your videos are imported. This is important, as the next 161 | step will call Emby's API to get your current actor list. Take a look at 162 | [Emby's documentation](https://github.com/MediaBrowser/Emby/wiki/Api-Key-Authentication) on how 163 | to generate an API key to insert into your settings file. 164 | 165 | 5. Run `Get-EmbyActorThumbs.ps1` to get Emby actor name/id to compare with R18 spreadsheet and 166 | create a .csv spreadsheet specified in `actor-csv-export-path`. Make sure the spreadsheet you 167 | created in `Get-R18ThumbUrls.ps1` is specified in `r18-export-csv-path` as it will be referenced 168 | in this script. 169 | 170 | - After creating this spreadsheet, you can modify specific actor images manually 171 | - You can re-run this script after importing new videos/actors into Emby to update your 172 | spreadsheet 173 | 174 | 6. Run `Set-EmbyActorThumbs.ps1` to import actor images into Emby. It will import based on the 175 | spreadsheet specified in `actor-csv-export-path`. A .csv database will be created to record all 176 | changes made to your actor images specified in `actor-csv-database-path`. 177 | 178 | - After your first write to Emby, your .csv database will be created 179 | - Any changes made to your `actor-csv-export-path` file will be compared to the database in 180 | `actor-csv-database-path` and be written accordingly 181 | 182 | 7. **Optional** Run `Sort-7mmtv.ps1` to download a cover and create a metadata file for uncensored and 183 | amateur JAV videos as listed on 7mmtv.tv. There are currently no configurable settings for this 184 | script besides the path `7mm-files-path`. By default, the video will be moved to a new folder, 185 | have a cover image downloaded, and have a .nfo metadata file created. 
Review additional notes and how to 186 | run in the [notes](#sort-7mmtvps1), and in the settings file comments above `7mm-files-path` 187 | 188 | If you are having trouble with any of these steps, review my [script-run demos](#demo), or send me a 189 | message in my [discord channel](https://discord.gg/K2Yjevk). 190 | 191 | ### Media library setup 192 | 193 | #### Emby/Jellyfin 194 | 195 | Set up a `Movies` media library, and select `show advanced settings`. Uncheck all settings that try 196 | to get movie metadata from the internet. 197 | 198 | #### Running on Plex 199 | 200 | Install the [XBMCnfoMoviesImporter](https://github.com/gboudreau/XBMCnfoMoviesImporter.bundle) 201 | plugin on your Plex Media Server. Set up a `Movies` media library, and under the `Advanced` 202 | settings, set XBMCnfoMoviesImporter as the scanner agent. Leave all other settings default. 203 | 204 | ## Notes 205 | 206 | ### sort_jav.py 207 | 208 | - Matches your jav file and scrapes javlibrary for content 209 | - Renames and sorts your jav file to your specified settings 210 | 211 | sort_jav.py will run a non-recursive search of video files located in the `path` specified in your 212 | settings file. Files located in folders will not be detected. 213 | 214 | If you are trying to sort a video with multiple parts, follow any of the naming schemas below: 215 | 216 | 1. MIRD-151A, MIRD-151B 217 | 218 | 2. MIRD151A, MIRD-151B 219 | 220 | 3. MIRD-151[delimiter-between-multiple-videos]A, MIRD-151[delimiter-between-multiple-videos]B 221 | 222 | 4. 
MIRD-151[delimiter-between-multiple-videos]1[delimiter-between-multiple-videos], 223 | MIRD-151[delimiter-between-multiple-videos]2[delimiter-between-multiple-videos] 224 | 225 | ### Set-JAVNfo.ps1 226 | 227 | - Matches all html .txt files created by sort_jav.py 228 | - Creates a .nfo metadata file that is readable by Media servers like Emby/Jellyfin 229 | 230 | `Set-JAVNfo.ps1` will run a recursive search of .txt files located in the `path` specified in your 231 | settings file. A .nfo metadata file will be generated with information such as title, release date, 232 | studio, genres, actors, actor thumb URL. Set `prefer-r18-titles` true to do an additional scrape of 233 | R18.com for better translated titles in your metadata. 234 | 235 | ### Get-R18ThumbUrls.ps1 236 | 237 | - Scrapes R18 for all actor thumbnails and creates a csv database for actor name and their thumbnail 238 | URL. 239 | 240 | `Get-R18ThumbUrls.ps1` will take a while to run, as it needs to parse over 300 pages of R18 actors. 241 | I have provided recent (Aug-30-2019) scrape files created with this script for you to use if you do 242 | not want to create your own file. Use `R18-Aug-30-2019-last-first.csv` if you have `name-order` set 243 | to _last_, and vice versa. Specify this path in `r18-export-csv-path`. 244 | 245 | ### Get-EmbyActorThumbs.ps1 246 | 247 | - Calls Emby/Jellyfin API to get a list of actors and their IDs 248 | - Matches names of both Emby actor list and R18 csv database created by Get-R18ThumbUrls 249 | - Creates a modifiable .csv spreadsheet to import into Emby 250 | 251 | `Get-EmbyActorThumbs.ps1` will parse the R18ThumbUrl .csv and match with your Emby/Jellyfin actor 252 | list generated by an API call. A new spreadsheet will be created for direct import. You can modify 253 | this spreadsheet by hand if you want to add/delete/update external actor images (such as from WAPdB) 254 | in Emby/Jellyfin. 
You can also run this script again after adding new videos/actors into Emby to 255 | update the spreadsheet with new actors. 256 | 257 | ### Set-EmbyActorThumbs.ps1 258 | 259 | - Reads csv spreadsheet created by Get-EmbyActorThumbs.ps1 and imports matching thumbnails into 260 | Emby/Jellyfin using API 261 | - Creates an up-to-date csv database of all API calls made to Emby/Jellyfin 262 | 263 | `Set-EmbyActorThumbs.ps1` will read the csv created by `Get-EmbyActorThumbs.ps1` and import new 264 | actor images to Emby while writing a separate csv database of all changes made. If the csv database 265 | already exists, it will be compared to your actor csv and only import new changes. 266 | 267 | ### Sort-7mmtv.ps1 268 | 269 | - Scrapes and sorts all videos in path set in `7mm-files-path` in the settings file 270 | 271 | `Sort-7mmtv.ps1` will scrape metadata, download the video cover, write a .nfo metadata file, and 272 | move videos to a new directory. It will scrape metadata information from the Japanese version of the 273 | site. Unfortunately the English version of the site has too many errors and discrepancies. I 274 | recommend you separate your uncensored/amateur JAV from your censored ones in Emby/Jellyfin so as to 275 | better filter through them. This is a very basic script, with no settings options besides the path. 276 | The script will sort one video file every 10 seconds, as the script scrapes from Google, and you 277 | will be blocked if you scrape too much. 278 | 279 | ### edit_covers.py 280 | 281 | - Finds all original thumbnail-size covers and creates an extra poster-size cover 282 | 283 | `edit_covers.py` is deprecated past v1.4.5. Only use this script if you are using a very old version 284 | that did not have poster covers and you want to add them to your already scraped files. 285 | 286 | ### settings_sort_jav.ini 287 | 288 | Please note that where it gives you options to include delimiters, certain characters are disallowed 289 | by the OS. 
If you include them, they will be forcibly removed from your delimiter. For windows, that 290 | would be: / \ : \* ? < > | 291 | 292 | ## FAQ 293 | 294 | ### My video isn't being sorted by sort_jav.py 295 | 296 | Try renaming your file exactly how it appears on javlibrary. If it still doesn't work for some 297 | reason, follow the instructions below to manually sort the file. 298 | 299 | ### "Could not find video on javlibrary so skipping..." for all files 300 | 301 | This problem likely stems from the cfscrape/cloudscraper module failing. First ensure that both cfscrape AND 302 | Node.js are installed properly (restart computer after installing Node.js). If that still does not 303 | work, run `pip install -U cfscrape` to upgrade your cfscrape module. 304 | 305 | ### The video was sorted by sort_jav.py, but it's the wrong video 306 | 307 | Occasionally the results will be incorrect due to there being multiple videos with the same title. 308 | To manually sort the file, go to the correct javlibrary page, and save the page as an html. Also 309 | save the cover image. Rename the .html to a .txt, and rename both the cover and .txt the same as the 310 | video. Run Set-JAVNfo.ps1 and it will create a metadata file for you. To create a poster cover, you 311 | can either crop it yourself, or run the `edit_covers.py` script with the FULL directory path of your 312 | manually sorted files set in the settings path. 313 | 314 | ### r18 and t28 videos aren't being sorted 315 | 316 | For these special cases, rename the files to exactly how they appear on javlibrary. Anything extra 317 | in the video name will cause it not to sort. If you have a lot of them, check out my 318 | [JAV renamer script](https://nodejs.org/en/download/). 319 | 320 | ### Shell closes immediately after erroring 321 | 322 | Try calling the scripts through a shell window rather than double-clicking to run. You will be able 323 | to diagnose what the issue is through the error messages. 
324 | 325 | ### Not all my actresses have thumbnail images 326 | 327 | Unfortunately R18 and javlibrary use different English naming conventions for their actresses, so 328 | until I have a solution in place to either scrape Japanese, or do some manual switcheroos, we'll 329 | have to deal with it. You can also manually put in image urls into the .csv specified in 330 | `actor-csv-export-path`. 331 | 332 | ### Sort_jav.py is erroring out when the filename is too long with actors set 333 | 334 | As of now, I don't have a solution in place to handle hard caps on filename length. If you have files that 335 | won't sort due to this error, turn off actor names for that file. 336 | 337 | ## Feature ideas 338 | 339 | - [x] Add option to input tags/genres in metadata file - v.1.4.0 340 | - [x] Add functionality to crop cover to poster size - v1.4.4 341 | - [x] Scrape actor images from R18.com and push to Emby - v1.5.0 342 | - [x] Add option to run sort_jav.py without renaming local files - v1.5.2 343 | - [x] Scrape scene title from R18.com - v1.5.2 344 | - [x] Scrape series title and director name from R18.com - v1.5.3 345 | - [x] Scrape amateur/uncensored video metadata from 7mmtv - v1.5.3 346 | - [x] Add functionality to add actor thumb link in .nfo metadata - v1.5.4 347 | - [ ] Add option to rename \*-thumb.jpg files to fanart.jpg for better Plex compatibility 348 | - [ ] Add option to do recursive search on sort_jav.py 349 | - [ ] Add option to manually scrape a javlibrary url if it can't match automatically 350 | - [ ] Add more video title renaming options 351 | - [ ] Scrape video plot/description 352 | -------------------------------------------------------------------------------- /emby_actor_thumbs/1. 
Get-EmbyActorThumbs.ps1: --------------------------------------------------------------------------------
function Get-EmbyActors {
    <#
    .SYNOPSIS
        Requests the Persons list from the Emby server and returns the parsed response.
    .PARAMETER ServerUri
        Base uri of the server; '/emby/Persons/' is appended to it.
    .PARAMETER ApiKey
        Api key sent as the 'api_key' query parameter.
    #>
    [CmdletBinding()]
    param(
        [Parameter(Mandatory = $true)]
        [string]$ServerUri,
        [Parameter(Mandatory = $true)]
        [string]$ApiKey
    )

    Invoke-RestMethod -Method Get -Uri "$ServerUri/emby/Persons/?api_key=$ApiKey"
}

function New-ActorObject {
    <#
    .SYNOPSIS
        Imports a csv and outputs one actor object (Name, EmbyId, ThumbUrl, PrimaryUrl) per row.
    .PARAMETER CsvPath
        Csv file to import; the 'alt', 'EmbyId', 'src' and 'PrimaryUrl' columns are read.
    #>
    [CmdletBinding()]
    param(
        [Parameter(Mandatory = $true)]
        [string]$CsvPath
    )

    $Csv = Import-Csv -Path $CsvPath -ErrorAction Stop

    foreach ($Object in $Csv) {
        [pscustomobject]@{
            # Previously '$Object.$alt': $alt is never defined, so the name column was never read
            Name       = $Object.alt
            EmbyId     = $Object.EmbyId
            ThumbUrl   = $Object.src
            PrimaryUrl = $Object.PrimaryUrl
        }
    }
}

# Remove progress bar to speed up REST requests
$ProgressPreference = 'SilentlyContinue'

# Check settings file for config (the file is read once and reused for every key)
$SettingsPath = Resolve-Path -Path (Join-Path -Path $PSScriptRoot -ChildPath (Join-Path -Path '..' -ChildPath 'settings_sort_jav.ini'))
$Settings = @(Get-Content $SettingsPath)
$EmbyServerUri = ($Settings -match '^emby-server-uri').Split('=')[1]
$EmbyApiKey = ($Settings -match '^emby-api-key').Split('=')[1]
$R18ImportPath = ($Settings -match '^r18-export-csv-path').Split('=')[1]
$ActorExportPath = ($Settings -match '^actor-csv-export-path').Split('=')[1]

# Write Emby actors and id to object
Write-Host "Getting actors from Emby..."
$EmbyActors = Get-EmbyActors -ServerUri $EmbyServerUri -ApiKey $EmbyApiKey -ErrorAction Stop
# Iterate the items themselves: indexing Items.Name[$x] returns single characters when the
# server reports exactly one person
$EmbyActorObject = @(
    foreach ($Item in $EmbyActors.Items) {
        [pscustomobject]@{
            Name   = $Item.Name
            EmbyId = $Item.Id
        }
    }
)

# Import R18 actors and thumburls to object
Write-Host "Importing R18 actors with thumb urls..."
$R18ActorObject = Import-Csv -Path $R18ImportPath -ErrorAction Stop

Write-Host "Comparing Emby actor list with R18, please wait..."
# Index the R18 thumb urls by lower-cased name. The first row for a name wins, matching what the
# earlier IndexOf lookup returned, and one table serves both the "is it known" test and the lookup
# so the two can no longer disagree.
$R18ThumbByName = @{}
foreach ($R18Actor in $R18ActorObject) {
    $Key = ([string]$R18Actor.Name).ToLower()
    if (-not $R18ThumbByName.ContainsKey($Key)) {
        $R18ThumbByName[$Key] = $R18Actor.ThumbUrl
    }
}

# Compare both Emby and R18 actors for matching actors, and combine to a single object
$x = 0
$ActorObject = @(
    foreach ($EmbyActor in $EmbyActorObject) {
        $Key = ([string]$EmbyActor.Name).ToLower()

        # Actors without an R18 match keep empty url columns so they can be filled in by hand
        $ThumbUrl = ''
        if ($R18ThumbByName.ContainsKey($Key)) {
            $ThumbUrl = $R18ThumbByName[$Key]
        }

        [pscustomobject]@{
            Name       = $EmbyActor.Name
            EmbyId     = $EmbyActor.EmbyId
            ThumbUrl   = $ThumbUrl
            PrimaryUrl = $ThumbUrl
        }

        if (($x % 20) -eq 0) { Write-Host '.' -NoNewline }
        $x++
    }
)

# $Input is a PowerShell automatic variable, so the answer is kept in its own variable
if (Test-Path $ActorExportPath) {
    Write-Warning "File specified in actor-csv-export-path already exists. Overwrite with a new copy? "
    Write-Host "If you select N, your existing file will be updated with any new Emby entries."
    $Answer = Read-Host -Prompt '[Y] Yes [N] No (default is "N")'
}
else {
    $Answer = 'y'
}

if ($Answer -like 'y') {
    $ActorObject | Select-Object Name, EmbyId, ThumbUrl, PrimaryUrl | Export-Csv -Path $ActorExportPath -Force -NoTypeInformation -ErrorAction Stop
}

else {
    $ExistingActors = Import-Csv -Path $ActorExportPath -ErrorAction Stop

    # Build the set of known ids once instead of scanning the whole csv for every actor
    $ExistingIds = @{}
    foreach ($Existing in $ExistingActors) {
        $ExistingIds[[string]$Existing.EmbyId] = $true
    }

    $Count = 1
    foreach ($Actor in $ActorObject) {
        # If new actor (EmbyId) found, append to existing csv; known ids are left untouched
        if (-not $ExistingIds.ContainsKey([string]$Actor.EmbyId)) {
            $Actor | Select-Object Name, EmbyId, ThumbUrl, PrimaryUrl | Export-Csv -Path $ActorExportPath -Append -NoClobber -NoTypeInformation -ErrorAction Stop
            Write-Host "($Count) Appending $Actor"
        }
        $Count++
    }
}

Write-Host "Combined actor thumb file written to $ActorExportPath"
pause
-------------------------------------------------------------------------------- /emby_actor_thumbs/2. 
# emby_actor_thumbs/2. Set-EmbyActorThumbs.ps1

function Add-ActorThumbs {
    <#
    .SYNOPSIS
        Asks Emby to download a remote image for an item (POST RemoteImages/Download).
    .PARAMETER ServerUri
        Base address of the Emby server.
    .PARAMETER ActorId
        Emby item id of the actor.
    .PARAMETER ImageUrl
        Url of the image Emby should download.
    .PARAMETER ImageType
        Image type passed as the 'Type' query parameter (the script uses Thumb and Primary).
    .PARAMETER ApiKey
        Emby API key.
    #>
    [CmdletBinding()]
    param(
        [Parameter(Mandatory = $true)]
        [string]$ServerUri,
        [Parameter(Mandatory = $true)]
        [string]$ActorId,
        [Parameter(Mandatory = $true)]
        [string]$ImageUrl,
        [Parameter(Mandatory = $true)]
        [string]$ImageType,
        [Parameter(Mandatory = $true)]
        [string]$ApiKey
    )

    # The image url is itself a query-string value: escape it so characters such
    # as '&', '?' or '#' inside it cannot truncate or corrupt the request.
    $EncodedImageUrl = [uri]::EscapeDataString($ImageUrl)
    Invoke-RestMethod -Method Post -Uri "$ServerUri/emby/Items/$ActorId/RemoteImages/Download?Type=$ImageType&ImageUrl=$EncodedImageUrl&api_key=$ApiKey" -ErrorAction Continue
}

function Remove-ActorThumbs {
    <#
    .SYNOPSIS
        Sends a DELETE request for one image type of an Emby item.
    .PARAMETER ServerUri
        Base address of the Emby server.
    .PARAMETER ActorId
        Emby item id of the actor.
    .PARAMETER ImageType
        Image type passed as the 'Type' query parameter.
    .PARAMETER ApiKey
        Emby API key.
    #>
    [CmdletBinding()]
    param(
        [Parameter(Mandatory = $true)]
        [string]$ServerUri,
        [Parameter(Mandatory = $true)]
        [string]$ActorId,
        [Parameter(Mandatory = $true)]
        [string]$ImageType,
        [Parameter(Mandatory = $true)]
        [string]$ApiKey
    )

    # NOTE(review): the path '/Images/Download?Type=' is kept as written; verify
    # against the Emby REST API that this is the intended delete route.
    Invoke-RestMethod -Method Delete -Uri "$ServerUri/emby/Items/$ActorId/Images/Download?Type=$ImageType&api_key=$ApiKey" -ErrorAction Continue
}

function Set-CsvDb {
    <#
    .SYNOPSIS
        Rewrites one data row of the csv database file in place.
    .PARAMETER Path
        Path of the csv database file.
    .PARAMETER Index
        Zero-based index of the data row to replace (the header is line 0 of the
        file, so the row lives on line Index + 1).
    .PARAMETER Name
        Actor name written to the row.
    .PARAMETER EmbyId
        Emby id written to the row.
    .PARAMETER ThumbUrl
        Thumb url written to the row (may be empty).
    .PARAMETER PrimaryUrl
        Primary url written to the row (may be empty).
    #>
    [CmdletBinding()]
    param(
        [Parameter(Mandatory = $true)]
        [System.IO.FileInfo]$Path,
        [Parameter(Mandatory = $true)]
        [int]$Index,
        [Parameter(Mandatory = $true)]
        [string]$Name,
        [Parameter(Mandatory = $true)]
        [string]$EmbyId,
        [Parameter(Mandatory = $false)]
        [string]$ThumbUrl = '',
        [Parameter(Mandatory = $false)]
        [string]$PrimaryUrl = ''
    )

    # Get contents of csv file in $Path; @() keeps a one-line file an array of lines
    $DbContent = @(Get-Content $Path)

    # A "not found" index (-1) would otherwise overwrite the header on line 0,
    # and an index past the end would fail; refuse both.
    $Row = $Index + 1
    if ($Index -lt 0 -or $Row -ge $DbContent.Count) {
        Write-Error "Cannot update row $Index of '$Path': no such row in the database file."
        return
    }

    # Rewrite new csv string to input, doubling embedded quotes as csv requires
    $Fields = @($Name, $EmbyId, $ThumbUrl, $PrimaryUrl) | ForEach-Object { '"' + ($_ -replace '"', '""') + '"' }
    $UpdatedDbString = $Fields -join ','

    # Update the line with updated string and set the file
    $DbContent[$Row] = $UpdatedDbString
    $DbContent | Set-Content $Path
}

# Remove progress bar to speed up REST requests
$ProgressPreference = 'SilentlyContinue'

# Check settings file for config (read the file once instead of once per setting)
$SettingsPath = Resolve-Path -Path (Join-Path -Path $PSScriptRoot -ChildPath (Join-Path -Path '..' -ChildPath 'settings_sort_jav.ini'))
$Settings = @(Get-Content $SettingsPath)
$EmbyServerUri = ($Settings -match '^emby-server-uri').Split('=')[1]
$EmbyApiKey = ($Settings -match '^emby-api-key').Split('=')[1]
$ActorImportPath = ($Settings -match '^actor-csv-export-path').Split('=')[1]
$ActorDbPath = ($Settings -match '^actor-csv-database-path').Split('=')[1]

$ActorObject = @(Import-Csv -Path $ActorImportPath -ErrorAction Stop)

# Check if db file specified in 'actor-csv-database-path' exists, create if not exists
if (!(Test-Path $ActorDbPath)) {
    Write-Host "Database file not found. Creating..."
    New-Item -ItemType File -Path $ActorDbPath
    $ActorDbObject = @()
}
else {
    # @() keeps a database with a single row an array, so it can be indexed below
    $ActorDbObject = @(Import-Csv -Path $ActorDbPath -ErrorAction Stop)
}

# Lower-cased names of the rows loaded from the database, in file order.
# $ActorDbObject is not modified inside the loop, so this is computed once.
[string[]]$DbNames = @($ActorDbObject | ForEach-Object { "$($_.Name)".ToLower() })

Write-Host "Querying for changes in $ActorImportPath..."
for ($x = 0; $x -lt $ActorObject.Count; $x++) {
    # Lower-cased name used to query the database for the actor's row index
    $ActorName = "$($ActorObject[$x].Name)".ToLower()
    if ($ActorObject[$x].ThumbUrl -notlike '' -or $ActorObject[$x].PrimaryUrl -notlike '') {
        if ($ActorObject[$x].ThumbUrl -notlike '' -and $ActorObject[$x].PrimaryUrl -notlike '') {
            if ($ActorObject[$x].Name -notin $ActorDbObject.Name -and $ActorObject[$x].EmbyId -notin $ActorDbObject.EmbyId) {
                Write-Host "ADD thumb to "$ActorObject[$x].Name""
                # POST thumb to Emby
                Add-ActorThumbs -ServerUri $EmbyServerUri -ActorId $ActorObject[$x].EmbyId -ImageUrl $ActorObject[$x].ThumbURL -ImageType Thumb -ApiKey $EmbyApiKey

                Write-Host "ADD primary to "$ActorObject[$x].Name""
                # POST primary to Emby
                Add-ActorThumbs -ServerUri $EmbyServerUri -ActorId $ActorObject[$x].EmbyId -ImageUrl $ActorObject[$x].PrimaryUrl -ImageType Primary -ApiKey $EmbyApiKey

                try {
                    # Write to db file if posted. -NoTypeInformation keeps the header on
                    # the first line of the file, which Set-CsvDb's row offset relies on.
                    $ActorObject[$x] | Export-Csv -Path $ActorDbPath -Append -NoClobber -NoTypeInformation -ErrorAction Stop
                }
                catch { Write-Error "Error writing to csv Database file. Make sure your database csv file is closed and restart the script." }
            }
            else {
                # Query for index of existing actor in db
                $Index = [array]::IndexOf($DbNames, $ActorName)
                if ($Index -ge 0 -and $ActorObject[$x].Name -eq $ActorDbObject[$Index].Name -and $ActorObject[$x].EmbyId -eq $ActorDbObject[$Index].EmbyId) {
                    if ($ActorObject[$x].ThumbUrl -notlike $ActorDbObject[$Index].ThumbUrl) {
                        Write-Host "ADD thumb image for "$ActorObject[$x].Name""
                        Add-ActorThumbs -ServerUri $EmbyServerUri -ActorId $ActorObject[$x].EmbyId -ImageUrl $ActorObject[$x].ThumbURL -ImageType Thumb -ApiKey $EmbyApiKey
                        Set-CsvDb -Path $ActorDbPath -Index $Index -Name $ActorObject[$x].Name -EmbyId $ActorObject[$x].EmbyId -ThumbUrl $ActorObject[$x].ThumbUrl -PrimaryUrl $ActorObject[$x].PrimaryUrl -ErrorAction Stop
                    }
                    if ($ActorObject[$x].PrimaryUrl -notlike $ActorDbObject[$Index].PrimaryUrl) {
                        Write-Host "ADD primary image for "$ActorObject[$x].Name""
                        Add-ActorThumbs -ServerUri $EmbyServerUri -ActorId $ActorObject[$x].EmbyId -ImageUrl $ActorObject[$x].PrimaryUrl -ImageType Primary -ApiKey $EmbyApiKey
                        Set-CsvDb -Path $ActorDbPath -Index $Index -Name $ActorObject[$x].Name -EmbyId $ActorObject[$x].EmbyId -ThumbUrl $ActorObject[$x].ThumbUrl -PrimaryUrl $ActorObject[$x].PrimaryUrl -ErrorAction Stop
                    }
                }
            }
        }
        else {
            # Exactly one of the two urls is empty: remove that image from Emby.
            # The row index is looked up here; previously $Index was whatever an
            # earlier iteration left behind (or unset), so the wrong database row
            # could be overwritten.
            $Index = [array]::IndexOf($DbNames, $ActorName)

            if ('' -eq $ActorObject[$x].ThumbUrl) {
                Write-Host "REMOVE thumb image for "$ActorObject[$x].Name""
                Remove-ActorThumbs -ServerUri $EmbyServerUri -ActorId $ActorObject[$x].EmbyId -ImageType Thumb -ApiKey $EmbyApiKey
                # Only actors that already have a database row can be updated
                if ($Index -ge 0) {
                    Set-CsvDb -Path $ActorDbPath -Index $Index -Name $ActorObject[$x].Name -EmbyId $ActorObject[$x].EmbyId -ThumbUrl $ActorObject[$x].ThumbUrl -PrimaryUrl $ActorObject[$x].PrimaryUrl -ErrorAction Stop
                }
            }

            if ('' -eq $ActorObject[$x].PrimaryUrl) {
                Write-Host "REMOVE primary image for "$ActorObject[$x].Name""
                Remove-ActorThumbs -ServerUri $EmbyServerUri -ActorId $ActorObject[$x].EmbyId -ImageType Primary -ApiKey $EmbyApiKey
                if ($Index -ge 0) {
                    Set-CsvDb -Path $ActorDbPath -Index $Index -Name $ActorObject[$x].Name -EmbyId $ActorObject[$x].EmbyId -ThumbUrl $ActorObject[$x].ThumbUrl -PrimaryUrl $ActorObject[$x].PrimaryUrl -ErrorAction Stop
                }
            }
        }
    }
    else {
        # Nothing to do for actors without any url; just show progress
        if (($x % 20) -eq 0) { Write-Host '.' -NoNewline }
    }
}
pause
# extras/edit_covers.py
# Use this script if you previously scraped your JAV without having the feature to crop covers to poster size
# CAUTION: This script checks for all .jpgs recursively within the directory matching resolution (790-810)x(400-535)
# CAUTION: Make sure only scraped JAV files are within the directory you specify in the settings

import os
import urllib.request
from shutil import move

# Settings whose values are filesystem paths; their lines must keep ':' and '\'.
_PATH_SETTING_PREFIXES = ('path', 'scraped-covers-path')
# Characters that are invalid in filenames and are stripped from all other lines.
_INVALID_FILENAME_CHARS = str.maketrans({key: None for key in '<>/\\|*:?'})


def edit_covers(s):
    """Crop every JavLibrary-sized cover below the configured path to poster size.

    s is the settings dictionary returned by read_file(); the keys
    'scraped-covers-path' and 'keep-original-cover' are read. Every .jpg
    (except '-thumb' files) whose size is within (790-810)x(400-535) is
    overwritten with its right-hand part. When 'keep-original-cover' is true
    the uncropped image is first saved next to it as '<name>-thumb.jpg'.
    """
    # Imported here so a missing Pillow install is reported by the caller's
    # error handler and the settings parser stays usable without Pillow.
    from PIL import Image

    path = os.path.join(s['scraped-covers-path'])
    print("CAUTION: This script will modify all .jpgs recursively in the specified path")
    print("CAUTION: Make sure only scraped JAV files are within the path specified")
    input("Confirm before editing covers in path: " + path)

    covers = []
    for root, _dirs, names in os.walk(path):
        for name in names:
            # Match on the extension: a substring test also picked up names
            # such as 'cover.jpg.bak'.
            if name.lower().endswith('.jpg') and '-thumb' not in name:
                covers.append(os.path.join(root, name))

    for cover_path in covers:
        cover_thumb_path = os.path.splitext(cover_path)[0] + "-thumb.jpg"
        # The context manager closes the file even for images that are skipped.
        with Image.open(cover_path) as original_cover:
            width, height = original_cover.size
            # match JavLibrary cover size
            if not (790 < width < 810 and 400 < height < 535):
                continue
            # crop cover: keep the part to the right of width/1.895734597
            left = width / 1.895734597
            cropped_cover = original_cover.crop((left, 0, width, height))
            if s['keep-original-cover']:
                # save original cover to cover_thumb_path
                original_cover.save(cover_thumb_path)
                print("Saving " + cover_thumb_path)
        # save cropped cover to original cover_path
        cropped_cover.save(cover_path)
        print("Saving " + cover_path)


def read_file(path):
    """Return a dictionary containing a map of name of setting -> value.

    Blank lines, lines starting with '#' and lines without '=' are ignored.
    The characters <>/\\|*:? are removed from every line except the path
    settings. The values 'true' and 'false' (any casing) become booleans; all
    other values stay strings. Only the first '=' separates key and value.
    """
    d = {}
    with open(path, 'r') as content_file:
        for line in content_file:
            line = line.strip('\n')
            # str.startswith needs a tuple to test several prefixes;
            # "'path' and 'scraped-covers-path'" only ever tested the second.
            if not line.startswith(_PATH_SETTING_PREFIXES):
                line = line.translate(_INVALID_FILENAME_CHARS)
            if not line or line.startswith('#'):
                continue
            key, separator, value = line.partition('=')
            if not separator:
                continue
            lowered = value.lower()
            if lowered == 'true':
                d[key] = True
            elif lowered == 'false':
                d[key] = False
            else:
                d[key] = value
    return d


if __name__ == '__main__':
    try:
        script_dir = os.path.dirname(__file__)
        rel_path = "../settings_sort_jav.ini"
        settings = read_file(os.path.join(script_dir, rel_path))
        edit_covers(settings)
        input("Press Enter to finish.")
    except Exception as e:
        # Top-level boundary of the script: report the problem instead of a traceback
        print(e)
        print("Panic! Go find help.")
Go find help.") 76 | -------------------------------------------------------------------------------- /extras/jf-example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jvlflame/JAV-Sort-Scrape-javlibrary/bb60d62ec107dffafc875be57e855d38ac7a6b1c/extras/jf-example.png -------------------------------------------------------------------------------- /settings_sort_jav.ini: -------------------------------------------------------------------------------- 1 | # This is the settings file, edit these to make the program work 2 | # The current default settings are my go-to for best experience in Emby 3 | 4 | # Settings for sort_jav.py ################################################################################ 5 | 6 | # Path is folder you're trying to sort 7 | # This should contain videos you're trying to sort 8 | # This does not search recursively, so only videos in the root folder will be sorted. 9 | #path=E:\New Folder\Downloads\Completed Torrents\__staging 10 | path=Z:\private\Downloads\#Sorted 11 | 12 | # This option will download covers for all videos 13 | include-cover=true 14 | 15 | # This option will create a cover for all videos if it has multiple parts and the previous option is true 16 | # From my testing, required for multi-part videos on Emby 17 | include-cover-all=true 18 | 19 | # This option will crop the cover for all videos to poster size, showing only the front cover of the DVD/BD 20 | # Useful in Emby when viewing in movie poster view 21 | crop-cover-to-poster=true 22 | 23 | # This option is available if the above option (crop-cover-to-poster) is true. 24 | # It will keep the original cover in a separate file after it is cropped. 
25 | # It will be saved under the standard Emby naming convention of $cover-thumb.jpg 26 | # I recommend leaving this true, as you will need to rescrape the JAV files to regain the full-size cover 27 | keep-original-cover=true 28 | 29 | # This option will include the name of all the actresses in the name of the video 30 | include-actress-in-video-name=false 31 | 32 | # This option will include the name of all the actresses in the name of the folder 33 | include-actress-name-in-folder=false 34 | 35 | # This option will include the name of all the actresses in the name of the cover 36 | # Keep this option the same as "include-actress-in-video-name", as Emby requires the filename to be the same 37 | include-actress-name-in-cover=false 38 | 39 | # This option will move each video into a separate folder 40 | # Recommend to keep this as true 41 | move-video-to-new-folder=true 42 | 43 | # This option will allow you to scrape video files without renaming them 44 | # Useful if you want to keep your current file structure while seeding torrents 45 | # For this use-case, you should set move-video-to-new-folder to false 46 | do-not-rename-file=false 47 | 48 | # This option specifies which character goes between the video name and the actress name 49 | # For example, if you are naming PPPD-400, which stars Meguri 50 | # This would make the name PPPD-400_Meguri 51 | # You may want this to match the delimiter-between-multiple-actresses value 52 | delimiter-between-video-name-actress=_ 53 | 54 | # This option specifies which character goes between the first and last name of an actress 55 | # For example, if you are naming PPPD-490, which stars Ayumi Shinoda 56 | # This would make the name PPPD-490_Ayumi_Shinoda 57 | delimiter-between-actress-names=. 
58 | 59 | # This option specifies which character goes between the names of different actresses 60 | # For example, if you are naming MIRD-150, which stars Hitomi Tanaka and Anri Okita 61 | # This would make the name MIRD-150_Hitomi_Tanaka#Anri_Okita 62 | delimiter-between-multiple-actresses=_ 63 | 64 | # This option specifies which character goes between the characters you use to specify the second video 65 | # For example if you have MIRD-151 and it has two video files 66 | # You can do MIRD-151_1 and MIRD-151_2 or MIRD-151_A and MIRD-151_B 67 | # If you fail to use a delimiter and do MIRD-151B it will fail 68 | delimiter-between-multiple-videos=_ 69 | 70 | # This option is used when you have multiple files for a single video, say you have PPPD-400_1 and PPPD-400_2 71 | # If this value is true, the file will get renamed PPPD-400_Meguri_1, if this is false, it will get renamed 72 | # PPPD-400_1_Meguri 73 | actress-before-video-number=false 74 | 75 | # This option specifies the order of the name of an actress 76 | # It can either be first last or last first 77 | # Specify first for first last order 78 | # Specify last for last first order 79 | name-order=last 80 | 81 | # This option specifies that, in the final name, the video ID should be all capitalized 82 | # so if you have a video called meyd-094 it will be changed to MEYD-094 83 | # If it is false, the casing of the name will be used as-is 84 | make-video-id-all-uppercase=true 85 | 86 | # This is the value to use if you specify to use the actress name 87 | # but javlibrary returns there is no actress for the video 88 | name-for-actress-if-blank=Unknown 89 | 90 | # Metadata options 91 | # This option will write html metadata to txt >> (MUST BE TRUE FOR THE REST OF SCRIPTS TO WORK) 92 | # Required true if you want to write nfo (Emby/Jellyfin/Kodi) metadata for the video 93 | include-html-txt=true 94 | 95 | # Settings for Set-JAVNfo.ps1 
#################################################################################### 96 | 97 | # Path is same as sort_jav.py 98 | 99 | # This option will scrape the title from r18.com instead of using the machine-translated version from JAVLibrary 100 | # Using this option will make the script run significantly slower, as it will be scraping R18 101 | prefer-r18-title=true 102 | 103 | # This option will scrape r18 for additional metadata such as the director's name and video series title 104 | # Video series title will be added as an Emby 'tag' 105 | scrape-r18-other-metadata=true 106 | 107 | # This option will add the video title in the metadata to use instead of filename 108 | include-video-title=true 109 | 110 | # This option will add video genre metadata to the nfo file 111 | include-genre-metadata=true 112 | 113 | # This option will add video genre metadata as a 'tag' to the nfo file 114 | # Recommended false, but may be of use if you know what you're doing 115 | include-tag-metadata=false 116 | 117 | # This option will keep the original html metadata txt file instead of deleting it 118 | # Recommend true, as you will need to rescrape the video to get this file if you need it again 119 | keep-metadata-txt=true 120 | 121 | # Settings for edit_covers.py ##################################################################################### 122 | 123 | # This option is mostly deprecated. 
Leave blank unless you are coming from an older version without poster covers 124 | # Edit_covers.py will search this directory recursively and find all uncropped .jpg files between (810-790)x(535-400) 125 | # Make sure that no other non-cover jpg files are in this directory or they may be cropped unintentionally 126 | # V:\J\Unsorted 127 | scraped-covers-path= 128 | 129 | # Settings for Get-R18ThumbUrls.ps1 ################################################################################ 130 | 131 | # Page on https://www.r18.com/videos/vod/movies/actress/letter=a/sort=popular/ to start scraping actress links from 132 | r18-start-page=1 133 | 134 | # Page on https://www.r18.com/videos/vod/movies/actress/letter=a/sort=popular/ to end scraping actress links from 135 | r18-end-page=332 136 | 137 | # By default, R18 lists actresses as FirstName LastName 138 | # Set this to true to swap to LastName FirstName 139 | # If option "name-order=last", set this to true 140 | swap-name-order=true 141 | 142 | # Path to the csv file the script will create to reference thumbnail urls from 143 | # This file will automatically be created when the script runs 144 | # Do NOT move or rename this file after it is created, as it will be referenced by Get-EmbyActorThumbs.ps1 145 | # Z:\git\other\Jav-Sort-Scrape-javlibrary\R18-Aug-30-2019.csv 146 | r18-export-csv-path=Z:\git\other\Jav-Sort-Scrape-javlibrary\R18-Oct-09-2019-last-first.csv 147 | 148 | # Settings for Get-EmbyActorThumbs.ps1 ############################################################################# 149 | 150 | # Enter your emby server URL and port 151 | # 192.168.14.10:8096 152 | emby-server-uri=192.168.14.10:8096 153 | 154 | # Enter your emby API key - Create an API key under dashboard -> advanced -> security 155 | # 27d3c17ba69540828f141df8d2c743fb 156 | emby-api-key=27d3c17ba69540828f141df8d2c743fb 157 | 158 | # r18-export-csv-path is needed. Enter the path above. 
159 | 160 | # Enter the path to the Actors/Thumbs csv file you want to create/read 161 | # This is the csv file you can edit manually and will be updated with new actors when you add additional videos/actors 162 | # This file will be automatically created when the script runs 163 | # Z:\git\other\Jav-Sort-Scrape-javlibrary\emby_actor_thumbs\db\actors.csv 164 | actor-csv-export-path=Z:\git\other\Jav-Sort-Scrape-javlibrary\emby_actor_thumbs\db\actors.csv 165 | 166 | # Settings for Set-EmbyActorThumbs.ps1 ############################################################################# 167 | 168 | # Enter the path to the Actors/Thumbs csv file you want to store images POSTed to Emby 169 | # This will serve as a database file so you won't have to repeatedly upload the same files 170 | # This file will be automatically created when you specify the path and run the script 171 | # Do NOT modify this file manually if you can help it. Special case applies if you edit images manually on Emby 172 | # Z:\git\other\Jav-Sort-Scrape-javlibrary\emby_actor_thumbs\db\actors_written.csv 173 | actor-csv-database-path=Z:\git\other\Jav-Sort-Scrape-javlibrary\emby_actor_thumbs\db\actors_written.csv 174 | 175 | # Settings for Sort-7mmtv.ps1 ####################################################################################### 176 | 177 | # Enter the path to the directory of your video files. Make sure the files are named exactly as the ID listed on 7mmtv 178 | # Example: 7mmtv = [sg033] vol33 | Your file = sg033.mp4 179 | # This script will only sort videos listed under Amateur or Uncensored on 7mmtv.tv, in JAPANESE 180 | # Make sure no other unwanted videos are in this directory, as this script scrapes from Google which limits requests 181 | # This script ignores all above settings. By default, your file will NOT be renamed, and will be moved to a new directory 182 | 7mm-files-path=Z:\private\J\Unsorted 183 | --------------------------------------------------------------------------------