├── .gitignore ├── LICENSE ├── Movie-Extra-Downloader.py ├── README.md ├── default_extra_configs ├── 1080p-trailer.cfg ├── 720p-trailer.cfg ├── experimental.cfg └── theme-song.cfg ├── directory.py ├── empty_default_config.cfg ├── extra_config.py ├── extra_finder.py ├── main.py ├── old_code ├── Movie-Extra-Downloader.py ├── Stream.py ├── YoutubeVideo.py ├── config-example.cfg └── old_code.py ├── tools.py └── url_finders.py /.gitignore: -------------------------------------------------------------------------------- 1 | .* 2 | testdir 3 | testdir* 4 | venv 5 | .idea 6 | *(* 7 | *)* 8 | *.log 9 | default_config.cfg 10 | testing.py 11 | record_data 12 | *tmp* 13 | records 14 | extra_configs/** 15 | failed movies/** 16 | !extra_configs/.gitkeep 17 | !failed_movies/.gitkeep 18 | 19 | # Byte-compiled / optimized / DLL files 20 | __pycache__/ 21 | *.py[cod] 22 | *$py.class 23 | 24 | # C extensions 25 | *.so 26 | 27 | # Distribution / packaging 28 | .Python 29 | build/ 30 | develop-eggs/ 31 | dist/ 32 | downloads/ 33 | eggs/ 34 | .eggs/ 35 | lib/ 36 | lib64/ 37 | parts/ 38 | sdist/ 39 | var/ 40 | wheels/ 41 | *.egg-info/ 42 | .installed.cfg 43 | *.egg 44 | MANIFEST 45 | 46 | # PyInstaller 47 | # Usually these files are written by a python script from a template 48 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 49 | *.manifest 50 | *.spec 51 | 52 | # Installer logs 53 | pip-log.txt 54 | pip-delete-this-directory.txt 55 | 56 | # Unit test / coverage reports 57 | htmlcov/ 58 | .tox/ 59 | .coverage 60 | .coverage.* 61 | .cache 62 | nosetests.xml 63 | coverage.xml 64 | *.cover 65 | .hypothesis/ 66 | .pytest_cache/ 67 | 68 | # Translations 69 | *.mo 70 | *.pot 71 | 72 | # Django stuff: 73 | *.log 74 | local_settings.py 75 | db.sqlite3 76 | 77 | # Flask stuff: 78 | instance/ 79 | .webassets-cache 80 | 81 | # Scrapy stuff: 82 | .scrapy 83 | 84 | # Sphinx documentation 85 | docs/_build/ 86 | 87 | # PyBuilder 88 | target/ 89 | 90 | # Jupyter Notebook 91 | .ipynb_checkpoints 92 | 93 | # pyenv 94 | .python-version 95 | 96 | # celery beat schedule file 97 | celerybeat-schedule 98 | 99 | # SageMath parsed files 100 | *.sage.py 101 | 102 | # Environments 103 | .env 104 | .venv 105 | env/ 106 | venv/ 107 | ENV/ 108 | env.bak/ 109 | venv.bak/ 110 | 111 | # Spyder project settings 112 | .spyderproject 113 | .spyproject 114 | 115 | # Rope project settings 116 | .ropeproject 117 | 118 | # mkdocs documentation 119 | /site 120 | 121 | # mypy 122 | .mypy_cache/ 123 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 KBlixt 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Movie-Extra-Downloader.py: -------------------------------------------------------------------------------- 1 | import traceback 2 | 3 | from main import download_extra 4 | from extra_config import ExtraSettings 5 | import os 6 | import sys 7 | from directory import Directory 8 | import shutil 9 | from urllib.error import URLError, HTTPError 10 | import configparser 11 | from _socket import timeout 12 | import argparse 13 | import tools 14 | import time 15 | 16 | parser = argparse.ArgumentParser() 17 | parser.add_argument("-d", "--directory", help="directory to search extras for") 18 | parser.add_argument("-l", "--library", help="library of directories to search extras for") 19 | parser.add_argument("-f", "--force", action="store_true", help="force scan the directories.") 20 | parser.add_argument("-r", "--replace", action="store_true", help="remove and ban the existing extra.") 21 | args = parser.parse_args() 22 | 23 | if args.directory and os.path.split(args.directory)[1] == '': 24 | args.directory = os.path.split(args.directory)[0] 25 | 26 | if args.library and os.path.split(args.library)[1] == '': 27 | args.library = os.path.split(args.library)[0] 28 | 29 | 30 | def handle_directory(folder): 31 | print('working on directory: "' + os.path.join('...', os.path.split(folder)[1]) + '"') 32 | for config in configs_content: 33 | 34 | if config.startswith('.') or config.startswith('_'): 35 | continue 36 | try: 37 | try: 38 | directory = Directory.load_directory(os.path.join(records, os.path.split(folder)[1])) 39 | except FileNotFoundError: 40 | if has_tmdb_key: 41 | directory = Directory(folder, tmdb_api_key=c.get('SETTINGS', 'tmdb_api_key')) 42 | else: 43 | directory = Directory(folder) 44 | 45 | extra_config = ExtraSettings(os.path.join(configs, config)) 46 | 47 | if args.replace and 'trailer' in extra_config.extra_type.lower(): 48 | args.force = True 49 | 50 | if extra_config.config_id in directory.completed_configs and not args.force: 51 | continue 52 | 53 | if extra_config.skip_movies_with_existing_trailers and not args.replace: 54 | skip = False 55 | for file in os.listdir(directory.full_path): 56 | if file.lower().endswith('trailer.mp4')\ 57 | or file.lower().endswith('trailer.mkv'): 58 | skip = True 59 | break 60 | if skip: 61 | print('movie already have a trailer. skipping.') 62 | directory.save_directory(records) 63 | continue 64 | if os.path.isdir(os.path.join(directory.full_path, 'trailers')): 65 | for file in os.listdir(os.path.join(directory.full_path, 'trailers')): 66 | if file.lower().endswith('.mp4')\ 67 | or file.lower().endswith('.mkv'): 68 | skip = True 69 | break 70 | if skip: 71 | print('movie already have a trailer. 
skipping.') 72 | directory.save_directory(records) 73 | continue 74 | 75 | if extra_config.skip_movies_with_existing_theme: 76 | skip = False 77 | for file in os.listdir(directory.full_path): 78 | if file.lower().endswith('theme.mp3')\ 79 | or file.lower().endswith('theme.wma')\ 80 | or file.lower().endswith('theme.flac'): 81 | skip = True 82 | break 83 | if skip: 84 | print('movie already have a theme song. skipping.') 85 | directory.save_directory(records) 86 | continue 87 | if os.path.isdir(os.path.join(directory.full_path, 'theme-music')): 88 | for file in os.listdir(os.path.join(directory.full_path, 'theme-music')): 89 | if file.lower().endswith('.mp3')\ 90 | or file.lower().endswith('.wma')\ 91 | or file.lower().endswith('.flac'): 92 | skip = True 93 | break 94 | if skip: 95 | print('movie already have a theme song. skipping.') 96 | directory.save_directory(records) 97 | continue 98 | 99 | directory.update_content() 100 | 101 | if args.force: 102 | old_record = directory.record 103 | directory.record = list() 104 | for record in old_record: 105 | if record != extra_config.extra_type: 106 | directory.record.append(record) 107 | extra_config.force = True 108 | 109 | if args.replace: 110 | directory.banned_youtube_videos_id.append(directory.trailer_youtube_video_id) 111 | shutil.rmtree(os.path.join(directory.full_path, extra_config.extra_type)) 112 | os.mkdir(os.path.join(directory.full_path, extra_config.extra_type)) 113 | 114 | if not os.path.isdir(tmp_folder): 115 | os.mkdir(tmp_folder) 116 | 117 | download_extra(directory, extra_config, tmp_folder) 118 | directory.completed_configs.append(extra_config.config_id) 119 | directory.save_directory(records) 120 | 121 | if args.force: 122 | # todo: delete all paths in the old record that are not in the new record 123 | pass 124 | 125 | except FileNotFoundError as e: 126 | print('file not found: ' + str(e)) 127 | continue 128 | 129 | except HTTPError: 130 | print('You might have been flagged by google search. try again tomorrow.') 131 | sys.exit() 132 | 133 | except URLError: 134 | print('you might have lost your internet connections. exiting') 135 | sys.exit() 136 | 137 | except timeout: 138 | print('you might have lost your internet connections. exiting') 139 | sys.exit() 140 | 141 | except ConnectionResetError: 142 | print('you might have lost your internet connections. exiting') 143 | sys.exit() 144 | 145 | except KeyboardInterrupt: 146 | print('exiting! 
keyboard interrupt.') 147 | sys.exit() 148 | 149 | 150 | def handle_library(library): 151 | if args.replace: 152 | print('the replace mode is unable in library mode, please use the directory mode.') 153 | return False 154 | for folder in os.listdir(library): 155 | if folder.startswith('.'): 156 | continue 157 | if not os.path.isdir(os.path.join(library, folder)): 158 | continue 159 | try: 160 | handle_directory(os.path.join(library, folder)) 161 | except KeyboardInterrupt: 162 | raise 163 | except Exception as e: 164 | print("----------------------------------------------------------") 165 | print("----------------------------------------------------------") 166 | print("----------------------------------------------------------") 167 | print("----------------------------------------------------------") 168 | print("----------------------------------------------------------") 169 | print("--------------------AN ERROR OCCURRED---------------------") 170 | print("------------------------SKIPPING--------------------------") 171 | print("------PLEASE REPORT MOVIE TITLE TO THE GITHUB ISSUES------") 172 | print("-----------------THE SCRIPT WILL CONTINUE-----------------") 173 | print("----------------------------------------------------------") 174 | print("-------------------- Exception: --------------------------") 175 | print(e) 176 | print(traceback.format_exc()) 177 | print("----------------------------------------------------------") 178 | print("----------------------------------------------------------") 179 | time.sleep(1) 180 | exit() 181 | 182 | if not os.path.isdir(os.path.join(os.path.dirname(sys.argv[0]), "failed_movies")): 183 | os.mkdir(os.path.join(os.path.dirname(sys.argv[0]), "failed_movies")) 184 | if not os.path.isdir(os.path.join(os.path.dirname(sys.argv[0]), "failed_movies", folder)): 185 | os.mkdir(os.path.join(os.path.dirname(sys.argv[0]), "failed_movies", folder)) 186 | if library == 'testdir': 187 | raise 188 | return True 189 | 190 | 191 | c = configparser.ConfigParser() 192 | c.read('default_config.cfg') 193 | 194 | tmp_folder = os.path.join(os.path.dirname(sys.argv[0]), 'tmp') 195 | 196 | configs = os.path.join(os.path.dirname(sys.argv[0]), 'extra_configs') 197 | configs_content = os.listdir(configs) 198 | 199 | records = os.path.join(os.path.dirname(sys.argv[0]), 'records') 200 | 201 | result = tools.get_tmdb_search_data(c.get('SETTINGS', 'tmdb_api_key'), 'star wars') 202 | if result is None: 203 | print('Warning: No working TMDB api key was specified.') 204 | time.sleep(10) 205 | has_tmdb_key = False 206 | else: 207 | has_tmdb_key = True 208 | 209 | 210 | if args.directory: 211 | handle_directory(args.directory) 212 | elif args.library: 213 | handle_library(args.library) 214 | else: 215 | print('please specify a directory (-d) or a library (-l) to search extras for') 216 | 217 | try: 218 | shutil.rmtree(tmp_folder) 219 | except FileNotFoundError: 220 | pass 221 | os.mkdir(tmp_folder) 222 | 223 | sys.exit() 224 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Movie-Extra-Downloader 2 | A python 3.6 script that downloads movie extras from youtube. 3 | 4 | This script searches for movie extras on youtube and then uses Youtube-DL to download found videos from youtube. 5 | Downloaded videos are put into subfolders in the movie directory. 6 | 7 | The script uses folder names as a basis of the search. 
It can also recognise that the folder name ends with the release 8 | year and will use it to make better searches. To improve results the script can use a TMDB API key to filter out 9 | similarly named titles. The script assumes that you are using a folder naming scheme similar to 10 | "{movie title} {movie release year}". It can handle most common delimiters and will remove all parentheses and brackets. 11 | 12 | My goal and vision for this program is to find all kinds of movie extra content on YouTube, but for now **I've only implemented 13 | trailers.** The goal is also to provide options for useful things like renaming schemes. Editing the filtering process 14 | should also be fairly easy to mess about with. For now, however, I wouldn't recommend changing too much in the extra configs. 15 | 16 | ## INFO 17 | 18 | It's a fairly slow script. I've hard-coded a limit of one movie per minute so that you don't get flagged by Google, 19 | so expect it to run for a day or two the first time you run it. 20 | 21 | The provided configs are well tested but they are **not perfect**. If you find an issue where the script 22 | picks the wrong movie entirely, please let me know. 23 | 24 | For now it downloads the video and its thumbnail, names them after the YouTube video and moves them to a subfolder in the 25 | movie directory. 26 | 27 | 28 | 29 | ## Installation 30 | 31 | It should be dead simple to install. Simply clone the repository and install the Python 3 modules "youtube-dl" and "google". 32 | 33 | #### modules needed: 34 | 35 | - youtube-dl 36 | - google 37 | 38 | If you wish to download 1080p versions you'll also need to install ffmpeg so that it can be run from the terminal. 39 | This should be easy on Linux but it's a bit messier on Mac or Windows. 40 | 41 | ## Configuring 42 | 43 | You'll need to add a default_config.cfg file to the program folder. There should be an "empty_default_config.cfg"; 44 | simply remove the "empty_" from its name and you'll be good to go. 45 | 46 | If you have a TMDB API key and wish to use it, open the default_config.cfg file and add it to the tmdb_api_key field. 47 | This is highly recommended since it will give much better results! 48 | Getting a TMDB API key is really simple and takes under 5 minutes. 49 | 50 | You'll also need to add extra configs to the "extra_configs" folder. There is a folder called "default_extra_configs" 51 | where you'll find fairly well tested configs that should work well; simply copy any config you want into the "extra_configs" folder. 52 | Each config in the "extra_configs" folder represents one extra type. One config can download multiple videos, but only for 53 | one type of extra at a time. Configs starting with "." or "_" are ignored. 54 | 55 | ## Running 56 | 57 | The program should now be ready to use. Run it with Python 3.5 or 3.6. 58 | The program expects to be given a movie directory or a movie library to work on. 59 | Giving it a movie directory will run the script once on that directory, while giving it a movie library will 60 | run the script on every folder in the given library.
61 | 62 | A few examples on an Ubuntu machine: 63 | 64 | #### movie directory example: 65 | 66 | python3 Movie-Extra-Downloader.py -d "/media/plex/Movies/Avatar (2009)" 67 | 68 | #### movie library example: 69 | 70 | python3 Movie-Extra-Downloader.py -l /media/plex/Movies 71 | 72 | ## As a custom script for Radarr 73 | 74 | You'll probably need to write a wrapper script yourself that calls this program, since such a script would differ from system to system. 75 | 76 | 77 | 78 | 79 | -------------------------------------------------------------------------------- /default_extra_configs/1080p-trailer.cfg: -------------------------------------------------------------------------------- 1 | [EXTRA_CONFIG] 2 | config_id = trailer_1080 3 | 4 | # name of the subfolder that this config puts downloaded videos in. 5 | extra_type = trailers 6 | 7 | force = false 8 | 9 | 10 | #### trailers specifics: 11 | only_play_trailers = false 12 | disable_play_trailers = false 13 | skip_movies_with_existing_trailers = true 14 | 15 | 16 | #----------------------------------------------------------------------------------------------------------------------- 17 | [SEARCHES] 18 | 19 | query_1 = site:youtube.com/watch?v= {movie_title} {movie_release_year} trailer 20 | limit_1 = 7 21 | source_1 = google_search 22 | 23 | query_2 = {movie_original_title} {movie_release_year} trailer 24 | limit_2 = 7 25 | source_2 = youtube_search 26 | 27 | #----------------------------------------------------------------------------------------------------------------------- 28 | [FILTERING] 29 | 30 | required_phrases = trailer 31 | 32 | banned_phrases = Side-by-Side, Side by Side, italiano, español, deutsch, german, series, comparison, clip, clips 33 | 34 | banned_channels = KinoCheck comedy, KinoCheck horror, KinoCheck action, KinoCheck kids, KinoCheck Home, 35 | KinoCheck, KinoCheck.com, New Trailer Buzz, Screen Junkies, movieclips, KinoCheck International, FilmSelect, FilmSelect Trailer, 36 | Entertainment Access, trailer city, MOVIE PREDICTOR, Movieclips Classic Trailers, Machinima, ZappMovieTrailer, 37 | TV Promos, Zero Media, One Media, moviemanTrailers, CheckTrailer, Movieclips Trailers 38 | 39 | # If you wonder why these channels are banned, it's because they either watermark their trailers, or begin or end their 40 | # trailers with a massive channel promo that lasts a stupidly long time and links to other videos on their channel.
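# How the [CUSTOM_FILTERS] section below is parsed (see extra_config.py and extra_finder.py): each key reads as
# {filter set number}_{min|max}_{absolute|relative}_{video attribute}. Absolute limits compare the attribute against
# the given value directly, while relative limits compare it against that fraction of the best candidate's value.
# Filter sets are tried in numeric order, each starting again from the full candidate list, until one of them
# leaves at least break_limit candidates.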
41 | 42 | 43 | 44 | 45 | #----------------------------------------------------------------------------------------------------------------------- 46 | [CUSTOM_FILTERS] 47 | 48 | break_limit = 3 49 | 50 | 1_min_relative_adjusted_rating = 0.91 51 | 1_min_absolute_resolution = 1080 52 | 1_min_absolute_resolution_ratio = 1.6 53 | 1_max_absolute_duration = 190 54 | 55 | 2_min_relative_adjusted_rating = 0.92 56 | 2_min_absolute_resolution = 720 57 | 2_min_absolute_resolution_ratio = 1.6 58 | 2_max_absolute_duration = 190 59 | 60 | 3_min_relative_adjusted_rating = 0.92 61 | 3_min_absolute_resolution = 720 62 | 3_min_absolute_resolution_ratio = 1.3 63 | 3_max_absolute_duration = 190 64 | 65 | 4_min_relative_adjusted_rating = 0.91 66 | 4_min_relative_resolution = 0.75 67 | 4_max_absolute_duration = 190 68 | 69 | 5_min_relative_adjusted_rating = 0.91 70 | 5_min_relative_resolution = 0.45 71 | 72 | #----------------------------------------------------------------------------------------------------------------------- 73 | [PRIORITY_RULES] 74 | 75 | preferred_channels = 76 | order = highest_view_count 77 | 78 | #----------------------------------------------------------------------------------------------------------------------- 79 | [DOWNLOADING_AND_POSTPROCESSING] 80 | 81 | videos_to_download = 1 82 | 83 | # arguments to pass to the youtube download module. (json dict. use double quotation marks instead of single quotation) 84 | # note: the outtmpl option is ignored, instead use the "naming_scheme" field. 85 | 86 | youtube_dl_arguments = {"socket_timeout": 3, 87 | "writethumbnail": "true", 88 | "outtmpl": "%(title)s.%(ext)s", 89 | "format": "bestvideo[ext=mp4][height <= 1080]+bestaudio[ext=m4a]/best[ext=mp4][height <= 1080]/best[height <= 1080]"} 90 | 91 | 92 | 93 | 94 | 95 | -------------------------------------------------------------------------------- /default_extra_configs/720p-trailer.cfg: -------------------------------------------------------------------------------- 1 | [EXTRA_CONFIG] 2 | config_id = trailer_720 3 | 4 | # name of the subfolder that this config puts downloaded videos in. 
5 | extra_type = trailers 6 | 7 | force = false 8 | 9 | 10 | #### trailers specifics: 11 | only_play_trailers = false 12 | disable_play_trailers = false 13 | skip_movies_with_existing_trailers = true 14 | 15 | 16 | #----------------------------------------------------------------------------------------------------------------------- 17 | [SEARCHES] 18 | 19 | query_1 = site:youtube.com/watch?v= {movie_title} {movie_release_year} trailer 20 | limit_1 = 7 21 | source_1 = google_search 22 | 23 | query_2 = {movie_original_title} {movie_release_year} trailer 24 | limit_2 = 7 25 | source_2 = youtube_search 26 | 27 | #----------------------------------------------------------------------------------------------------------------------- 28 | [FILTERING] 29 | 30 | required_phrases = trailer 31 | 32 | banned_phrases = Side-by-Side, Side by Side, italiano, español, deutsch, german, series, comparision, clip, clips 33 | 34 | banned_channels = KinoCheck comedy, KinoCheck horror, KinoCheck action, KinoCheck kids, KinoCheck Home, 35 | KinoCheck, KinoCheck.com, New Trailer Buzz, Screen Junkies, movieclips, KinoCheck International, FilmSelect, FilmSelect Trailer, 36 | Entertainment Access, trailer city, MOVIE PREDICTOR, Movieclips Classic Trailers, Machinima, ZappMovieTrailer, 37 | TV Promos, Zero Media, One Media, moviemanTrailers, CheckTrailer, Movieclips Trailers 38 | 39 | # If you wonder why these channels are banned it's because they either watermark their trailers or end the trailer with 40 | # massive channel promo that lasts for a stupidly long time linking to other videos on their channel. 41 | 42 | 43 | 44 | 45 | #----------------------------------------------------------------------------------------------------------------------- 46 | [CUSTOM_FILTERS] 47 | 48 | break_limit = 3 49 | 50 | 2_min_relative_adjusted_rating = 0.92 51 | 2_min_absolute_resolution = 720 52 | 2_min_absolute_resolution_ratio = 1.6 53 | 2_max_absolute_duration = 190 54 | 55 | 3_min_relative_adjusted_rating = 0.92 56 | 3_min_absolute_resolution = 720 57 | 3_min_absolute_resolution_ratio = 1.3 58 | 3_max_absolute_duration = 190 59 | 60 | 4_min_relative_adjusted_rating = 0.91 61 | 4_min_relative_resolution = 0.75 62 | 4_max_absolute_duration = 190 63 | 64 | 5_min_relative_adjusted_rating = 0.91 65 | 5_min_relative_resolution = 0.45 66 | 67 | #----------------------------------------------------------------------------------------------------------------------- 68 | [PRIORITY_RULES] 69 | 70 | preferred_channels = 71 | order = highest_view_count 72 | 73 | #----------------------------------------------------------------------------------------------------------------------- 74 | [DOWNLOADING_AND_POSTPROCESSING] 75 | 76 | videos_to_download = 1 77 | 78 | # arguments to pass to the youtube download module. (json dict. use double quotation marks instead of single quotation) 79 | # note: the outtmpl option is ignored, instead use the "naming_scheme" field. 80 | 81 | youtube_dl_arguments = {"socket_timeout": 3, 82 | "writethumbnail": "true", 83 | "outtmpl": "%(title)s.%(ext)s", 84 | "format": "best[ext=mp4][height <=? 720]/best[height <=? 720]"} 85 | 86 | 87 | 88 | 89 | 90 | -------------------------------------------------------------------------------- /default_extra_configs/experimental.cfg: -------------------------------------------------------------------------------- 1 | [EXTRA_CONFIG] 2 | config_id = behind the scenes 3 | 4 | # name of the subfolder that this config puts downloaded videos in. 
5 | extra_type = behind the scenes 6 | 7 | #### trailers specifics: 8 | # only_play_trailers = false 9 | # 10 | 11 | #### interviews specifics: 12 | # 13 | 14 | #----------------------------------------------------------------------------------------------------------------------- 15 | [SEARCHES] 16 | 17 | query_1 = site:youtube.com/watch?v= {movie_original_title} {movie_release_year} behind the scenes 18 | limit_1 = 7 19 | source_1 = google_search 20 | 21 | query_2 = {movie_original_title} {movie_release_year} behind the scenes 22 | limit_2 = 7 23 | source_2 = youtube_search 24 | 25 | #----------------------------------------------------------------------------------------------------------------------- 26 | [FILTERING] 27 | 28 | required_phrases = behind the scenes|backstage|back stage 29 | 30 | banned_phrases = Side-by-Side, Side by Side 31 | 32 | banned_channels = 33 | 34 | #----------------------------------------------------------------------------------------------------------------------- 35 | [CUSTOM_FILTERS] 36 | 37 | break_limit = 5 38 | 39 | 1_min_absolute_average_rating = 4.5 40 | 1_min_absolute_resolution = 720 41 | 1_min_absolute_view_count = 2000 42 | 43 | 44 | 2_min_absolute_average_rating = 4.5 45 | 2_min_absolute_resolution = 480 46 | 2_min_absolute_view_count = 1000 47 | 48 | 49 | 3_min_absolute_average_rating = 4 50 | 3_min_absolute_resolution = 480 51 | 3_min_absolute_view_count = 1000 52 | 53 | 4_min_absolute_average_rating = 3.5 54 | 4_min_absolute_resolution = 360 55 | 4_min_absolute_view_count = 500 56 | 57 | last_resort_policy = play-trailer/skip 58 | #----------------------------------------------------------------------------------------------------------------------- 59 | [PRIORITY_RULES] 60 | 61 | prefered_channels = 62 | order = highest_view_count 63 | 64 | #----------------------------------------------------------------------------------------------------------------------- 65 | [DOWNLOADING_AND_POSTPROCESSING] 66 | 67 | videos_to_download = 3 68 | 69 | # arguments to pass to the youtube download module. (json dict. use double quotation marks instead of single quotation) 70 | # note: the outtmpl option is ignored, instead use the "naming_scheme" field. 71 | 72 | youtube_dl_arguments = {"socket_timeout": 3, 73 | "writethumbnail": "true", 74 | "outtmpl": "%(title)s.%(ext)s"} 75 | 76 | naming_scheme = {video_title} - [{video_url_id}] 77 | 78 | 79 | 80 | 81 | -------------------------------------------------------------------------------- /default_extra_configs/theme-song.cfg: -------------------------------------------------------------------------------- 1 | [EXTRA_CONFIG] 2 | #vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv 3 | #This preset need you to have ffmpeg installed!!! 4 | #^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 5 | config_id = theme-music 6 | 7 | # name of the subfolder that this config puts downloaded videos in. 
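# note: theme-music is special-cased in extra_finder.move_videos, so the end result is not a subfolder:
# the downloaded audio is extracted to mp3 and saved as theme.mp3 directly in the movie folder.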
8 | extra_type = theme-music 9 | 10 | force = false 11 | 12 | skip_movies_with_existing_theme = true 13 | 14 | 15 | #----------------------------------------------------------------------------------------------------------------------- 16 | [SEARCHES] 17 | 18 | query_2 = site:youtube.com/watch?v= {movie_original_title} {movie_release_year} theme song 19 | limit_2 = 5 20 | source_2 = google_search 21 | 22 | #----------------------------------------------------------------------------------------------------------------------- 23 | [FILTERING] 24 | 25 | required_phrases = theme, song, ost, soundtrack, sound, music 26 | 27 | banned_phrases = italiano, español, deutsch, german, series, scene, all, top 10, featured, every song, full songs, 28 | full songlist, full song list, best of 29 | 30 | banned_channels = 31 | 32 | 33 | 34 | 35 | #----------------------------------------------------------------------------------------------------------------------- 36 | [CUSTOM_FILTERS] 37 | 38 | break_limit = 0 39 | 40 | 1_min_absolute_duration = 90 41 | 1_max_absolute_duration = 420 42 | 43 | #----------------------------------------------------------------------------------------------------------------------- 44 | [PRIORITY_RULES] 45 | 46 | preferred_channels = 47 | order = highest_view_count 48 | 49 | #----------------------------------------------------------------------------------------------------------------------- 50 | [DOWNLOADING_AND_POSTPROCESSING] 51 | 52 | videos_to_download = 1 53 | 54 | # arguments to pass to the youtube download module. (json dict. use double quotation marks instead of single quotation) 55 | # note: the outtmpl option is ignored, instead use the "naming_scheme" field. 56 | 57 | youtube_dl_arguments = {"socket_timeout": 3, 58 | "writethumbnail": "false", 59 | "outtmpl": "theme.%(ext)s", 60 | "format": "bestaudio", 61 | "postprocessors": [{ 62 | "key": "FFmpegExtractAudio", 63 | "preferredcodec": "mp3", 64 | "preferredquality": "192" 65 | }] 66 | } -------------------------------------------------------------------------------- /directory.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tools as tools 3 | from datetime import date 4 | import json 5 | 6 | 7 | class Directory(object): 8 | 9 | def __init__(self, full_path, tmdb_api_key=None, tmdb_id=None, json_dict=None): 10 | 11 | ######################################## 12 | self.name = None 13 | self.full_path = None 14 | self.content = dict 15 | self.subdirectories = dict() 16 | 17 | self.tmdb_id = None 18 | self.movie_title = None 19 | self.movie_original_title = None 20 | self.movie_original_title_keywords = None 21 | self.movie_release_year = None 22 | self.movie_title_keywords = list() 23 | self.movie_crew_data = list() 24 | self.trailer_youtube_video_id = None 25 | 26 | self.banned_title_keywords = list() 27 | self.banned_years = list() 28 | self.banned_youtube_videos_id = list() 29 | 30 | self.record = list() 31 | self.completed_configs = list() 32 | ######################################## 33 | 34 | if full_path is None: 35 | for key, value in json_dict.items(): 36 | setattr(self, key, value) 37 | else: 38 | self.update_all(full_path=full_path, tmdb_api_key=tmdb_api_key, tmdb_id=tmdb_id) 39 | 40 | @classmethod 41 | def load_directory(cls, file): 42 | with open(file, 'r') as load_file: 43 | return Directory(None, json_dict=json.load(load_file)) 44 | 45 | def update_all(self, full_path=None, tmdb_api_key=None, tmdb_id=None): 46 | if full_path is not None: 
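# building from a folder on disk: take the folder name, hash the directory contents, and try to
# identify the movie (via TMDB when an API key is available, otherwise from the folder name alone)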
47 | self.name = os.path.split(full_path)[1] 48 | self.full_path = full_path 49 | self.update_content() 50 | self.update_movie_info(tmdb_api_key=tmdb_api_key, tmdb_id=tmdb_id) 51 | if tmdb_api_key is not None: 52 | self.update_similar_results(tmdb_api_key) 53 | 54 | def update_content(self): 55 | 56 | self.content = dict() 57 | self.subdirectories = dict() 58 | 59 | for file in os.listdir(self.full_path): 60 | if os.path.isdir(os.path.join(self.full_path, file)): 61 | sub_content = dict() 62 | for sub_file in os.listdir(os.path.join(self.full_path, file)): 63 | sub_content[sub_file] = tools.hash_file(os.path.join(self.full_path, file, sub_file)) 64 | self.subdirectories[file] = sub_content 65 | else: 66 | self.content[file] = tools.hash_file(os.path.join(self.full_path, file)) 67 | 68 | def update_movie_info(self, tmdb_api_key=None, tmdb_id=None): 69 | def get_info_from_directory(): 70 | clean_name_tuple = tools.get_clean_string(self.name).split(' ') 71 | 72 | if any(clean_name_tuple[-1] == str(year) for year in range(1896, date.today().year + 2)): 73 | self.movie_release_year = int(clean_name_tuple[-1]) 74 | self.movie_title = ' '.join(clean_name_tuple[:-1]) 75 | self.movie_original_title = ' '.join(clean_name_tuple[:-1]) 76 | 77 | else: 78 | self.movie_release_year = None 79 | self.movie_title = ' '.join(clean_name_tuple) 80 | self.movie_original_title = ' '.join(clean_name_tuple) 81 | 82 | self.movie_title_keywords = tools.get_keyword_list(self.movie_title) 83 | self.movie_original_title_keywords = tools.get_keyword_list(self.movie_original_title) 84 | 85 | return True 86 | 87 | def get_info_from_details(): 88 | details_data = tools.get_tmdb_details_data(tmdb_api_key, tmdb_id) 89 | if details_data is not None: 90 | try: 91 | self.tmdb_id = details_data['id'] 92 | self.movie_title = details_data['title'] 93 | self.movie_original_title = details_data['original_title'] 94 | self.movie_title_keywords = tools.get_keyword_list(details_data['title']) 95 | self.movie_original_title_keywords = tools.get_keyword_list(details_data['original_title']) 96 | 97 | if len(details_data['release_date'][:4]) == 4: 98 | self.movie_release_year = int(details_data['release_date'][:4]) 99 | else: 100 | self.movie_release_year = None 101 | return True 102 | except KeyError as ke: 103 | return False 104 | except TypeError as te: 105 | return False 106 | else: 107 | return False 108 | 109 | def get_info_from_search(): 110 | search_data = tools.get_tmdb_search_data(tmdb_api_key, self.movie_title) 111 | 112 | if search_data is None or search_data['total_results'] == 0: 113 | return False 114 | 115 | movie_data = None 116 | movie_backup_data = None 117 | 118 | if self.movie_release_year is None: 119 | movie_data = search_data['results'][0] 120 | else: 121 | 122 | for result in search_data['results'][:5]: 123 | try: 124 | if result['release_date'] is None: 125 | result['release_date'] = '000000000000000' 126 | continue 127 | except KeyError: 128 | result['release_date'] = '000000000000000' 129 | continue 130 | if movie_data is None: 131 | if str(self.movie_release_year) == result['release_date'][:4]: 132 | movie_data = result 133 | elif result['release_date'][6:8] in ['09', '10', '11', '12'] \ 134 | and str(self.movie_release_year - 1) == result['release_date'][:4]: 135 | movie_data = result 136 | elif result['release_date'][6:8] in ['01', '02', '03', '04'] \ 137 | and str(self.movie_release_year + 1) == result['release_date'][:4]: 138 | movie_data = result 139 | elif movie_backup_data is None: 140 | if 
str(self.movie_release_year - 1) == result['release_date'][:4]: 141 | movie_backup_data = result 142 | 143 | elif str(self.movie_release_year + 1) == result['release_date'][:4]: 144 | movie_backup_data = result 145 | 146 | if movie_data is None and movie_backup_data is not None: 147 | print('None of the search results had a correct release year, picking the next best result') 148 | movie_data = movie_backup_data 149 | 150 | if movie_data is None: 151 | movie_data = search_data['results'][0] 152 | 153 | self.tmdb_id = movie_data['id'] 154 | self.movie_title = tools.get_clean_string(movie_data['title']) 155 | self.movie_original_title = tools.get_clean_string(movie_data['original_title']) 156 | self.movie_title_keywords = tools.get_keyword_list(movie_data['title']) 157 | self.movie_original_title_keywords = tools.get_keyword_list(movie_data['original_title']) 158 | 159 | if len(movie_data['release_date'][:4]) == 4: 160 | self.movie_release_year = int(movie_data['release_date'][:4]) 161 | else: 162 | self.movie_release_year = None 163 | return True 164 | 165 | if tmdb_api_key is not None: 166 | if tmdb_id is not None: 167 | if get_info_from_details(): 168 | return True 169 | else: 170 | tmdb_id = None 171 | if get_info_from_directory(): 172 | if get_info_from_search(): 173 | return True 174 | else: 175 | return False 176 | 177 | return get_info_from_directory() 178 | 179 | def update_similar_results(self, tmdb_api_key): 180 | 181 | def find_similar_results(): 182 | 183 | def find_by_tmdb_id(): 184 | similar_movies_data = list() 185 | movie_found = False 186 | 187 | for result in search_data['results']: 188 | 189 | if self.tmdb_id == result['id']: 190 | movie_found = True 191 | else: 192 | similar_movies_data.append(result) 193 | 194 | if movie_found: 195 | return similar_movies_data 196 | else: 197 | return None 198 | 199 | def find_by_release_year(): 200 | similar_movies_data = list() 201 | movie_found = False 202 | backup_found = False 203 | 204 | for result in search_data['results']: 205 | 206 | if not movie_found and str(self.movie_release_year) == result['release_date'][:4]: 207 | movie_found = True 208 | continue 209 | 210 | elif not backup_found: 211 | 212 | if result['release_date'][6:8] in ['09', '10', '11', '12'] \ 213 | and str(self.movie_release_year - 1) == result['release_date'][:4]: 214 | backup_found = True 215 | 216 | elif result['release_date'][6:8] in ['01', '02', '03'] \ 217 | and str(self.movie_release_year + 1 == result['release_date'][:4]): 218 | backup_found = True 219 | 220 | if len(similar_movies_data) < 5: 221 | similar_movies_data.append(result) 222 | 223 | if movie_found or backup_found: 224 | return similar_movies_data 225 | else: 226 | return None 227 | 228 | search_data = tools.get_tmdb_search_data(tmdb_api_key, self.movie_title) 229 | 230 | if search_data is None or search_data['total_results'] == 0: 231 | return list() 232 | 233 | ret = find_by_tmdb_id() 234 | if ret is not None: 235 | return ret[:5] 236 | 237 | if self.movie_release_year is None: 238 | return search_data['results'][1:6] 239 | 240 | ret = find_by_release_year() 241 | if ret is not None: 242 | return ret[:5] 243 | 244 | return None 245 | 246 | def process_similar_results(): 247 | self.banned_title_keywords = list() 248 | self.banned_years = list() 249 | 250 | for similar_movie in similar_movies: 251 | 252 | for word in tools.get_keyword_list(similar_movie['title']): 253 | 254 | if (word.lower() not in self.movie_title.lower() 255 | and word.lower() not in self.banned_title_keywords): 256 | 257 
| if self.movie_original_title is not None: 258 | 259 | if word.lower() not in self.movie_original_title.lower(): 260 | self.banned_title_keywords.append(word) 261 | 262 | else: 263 | self.banned_title_keywords.append(word) 264 | try: 265 | if len(similar_movie['release_date'][:4]) == 4 \ 266 | and int(similar_movie['release_date'][:4]) not in ([self.movie_release_year] + 267 | self.banned_years) \ 268 | and similar_movie['release_date'][:4] not in self.movie_title: 269 | 270 | self.banned_years.append(int(similar_movie['release_date'][:4])) 271 | except KeyError as e: 272 | pass 273 | similar_movies = find_similar_results() 274 | if similar_movies is not None: 275 | process_similar_results() 276 | return True 277 | else: 278 | return False 279 | 280 | def save_directory(self, save_path): 281 | self.content = None 282 | self.subdirectories = None 283 | if not os.path.isdir(save_path): 284 | os.mkdir(os.path.join(save_path)) 285 | with open(os.path.join(save_path, self.name), 'w') as save_file: 286 | json.dump(self.__dict__, save_file) 287 | -------------------------------------------------------------------------------- /empty_default_config.cfg: -------------------------------------------------------------------------------- 1 | [SETTINGS] 2 | 3 | tmdb_api_key = 4 | 5 | [ADVANCED_SETTINGS] 6 | 7 | force_all = false 8 | -------------------------------------------------------------------------------- /extra_config.py: -------------------------------------------------------------------------------- 1 | import configparser 2 | import codecs 3 | import json 4 | 5 | import tools as tools 6 | 7 | 8 | class ExtraSettings: 9 | 10 | # todo: make into dictionary. 11 | 12 | # todo (0): make sure nothing fails to import. 13 | 14 | def __init__(self, config_path): 15 | 16 | with codecs.open(config_path, 'r', 'utf-8') as file: 17 | self.config = configparser.RawConfigParser() 18 | self.config.read_file(file) 19 | 20 | self.extra_type = self.config['EXTRA_CONFIG'].get('extra_type') 21 | self.config_id = self.config['EXTRA_CONFIG'].get('config_id') 22 | self.force = self.config['EXTRA_CONFIG'].getboolean('force') 23 | 24 | self.searches = self.get_searches() 25 | 26 | self.required_phrases = \ 27 | tools.make_list_from_string(self.config['FILTERING'].get('required_phrases').replace('\n', '')) 28 | self.banned_phrases = \ 29 | tools.make_list_from_string(self.config['FILTERING'].get('banned_phrases').replace('\n', '')) 30 | self.banned_channels = \ 31 | tools.make_list_from_string(self.config['FILTERING'].get('banned_channels').replace('\n', '')) 32 | 33 | self.custom_filters = self.get_custom_filters() 34 | self.last_resort_policy = self.config['DOWNLOADING_AND_POSTPROCESSING'].get('last_resort_policy') 35 | 36 | self.priority_order = self.config['PRIORITY_RULES'].get('order') 37 | self.preferred_channels = \ 38 | tools.make_list_from_string(self.config['PRIORITY_RULES'].get('preferred_channels', "").replace('\n', '')) 39 | 40 | self.videos_to_download = self.config['DOWNLOADING_AND_POSTPROCESSING'].getint('videos_to_download', 1) 41 | self.naming_scheme = self.config['DOWNLOADING_AND_POSTPROCESSING'].get('naming_scheme') 42 | self.youtube_dl_arguments = json.loads(self.config['DOWNLOADING_AND_POSTPROCESSING'].get('youtube_dl_arguments')) 43 | 44 | self.disable_play_trailers = self.config['EXTRA_CONFIG'].getboolean('disable_play_trailers', False) 45 | self.only_play_trailers = self.config['EXTRA_CONFIG'].getboolean('only_play_trailers', False) 46 | self.skip_movies_with_existing_trailers = \ 47 | 
self.config['EXTRA_CONFIG'].getboolean('skip_movies_with_existing_trailers', False) 48 | 49 | self.skip_movies_with_existing_theme = \ 50 | self.config['EXTRA_CONFIG'].getboolean('skip_movies_with_existing_theme', False) 51 | return 52 | 53 | def get_searches(self): 54 | 55 | ret = dict() 56 | 57 | for option, value in self.config['SEARCHES'].items(): 58 | 59 | try: 60 | index = int(option.split('_')[-1]) 61 | except ValueError: 62 | continue 63 | 64 | if index not in ret: 65 | ret[index] = dict() 66 | ret[index]['_'.join(option.split('_')[:-1])] = value 67 | 68 | return ret 69 | 70 | def get_custom_filters(self): 71 | 72 | ret = dict() 73 | 74 | for option, value in self.config['CUSTOM_FILTERS'].items(): 75 | 76 | if option == 'break_limit': 77 | self.break_limit = int(value) 78 | continue 79 | if option == 'last_resort_policy': 80 | self.last_resort_policy = value 81 | continue 82 | 83 | try: 84 | index = int(option.split('_')[0]) 85 | except ValueError: 86 | continue 87 | 88 | if index not in ret: 89 | ret[index] = list() 90 | try: 91 | ret[index].append('_'.join(option.split('_')[1:]) + ':::' + value) 92 | except ValueError: 93 | continue 94 | 95 | sorted_ret = list() 96 | for key in sorted(ret.keys()): 97 | sorted_ret.append(ret[key]) 98 | 99 | return sorted_ret 100 | -------------------------------------------------------------------------------- /extra_finder.py: -------------------------------------------------------------------------------- 1 | import os 2 | from youtube_dl import DownloadError 3 | import tools as tools 4 | import youtube_dl 5 | import url_finders 6 | from bisect import bisect 7 | from datetime import date 8 | import time 9 | import shutil 10 | 11 | 12 | class ExtraFinder: 13 | 14 | conn_errors = 0 15 | 16 | def __init__(self, directory, extra_config): 17 | 18 | self.directory = directory 19 | self.config = extra_config 20 | self.complete = True 21 | 22 | self.youtube_videos = list() 23 | self.play_trailers = list() 24 | 25 | def search(self): 26 | 27 | def create_youtube_video(): 28 | 29 | def get_video_data(): 30 | 31 | for tries in range(1, 11): 32 | 33 | try: 34 | 35 | with youtube_dl.YoutubeDL({'socket_timeout': '3'}) as ydl: 36 | return ydl.extract_info(url, download=False) 37 | 38 | except DownloadError as e: 39 | 40 | if 'ERROR: Unable to download webpage:' in e.args[0]: 41 | 42 | if tries > 3: 43 | print('hey, there: error!!!') 44 | raise 45 | 46 | print('failed to get video data, retrying') 47 | time.sleep(1) 48 | else: 49 | return None 50 | 51 | youtube_video = get_video_data() 52 | 53 | if not youtube_video: 54 | return None 55 | 56 | youtube_video['title'] = tools.get_clean_string(youtube_video['title']) 57 | 58 | if youtube_video['view_count'] is None: 59 | youtube_video['view_count'] = 100 60 | 61 | if youtube_video['view_count'] < 100: 62 | youtube_video['view_count'] = 100 63 | 64 | if youtube_video['average_rating'] is None: 65 | youtube_video['average_rating'] = 0 66 | 67 | if youtube_video['view_count'] is None: 68 | youtube_video['view_count'] = 0 69 | 70 | youtube_video['adjusted_rating'] = \ 71 | youtube_video['average_rating'] * (1 - 1 / ((youtube_video['view_count'] / 60) ** 0.5)) 72 | 73 | if youtube_video['width'] is None or youtube_video['height'] is None: 74 | youtube_video['resolution_ratio'] = 1 75 | youtube_video['resolution'] = 144 76 | else: 77 | youtube_video['resolution_ratio'] = youtube_video['width'] / youtube_video['height'] 78 | 79 | resolution = max(int(youtube_video['height']), 80 | int(youtube_video['width'] / 16 * 9)) 81 
| resolutions = [144, 240, 360, 480, 720, 1080, 1440, 2160] 82 | 83 | youtube_video['resolution'] = resolutions[bisect(resolutions, resolution * 1.2) - 1] 84 | 85 | if youtube_video['upload_date']: 86 | if youtube_video['upload_date'] is not None: 87 | date_str = youtube_video['upload_date'] 88 | upload_date = date(int(date_str[:4]), int(date_str[4:6]), int(date_str[6:8])) 89 | time_delta = date.today() - upload_date 90 | youtube_video['views_per_day'] = (youtube_video['view_count'] / 91 | (365 + time_delta.total_seconds() / 60 / 60 / 24)) 92 | else: 93 | print('no "upload_date"!!!') 94 | youtube_video['views_per_day'] = 0 95 | else: 96 | print('no "upload_date"!!!') 97 | youtube_video['views_per_day'] = 0 98 | return youtube_video 99 | 100 | url_list = list() 101 | 102 | for search_index, search in self.config.searches.items(): 103 | query = tools.apply_query_template(search['query'], self.directory.__dict__) 104 | limit = int(search['limit']) 105 | 106 | if search['source'] == 'google_search': 107 | urls = url_finders.google_search(query, limit) 108 | 109 | elif search['source'] == 'youtube_search': 110 | urls = url_finders.youtube_search(query, limit) 111 | 112 | elif search['source'] == 'google_channel_search': 113 | urls = url_finders.youtube_channel_search(query, limit) 114 | 115 | else: 116 | print("The search engine \"" + search['source'] + "\" wasn't recognized. Skipping.") 117 | print('Please use "google_search", "youtube_search" or "youtube_channel_search" as the source.') 118 | continue 119 | 120 | if urls: 121 | url_list += urls 122 | 123 | for url in list(set(url_list)): 124 | if not any(url in youtube_video['webpage_url'] 125 | or youtube_video['webpage_url'] in url 126 | for youtube_video in self.youtube_videos): 127 | if 'youtube.com/watch?v=' not in url: 128 | continue 129 | video = create_youtube_video() 130 | 131 | if video: 132 | self.youtube_videos.append(video) 133 | if not video['categories']: 134 | self.play_trailers.append(video) 135 | return 136 | 137 | def filter_search_result(self): 138 | 139 | filtered_candidates = list() 140 | 141 | for youtube_video in self.youtube_videos: 142 | 143 | info = 'Video "' + youtube_video['webpage_url'] + '" was removed. 
reasons: ' 144 | append_video = True 145 | 146 | for youtube_id in self.directory.banned_youtube_videos_id: 147 | if youtube_id == youtube_video['id']: 148 | info += 'banned youtube video, ' 149 | append_video = False 150 | break 151 | 152 | try: 153 | for year in self.directory.banned_years: 154 | if str(year) in youtube_video['title'].lower(): 155 | append_video = False 156 | info += 'containing banned year in title, ' 157 | break 158 | if any(str(year) in tag.lower() for tag in youtube_video['tags']): 159 | append_video = False 160 | info += 'containing banned year in tags, ' 161 | break 162 | except TypeError: 163 | append_video = False 164 | info += 'unable to confirm year not in (tag:TypeError), ' 165 | 166 | buffer = 0 167 | if len(self.directory.banned_title_keywords) > 3: 168 | buffer = 1 169 | if len(self.directory.banned_title_keywords) > 10: 170 | buffer = 2 171 | for keyword in self.directory.banned_title_keywords: 172 | if ' ' + keyword.lower() + ' ' in ' ' + youtube_video['title'].lower() + ' ': 173 | buffer -= 1 174 | if buffer < 0: 175 | append_video = False 176 | info += 'containing banned similar title keywords, ' 177 | break 178 | 179 | if not any(phrase.lower() in youtube_video['title'].lower() for phrase in self.config.required_phrases): 180 | append_video = False 181 | info += 'not containing any required phrase, ' 182 | 183 | for phrase in self.config.banned_phrases: 184 | if phrase.lower() in youtube_video['title'].lower(): 185 | append_video = False 186 | info += 'containing a banned phrase, ' 187 | break 188 | 189 | for channel in self.config.banned_channels: 190 | if channel.lower() == youtube_video['uploader'].lower(): 191 | append_video = False 192 | info += 'made by a banned channel, ' 193 | break 194 | 195 | title_in_video = False 196 | original_title_in_video = False 197 | 198 | buffer = 0 199 | if len(self.directory.movie_title_keywords) > 3: 200 | buffer = 1 201 | if len(self.directory.movie_title_keywords) > 7: 202 | buffer = 2 203 | 204 | for keyword in self.directory.movie_title_keywords: 205 | if ' ' + keyword.lower() + ' ' not in ' ' + youtube_video['title'].lower() + ' ': 206 | buffer -= 1 207 | if buffer < 0: 208 | break 209 | else: 210 | title_in_video = True 211 | 212 | if self.directory.movie_original_title is not None: 213 | buffer = int(len(self.directory.movie_original_title_keywords) / 4 + 0.1) 214 | 215 | for keyword in self.directory.movie_original_title_keywords: 216 | if ' ' + keyword.lower() + ' ' not in ' ' + youtube_video['title'].lower() + ' ': 217 | buffer -= 1 218 | if buffer < 0: 219 | break 220 | else: 221 | original_title_in_video = True 222 | 223 | if not original_title_in_video and not title_in_video: 224 | append_video = False 225 | info += 'not containing title, ' 226 | 227 | if append_video: 228 | filtered_candidates.append(youtube_video) 229 | else: 230 | print(info[:-2] + '.') 231 | 232 | self.youtube_videos = filtered_candidates 233 | 234 | filtered_candidates = list() 235 | 236 | for youtube_video in self.play_trailers: 237 | 238 | info = 'Video "' + youtube_video['webpage_url'] + '" was removed. 
reasons: ' 239 | append_video = True 240 | 241 | for year in self.directory.banned_years: 242 | if str(year) in youtube_video['title'].lower(): 243 | append_video = False 244 | info += 'containing banned year in title, ' 245 | break 246 | if any(str(year) in tag.lower() for tag in youtube_video['tags']): 247 | append_video = False 248 | info += 'containing banned year in tags, ' 249 | break 250 | 251 | buffer = 0 252 | if len(self.directory.banned_title_keywords) > 3: 253 | buffer = 1 254 | if len(self.directory.banned_title_keywords) > 6: 255 | buffer = 2 256 | for keyword in self.directory.banned_title_keywords: 257 | if ' ' + keyword.lower() + ' ' in ' ' + youtube_video['title'].lower() + ' ': 258 | buffer -= 1 259 | if buffer < 0: 260 | append_video = False 261 | info += 'containing banned similar title keywords, ' 262 | break 263 | 264 | title_in_video = False 265 | original_title_in_video = False 266 | 267 | buffer = 0 268 | if len(self.directory.movie_title_keywords) > 3: 269 | buffer = 1 270 | if len(self.directory.movie_title_keywords) > 7: 271 | buffer = 2 272 | 273 | for keyword in self.directory.movie_title_keywords: 274 | if keyword.lower() not in youtube_video['title'].lower(): 275 | buffer -= 1 276 | if buffer < 0: 277 | break 278 | else: 279 | title_in_video = True 280 | 281 | if self.directory.movie_original_title is not None: 282 | buffer = int(len(self.directory.movie_original_title_keywords) / 4 + 0.1) 283 | 284 | for keyword in self.directory.movie_original_title_keywords: 285 | if keyword.lower() not in youtube_video['title'].lower(): 286 | buffer -= 1 287 | if buffer < 0: 288 | break 289 | else: 290 | original_title_in_video = True 291 | 292 | if not original_title_in_video and not title_in_video: 293 | append_video = False 294 | info += 'not containing title, ' 295 | 296 | if append_video: 297 | filtered_candidates.append(youtube_video) 298 | else: 299 | print(info[:-2] + '.') 300 | 301 | self.play_trailers = filtered_candidates 302 | 303 | def apply_custom_filters(self): 304 | 305 | def absolute(): 306 | 307 | minimum = filter_args[0] == 'min' 308 | ret = list() 309 | 310 | for youtube_video in filtered_list: 311 | if minimum: 312 | if youtube_video[key] >= limit_value: 313 | ret.append(youtube_video) 314 | else: 315 | if youtube_video[key] <= limit_value: 316 | ret.append(youtube_video) 317 | return ret 318 | 319 | def relative(): 320 | 321 | minimum = filter_args[0] == 'min' 322 | ret = list() 323 | max_value = float('-inf') 324 | 325 | for youtube_video in filtered_list: 326 | video_value = youtube_video[key] 327 | if video_value > max_value: 328 | max_value = video_value 329 | 330 | for youtube_video in filtered_list: 331 | if minimum: 332 | if youtube_video[key] >= max_value * limit_value: 333 | ret.append(youtube_video) 334 | else: 335 | if youtube_video[key] <= max_value * limit_value: 336 | ret.append(youtube_video) 337 | return ret 338 | 339 | def highest(): 340 | keep = filter_args[0] == 'keep' 341 | 342 | ret = sorted(filtered_list, key=lambda x: x[key], reverse=True) 343 | 344 | if keep: 345 | if len(ret) > limit_value: 346 | ret = ret[:limit_value] 347 | else: 348 | ret = ret 349 | else: 350 | if len(ret) > limit_value: 351 | ret = ret[limit_value:] 352 | else: 353 | ret = list() 354 | 355 | return ret 356 | 357 | def lowest(): 358 | keep = filter_args[0] == 'keep' 359 | 360 | ret = sorted(filtered_list, key=lambda x: x[key]) 361 | 362 | if keep: 363 | if len(ret) > limit_value: 364 | ret = ret[:limit_value] 365 | else: 366 | ret = ret 367 | else: 368 
| if len(ret) > limit_value: 369 | ret = ret[limit_value:] 370 | else: 371 | ret = list() 372 | 373 | return ret 374 | 375 | filtered_list = None 376 | 377 | for filter_package in self.config.custom_filters: 378 | 379 | filtered_list = list(self.youtube_videos) 380 | 381 | for data in filter_package: 382 | filter_args = data.split(':::')[0].split('_') 383 | limit_value = float(data.split(':::')[1]) 384 | try: 385 | int(filter_args[-1]) 386 | except ValueError: 387 | key = '_'.join(filter_args[2:]) 388 | else: 389 | key = '_'.join(filter_args[2:-1]) 390 | 391 | if filter_args[1] == 'relative': 392 | filtered_list = relative() 393 | if filter_args[1] == 'absolute': 394 | filtered_list = absolute() 395 | if filter_args[1] == 'highest': 396 | filtered_list = highest() 397 | if filter_args[1] == 'lowest': 398 | filtered_list = lowest() 399 | if self.play_trailers and self.config.extra_type == 'trailers': 400 | if len(filtered_list) + 1 >= self.config.break_limit: 401 | break 402 | else: 403 | if len(filtered_list) >= self.config.break_limit: 404 | break 405 | 406 | self.youtube_videos = filtered_list 407 | 408 | return 409 | 410 | def order_results(self): 411 | 412 | attribute_tuple = self.config.priority_order.split('_') 413 | highest = attribute_tuple[0] == 'highest' 414 | key = '_'.join(attribute_tuple[1:]) 415 | 416 | for youtube_video in self.youtube_videos: 417 | if youtube_video[key] is None: 418 | youtube_video[key] = 0 419 | 420 | if highest: 421 | self.youtube_videos = sorted(self.youtube_videos, key=lambda x: x[key], reverse=True) 422 | else: 423 | self.youtube_videos = sorted(self.youtube_videos, key=lambda x: x[key]) 424 | 425 | preferred_videos = list() 426 | not_preferred_channels = list() 427 | 428 | for youtube_video in self.youtube_videos: 429 | if youtube_video['uploader'] in self.config.preferred_channels: 430 | preferred_videos.append(youtube_video) 431 | else: 432 | not_preferred_channels.append(youtube_video) 433 | 434 | self.youtube_videos = preferred_videos + not_preferred_channels 435 | 436 | self.play_trailers = sorted(self.play_trailers, key=lambda x: x['view_count'], reverse=True) 437 | 438 | def download_videos(self, tmp_file): 439 | 440 | downloaded_videos_meta = list() 441 | 442 | arguments = self.config.youtube_dl_arguments 443 | arguments['outtmpl'] = os.path.join(tmp_file, arguments['outtmpl']) 444 | for key, value in arguments.items(): 445 | if isinstance(value, str): 446 | if value.lower() == 'false' or value.lower() == 'no': 447 | arguments[key] = '' 448 | 449 | count = 0 450 | 451 | for youtube_video in self.youtube_videos[:]: 452 | if not self.config.force: 453 | for vid_id in self.directory.record: 454 | if vid_id == youtube_video['id']: 455 | continue 456 | 457 | for tries in range(1, 11): 458 | try: 459 | with youtube_dl.YoutubeDL(arguments) as ydl: 460 | meta = ydl.extract_info(youtube_video['webpage_url']) 461 | downloaded_videos_meta.append(meta) 462 | count += 1 463 | break 464 | 465 | except DownloadError as e: 466 | if tries > 3: 467 | if str(e).startswith('ERROR: Did not get any data blocks'): 468 | return 469 | print('failed to download the video.') 470 | break 471 | print('failed to download the video. 
retrying') 472 | time.sleep(3) 473 | 474 | if count >= self.config.videos_to_download: 475 | break 476 | 477 | return downloaded_videos_meta 478 | 479 | def move_videos(self, downloaded_videos_meta, tmp_folder): 480 | 481 | def copy_file(): 482 | if not os.path.isdir(os.path.split(target_path)[0]): 483 | os.mkdir(os.path.split(target_path)[0]) 484 | shutil.move(source_path, target_path) 485 | 486 | def record_file(): 487 | vid_id = 'unknown' 488 | for meta in downloaded_videos_meta: 489 | if meta['title'] + '.' + meta['ext'] == file: 490 | vid_id = meta['id'] 491 | break 492 | 493 | self.directory.record.append( 494 | {'hash': file_hash, 495 | 'file_path': os.path.join(self.directory.full_path, self.config.extra_type, file), 496 | 'file_name': file, 497 | 'youtube_video_id': vid_id, 498 | 'config_type': self.config.extra_type}) 499 | 500 | def determine_case(): 501 | for content_file, content_file_hash in self.directory.content.items(): 502 | if content_file == file: 503 | return 'name_in_directory' 504 | 505 | if file_hash == content_file_hash: 506 | return 'hash_in_directory' 507 | 508 | for sub_content in self.directory.subdirectories.values(): 509 | for content_file, content_file_hash in sub_content.items(): 510 | if content_file == file: 511 | return 'name_in_directory' 512 | if file_hash == content_file_hash: 513 | return 'hash_in_directory' 514 | 515 | return '' 516 | 517 | def handle_name_in_directory(): 518 | if self.config.force: 519 | copy_file() 520 | record_file() 521 | self.directory.subdirectories[self.config.extra_type][file] = file_hash 522 | else: 523 | os.remove(source_path) 524 | 525 | def handle_hash_in_directory(): 526 | if self.config.force: 527 | copy_file() 528 | record_file() 529 | if self.config.extra_type in self.directory.subdirectories: 530 | self.directory.subdirectories[self.config.extra_type] = {file: file_hash} 531 | else: 532 | self.directory.subdirectories = {self.config.extra_type: {file: file_hash}} 533 | else: 534 | os.remove(source_path) 535 | 536 | for file in os.listdir(tmp_folder): 537 | source_path = os.path.join(tmp_folder, file) 538 | if self.config.extra_type == "theme-music": 539 | target_path = os.path.join(self.directory.full_path, 'theme.mp3') 540 | else: 541 | target_path = os.path.join(self.directory.full_path, self.config.extra_type, file) 542 | 543 | file_hash = tools.hash_file(source_path) 544 | 545 | if any(file_hash == record['hash'] for record in self.directory.record): 546 | os.remove(source_path) 547 | continue 548 | 549 | case = determine_case() 550 | 551 | if case == 'name_in_directory': 552 | handle_name_in_directory() 553 | elif case == 'hash_in_directory': 554 | handle_hash_in_directory() 555 | else: 556 | copy_file() 557 | 558 | if self.config.extra_type in self.directory.subdirectories: 559 | self.directory.subdirectories[self.config.extra_type][file] = file_hash 560 | else: 561 | self.directory.subdirectories = {self.config.extra_type: {file: file_hash}} 562 | 563 | record_file() 564 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from extra_finder import ExtraFinder 4 | 5 | 6 | def download_extra(directory, config, tmp_folder): 7 | def process_trailers_config(tmp_folder): 8 | 9 | finder = ExtraFinder(directory, config) 10 | print('processing: ' + directory.name) 11 | finder.search() 12 | finder.filter_search_result() 13 | 14 | for youtube_video in finder.youtube_videos: 15 | 
print('--------------------------------------------------------------------------------------') 16 | print(youtube_video['webpage_url']) 17 | print(str(youtube_video['adjusted_rating'])) 18 | print(youtube_video['format']) 19 | print(str(youtube_video['views_per_day'])) 20 | print('--------------------------------------------------------------------------------------') 21 | print(directory.name) 22 | 23 | finder.apply_custom_filters() 24 | finder.order_results() 25 | 26 | if finder.play_trailers and finder.youtube_videos and not config.disable_play_trailers: 27 | if 'duration' in finder.youtube_videos[0] and 'duration' in finder.play_trailers[0]: 28 | if finder.youtube_videos[0]['duration'] - 23 <= \ 29 | finder.play_trailers[0]['duration'] <= \ 30 | finder.youtube_videos[0]['duration'] + 5: 31 | finder.youtube_videos = [finder.play_trailers[0]] + finder.youtube_videos 32 | print('picked play trailer.') 33 | # if len(finder.youtube_videos) < config.break_limit: 34 | # finder.youtube_videos = [finder.play_trailers[0]] + finder.youtube_videos 35 | 36 | if config.only_play_trailers: 37 | if finder.play_trailers: 38 | finder.youtube_videos = [finder.play_trailers[0]] 39 | else: 40 | return 41 | 42 | if not finder.youtube_videos and finder.play_trailers and not config.disable_play_trailers: 43 | finder.youtube_videos = finder.play_trailers 44 | 45 | for youtube_video in finder.youtube_videos: 46 | print(youtube_video['webpage_url'] + ' : ' + 47 | youtube_video['format'] + 48 | ' (' + str(youtube_video['adjusted_rating']) + ')') 49 | for youtube_video in finder.play_trailers: 50 | print('play trailer: ' + youtube_video['webpage_url'] + ' : ' + youtube_video['format']) 51 | print('--------------------------------------------------------------------------------------') 52 | print('downloading for: ' + directory.name) 53 | count = 0 54 | tmp_folder = os.path.join(tmp_folder, 'tmp_0') 55 | while True: 56 | try: 57 | while os.listdir(tmp_folder): 58 | if count == 0 and not tmp_folder.endswith('_0'): 59 | tmp_folder += '_0' 60 | else: 61 | tmp_folder = tmp_folder[:-2] + '_' + str(count) 62 | count += 1 63 | break 64 | except FileNotFoundError: 65 | os.mkdir(tmp_folder) 66 | for youtube_id in directory.banned_youtube_videos_id: 67 | for youtube_video in finder.youtube_videos: 68 | if youtube_id == youtube_video['id']: 69 | finder.youtube_videos.remove(youtube_video) 70 | 71 | downloaded_videos_meta = finder.download_videos(tmp_folder) 72 | if downloaded_videos_meta: 73 | finder.move_videos(downloaded_videos_meta, tmp_folder) 74 | if "trailer" in config.extra_type.lower(): 75 | directory.trailer_youtube_video_id = downloaded_videos_meta[0]['id'] 76 | 77 | def process_interviews_config(): 78 | pass 79 | 80 | def process_behind_the_scenes_config(): 81 | pass 82 | 83 | def process_featurettes_config(): 84 | pass 85 | 86 | def process_deleted_scenes_config(): 87 | pass 88 | 89 | def process_theme_music_config(tmp_folder): 90 | 91 | finder = ExtraFinder(directory, config) 92 | print('processing: ' + directory.name) 93 | finder.search() 94 | finder.filter_search_result() 95 | 96 | for youtube_video in finder.youtube_videos: 97 | print('--------------------------------------------------------------------------------------') 98 | print(youtube_video['webpage_url']) 99 | print(str(youtube_video['adjusted_rating'])) 100 | print(youtube_video['format']) 101 | print(str(youtube_video['views_per_day'])) 102 | print('--------------------------------------------------------------------------------------') 103 | 
print(directory.name) 104 | 105 | finder.apply_custom_filters() 106 | finder.order_results() 107 | 108 | for youtube_video in finder.youtube_videos: 109 | print(youtube_video['webpage_url'] + ' : ' + 110 | youtube_video['format'] + 111 | ' (' + str(youtube_video['adjusted_rating']) + ')') 112 | for youtube_video in finder.play_trailers: 113 | print('play trailer: ' + youtube_video['webpage_url'] + ' : ' + youtube_video['format']) 114 | print('--------------------------------------------------------------------------------------') 115 | print('downloading for: ' + directory.name) 116 | count = 0 117 | tmp_folder = os.path.join(tmp_folder, 'tmp_0') 118 | while True: 119 | try: 120 | while os.listdir(tmp_folder): 121 | if count == 0 and not tmp_folder.endswith('_0'): 122 | tmp_folder += '_0' 123 | else: 124 | tmp_folder = tmp_folder[:-2] + '_' + str(count) 125 | count += 1 126 | break 127 | except FileNotFoundError: 128 | os.mkdir(tmp_folder) 129 | 130 | downloaded_videos_meta = finder.download_videos(tmp_folder) 131 | if downloaded_videos_meta: 132 | finder.move_videos(downloaded_videos_meta, tmp_folder) 133 | 134 | if config.extra_type == 'trailers': 135 | process_trailers_config(tmp_folder) 136 | elif config.extra_type == 'interviews': 137 | process_interviews_config() 138 | elif config.extra_type == 'behind the scenes': 139 | process_behind_the_scenes_config() 140 | elif config.extra_type == 'featurettes': 141 | process_featurettes_config() 142 | elif config.extra_type == 'theme-music': 143 | process_theme_music_config(tmp_folder) 144 | elif config.extra_type == 'deleted scenes': 145 | process_deleted_scenes_config() 146 | 147 | # 148 | # library1 = '/storage/plex/library/Filmer' 149 | # library2 = 'testdir' 150 | # 151 | # c = configparser.ConfigParser() 152 | # c.read('default_config.cfg') 153 | # 154 | # tmp_folder = os.path.join(os.path.dirname(sys.argv[0]), 'tmp') 155 | # 156 | # library = library1 157 | # library_content = os.listdir(library) 158 | # 159 | # configs = os.path.join(os.path.dirname(sys.argv[0]), 'extra_configs') 160 | # configs_content = os.listdir(configs) 161 | # 162 | # records = os.path.join(os.path.dirname(sys.argv[0]), 'records') 163 | # 164 | # force = False 165 | # 166 | # for folder in library_content: 167 | # if re.match("^\\(.*\\)$", folder) or re.match("^\\..*", folder): 168 | # continue 169 | # for config in configs_content: 170 | # if config.startswith('.'): 171 | # continue 172 | # try: 173 | # try: 174 | # directory = Directory.load_directory(os.path.join(records, folder)) 175 | # except FileNotFoundError: 176 | # directory = Directory(os.path.join(library, folder), c.get('SETTINGS', 'tmdb_api_key')) 177 | # 178 | # extra_config = ExtraSettings(os.path.join(configs, config)) 179 | # if extra_config.config_id in directory.completed_configs and not force: 180 | # continue 181 | # 182 | # directory.update_content() 183 | # 184 | # if force: 185 | # old_record = directory.record 186 | # directory.record = list() 187 | # extra_config.force = True 188 | # 189 | # if not os.path.isdir(tmp_folder): 190 | # os.mkdir(tmp_folder) 191 | # 192 | # 193 | # download_extra(directory, extra_config, tmp_folder) 194 | # 195 | # if force: 196 | # # todo: delete all paths in the old record that are not in the new record 197 | # pass 198 | # 199 | # except FileNotFoundError as e: 200 | # print('file not found: ' + str(e.args[0])) 201 | # continue 202 | # 203 | # except HTTPError: 204 | # print('You might have been flagged by google search. 
try again tomorrow.') 205 | # sys.exit() 206 | # 207 | # except URLError: 208 | # print('you might have lost your internet connections. exiting') 209 | # sys.exit() 210 | # 211 | # except timeout: 212 | # print('you might have lost your internet connections. exiting') 213 | # sys.exit() 214 | # 215 | # except ConnectionResetError: 216 | # print('you might have lost your internet connections. exiting') 217 | # sys.exit() 218 | # 219 | # except KeyboardInterrupt: 220 | # sys.exit() 221 | # try: 222 | # shutil.rmtree(tmp_folder) 223 | # except FileNotFoundError: 224 | # pass 225 | # os.mkdir(tmp_folder) 226 | # 227 | # sys.exit() 228 | -------------------------------------------------------------------------------- /old_code/Movie-Extra-Downloader.py: -------------------------------------------------------------------------------- 1 | import os 2 | import configparser 3 | from configparser import NoOptionError 4 | import fnmatch 5 | import pprint 6 | import shutil 7 | import time 8 | import sys 9 | import codecs 10 | from urllib.request import urlopen 11 | from urllib.error import URLError, HTTPError 12 | import json 13 | from socket import timeout 14 | 15 | # pip install these packages: 16 | try: 17 | from googlesearch import search as google_search # google package 18 | except ImportError: 19 | print('Please upgrade to python 3.6 or run the 2.7 version.') 20 | sys.exit() 21 | from pytube import YouTube # pytube package 22 | from pytube import exceptions 23 | import ffmpeg 24 | # also, install FFmpeg. 25 | 26 | ######################################################################################################################## 27 | 28 | # global variables: 29 | # todo: check config exists. 30 | with codecs.open(os.path.join(os.path.dirname(sys.argv[0]), 'default_config.cfg'), 'r', 'utf-8') as file: 31 | global_config = configparser.ConfigParser() 32 | global_config.read_file(file) 33 | global_settings = global_config['SETTINGS'] 34 | global_advanced_settings = global_config['ADVANCED_SETTINGS'] 35 | 36 | library_dir = global_settings.get('library_directory') 37 | # todo: check library_dir exists. 38 | 39 | temp_dir = global_settings.get('temporary_directory', os.path.dirname(sys.argv[0])) 40 | movie_folder_naming_scheme = global_settings.get('movie_folder_naming_scheme') 41 | 42 | tmdb_api_key = global_settings.get('tmdb_api_key', None) 43 | if tmdb_api_key is None: 44 | has_tmdb_api_key = False 45 | else: 46 | has_tmdb_api_key = True 47 | # todo: check tmdb_api_key is ok. 
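# One possible way to handle the "check tmdb_api_key is ok" todo above, sketched as a
# hypothetical helper that is not called anywhere in this script. It assumes the public
# TMDB v3 /configuration endpoint, which answers HTTP 401 when the api_key is invalid,
# and it reuses the urlopen / HTTPError imports at the top of this file.
def tmdb_api_key_looks_valid(api_key):
    try:
        # Any authenticated endpoint would do; /configuration is small and side-effect free.
        urlopen('https://api.themoviedb.org/3/configuration?api_key=' + api_key, timeout=5).close()
        return True
    except HTTPError as e:
        if e.code == 401:
            return False
        raise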
48 | 49 | extra_config_dir = os.path.join(os.path.dirname(sys.argv[0]), 'extra_configs') 50 | 51 | tmdb_movie_search_result = None 52 | tmdb_movie_details = None 53 | tmdb_movie_cast_members = None 54 | 55 | ######################################################################################################################## 56 | 57 | 58 | def main(): 59 | 60 | global tmdb_movie_details 61 | global tmdb_movie_cast_members 62 | global tmdb_movie_search_result 63 | 64 | for directory_name in os.listdir(library_dir): 65 | tmdb_movie_search_result = None 66 | tmdb_movie_details = None 67 | tmdb_movie_cast_members = None 68 | print(directory_name) 69 | directory = get_directory_data(directory_name) 70 | # todo: make sure it's a movie directory 71 | for config in os.listdir(extra_config_dir): 72 | # todo: make sure it's a .cfg file 73 | if process_directory(directory, config): 74 | pass 75 | # todo: record info to record_data 76 | 77 | 78 | def process_directory(directory, config_file): 79 | 80 | with codecs.open(os.path.join(os.path.dirname(sys.argv[0]), extra_config_dir, config_file), 'r', 'utf-8') as file2: 81 | settings = configparser.ConfigParser() 82 | settings.read_file(file2) 83 | 84 | # decide if a search is to be made. 85 | 86 | if not settings['EXTRA_CONFIG'].getboolean('force') or not global_advanced_settings.getboolean('force_all'): 87 | if directory['record_data'].get('completed', False): 88 | return False 89 | 90 | # make the search and return a youtube video source. 91 | 92 | video = get_video(directory, settings) 93 | 94 | # download the extra from youtube 95 | download_info = download(video, settings) 96 | # post process the extra 97 | 98 | post_process(download_info, settings) 99 | 100 | return True 101 | 102 | 103 | def get_directory_data(directory_name): 104 | 105 | directory_ret = dict() 106 | 107 | directory_ret['name'] = directory_name 108 | directory_ret['full_path'] = os.path.join(library_dir, directory_name) 109 | directory_ret['clean_name_tuple'] = (directory_ret['name'] 110 | .replace('(', '') 111 | .replace(')', '') 112 | .replace('[', '') 113 | .replace(']', '') 114 | .replace('{', '') 115 | .replace('}', '') 116 | .replace(':', '') 117 | .replace(';', '') 118 | .replace('.', ' ') 119 | .replace('_', ' ') 120 | .lower()).split(' ') 121 | 122 | if global_settings.getboolean('release_year_end_of_file'): 123 | directory_ret['release_year'] = directory_ret['clean_name_tuple'][-1] 124 | directory_ret['movie_name'] = ' '.join(directory_ret['clean_name_tuple'][:-1]) 125 | else: 126 | directory_ret['release_year'] = None 127 | directory_ret['movie_name'] = ' '.join(directory_ret['clean_name_tuple']) 128 | 129 | # todo: make sure release_year is a year between 1700 and 2100. else > None 130 | 131 | directory_ret['files'] = list() 132 | for file3 in os.listdir(directory_ret['full_path']): 133 | directory_ret['files'].append(file3) 134 | # todo: nested folders. 135 | 136 | # todo: make sure the record_data file exists. 
and that it is an valid json list 137 | if os.path.isfile('record_data'): 138 | with open('record_data') as data_file: 139 | data = json.load(data_file) 140 | if directory_ret['name'] in data: 141 | directory_ret['record_data'] = data[directory_ret['name']] 142 | else: 143 | directory_ret['record_data'] = dict() 144 | 145 | else: 146 | directory_ret['record_data'] = dict() 147 | 148 | get_tmdb_movie_search_result(directory_ret['movie_name'], directory_ret['release_year']) 149 | get_tmdb_movie_details() 150 | get_tmdb_movie_cast_members() 151 | 152 | return directory_ret 153 | 154 | 155 | def get_video(directory, settings): 156 | 157 | def search_result(): 158 | 159 | def search(): 160 | item_list = list() 161 | 162 | # todo: limit > 0 163 | 164 | while True: 165 | try: 166 | time.sleep(global_advanced_settings.getint('search_cooldown')) 167 | print('searching for: "' + query + '"') 168 | for url in google_search(query, stop=limit): 169 | if len(item_list) >= limit: 170 | break 171 | new_item = {'link': url} 172 | 173 | while True: 174 | 175 | for existing_candidate in video_candidates: 176 | if new_item['link'] == existing_candidate['link']: 177 | break 178 | 179 | try: 180 | new_item['pytube_result'] = YouTube(new_item['link']) 181 | item_list.append(new_item) 182 | break 183 | except KeyError: 184 | print('Pytube failed to initialize (KeyError). trying again in 2 seconds.') 185 | time.sleep(2) 186 | except URLError: 187 | print('Pytube failed to initialize (URLError). trying again in 2 seconds.') 188 | time.sleep(2) 189 | except exceptions.RegexMatchError: 190 | new_item['delete_this_item'] = True 191 | break 192 | 193 | break 194 | 195 | except HTTPError as e: 196 | if e.code == 503: 197 | print('------------------------------------------------------------------------------------') 198 | print('It seems that your IP-address have been flagged by google for unusual activity. 
') 199 | print('They usually put down the flag after some time so try again tomorrow.') 200 | print('If this is a reoccurring issue, increase the search cooldown under advanced settings') 201 | print('------------------------------------------------------------------------------------') 202 | sys.exit() 203 | else: 204 | print('Failed to retrieve search results, trying again in 2 seconds: ' + e.msg) 205 | time.sleep(2) 206 | continue 207 | 208 | except URLError as e: 209 | print('Failed to retrieve search results, trying again in 2 seconds: ' + e.msg) 210 | time.sleep(2) 211 | continue 212 | 213 | return item_list 214 | 215 | video_candidates = list() 216 | for option, query in settings['SEARCHES'].items(): 217 | if 'search_string' not in option: 218 | continue 219 | 220 | limit = settings['SEARCHES'].getint('search_result_limit' + option.replace('search_string', '')) 221 | 222 | query = query.replace('{movie_name}', directory['movie_name']) 223 | 224 | if directory['release_year'] is not None: 225 | query = query.replace('{release_year}', directory['release_year']) 226 | else: 227 | query = query.replace('{release_year}', '') 228 | 229 | if tmdb_movie_details['production_companies'][0]['name'] is not None and '{main_studio_name}' in query: 230 | get_tmdb_movie_details() 231 | query = query.replace('{main_studio_name}', tmdb_movie_details['production_companies'][0]['name']) 232 | else: 233 | query = query.replace('{main_studio_name}', '') 234 | 235 | query = query.replace(' ', ' ') 236 | 237 | video_candidates += search() 238 | 239 | return video_candidates 240 | 241 | def scan_candidates(): 242 | 243 | selection_info['max_resolution'] = 0 244 | for candidate in selection_info['candidates']: 245 | 246 | candidate['delete_this_item'] = False 247 | 248 | video = candidate['pytube_result'] 249 | 250 | if candidate['delete_this_item'] or video is None: 251 | continue 252 | 253 | candidate['title'] = video.title 254 | candidate['rating'] = float(video.player_config_args['avg_rating']) 255 | candidate['view_count'] = int(video.player_config_args['view_count']) 256 | candidate['thumbnail_url'] = video.thumbnail_url 257 | candidate['channel'] = video.player_config_args['author'] 258 | candidate['tags'] = video.player_config_args['keywords'] 259 | 260 | if candidate['view_count'] < 100: 261 | candidate['view_count'] = 100 262 | 263 | candidate['adjusted_rating'] = candidate['rating'] * (1 - 1 / ((candidate['view_count'] / 60) ** 0.5)) 264 | 265 | candidate['resolution'] = 0 266 | for stream in video.streams.filter(type='video').all(): 267 | try: 268 | resolution = int(stream.resolution.replace('p', '')) 269 | except AttributeError: 270 | resolution = 0 271 | 272 | if resolution > selection_info['max_resolution']: 273 | selection_info['max_resolution'] = resolution 274 | if resolution > candidate['resolution']: 275 | candidate['resolution'] = resolution 276 | 277 | try: 278 | if 'ad_preroll' in video.player_config_args: 279 | candidate['adds_info'] = 'have adds' 280 | else: 281 | candidate['adds_info'] = 'No adds' 282 | except ValueError: 283 | candidate['adds_info'] = 'No adds' 284 | 285 | return selection_info 286 | 287 | def filter_candidates(): 288 | 289 | filtered_candidates = list() 290 | 291 | required_words = retrive_list_from_string(settings['FILTERING'].get('required_words').lower()) 292 | banned_words = retrive_list_from_string(settings['FILTERING'].get('banned_words').lower()) 293 | banned_channels = retrive_list_from_string(settings['FILTERING'].get('banned_channels').lower()) 294 | 295 
| banned_years = list(range(1800, 2100)) 296 | for year in banned_years: 297 | if str(year) in directory['movie_name']: 298 | banned_years.remove(year) 299 | if directory['release_year'] is not None: 300 | if int(directory['release_year']) in banned_years: 301 | banned_years.remove(int(directory['release_year'])) 302 | # todo: +- 1 year? 303 | 304 | for candidate in selection_info['candidates']: 305 | 306 | append_video = True 307 | 308 | # todo: make filter that match title name with trailer title. (min 66% match rounding up ) 309 | # ignoring words: the, on, of, a, an 310 | 311 | if candidate['delete_this_item']: 312 | continue 313 | 314 | for year in banned_years: 315 | if str(year) in candidate['title']: 316 | append_video = False 317 | break 318 | if str(year) in candidate['tags']: 319 | append_video = False 320 | break 321 | 322 | for word in required_words: 323 | if word.lower() not in candidate['title'].lower(): 324 | append_video = False 325 | 326 | for word in banned_words: 327 | if word.lower() in candidate['title'].lower(): 328 | append_video = False 329 | break 330 | 331 | # todo: move to post scoring filter 332 | for channel in banned_channels: 333 | if channel.lower() == candidate['channel'].lower(): 334 | append_video = False 335 | break 336 | 337 | if append_video: 338 | filtered_candidates.append(candidate) 339 | 340 | selection_info['candidates'] = filtered_candidates 341 | 342 | return 343 | 344 | def score_candidates(): 345 | 346 | for candidate in selection_info['candidates']: 347 | candidate['score'] = 0 348 | 349 | if candidate['resolution'] < 700: 350 | candidate['adjusted_rating'] *= 0.96 351 | 352 | return 353 | 354 | def post_scoring_filter(): 355 | return 356 | 357 | def order_candidates(): 358 | 359 | # interviews: limit same person interviews. 
360 | # behind the scenes: 361 | # trailers: 362 | # 363 | 364 | selected_extra = None 365 | 366 | top_score = 0 367 | top_view_count = 0 368 | 369 | for candidate in selection_info['candidates']: 370 | 371 | print('-----------------------------------------------------------------') 372 | print(candidate['title']) 373 | print(candidate['adds_info']) 374 | print(candidate['resolution']) 375 | print(candidate['link']) 376 | print(candidate['adjusted_rating']) 377 | print(candidate['view_count']) 378 | 379 | if candidate['adjusted_rating'] > top_score: 380 | top_score = candidate['adjusted_rating'] 381 | 382 | for candidate in selection_info['candidates']: 383 | if candidate['adjusted_rating'] > top_score * 0.95: 384 | if candidate['view_count'] > top_view_count: 385 | top_view_count = candidate['view_count'] 386 | selected_extra = candidate 387 | 388 | print('-----------------------------------------------------------------') 389 | print('picked: ' + selected_extra['title'] + ' (' + selected_extra['link'] + ')') 390 | print('-----------------------------------------------------------------') 391 | return selected_extra 392 | 393 | selection_info = {'candidates': search_result()} 394 | 395 | scan_candidates() 396 | 397 | filter_candidates() 398 | 399 | score_candidates() 400 | 401 | # todo: make post scoring filter 402 | 403 | return order_candidates() 404 | 405 | 406 | def download(video, settings): 407 | info_ret = dict() 408 | return info_ret 409 | 410 | 411 | def post_process(download_info, settings): 412 | # todo: reduce sound 413 | # todo: remove green disclaimer if it exist 414 | # todo: encode in mp4, aac, h264 or link the stream 415 | pass 416 | 417 | 418 | def retrieve_web_page(url, page_name='page'): 419 | 420 | response = None 421 | print('Downloading ' + page_name + '.') 422 | 423 | for attempt in range(10): 424 | try: 425 | response = urlopen(url, timeout=2) 426 | break 427 | 428 | except timeout: 429 | print('Failed to download ' + page_name + ' : timed out. Trying again in 2 seconds.') 430 | 431 | if attempt > 5: 432 | print('You might have lost internet connection.') 433 | raise ValueError('Failed to retrive web page: url requests timed out.') 434 | 435 | time.sleep(2) 436 | 437 | except HTTPError as e: 438 | raise ValueError('Failed to download ' + page_name + ' : ' + e.msg + '. Skipping.') 439 | 440 | except URLError as e: 441 | print('Failed to download ' + page_name + '. Trying again in 2 seconds') 442 | 443 | if attempt > 5: 444 | print('You might have lost internet connection.') 445 | raise ValueError('Failed to retrive web page: ' + e.reason + '.') 446 | 447 | time.sleep(2) 448 | 449 | return response 450 | 451 | 452 | def get_tmdb_movie_search_result(name, release_year): 453 | global tmdb_movie_search_result 454 | if tmdb_movie_search_result is not None: 455 | return 456 | 457 | # todo: modify to not use release_year in search but rather as picking the right one. 458 | # todo: any word in any other movie not in the wanted movie should be on ban list for filtering. 
459 | # todo: maybe not: false negatives 460 | 461 | url = 'https://api.themoviedb.org/3/search/movie' \ 462 | '?api_key=' + tmdb_api_key + \ 463 | '&language=en-US&query=' \ 464 | + name.replace(' ', '+') + \ 465 | '&page=1&include_adult=false' 466 | 467 | if release_year is not None: 468 | url += '&year=' + str(release_year) 469 | 470 | response = retrieve_web_page(url, 'movie search api page') 471 | 472 | data = json.loads(response.read().decode('utf-8')) 473 | 474 | if data['total_results'] == 0: 475 | raise ValueError('Unable to find a movie for the directory "' + name + '", skipping.') 476 | 477 | # todo: add +- 1 year to the year if it's close to new year. 478 | 479 | tmdb_movie_search_result = data['results'][0] 480 | response.close() 481 | 482 | 483 | def get_tmdb_movie_details(): 484 | global tmdb_movie_details 485 | global tmdb_movie_search_result 486 | if tmdb_movie_details is not None: 487 | return 488 | 489 | response = retrieve_web_page('https://api.themoviedb.org/3/movie/' 490 | + str(tmdb_movie_search_result['id']) + 491 | '?api_key=' + tmdb_api_key + 492 | '&language=en-US', 'movie details') 493 | 494 | data = json.loads(response.read().decode('utf-8')) 495 | tmdb_movie_details = data 496 | response.close() 497 | 498 | 499 | def get_tmdb_movie_cast_members(): 500 | global tmdb_movie_details 501 | global tmdb_movie_cast_members 502 | if tmdb_movie_cast_members is not None: 503 | return 504 | 505 | response = retrieve_web_page('https://api.themoviedb.org/3/movie/' 506 | + str(tmdb_movie_search_result['id']) + 507 | '/credits' 508 | '?api_key=' + tmdb_api_key, 'cast members') 509 | 510 | data = json.loads(response.read().decode('utf-8')) 511 | tmdb_movie_cast_members = data 512 | response.close() 513 | 514 | 515 | def retrive_list_from_string(string, delimiter=',', remove_spaces_next_to_delimiter=True): 516 | if remove_spaces_next_to_delimiter: 517 | while ' ' + delimiter in string: 518 | string = string.replace(' ' + delimiter, delimiter) 519 | while delimiter + ' ' in string: 520 | string = string.replace(delimiter + ' ', delimiter) 521 | 522 | return string.split(delimiter) 523 | 524 | 525 | main() 526 | # todo: add link_only option and capabilities. 
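# A sketch of one possible reading of the "link_only" todo above: instead of downloading
# the selected extra, record its YouTube URL next to the movie so it can be fetched later.
# The helper below is hypothetical and not wired into main(); it assumes the candidate
# dicts produced by get_video(), which carry a 'link' key, and the os import above.
def save_link_only(candidate, target_dir, extra_name):
    with open(os.path.join(target_dir, extra_name + '.link.txt'), 'w') as link_file:
        link_file.write(candidate['link'] + '\n')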
527 | sys.exit() 528 | -------------------------------------------------------------------------------- /old_code/Stream.py: -------------------------------------------------------------------------------- 1 | import time 2 | from _socket import timeout 3 | from urllib.error import HTTPError, URLError 4 | 5 | 6 | class Stream(object): 7 | 8 | conn_errors = 0 9 | 10 | def __init__(self, source, length): 11 | ######################################## 12 | self.complete = True 13 | self.retry = False 14 | 15 | self.source = None 16 | self.id = None 17 | self.type = None 18 | self.container = None 19 | self.bitrate = None 20 | 21 | self.video_codec = None 22 | self.bitrate_per_pixel = None 23 | self.resolution = None 24 | self.fps = None 25 | self.is_hdr = None 26 | self.size = None 27 | self.is_3d = None 28 | 29 | self.audio_codec = None 30 | ######################################## 31 | 32 | tries = 0 33 | while True: 34 | 35 | try: 36 | self.source = source 37 | self.id = source.itag 38 | self.container = source.subtype 39 | self.fps = source.fps 40 | self.file_size = source.filesize 41 | self.bitrate = self.file_size * 8 / length 42 | 43 | if source.is_progressive: 44 | self.get_audio_data(source) 45 | self.get_video_data(source) 46 | self.type = 'combined' 47 | 48 | elif source.includes_audio_track: 49 | self.get_audio_data(source) 50 | self.type = 'audio' 51 | 52 | elif source.includes_video_track: 53 | self.get_video_data(source) 54 | self.type = 'video' 55 | self.size = int(source.size.split('x')[0]), int(source.size.split('x')[1]) 56 | self.bitrate_per_pixel = self.bitrate / (self.size[0] * self.size[1]) 57 | else: 58 | print('both include_audio_track and include_video_track was false') 59 | raise AttributeError('failed to categorise stream') 60 | 61 | except KeyError as e: 62 | print('A stream attribute failed to load. KeyError: ' + str(e)) 63 | self.complete = False 64 | return 65 | except AttributeError as e: 66 | print('A required stream attribute failed to load. AttributeError: ' + str(e)) 67 | self.complete = False 68 | return 69 | except timeout as e: 70 | if tries > 4: 71 | print('A stream failed to load because it got timed out: ' + str(e)) 72 | self.complete = False 73 | self.retry = True 74 | if Stream.conn_errors > 2: 75 | raise 76 | else: 77 | Stream.conn_errors += 1 78 | return 79 | 80 | print('A stream failed to load because it got timed out, retrying. Reason: ' + str(e)) 81 | tries += 1 82 | time.sleep(1) 83 | except HTTPError as e: 84 | print('A stream attribute failed to load, skipping. Reason: ' + str(e)) 85 | self.incomplete = True 86 | return 87 | except URLError as e: 88 | if tries > 2: 89 | print('A stream failed to load. Reason: ' + str(e)) 90 | self.complete = False 91 | self.retry = True 92 | if Stream.conn_errors > 2: 93 | raise 94 | else: 95 | Stream.conn_errors += 1 96 | return 97 | 98 | print('A stream attribute failed to load, retrying. Reason: ' + str(e)) 99 | time.sleep(1) 100 | tries += 1 101 | except ConnectionResetError as e: 102 | if tries > 4: 103 | print('A stream failed to load. Reason: ' + str(e)) 104 | self.complete = False 105 | if Stream.conn_errors > 4: 106 | raise 107 | else: 108 | Stream.conn_errors += 1 109 | return 110 | print('A stream attribute failed to load, retrying. 
Reason: ' + str(e)) 111 | time.sleep(1) 112 | tries += 1 113 | else: 114 | Stream.conn_errors = 0 115 | break 116 | 117 | def get_video_data(self, source): 118 | self.video_codec = source.video_codec 119 | self.resolution = int(source.resolution.replace('p', '')) 120 | self.is_hdr = source.is_hdr 121 | self.is_3d = source.is_3d 122 | 123 | def get_audio_data(self, source): 124 | self.audio_codec = source.audio_codec 125 | -------------------------------------------------------------------------------- /old_code/YoutubeVideo.py: -------------------------------------------------------------------------------- 1 | from _socket import timeout 2 | from urllib.error import URLError 3 | 4 | from pytube import YouTube 5 | from pytube.exceptions import RegexMatchError 6 | from old_code.Stream import Stream 7 | import time 8 | import tools as tools 9 | 10 | 11 | class YoutubeVideo(object): 12 | 13 | # todo (2): subtitles 14 | conn_errors = 0 15 | 16 | def __init__(self, url, score=0, preferred_container='mp4', min_resolution=360, 17 | max_resolution=1080, force_preferred_container=False): 18 | 19 | ######################################## 20 | self.url = None 21 | self.source = None 22 | self.delete = None 23 | self.complete = None 24 | self.is_play_trailer = None 25 | 26 | self.title = None 27 | self.thumbnail_url = None 28 | self.channel = None 29 | self.tags = list() 30 | 31 | self.view_count = None 32 | self.rating = None 33 | self.adjusted_rating = None 34 | self.resolution = None 35 | self.quality_score = None 36 | self.length = None 37 | self.resolution_ratio = None 38 | 39 | self.streams = list() 40 | self.best_video_stream = None 41 | self.best_audio_stream = None 42 | self.best_combined_stream = None 43 | ######################################## 44 | 45 | self.url = url 46 | self.delete = False 47 | self.is_play_trailer = False 48 | self.complete = True 49 | 50 | tries = 0 51 | while True: 52 | try: 53 | self.source = YouTube(url) 54 | except KeyError as e: 55 | if e.args[0] == 'url': 56 | self.delete = True 57 | self.is_play_trailer = True 58 | # todo (1): add youtube-dl info grabber/downloader 59 | # stuff I need: title, length, keywords? 60 | return 61 | elif e.args[0] == 'url_encoded_fmt_stream_map': 62 | if tries > 4: 63 | print('Failed to load youtube data, retrying. Reason: ' + str(e)) 64 | self.delete = True 65 | return 66 | 67 | print('Failed to load youtube data, retrying. Reason: ' + str(e)) 68 | time.sleep(2) 69 | tries += 1 70 | 71 | else: 72 | raise 73 | except RegexMatchError as e: 74 | print('Pytube failed to load video info. Reason: ' + url + ': ' + str(e)) 75 | self.delete = True 76 | return 77 | except timeout as e: 78 | if tries > 4: 79 | print('Pytube failed to load video info. Reason: ' + str(e)) 80 | self.complete = False 81 | if Stream.conn_errors > 2: 82 | raise 83 | else: 84 | Stream.conn_errors += 1 85 | return 86 | 87 | print('Pytube failed to load video info. Reason: ' + str(e) + ', retrying...') 88 | tries += 1 89 | time.sleep(1) 90 | except URLError as e: 91 | if tries > 2: 92 | print('Pytube failed to load video info. Reason: ' + str(e)) 93 | self.complete = False 94 | if YoutubeVideo.conn_errors > 2: 95 | raise 96 | else: 97 | YoutubeVideo.conn_errors += 1 98 | return 99 | 100 | print('Pytube failed to load video info. 
Reason: ' + str(e) + ', retrying...') 101 | time.sleep(1) 102 | tries += 1 103 | else: 104 | YoutubeVideo.conn_errors = 0 105 | break 106 | 107 | self.score = score 108 | 109 | self.title = self.source.title 110 | self.title = tools.get_clean_string(self.title) 111 | self.rating = float(self.source.player_config_args['avg_rating']) 112 | self.view_count = int(self.source.player_config_args['view_count']) 113 | self.channel = self.source.player_config_args['author'] 114 | self.length = self.source.player_config_args['length_seconds'] 115 | 116 | self.thumbnail_url = self.source.thumbnail_url 117 | try: 118 | self.thumbnail_url = self.source.thumbnail_url 119 | except KeyError: 120 | self.thumbnail_url = None 121 | 122 | try: 123 | self.tags = self.source.player_config_args['keywords'].split(',') 124 | except KeyError: 125 | self.tags = '' 126 | 127 | if self.view_count < 100: 128 | self.view_count = 100 129 | 130 | self.adjusted_rating = self.rating * (1 - 1 / ((self.view_count / 60) ** 0.5)) 131 | 132 | self.load_streams(min_resolution, max_resolution) 133 | self.update_quality_score(preferred_container) 134 | self.update_best_audio_stream(preferred_container, force_preferred_container) 135 | self.update_best_video_stream(preferred_container, force_preferred_container) 136 | self.update_best_combined_stream(preferred_container, force_preferred_container) 137 | 138 | if self.is_play_trailer: 139 | self.update_youtube_dl_info() 140 | 141 | 142 | 143 | def update_youtube_dl_info(self): 144 | pass 145 | 146 | def update_quality_score(self, preferred_container='mp4'): 147 | self.quality_score = 0 148 | max_res = 0 149 | 150 | for stream in self.streams: 151 | 152 | if stream.type != 'video': 153 | continue 154 | 155 | quality_score = 0 156 | pixel_bitrate = stream.bitrate_per_pixel 157 | 158 | if stream.resolution == 1080: 159 | pixel_bitrate /= 1 160 | quality_score = 120 161 | elif stream.resolution == 720: 162 | pixel_bitrate /= 1.22 163 | quality_score = 108 164 | elif stream.resolution == 480: 165 | pixel_bitrate /= 1.52 166 | quality_score = 65 167 | elif stream.resolution == 360: 168 | pixel_bitrate /= 1.39 169 | quality_score = 40 170 | elif stream.resolution == 240: 171 | pixel_bitrate /= 2.15 172 | quality_score = 20 173 | elif stream.resolution == 144: 174 | pixel_bitrate /= 2.65 175 | quality_score = 10 176 | 177 | if preferred_container.lower() == stream.container: 178 | quality_score *= 1.2 179 | quality_score *= pixel_bitrate 180 | 181 | if stream.resolution > max_res: 182 | self.quality_score = quality_score 183 | max_res = stream.resolution 184 | self.resolution_ratio = stream.size[0] / stream.size[1] 185 | elif stream.resolution == max_res: 186 | if quality_score > self.quality_score: 187 | self.quality_score = quality_score 188 | 189 | def load_streams(self, min_resolution=360, max_resolution=1080): 190 | 191 | self.streams = list() 192 | self.complete = True 193 | 194 | for source_stream in self.source.streams.fmt_streams: 195 | stream = Stream(source_stream, int(self.length)) 196 | if stream.complete: 197 | if stream.resolution is not None: 198 | if stream.resolution > max_resolution or stream.resolution < min_resolution: 199 | continue 200 | self.streams.append(stream) 201 | elif stream.retry: 202 | self.complete = False 203 | if Stream.conn_errors != 0: 204 | self.complete = False 205 | 206 | def update_best_video_stream(self, preferred_container='mp4', force_preferred_container=False): 207 | 208 | highest_resolution = 0 209 | best_stream = None 210 | 
highest_pref_resolution = 0 211 | best_pref_stream = None 212 | 213 | for stream in self.streams: 214 | if 'video' != stream.type: 215 | continue 216 | 217 | if stream.resolution > highest_resolution: 218 | highest_resolution = stream.resolution 219 | best_stream = stream 220 | 221 | if stream.container.lower() == preferred_container.lower(): 222 | if stream.resolution > highest_pref_resolution: 223 | highest_pref_resolution = stream.resolution 224 | best_pref_stream = stream 225 | 226 | if highest_resolution == highest_pref_resolution or force_preferred_container: 227 | ret = best_pref_stream 228 | else: 229 | ret = best_stream 230 | 231 | self.best_video_stream = ret 232 | 233 | def update_best_audio_stream(self, preferred_container='mp4', force_preferred_container=False): 234 | 235 | highest_bitrate = 0 236 | best_stream = None 237 | highest_pref_bitrate = 0 238 | best_pref_stream = None 239 | 240 | for stream in self.streams: 241 | if 'audio' != stream.type: 242 | continue 243 | 244 | if stream.bitrate > highest_bitrate: 245 | highest_bitrate = stream.bitrate 246 | best_stream = stream 247 | 248 | if stream.container.lower() == preferred_container.lower(): 249 | if stream.bitrate > highest_pref_bitrate: 250 | highest_pref_bitrate = stream.bitrate 251 | best_pref_stream = stream 252 | 253 | if highest_bitrate <= highest_pref_bitrate * 1.35 or force_preferred_container: 254 | ret = best_pref_stream 255 | else: 256 | ret = best_stream 257 | self.best_audio_stream = ret 258 | 259 | def update_best_combined_stream(self, preferred_container='mp4', force_preferred_container=False): 260 | 261 | highest_resolution = 0 262 | 263 | for stream in self.streams: 264 | if 'combined' != stream.type: 265 | continue 266 | 267 | if stream.resolution > highest_resolution: 268 | highest_resolution = stream.resolution 269 | 270 | max_score = 0 271 | selected_stream = None 272 | 273 | for stream in self.streams: 274 | if 'combined' != stream.type: 275 | continue 276 | 277 | score = 0 278 | resolution = stream.resolution 279 | 280 | if force_preferred_container: 281 | if stream.container != preferred_container: 282 | continue 283 | if resolution == highest_resolution: 284 | score += 10 ** 1 285 | if stream.container == preferred_container: 286 | score += 10 ** 0 287 | 288 | if score > max_score: 289 | max_score = score 290 | selected_stream = stream 291 | 292 | self.best_combined_stream = selected_stream 293 | -------------------------------------------------------------------------------- /old_code/config-example.cfg: -------------------------------------------------------------------------------- 1 | [SETTINGS] 2 | 3 | library_directory = 4 | #temp_directory = 5 | 6 | tmdb_api_key = 7 | 8 | release_year_end_of_file = true 9 | ffmpeg_installed = false 10 | 11 | [ADVANCED_SETTINGS] 12 | 13 | force_all = false 14 | search_cooldown = 0 15 | -------------------------------------------------------------------------------- /old_code/old_code.py: -------------------------------------------------------------------------------- 1 | import os 2 | import configparser 3 | from configparser import NoOptionError 4 | import fnmatch 5 | import pprint 6 | import shutil 7 | import time 8 | import sys 9 | from urllib.error import URLError 10 | from urllib.request import urlopen 11 | from urllib.error import URLError, HTTPError 12 | from socket import timeout 13 | import json 14 | 15 | # pip install these packages: 16 | try: 17 | from googlesearch import search as google_search # google package 18 | except ImportError: 19 | 
print('Please upgrade to python 3.6 or run the 2.7 version.') 20 | sys.exit() 21 | from pytube import YouTube # pytube package 22 | from pytube import exceptions 23 | # also, install FFmpeg. 24 | 25 | 26 | def find_extra(config, extra_name, search, sort_arguments): 27 | 28 | print('Finding video for extra: "' + extra_name + '".') 29 | 30 | time.sleep(1) 31 | print('Loading configuration.') 32 | movie_library_dir = config.get('SETTINGS', 'movie_library_dir') 33 | try: 34 | download_dir = config.get('SETTINGS', 'download_dir') 35 | except NoOptionError: 36 | download_dir = os.getcwd() 37 | ffmpeg_status = config.getboolean('SETTINGS', 'FFmpeg_installed') 38 | 39 | time.sleep(1) 40 | print('Loading library.') 41 | library = get_library_record(movie_library_dir, config) 42 | 43 | time.sleep(1) 44 | print('finding movie to download extra for') 45 | movie_folder = get_movie_folder(movie_library_dir, library, [], [extra_name]) 46 | 47 | config.set('LIBRARY_RECORD', movie_folder.replace(' ', '_'), str(library[movie_folder] + 1)) 48 | 49 | time.sleep(1) 50 | print('finding video to download for : ' + movie_folder) 51 | video_to_download = get_video_to_download(movie_folder, search, sort_arguments) 52 | time.sleep(1) 53 | print('Downloading: "' + video_to_download['title'] + '" (' + video_to_download['link'] + ")") 54 | download(video_to_download, download_dir, extra_name, ffmpeg_status) 55 | 56 | time.sleep(1) 57 | print('Moving "' + extra_name + '" and cleaning up') 58 | move_and_cleanup(download_dir, os.path.join(movie_library_dir, movie_folder), extra_name + '.mp4') 59 | 60 | print('All done!') 61 | return True 62 | 63 | 64 | def get_library_record(library_dir, config): 65 | 66 | library = dict() 67 | 68 | for folder_name in os.listdir(library_dir): 69 | if fnmatch.fnmatch(folder_name, config.get('SETTINGS', 'name_pattern')): 70 | if not config.has_option('LIBRARY_RECORD', folder_name.replace(' ', '_')): 71 | new_entry = 0 72 | else: 73 | new_entry = int(config.getint('LIBRARY_RECORD', folder_name.replace(' ', '_'))) 74 | 75 | library[folder_name] = new_entry 76 | return library 77 | 78 | 79 | def get_movie_folder(library_dir, earlier_tries, have, have_not): 80 | 81 | min_earlier_tries = 10000 82 | max_earlier_tries = 0 83 | 84 | for movie in earlier_tries: 85 | if earlier_tries[movie] < min_earlier_tries: 86 | min_earlier_tries = earlier_tries[movie] 87 | 88 | if earlier_tries[movie] > max_earlier_tries: 89 | max_earlier_tries = earlier_tries[movie] 90 | 91 | while min_earlier_tries <= max_earlier_tries: 92 | 93 | for movie in earlier_tries: 94 | 95 | if earlier_tries[movie] > min_earlier_tries: 96 | continue 97 | 98 | return_movie = True 99 | 100 | for file_name in os.listdir(os.path.join(library_dir, movie)): 101 | for word in have: 102 | if word not in file_name: 103 | return_movie = False 104 | 105 | for file_name in os.listdir(os.path.join(library_dir, movie)): 106 | for word in have_not: 107 | if word in file_name: 108 | return_movie = False 109 | 110 | if return_movie: 111 | return movie 112 | 113 | min_earlier_tries += 1 114 | 115 | print("Couldn't find a movie in the library matching the given restriction.") 116 | print("Or all movies already have the extra you are looking for.") 117 | print('Shutting down.') 118 | sys.exit() 119 | 120 | 121 | def get_video_to_download(movie, search_suffix, filter_arguments): 122 | 123 | def scan_response(response): 124 | 125 | response['max_video_resolution'] = 0 126 | for result in response['items']: 127 | 128 | result['delete_this_item'] = False 129 
| 130 | video = None 131 | for try_count in range(5): 132 | 133 | if try_count > 2: 134 | time.sleep(1) 135 | video = YouTube(result['link']) 136 | else: 137 | try: 138 | video = YouTube(result['link']) 139 | break 140 | except KeyError: 141 | print('Pytube failed to initialize (KeyError). trying again in 10 seconds.') 142 | time.sleep(9) 143 | except URLError: 144 | print('Pytube failed to initialize (URLError). trying again in 10 seconds.') 145 | time.sleep(9) 146 | except exceptions.RegexMatchError: 147 | result['delete_this_item'] = True 148 | break 149 | 150 | if result['delete_this_item']: 151 | continue 152 | 153 | result['youtube_object'] = video 154 | result['title'] = video.title 155 | result['avg_rating'] = float(video.player_config_args['avg_rating']) 156 | result['view_count'] = int(video.player_config_args['view_count']) 157 | 158 | if result['view_count'] < 60: 159 | result['view_count'] = 60 160 | 161 | result['video_resolution'] = 0 162 | for stream in video.streams.filter(type='video').all(): 163 | try: 164 | resolution = int(stream.resolution.replace('p', '')) 165 | except AttributeError: 166 | resolution = 0 167 | 168 | if resolution > response['max_video_resolution']: 169 | response['max_video_resolution'] = resolution 170 | if resolution > result['video_resolution']: 171 | result['video_resolution'] = resolution 172 | 173 | try: 174 | if 'ad_preroll' in video.player_config_args: 175 | result['adds_info'] = 'have adds' 176 | else: 177 | result['adds_info'] = 'No adds' 178 | except ValueError: 179 | result['adds_info'] = 'No adds' 180 | 181 | return response 182 | 183 | def filter_response(response, arguments): 184 | 185 | items = list() 186 | 187 | for result in response['items']: 188 | 189 | append_video = True 190 | 191 | if result['delete_this_item']: 192 | continue 193 | 194 | for word in arguments['video_name_must_contain']: 195 | if word.lower() not in result['title'].lower(): 196 | append_video = False 197 | 198 | for word in arguments['video_name_must_not_contain']: 199 | if word.lower() in result['title'].lower(): 200 | append_video = False 201 | 202 | if append_video: 203 | items.append(result) 204 | 205 | response.pop('items') 206 | response['items'] = items 207 | 208 | return response 209 | 210 | def score_response(response, scoring_arguments): 211 | 212 | for result in response['items']: 213 | 214 | result['true_rating'] = result['avg_rating'] * (1 - 1 / ((result['view_count'] / 60) ** 0.5)) 215 | 216 | if result['video_resolution'] < 700: 217 | result['true_rating'] *= 0.90 218 | result['view_count'] *= 0.5 219 | 220 | for bonus in scoring_arguments['video_name_tag_bonuses']: 221 | for word in scoring_arguments['video_name_tag_bonuses'][bonus]: 222 | if word in result['title'].lower(): 223 | result['true_rating'] *= bonus 224 | result['view_count'] *= bonus 225 | break 226 | 227 | return response 228 | 229 | # search for movie 230 | search = movie.replace('(', '').replace(')', '').replace('[', '').replace(']', '') + ' ' + search_suffix 231 | search = search.replace('.', ' ').replace('_', ' ').replace('-', ' ').replace(' ', ' ').replace(' ', ' ') 232 | search = str('site:youtube.com ' + search) 233 | 234 | item_list = list() 235 | for attempt in range(5): 236 | if attempt > 2: 237 | for url in google_search(search, stop=10): 238 | item = {'link': url} 239 | item_list.append(item) 240 | break 241 | else: 242 | try: 243 | for url in google_search(search, stop=10): 244 | item = {'link': url} 245 | item_list.append(item) 246 | break 247 | except URLError: 248 
| print('Failed to retrieve search results, trying again in 10 seconds') 249 | time.sleep(10) 250 | continue 251 | 252 | item_list.pop() 253 | item_list.pop() 254 | item_list.pop() 255 | search_response = {'items': item_list} 256 | 257 | search_response = scan_response(search_response) 258 | search_response = filter_response(search_response, filter_arguments) 259 | search_response = score_response(search_response, filter_arguments) 260 | 261 | # select video 262 | selected_movie = None 263 | 264 | top_score = 0 265 | top_view_count = 0 266 | 267 | for item in search_response['items']: 268 | 269 | print('-----------------------------------------------------------------') 270 | print(item['title']) 271 | print(item['adds_info']) 272 | print(item['video_resolution']) 273 | print(item['link']) 274 | print(item['true_rating']) 275 | print(item['view_count']) 276 | 277 | if item['true_rating'] > top_score: 278 | top_score = item['true_rating'] 279 | 280 | for item in search_response['items']: 281 | if item['true_rating'] > top_score * 0.95: 282 | if item['view_count'] > top_view_count: 283 | top_view_count = item['view_count'] 284 | selected_movie = item 285 | 286 | return selected_movie 287 | 288 | 289 | def download(youtube_video, download_dir, file_name, ffmpeg_status): 290 | def get_best_adaptive_audio_stream(stream_list): 291 | 292 | max_bit_rate = 0 293 | top_audio_stream = None 294 | preferable_max_bit_rate = 0 295 | preferable_top_audio_stream = None 296 | 297 | for audio_stream in stream_list.streams.filter(type='audio', progressive=False).all(): 298 | 299 | if audio_stream.is_progressive \ 300 | or audio_stream.resolution != '0p' \ 301 | or audio_stream.video_codec != 'unknown': 302 | continue 303 | 304 | bit_rate = int(audio_stream.abr.replace('kbps', '')) 305 | 306 | if bit_rate > max_bit_rate: 307 | max_bit_rate = bit_rate 308 | top_audio_stream = audio_stream 309 | 310 | if bit_rate > preferable_max_bit_rate and 'mp4a' in audio_stream.audio_codec.lower(): 311 | preferable_max_bit_rate = bit_rate 312 | preferable_top_audio_stream = audio_stream 313 | 314 | if preferable_max_bit_rate * 1.7 > max_bit_rate: 315 | return preferable_top_audio_stream 316 | else: 317 | return top_audio_stream 318 | 319 | def get_best_adaptive_video_stream(stream_list): 320 | 321 | max_resolution = 0 322 | top_video_stream = None 323 | preferable_max_resolution = 0 324 | preferable_top_video_stream = None 325 | 326 | for video_stream in stream_list.streams.filter(type='video').all(): 327 | 328 | if video_stream.is_progressive \ 329 | or video_stream.abr != '25kbps' \ 330 | or video_stream.audio_codec != 'unknown': 331 | continue 332 | 333 | resolution = int(video_stream.resolution.replace('p', '')) 334 | 335 | if resolution > max_resolution: 336 | max_resolution = resolution 337 | top_video_stream = video_stream 338 | 339 | if resolution > 1080: 340 | continue 341 | 342 | if resolution > preferable_max_resolution and 'avc' in video_stream.video_codec.lower(): 343 | preferable_max_resolution = resolution 344 | preferable_top_video_stream = video_stream 345 | 346 | if preferable_max_resolution == max_resolution: 347 | return preferable_top_video_stream 348 | else: 349 | return top_video_stream 350 | 351 | def get_best_progressive_stream(stream_list): 352 | 353 | max_resolution = 0 354 | selected_stream = None 355 | 356 | for progressive_stream in stream_list.streams.filter(progressive=True).all(): 357 | 358 | resolution = int(progressive_stream.resolution.replace('p', '')) 359 | 360 | if resolution > 
max_resolution: 361 | max_resolution = resolution 362 | 363 | max_score = 0 364 | for progressive_stream in stream_list.streams.filter().all(): 365 | 366 | score = 0 367 | resolution = int(progressive_stream.resolution.replace('p', '')) 368 | bit_rate = int(progressive_stream.abr.replace('kbps', '')) 369 | if not ffmpeg_status: 370 | if progressive_stream.subtype.lower() == 'mp4': 371 | score += 1000000000 372 | if resolution == max_resolution: 373 | score += 10000 374 | if 'avc' in progressive_stream.video_codec.lower(): 375 | score += 1000 376 | if 'mp4a' in progressive_stream.audio_codec.lower(): 377 | score += bit_rate * 1.7 378 | else: 379 | score += bit_rate 380 | 381 | if score > max_score: 382 | max_score = score 383 | selected_stream = progressive_stream 384 | 385 | return selected_stream 386 | 387 | def download_adaptive_streams(video_stream, audio_stream, target_dir, target_file_name): 388 | 389 | for attempt in range(5): 390 | if attempt > 2: 391 | video_stream.download(target_dir, 'video') 392 | break 393 | else: 394 | try: 395 | video_stream.download(target_dir, 'video') 396 | break 397 | except URLError: 398 | print('Failed to download video stream, trying again in 10 seconds') 399 | time.sleep(10) 400 | continue 401 | 402 | for attempt in range(5): 403 | if attempt > 2: 404 | audio_stream.download(target_dir, 'audio') 405 | break 406 | else: 407 | try: 408 | audio_stream.download(target_dir, 'audio') 409 | break 410 | except URLError: 411 | print('Failed to download audio stream, trying again in 10 seconds') 412 | time.sleep(10) 413 | continue 414 | 415 | if 'avc' in video_stream.video_codec.lower(): 416 | video_encode_parameters = 'copy' 417 | else: 418 | video_encode_parameters = 'libx264 -preset slow -crf 18' 419 | 420 | if 'mp4a' in audio_stream.audio_codec.lower(): 421 | audio_encode_parameters = 'copy' 422 | else: 423 | audio_encode_parameters = 'aac -strict -2 -b:a 128k' 424 | 425 | os.system('ffmpeg -i "' + os.path.join(target_dir, 'video') + '".* ' 426 | '-i "' + os.path.join(target_dir, 'audio') + '".* ' 427 | '-c:v ' + video_encode_parameters + ' ' 428 | '-c:a ' + audio_encode_parameters + ' ' 429 | '-threads 4 ' 430 | '"' + os.path.join(target_dir, target_file_name + '.mp4') + '" -y') 431 | 432 | def download_progressive_streams(progressive_stream, target_dir, target_file_name): 433 | 434 | if progressive_stream.subtype.lower() == 'mp4': 435 | 436 | for attempt in range(5): 437 | if attempt > 2: 438 | progressive_stream.download(target_dir, target_file_name) 439 | break 440 | else: 441 | try: 442 | progressive_stream.download(target_dir, target_file_name) 443 | break 444 | except URLError: 445 | print('Failed to download progressive stream, trying again in 10 seconds') 446 | time.sleep(10) 447 | continue 448 | return 449 | else: 450 | 451 | for attempt in range(5): 452 | if attempt > 2: 453 | progressive_stream.download(target_dir, 'progressive') 454 | break 455 | else: 456 | try: 457 | progressive_stream.download(target_dir, 'progressive') 458 | break 459 | except URLError: 460 | print('Failed to download progressive stream, trying again in 10 seconds') 461 | time.sleep(10) 462 | continue 463 | 464 | if 'avc' in progressive_stream.video_codec.lower(): 465 | video_encode_parameters = 'copy' 466 | else: 467 | video_encode_parameters = 'libx264 -preset slow -crf 18' 468 | 469 | if 'mp4a' in progressive_stream.audio_codec.lower(): 470 | audio_encode_parameters = 'copy' 471 | else: 472 | audio_encode_parameters = 'aac -strict -2 -b:a 128k' 473 | 474 | 
os.system('ffmpeg -i "' + os.path.join(target_dir, 'progressive') + '".* ' 475 | '-c:v ' + video_encode_parameters + ' ' 476 | '-c:a ' + audio_encode_parameters + ' ' 477 | '-threads 4 ' 478 | '"' + os.path.join(target_dir, target_file_name + '.mp4') + '" -y') 479 | 480 | # decide adaptive streams to get 481 | video = youtube_video['youtube_object'] 482 | for stream in video.streams.all(): 483 | 484 | if stream.abr is None: 485 | stream.abr = '25kbps' 486 | if stream.audio_codec is None: 487 | stream.audio_codec = 'unknown' 488 | if stream.resolution is None: 489 | stream.resolution = '0p' 490 | if stream.video_codec is None: 491 | stream.video_codec = 'unknown' 492 | print('---------------------------------------------------------------------------------------------------') 493 | print(pprint.pprint(video.streams.all())) 494 | print('---------------------------------------------------------------------------------------------------') 495 | best_audio_stream = get_best_adaptive_audio_stream(video) 496 | best_video_stream = get_best_adaptive_video_stream(video) 497 | best_progressive_stream = get_best_progressive_stream(video) 498 | print(pprint.pprint(best_progressive_stream)) 499 | print(pprint.pprint(best_video_stream)) 500 | print(pprint.pprint(best_audio_stream)) 501 | print('---------------------------------------------------------------------------------------------------') 502 | 503 | if 'mp4a' in best_audio_stream.audio_codec.lower(): 504 | best_audio_stream.abr = int(best_audio_stream.abr.replace('kbps', '')) * 1.7 505 | if 'mp4a' in best_progressive_stream.audio_codec.lower(): 506 | best_progressive_stream.abr = int(best_progressive_stream.abr.replace('kbps', '')) * 1.7 507 | 508 | # decide to get adaptive or progressive 509 | if not ffmpeg_status: 510 | print('Picked the progressive streams because the ffmpeg_installed setting is false.') 511 | download_progressive_streams(best_progressive_stream, download_dir, file_name) 512 | 513 | elif int(best_video_stream.resolution.replace('p', '')) > int(best_progressive_stream.resolution.replace('p', '')): 514 | print('Picked the adaptive streams because of higher video resolution.') 515 | download_adaptive_streams(best_video_stream, best_audio_stream, download_dir, file_name) 516 | 517 | elif 'avc' not in best_progressive_stream.video_codec.lower() and 'avc' in best_video_stream.video_codec.lower(): 518 | print('Picked the adaptive streams because of better video codec.') 519 | download_adaptive_streams(best_video_stream, best_audio_stream, download_dir, file_name) 520 | 521 | elif best_audio_stream.abr > best_progressive_stream.abr * 0.9: 522 | print('Picked the adaptive streams because of better audio.') 523 | download_adaptive_streams(best_video_stream, best_audio_stream, download_dir, file_name) 524 | 525 | else: 526 | print('Picked the progressive stream.') 527 | download_progressive_streams(best_progressive_stream, download_dir, file_name) 528 | 529 | return 530 | 531 | 532 | def move_and_cleanup(source_dir, target_dir, file_name): 533 | 534 | # moving file 535 | if not os.path.isfile(os.path.join(target_dir, file_name)): 536 | shutil.move(os.path.join(source_dir, file_name), os.path.join(target_dir, file_name)) 537 | else: 538 | os.remove(os.path.join(source_dir, file_name)) 539 | # deleting downloaded files 540 | 541 | for folder_name in os.listdir(source_dir): 542 | if fnmatch.fnmatch(folder_name, 'audio.*'): 543 | os.remove(os.path.join(source_dir, folder_name)) 544 | if fnmatch.fnmatch(folder_name, 'video.*'): 545 | 
os.remove(os.path.join(source_dir, folder_name)) 546 | if fnmatch.fnmatch(folder_name, 'progressive.*'): 547 | os.remove(os.path.join(source_dir, folder_name)) 548 | 549 | 550 | def get_official_trailer(config): 551 | ################################################################# 552 | # Video constrains: 553 | extra_name = 'Official Trailer-trailer' 554 | search_suffix = ' Trailer' 555 | video_name_must_contain = ['trailer'] 556 | video_name_must_not_contain = ['Side-by-Side', 'Side by Side', 'SidebySide'] 557 | video_name_tag_bonuses = { 558 | 1.01: ['official'], 559 | 0.99: ['preview', 'teaser'] 560 | } 561 | ################################################################# 562 | 563 | filter_arguments = {'video_name_must_contain': video_name_must_contain, 564 | 'video_name_must_not_contain': video_name_must_not_contain, 565 | 'video_name_tag_bonuses': video_name_tag_bonuses} 566 | 567 | find_extra(config, extra_name, search_suffix, filter_arguments) 568 | 569 | 570 | def get_remastered_trailer(config): 571 | 572 | ################################################################# 573 | # Video constrains: 574 | extra_name = 'Remastered Trailer-trailer' 575 | search_suffix = ' Remastered Trailer' 576 | video_name_must_contain = ['trailer', 'remaster'] 577 | video_name_must_not_contain = ['Side-by-Side', 'Side by Side', 'SidebySide'] 578 | video_name_tag_bonuses = { 579 | 0.8: ['preview', 'teaser'], 580 | 1.05: ['fan'] 581 | } 582 | ################################################################# 583 | 584 | filter_arguments = {'video_name_must_contain': video_name_must_contain, 585 | 'video_name_must_not_contain': video_name_must_not_contain, 586 | 'video_name_tag_bonuses': video_name_tag_bonuses} 587 | 588 | find_extra(config, extra_name, search_suffix, filter_arguments) 589 | 590 | def retrieve_web_page(url, page_name='page'): 591 | 592 | response = None 593 | print('Downloading ' + page_name + '.') 594 | for attempt in range(20): 595 | try: 596 | response = urlopen(url, timeout=2) 597 | break 598 | except timeout: 599 | print('Failed to download ' + page_name + ' : timed out. Trying again in 2 seconds.') 600 | time.sleep(2) 601 | if attempt > 8: 602 | print('You might have lost internet connection.') 603 | print('Breaking out of loop and committing') 604 | sys.exit() 605 | except HTTPError as e: 606 | raise ValueError('Failed to download ' + page_name + ' : ' + e.msg + '. Skipping.') 607 | except URLError: 608 | print('Failed to download ' + page_name + '. Trying again in 2 seconds') 609 | time.sleep(2) 610 | if attempt > 8: 611 | print('You might have lost internet connection.') 612 | print('Breaking out of loop and committing') 613 | sys.exit() 614 | 615 | return response 616 | 617 | def get_tmdb_movie_id(movie): 618 | if len(movie['imdb_id']) != 9: 619 | raise ValueError("Movie have no IMDB ID. Skipping.") 620 | 621 | response = retrieve_web_page('https://api.themoviedb.org/3/find/' 622 | + movie['imdb_id'] + 623 | '?api_key=' + tmdb_api_key + 624 | '&language=en-US' 625 | '&external_source=imdb_id', 'tmdb id') 626 | 627 | data = json.loads(response.read().decode('utf-8')) 628 | 629 | if len(data['movie_results']) == 0: 630 | raise ValueError('Unable to find TMDB ID. 
Skipping.') 631 | 632 | movie['tmdb_id'] = str(data['movie_results'][0]['id']) 633 | response.close() 634 | 635 | 636 | config_file = 'default_config.cfg' 637 | conf = configparser.ConfigParser() 638 | 639 | while True: 640 | try: 641 | conf.read(config_file) 642 | if conf.getboolean('SETTINGS', 'search_for_remastered'): 643 | get_remastered_trailer(conf) 644 | else: 645 | get_official_trailer(conf) 646 | 647 | with open(config_file, 'w') as new_config_file: 648 | conf.write(new_config_file) 649 | new_config_file.close() 650 | 651 | time.sleep(conf.getint('SETTINGS', 'cooldown')) 652 | 653 | except ValueError as error: 654 | print(error) 655 | print('pytube failed to initialize after 3 attempts, try again at a later date.') 656 | time.sleep(10) 657 | -------------------------------------------------------------------------------- /tools.py: -------------------------------------------------------------------------------- 1 | from _socket import timeout 2 | from urllib.error import HTTPError, URLError 3 | from urllib.request import urlopen 4 | from urllib.parse import quote 5 | import time 6 | import json 7 | import hashlib 8 | import os 9 | 10 | 11 | def hash_file(file_path): 12 | if not os.path.isdir(file_path): 13 | md5 = hashlib.md5() 14 | with open(file_path, 'rb') as file: 15 | for i in range(10): 16 | data = file.read(2**20) 17 | if not data: 18 | break 19 | md5.update(data) 20 | return md5.hexdigest() 21 | 22 | 23 | def get_keyword_list(string): 24 | 25 | ret = ' ' + get_clean_string(string).lower() + ' ' 26 | ret = (ret.replace(' the ', ' ') 27 | .replace(' in ', ' ') 28 | .replace(' a ', ' ') 29 | .replace(' by ', ' ') 30 | .replace(' for ', ' ') 31 | .replace(' is ', ' ') 32 | .replace(' am ', ' ') 33 | .replace(' an ', ' ') 34 | .replace(' in ', ' ') 35 | .replace(' with ', ' ') 36 | .replace(' from ', ' ') 37 | .replace(' and ', ' ') 38 | .replace(' movie ', ' ') 39 | .replace(' trailer ', ' ') 40 | .replace(' interview ', ' ') 41 | .replace(' interviews ', ' ') 42 | .replace(' scenes ', ' ') 43 | .replace(' scene ', ' ') 44 | .replace(' official ', ' ') 45 | .replace(' hd ', ' ') 46 | .replace(' hq ', ' ') 47 | .replace(' lq ', ' ') 48 | .replace(' 1080p ', ' ') 49 | .replace(' 720p ', ' ') 50 | .replace(' of ', ' ')) 51 | 52 | return list(set(space_cleanup(ret).split(' '))) 53 | 54 | 55 | def get_clean_string(string): 56 | ret = ' ' + string.lower() + ' ' 57 | 58 | ret = (ret.replace('(', '') 59 | .replace(')', '') 60 | .replace('[', '') 61 | .replace(']', '') 62 | .replace('{', '') 63 | .replace('}', '') 64 | .replace(':', '') 65 | .replace(';', '') 66 | .replace('?', '') 67 | .replace("'", '') 68 | .replace("’", '') 69 | .replace("´", '') 70 | .replace("`", '') 71 | .replace("*", ' ') 72 | .replace('.', ' ') 73 | .replace('·', '-') 74 | .replace(' -', ' ') 75 | .replace('- ', ' ') 76 | .replace('_', ' ') 77 | .replace(' + ', ' : ') 78 | .replace('+', '/') 79 | .replace(' : ', ' + ') 80 | .replace('/ ', ' ') 81 | .replace(' /', ' ') 82 | .replace(' & ', ' ')) 83 | 84 | ret_tup = ret.split(' ') 85 | ret_count = 0 86 | for ret_tup_count in range(len(ret_tup)-1): 87 | if len(ret_tup[ret_tup_count]) == 1 and len(ret_tup[ret_tup_count + 1]) == 1: 88 | ret_count += 1 89 | ret = ret[:ret_count] + ret[ret_count:ret_count + 1].replace(' ', '.') + ret[ret_count + 1:] 90 | ret_count += 1 91 | else: 92 | ret_count += len(ret_tup[ret_tup_count]) + 1 93 | 94 | return space_cleanup(replace_roman_numbers(ret)) 95 | 96 | 97 | def replace_roman_numbers(string): 98 | ret = ' ' + string.lower() 
+ ' ' 99 | 100 | ret = (ret.replace(' ix ', ' 9 ') 101 | .replace(' viiii ', ' 9 ') 102 | .replace(' viii ', ' 8 ') 103 | .replace(' vii ', ' 7 ') 104 | .replace(' vi ', ' 6 ') 105 | .replace(' iv ', ' 4 ') 106 | .replace(' iiii ', ' 4 ') 107 | .replace(' iii ', ' 3 ') 108 | .replace(' ii ', ' 2 ') 109 | .replace(' trailer 4 ', ' trailer ') 110 | .replace(' trailer 3 ', ' trailer ') 111 | .replace(' trailer 2 ', ' trailer ') 112 | .replace(' trailer 1 ', ' trailer ')) 113 | 114 | return space_cleanup(ret) 115 | 116 | 117 | def make_list_from_string(string, delimiter=',', remove_spaces_next_to_delimiter=True): 118 | if remove_spaces_next_to_delimiter: 119 | while ' ' + delimiter in string: 120 | string = string.replace(' ' + delimiter, delimiter) 121 | while delimiter + ' ' in string: 122 | string = string.replace(delimiter + ' ', delimiter) 123 | 124 | return string.split(delimiter) 125 | 126 | 127 | def space_cleanup(string): 128 | ret = string 129 | while ' ' in ret: 130 | ret = ret.replace(' ', ' ') 131 | while ret.endswith(' '): 132 | ret = ret[:-1] 133 | while ret.startswith(' '): 134 | ret = ret[1:] 135 | return ret 136 | 137 | 138 | def retrieve_web_page(url, page_name='page'): 139 | 140 | response = None 141 | print('Downloading ' + page_name + '.') 142 | 143 | for tries in range(1, 10): 144 | try: 145 | response = urlopen(url, timeout=2) 146 | break 147 | 148 | except UnicodeEncodeError as e: 149 | print('Failed to download ' + page_name + ' : ' + str(e) + '. Skipping.') 150 | break 151 | 152 | except timeout: 153 | if tries > 5: 154 | print('You might have lost internet connection.') 155 | break 156 | 157 | time.sleep(1) 158 | print('Failed to download ' + page_name + ' : timed out. Retrying.') 159 | 160 | except HTTPError as e: 161 | print('Failed to download ' + page_name + ' : ' + str(e) + '. Skipping.') 162 | break 163 | 164 | except URLError: 165 | if tries > 3: 166 | print('You might have lost internet connection.') 167 | raise 168 | 169 | time.sleep(1) 170 | print('Failed to download ' + page_name + '. 
Retrying.') 171 | 172 | return response 173 | 174 | 175 | def apply_query_template(template, keys): 176 | ret = template 177 | for key, value in keys.items(): 178 | if isinstance(value, str): 179 | ret = ret.replace('{' + key + '}', value) 180 | elif isinstance(value, int): 181 | ret = ret.replace('{' + key + '}', str(value)) 182 | elif isinstance(value, float): 183 | ret = ret.replace('{' + key + '}', str(value)) 184 | 185 | return space_cleanup(ret) 186 | 187 | 188 | def get_tmdb_search_data(tmdb_api_key, title): 189 | response = retrieve_web_page('https://api.themoviedb.org/3/search/movie' 190 | '?api_key=' + tmdb_api_key + 191 | '&language=en-US&query=' 192 | + quote(title.encode('utf-8')) + 193 | '&page=1&include_adult=false', 'tmdb movie search page') 194 | if response is None: 195 | return None 196 | data = json.loads(response.read().decode('utf-8')) 197 | response.close() 198 | 199 | return data 200 | 201 | 202 | def get_tmdb_details_data(tmdb_api_key, tmdb_id): 203 | response = retrieve_web_page('https://api.themoviedb.org/3/movie/' 204 | + str(tmdb_id) + 205 | '?api_key=' + tmdb_api_key + 206 | '&language=en-US', 'movie details') 207 | if response is None: 208 | return None 209 | data = json.loads(response.read().decode('utf-8')) 210 | response.close() 211 | 212 | return data 213 | 214 | 215 | def get_tmdb_crew_data(tmdb_api_key, tmdb_id): 216 | pass 217 | -------------------------------------------------------------------------------- /url_finders.py: -------------------------------------------------------------------------------- 1 | from googlesearch import search as google_web_search 2 | from time import sleep 3 | from time import time 4 | import sys 5 | 6 | from urllib.error import HTTPError 7 | 8 | import tools 9 | from bs4 import BeautifulSoup 10 | from urllib.parse import quote 11 | 12 | last = None 13 | 14 | 15 | def google_search(query, limit): 16 | global last 17 | ret_url_list = list() 18 | 19 | for tries in range(1, 10): 20 | try: 21 | if last: 22 | sleep(int(60 - (time() - last))) 23 | except ValueError: 24 | pass 25 | 26 | last = time() 27 | 28 | try: 29 | for url in google_web_search(query, stop=limit): 30 | if 'youtube.com/watch?v=' in url: 31 | ret_url_list.append(url.split('&')[0]) 32 | 33 | except KeyboardInterrupt: 34 | raise 35 | 36 | except HTTPError as e: 37 | print('Google search service unavailable.') 38 | 39 | if tries > 3: 40 | print('Failed to download google search result. Reason: ' + str(e)) 41 | raise 42 | 43 | print('Failed to download google search result, retrying. Reason: ' + str(e)) 44 | sleep(1) 45 | 46 | except: 47 | e = sys.exc_info()[0] 48 | if tries > 3: 49 | print('Failed to download google search result. Reason: ' + str(e)) 50 | raise 51 | 52 | print('Failed to download google search result, retrying. Reason: ' + str(e)) 53 | sleep(1) 54 | else: 55 | break 56 | 57 | return ret_url_list[:limit] 58 | 59 | 60 | def youtube_search(query, limit): 61 | 62 | ret_url_list = list() 63 | 64 | for tries in range(1, 10): 65 | try: 66 | response = tools.retrieve_web_page('https://www.youtube.com/results?search_query=' + 67 | quote(query.encode('utf-8')), 68 | 'youtube search result') 69 | 70 | except KeyboardInterrupt: 71 | raise 72 | 73 | except: 74 | e = sys.exc_info()[0] 75 | if tries > 3: 76 | print('Failed to download youtube search result. Reason: ' + str(e)) 77 | raise 78 | 79 | print('Failed to download youtube search result, retrying. 
Reason: ' + str(e)) 80 | sleep(1) 81 | 82 | else: 83 | if response: 84 | soup = BeautifulSoup(response, "html.parser") 85 | for item in soup.findAll(attrs={'class': 'yt-uix-tile-link'}): 86 | url = 'https://www.youtube.com' + item['href'] 87 | ret_url_list.append(url.split('&')[0]) 88 | break 89 | 90 | return ret_url_list[:limit] 91 | 92 | 93 | def youtube_channel_search(query, limit): 94 | # todo (1): implement youtube_channel_search. 95 | pass 96 | --------------------------------------------------------------------------------
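Usage note: a minimal sketch (not a file in this repository; the query string and the fallback order are illustrative assumptions) showing how the two implemented helpers in url_finders.py can be called. Both take a search query and a result limit and return a list of YouTube watch URLs.

import url_finders

query = 'Example Movie Official Trailer'  # hypothetical search term, for illustration only
urls = url_finders.youtube_search(query, 5)
if not urls:
    # fall back to the rate-limited Google search helper, which already
    # filters its results down to youtube.com/watch URLs
    urls = url_finders.google_search(query, 5)
for url in urls:
    print(url)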