├── .gitignore ├── LICENSE ├── Movie-Extra-Downloader.py ├── README.md ├── default_extra_configs ├── 1080p-trailer.cfg ├── 720p-trailer.cfg ├── experimental.cfg └── theme-song.cfg ├── directory.py ├── empty_default_config.cfg ├── extra_config.py ├── extra_finder.py ├── main.py ├── old_code ├── Movie-Extra-Downloader.py ├── Stream.py ├── YoutubeVideo.py ├── config-example.cfg └── old_code.py ├── tools.py └── url_finders.py /.gitignore: -------------------------------------------------------------------------------- 1 | .* 2 | testdir 3 | testdir* 4 | venv 5 | .idea 6 | *(* 7 | *)* 8 | *.log 9 | default_config.cfg 10 | testing.py 11 | record_data 12 | *tmp* 13 | records 14 | extra_configs/** 15 | failed movies/** 16 | !extra_configs/.gitkeep 17 | !failed_movies/.gitkeep 18 | 19 | # Byte-compiled / optimized / DLL files 20 | __pycache__/ 21 | *.py[cod] 22 | *$py.class 23 | 24 | # C extensions 25 | *.so 26 | 27 | # Distribution / packaging 28 | .Python 29 | build/ 30 | develop-eggs/ 31 | dist/ 32 | downloads/ 33 | eggs/ 34 | .eggs/ 35 | lib/ 36 | lib64/ 37 | parts/ 38 | sdist/ 39 | var/ 40 | wheels/ 41 | *.egg-info/ 42 | .installed.cfg 43 | *.egg 44 | MANIFEST 45 | 46 | # PyInstaller 47 | # Usually these files are written by a python script from a template 48 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 49 | *.manifest 50 | *.spec 51 | 52 | # Installer logs 53 | pip-log.txt 54 | pip-delete-this-directory.txt 55 | 56 | # Unit test / coverage reports 57 | htmlcov/ 58 | .tox/ 59 | .coverage 60 | .coverage.* 61 | .cache 62 | nosetests.xml 63 | coverage.xml 64 | *.cover 65 | .hypothesis/ 66 | .pytest_cache/ 67 | 68 | # Translations 69 | *.mo 70 | *.pot 71 | 72 | # Django stuff: 73 | *.log 74 | local_settings.py 75 | db.sqlite3 76 | 77 | # Flask stuff: 78 | instance/ 79 | .webassets-cache 80 | 81 | # Scrapy stuff: 82 | .scrapy 83 | 84 | # Sphinx documentation 85 | docs/_build/ 86 | 87 | # PyBuilder 88 | target/ 89 | 90 | # Jupyter Notebook 91 | .ipynb_checkpoints 92 | 93 | # pyenv 94 | .python-version 95 | 96 | # celery beat schedule file 97 | celerybeat-schedule 98 | 99 | # SageMath parsed files 100 | *.sage.py 101 | 102 | # Environments 103 | .env 104 | .venv 105 | env/ 106 | venv/ 107 | ENV/ 108 | env.bak/ 109 | venv.bak/ 110 | 111 | # Spyder project settings 112 | .spyderproject 113 | .spyproject 114 | 115 | # Rope project settings 116 | .ropeproject 117 | 118 | # mkdocs documentation 119 | /site 120 | 121 | # mypy 122 | .mypy_cache/ 123 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 KBlixt 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Movie-Extra-Downloader.py: -------------------------------------------------------------------------------- 1 | import traceback 2 | 3 | from main import download_extra 4 | from extra_config import ExtraSettings 5 | import os 6 | import sys 7 | from directory import Directory 8 | import shutil 9 | from urllib.error import URLError, HTTPError 10 | import configparser 11 | from _socket import timeout 12 | import argparse 13 | import tools 14 | import time 15 | 16 | parser = argparse.ArgumentParser() 17 | parser.add_argument("-d", "--directory", help="directory to search extras for") 18 | parser.add_argument("-l", "--library", help="library of directories to search extras for") 19 | parser.add_argument("-f", "--force", action="store_true", help="force scan the directories.") 20 | parser.add_argument("-r", "--replace", action="store_true", help="remove and ban the existing extra.") 21 | args = parser.parse_args() 22 | 23 | if args.directory and os.path.split(args.directory)[1] == '': 24 | args.directory = os.path.split(args.directory)[0] 25 | 26 | if args.library and os.path.split(args.library)[1] == '': 27 | args.library = os.path.split(args.library)[0] 28 | 29 | 30 | def handle_directory(folder): 31 | print('working on directory: "' + os.path.join('...', os.path.split(folder)[1]) + '"') 32 | for config in configs_content: 33 | 34 | if config.startswith('.') or config.startswith('_'): 35 | continue 36 | try: 37 | try: 38 | directory = Directory.load_directory(os.path.join(records, os.path.split(folder)[1])) 39 | except FileNotFoundError: 40 | if has_tmdb_key: 41 | directory = Directory(folder, tmdb_api_key=c.get('SETTINGS', 'tmdb_api_key')) 42 | else: 43 | directory = Directory(folder) 44 | 45 | extra_config = ExtraSettings(os.path.join(configs, config)) 46 | 47 | if args.replace and 'trailer' in extra_config.extra_type.lower(): 48 | args.force = True 49 | 50 | if extra_config.config_id in directory.completed_configs and not args.force: 51 | continue 52 | 53 | if extra_config.skip_movies_with_existing_trailers and not args.replace: 54 | skip = False 55 | for file in os.listdir(directory.full_path): 56 | if file.lower().endswith('trailer.mp4')\ 57 | or file.lower().endswith('trailer.mkv'): 58 | skip = True 59 | break 60 | if skip: 61 | print('movie already have a trailer. skipping.') 62 | directory.save_directory(records) 63 | continue 64 | if os.path.isdir(os.path.join(directory.full_path, 'trailers')): 65 | for file in os.listdir(os.path.join(directory.full_path, 'trailers')): 66 | if file.lower().endswith('.mp4')\ 67 | or file.lower().endswith('.mkv'): 68 | skip = True 69 | break 70 | if skip: 71 | print('movie already have a trailer. 
skipping.') 72 | directory.save_directory(records) 73 | continue 74 | 75 | if extra_config.skip_movies_with_existing_theme: 76 | skip = False 77 | for file in os.listdir(directory.full_path): 78 | if file.lower().endswith('theme.mp3')\ 79 | or file.lower().endswith('theme.wma')\ 80 | or file.lower().endswith('theme.flac'): 81 | skip = True 82 | break 83 | if skip: 84 | print('movie already have a theme song. skipping.') 85 | directory.save_directory(records) 86 | continue 87 | if os.path.isdir(os.path.join(directory.full_path, 'theme-music')): 88 | for file in os.listdir(os.path.join(directory.full_path, 'theme-music')): 89 | if file.lower().endswith('.mp3')\ 90 | or file.lower().endswith('.wma')\ 91 | or file.lower().endswith('.flac'): 92 | skip = True 93 | break 94 | if skip: 95 | print('movie already have a theme song. skipping.') 96 | directory.save_directory(records) 97 | continue 98 | 99 | directory.update_content() 100 | 101 | if args.force: 102 | old_record = directory.record 103 | directory.record = list() 104 | for record in old_record: 105 | if record != extra_config.extra_type: 106 | directory.record.append(record) 107 | extra_config.force = True 108 | 109 | if args.replace: 110 | directory.banned_youtube_videos_id.append(directory.trailer_youtube_video_id) 111 | shutil.rmtree(os.path.join(directory.full_path, extra_config.extra_type)) 112 | os.mkdir(os.path.join(directory.full_path, extra_config.extra_type)) 113 | 114 | if not os.path.isdir(tmp_folder): 115 | os.mkdir(tmp_folder) 116 | 117 | download_extra(directory, extra_config, tmp_folder) 118 | directory.completed_configs.append(extra_config.config_id) 119 | directory.save_directory(records) 120 | 121 | if args.force: 122 | # todo: delete all paths in the old record that are not in the new record 123 | pass 124 | 125 | except FileNotFoundError as e: 126 | print('file not found: ' + str(e)) 127 | continue 128 | 129 | except HTTPError: 130 | print('You might have been flagged by google search. try again tomorrow.') 131 | sys.exit() 132 | 133 | except URLError: 134 | print('you might have lost your internet connections. exiting') 135 | sys.exit() 136 | 137 | except timeout: 138 | print('you might have lost your internet connections. exiting') 139 | sys.exit() 140 | 141 | except ConnectionResetError: 142 | print('you might have lost your internet connections. exiting') 143 | sys.exit() 144 | 145 | except KeyboardInterrupt: 146 | print('exiting! 
keyboard interrupt.') 147 | sys.exit() 148 | 149 | 150 | def handle_library(library): 151 | if args.replace: 152 | print('the replace mode is unable in library mode, please use the directory mode.') 153 | return False 154 | for folder in os.listdir(library): 155 | if folder.startswith('.'): 156 | continue 157 | if not os.path.isdir(os.path.join(library, folder)): 158 | continue 159 | try: 160 | handle_directory(os.path.join(library, folder)) 161 | except KeyboardInterrupt: 162 | raise 163 | except Exception as e: 164 | print("----------------------------------------------------------") 165 | print("----------------------------------------------------------") 166 | print("----------------------------------------------------------") 167 | print("----------------------------------------------------------") 168 | print("----------------------------------------------------------") 169 | print("--------------------AN ERROR OCCURRED---------------------") 170 | print("------------------------SKIPPING--------------------------") 171 | print("------PLEASE REPORT MOVIE TITLE TO THE GITHUB ISSUES------") 172 | print("-----------------THE SCRIPT WILL CONTINUE-----------------") 173 | print("----------------------------------------------------------") 174 | print("-------------------- Exception: --------------------------") 175 | print(e) 176 | print(traceback.format_exc()) 177 | print("----------------------------------------------------------") 178 | print("----------------------------------------------------------") 179 | time.sleep(1) 180 | exit() 181 | 182 | if not os.path.isdir(os.path.join(os.path.dirname(sys.argv[0]), "failed_movies")): 183 | os.mkdir(os.path.join(os.path.dirname(sys.argv[0]), "failed_movies")) 184 | if not os.path.isdir(os.path.join(os.path.dirname(sys.argv[0]), "failed_movies", folder)): 185 | os.mkdir(os.path.join(os.path.dirname(sys.argv[0]), "failed_movies", folder)) 186 | if library == 'testdir': 187 | raise 188 | return True 189 | 190 | 191 | c = configparser.ConfigParser() 192 | c.read('default_config.cfg') 193 | 194 | tmp_folder = os.path.join(os.path.dirname(sys.argv[0]), 'tmp') 195 | 196 | configs = os.path.join(os.path.dirname(sys.argv[0]), 'extra_configs') 197 | configs_content = os.listdir(configs) 198 | 199 | records = os.path.join(os.path.dirname(sys.argv[0]), 'records') 200 | 201 | result = tools.get_tmdb_search_data(c.get('SETTINGS', 'tmdb_api_key'), 'star wars') 202 | if result is None: 203 | print('Warning: No working TMDB api key was specified.') 204 | time.sleep(10) 205 | has_tmdb_key = False 206 | else: 207 | has_tmdb_key = True 208 | 209 | 210 | if args.directory: 211 | handle_directory(args.directory) 212 | elif args.library: 213 | handle_library(args.library) 214 | else: 215 | print('please specify a directory (-d) or a library (-l) to search extras for') 216 | 217 | try: 218 | shutil.rmtree(tmp_folder) 219 | except FileNotFoundError: 220 | pass 221 | os.mkdir(tmp_folder) 222 | 223 | sys.exit() 224 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Movie-Extra-Downloader 2 | A python 3.6 script that downloads movie extras from youtube. 3 | 4 | This script searches for movie extras on youtube and then uses Youtube-DL to download found videos from youtube. 5 | Downloaded videos are put into subfolders in the movie directory. 6 | 7 | The script uses folder names as a basis of the search. 
It can also recognise that the folder name ends with the release 8 | year and will use it to make better searches. To improve results the script can use a TMDB API key to filter out 9 | similarly named titles. The script assumes that you are using a folder naming scheme similar to 10 | "{movie title} {movie release year}". It can handle most common delimiters and will remove all parentheses and brackets. 11 | 12 | My goal and vision for this program is to find all kinds of movie extra content on YouTube, but for now **I've only implemented 13 | trailers.** The goal is also to provide options for useful things like renaming schemes. Editing the filtering process 14 | should also be fairly easy to mess about with. For now, however, I wouldn't recommend changing too much in the extra configs. 15 | 16 | ## INFO 17 | 18 | It's a fairly slow script. I've hard-coded a limit of one movie per minute so that you don't get flagged by Google, 19 | so expect it to run for a day or two the first time you run it. 20 | 21 | The provided configs are well tested but they are **not perfect**. If you find an issue where the script 22 | picks the wrong movie entirely, please let me know. 23 | 24 | For now it downloads the video and its thumbnail, names them after the YouTube video and moves them to a subfolder in the 25 | movie directory. 26 | 27 | 28 | 29 | ## Installation 30 | 31 | It should be dead simple to install. Simply clone the repository and install the Python 3 modules "youtube-dl" and "google". 32 | 33 | #### modules needed: 34 | 35 | - youtube-dl 36 | - google 37 | 38 | If you wish to download 1080p versions you'll also need to install ffmpeg so that it can be run from the terminal. 39 | This should be easy on Linux but it's a bit messier on Mac or Windows. 40 | 41 | ## Configuring 42 | 43 | You'll need to add a default_config.cfg file to the program folder. There should be an "empty_default_config.cfg"; 44 | simply remove the "empty_" from its name and you'll be good to go. 45 | 46 | If you have a TMDB API key and wish to use it, open the default_config.cfg file and add it to the tmdb_api_key field. 47 | This is highly recommended since it will give much better results! 48 | Getting a TMDB API key is really simple and takes under 5 minutes. 49 | 50 | You'll also need to add extra configs to the "extra_configs" folder. There is a folder called "default_extra_configs" 51 | where you'll find fairly well tested configs that should work well; simply copy any config you want into the "extra_configs" folder. 52 | Each config in the "extra_configs" folder represents one extra type. One config can download multiple videos, but only for 53 | one type of extra at a time. Configs starting with "." or "_" are ignored. 54 | 55 | ## Running 56 | 57 | The program should now be ready to use. Run it with Python 3.5 or 3.6. 58 | The program expects to be given a movie directory or a movie library to work on. 59 | Giving it a movie directory will run the script once on that directory, while giving it a movie library will 60 | run the script on every folder in the given library.
61 | 62 | A few examples on an Ubuntu machine: 63 | 64 | #### movie directory example: 65 | 66 | python3 Movie-Extra-Downloader.py -d "/media/plex/Movies/Avatar (2009)" 67 | 68 | #### movie library example: 69 | 70 | python3 Movie-Extra-Downloader.py -l /media/plex/Movies 71 | 72 | ## As a custom script for Radarr 73 | 74 | You'll probably need to write a wrapper script yourself that calls this program, since such a script would differ from system to system. 75 | 76 | 77 | 78 | 79 | -------------------------------------------------------------------------------- /default_extra_configs/1080p-trailer.cfg: -------------------------------------------------------------------------------- 1 | [EXTRA_CONFIG] 2 | config_id = trailer_1080 3 | 4 | # name of the subfolder that this config puts downloaded videos in. 5 | extra_type = trailers 6 | 7 | force = false 8 | 9 | 10 | #### trailers specifics: 11 | only_play_trailers = false 12 | disable_play_trailers = false 13 | skip_movies_with_existing_trailers = true 14 | 15 | 16 | #----------------------------------------------------------------------------------------------------------------------- 17 | [SEARCHES] 18 | 19 | query_1 = site:youtube.com/watch?v= {movie_title} {movie_release_year} trailer 20 | limit_1 = 7 21 | source_1 = google_search 22 | 23 | query_2 = {movie_original_title} {movie_release_year} trailer 24 | limit_2 = 7 25 | source_2 = youtube_search 26 | 27 | #----------------------------------------------------------------------------------------------------------------------- 28 | [FILTERING] 29 | 30 | required_phrases = trailer 31 | 32 | banned_phrases = Side-by-Side, Side by Side, italiano, español, deutsch, german, series, comparison, clip, clips 33 | 34 | banned_channels = KinoCheck comedy, KinoCheck horror, KinoCheck action, KinoCheck kids, KinoCheck Home, 35 | KinoCheck, KinoCheck.com, New Trailer Buzz, Screen Junkies, movieclips, KinoCheck International, FilmSelect, FilmSelect Trailer, 36 | Entertainment Access, trailer city, MOVIE PREDICTOR, Movieclips Classic Trailers, Machinima, ZappMovieTrailer, 37 | TV Promos, Zero Media, One Media, moviemanTrailers, CheckTrailer, Movieclips Trailers 38 | 39 | # If you wonder why these channels are banned, it's because they either watermark their trailers, or begin or end their 40 | # trailers with a massive channel promo that lasts a stupidly long time and links to other videos on their channel.
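# How the [CUSTOM_FILTERS] section below is parsed (see extra_config.py and extra_finder.py): each key reads as
# {filter set number}_{min|max}_{absolute|relative}_{video attribute}. Absolute limits compare the attribute against
# the given value directly, while relative limits compare it against that fraction of the best candidate's value.
# Filter sets are tried in numeric order, each starting again from the full candidate list, until one of them
# leaves at least break_limit candidates.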
41 | 42 | 43 | 44 | 45 | #----------------------------------------------------------------------------------------------------------------------- 46 | [CUSTOM_FILTERS] 47 | 48 | break_limit = 3 49 | 50 | 1_min_relative_adjusted_rating = 0.91 51 | 1_min_absolute_resolution = 1080 52 | 1_min_absolute_resolution_ratio = 1.6 53 | 1_max_absolute_duration = 190 54 | 55 | 2_min_relative_adjusted_rating = 0.92 56 | 2_min_absolute_resolution = 720 57 | 2_min_absolute_resolution_ratio = 1.6 58 | 2_max_absolute_duration = 190 59 | 60 | 3_min_relative_adjusted_rating = 0.92 61 | 3_min_absolute_resolution = 720 62 | 3_min_absolute_resolution_ratio = 1.3 63 | 3_max_absolute_duration = 190 64 | 65 | 4_min_relative_adjusted_rating = 0.91 66 | 4_min_relative_resolution = 0.75 67 | 4_max_absolute_duration = 190 68 | 69 | 5_min_relative_adjusted_rating = 0.91 70 | 5_min_relative_resolution = 0.45 71 | 72 | #----------------------------------------------------------------------------------------------------------------------- 73 | [PRIORITY_RULES] 74 | 75 | preferred_channels = 76 | order = highest_view_count 77 | 78 | #----------------------------------------------------------------------------------------------------------------------- 79 | [DOWNLOADING_AND_POSTPROCESSING] 80 | 81 | videos_to_download = 1 82 | 83 | # arguments to pass to the youtube download module. (json dict. use double quotation marks instead of single quotation) 84 | # note: the outtmpl option is ignored, instead use the "naming_scheme" field. 85 | 86 | youtube_dl_arguments = {"socket_timeout": 3, 87 | "writethumbnail": "true", 88 | "outtmpl": "%(title)s.%(ext)s", 89 | "format": "bestvideo[ext=mp4][height <= 1080]+bestaudio[ext=m4a]/best[ext=mp4][height <= 1080]/best[height <= 1080]"} 90 | 91 | 92 | 93 | 94 | 95 | -------------------------------------------------------------------------------- /default_extra_configs/720p-trailer.cfg: -------------------------------------------------------------------------------- 1 | [EXTRA_CONFIG] 2 | config_id = trailer_720 3 | 4 | # name of the subfolder that this config puts downloaded videos in. 
5 | extra_type = trailers 6 | 7 | force = false 8 | 9 | 10 | #### trailers specifics: 11 | only_play_trailers = false 12 | disable_play_trailers = false 13 | skip_movies_with_existing_trailers = true 14 | 15 | 16 | #----------------------------------------------------------------------------------------------------------------------- 17 | [SEARCHES] 18 | 19 | query_1 = site:youtube.com/watch?v= {movie_title} {movie_release_year} trailer 20 | limit_1 = 7 21 | source_1 = google_search 22 | 23 | query_2 = {movie_original_title} {movie_release_year} trailer 24 | limit_2 = 7 25 | source_2 = youtube_search 26 | 27 | #----------------------------------------------------------------------------------------------------------------------- 28 | [FILTERING] 29 | 30 | required_phrases = trailer 31 | 32 | banned_phrases = Side-by-Side, Side by Side, italiano, español, deutsch, german, series, comparision, clip, clips 33 | 34 | banned_channels = KinoCheck comedy, KinoCheck horror, KinoCheck action, KinoCheck kids, KinoCheck Home, 35 | KinoCheck, KinoCheck.com, New Trailer Buzz, Screen Junkies, movieclips, KinoCheck International, FilmSelect, FilmSelect Trailer, 36 | Entertainment Access, trailer city, MOVIE PREDICTOR, Movieclips Classic Trailers, Machinima, ZappMovieTrailer, 37 | TV Promos, Zero Media, One Media, moviemanTrailers, CheckTrailer, Movieclips Trailers 38 | 39 | # If you wonder why these channels are banned it's because they either watermark their trailers or end the trailer with 40 | # massive channel promo that lasts for a stupidly long time linking to other videos on their channel. 41 | 42 | 43 | 44 | 45 | #----------------------------------------------------------------------------------------------------------------------- 46 | [CUSTOM_FILTERS] 47 | 48 | break_limit = 3 49 | 50 | 2_min_relative_adjusted_rating = 0.92 51 | 2_min_absolute_resolution = 720 52 | 2_min_absolute_resolution_ratio = 1.6 53 | 2_max_absolute_duration = 190 54 | 55 | 3_min_relative_adjusted_rating = 0.92 56 | 3_min_absolute_resolution = 720 57 | 3_min_absolute_resolution_ratio = 1.3 58 | 3_max_absolute_duration = 190 59 | 60 | 4_min_relative_adjusted_rating = 0.91 61 | 4_min_relative_resolution = 0.75 62 | 4_max_absolute_duration = 190 63 | 64 | 5_min_relative_adjusted_rating = 0.91 65 | 5_min_relative_resolution = 0.45 66 | 67 | #----------------------------------------------------------------------------------------------------------------------- 68 | [PRIORITY_RULES] 69 | 70 | preferred_channels = 71 | order = highest_view_count 72 | 73 | #----------------------------------------------------------------------------------------------------------------------- 74 | [DOWNLOADING_AND_POSTPROCESSING] 75 | 76 | videos_to_download = 1 77 | 78 | # arguments to pass to the youtube download module. (json dict. use double quotation marks instead of single quotation) 79 | # note: the outtmpl option is ignored, instead use the "naming_scheme" field. 80 | 81 | youtube_dl_arguments = {"socket_timeout": 3, 82 | "writethumbnail": "true", 83 | "outtmpl": "%(title)s.%(ext)s", 84 | "format": "best[ext=mp4][height <=? 720]/best[height <=? 720]"} 85 | 86 | 87 | 88 | 89 | 90 | -------------------------------------------------------------------------------- /default_extra_configs/experimental.cfg: -------------------------------------------------------------------------------- 1 | [EXTRA_CONFIG] 2 | config_id = behind the scenes 3 | 4 | # name of the subfolder that this config puts downloaded videos in. 
5 | extra_type = behind the scenes 6 | 7 | #### trailers specifics: 8 | # only_play_trailers = false 9 | # 10 | 11 | #### interviews specifics: 12 | # 13 | 14 | #----------------------------------------------------------------------------------------------------------------------- 15 | [SEARCHES] 16 | 17 | query_1 = site:youtube.com/watch?v= {movie_original_title} {movie_release_year} behind the scenes 18 | limit_1 = 7 19 | source_1 = google_search 20 | 21 | query_2 = {movie_original_title} {movie_release_year} behind the scenes 22 | limit_2 = 7 23 | source_2 = youtube_search 24 | 25 | #----------------------------------------------------------------------------------------------------------------------- 26 | [FILTERING] 27 | 28 | required_phrases = behind the scenes|backstage|back stage 29 | 30 | banned_phrases = Side-by-Side, Side by Side 31 | 32 | banned_channels = 33 | 34 | #----------------------------------------------------------------------------------------------------------------------- 35 | [CUSTOM_FILTERS] 36 | 37 | break_limit = 5 38 | 39 | 1_min_absolute_average_rating = 4.5 40 | 1_min_absolute_resolution = 720 41 | 1_min_absolute_view_count = 2000 42 | 43 | 44 | 2_min_absolute_average_rating = 4.5 45 | 2_min_absolute_resolution = 480 46 | 2_min_absolute_view_count = 1000 47 | 48 | 49 | 3_min_absolute_average_rating = 4 50 | 3_min_absolute_resolution = 480 51 | 3_min_absolute_view_count = 1000 52 | 53 | 4_min_absolute_average_rating = 3.5 54 | 4_min_absolute_resolution = 360 55 | 4_min_absolute_view_count = 500 56 | 57 | last_resort_policy = play-trailer/skip 58 | #----------------------------------------------------------------------------------------------------------------------- 59 | [PRIORITY_RULES] 60 | 61 | prefered_channels = 62 | order = highest_view_count 63 | 64 | #----------------------------------------------------------------------------------------------------------------------- 65 | [DOWNLOADING_AND_POSTPROCESSING] 66 | 67 | videos_to_download = 3 68 | 69 | # arguments to pass to the youtube download module. (json dict. use double quotation marks instead of single quotation) 70 | # note: the outtmpl option is ignored, instead use the "naming_scheme" field. 71 | 72 | youtube_dl_arguments = {"socket_timeout": 3, 73 | "writethumbnail": "true", 74 | "outtmpl": "%(title)s.%(ext)s"} 75 | 76 | naming_scheme = {video_title} - [{video_url_id}] 77 | 78 | 79 | 80 | 81 | -------------------------------------------------------------------------------- /default_extra_configs/theme-song.cfg: -------------------------------------------------------------------------------- 1 | [EXTRA_CONFIG] 2 | #vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv 3 | #This preset need you to have ffmpeg installed!!! 4 | #^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 5 | config_id = theme-music 6 | 7 | # name of the subfolder that this config puts downloaded videos in. 
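# note: theme-music is special-cased in extra_finder.move_videos, so the end result is not a subfolder:
# the downloaded audio is extracted to mp3 and saved as theme.mp3 directly in the movie folder.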
8 | extra_type = theme-music 9 | 10 | force = false 11 | 12 | skip_movies_with_existing_theme = true 13 | 14 | 15 | #----------------------------------------------------------------------------------------------------------------------- 16 | [SEARCHES] 17 | 18 | query_2 = site:youtube.com/watch?v= {movie_original_title} {movie_release_year} theme song 19 | limit_2 = 5 20 | source_2 = google_search 21 | 22 | #----------------------------------------------------------------------------------------------------------------------- 23 | [FILTERING] 24 | 25 | required_phrases = theme, song, ost, soundtrack, sound, music 26 | 27 | banned_phrases = italiano, español, deutsch, german, series, scene, all, top 10, featured, every song, full songs, 28 | full songlist, full song list, best of 29 | 30 | banned_channels = 31 | 32 | 33 | 34 | 35 | #----------------------------------------------------------------------------------------------------------------------- 36 | [CUSTOM_FILTERS] 37 | 38 | break_limit = 0 39 | 40 | 1_min_absolute_duration = 90 41 | 1_max_absolute_duration = 420 42 | 43 | #----------------------------------------------------------------------------------------------------------------------- 44 | [PRIORITY_RULES] 45 | 46 | preferred_channels = 47 | order = highest_view_count 48 | 49 | #----------------------------------------------------------------------------------------------------------------------- 50 | [DOWNLOADING_AND_POSTPROCESSING] 51 | 52 | videos_to_download = 1 53 | 54 | # arguments to pass to the youtube download module. (json dict. use double quotation marks instead of single quotation) 55 | # note: the outtmpl option is ignored, instead use the "naming_scheme" field. 56 | 57 | youtube_dl_arguments = {"socket_timeout": 3, 58 | "writethumbnail": "false", 59 | "outtmpl": "theme.%(ext)s", 60 | "format": "bestaudio", 61 | "postprocessors": [{ 62 | "key": "FFmpegExtractAudio", 63 | "preferredcodec": "mp3", 64 | "preferredquality": "192" 65 | }] 66 | } -------------------------------------------------------------------------------- /directory.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tools as tools 3 | from datetime import date 4 | import json 5 | 6 | 7 | class Directory(object): 8 | 9 | def __init__(self, full_path, tmdb_api_key=None, tmdb_id=None, json_dict=None): 10 | 11 | ######################################## 12 | self.name = None 13 | self.full_path = None 14 | self.content = dict 15 | self.subdirectories = dict() 16 | 17 | self.tmdb_id = None 18 | self.movie_title = None 19 | self.movie_original_title = None 20 | self.movie_original_title_keywords = None 21 | self.movie_release_year = None 22 | self.movie_title_keywords = list() 23 | self.movie_crew_data = list() 24 | self.trailer_youtube_video_id = None 25 | 26 | self.banned_title_keywords = list() 27 | self.banned_years = list() 28 | self.banned_youtube_videos_id = list() 29 | 30 | self.record = list() 31 | self.completed_configs = list() 32 | ######################################## 33 | 34 | if full_path is None: 35 | for key, value in json_dict.items(): 36 | setattr(self, key, value) 37 | else: 38 | self.update_all(full_path=full_path, tmdb_api_key=tmdb_api_key, tmdb_id=tmdb_id) 39 | 40 | @classmethod 41 | def load_directory(cls, file): 42 | with open(file, 'r') as load_file: 43 | return Directory(None, json_dict=json.load(load_file)) 44 | 45 | def update_all(self, full_path=None, tmdb_api_key=None, tmdb_id=None): 46 | if full_path is not None: 
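# building from a folder on disk: take the folder name, hash the directory contents, and try to
# identify the movie (via TMDB when an API key is available, otherwise from the folder name alone)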
47 | self.name = os.path.split(full_path)[1] 48 | self.full_path = full_path 49 | self.update_content() 50 | self.update_movie_info(tmdb_api_key=tmdb_api_key, tmdb_id=tmdb_id) 51 | if tmdb_api_key is not None: 52 | self.update_similar_results(tmdb_api_key) 53 | 54 | def update_content(self): 55 | 56 | self.content = dict() 57 | self.subdirectories = dict() 58 | 59 | for file in os.listdir(self.full_path): 60 | if os.path.isdir(os.path.join(self.full_path, file)): 61 | sub_content = dict() 62 | for sub_file in os.listdir(os.path.join(self.full_path, file)): 63 | sub_content[sub_file] = tools.hash_file(os.path.join(self.full_path, file, sub_file)) 64 | self.subdirectories[file] = sub_content 65 | else: 66 | self.content[file] = tools.hash_file(os.path.join(self.full_path, file)) 67 | 68 | def update_movie_info(self, tmdb_api_key=None, tmdb_id=None): 69 | def get_info_from_directory(): 70 | clean_name_tuple = tools.get_clean_string(self.name).split(' ') 71 | 72 | if any(clean_name_tuple[-1] == str(year) for year in range(1896, date.today().year + 2)): 73 | self.movie_release_year = int(clean_name_tuple[-1]) 74 | self.movie_title = ' '.join(clean_name_tuple[:-1]) 75 | self.movie_original_title = ' '.join(clean_name_tuple[:-1]) 76 | 77 | else: 78 | self.movie_release_year = None 79 | self.movie_title = ' '.join(clean_name_tuple) 80 | self.movie_original_title = ' '.join(clean_name_tuple) 81 | 82 | self.movie_title_keywords = tools.get_keyword_list(self.movie_title) 83 | self.movie_original_title_keywords = tools.get_keyword_list(self.movie_original_title) 84 | 85 | return True 86 | 87 | def get_info_from_details(): 88 | details_data = tools.get_tmdb_details_data(tmdb_api_key, tmdb_id) 89 | if details_data is not None: 90 | try: 91 | self.tmdb_id = details_data['id'] 92 | self.movie_title = details_data['title'] 93 | self.movie_original_title = details_data['original_title'] 94 | self.movie_title_keywords = tools.get_keyword_list(details_data['title']) 95 | self.movie_original_title_keywords = tools.get_keyword_list(details_data['original_title']) 96 | 97 | if len(details_data['release_date'][:4]) == 4: 98 | self.movie_release_year = int(details_data['release_date'][:4]) 99 | else: 100 | self.movie_release_year = None 101 | return True 102 | except KeyError as ke: 103 | return False 104 | except TypeError as te: 105 | return False 106 | else: 107 | return False 108 | 109 | def get_info_from_search(): 110 | search_data = tools.get_tmdb_search_data(tmdb_api_key, self.movie_title) 111 | 112 | if search_data is None or search_data['total_results'] == 0: 113 | return False 114 | 115 | movie_data = None 116 | movie_backup_data = None 117 | 118 | if self.movie_release_year is None: 119 | movie_data = search_data['results'][0] 120 | else: 121 | 122 | for result in search_data['results'][:5]: 123 | try: 124 | if result['release_date'] is None: 125 | result['release_date'] = '000000000000000' 126 | continue 127 | except KeyError: 128 | result['release_date'] = '000000000000000' 129 | continue 130 | if movie_data is None: 131 | if str(self.movie_release_year) == result['release_date'][:4]: 132 | movie_data = result 133 | elif result['release_date'][6:8] in ['09', '10', '11', '12'] \ 134 | and str(self.movie_release_year - 1) == result['release_date'][:4]: 135 | movie_data = result 136 | elif result['release_date'][6:8] in ['01', '02', '03', '04'] \ 137 | and str(self.movie_release_year + 1) == result['release_date'][:4]: 138 | movie_data = result 139 | elif movie_backup_data is None: 140 | if 
str(self.movie_release_year - 1) == result['release_date'][:4]: 141 | movie_backup_data = result 142 | 143 | elif str(self.movie_release_year + 1) == result['release_date'][:4]: 144 | movie_backup_data = result 145 | 146 | if movie_data is None and movie_backup_data is not None: 147 | print('None of the search results had a correct release year, picking the next best result') 148 | movie_data = movie_backup_data 149 | 150 | if movie_data is None: 151 | movie_data = search_data['results'][0] 152 | 153 | self.tmdb_id = movie_data['id'] 154 | self.movie_title = tools.get_clean_string(movie_data['title']) 155 | self.movie_original_title = tools.get_clean_string(movie_data['original_title']) 156 | self.movie_title_keywords = tools.get_keyword_list(movie_data['title']) 157 | self.movie_original_title_keywords = tools.get_keyword_list(movie_data['original_title']) 158 | 159 | if len(movie_data['release_date'][:4]) == 4: 160 | self.movie_release_year = int(movie_data['release_date'][:4]) 161 | else: 162 | self.movie_release_year = None 163 | return True 164 | 165 | if tmdb_api_key is not None: 166 | if tmdb_id is not None: 167 | if get_info_from_details(): 168 | return True 169 | else: 170 | tmdb_id = None 171 | if get_info_from_directory(): 172 | if get_info_from_search(): 173 | return True 174 | else: 175 | return False 176 | 177 | return get_info_from_directory() 178 | 179 | def update_similar_results(self, tmdb_api_key): 180 | 181 | def find_similar_results(): 182 | 183 | def find_by_tmdb_id(): 184 | similar_movies_data = list() 185 | movie_found = False 186 | 187 | for result in search_data['results']: 188 | 189 | if self.tmdb_id == result['id']: 190 | movie_found = True 191 | else: 192 | similar_movies_data.append(result) 193 | 194 | if movie_found: 195 | return similar_movies_data 196 | else: 197 | return None 198 | 199 | def find_by_release_year(): 200 | similar_movies_data = list() 201 | movie_found = False 202 | backup_found = False 203 | 204 | for result in search_data['results']: 205 | 206 | if not movie_found and str(self.movie_release_year) == result['release_date'][:4]: 207 | movie_found = True 208 | continue 209 | 210 | elif not backup_found: 211 | 212 | if result['release_date'][6:8] in ['09', '10', '11', '12'] \ 213 | and str(self.movie_release_year - 1) == result['release_date'][:4]: 214 | backup_found = True 215 | 216 | elif result['release_date'][6:8] in ['01', '02', '03'] \ 217 | and str(self.movie_release_year + 1 == result['release_date'][:4]): 218 | backup_found = True 219 | 220 | if len(similar_movies_data) < 5: 221 | similar_movies_data.append(result) 222 | 223 | if movie_found or backup_found: 224 | return similar_movies_data 225 | else: 226 | return None 227 | 228 | search_data = tools.get_tmdb_search_data(tmdb_api_key, self.movie_title) 229 | 230 | if search_data is None or search_data['total_results'] == 0: 231 | return list() 232 | 233 | ret = find_by_tmdb_id() 234 | if ret is not None: 235 | return ret[:5] 236 | 237 | if self.movie_release_year is None: 238 | return search_data['results'][1:6] 239 | 240 | ret = find_by_release_year() 241 | if ret is not None: 242 | return ret[:5] 243 | 244 | return None 245 | 246 | def process_similar_results(): 247 | self.banned_title_keywords = list() 248 | self.banned_years = list() 249 | 250 | for similar_movie in similar_movies: 251 | 252 | for word in tools.get_keyword_list(similar_movie['title']): 253 | 254 | if (word.lower() not in self.movie_title.lower() 255 | and word.lower() not in self.banned_title_keywords): 256 | 257 
| if self.movie_original_title is not None: 258 | 259 | if word.lower() not in self.movie_original_title.lower(): 260 | self.banned_title_keywords.append(word) 261 | 262 | else: 263 | self.banned_title_keywords.append(word) 264 | try: 265 | if len(similar_movie['release_date'][:4]) == 4 \ 266 | and int(similar_movie['release_date'][:4]) not in ([self.movie_release_year] + 267 | self.banned_years) \ 268 | and similar_movie['release_date'][:4] not in self.movie_title: 269 | 270 | self.banned_years.append(int(similar_movie['release_date'][:4])) 271 | except KeyError as e: 272 | pass 273 | similar_movies = find_similar_results() 274 | if similar_movies is not None: 275 | process_similar_results() 276 | return True 277 | else: 278 | return False 279 | 280 | def save_directory(self, save_path): 281 | self.content = None 282 | self.subdirectories = None 283 | if not os.path.isdir(save_path): 284 | os.mkdir(os.path.join(save_path)) 285 | with open(os.path.join(save_path, self.name), 'w') as save_file: 286 | json.dump(self.__dict__, save_file) 287 | -------------------------------------------------------------------------------- /empty_default_config.cfg: -------------------------------------------------------------------------------- 1 | [SETTINGS] 2 | 3 | tmdb_api_key = 4 | 5 | [ADVANCED_SETTINGS] 6 | 7 | force_all = false 8 | -------------------------------------------------------------------------------- /extra_config.py: -------------------------------------------------------------------------------- 1 | import configparser 2 | import codecs 3 | import json 4 | 5 | import tools as tools 6 | 7 | 8 | class ExtraSettings: 9 | 10 | # todo: make into dictionary. 11 | 12 | # todo (0): make sure nothing fails to import. 13 | 14 | def __init__(self, config_path): 15 | 16 | with codecs.open(config_path, 'r', 'utf-8') as file: 17 | self.config = configparser.RawConfigParser() 18 | self.config.read_file(file) 19 | 20 | self.extra_type = self.config['EXTRA_CONFIG'].get('extra_type') 21 | self.config_id = self.config['EXTRA_CONFIG'].get('config_id') 22 | self.force = self.config['EXTRA_CONFIG'].getboolean('force') 23 | 24 | self.searches = self.get_searches() 25 | 26 | self.required_phrases = \ 27 | tools.make_list_from_string(self.config['FILTERING'].get('required_phrases').replace('\n', '')) 28 | self.banned_phrases = \ 29 | tools.make_list_from_string(self.config['FILTERING'].get('banned_phrases').replace('\n', '')) 30 | self.banned_channels = \ 31 | tools.make_list_from_string(self.config['FILTERING'].get('banned_channels').replace('\n', '')) 32 | 33 | self.custom_filters = self.get_custom_filters() 34 | self.last_resort_policy = self.config['DOWNLOADING_AND_POSTPROCESSING'].get('last_resort_policy') 35 | 36 | self.priority_order = self.config['PRIORITY_RULES'].get('order') 37 | self.preferred_channels = \ 38 | tools.make_list_from_string(self.config['PRIORITY_RULES'].get('preferred_channels', "").replace('\n', '')) 39 | 40 | self.videos_to_download = self.config['DOWNLOADING_AND_POSTPROCESSING'].getint('videos_to_download', 1) 41 | self.naming_scheme = self.config['DOWNLOADING_AND_POSTPROCESSING'].get('naming_scheme') 42 | self.youtube_dl_arguments = json.loads(self.config['DOWNLOADING_AND_POSTPROCESSING'].get('youtube_dl_arguments')) 43 | 44 | self.disable_play_trailers = self.config['EXTRA_CONFIG'].getboolean('disable_play_trailers', False) 45 | self.only_play_trailers = self.config['EXTRA_CONFIG'].getboolean('only_play_trailers', False) 46 | self.skip_movies_with_existing_trailers = \ 47 | 
self.config['EXTRA_CONFIG'].getboolean('skip_movies_with_existing_trailers', False) 48 | 49 | self.skip_movies_with_existing_theme = \ 50 | self.config['EXTRA_CONFIG'].getboolean('skip_movies_with_existing_theme', False) 51 | return 52 | 53 | def get_searches(self): 54 | 55 | ret = dict() 56 | 57 | for option, value in self.config['SEARCHES'].items(): 58 | 59 | try: 60 | index = int(option.split('_')[-1]) 61 | except ValueError: 62 | continue 63 | 64 | if index not in ret: 65 | ret[index] = dict() 66 | ret[index]['_'.join(option.split('_')[:-1])] = value 67 | 68 | return ret 69 | 70 | def get_custom_filters(self): 71 | 72 | ret = dict() 73 | 74 | for option, value in self.config['CUSTOM_FILTERS'].items(): 75 | 76 | if option == 'break_limit': 77 | self.break_limit = int(value) 78 | continue 79 | if option == 'last_resort_policy': 80 | self.last_resort_policy = value 81 | continue 82 | 83 | try: 84 | index = int(option.split('_')[0]) 85 | except ValueError: 86 | continue 87 | 88 | if index not in ret: 89 | ret[index] = list() 90 | try: 91 | ret[index].append('_'.join(option.split('_')[1:]) + ':::' + value) 92 | except ValueError: 93 | continue 94 | 95 | sorted_ret = list() 96 | for key in sorted(ret.keys()): 97 | sorted_ret.append(ret[key]) 98 | 99 | return sorted_ret 100 | -------------------------------------------------------------------------------- /extra_finder.py: -------------------------------------------------------------------------------- 1 | import os 2 | from youtube_dl import DownloadError 3 | import tools as tools 4 | import youtube_dl 5 | import url_finders 6 | from bisect import bisect 7 | from datetime import date 8 | import time 9 | import shutil 10 | 11 | 12 | class ExtraFinder: 13 | 14 | conn_errors = 0 15 | 16 | def __init__(self, directory, extra_config): 17 | 18 | self.directory = directory 19 | self.config = extra_config 20 | self.complete = True 21 | 22 | self.youtube_videos = list() 23 | self.play_trailers = list() 24 | 25 | def search(self): 26 | 27 | def create_youtube_video(): 28 | 29 | def get_video_data(): 30 | 31 | for tries in range(1, 11): 32 | 33 | try: 34 | 35 | with youtube_dl.YoutubeDL({'socket_timeout': '3'}) as ydl: 36 | return ydl.extract_info(url, download=False) 37 | 38 | except DownloadError as e: 39 | 40 | if 'ERROR: Unable to download webpage:' in e.args[0]: 41 | 42 | if tries > 3: 43 | print('hey, there: error!!!') 44 | raise 45 | 46 | print('failed to get video data, retrying') 47 | time.sleep(1) 48 | else: 49 | return None 50 | 51 | youtube_video = get_video_data() 52 | 53 | if not youtube_video: 54 | return None 55 | 56 | youtube_video['title'] = tools.get_clean_string(youtube_video['title']) 57 | 58 | if youtube_video['view_count'] is None: 59 | youtube_video['view_count'] = 100 60 | 61 | if youtube_video['view_count'] < 100: 62 | youtube_video['view_count'] = 100 63 | 64 | if youtube_video['average_rating'] is None: 65 | youtube_video['average_rating'] = 0 66 | 67 | if youtube_video['view_count'] is None: 68 | youtube_video['view_count'] = 0 69 | 70 | youtube_video['adjusted_rating'] = \ 71 | youtube_video['average_rating'] * (1 - 1 / ((youtube_video['view_count'] / 60) ** 0.5)) 72 | 73 | if youtube_video['width'] is None or youtube_video['height'] is None: 74 | youtube_video['resolution_ratio'] = 1 75 | youtube_video['resolution'] = 144 76 | else: 77 | youtube_video['resolution_ratio'] = youtube_video['width'] / youtube_video['height'] 78 | 79 | resolution = max(int(youtube_video['height']), 80 | int(youtube_video['width'] / 16 * 9)) 81 
| resolutions = [144, 240, 360, 480, 720, 1080, 1440, 2160] 82 | 83 | youtube_video['resolution'] = resolutions[bisect(resolutions, resolution * 1.2) - 1] 84 | 85 | if youtube_video['upload_date']: 86 | if youtube_video['upload_date'] is not None: 87 | date_str = youtube_video['upload_date'] 88 | upload_date = date(int(date_str[:4]), int(date_str[4:6]), int(date_str[6:8])) 89 | time_delta = date.today() - upload_date 90 | youtube_video['views_per_day'] = (youtube_video['view_count'] / 91 | (365 + time_delta.total_seconds() / 60 / 60 / 24)) 92 | else: 93 | print('no "upload_date"!!!') 94 | youtube_video['views_per_day'] = 0 95 | else: 96 | print('no "upload_date"!!!') 97 | youtube_video['views_per_day'] = 0 98 | return youtube_video 99 | 100 | url_list = list() 101 | 102 | for search_index, search in self.config.searches.items(): 103 | query = tools.apply_query_template(search['query'], self.directory.__dict__) 104 | limit = int(search['limit']) 105 | 106 | if search['source'] == 'google_search': 107 | urls = url_finders.google_search(query, limit) 108 | 109 | elif search['source'] == 'youtube_search': 110 | urls = url_finders.youtube_search(query, limit) 111 | 112 | elif search['source'] == 'google_channel_search': 113 | urls = url_finders.youtube_channel_search(query, limit) 114 | 115 | else: 116 | print("The search engine \"" + search['source'] + "\" wasn't recognized. Skipping.") 117 | print('Please use "google_search", "youtube_search" or "youtube_channel_search" as the source.') 118 | continue 119 | 120 | if urls: 121 | url_list += urls 122 | 123 | for url in list(set(url_list)): 124 | if not any(url in youtube_video['webpage_url'] 125 | or youtube_video['webpage_url'] in url 126 | for youtube_video in self.youtube_videos): 127 | if 'youtube.com/watch?v=' not in url: 128 | continue 129 | video = create_youtube_video() 130 | 131 | if video: 132 | self.youtube_videos.append(video) 133 | if not video['categories']: 134 | self.play_trailers.append(video) 135 | return 136 | 137 | def filter_search_result(self): 138 | 139 | filtered_candidates = list() 140 | 141 | for youtube_video in self.youtube_videos: 142 | 143 | info = 'Video "' + youtube_video['webpage_url'] + '" was removed. 
reasons: ' 144 | append_video = True 145 | 146 | for youtube_id in self.directory.banned_youtube_videos_id: 147 | if youtube_id == youtube_video['id']: 148 | info += 'banned youtube video, ' 149 | append_video = False 150 | break 151 | 152 | try: 153 | for year in self.directory.banned_years: 154 | if str(year) in youtube_video['title'].lower(): 155 | append_video = False 156 | info += 'containing banned year in title, ' 157 | break 158 | if any(str(year) in tag.lower() for tag in youtube_video['tags']): 159 | append_video = False 160 | info += 'containing banned year in tags, ' 161 | break 162 | except TypeError: 163 | append_video = False 164 | info += 'unable to confirm year not in (tag:TypeError), ' 165 | 166 | buffer = 0 167 | if len(self.directory.banned_title_keywords) > 3: 168 | buffer = 1 169 | if len(self.directory.banned_title_keywords) > 10: 170 | buffer = 2 171 | for keyword in self.directory.banned_title_keywords: 172 | if ' ' + keyword.lower() + ' ' in ' ' + youtube_video['title'].lower() + ' ': 173 | buffer -= 1 174 | if buffer < 0: 175 | append_video = False 176 | info += 'containing banned similar title keywords, ' 177 | break 178 | 179 | if not any(phrase.lower() in youtube_video['title'].lower() for phrase in self.config.required_phrases): 180 | append_video = False 181 | info += 'not containing any required phrase, ' 182 | 183 | for phrase in self.config.banned_phrases: 184 | if phrase.lower() in youtube_video['title'].lower(): 185 | append_video = False 186 | info += 'containing a banned phrase, ' 187 | break 188 | 189 | for channel in self.config.banned_channels: 190 | if channel.lower() == youtube_video['uploader'].lower(): 191 | append_video = False 192 | info += 'made by a banned channel, ' 193 | break 194 | 195 | title_in_video = False 196 | original_title_in_video = False 197 | 198 | buffer = 0 199 | if len(self.directory.movie_title_keywords) > 3: 200 | buffer = 1 201 | if len(self.directory.movie_title_keywords) > 7: 202 | buffer = 2 203 | 204 | for keyword in self.directory.movie_title_keywords: 205 | if ' ' + keyword.lower() + ' ' not in ' ' + youtube_video['title'].lower() + ' ': 206 | buffer -= 1 207 | if buffer < 0: 208 | break 209 | else: 210 | title_in_video = True 211 | 212 | if self.directory.movie_original_title is not None: 213 | buffer = int(len(self.directory.movie_original_title_keywords) / 4 + 0.1) 214 | 215 | for keyword in self.directory.movie_original_title_keywords: 216 | if ' ' + keyword.lower() + ' ' not in ' ' + youtube_video['title'].lower() + ' ': 217 | buffer -= 1 218 | if buffer < 0: 219 | break 220 | else: 221 | original_title_in_video = True 222 | 223 | if not original_title_in_video and not title_in_video: 224 | append_video = False 225 | info += 'not containing title, ' 226 | 227 | if append_video: 228 | filtered_candidates.append(youtube_video) 229 | else: 230 | print(info[:-2] + '.') 231 | 232 | self.youtube_videos = filtered_candidates 233 | 234 | filtered_candidates = list() 235 | 236 | for youtube_video in self.play_trailers: 237 | 238 | info = 'Video "' + youtube_video['webpage_url'] + '" was removed. 
reasons: ' 239 | append_video = True 240 | 241 | for year in self.directory.banned_years: 242 | if str(year) in youtube_video['title'].lower(): 243 | append_video = False 244 | info += 'containing banned year in title, ' 245 | break 246 | if any(str(year) in tag.lower() for tag in youtube_video['tags']): 247 | append_video = False 248 | info += 'containing banned year in tags, ' 249 | break 250 | 251 | buffer = 0 252 | if len(self.directory.banned_title_keywords) > 3: 253 | buffer = 1 254 | if len(self.directory.banned_title_keywords) > 6: 255 | buffer = 2 256 | for keyword in self.directory.banned_title_keywords: 257 | if ' ' + keyword.lower() + ' ' in ' ' + youtube_video['title'].lower() + ' ': 258 | buffer -= 1 259 | if buffer < 0: 260 | append_video = False 261 | info += 'containing banned similar title keywords, ' 262 | break 263 | 264 | title_in_video = False 265 | original_title_in_video = False 266 | 267 | buffer = 0 268 | if len(self.directory.movie_title_keywords) > 3: 269 | buffer = 1 270 | if len(self.directory.movie_title_keywords) > 7: 271 | buffer = 2 272 | 273 | for keyword in self.directory.movie_title_keywords: 274 | if keyword.lower() not in youtube_video['title'].lower(): 275 | buffer -= 1 276 | if buffer < 0: 277 | break 278 | else: 279 | title_in_video = True 280 | 281 | if self.directory.movie_original_title is not None: 282 | buffer = int(len(self.directory.movie_original_title_keywords) / 4 + 0.1) 283 | 284 | for keyword in self.directory.movie_original_title_keywords: 285 | if keyword.lower() not in youtube_video['title'].lower(): 286 | buffer -= 1 287 | if buffer < 0: 288 | break 289 | else: 290 | original_title_in_video = True 291 | 292 | if not original_title_in_video and not title_in_video: 293 | append_video = False 294 | info += 'not containing title, ' 295 | 296 | if append_video: 297 | filtered_candidates.append(youtube_video) 298 | else: 299 | print(info[:-2] + '.') 300 | 301 | self.play_trailers = filtered_candidates 302 | 303 | def apply_custom_filters(self): 304 | 305 | def absolute(): 306 | 307 | minimum = filter_args[0] == 'min' 308 | ret = list() 309 | 310 | for youtube_video in filtered_list: 311 | if minimum: 312 | if youtube_video[key] >= limit_value: 313 | ret.append(youtube_video) 314 | else: 315 | if youtube_video[key] <= limit_value: 316 | ret.append(youtube_video) 317 | return ret 318 | 319 | def relative(): 320 | 321 | minimum = filter_args[0] == 'min' 322 | ret = list() 323 | max_value = float('-inf') 324 | 325 | for youtube_video in filtered_list: 326 | video_value = youtube_video[key] 327 | if video_value > max_value: 328 | max_value = video_value 329 | 330 | for youtube_video in filtered_list: 331 | if minimum: 332 | if youtube_video[key] >= max_value * limit_value: 333 | ret.append(youtube_video) 334 | else: 335 | if youtube_video[key] <= max_value * limit_value: 336 | ret.append(youtube_video) 337 | return ret 338 | 339 | def highest(): 340 | keep = filter_args[0] == 'keep' 341 | 342 | ret = sorted(filtered_list, key=lambda x: x[key], reverse=True) 343 | 344 | if keep: 345 | if len(ret) > limit_value: 346 | ret = ret[:limit_value] 347 | else: 348 | ret = ret 349 | else: 350 | if len(ret) > limit_value: 351 | ret = ret[limit_value:] 352 | else: 353 | ret = list() 354 | 355 | return ret 356 | 357 | def lowest(): 358 | keep = filter_args[0] == 'keep' 359 | 360 | ret = sorted(filtered_list, key=lambda x: x[key]) 361 | 362 | if keep: 363 | if len(ret) > limit_value: 364 | ret = ret[:limit_value] 365 | else: 366 | ret = ret 367 | else: 368 
| if len(ret) > limit_value: 369 | ret = ret[limit_value:] 370 | else: 371 | ret = list() 372 | 373 | return ret 374 | 375 | filtered_list = None 376 | 377 | for filter_package in self.config.custom_filters: 378 | 379 | filtered_list = list(self.youtube_videos) 380 | 381 | for data in filter_package: 382 | filter_args = data.split(':::')[0].split('_') 383 | limit_value = float(data.split(':::')[1]) 384 | try: 385 | int(filter_args[-1]) 386 | except ValueError: 387 | key = '_'.join(filter_args[2:]) 388 | else: 389 | key = '_'.join(filter_args[2:-1]) 390 | 391 | if filter_args[1] == 'relative': 392 | filtered_list = relative() 393 | if filter_args[1] == 'absolute': 394 | filtered_list = absolute() 395 | if filter_args[1] == 'highest': 396 | filtered_list = highest() 397 | if filter_args[1] == 'lowest': 398 | filtered_list = lowest() 399 | if self.play_trailers and self.config.extra_type == 'trailers': 400 | if len(filtered_list) + 1 >= self.config.break_limit: 401 | break 402 | else: 403 | if len(filtered_list) >= self.config.break_limit: 404 | break 405 | 406 | self.youtube_videos = filtered_list 407 | 408 | return 409 | 410 | def order_results(self): 411 | 412 | attribute_tuple = self.config.priority_order.split('_') 413 | highest = attribute_tuple[0] == 'highest' 414 | key = '_'.join(attribute_tuple[1:]) 415 | 416 | for youtube_video in self.youtube_videos: 417 | if youtube_video[key] is None: 418 | youtube_video[key] = 0 419 | 420 | if highest: 421 | self.youtube_videos = sorted(self.youtube_videos, key=lambda x: x[key], reverse=True) 422 | else: 423 | self.youtube_videos = sorted(self.youtube_videos, key=lambda x: x[key]) 424 | 425 | preferred_videos = list() 426 | not_preferred_channels = list() 427 | 428 | for youtube_video in self.youtube_videos: 429 | if youtube_video['uploader'] in self.config.preferred_channels: 430 | preferred_videos.append(youtube_video) 431 | else: 432 | not_preferred_channels.append(youtube_video) 433 | 434 | self.youtube_videos = preferred_videos + not_preferred_channels 435 | 436 | self.play_trailers = sorted(self.play_trailers, key=lambda x: x['view_count'], reverse=True) 437 | 438 | def download_videos(self, tmp_file): 439 | 440 | downloaded_videos_meta = list() 441 | 442 | arguments = self.config.youtube_dl_arguments 443 | arguments['outtmpl'] = os.path.join(tmp_file, arguments['outtmpl']) 444 | for key, value in arguments.items(): 445 | if isinstance(value, str): 446 | if value.lower() == 'false' or value.lower() == 'no': 447 | arguments[key] = '' 448 | 449 | count = 0 450 | 451 | for youtube_video in self.youtube_videos[:]: 452 | if not self.config.force: 453 | for vid_id in self.directory.record: 454 | if vid_id == youtube_video['id']: 455 | continue 456 | 457 | for tries in range(1, 11): 458 | try: 459 | with youtube_dl.YoutubeDL(arguments) as ydl: 460 | meta = ydl.extract_info(youtube_video['webpage_url']) 461 | downloaded_videos_meta.append(meta) 462 | count += 1 463 | break 464 | 465 | except DownloadError as e: 466 | if tries > 3: 467 | if str(e).startswith('ERROR: Did not get any data blocks'): 468 | return 469 | print('failed to download the video.') 470 | break 471 | print('failed to download the video. 
retrying') 472 | time.sleep(3) 473 | 474 | if count >= self.config.videos_to_download: 475 | break 476 | 477 | return downloaded_videos_meta 478 | 479 | def move_videos(self, downloaded_videos_meta, tmp_folder): 480 | 481 | def copy_file(): 482 | if not os.path.isdir(os.path.split(target_path)[0]): 483 | os.mkdir(os.path.split(target_path)[0]) 484 | shutil.move(source_path, target_path) 485 | 486 | def record_file(): 487 | vid_id = 'unknown' 488 | for meta in downloaded_videos_meta: 489 | if meta['title'] + '.' + meta['ext'] == file: 490 | vid_id = meta['id'] 491 | break 492 | 493 | self.directory.record.append( 494 | {'hash': file_hash, 495 | 'file_path': os.path.join(self.directory.full_path, self.config.extra_type, file), 496 | 'file_name': file, 497 | 'youtube_video_id': vid_id, 498 | 'config_type': self.config.extra_type}) 499 | 500 | def determine_case(): 501 | for content_file, content_file_hash in self.directory.content.items(): 502 | if content_file == file: 503 | return 'name_in_directory' 504 | 505 | if file_hash == content_file_hash: 506 | return 'hash_in_directory' 507 | 508 | for sub_content in self.directory.subdirectories.values(): 509 | for content_file, content_file_hash in sub_content.items(): 510 | if content_file == file: 511 | return 'name_in_directory' 512 | if file_hash == content_file_hash: 513 | return 'hash_in_directory' 514 | 515 | return '' 516 | 517 | def handle_name_in_directory(): 518 | if self.config.force: 519 | copy_file() 520 | record_file() 521 | self.directory.subdirectories[self.config.extra_type][file] = file_hash 522 | else: 523 | os.remove(source_path) 524 | 525 | def handle_hash_in_directory(): 526 | if self.config.force: 527 | copy_file() 528 | record_file() 529 | if self.config.extra_type in self.directory.subdirectories: 530 | self.directory.subdirectories[self.config.extra_type] = {file: file_hash} 531 | else: 532 | self.directory.subdirectories = {self.config.extra_type: {file: file_hash}} 533 | else: 534 | os.remove(source_path) 535 | 536 | for file in os.listdir(tmp_folder): 537 | source_path = os.path.join(tmp_folder, file) 538 | if self.config.extra_type == "theme-music": 539 | target_path = os.path.join(self.directory.full_path, 'theme.mp3') 540 | else: 541 | target_path = os.path.join(self.directory.full_path, self.config.extra_type, file) 542 | 543 | file_hash = tools.hash_file(source_path) 544 | 545 | if any(file_hash == record['hash'] for record in self.directory.record): 546 | os.remove(source_path) 547 | continue 548 | 549 | case = determine_case() 550 | 551 | if case == 'name_in_directory': 552 | handle_name_in_directory() 553 | elif case == 'hash_in_directory': 554 | handle_hash_in_directory() 555 | else: 556 | copy_file() 557 | 558 | if self.config.extra_type in self.directory.subdirectories: 559 | self.directory.subdirectories[self.config.extra_type][file] = file_hash 560 | else: 561 | self.directory.subdirectories = {self.config.extra_type: {file: file_hash}} 562 | 563 | record_file() 564 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from extra_finder import ExtraFinder 4 | 5 | 6 | def download_extra(directory, config, tmp_folder): 7 | def process_trailers_config(tmp_folder): 8 | 9 | finder = ExtraFinder(directory, config) 10 | print('processing: ' + directory.name) 11 | finder.search() 12 | finder.filter_search_result() 13 | 14 | for youtube_video in finder.youtube_videos: 15 | 
print('--------------------------------------------------------------------------------------') 16 | print(youtube_video['webpage_url']) 17 | print(str(youtube_video['adjusted_rating'])) 18 | print(youtube_video['format']) 19 | print(str(youtube_video['views_per_day'])) 20 | print('--------------------------------------------------------------------------------------') 21 | print(directory.name) 22 | 23 | finder.apply_custom_filters() 24 | finder.order_results() 25 | 26 | if finder.play_trailers and finder.youtube_videos and not config.disable_play_trailers: 27 | if 'duration' in finder.youtube_videos[0] and 'duration' in finder.play_trailers[0]: 28 | if finder.youtube_videos[0]['duration'] - 23 <= \ 29 | finder.play_trailers[0]['duration'] <= \ 30 | finder.youtube_videos[0]['duration'] + 5: 31 | finder.youtube_videos = [finder.play_trailers[0]] + finder.youtube_videos 32 | print('picked play trailer.') 33 | # if len(finder.youtube_videos) < config.break_limit: 34 | # finder.youtube_videos = [finder.play_trailers[0]] + finder.youtube_videos 35 | 36 | if config.only_play_trailers: 37 | if finder.play_trailers: 38 | finder.youtube_videos = [finder.play_trailers[0]] 39 | else: 40 | return 41 | 42 | if not finder.youtube_videos and finder.play_trailers and not config.disable_play_trailers: 43 | finder.youtube_videos = finder.play_trailers 44 | 45 | for youtube_video in finder.youtube_videos: 46 | print(youtube_video['webpage_url'] + ' : ' + 47 | youtube_video['format'] + 48 | ' (' + str(youtube_video['adjusted_rating']) + ')') 49 | for youtube_video in finder.play_trailers: 50 | print('play trailer: ' + youtube_video['webpage_url'] + ' : ' + youtube_video['format']) 51 | print('--------------------------------------------------------------------------------------') 52 | print('downloading for: ' + directory.name) 53 | count = 0 54 | tmp_folder = os.path.join(tmp_folder, 'tmp_0') 55 | while True: 56 | try: 57 | while os.listdir(tmp_folder): 58 | if count == 0 and not tmp_folder.endswith('_0'): 59 | tmp_folder += '_0' 60 | else: 61 | tmp_folder = tmp_folder[:-2] + '_' + str(count) 62 | count += 1 63 | break 64 | except FileNotFoundError: 65 | os.mkdir(tmp_folder) 66 | for youtube_id in directory.banned_youtube_videos_id: 67 | for youtube_video in finder.youtube_videos: 68 | if youtube_id == youtube_video['id']: 69 | finder.youtube_videos.remove(youtube_video) 70 | 71 | downloaded_videos_meta = finder.download_videos(tmp_folder) 72 | if downloaded_videos_meta: 73 | finder.move_videos(downloaded_videos_meta, tmp_folder) 74 | if "trailer" in config.extra_type.lower(): 75 | directory.trailer_youtube_video_id = downloaded_videos_meta[0]['id'] 76 | 77 | def process_interviews_config(): 78 | pass 79 | 80 | def process_behind_the_scenes_config(): 81 | pass 82 | 83 | def process_featurettes_config(): 84 | pass 85 | 86 | def process_deleted_scenes_config(): 87 | pass 88 | 89 | def process_theme_music_config(tmp_folder): 90 | 91 | finder = ExtraFinder(directory, config) 92 | print('processing: ' + directory.name) 93 | finder.search() 94 | finder.filter_search_result() 95 | 96 | for youtube_video in finder.youtube_videos: 97 | print('--------------------------------------------------------------------------------------') 98 | print(youtube_video['webpage_url']) 99 | print(str(youtube_video['adjusted_rating'])) 100 | print(youtube_video['format']) 101 | print(str(youtube_video['views_per_day'])) 102 | print('--------------------------------------------------------------------------------------') 103 | 
print(directory.name) 104 | 105 | finder.apply_custom_filters() 106 | finder.order_results() 107 | 108 | for youtube_video in finder.youtube_videos: 109 | print(youtube_video['webpage_url'] + ' : ' + 110 | youtube_video['format'] + 111 | ' (' + str(youtube_video['adjusted_rating']) + ')') 112 | for youtube_video in finder.play_trailers: 113 | print('play trailer: ' + youtube_video['webpage_url'] + ' : ' + youtube_video['format']) 114 | print('--------------------------------------------------------------------------------------') 115 | print('downloading for: ' + directory.name) 116 | count = 0 117 | tmp_folder = os.path.join(tmp_folder, 'tmp_0') 118 | while True: 119 | try: 120 | while os.listdir(tmp_folder): 121 | if count == 0 and not tmp_folder.endswith('_0'): 122 | tmp_folder += '_0' 123 | else: 124 | tmp_folder = tmp_folder[:-2] + '_' + str(count) 125 | count += 1 126 | break 127 | except FileNotFoundError: 128 | os.mkdir(tmp_folder) 129 | 130 | downloaded_videos_meta = finder.download_videos(tmp_folder) 131 | if downloaded_videos_meta: 132 | finder.move_videos(downloaded_videos_meta, tmp_folder) 133 | 134 | if config.extra_type == 'trailers': 135 | process_trailers_config(tmp_folder) 136 | elif config.extra_type == 'interviews': 137 | process_interviews_config() 138 | elif config.extra_type == 'behind the scenes': 139 | process_behind_the_scenes_config() 140 | elif config.extra_type == 'featurettes': 141 | process_featurettes_config() 142 | elif config.extra_type == 'theme-music': 143 | process_theme_music_config(tmp_folder) 144 | elif config.extra_type == 'deleted scenes': 145 | process_deleted_scenes_config() 146 | 147 | # 148 | # library1 = '/storage/plex/library/Filmer' 149 | # library2 = 'testdir' 150 | # 151 | # c = configparser.ConfigParser() 152 | # c.read('default_config.cfg') 153 | # 154 | # tmp_folder = os.path.join(os.path.dirname(sys.argv[0]), 'tmp') 155 | # 156 | # library = library1 157 | # library_content = os.listdir(library) 158 | # 159 | # configs = os.path.join(os.path.dirname(sys.argv[0]), 'extra_configs') 160 | # configs_content = os.listdir(configs) 161 | # 162 | # records = os.path.join(os.path.dirname(sys.argv[0]), 'records') 163 | # 164 | # force = False 165 | # 166 | # for folder in library_content: 167 | # if re.match("^\\(.*\\)$", folder) or re.match("^\\..*", folder): 168 | # continue 169 | # for config in configs_content: 170 | # if config.startswith('.'): 171 | # continue 172 | # try: 173 | # try: 174 | # directory = Directory.load_directory(os.path.join(records, folder)) 175 | # except FileNotFoundError: 176 | # directory = Directory(os.path.join(library, folder), c.get('SETTINGS', 'tmdb_api_key')) 177 | # 178 | # extra_config = ExtraSettings(os.path.join(configs, config)) 179 | # if extra_config.config_id in directory.completed_configs and not force: 180 | # continue 181 | # 182 | # directory.update_content() 183 | # 184 | # if force: 185 | # old_record = directory.record 186 | # directory.record = list() 187 | # extra_config.force = True 188 | # 189 | # if not os.path.isdir(tmp_folder): 190 | # os.mkdir(tmp_folder) 191 | # 192 | # 193 | # download_extra(directory, extra_config, tmp_folder) 194 | # 195 | # if force: 196 | # # todo: delete all paths in the old record that are not in the new record 197 | # pass 198 | # 199 | # except FileNotFoundError as e: 200 | # print('file not found: ' + str(e.args[0])) 201 | # continue 202 | # 203 | # except HTTPError: 204 | # print('You might have been flagged by google search. 
try again tomorrow.') 205 | # sys.exit() 206 | # 207 | # except URLError: 208 | # print('you might have lost your internet connections. exiting') 209 | # sys.exit() 210 | # 211 | # except timeout: 212 | # print('you might have lost your internet connections. exiting') 213 | # sys.exit() 214 | # 215 | # except ConnectionResetError: 216 | # print('you might have lost your internet connections. exiting') 217 | # sys.exit() 218 | # 219 | # except KeyboardInterrupt: 220 | # sys.exit() 221 | # try: 222 | # shutil.rmtree(tmp_folder) 223 | # except FileNotFoundError: 224 | # pass 225 | # os.mkdir(tmp_folder) 226 | # 227 | # sys.exit() 228 | -------------------------------------------------------------------------------- /old_code/Movie-Extra-Downloader.py: -------------------------------------------------------------------------------- 1 | import os 2 | import configparser 3 | from configparser import NoOptionError 4 | import fnmatch 5 | import pprint 6 | import shutil 7 | import time 8 | import sys 9 | import codecs 10 | from urllib.request import urlopen 11 | from urllib.error import URLError, HTTPError 12 | import json 13 | from socket import timeout 14 | 15 | # pip install these packages: 16 | try: 17 | from googlesearch import search as google_search # google package 18 | except ImportError: 19 | print('Please upgrade to python 3.6 or run the 2.7 version.') 20 | sys.exit() 21 | from pytube import YouTube # pytube package 22 | from pytube import exceptions 23 | import ffmpeg 24 | # also, install FFmpeg. 25 | 26 | ######################################################################################################################## 27 | 28 | # global variables: 29 | # todo: check config exists. 30 | with codecs.open(os.path.join(os.path.dirname(sys.argv[0]), 'default_config.cfg'), 'r', 'utf-8') as file: 31 | global_config = configparser.ConfigParser() 32 | global_config.read_file(file) 33 | global_settings = global_config['SETTINGS'] 34 | global_advanced_settings = global_config['ADVANCED_SETTINGS'] 35 | 36 | library_dir = global_settings.get('library_directory') 37 | # todo: check library_dir exists. 38 | 39 | temp_dir = global_settings.get('temporary_directory', os.path.dirname(sys.argv[0])) 40 | movie_folder_naming_scheme = global_settings.get('movie_folder_naming_scheme') 41 | 42 | tmdb_api_key = global_settings.get('tmdb_api_key', None) 43 | if tmdb_api_key is None: 44 | has_tmdb_api_key = False 45 | else: 46 | has_tmdb_api_key = True 47 | # todo: check tmdb_api_key is ok. 
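# One possible way to handle the "check tmdb_api_key is ok" todo above, sketched as a
# hypothetical helper that is not called anywhere in this script. It assumes the public
# TMDB v3 /configuration endpoint, which answers HTTP 401 when the api_key is invalid,
# and it reuses the urlopen / HTTPError imports at the top of this file.
def tmdb_api_key_looks_valid(api_key):
    try:
        # Any authenticated endpoint would do; /configuration is small and side-effect free.
        urlopen('https://api.themoviedb.org/3/configuration?api_key=' + api_key, timeout=5).close()
        return True
    except HTTPError as e:
        if e.code == 401:
            return False
        raise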
48 | 49 | extra_config_dir = os.path.join(os.path.dirname(sys.argv[0]), 'extra_configs') 50 | 51 | tmdb_movie_search_result = None 52 | tmdb_movie_details = None 53 | tmdb_movie_cast_members = None 54 | 55 | ######################################################################################################################## 56 | 57 | 58 | def main(): 59 | 60 | global tmdb_movie_details 61 | global tmdb_movie_cast_members 62 | global tmdb_movie_search_result 63 | 64 | for directory_name in os.listdir(library_dir): 65 | tmdb_movie_search_result = None 66 | tmdb_movie_details = None 67 | tmdb_movie_cast_members = None 68 | print(directory_name) 69 | directory = get_directory_data(directory_name) 70 | # todo: make sure it's a movie directory 71 | for config in os.listdir(extra_config_dir): 72 | # todo: make sure it's a .cfg file 73 | if process_directory(directory, config): 74 | pass 75 | # todo: record info to record_data 76 | 77 | 78 | def process_directory(directory, config_file): 79 | 80 | with codecs.open(os.path.join(os.path.dirname(sys.argv[0]), extra_config_dir, config_file), 'r', 'utf-8') as file2: 81 | settings = configparser.ConfigParser() 82 | settings.read_file(file2) 83 | 84 | # decide if a search is to be made. 85 | 86 | if not settings['EXTRA_CONFIG'].getboolean('force') or not global_advanced_settings.getboolean('force_all'): 87 | if directory['record_data'].get('completed', False): 88 | return False 89 | 90 | # make the search and return a youtube video source. 91 | 92 | video = get_video(directory, settings) 93 | 94 | # download the extra from youtube 95 | download_info = download(video, settings) 96 | # post process the extra 97 | 98 | post_process(download_info, settings) 99 | 100 | return True 101 | 102 | 103 | def get_directory_data(directory_name): 104 | 105 | directory_ret = dict() 106 | 107 | directory_ret['name'] = directory_name 108 | directory_ret['full_path'] = os.path.join(library_dir, directory_name) 109 | directory_ret['clean_name_tuple'] = (directory_ret['name'] 110 | .replace('(', '') 111 | .replace(')', '') 112 | .replace('[', '') 113 | .replace(']', '') 114 | .replace('{', '') 115 | .replace('}', '') 116 | .replace(':', '') 117 | .replace(';', '') 118 | .replace('.', ' ') 119 | .replace('_', ' ') 120 | .lower()).split(' ') 121 | 122 | if global_settings.getboolean('release_year_end_of_file'): 123 | directory_ret['release_year'] = directory_ret['clean_name_tuple'][-1] 124 | directory_ret['movie_name'] = ' '.join(directory_ret['clean_name_tuple'][:-1]) 125 | else: 126 | directory_ret['release_year'] = None 127 | directory_ret['movie_name'] = ' '.join(directory_ret['clean_name_tuple']) 128 | 129 | # todo: make sure release_year is a year between 1700 and 2100. else > None 130 | 131 | directory_ret['files'] = list() 132 | for file3 in os.listdir(directory_ret['full_path']): 133 | directory_ret['files'].append(file3) 134 | # todo: nested folders. 135 | 136 | # todo: make sure the record_data file exists. 
and that it is an valid json list 137 | if os.path.isfile('record_data'): 138 | with open('record_data') as data_file: 139 | data = json.load(data_file) 140 | if directory_ret['name'] in data: 141 | directory_ret['record_data'] = data[directory_ret['name']] 142 | else: 143 | directory_ret['record_data'] = dict() 144 | 145 | else: 146 | directory_ret['record_data'] = dict() 147 | 148 | get_tmdb_movie_search_result(directory_ret['movie_name'], directory_ret['release_year']) 149 | get_tmdb_movie_details() 150 | get_tmdb_movie_cast_members() 151 | 152 | return directory_ret 153 | 154 | 155 | def get_video(directory, settings): 156 | 157 | def search_result(): 158 | 159 | def search(): 160 | item_list = list() 161 | 162 | # todo: limit > 0 163 | 164 | while True: 165 | try: 166 | time.sleep(global_advanced_settings.getint('search_cooldown')) 167 | print('searching for: "' + query + '"') 168 | for url in google_search(query, stop=limit): 169 | if len(item_list) >= limit: 170 | break 171 | new_item = {'link': url} 172 | 173 | while True: 174 | 175 | for existing_candidate in video_candidates: 176 | if new_item['link'] == existing_candidate['link']: 177 | break 178 | 179 | try: 180 | new_item['pytube_result'] = YouTube(new_item['link']) 181 | item_list.append(new_item) 182 | break 183 | except KeyError: 184 | print('Pytube failed to initialize (KeyError). trying again in 2 seconds.') 185 | time.sleep(2) 186 | except URLError: 187 | print('Pytube failed to initialize (URLError). trying again in 2 seconds.') 188 | time.sleep(2) 189 | except exceptions.RegexMatchError: 190 | new_item['delete_this_item'] = True 191 | break 192 | 193 | break 194 | 195 | except HTTPError as e: 196 | if e.code == 503: 197 | print('------------------------------------------------------------------------------------') 198 | print('It seems that your IP-address have been flagged by google for unusual activity. 
') 199 | print('They usually put down the flag after some time so try again tomorrow.') 200 | print('If this is a reoccurring issue, increase the search cooldown under advanced settings') 201 | print('------------------------------------------------------------------------------------') 202 | sys.exit() 203 | else: 204 | print('Failed to retrieve search results, trying again in 2 seconds: ' + e.msg) 205 | time.sleep(2) 206 | continue 207 | 208 | except URLError as e: 209 | print('Failed to retrieve search results, trying again in 2 seconds: ' + e.msg) 210 | time.sleep(2) 211 | continue 212 | 213 | return item_list 214 | 215 | video_candidates = list() 216 | for option, query in settings['SEARCHES'].items(): 217 | if 'search_string' not in option: 218 | continue 219 | 220 | limit = settings['SEARCHES'].getint('search_result_limit' + option.replace('search_string', '')) 221 | 222 | query = query.replace('{movie_name}', directory['movie_name']) 223 | 224 | if directory['release_year'] is not None: 225 | query = query.replace('{release_year}', directory['release_year']) 226 | else: 227 | query = query.replace('{release_year}', '') 228 | 229 | if tmdb_movie_details['production_companies'][0]['name'] is not None and '{main_studio_name}' in query: 230 | get_tmdb_movie_details() 231 | query = query.replace('{main_studio_name}', tmdb_movie_details['production_companies'][0]['name']) 232 | else: 233 | query = query.replace('{main_studio_name}', '') 234 | 235 | query = query.replace(' ', ' ') 236 | 237 | video_candidates += search() 238 | 239 | return video_candidates 240 | 241 | def scan_candidates(): 242 | 243 | selection_info['max_resolution'] = 0 244 | for candidate in selection_info['candidates']: 245 | 246 | candidate['delete_this_item'] = False 247 | 248 | video = candidate['pytube_result'] 249 | 250 | if candidate['delete_this_item'] or video is None: 251 | continue 252 | 253 | candidate['title'] = video.title 254 | candidate['rating'] = float(video.player_config_args['avg_rating']) 255 | candidate['view_count'] = int(video.player_config_args['view_count']) 256 | candidate['thumbnail_url'] = video.thumbnail_url 257 | candidate['channel'] = video.player_config_args['author'] 258 | candidate['tags'] = video.player_config_args['keywords'] 259 | 260 | if candidate['view_count'] < 100: 261 | candidate['view_count'] = 100 262 | 263 | candidate['adjusted_rating'] = candidate['rating'] * (1 - 1 / ((candidate['view_count'] / 60) ** 0.5)) 264 | 265 | candidate['resolution'] = 0 266 | for stream in video.streams.filter(type='video').all(): 267 | try: 268 | resolution = int(stream.resolution.replace('p', '')) 269 | except AttributeError: 270 | resolution = 0 271 | 272 | if resolution > selection_info['max_resolution']: 273 | selection_info['max_resolution'] = resolution 274 | if resolution > candidate['resolution']: 275 | candidate['resolution'] = resolution 276 | 277 | try: 278 | if 'ad_preroll' in video.player_config_args: 279 | candidate['adds_info'] = 'have adds' 280 | else: 281 | candidate['adds_info'] = 'No adds' 282 | except ValueError: 283 | candidate['adds_info'] = 'No adds' 284 | 285 | return selection_info 286 | 287 | def filter_candidates(): 288 | 289 | filtered_candidates = list() 290 | 291 | required_words = retrive_list_from_string(settings['FILTERING'].get('required_words').lower()) 292 | banned_words = retrive_list_from_string(settings['FILTERING'].get('banned_words').lower()) 293 | banned_channels = retrive_list_from_string(settings['FILTERING'].get('banned_channels').lower()) 294 | 295 
| banned_years = list(range(1800, 2100)) 296 | for year in banned_years: 297 | if str(year) in directory['movie_name']: 298 | banned_years.remove(year) 299 | if directory['release_year'] is not None: 300 | if int(directory['release_year']) in banned_years: 301 | banned_years.remove(int(directory['release_year'])) 302 | # todo: +- 1 year? 303 | 304 | for candidate in selection_info['candidates']: 305 | 306 | append_video = True 307 | 308 | # todo: make filter that match title name with trailer title. (min 66% match rounding up ) 309 | # ignoring words: the, on, of, a, an 310 | 311 | if candidate['delete_this_item']: 312 | continue 313 | 314 | for year in banned_years: 315 | if str(year) in candidate['title']: 316 | append_video = False 317 | break 318 | if str(year) in candidate['tags']: 319 | append_video = False 320 | break 321 | 322 | for word in required_words: 323 | if word.lower() not in candidate['title'].lower(): 324 | append_video = False 325 | 326 | for word in banned_words: 327 | if word.lower() in candidate['title'].lower(): 328 | append_video = False 329 | break 330 | 331 | # todo: move to post scoring filter 332 | for channel in banned_channels: 333 | if channel.lower() == candidate['channel'].lower(): 334 | append_video = False 335 | break 336 | 337 | if append_video: 338 | filtered_candidates.append(candidate) 339 | 340 | selection_info['candidates'] = filtered_candidates 341 | 342 | return 343 | 344 | def score_candidates(): 345 | 346 | for candidate in selection_info['candidates']: 347 | candidate['score'] = 0 348 | 349 | if candidate['resolution'] < 700: 350 | candidate['adjusted_rating'] *= 0.96 351 | 352 | return 353 | 354 | def post_scoring_filter(): 355 | return 356 | 357 | def order_candidates(): 358 | 359 | # interviews: limit same person interviews. 
360 | # behind the scenes: 361 | # trailers: 362 | # 363 | 364 | selected_extra = None 365 | 366 | top_score = 0 367 | top_view_count = 0 368 | 369 | for candidate in selection_info['candidates']: 370 | 371 | print('-----------------------------------------------------------------') 372 | print(candidate['title']) 373 | print(candidate['adds_info']) 374 | print(candidate['resolution']) 375 | print(candidate['link']) 376 | print(candidate['adjusted_rating']) 377 | print(candidate['view_count']) 378 | 379 | if candidate['adjusted_rating'] > top_score: 380 | top_score = candidate['adjusted_rating'] 381 | 382 | for candidate in selection_info['candidates']: 383 | if candidate['adjusted_rating'] > top_score * 0.95: 384 | if candidate['view_count'] > top_view_count: 385 | top_view_count = candidate['view_count'] 386 | selected_extra = candidate 387 | 388 | print('-----------------------------------------------------------------') 389 | print('picked: ' + selected_extra['title'] + ' (' + selected_extra['link'] + ')') 390 | print('-----------------------------------------------------------------') 391 | return selected_extra 392 | 393 | selection_info = {'candidates': search_result()} 394 | 395 | scan_candidates() 396 | 397 | filter_candidates() 398 | 399 | score_candidates() 400 | 401 | # todo: make post scoring filter 402 | 403 | return order_candidates() 404 | 405 | 406 | def download(video, settings): 407 | info_ret = dict() 408 | return info_ret 409 | 410 | 411 | def post_process(download_info, settings): 412 | # todo: reduce sound 413 | # todo: remove green disclaimer if it exist 414 | # todo: encode in mp4, aac, h264 or link the stream 415 | pass 416 | 417 | 418 | def retrieve_web_page(url, page_name='page'): 419 | 420 | response = None 421 | print('Downloading ' + page_name + '.') 422 | 423 | for attempt in range(10): 424 | try: 425 | response = urlopen(url, timeout=2) 426 | break 427 | 428 | except timeout: 429 | print('Failed to download ' + page_name + ' : timed out. Trying again in 2 seconds.') 430 | 431 | if attempt > 5: 432 | print('You might have lost internet connection.') 433 | raise ValueError('Failed to retrive web page: url requests timed out.') 434 | 435 | time.sleep(2) 436 | 437 | except HTTPError as e: 438 | raise ValueError('Failed to download ' + page_name + ' : ' + e.msg + '. Skipping.') 439 | 440 | except URLError as e: 441 | print('Failed to download ' + page_name + '. Trying again in 2 seconds') 442 | 443 | if attempt > 5: 444 | print('You might have lost internet connection.') 445 | raise ValueError('Failed to retrive web page: ' + e.reason + '.') 446 | 447 | time.sleep(2) 448 | 449 | return response 450 | 451 | 452 | def get_tmdb_movie_search_result(name, release_year): 453 | global tmdb_movie_search_result 454 | if tmdb_movie_search_result is not None: 455 | return 456 | 457 | # todo: modify to not use release_year in search but rather as picking the right one. 458 | # todo: any word in any other movie not in the wanted movie should be on ban list for filtering. 
459 | # todo: maybe not: false negatives 460 | 461 | url = 'https://api.themoviedb.org/3/search/movie' \ 462 | '?api_key=' + tmdb_api_key + \ 463 | '&language=en-US&query=' \ 464 | + name.replace(' ', '+') + \ 465 | '&page=1&include_adult=false' 466 | 467 | if release_year is not None: 468 | url += '&year=' + str(release_year) 469 | 470 | response = retrieve_web_page(url, 'movie search api page') 471 | 472 | data = json.loads(response.read().decode('utf-8')) 473 | 474 | if data['total_results'] == 0: 475 | raise ValueError('Unable to find a movie for the directory "' + name + '", skipping.') 476 | 477 | # todo: add +- 1 year to the year if it's close to new year. 478 | 479 | tmdb_movie_search_result = data['results'][0] 480 | response.close() 481 | 482 | 483 | def get_tmdb_movie_details(): 484 | global tmdb_movie_details 485 | global tmdb_movie_search_result 486 | if tmdb_movie_details is not None: 487 | return 488 | 489 | response = retrieve_web_page('https://api.themoviedb.org/3/movie/' 490 | + str(tmdb_movie_search_result['id']) + 491 | '?api_key=' + tmdb_api_key + 492 | '&language=en-US', 'movie details') 493 | 494 | data = json.loads(response.read().decode('utf-8')) 495 | tmdb_movie_details = data 496 | response.close() 497 | 498 | 499 | def get_tmdb_movie_cast_members(): 500 | global tmdb_movie_details 501 | global tmdb_movie_cast_members 502 | if tmdb_movie_cast_members is not None: 503 | return 504 | 505 | response = retrieve_web_page('https://api.themoviedb.org/3/movie/' 506 | + str(tmdb_movie_search_result['id']) + 507 | '/credits' 508 | '?api_key=' + tmdb_api_key, 'cast members') 509 | 510 | data = json.loads(response.read().decode('utf-8')) 511 | tmdb_movie_cast_members = data 512 | response.close() 513 | 514 | 515 | def retrive_list_from_string(string, delimiter=',', remove_spaces_next_to_delimiter=True): 516 | if remove_spaces_next_to_delimiter: 517 | while ' ' + delimiter in string: 518 | string = string.replace(' ' + delimiter, delimiter) 519 | while delimiter + ' ' in string: 520 | string = string.replace(delimiter + ' ', delimiter) 521 | 522 | return string.split(delimiter) 523 | 524 | 525 | main() 526 | # todo: add link_only option and capabilities. 
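# A sketch of one possible reading of the "link_only" todo above: instead of downloading
# the selected extra, record its YouTube URL next to the movie so it can be fetched later.
# The helper below is hypothetical and not wired into main(); it assumes the candidate
# dicts produced by get_video(), which carry a 'link' key, and the os import above.
def save_link_only(candidate, target_dir, extra_name):
    with open(os.path.join(target_dir, extra_name + '.link.txt'), 'w') as link_file:
        link_file.write(candidate['link'] + '\n')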
527 | sys.exit() 528 | -------------------------------------------------------------------------------- /old_code/Stream.py: -------------------------------------------------------------------------------- 1 | import time 2 | from _socket import timeout 3 | from urllib.error import HTTPError, URLError 4 | 5 | 6 | class Stream(object): 7 | 8 | conn_errors = 0 9 | 10 | def __init__(self, source, length): 11 | ######################################## 12 | self.complete = True 13 | self.retry = False 14 | 15 | self.source = None 16 | self.id = None 17 | self.type = None 18 | self.container = None 19 | self.bitrate = None 20 | 21 | self.video_codec = None 22 | self.bitrate_per_pixel = None 23 | self.resolution = None 24 | self.fps = None 25 | self.is_hdr = None 26 | self.size = None 27 | self.is_3d = None 28 | 29 | self.audio_codec = None 30 | ######################################## 31 | 32 | tries = 0 33 | while True: 34 | 35 | try: 36 | self.source = source 37 | self.id = source.itag 38 | self.container = source.subtype 39 | self.fps = source.fps 40 | self.file_size = source.filesize 41 | self.bitrate = self.file_size * 8 / length 42 | 43 | if source.is_progressive: 44 | self.get_audio_data(source) 45 | self.get_video_data(source) 46 | self.type = 'combined' 47 | 48 | elif source.includes_audio_track: 49 | self.get_audio_data(source) 50 | self.type = 'audio' 51 | 52 | elif source.includes_video_track: 53 | self.get_video_data(source) 54 | self.type = 'video' 55 | self.size = int(source.size.split('x')[0]), int(source.size.split('x')[1]) 56 | self.bitrate_per_pixel = self.bitrate / (self.size[0] * self.size[1]) 57 | else: 58 | print('both include_audio_track and include_video_track was false') 59 | raise AttributeError('failed to categorise stream') 60 | 61 | except KeyError as e: 62 | print('A stream attribute failed to load. KeyError: ' + str(e)) 63 | self.complete = False 64 | return 65 | except AttributeError as e: 66 | print('A required stream attribute failed to load. AttributeError: ' + str(e)) 67 | self.complete = False 68 | return 69 | except timeout as e: 70 | if tries > 4: 71 | print('A stream failed to load because it got timed out: ' + str(e)) 72 | self.complete = False 73 | self.retry = True 74 | if Stream.conn_errors > 2: 75 | raise 76 | else: 77 | Stream.conn_errors += 1 78 | return 79 | 80 | print('A stream failed to load because it got timed out, retrying. Reason: ' + str(e)) 81 | tries += 1 82 | time.sleep(1) 83 | except HTTPError as e: 84 | print('A stream attribute failed to load, skipping. Reason: ' + str(e)) 85 | self.incomplete = True 86 | return 87 | except URLError as e: 88 | if tries > 2: 89 | print('A stream failed to load. Reason: ' + str(e)) 90 | self.complete = False 91 | self.retry = True 92 | if Stream.conn_errors > 2: 93 | raise 94 | else: 95 | Stream.conn_errors += 1 96 | return 97 | 98 | print('A stream attribute failed to load, retrying. Reason: ' + str(e)) 99 | time.sleep(1) 100 | tries += 1 101 | except ConnectionResetError as e: 102 | if tries > 4: 103 | print('A stream failed to load. Reason: ' + str(e)) 104 | self.complete = False 105 | if Stream.conn_errors > 4: 106 | raise 107 | else: 108 | Stream.conn_errors += 1 109 | return 110 | print('A stream attribute failed to load, retrying. 
Reason: ' + str(e)) 111 | time.sleep(1) 112 | tries += 1 113 | else: 114 | Stream.conn_errors = 0 115 | break 116 | 117 | def get_video_data(self, source): 118 | self.video_codec = source.video_codec 119 | self.resolution = int(source.resolution.replace('p', '')) 120 | self.is_hdr = source.is_hdr 121 | self.is_3d = source.is_3d 122 | 123 | def get_audio_data(self, source): 124 | self.audio_codec = source.audio_codec 125 | -------------------------------------------------------------------------------- /old_code/YoutubeVideo.py: -------------------------------------------------------------------------------- 1 | from _socket import timeout 2 | from urllib.error import URLError 3 | 4 | from pytube import YouTube 5 | from pytube.exceptions import RegexMatchError 6 | from old_code.Stream import Stream 7 | import time 8 | import tools as tools 9 | 10 | 11 | class YoutubeVideo(object): 12 | 13 | # todo (2): subtitles 14 | conn_errors = 0 15 | 16 | def __init__(self, url, score=0, preferred_container='mp4', min_resolution=360, 17 | max_resolution=1080, force_preferred_container=False): 18 | 19 | ######################################## 20 | self.url = None 21 | self.source = None 22 | self.delete = None 23 | self.complete = None 24 | self.is_play_trailer = None 25 | 26 | self.title = None 27 | self.thumbnail_url = None 28 | self.channel = None 29 | self.tags = list() 30 | 31 | self.view_count = None 32 | self.rating = None 33 | self.adjusted_rating = None 34 | self.resolution = None 35 | self.quality_score = None 36 | self.length = None 37 | self.resolution_ratio = None 38 | 39 | self.streams = list() 40 | self.best_video_stream = None 41 | self.best_audio_stream = None 42 | self.best_combined_stream = None 43 | ######################################## 44 | 45 | self.url = url 46 | self.delete = False 47 | self.is_play_trailer = False 48 | self.complete = True 49 | 50 | tries = 0 51 | while True: 52 | try: 53 | self.source = YouTube(url) 54 | except KeyError as e: 55 | if e.args[0] == 'url': 56 | self.delete = True 57 | self.is_play_trailer = True 58 | # todo (1): add youtube-dl info grabber/downloader 59 | # stuff I need: title, length, keywords? 60 | return 61 | elif e.args[0] == 'url_encoded_fmt_stream_map': 62 | if tries > 4: 63 | print('Failed to load youtube data, retrying. Reason: ' + str(e)) 64 | self.delete = True 65 | return 66 | 67 | print('Failed to load youtube data, retrying. Reason: ' + str(e)) 68 | time.sleep(2) 69 | tries += 1 70 | 71 | else: 72 | raise 73 | except RegexMatchError as e: 74 | print('Pytube failed to load video info. Reason: ' + url + ': ' + str(e)) 75 | self.delete = True 76 | return 77 | except timeout as e: 78 | if tries > 4: 79 | print('Pytube failed to load video info. Reason: ' + str(e)) 80 | self.complete = False 81 | if Stream.conn_errors > 2: 82 | raise 83 | else: 84 | Stream.conn_errors += 1 85 | return 86 | 87 | print('Pytube failed to load video info. Reason: ' + str(e) + ', retrying...') 88 | tries += 1 89 | time.sleep(1) 90 | except URLError as e: 91 | if tries > 2: 92 | print('Pytube failed to load video info. Reason: ' + str(e)) 93 | self.complete = False 94 | if YoutubeVideo.conn_errors > 2: 95 | raise 96 | else: 97 | YoutubeVideo.conn_errors += 1 98 | return 99 | 100 | print('Pytube failed to load video info. 
Reason: ' + str(e) + ', retrying...') 101 | time.sleep(1) 102 | tries += 1 103 | else: 104 | YoutubeVideo.conn_errors = 0 105 | break 106 | 107 | self.score = score 108 | 109 | self.title = self.source.title 110 | self.title = tools.get_clean_string(self.title) 111 | self.rating = float(self.source.player_config_args['avg_rating']) 112 | self.view_count = int(self.source.player_config_args['view_count']) 113 | self.channel = self.source.player_config_args['author'] 114 | self.length = self.source.player_config_args['length_seconds'] 115 | 116 | self.thumbnail_url = self.source.thumbnail_url 117 | try: 118 | self.thumbnail_url = self.source.thumbnail_url 119 | except KeyError: 120 | self.thumbnail_url = None 121 | 122 | try: 123 | self.tags = self.source.player_config_args['keywords'].split(',') 124 | except KeyError: 125 | self.tags = '' 126 | 127 | if self.view_count < 100: 128 | self.view_count = 100 129 | 130 | self.adjusted_rating = self.rating * (1 - 1 / ((self.view_count / 60) ** 0.5)) 131 | 132 | self.load_streams(min_resolution, max_resolution) 133 | self.update_quality_score(preferred_container) 134 | self.update_best_audio_stream(preferred_container, force_preferred_container) 135 | self.update_best_video_stream(preferred_container, force_preferred_container) 136 | self.update_best_combined_stream(preferred_container, force_preferred_container) 137 | 138 | if self.is_play_trailer: 139 | self.update_youtube_dl_info() 140 | 141 | 142 | 143 | def update_youtube_dl_info(self): 144 | pass 145 | 146 | def update_quality_score(self, preferred_container='mp4'): 147 | self.quality_score = 0 148 | max_res = 0 149 | 150 | for stream in self.streams: 151 | 152 | if stream.type != 'video': 153 | continue 154 | 155 | quality_score = 0 156 | pixel_bitrate = stream.bitrate_per_pixel 157 | 158 | if stream.resolution == 1080: 159 | pixel_bitrate /= 1 160 | quality_score = 120 161 | elif stream.resolution == 720: 162 | pixel_bitrate /= 1.22 163 | quality_score = 108 164 | elif stream.resolution == 480: 165 | pixel_bitrate /= 1.52 166 | quality_score = 65 167 | elif stream.resolution == 360: 168 | pixel_bitrate /= 1.39 169 | quality_score = 40 170 | elif stream.resolution == 240: 171 | pixel_bitrate /= 2.15 172 | quality_score = 20 173 | elif stream.resolution == 144: 174 | pixel_bitrate /= 2.65 175 | quality_score = 10 176 | 177 | if preferred_container.lower() == stream.container: 178 | quality_score *= 1.2 179 | quality_score *= pixel_bitrate 180 | 181 | if stream.resolution > max_res: 182 | self.quality_score = quality_score 183 | max_res = stream.resolution 184 | self.resolution_ratio = stream.size[0] / stream.size[1] 185 | elif stream.resolution == max_res: 186 | if quality_score > self.quality_score: 187 | self.quality_score = quality_score 188 | 189 | def load_streams(self, min_resolution=360, max_resolution=1080): 190 | 191 | self.streams = list() 192 | self.complete = True 193 | 194 | for source_stream in self.source.streams.fmt_streams: 195 | stream = Stream(source_stream, int(self.length)) 196 | if stream.complete: 197 | if stream.resolution is not None: 198 | if stream.resolution > max_resolution or stream.resolution < min_resolution: 199 | continue 200 | self.streams.append(stream) 201 | elif stream.retry: 202 | self.complete = False 203 | if Stream.conn_errors != 0: 204 | self.complete = False 205 | 206 | def update_best_video_stream(self, preferred_container='mp4', force_preferred_container=False): 207 | 208 | highest_resolution = 0 209 | best_stream = None 210 | 
highest_pref_resolution = 0 211 | best_pref_stream = None 212 | 213 | for stream in self.streams: 214 | if 'video' != stream.type: 215 | continue 216 | 217 | if stream.resolution > highest_resolution: 218 | highest_resolution = stream.resolution 219 | best_stream = stream 220 | 221 | if stream.container.lower() == preferred_container.lower(): 222 | if stream.resolution > highest_pref_resolution: 223 | highest_pref_resolution = stream.resolution 224 | best_pref_stream = stream 225 | 226 | if highest_resolution == highest_pref_resolution or force_preferred_container: 227 | ret = best_pref_stream 228 | else: 229 | ret = best_stream 230 | 231 | self.best_video_stream = ret 232 | 233 | def update_best_audio_stream(self, preferred_container='mp4', force_preferred_container=False): 234 | 235 | highest_bitrate = 0 236 | best_stream = None 237 | highest_pref_bitrate = 0 238 | best_pref_stream = None 239 | 240 | for stream in self.streams: 241 | if 'audio' != stream.type: 242 | continue 243 | 244 | if stream.bitrate > highest_bitrate: 245 | highest_bitrate = stream.bitrate 246 | best_stream = stream 247 | 248 | if stream.container.lower() == preferred_container.lower(): 249 | if stream.bitrate > highest_pref_bitrate: 250 | highest_pref_bitrate = stream.bitrate 251 | best_pref_stream = stream 252 | 253 | if highest_bitrate <= highest_pref_bitrate * 1.35 or force_preferred_container: 254 | ret = best_pref_stream 255 | else: 256 | ret = best_stream 257 | self.best_audio_stream = ret 258 | 259 | def update_best_combined_stream(self, preferred_container='mp4', force_preferred_container=False): 260 | 261 | highest_resolution = 0 262 | 263 | for stream in self.streams: 264 | if 'combined' != stream.type: 265 | continue 266 | 267 | if stream.resolution > highest_resolution: 268 | highest_resolution = stream.resolution 269 | 270 | max_score = 0 271 | selected_stream = None 272 | 273 | for stream in self.streams: 274 | if 'combined' != stream.type: 275 | continue 276 | 277 | score = 0 278 | resolution = stream.resolution 279 | 280 | if force_preferred_container: 281 | if stream.container != preferred_container: 282 | continue 283 | if resolution == highest_resolution: 284 | score += 10 ** 1 285 | if stream.container == preferred_container: 286 | score += 10 ** 0 287 | 288 | if score > max_score: 289 | max_score = score 290 | selected_stream = stream 291 | 292 | self.best_combined_stream = selected_stream 293 | -------------------------------------------------------------------------------- /old_code/config-example.cfg: -------------------------------------------------------------------------------- 1 | [SETTINGS] 2 | 3 | library_directory = 4 | #temp_directory = 5 | 6 | tmdb_api_key = 7 | 8 | release_year_end_of_file = true 9 | ffmpeg_installed = false 10 | 11 | [ADVANCED_SETTINGS] 12 | 13 | force_all = false 14 | search_cooldown = 0 15 | -------------------------------------------------------------------------------- /old_code/old_code.py: -------------------------------------------------------------------------------- 1 | import os 2 | import configparser 3 | from configparser import NoOptionError 4 | import fnmatch 5 | import pprint 6 | import shutil 7 | import time 8 | import sys 9 | from urllib.error import URLError 10 | from urllib.request import urlopen 11 | from urllib.error import URLError, HTTPError 12 | from socket import timeout 13 | import json 14 | 15 | # pip install these packages: 16 | try: 17 | from googlesearch import search as google_search # google package 18 | except ImportError: 19 | 
print('Please upgrade to python 3.6 or run the 2.7 version.') 20 | sys.exit() 21 | from pytube import YouTube # pytube package 22 | from pytube import exceptions 23 | # also, install FFmpeg. 24 | 25 | 26 | def find_extra(config, extra_name, search, sort_arguments): 27 | 28 | print('Finding video for extra: "' + extra_name + '".') 29 | 30 | time.sleep(1) 31 | print('Loading configuration.') 32 | movie_library_dir = config.get('SETTINGS', 'movie_library_dir') 33 | try: 34 | download_dir = config.get('SETTINGS', 'download_dir') 35 | except NoOptionError: 36 | download_dir = os.getcwd() 37 | ffmpeg_status = config.getboolean('SETTINGS', 'FFmpeg_installed') 38 | 39 | time.sleep(1) 40 | print('Loading library.') 41 | library = get_library_record(movie_library_dir, config) 42 | 43 | time.sleep(1) 44 | print('finding movie to download extra for') 45 | movie_folder = get_movie_folder(movie_library_dir, library, [], [extra_name]) 46 | 47 | config.set('LIBRARY_RECORD', movie_folder.replace(' ', '_'), str(library[movie_folder] + 1)) 48 | 49 | time.sleep(1) 50 | print('finding video to download for : ' + movie_folder) 51 | video_to_download = get_video_to_download(movie_folder, search, sort_arguments) 52 | time.sleep(1) 53 | print('Downloading: "' + video_to_download['title'] + '" (' + video_to_download['link'] + ")") 54 | download(video_to_download, download_dir, extra_name, ffmpeg_status) 55 | 56 | time.sleep(1) 57 | print('Moving "' + extra_name + '" and cleaning up') 58 | move_and_cleanup(download_dir, os.path.join(movie_library_dir, movie_folder), extra_name + '.mp4') 59 | 60 | print('All done!') 61 | return True 62 | 63 | 64 | def get_library_record(library_dir, config): 65 | 66 | library = dict() 67 | 68 | for folder_name in os.listdir(library_dir): 69 | if fnmatch.fnmatch(folder_name, config.get('SETTINGS', 'name_pattern')): 70 | if not config.has_option('LIBRARY_RECORD', folder_name.replace(' ', '_')): 71 | new_entry = 0 72 | else: 73 | new_entry = int(config.getint('LIBRARY_RECORD', folder_name.replace(' ', '_'))) 74 | 75 | library[folder_name] = new_entry 76 | return library 77 | 78 | 79 | def get_movie_folder(library_dir, earlier_tries, have, have_not): 80 | 81 | min_earlier_tries = 10000 82 | max_earlier_tries = 0 83 | 84 | for movie in earlier_tries: 85 | if earlier_tries[movie] < min_earlier_tries: 86 | min_earlier_tries = earlier_tries[movie] 87 | 88 | if earlier_tries[movie] > max_earlier_tries: 89 | max_earlier_tries = earlier_tries[movie] 90 | 91 | while min_earlier_tries <= max_earlier_tries: 92 | 93 | for movie in earlier_tries: 94 | 95 | if earlier_tries[movie] > min_earlier_tries: 96 | continue 97 | 98 | return_movie = True 99 | 100 | for file_name in os.listdir(os.path.join(library_dir, movie)): 101 | for word in have: 102 | if word not in file_name: 103 | return_movie = False 104 | 105 | for file_name in os.listdir(os.path.join(library_dir, movie)): 106 | for word in have_not: 107 | if word in file_name: 108 | return_movie = False 109 | 110 | if return_movie: 111 | return movie 112 | 113 | min_earlier_tries += 1 114 | 115 | print("Couldn't find a movie in the library matching the given restriction.") 116 | print("Or all movies already have the extra you are looking for.") 117 | print('Shutting down.') 118 | sys.exit() 119 | 120 | 121 | def get_video_to_download(movie, search_suffix, filter_arguments): 122 | 123 | def scan_response(response): 124 | 125 | response['max_video_resolution'] = 0 126 | for result in response['items']: 127 | 128 | result['delete_this_item'] = False 129 
| 130 | video = None 131 | for try_count in range(5): 132 | 133 | if try_count > 2: 134 | time.sleep(1) 135 | video = YouTube(result['link']) 136 | else: 137 | try: 138 | video = YouTube(result['link']) 139 | break 140 | except KeyError: 141 | print('Pytube failed to initialize (KeyError). trying again in 10 seconds.') 142 | time.sleep(9) 143 | except URLError: 144 | print('Pytube failed to initialize (URLError). trying again in 10 seconds.') 145 | time.sleep(9) 146 | except exceptions.RegexMatchError: 147 | result['delete_this_item'] = True 148 | break 149 | 150 | if result['delete_this_item']: 151 | continue 152 | 153 | result['youtube_object'] = video 154 | result['title'] = video.title 155 | result['avg_rating'] = float(video.player_config_args['avg_rating']) 156 | result['view_count'] = int(video.player_config_args['view_count']) 157 | 158 | if result['view_count'] < 60: 159 | result['view_count'] = 60 160 | 161 | result['video_resolution'] = 0 162 | for stream in video.streams.filter(type='video').all(): 163 | try: 164 | resolution = int(stream.resolution.replace('p', '')) 165 | except AttributeError: 166 | resolution = 0 167 | 168 | if resolution > response['max_video_resolution']: 169 | response['max_video_resolution'] = resolution 170 | if resolution > result['video_resolution']: 171 | result['video_resolution'] = resolution 172 | 173 | try: 174 | if 'ad_preroll' in video.player_config_args: 175 | result['adds_info'] = 'have adds' 176 | else: 177 | result['adds_info'] = 'No adds' 178 | except ValueError: 179 | result['adds_info'] = 'No adds' 180 | 181 | return response 182 | 183 | def filter_response(response, arguments): 184 | 185 | items = list() 186 | 187 | for result in response['items']: 188 | 189 | append_video = True 190 | 191 | if result['delete_this_item']: 192 | continue 193 | 194 | for word in arguments['video_name_must_contain']: 195 | if word.lower() not in result['title'].lower(): 196 | append_video = False 197 | 198 | for word in arguments['video_name_must_not_contain']: 199 | if word.lower() in result['title'].lower(): 200 | append_video = False 201 | 202 | if append_video: 203 | items.append(result) 204 | 205 | response.pop('items') 206 | response['items'] = items 207 | 208 | return response 209 | 210 | def score_response(response, scoring_arguments): 211 | 212 | for result in response['items']: 213 | 214 | result['true_rating'] = result['avg_rating'] * (1 - 1 / ((result['view_count'] / 60) ** 0.5)) 215 | 216 | if result['video_resolution'] < 700: 217 | result['true_rating'] *= 0.90 218 | result['view_count'] *= 0.5 219 | 220 | for bonus in scoring_arguments['video_name_tag_bonuses']: 221 | for word in scoring_arguments['video_name_tag_bonuses'][bonus]: 222 | if word in result['title'].lower(): 223 | result['true_rating'] *= bonus 224 | result['view_count'] *= bonus 225 | break 226 | 227 | return response 228 | 229 | # search for movie 230 | search = movie.replace('(', '').replace(')', '').replace('[', '').replace(']', '') + ' ' + search_suffix 231 | search = search.replace('.', ' ').replace('_', ' ').replace('-', ' ').replace(' ', ' ').replace(' ', ' ') 232 | search = str('site:youtube.com ' + search) 233 | 234 | item_list = list() 235 | for attempt in range(5): 236 | if attempt > 2: 237 | for url in google_search(search, stop=10): 238 | item = {'link': url} 239 | item_list.append(item) 240 | break 241 | else: 242 | try: 243 | for url in google_search(search, stop=10): 244 | item = {'link': url} 245 | item_list.append(item) 246 | break 247 | except URLError: 248 
| print('Failed to retrieve search results, trying again in 10 seconds') 249 | time.sleep(10) 250 | continue 251 | 252 | item_list.pop() 253 | item_list.pop() 254 | item_list.pop() 255 | search_response = {'items': item_list} 256 | 257 | search_response = scan_response(search_response) 258 | search_response = filter_response(search_response, filter_arguments) 259 | search_response = score_response(search_response, filter_arguments) 260 | 261 | # select video 262 | selected_movie = None 263 | 264 | top_score = 0 265 | top_view_count = 0 266 | 267 | for item in search_response['items']: 268 | 269 | print('-----------------------------------------------------------------') 270 | print(item['title']) 271 | print(item['adds_info']) 272 | print(item['video_resolution']) 273 | print(item['link']) 274 | print(item['true_rating']) 275 | print(item['view_count']) 276 | 277 | if item['true_rating'] > top_score: 278 | top_score = item['true_rating'] 279 | 280 | for item in search_response['items']: 281 | if item['true_rating'] > top_score * 0.95: 282 | if item['view_count'] > top_view_count: 283 | top_view_count = item['view_count'] 284 | selected_movie = item 285 | 286 | return selected_movie 287 | 288 | 289 | def download(youtube_video, download_dir, file_name, ffmpeg_status): 290 | def get_best_adaptive_audio_stream(stream_list): 291 | 292 | max_bit_rate = 0 293 | top_audio_stream = None 294 | preferable_max_bit_rate = 0 295 | preferable_top_audio_stream = None 296 | 297 | for audio_stream in stream_list.streams.filter(type='audio', progressive=False).all(): 298 | 299 | if audio_stream.is_progressive \ 300 | or audio_stream.resolution != '0p' \ 301 | or audio_stream.video_codec != 'unknown': 302 | continue 303 | 304 | bit_rate = int(audio_stream.abr.replace('kbps', '')) 305 | 306 | if bit_rate > max_bit_rate: 307 | max_bit_rate = bit_rate 308 | top_audio_stream = audio_stream 309 | 310 | if bit_rate > preferable_max_bit_rate and 'mp4a' in audio_stream.audio_codec.lower(): 311 | preferable_max_bit_rate = bit_rate 312 | preferable_top_audio_stream = audio_stream 313 | 314 | if preferable_max_bit_rate * 1.7 > max_bit_rate: 315 | return preferable_top_audio_stream 316 | else: 317 | return top_audio_stream 318 | 319 | def get_best_adaptive_video_stream(stream_list): 320 | 321 | max_resolution = 0 322 | top_video_stream = None 323 | preferable_max_resolution = 0 324 | preferable_top_video_stream = None 325 | 326 | for video_stream in stream_list.streams.filter(type='video').all(): 327 | 328 | if video_stream.is_progressive \ 329 | or video_stream.abr != '25kbps' \ 330 | or video_stream.audio_codec != 'unknown': 331 | continue 332 | 333 | resolution = int(video_stream.resolution.replace('p', '')) 334 | 335 | if resolution > max_resolution: 336 | max_resolution = resolution 337 | top_video_stream = video_stream 338 | 339 | if resolution > 1080: 340 | continue 341 | 342 | if resolution > preferable_max_resolution and 'avc' in video_stream.video_codec.lower(): 343 | preferable_max_resolution = resolution 344 | preferable_top_video_stream = video_stream 345 | 346 | if preferable_max_resolution == max_resolution: 347 | return preferable_top_video_stream 348 | else: 349 | return top_video_stream 350 | 351 | def get_best_progressive_stream(stream_list): 352 | 353 | max_resolution = 0 354 | selected_stream = None 355 | 356 | for progressive_stream in stream_list.streams.filter(progressive=True).all(): 357 | 358 | resolution = int(progressive_stream.resolution.replace('p', '')) 359 | 360 | if resolution > 
max_resolution: 361 | max_resolution = resolution 362 | 363 | max_score = 0 364 | for progressive_stream in stream_list.streams.filter().all(): 365 | 366 | score = 0 367 | resolution = int(progressive_stream.resolution.replace('p', '')) 368 | bit_rate = int(progressive_stream.abr.replace('kbps', '')) 369 | if not ffmpeg_status: 370 | if progressive_stream.subtype.lower() == 'mp4': 371 | score += 1000000000 372 | if resolution == max_resolution: 373 | score += 10000 374 | if 'avc' in progressive_stream.video_codec.lower(): 375 | score += 1000 376 | if 'mp4a' in progressive_stream.audio_codec.lower(): 377 | score += bit_rate * 1.7 378 | else: 379 | score += bit_rate 380 | 381 | if score > max_score: 382 | max_score = score 383 | selected_stream = progressive_stream 384 | 385 | return selected_stream 386 | 387 | def download_adaptive_streams(video_stream, audio_stream, target_dir, target_file_name): 388 | 389 | for attempt in range(5): 390 | if attempt > 2: 391 | video_stream.download(target_dir, 'video') 392 | break 393 | else: 394 | try: 395 | video_stream.download(target_dir, 'video') 396 | break 397 | except URLError: 398 | print('Failed to download video stream, trying again in 10 seconds') 399 | time.sleep(10) 400 | continue 401 | 402 | for attempt in range(5): 403 | if attempt > 2: 404 | audio_stream.download(target_dir, 'audio') 405 | break 406 | else: 407 | try: 408 | audio_stream.download(target_dir, 'audio') 409 | break 410 | except URLError: 411 | print('Failed to download audio stream, trying again in 10 seconds') 412 | time.sleep(10) 413 | continue 414 | 415 | if 'avc' in video_stream.video_codec.lower(): 416 | video_encode_parameters = 'copy' 417 | else: 418 | video_encode_parameters = 'libx264 -preset slow -crf 18' 419 | 420 | if 'mp4a' in audio_stream.audio_codec.lower(): 421 | audio_encode_parameters = 'copy' 422 | else: 423 | audio_encode_parameters = 'aac -strict -2 -b:a 128k' 424 | 425 | os.system('ffmpeg -i "' + os.path.join(target_dir, 'video') + '".* ' 426 | '-i "' + os.path.join(target_dir, 'audio') + '".* ' 427 | '-c:v ' + video_encode_parameters + ' ' 428 | '-c:a ' + audio_encode_parameters + ' ' 429 | '-threads 4 ' 430 | '"' + os.path.join(target_dir, target_file_name + '.mp4') + '" -y') 431 | 432 | def download_progressive_streams(progressive_stream, target_dir, target_file_name): 433 | 434 | if progressive_stream.subtype.lower() == 'mp4': 435 | 436 | for attempt in range(5): 437 | if attempt > 2: 438 | progressive_stream.download(target_dir, target_file_name) 439 | break 440 | else: 441 | try: 442 | progressive_stream.download(target_dir, target_file_name) 443 | break 444 | except URLError: 445 | print('Failed to download progressive stream, trying again in 10 seconds') 446 | time.sleep(10) 447 | continue 448 | return 449 | else: 450 | 451 | for attempt in range(5): 452 | if attempt > 2: 453 | progressive_stream.download(target_dir, 'progressive') 454 | break 455 | else: 456 | try: 457 | progressive_stream.download(target_dir, 'progressive') 458 | break 459 | except URLError: 460 | print('Failed to download progressive stream, trying again in 10 seconds') 461 | time.sleep(10) 462 | continue 463 | 464 | if 'avc' in progressive_stream.video_codec.lower(): 465 | video_encode_parameters = 'copy' 466 | else: 467 | video_encode_parameters = 'libx264 -preset slow -crf 18' 468 | 469 | if 'mp4a' in progressive_stream.audio_codec.lower(): 470 | audio_encode_parameters = 'copy' 471 | else: 472 | audio_encode_parameters = 'aac -strict -2 -b:a 128k' 473 | 474 | 
os.system('ffmpeg -i "' + os.path.join(target_dir, 'progressive') + '".* ' 475 | '-c:v ' + video_encode_parameters + ' ' 476 | '-c:a ' + audio_encode_parameters + ' ' 477 | '-threads 4 ' 478 | '"' + os.path.join(target_dir, target_file_name + '.mp4') + '" -y') 479 | 480 | # decide adaptive streams to get 481 | video = youtube_video['youtube_object'] 482 | for stream in video.streams.all(): 483 | 484 | if stream.abr is None: 485 | stream.abr = '25kbps' 486 | if stream.audio_codec is None: 487 | stream.audio_codec = 'unknown' 488 | if stream.resolution is None: 489 | stream.resolution = '0p' 490 | if stream.video_codec is None: 491 | stream.video_codec = 'unknown' 492 | print('---------------------------------------------------------------------------------------------------') 493 | print(pprint.pprint(video.streams.all())) 494 | print('---------------------------------------------------------------------------------------------------') 495 | best_audio_stream = get_best_adaptive_audio_stream(video) 496 | best_video_stream = get_best_adaptive_video_stream(video) 497 | best_progressive_stream = get_best_progressive_stream(video) 498 | print(pprint.pprint(best_progressive_stream)) 499 | print(pprint.pprint(best_video_stream)) 500 | print(pprint.pprint(best_audio_stream)) 501 | print('---------------------------------------------------------------------------------------------------') 502 | 503 | if 'mp4a' in best_audio_stream.audio_codec.lower(): 504 | best_audio_stream.abr = int(best_audio_stream.abr.replace('kbps', '')) * 1.7 505 | if 'mp4a' in best_progressive_stream.audio_codec.lower(): 506 | best_progressive_stream.abr = int(best_progressive_stream.abr.replace('kbps', '')) * 1.7 507 | 508 | # decide to get adaptive or progressive 509 | if not ffmpeg_status: 510 | print('Picked the progressive streams because the ffmpeg_installed setting is false.') 511 | download_progressive_streams(best_progressive_stream, download_dir, file_name) 512 | 513 | elif int(best_video_stream.resolution.replace('p', '')) > int(best_progressive_stream.resolution.replace('p', '')): 514 | print('Picked the adaptive streams because of higher video resolution.') 515 | download_adaptive_streams(best_video_stream, best_audio_stream, download_dir, file_name) 516 | 517 | elif 'avc' not in best_progressive_stream.video_codec.lower() and 'avc' in best_video_stream.video_codec.lower(): 518 | print('Picked the adaptive streams because of better video codec.') 519 | download_adaptive_streams(best_video_stream, best_audio_stream, download_dir, file_name) 520 | 521 | elif best_audio_stream.abr > best_progressive_stream.abr * 0.9: 522 | print('Picked the adaptive streams because of better audio.') 523 | download_adaptive_streams(best_video_stream, best_audio_stream, download_dir, file_name) 524 | 525 | else: 526 | print('Picked the progressive stream.') 527 | download_progressive_streams(best_progressive_stream, download_dir, file_name) 528 | 529 | return 530 | 531 | 532 | def move_and_cleanup(source_dir, target_dir, file_name): 533 | 534 | # moving file 535 | if not os.path.isfile(os.path.join(target_dir, file_name)): 536 | shutil.move(os.path.join(source_dir, file_name), os.path.join(target_dir, file_name)) 537 | else: 538 | os.remove(os.path.join(source_dir, file_name)) 539 | # deleting downloaded files 540 | 541 | for folder_name in os.listdir(source_dir): 542 | if fnmatch.fnmatch(folder_name, 'audio.*'): 543 | os.remove(os.path.join(source_dir, folder_name)) 544 | if fnmatch.fnmatch(folder_name, 'video.*'): 545 | 
os.remove(os.path.join(source_dir, folder_name)) 546 | if fnmatch.fnmatch(folder_name, 'progressive.*'): 547 | os.remove(os.path.join(source_dir, folder_name)) 548 | 549 | 550 | def get_official_trailer(config): 551 | ################################################################# 552 | # Video constrains: 553 | extra_name = 'Official Trailer-trailer' 554 | search_suffix = ' Trailer' 555 | video_name_must_contain = ['trailer'] 556 | video_name_must_not_contain = ['Side-by-Side', 'Side by Side', 'SidebySide'] 557 | video_name_tag_bonuses = { 558 | 1.01: ['official'], 559 | 0.99: ['preview', 'teaser'] 560 | } 561 | ################################################################# 562 | 563 | filter_arguments = {'video_name_must_contain': video_name_must_contain, 564 | 'video_name_must_not_contain': video_name_must_not_contain, 565 | 'video_name_tag_bonuses': video_name_tag_bonuses} 566 | 567 | find_extra(config, extra_name, search_suffix, filter_arguments) 568 | 569 | 570 | def get_remastered_trailer(config): 571 | 572 | ################################################################# 573 | # Video constrains: 574 | extra_name = 'Remastered Trailer-trailer' 575 | search_suffix = ' Remastered Trailer' 576 | video_name_must_contain = ['trailer', 'remaster'] 577 | video_name_must_not_contain = ['Side-by-Side', 'Side by Side', 'SidebySide'] 578 | video_name_tag_bonuses = { 579 | 0.8: ['preview', 'teaser'], 580 | 1.05: ['fan'] 581 | } 582 | ################################################################# 583 | 584 | filter_arguments = {'video_name_must_contain': video_name_must_contain, 585 | 'video_name_must_not_contain': video_name_must_not_contain, 586 | 'video_name_tag_bonuses': video_name_tag_bonuses} 587 | 588 | find_extra(config, extra_name, search_suffix, filter_arguments) 589 | 590 | def retrieve_web_page(url, page_name='page'): 591 | 592 | response = None 593 | print('Downloading ' + page_name + '.') 594 | for attempt in range(20): 595 | try: 596 | response = urlopen(url, timeout=2) 597 | break 598 | except timeout: 599 | print('Failed to download ' + page_name + ' : timed out. Trying again in 2 seconds.') 600 | time.sleep(2) 601 | if attempt > 8: 602 | print('You might have lost internet connection.') 603 | print('Breaking out of loop and committing') 604 | sys.exit() 605 | except HTTPError as e: 606 | raise ValueError('Failed to download ' + page_name + ' : ' + e.msg + '. Skipping.') 607 | except URLError: 608 | print('Failed to download ' + page_name + '. Trying again in 2 seconds') 609 | time.sleep(2) 610 | if attempt > 8: 611 | print('You might have lost internet connection.') 612 | print('Breaking out of loop and committing') 613 | sys.exit() 614 | 615 | return response 616 | 617 | def get_tmdb_movie_id(movie): 618 | if len(movie['imdb_id']) != 9: 619 | raise ValueError("Movie have no IMDB ID. Skipping.") 620 | 621 | response = retrieve_web_page('https://api.themoviedb.org/3/find/' 622 | + movie['imdb_id'] + 623 | '?api_key=' + tmdb_api_key + 624 | '&language=en-US' 625 | '&external_source=imdb_id', 'tmdb id') 626 | 627 | data = json.loads(response.read().decode('utf-8')) 628 | 629 | if len(data['movie_results']) == 0: 630 | raise ValueError('Unable to find TMDB ID. 
Skipping.') 631 | 632 | movie['tmdb_id'] = str(data['movie_results'][0]['id']) 633 | response.close() 634 | 635 | 636 | config_file = 'default_config.cfg' 637 | conf = configparser.ConfigParser() 638 | 639 | while True: 640 | try: 641 | conf.read(config_file) 642 | if conf.getboolean('SETTINGS', 'search_for_remastered'): 643 | get_remastered_trailer(conf) 644 | else: 645 | get_official_trailer(conf) 646 | 647 | with open(config_file, 'w') as new_config_file: 648 | conf.write(new_config_file) 649 | new_config_file.close() 650 | 651 | time.sleep(conf.getint('SETTINGS', 'cooldown')) 652 | 653 | except ValueError as error: 654 | print(error) 655 | print('pytube failed to initialize after 3 attempts, try again at a later date.') 656 | time.sleep(10) 657 | -------------------------------------------------------------------------------- /tools.py: -------------------------------------------------------------------------------- 1 | from _socket import timeout 2 | from urllib.error import HTTPError, URLError 3 | from urllib.request import urlopen 4 | from urllib.parse import quote 5 | import time 6 | import json 7 | import hashlib 8 | import os 9 | 10 | 11 | def hash_file(file_path): 12 | if not os.path.isdir(file_path): 13 | md5 = hashlib.md5() 14 | with open(file_path, 'rb') as file: 15 | for i in range(10): 16 | data = file.read(2**20) 17 | if not data: 18 | break 19 | md5.update(data) 20 | return md5.hexdigest() 21 | 22 | 23 | def get_keyword_list(string): 24 | 25 | ret = ' ' + get_clean_string(string).lower() + ' ' 26 | ret = (ret.replace(' the ', ' ') 27 | .replace(' in ', ' ') 28 | .replace(' a ', ' ') 29 | .replace(' by ', ' ') 30 | .replace(' for ', ' ') 31 | .replace(' is ', ' ') 32 | .replace(' am ', ' ') 33 | .replace(' an ', ' ') 34 | .replace(' in ', ' ') 35 | .replace(' with ', ' ') 36 | .replace(' from ', ' ') 37 | .replace(' and ', ' ') 38 | .replace(' movie ', ' ') 39 | .replace(' trailer ', ' ') 40 | .replace(' interview ', ' ') 41 | .replace(' interviews ', ' ') 42 | .replace(' scenes ', ' ') 43 | .replace(' scene ', ' ') 44 | .replace(' official ', ' ') 45 | .replace(' hd ', ' ') 46 | .replace(' hq ', ' ') 47 | .replace(' lq ', ' ') 48 | .replace(' 1080p ', ' ') 49 | .replace(' 720p ', ' ') 50 | .replace(' of ', ' ')) 51 | 52 | return list(set(space_cleanup(ret).split(' '))) 53 | 54 | 55 | def get_clean_string(string): 56 | ret = ' ' + string.lower() + ' ' 57 | 58 | ret = (ret.replace('(', '') 59 | .replace(')', '') 60 | .replace('[', '') 61 | .replace(']', '') 62 | .replace('{', '') 63 | .replace('}', '') 64 | .replace(':', '') 65 | .replace(';', '') 66 | .replace('?', '') 67 | .replace("'", '') 68 | .replace("’", '') 69 | .replace("´", '') 70 | .replace("`", '') 71 | .replace("*", ' ') 72 | .replace('.', ' ') 73 | .replace('·', '-') 74 | .replace(' -', ' ') 75 | .replace('- ', ' ') 76 | .replace('_', ' ') 77 | .replace(' + ', ' : ') 78 | .replace('+', '/') 79 | .replace(' : ', ' + ') 80 | .replace('/ ', ' ') 81 | .replace(' /', ' ') 82 | .replace(' & ', ' ')) 83 | 84 | ret_tup = ret.split(' ') 85 | ret_count = 0 86 | for ret_tup_count in range(len(ret_tup)-1): 87 | if len(ret_tup[ret_tup_count]) == 1 and len(ret_tup[ret_tup_count + 1]) == 1: 88 | ret_count += 1 89 | ret = ret[:ret_count] + ret[ret_count:ret_count + 1].replace(' ', '.') + ret[ret_count + 1:] 90 | ret_count += 1 91 | else: 92 | ret_count += len(ret_tup[ret_tup_count]) + 1 93 | 94 | return space_cleanup(replace_roman_numbers(ret)) 95 | 96 | 97 | def replace_roman_numbers(string): 98 | ret = ' ' + string.lower() 
+ ' ' 99 | 100 | ret = (ret.replace(' ix ', ' 9 ') 101 | .replace(' viiii ', ' 9 ') 102 | .replace(' viii ', ' 8 ') 103 | .replace(' vii ', ' 7 ') 104 | .replace(' vi ', ' 6 ') 105 | .replace(' iv ', ' 4 ') 106 | .replace(' iiii ', ' 4 ') 107 | .replace(' iii ', ' 3 ') 108 | .replace(' ii ', ' 2 ') 109 | .replace(' trailer 4 ', ' trailer ') 110 | .replace(' trailer 3 ', ' trailer ') 111 | .replace(' trailer 2 ', ' trailer ') 112 | .replace(' trailer 1 ', ' trailer ')) 113 | 114 | return space_cleanup(ret) 115 | 116 | 117 | def make_list_from_string(string, delimiter=',', remove_spaces_next_to_delimiter=True): 118 | if remove_spaces_next_to_delimiter: 119 | while ' ' + delimiter in string: 120 | string = string.replace(' ' + delimiter, delimiter) 121 | while delimiter + ' ' in string: 122 | string = string.replace(delimiter + ' ', delimiter) 123 | 124 | return string.split(delimiter) 125 | 126 | 127 | def space_cleanup(string): 128 | ret = string 129 | while ' ' in ret: 130 | ret = ret.replace(' ', ' ') 131 | while ret.endswith(' '): 132 | ret = ret[:-1] 133 | while ret.startswith(' '): 134 | ret = ret[1:] 135 | return ret 136 | 137 | 138 | def retrieve_web_page(url, page_name='page'): 139 | 140 | response = None 141 | print('Downloading ' + page_name + '.') 142 | 143 | for tries in range(1, 10): 144 | try: 145 | response = urlopen(url, timeout=2) 146 | break 147 | 148 | except UnicodeEncodeError as e: 149 | print('Failed to download ' + page_name + ' : ' + str(e) + '. Skipping.') 150 | break 151 | 152 | except timeout: 153 | if tries > 5: 154 | print('You might have lost internet connection.') 155 | break 156 | 157 | time.sleep(1) 158 | print('Failed to download ' + page_name + ' : timed out. Retrying.') 159 | 160 | except HTTPError as e: 161 | print('Failed to download ' + page_name + ' : ' + str(e) + '. Skipping.') 162 | break 163 | 164 | except URLError: 165 | if tries > 3: 166 | print('You might have lost internet connection.') 167 | raise 168 | 169 | time.sleep(1) 170 | print('Failed to download ' + page_name + '. 
Retrying.') 171 | 172 | return response 173 | 174 | 175 | def apply_query_template(template, keys): 176 | ret = template 177 | for key, value in keys.items(): 178 | if isinstance(value, str): 179 | ret = ret.replace('{' + key + '}', value) 180 | elif isinstance(value, int): 181 | ret = ret.replace('{' + key + '}', str(value)) 182 | elif isinstance(value, float): 183 | ret = ret.replace('{' + key + '}', str(value)) 184 | 185 | return space_cleanup(ret) 186 | 187 | 188 | def get_tmdb_search_data(tmdb_api_key, title): 189 | response = retrieve_web_page('https://api.themoviedb.org/3/search/movie' 190 | '?api_key=' + tmdb_api_key + 191 | '&language=en-US&query=' 192 | + quote(title.encode('utf-8')) + 193 | '&page=1&include_adult=false', 'tmdb movie search page') 194 | if response is None: 195 | return None 196 | data = json.loads(response.read().decode('utf-8')) 197 | response.close() 198 | 199 | return data 200 | 201 | 202 | def get_tmdb_details_data(tmdb_api_key, tmdb_id): 203 | response = retrieve_web_page('https://api.themoviedb.org/3/movie/' 204 | + str(tmdb_id) + 205 | '?api_key=' + tmdb_api_key + 206 | '&language=en-US', 'movie details') 207 | if response is None: 208 | return None 209 | data = json.loads(response.read().decode('utf-8')) 210 | response.close() 211 | 212 | return data 213 | 214 | 215 | def get_tmdb_crew_data(tmdb_api_key, tmdb_id): 216 | pass 217 | -------------------------------------------------------------------------------- /url_finders.py: -------------------------------------------------------------------------------- 1 | from googlesearch import search as google_web_search 2 | from time import sleep 3 | from time import time 4 | import sys 5 | 6 | from urllib.error import HTTPError 7 | 8 | import tools 9 | from bs4 import BeautifulSoup 10 | from urllib.parse import quote 11 | 12 | last = None 13 | 14 | 15 | def google_search(query, limit): 16 | global last 17 | ret_url_list = list() 18 | 19 | for tries in range(1, 10): 20 | try: 21 | if last: 22 | sleep(int(60 - (time() - last))) 23 | except ValueError: 24 | pass 25 | 26 | last = time() 27 | 28 | try: 29 | for url in google_web_search(query, stop=limit): 30 | if 'youtube.com/watch?v=' in url: 31 | ret_url_list.append(url.split('&')[0]) 32 | 33 | except KeyboardInterrupt: 34 | raise 35 | 36 | except HTTPError as e: 37 | print('Google search service unavailable.') 38 | 39 | if tries > 3: 40 | print('Failed to download google search result. Reason: ' + str(e)) 41 | raise 42 | 43 | print('Failed to download google search result, retrying. Reason: ' + str(e)) 44 | sleep(1) 45 | 46 | except: 47 | e = sys.exc_info()[0] 48 | if tries > 3: 49 | print('Failed to download google search result. Reason: ' + str(e)) 50 | raise 51 | 52 | print('Failed to download google search result, retrying. Reason: ' + str(e)) 53 | sleep(1) 54 | else: 55 | break 56 | 57 | return ret_url_list[:limit] 58 | 59 | 60 | def youtube_search(query, limit): 61 | 62 | ret_url_list = list() 63 | 64 | for tries in range(1, 10): 65 | try: 66 | response = tools.retrieve_web_page('https://www.youtube.com/results?search_query=' + 67 | quote(query.encode('utf-8')), 68 | 'youtube search result') 69 | 70 | except KeyboardInterrupt: 71 | raise 72 | 73 | except: 74 | e = sys.exc_info()[0] 75 | if tries > 3: 76 | print('Failed to download youtube search result. Reason: ' + str(e)) 77 | raise 78 | 79 | print('Failed to download youtube search result, retrying. 
Reason: ' + str(e)) 80 | sleep(1) 81 | 82 | else: 83 | if response: 84 | soup = BeautifulSoup(response, "html.parser") 85 | for item in soup.findAll(attrs={'class': 'yt-uix-tile-link'}): 86 | url = 'https://www.youtube.com' + item['href'] 87 | ret_url_list.append(url.split('&')[0]) 88 | break 89 | 90 | return ret_url_list[:limit] 91 | 92 | 93 | def youtube_channel_search(query, limit): 94 | # todo (1): implement youtube_channel_search. 95 | pass 96 | --------------------------------------------------------------------------------
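Usage note: a minimal sketch (not a file in this repository; the query string and the fallback order are illustrative assumptions) showing how the two implemented helpers in url_finders.py can be called. Both take a search query and a result limit and return a list of YouTube watch URLs.

import url_finders

query = 'Example Movie Official Trailer'  # hypothetical search term, for illustration only
urls = url_finders.youtube_search(query, 5)
if not urls:
    # fall back to the rate-limited Google search helper, which already
    # filters its results down to youtube.com/watch URLs
    urls = url_finders.google_search(query, 5)
for url in urls:
    print(url)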