├── .gitattributes ├── DisneyPlus ├── __pycache__ │ └── disneyplus.cpython-37.pyc └── disneyplus.py ├── HBOMax ├── __pycache__ │ └── hbomax.cpython-37.pyc └── hbomax.py ├── Hulu ├── __pycache__ │ └── hulu.cpython-37.pyc └── hulu.py ├── LICENSE.md ├── Netflix ├── __pycache__ │ └── netflix.cpython-37.pyc └── netflix.py ├── PrimeVideo ├── __pycache__ │ └── primevideo.cpython-37.pyc └── primevideo.py ├── README.md ├── chromedriver.exe ├── config.py ├── search.py └── utils.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /DisneyPlus/__pycache__/disneyplus.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MichaelRomeroJr/PyMovieSearch/470d2fa25adea9436ca8f93234e2bd815799b9c4/DisneyPlus/__pycache__/disneyplus.cpython-37.pyc -------------------------------------------------------------------------------- /DisneyPlus/disneyplus.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import config 3 | from time import sleep 4 | from selenium.webdriver.common.action_chains import ActionChains 5 | 6 | def login(driver): 7 | 8 | driver.get("https://www.disneyplus.com/home") 9 | input("sign into Disney+ press Enter to continue: ") 10 | 11 | return 12 | 13 | def movie_search(driver, movie_title): 14 | 15 | driver.get("https://www.disneyplus.com/search") 16 | sleep(2) 17 | 18 | # Send keys w/o clicking on element 19 | actions = ActionChains(driver) 20 | actions.send_keys(movie_title) 21 | actions.perform() 22 | 23 | # After typing wait for page to load 24 | sleep(2) 25 | return 26 | 27 | def scan_results(driver, movie_title): 28 | movie_found = False 29 | 30 | defualt_elements = driver.find_elements_by_class_name("gv2-asset") 31 | for web_element in defualt_elements: 32 | # iterate through we elemets of images 33 | #nested_elements = web_element.find_elements_by_class_name("sc-hMqMXs") #5/20 34 | nested_elements = web_element.find_elements_by_class_name("sc-cvbbAY") # 6/24 35 | 36 | for elem in nested_elements: 37 | # first subfield 38 | element_id = elem.get_attribute('data-testid') 39 | 40 | #if "search-result"element_id == "movie-title": # 5/24 41 | if "search-result" in element_id: 42 | # second subfield where aria label w/ name is located 43 | sub_elements = elem.find_elements_by_class_name("sc-kpOJdX") 44 | 45 | for e in sub_elements: 46 | current_title = e.get_attribute('aria-label') 47 | print(f"current_title: {current_title}") 48 | if movie_title in current_title: 49 | print(f"Found '{movie_title}' as '{current_title}' ") 50 | movie_found = True 51 | 52 | return movie_found 53 | 54 | def search(driver, movie_title): 55 | 56 | driver.get("https://www.disneyplus.com/home") 57 | sleep(5) 58 | 59 | movie_search(driver, movie_title=movie_title) 60 | 61 | is_on_disneyplus = scan_results(driver, movie_title=movie_title) 62 | 63 | return is_on_disneyplus 64 | -------------------------------------------------------------------------------- /HBOMax/__pycache__/hbomax.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MichaelRomeroJr/PyMovieSearch/470d2fa25adea9436ca8f93234e2bd815799b9c4/HBOMax/__pycache__/hbomax.cpython-37.pyc -------------------------------------------------------------------------------- /HBOMax/hbomax.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import config 3 | from time import sleep 4 | from selenium.webdriver.common.action_chains import ActionChains 5 | 6 | def login(driver): 7 | 8 | driver.get("https://play.hbomax.com/") 9 | input("Manually sign into Netflix press Enter to continue: ") 10 | 11 | return 12 | 13 | def select_profile(driver): 14 | """ 15 | Set HBO Max profile name in the config file as HBOMAX_ACC 16 | """ 17 | 18 | defualt_elements = driver.find_elements_by_class_name("default") 19 | for web_element in defualt_elements: 20 | nested_elements = web_element.find_elements_by_class_name("class1") 21 | 22 | for element in nested_elements: 23 | e = element.find_elements_by_class_name("class7") 24 | 25 | for i in e: 26 | if config.HBOMAX_ACC in i.text: 27 | i.click() 28 | sleep(2) 29 | return 30 | return 31 | 32 | def movie_search(driver, movie_title): 33 | 34 | driver.find_element_by_css_selector("[aria-label=Search]").click() 35 | sleep(1) 36 | 37 | # Send keys w/o clicking on element 38 | actions = ActionChains(driver) 39 | actions.send_keys(movie_title) 40 | actions.perform() 41 | 42 | # After typing wait for page to load 43 | sleep(2) 44 | return 45 | 46 | def scan_results(driver, movie_title): 47 | 48 | movie_found = False 49 | 50 | defualt_elements = driver.find_elements_by_class_name("default") 51 | for web_element in defualt_elements: 52 | current_title = web_element.get_attribute('aria-label') 53 | link = web_element.get_attribute('href') 54 | if current_title is not None: 55 | if movie_title in current_title: 56 | print(f"Found '{movie_title}' as '{current_title}' ") 57 | movie_found = True 58 | 59 | return movie_found 60 | 61 | def search(driver, movie_title): 62 | driver.get("https://play.hbomax.com/") 63 | sleep(5) 64 | 65 | select_profile(driver) 66 | movie_search(driver, movie_title=movie_title) 67 | 68 | is_on_hbomax = scan_results(driver, movie_title=movie_title) 69 | 70 | return is_on_hbomax 71 | -------------------------------------------------------------------------------- /Hulu/__pycache__/hulu.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MichaelRomeroJr/PyMovieSearch/470d2fa25adea9436ca8f93234e2bd815799b9c4/Hulu/__pycache__/hulu.cpython-37.pyc -------------------------------------------------------------------------------- /Hulu/hulu.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import config 3 | from time import sleep 4 | from selenium.webdriver.common.action_chains import ActionChains 5 | 6 | def login(driver): 7 | 8 | driver.get("https://www.hulu.com/hub/home") 9 | input("Manually sign into Netflix press Enter to continue: ") 10 | return 11 | 12 | 13 | def movie_search(driver, movie_title): 14 | driver.get("https://www.hulu.com/search") 15 | 16 | driver.find_element_by_class_name("SearchBar").click() #.send_keys("Some Movie") 17 | sleep(2) 18 | 19 | # # Send keys w/o clicking on element 20 | actions = ActionChains(driver) 21 | actions.send_keys(movie_title) 22 | actions.perform() 23 | 24 | sleep(2) 25 | return 26 | 27 | 28 | def scan_results(driver, movie_title): 29 | 30 | movie_found = False 31 | movie_url = "" 32 | movie_elements = driver.find_elements_by_class_name("ListItem") 33 | for element in movie_elements: 34 | 35 | title = element.find_elements_by_class_name("ListItem__content") 36 | for elem in title: 37 | #print(f"Movie: {elem.text}") 38 | if movie_title in elem.text: 39 | #print(f"Found: {elem.text}") 40 | print(f"Found '{movie_title}' as '{elem.text}' ") 41 | movie_url = element.get_attribute('href') 42 | movie_found = True 43 | return movie_found, movie_url 44 | 45 | return movie_found, movie_url 46 | 47 | def check_if_free(driver, available, movie_hulu_url): 48 | """ 49 | Check if "Watch Movie" button is there 50 | if not, it's likely available in a special package (Starz etc) or availabe for Rent on Hulu. 51 | """ 52 | is_free = False 53 | 54 | if available: 55 | driver.get(movie_hulu_url) 56 | sleep(3) 57 | 58 | watch_movie_button = driver.find_elements_by_class_name("WatchAction") 59 | for e in watch_movie_button: 60 | #print(e.text) 61 | #print(e.get_attribute('href')) 62 | if e.text == "WATCH MOVIE": 63 | is_free = True 64 | 65 | return is_free 66 | 67 | def search(driver, movie_title): 68 | 69 | driver.get("https://www.hulu.com/hub/home") 70 | 71 | movie_search(driver, movie_title=movie_title) 72 | availability, movie_url = scan_results(driver, movie_title=movie_title) 73 | 74 | is_on_hulu= check_if_free(driver, available=availability, movie_hulu_url=movie_url) 75 | 76 | if availability and (not is_on_hulu): 77 | print(f"Hulu: available on Premium Package / Rental") 78 | 79 | return is_on_hulu 80 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) [2021] [Michael Romero Jr] 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Netflix/__pycache__/netflix.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MichaelRomeroJr/PyMovieSearch/470d2fa25adea9436ca8f93234e2bd815799b9c4/Netflix/__pycache__/netflix.cpython-37.pyc -------------------------------------------------------------------------------- /Netflix/netflix.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import config 3 | from time import sleep 4 | from selenium.webdriver.common.action_chains import ActionChains 5 | 6 | def login(driver): 7 | driver.get("https://www.netflix.com/") 8 | input("Manually sign into Netflix press Enter to continue: ") 9 | 10 | return 11 | 12 | 13 | def select_profile(driver): 14 | """ 15 | Set Netflix profile name in the config file as NETFLIX_ACC 16 | """ 17 | 18 | try: 19 | # Select profile 20 | profiles = driver.find_elements_by_class_name("profile-name") 21 | for profile in profiles: 22 | #print(profile.text) # print names of Netflix account names 23 | if config.NETFLIX_ACC == profile.text: 24 | profile.click() 25 | return 26 | except: 27 | print(f"select_profile fail") 28 | 29 | return 30 | 31 | 32 | def movie_search(driver, movie_title): 33 | driver.find_element_by_class_name("icon-search").click() 34 | sleep(2) 35 | 36 | # Send keys w/o clicking on element 37 | actions = ActionChains(driver) 38 | actions.send_keys(movie_title) 39 | actions.perform() 40 | 41 | # After typing wait for page to load 42 | sleep(2) 43 | return 44 | 45 | 46 | def scan_results(driver, movie_title): 47 | 48 | movie_found = False 49 | 50 | movie_titles = driver.find_elements_by_class_name("fallback-text") 51 | for movie in movie_titles: 52 | #print(f"Title: {movie.text}") 53 | if movie.text == movie_title: 54 | movie_found = True 55 | 56 | return movie_found 57 | 58 | 59 | def search(driver, movie_title): 60 | driver.get("https://www.netflix.com/") 61 | 62 | select_profile(driver) 63 | movie_search(driver, movie_title=movie_title) 64 | 65 | is_on_netflix = scan_results(driver, movie_title=movie_title) 66 | 67 | return is_on_netflix 68 | -------------------------------------------------------------------------------- /PrimeVideo/__pycache__/primevideo.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MichaelRomeroJr/PyMovieSearch/470d2fa25adea9436ca8f93234e2bd815799b9c4/PrimeVideo/__pycache__/primevideo.cpython-37.pyc -------------------------------------------------------------------------------- /PrimeVideo/primevideo.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import config 3 | from time import sleep 4 | from selenium.webdriver.common.action_chains import ActionChains 5 | 6 | def login(driver): 7 | 8 | driver.get("https://www.amazon.com/gp/video/storefront") 9 | input("Manually sign into Netflix press Enter to continue: ") 10 | 11 | return 12 | 13 | 14 | def movie_search(driver, movie_title): 15 | 16 | driver.find_element_by_class_name("nav-search-field ").click() 17 | sleep(2) 18 | 19 | # Send keys w/o clicking on element 20 | actions = ActionChains(driver) 21 | actions.send_keys(movie_title) 22 | actions.perform() 23 | 24 | driver.find_element_by_id("nav-search-submit-button").click() 25 | 26 | return 27 | 28 | 29 | def scan_results(driver, movie_title): 30 | 31 | movie_found = False 32 | 33 | first_movie_elems = driver.find_elements_by_xpath("/html/body/div[1]/div[2]/div[1]/div/div[1]/div/span[3]/div[2]/div[1]/div/div/div/div[2]/div[2]/div/div[1]/h2/a/span") 34 | for elem in first_movie_elems: 35 | title = elem.text 36 | if movie_title in title: 37 | # is "Without Remose" in "Tom Clancy's Without Remorse" 38 | movie_found = True 39 | print(f"Found '{movie_title}' as '{title}' ") 40 | 41 | return movie_found 42 | 43 | 44 | def check_if_free(driver, available): 45 | """ 46 | Check if "Watch Movie" button is there 47 | if not, it's likely available in a special package (Starz etc) or availabe for Rent on Hulu. 48 | """ 49 | is_free = False 50 | 51 | if available: 52 | watch_movie_button = driver.find_elements_by_xpath("/html/body/div[1]/div[2]/div[1]/div/div[1]/div/span[3]/div[2]/div[1]/div/div/div/div[2]/div[2]/div/div[3]/div[1]/div/div[2]/span/span/a") 53 | 54 | for e in watch_movie_button: 55 | #print(e.text) 56 | #print(e.get_attribute('href')) 57 | if e.text == "Watch now": 58 | is_free = True 59 | 60 | return is_free 61 | 62 | def search(driver, movie_title): 63 | driver.get("https://www.amazon.com/gp/video/storefront") 64 | 65 | movie_search(driver, movie_title=movie_title) 66 | 67 | availability = scan_results(driver, movie_title=movie_title) 68 | is_on_prime = check_if_free(driver, available=availability) 69 | 70 | if availability and (not is_on_prime): 71 | print(f"primevideo: available as Rental / Purchase") 72 | 73 | return is_on_prime 74 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | Screen Shot 2021-05-20 at 9 42 16 AM 3 | 4 | 5 | ## Install 6 | - git clone and cd to directory 7 | - install Selenium 8 | - download chromedriver.exe to same directory 9 | 10 | ## First Run 11 | Use `--setup True` for the first run. 12 | Platforms like PrimeVideo, Hulu, and HBO Max aren't very kind to bots so `ChromeOptions` loads cookies from previous useage. First run opens a chrome browser, goes to streaming platform and asks the user to log in. (Hulu and Amazon require captcha's/SMS).
13 | Once the browser is recognized the first time, it stays logged in the next time. 14 | 15 | ## Regular Use 16 | `python search.py --movie Movie Title One, Movie Title Two --exclude HBOMax Netflix`
17 | This will search for the two movies (Movie Title One and Movie Title Two) on all platforms except HBOMax and Netflix. 18 | Exclude is empty by defualt so it checks all platforms by defualt. 19 | 20 | ## Future Updates 21 | - Improve search results: Mainly uses `if substring in string` where `substring` is user's movie_title and `string` is movie title as listed on streaming platform. (I.e. movie_title="Now You See Me" is found as "Pretty Little Liars, S4 E12: Now You See Me, Now You Don’t." which is not ideal) 22 | - threading/parallel: `from multiprocessing import Pool` to query platforms simultaneously. 23 | - headless browser mode 24 | -------------------------------------------------------------------------------- /chromedriver.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MichaelRomeroJr/PyMovieSearch/470d2fa25adea9436ca8f93234e2bd815799b9c4/chromedriver.exe -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | 3 | # NETFLIX_EMAIL = input("Enter Netflix email: ") 4 | # NETFLIX_PASS = input("Enter Netflix password: ") 5 | NETFLIX_ACC = "Macbook" 6 | 7 | # HULU_EMAIL = input("Enter Hulu email: ") 8 | # HULU_PASS = input("Enter Hulu email: ") 9 | 10 | # PVIDEO_EMAIL = input("Enter Amazon Prime email: ") 11 | # PVIDEO_PASS = input("Enter Amazon Prime password: ") 12 | 13 | # HBOMAX_EMAIL = input("Enter HBO Max email: ") 14 | # HBOMAX_PASS = input("Enter HBO Max password: ") 15 | HBOMAX_ACC = "michael" 16 | 17 | 18 | # DISNEY_EMAIL = input("Enter Disney+ email: ") 19 | # DISNEY_PASS = input("Enter Disney+ pass: ") 20 | 21 | # executable path for chrome driver 22 | DRIVER_EXECUTABLE_PATH = pathlib.Path(__file__).parent.absolute().joinpath("chromedriver.exe") 23 | #DRIVER_EXECUTABLE_PATH = "/mnt/c/Python/chromedrivery.exe" -------------------------------------------------------------------------------- /search.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import pathlib 3 | from selenium import webdriver 4 | from time import sleep 5 | 6 | import config 7 | from Netflix import netflix 8 | from Hulu import hulu 9 | from PrimeVideo import primevideo 10 | from HBOMax import hbomax 11 | from DisneyPlus import disneyplus 12 | 13 | import argparse 14 | 15 | import utils 16 | 17 | def configure_chrome_driver(): 18 | options = webdriver.ChromeOptions() 19 | options.add_argument(f"user-data-dir={pathlib.Path(__file__).parent.absolute().joinpath('chrome-profile')}") 20 | the_driver = webdriver.Chrome(executable_path=config.DRIVER_EXECUTABLE_PATH, options=options) 21 | 22 | #the_driver = webdriver.Chrome('/mnt/c/Python/chromedriver.exe') # hardcode path 23 | return the_driver 24 | 25 | def arg_parse(): 26 | """ 27 | Parse arguements to the detect module 28 | """ 29 | 30 | parser = argparse.ArgumentParser(description='Python app to query streaming platforms for movie title. ') 31 | parser.add_argument('--movie', '--movies', help="list of movies (seperated by commas) to query different streaming platforms.", nargs='+', default=[]) 32 | parser.add_argument("--setup", dest = "setup", help = "First run is for streaming platform sign ins.", default = False) 33 | parser.add_argument('--exclude', help="platforms to exclude from search", nargs='+', default=[]) 34 | 35 | return parser.parse_args() 36 | 37 | 38 | 39 | if __name__ == '__main__': 40 | 41 | args = arg_parse() 42 | 43 | # convert [movie, title, one, movie, title, two] 44 | # to {0: "movie title one" 1: "movie title two"} 45 | movies_dict = utils.create_movie_args_dict(args_list=args.movie) 46 | movies_dict = utils.movie_dict_format(list=args.movie, dictionary=movies_dict) 47 | 48 | print(f"movies to query: ") 49 | for key in movies_dict: 50 | print(f"{movies_dict[key]}") 51 | 52 | driver = configure_chrome_driver() 53 | for key in movies_dict: 54 | movie = movies_dict[key] 55 | 56 | # First setup is just logging in 57 | if args.setup: 58 | print("First run: ") 59 | utils.setup(driver) 60 | 61 | print(f"Looking for '{movie}' ") 62 | 63 | if "DisneyPlus" not in args.exclude: 64 | print(f"Querying Disney+: ", end = " ") 65 | if disneyplus.search(driver, movie): 66 | print(f"'{movie}' is on Disney+") 67 | else: 68 | print(f"not available. ") 69 | 70 | if "HBOMax" not in args.exclude: 71 | print(f"Querying HBOMax: ", end = " ") 72 | if hbomax.search(driver, movie): 73 | print(f"'{movie}' is on HBO Max") 74 | else: 75 | print(f"not available. ") 76 | 77 | if "Hulu" not in args.exclude: 78 | print(f"Querying Hulu: ", end = " ") 79 | if hulu.search(driver, movie): 80 | print(f"'{movie}' is on Hulu") 81 | else: 82 | print(f"not available. ") 83 | 84 | if "Netflix" not in args.exclude: 85 | print(f"Querying Netflix: ", end = " ") 86 | if netflix.search(driver, movie): 87 | print(f"'{movie}' is on Netflix") 88 | else: 89 | print(f"not available. ") 90 | 91 | if "PrimeVideo" not in args.exclude: 92 | print(f"Querying PrimeVideo: ", end = " ") 93 | if primevideo.search(driver, movie): 94 | print(f"'{movie}' is on PrimeVideo") 95 | else: 96 | print(f"not available. ") 97 | 98 | print() 99 | #driver.close() 100 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import pathlib 3 | from selenium import webdriver 4 | 5 | import config 6 | from Netflix import netflix 7 | from Hulu import hulu 8 | from PrimeVideo import primevideo 9 | from HBOMax import hbomax 10 | from DisneyPlus import disneyplus 11 | 12 | def setup(driver, exclude_list): 13 | if "DisneyPlus" not in exclude_list: 14 | disneyplus.login(driver) 15 | if "HBOMax" not in exclude_list: 16 | hbomax.login(driver) 17 | if "Hulu" not in exclude_list: 18 | hulu.login(driver) 19 | if "Netflix" not in exclude_list: 20 | netflix.login(driver) 21 | if "PrimeVideo" not in exclude_list: 22 | primevideo.login(driver) 23 | 24 | return 25 | 26 | def create_movie_args_dict(args_list): 27 | count = len(args_list) 28 | 29 | movies_dict = {} 30 | for n in range(count): 31 | movies_dict[n] = "" 32 | 33 | return movies_dict 34 | 35 | def movie_dict_format(list, dictionary): 36 | 37 | index=0 38 | movies_dict={} 39 | got_title = False 40 | 41 | # iterate though list from args.movies [movie, title, one, movie, title, two] 42 | for elem in list: 43 | dictionary[index] = dictionary[index] + " " + elem 44 | if "," in elem: 45 | index+=1 46 | 47 | movies = {} 48 | index=0 49 | for key in dictionary: 50 | if len(dictionary[key]) > 0: 51 | title = dictionary[key] 52 | 53 | if title[0] == " ": # remove leading space 54 | title = title[1:] 55 | 56 | if title[-1] == ",": # remove trailing comma 57 | title = title[:-1] 58 | movies[index] = title 59 | index+=1 60 | 61 | return movies 62 | --------------------------------------------------------------------------------