├── .gitattributes
├── DisneyPlus
├── __pycache__
│ └── disneyplus.cpython-37.pyc
└── disneyplus.py
├── HBOMax
├── __pycache__
│ └── hbomax.cpython-37.pyc
└── hbomax.py
├── Hulu
├── __pycache__
│ └── hulu.cpython-37.pyc
└── hulu.py
├── LICENSE.md
├── Netflix
├── __pycache__
│ └── netflix.cpython-37.pyc
└── netflix.py
├── PrimeVideo
├── __pycache__
│ └── primevideo.cpython-37.pyc
└── primevideo.py
├── README.md
├── chromedriver.exe
├── config.py
├── search.py
└── utils.py
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
--------------------------------------------------------------------------------
/DisneyPlus/__pycache__/disneyplus.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MichaelRomeroJr/PyMovieSearch/470d2fa25adea9436ca8f93234e2bd815799b9c4/DisneyPlus/__pycache__/disneyplus.cpython-37.pyc
--------------------------------------------------------------------------------
/DisneyPlus/disneyplus.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import config
3 | from time import sleep
4 | from selenium.webdriver.common.action_chains import ActionChains
5 |
def login(driver):
    """Open the Disney+ home page and wait for the user to sign in by hand.

    Args:
        driver: Selenium driver used to load the page.
    """
    driver.get("https://www.disneyplus.com/home")
    # Execution blocks here until the user presses Enter in the terminal.
    input("sign into Disney+ press Enter to continue: ")
12 |
def movie_search(driver, movie_title):
    """Load the Disney+ search page and type ``movie_title``.

    Args:
        driver: Selenium driver used to load the page and send keys.
        movie_title: Text to type into the search page.
    """
    driver.get("https://www.disneyplus.com/search")
    sleep(2)

    # Type the title without locating or clicking an input element first.
    ActionChains(driver).send_keys(movie_title).perform()

    # Pause after typing so the page can load before the caller continues.
    sleep(2)
26 |
def scan_results(driver, movie_title):
    """Report whether ``movie_title`` appears in the Disney+ search results.

    Walks every ``gv2-asset`` element, then its ``sc-cvbbAY`` children whose
    ``data-testid`` contains ``"search-result"``, and compares the
    ``aria-label`` of their ``sc-kpOJdX`` descendants with the title.

    Args:
        driver: Selenium driver showing the search results.
        movie_title: Title to look for (case-sensitive substring match).

    Returns:
        True if at least one ``aria-label`` contains ``movie_title``,
        otherwise False.
    """
    movie_found = False

    for asset in driver.find_elements_by_class_name("gv2-asset"):
        # Class name in use as of 6/24; it was "sc-hMqMXs" on 5/20, so expect
        # it to change again.
        for tile in asset.find_elements_by_class_name("sc-cvbbAY"):
            test_id = tile.get_attribute('data-testid')

            # get_attribute returns None when the attribute is missing;
            # guard so the substring test cannot raise TypeError.
            if test_id is None or "search-result" not in test_id:
                continue

            # The aria-label holding the title sits one level further down.
            for labelled in tile.find_elements_by_class_name("sc-kpOJdX"):
                current_title = labelled.get_attribute('aria-label')
                print(f"current_title: {current_title}")
                if current_title is not None and movie_title in current_title:
                    print(f"Found '{movie_title}' as '{current_title}' ")
                    movie_found = True

    return movie_found
53 |
def search(driver, movie_title):
    """Search Disney+ for ``movie_title`` and return whether it was found.

    Args:
        driver: Selenium driver to drive the browser with.
        movie_title: Title to search for.

    Returns:
        The value returned by ``scan_results`` for this title.
    """
    driver.get("https://www.disneyplus.com/home")
    sleep(5)

    movie_search(driver, movie_title=movie_title)
    return scan_results(driver, movie_title=movie_title)
64 |
--------------------------------------------------------------------------------
/HBOMax/__pycache__/hbomax.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MichaelRomeroJr/PyMovieSearch/470d2fa25adea9436ca8f93234e2bd815799b9c4/HBOMax/__pycache__/hbomax.cpython-37.pyc
--------------------------------------------------------------------------------
/HBOMax/hbomax.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import config
3 | from time import sleep
4 | from selenium.webdriver.common.action_chains import ActionChains
5 |
def login(driver):
    """Open HBO Max and wait for the user to sign in by hand.

    Args:
        driver: Selenium driver used to load the page.
    """
    driver.get("https://play.hbomax.com/")
    # The prompt names the platform actually being opened (it used to say
    # Netflix). Execution blocks until the user presses Enter.
    input("Manually sign into HBO Max press Enter to continue: ")
12 |
def select_profile(driver):
    """Click the HBO Max profile whose text contains ``config.HBOMAX_ACC``.

    Set the HBO Max profile name in the config file as HBOMAX_ACC.
    Returns without clicking anything when no element matches.

    Args:
        driver: Selenium driver showing the page to search for the profile.
    """
    for container in driver.find_elements_by_class_name("default"):
        for group in container.find_elements_by_class_name("class1"):
            for candidate in group.find_elements_by_class_name("class7"):
                if config.HBOMAX_ACC not in candidate.text:
                    continue
                # First match wins: click it, pause, and stop searching.
                candidate.click()
                sleep(2)
                return
31 |
def movie_search(driver, movie_title):
    """Click the HBO Max search control and type ``movie_title``.

    Args:
        driver: Selenium driver with HBO Max already loaded.
        movie_title: Text to type after clicking search.
    """
    search_control = driver.find_element_by_css_selector("[aria-label=Search]")
    search_control.click()
    sleep(1)

    # Type the title without locating or clicking an input element first.
    ActionChains(driver).send_keys(movie_title).perform()

    # Pause after typing so the page can load before the caller continues.
    sleep(2)
45 |
def scan_results(driver, movie_title):
    """Report whether ``movie_title`` appears in the HBO Max search results.

    Checks the ``aria-label`` of every element with class ``default``.

    Args:
        driver: Selenium driver showing the search results.
        movie_title: Title to look for (case-sensitive substring match).

    Returns:
        True if at least one ``aria-label`` contains ``movie_title``,
        otherwise False.
    """
    movie_found = False

    for tile in driver.find_elements_by_class_name("default"):
        current_title = tile.get_attribute('aria-label')
        # Elements without an aria-label yield None and are skipped. The
        # unused 'href' lookup was dropped to save a driver call per element.
        if current_title is not None and movie_title in current_title:
            print(f"Found '{movie_title}' as '{current_title}' ")
            movie_found = True

    return movie_found
60 |
def search(driver, movie_title):
    """Search HBO Max for ``movie_title`` and return whether it was found.

    Args:
        driver: Selenium driver to drive the browser with.
        movie_title: Title to search for.

    Returns:
        The value returned by ``scan_results`` for this title.
    """
    driver.get("https://play.hbomax.com/")
    sleep(5)

    select_profile(driver)
    movie_search(driver, movie_title=movie_title)
    return scan_results(driver, movie_title=movie_title)
71 |
--------------------------------------------------------------------------------
/Hulu/__pycache__/hulu.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MichaelRomeroJr/PyMovieSearch/470d2fa25adea9436ca8f93234e2bd815799b9c4/Hulu/__pycache__/hulu.cpython-37.pyc
--------------------------------------------------------------------------------
/Hulu/hulu.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import config
3 | from time import sleep
4 | from selenium.webdriver.common.action_chains import ActionChains
5 |
def login(driver):
    """Open Hulu and wait for the user to sign in by hand.

    Args:
        driver: Selenium driver used to load the page.
    """
    driver.get("https://www.hulu.com/hub/home")
    # The prompt names the platform actually being opened (it used to say
    # Netflix). Execution blocks until the user presses Enter.
    input("Manually sign into Hulu press Enter to continue: ")
11 |
12 |
def movie_search(driver, movie_title):
    """Load the Hulu search page, click the search bar and type the title.

    Args:
        driver: Selenium driver used to load the page and send keys.
        movie_title: Text to type into the search bar.
    """
    driver.get("https://www.hulu.com/search")

    search_bar = driver.find_element_by_class_name("SearchBar")
    search_bar.click()
    sleep(2)

    # Keys are sent through an action chain rather than to the element.
    ActionChains(driver).send_keys(movie_title).perform()

    sleep(2)
26 |
27 |
def scan_results(driver, movie_title):
    """Find the first Hulu result whose text contains ``movie_title``.

    Args:
        driver: Selenium driver showing the search results.
        movie_title: Title to look for (case-sensitive substring match).

    Returns:
        ``(True, href)`` for the first matching ``ListItem``, where ``href``
        is that item's ``href`` attribute; ``(False, "")`` when none match.
    """
    for item in driver.find_elements_by_class_name("ListItem"):
        for content in item.find_elements_by_class_name("ListItem__content"):
            if movie_title not in content.text:
                continue
            print(f"Found '{movie_title}' as '{content.text}' ")
            # Stop at the first hit; the link comes from the enclosing item.
            return True, item.get_attribute('href')

    return False, ""
46 |
def check_if_free(driver, available, movie_hulu_url):
    """Check whether the movie page shows a "WATCH MOVIE" button.

    If the button is missing, the title is likely only available in a
    special package (Starz etc.) or for rent on Hulu.

    Args:
        driver: Selenium driver used to open the movie page.
        available: Whether the title was found in the search results; when
            falsy the page is not opened.
        movie_hulu_url: URL of the movie page to open.

    Returns:
        True if a ``WatchAction`` element has the exact text "WATCH MOVIE",
        otherwise False.
    """
    if not available:
        return False

    driver.get(movie_hulu_url)
    sleep(3)

    is_free = False
    for button in driver.find_elements_by_class_name("WatchAction"):
        if button.text == "WATCH MOVIE":
            is_free = True

    return is_free
66 |
def search(driver, movie_title):
    """Search Hulu for ``movie_title`` and return whether it can be watched.

    Args:
        driver: Selenium driver to drive the browser with.
        movie_title: Title to search for.

    Returns:
        The value returned by ``check_if_free`` for the found title.
    """
    driver.get("https://www.hulu.com/hub/home")

    movie_search(driver, movie_title=movie_title)
    found, url = scan_results(driver, movie_title=movie_title)
    free = check_if_free(driver, available=found, movie_hulu_url=url)

    # Listed in the results but without a watch button.
    if found and not free:
        print("Hulu: available on Premium Package / Rental")

    return free
80 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) [2021] [Michael Romero Jr]
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Netflix/__pycache__/netflix.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MichaelRomeroJr/PyMovieSearch/470d2fa25adea9436ca8f93234e2bd815799b9c4/Netflix/__pycache__/netflix.cpython-37.pyc
--------------------------------------------------------------------------------
/Netflix/netflix.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import config
3 | from time import sleep
4 | from selenium.webdriver.common.action_chains import ActionChains
5 |
def login(driver):
    """Open Netflix and wait for the user to sign in by hand.

    Args:
        driver: Selenium driver used to load the page.
    """
    driver.get("https://www.netflix.com/")
    # Execution blocks here until the user presses Enter in the terminal.
    input("Manually sign into Netflix press Enter to continue: ")
11 |
12 |
def select_profile(driver):
    """Click the Netflix profile whose name equals ``config.NETFLIX_ACC``.

    Set the Netflix profile name in the config file as NETFLIX_ACC.
    Selection is best effort: a failure is reported on stdout and the
    function returns normally. Nothing is clicked when no profile matches.

    Args:
        driver: Selenium driver showing the profile page.
    """
    try:
        for profile in driver.find_elements_by_class_name("profile-name"):
            if config.NETFLIX_ACC == profile.text:
                profile.click()
                return
    except Exception as err:
        # Catch Exception rather than using a bare except so Ctrl-C and
        # SystemExit still propagate, and show what went wrong.
        print(f"select_profile fail: {err}")
30 |
31 |
def movie_search(driver, movie_title):
    """Click the Netflix search icon and type ``movie_title``.

    Args:
        driver: Selenium driver with Netflix already loaded.
        movie_title: Text to type after clicking the search icon.
    """
    search_icon = driver.find_element_by_class_name("icon-search")
    search_icon.click()
    sleep(2)

    # Type the title without locating or clicking an input element first.
    ActionChains(driver).send_keys(movie_title).perform()

    # Pause after typing so the page can load before the caller continues.
    sleep(2)
44 |
45 |
def scan_results(driver, movie_title):
    """Report whether a Netflix result is titled exactly ``movie_title``.

    Args:
        driver: Selenium driver showing the search results.
        movie_title: Title to look for (exact, case-sensitive match).

    Returns:
        True if the text of any ``fallback-text`` element equals
        ``movie_title``, otherwise False.
    """
    # Read every label first, then test for an exact match.
    labels = [tile.text for tile in driver.find_elements_by_class_name("fallback-text")]
    return movie_title in labels
57 |
58 |
def search(driver, movie_title):
    """Search Netflix for ``movie_title`` and return whether it was found.

    Args:
        driver: Selenium driver to drive the browser with.
        movie_title: Title to search for.

    Returns:
        The value returned by ``scan_results`` for this title.
    """
    driver.get("https://www.netflix.com/")

    select_profile(driver)
    movie_search(driver, movie_title=movie_title)
    return scan_results(driver, movie_title=movie_title)
68 |
--------------------------------------------------------------------------------
/PrimeVideo/__pycache__/primevideo.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MichaelRomeroJr/PyMovieSearch/470d2fa25adea9436ca8f93234e2bd815799b9c4/PrimeVideo/__pycache__/primevideo.cpython-37.pyc
--------------------------------------------------------------------------------
/PrimeVideo/primevideo.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import config
3 | from time import sleep
4 | from selenium.webdriver.common.action_chains import ActionChains
5 |
def login(driver):
    """Open the Prime Video storefront and wait for a manual sign-in.

    Args:
        driver: Selenium driver used to load the page.
    """
    driver.get("https://www.amazon.com/gp/video/storefront")
    # The prompt names the platform actually being opened (it used to say
    # Netflix). Execution blocks until the user presses Enter.
    input("Manually sign into Prime Video press Enter to continue: ")
12 |
13 |
def movie_search(driver, movie_title):
    """Type ``movie_title`` into the Amazon search field and submit it.

    Args:
        driver: Selenium driver with the Prime Video storefront loaded.
        movie_title: Text to type into the search field.
    """
    search_field = driver.find_element_by_class_name("nav-search-field ")
    search_field.click()
    sleep(2)

    # Keys are sent through an action chain rather than to the element.
    ActionChains(driver).send_keys(movie_title).perform()

    submit_button = driver.find_element_by_id("nav-search-submit-button")
    submit_button.click()
27 |
28 |
def scan_results(driver, movie_title):
    """Report whether ``movie_title`` appears in the located result title(s).

    Only elements matched by one fixed absolute XPath are inspected.

    Args:
        driver: Selenium driver showing the Amazon search results.
        movie_title: Title to look for (case-sensitive substring match), e.g.
            "Without Remorse" matches "Tom Clancy's Without Remorse".

    Returns:
        True if any located element's text contains ``movie_title``,
        otherwise False.
    """
    title_xpath = "/html/body/div[1]/div[2]/div[1]/div/div[1]/div/span[3]/div[2]/div[1]/div/div/div/div[2]/div[2]/div/div[1]/h2/a/span"

    movie_found = False
    for span in driver.find_elements_by_xpath(title_xpath):
        title = span.text
        if movie_title not in title:
            continue
        movie_found = True
        print(f"Found '{movie_title}' as '{title}' ")

    return movie_found
42 |
43 |
44 | def check_if_free(driver, available):
45 | """
46 | Check if "Watch Movie" button is there
47 | if not, it's likely available in a special package (Starz etc) or availabe for Rent on Hulu.
48 | """
49 | is_free = False
50 |
51 | if available:
52 | watch_movie_button = driver.find_elements_by_xpath("/html/body/div[1]/div[2]/div[1]/div/div[1]/div/span[3]/div[2]/div[1]/div/div/div/div[2]/div[2]/div/div[3]/div[1]/div/div[2]/span/span/a")
53 |
54 | for e in watch_movie_button:
55 | #print(e.text)
56 | #print(e.get_attribute('href'))
57 | if e.text == "Watch now":
58 | is_free = True
59 |
60 | return is_free
61 |
def search(driver, movie_title):
    """Search Prime Video for ``movie_title``; return whether it can be watched.

    Args:
        driver: Selenium driver to drive the browser with.
        movie_title: Title to search for.

    Returns:
        The value returned by ``check_if_free`` for the found title.
    """
    driver.get("https://www.amazon.com/gp/video/storefront")

    movie_search(driver, movie_title=movie_title)

    found = scan_results(driver, movie_title=movie_title)
    free = check_if_free(driver, available=found)

    # Listed in the results but without a "Watch now" link.
    if found and not free:
        print("primevideo: available as Rental / Purchase")

    return free
74 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | ## Install
6 | - git clone and cd to directory
7 | - install Selenium
8 | - download chromedriver.exe to same directory
9 |
10 | ## First Run
11 | Use `--setup True` for the first run.
12 | Platforms like PrimeVideo, Hulu, and HBO Max aren't very kind to bots, so `ChromeOptions` loads cookies from previous usage. The first run opens a Chrome browser, goes to each streaming platform and asks the user to log in. (Hulu and Amazon require CAPTCHAs/SMS.)
13 | Once the browser is recognized the first time, it stays logged in the next time.
14 |
15 | ## Regular Use
16 | `python search.py --movie Movie Title One, Movie Title Two --exclude HBOMax Netflix`
17 | This will search for the two movies (Movie Title One and Movie Title Two) on all platforms except HBOMax and Netflix.
18 | `--exclude` is empty by default, so all platforms are checked by default.
19 |
20 | ## Future Updates
21 | - Improve search results: Mainly uses `if substring in string` where `substring` is user's movie_title and `string` is movie title as listed on streaming platform. (I.e. movie_title="Now You See Me" is found as "Pretty Little Liars, S4 E12: Now You See Me, Now You Don’t." which is not ideal)
22 | - threading/parallel: `from multiprocessing import Pool` to query platforms simultaneously.
23 | - headless browser mode
24 |
--------------------------------------------------------------------------------
/chromedriver.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MichaelRomeroJr/PyMovieSearch/470d2fa25adea9436ca8f93234e2bd815799b9c4/chromedriver.exe
--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
1 | import pathlib
2 |
# Netflix
# NETFLIX_EMAIL = input("Enter Netflix email: ")
# NETFLIX_PASS = input("Enter Netflix password: ")
NETFLIX_ACC = "Macbook"  # profile name to select on Netflix

# Hulu
# HULU_EMAIL = input("Enter Hulu email: ")
# HULU_PASS = input("Enter Hulu password: ")

# Prime Video
# PVIDEO_EMAIL = input("Enter Amazon Prime email: ")
# PVIDEO_PASS = input("Enter Amazon Prime password: ")

# HBO Max
# HBOMAX_EMAIL = input("Enter HBO Max email: ")
# HBOMAX_PASS = input("Enter HBO Max password: ")
HBOMAX_ACC = "michael"  # profile name to select on HBO Max

# Disney+
# DISNEY_EMAIL = input("Enter Disney+ email: ")
# DISNEY_PASS = input("Enter Disney+ pass: ")

# Executable path for the Chrome driver: chromedriver.exe next to this file.
DRIVER_EXECUTABLE_PATH = pathlib.Path(__file__).parent.absolute() / "chromedriver.exe"
# DRIVER_EXECUTABLE_PATH = "/mnt/c/Python/chromedriver.exe"
--------------------------------------------------------------------------------
/search.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import pathlib
3 | from selenium import webdriver
4 | from time import sleep
5 |
6 | import config
7 | from Netflix import netflix
8 | from Hulu import hulu
9 | from PrimeVideo import primevideo
10 | from HBOMax import hbomax
11 | from DisneyPlus import disneyplus
12 |
13 | import argparse
14 |
15 | import utils
16 |
def configure_chrome_driver():
    """Create a Chrome driver that reuses a persistent browser profile.

    The profile lives in a ``chrome-profile`` directory next to this file and
    is passed to Chrome as ``user-data-dir``.

    Returns:
        The ``webdriver.Chrome`` instance.
    """
    profile_dir = pathlib.Path(__file__).parent.absolute().joinpath('chrome-profile')

    options = webdriver.ChromeOptions()
    options.add_argument(f"user-data-dir={profile_dir}")

    # config stores the driver location as a pathlib.Path. Pass a plain string
    # so the driver can be launched on interpreters whose subprocess module
    # does not accept path-like items in an argument list (Windows before
    # Python 3.8).
    return webdriver.Chrome(
        executable_path=str(config.DRIVER_EXECUTABLE_PATH),
        options=options,
    )
24 |
def _str_to_bool(value):
    """Convert a command-line token such as "True" or "no" into a bool."""
    if isinstance(value, bool):
        return value
    token = value.strip().lower()
    if token in ("1", "true", "t", "yes", "y", "on"):
        return True
    if token in ("0", "false", "f", "no", "n", "off", ""):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


def arg_parse():
    """Parse the command-line arguments of the search script.

    Options:
        --movie / --movies: one or more tokens; titles are separated by
            commas (defaults to an empty list).
        --setup: boolean flag for the first, sign-in run. Accepts an explicit
            value (``--setup True`` / ``--setup False``) or no value at all
            (bare ``--setup`` means True). Defaults to False.
        --exclude: platform names to skip (defaults to an empty list).

    Returns:
        The ``argparse.Namespace`` with ``movie``, ``setup`` and ``exclude``.
    """
    parser = argparse.ArgumentParser(description='Python app to query streaming platforms for movie title. ')
    parser.add_argument('--movie', '--movies', help="list of movies (seperated by commas) to query different streaming platforms.", nargs='+', default=[])
    # Without a type the value stayed a string, so "--setup False" was truthy.
    parser.add_argument("--setup", dest="setup", help="First run is for streaming platform sign ins.", nargs='?', const=True, default=False, type=_str_to_bool)
    parser.add_argument('--exclude', help="platforms to exclude from search", nargs='+', default=[])

    return parser.parse_args()
36 |
37 |
38 |
if __name__ == '__main__':

    args = arg_parse()

    # Convert [movie, title, one,, movie, title, two]
    # to {0: "movie title one", 1: "movie title two"}
    movies_dict = utils.create_movie_args_dict(args_list=args.movie)
    movies_dict = utils.movie_dict_format(list=args.movie, dictionary=movies_dict)

    print("movies to query: ")
    for title in movies_dict.values():
        print(f"{title}")

    # (name used with --exclude, label in the "Querying" line,
    #  label in the success line, module providing search()).
    platforms = [
        ("DisneyPlus", "Disney+", "Disney+", disneyplus),
        ("HBOMax", "HBOMax", "HBO Max", hbomax),
        ("Hulu", "Hulu", "Hulu", hulu),
        ("Netflix", "Netflix", "Netflix", netflix),
        ("PrimeVideo", "PrimeVideo", "PrimeVideo", primevideo),
    ]

    driver = configure_chrome_driver()

    # The first-run sign-in happens once, before any search, and receives the
    # exclude list that utils.setup requires.
    if args.setup:
        print("First run: ")
        utils.setup(driver, args.exclude)

    for movie in movies_dict.values():
        print(f"Looking for '{movie}' ")

        for exclude_key, query_label, found_label, module in platforms:
            if exclude_key in args.exclude:
                continue

            print(f"Querying {query_label}: ", end = " ")
            if module.search(driver, movie):
                print(f"'{movie}' is on {found_label}")
            else:
                print("not available. ")

        print()
    #driver.close()
100 |
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import pathlib
3 | from selenium import webdriver
4 |
5 | import config
6 | from Netflix import netflix
7 | from Hulu import hulu
8 | from PrimeVideo import primevideo
9 | from HBOMax import hbomax
10 | from DisneyPlus import disneyplus
11 |
def setup(driver, exclude_list=None):
    """Run the manual sign-in step for every platform that is not excluded.

    Args:
        driver: Selenium driver passed to each platform's ``login``.
        exclude_list: Platform names to skip ("DisneyPlus", "HBOMax", "Hulu",
            "Netflix", "PrimeVideo"). Optional; ``None`` means skip nothing,
            so ``setup(driver)`` signs into every platform.
    """
    excluded = () if exclude_list is None else exclude_list

    platforms = (
        ("DisneyPlus", disneyplus),
        ("HBOMax", hbomax),
        ("Hulu", hulu),
        ("Netflix", netflix),
        ("PrimeVideo", primevideo),
    )
    for name, module in platforms:
        if name not in excluded:
            module.login(driver)
25 |
def create_movie_args_dict(args_list):
    """Build a dict with one empty-string slot per item in ``args_list``.

    Args:
        args_list: Sequence whose length sets the number of slots.

    Returns:
        A dict mapping ``0 .. len(args_list) - 1`` to ``""``.
    """
    return dict.fromkeys(range(len(args_list)), "")
34 |
def movie_dict_format(list, dictionary):
    """Group command-line tokens into comma-separated movie titles.

    The tokens are joined with single spaces and split on commas. Each title
    is stripped of surrounding whitespace and empty titles are dropped, e.g.
    ``["Movie", "Title", "One,", "Movie", "Title", "Two"]`` becomes
    ``{0: "Movie Title One", 1: "Movie Title Two"}``.

    Args:
        list: Tokens from ``--movie``. The name shadows the builtin but is
            kept because callers pass it by keyword.
        dictionary: Kept for interface compatibility only. It is neither read
            nor modified.

    Returns:
        A dict mapping consecutive indices starting at 0 to titles, in the
        order given.
    """
    joined = " ".join(list)

    # Splitting the joined text also handles a comma that is its own token
    # and empty tokens, which previously left stray spaces or raised
    # IndexError.
    titles = (chunk.strip() for chunk in joined.split(","))

    return {index: title for index, title in enumerate(t for t in titles if t)}
62 |
--------------------------------------------------------------------------------