├── IMDBList2PlexCollection.py
├── README.md
├── config.ini
└── requirements.txt


/IMDBList2PlexCollection.py:
--------------------------------------------------------------------------------
  1 | #------------------------------------------------------------------------------
  2 | #
  3 | #      Automated IMDB List to Plex Collection Script by /u/deva5610 - V2.0
  4 | #
  5 | #                      Created by modifying the excellent
  6 | #
  7 | #        Automated IMDB Top 250 Plex collection script by /u/SwiftPanda16
  8 | #
  9 | #                         *** USE AT YOUR OWN RISK! ***
 10 | #   *** I AM NOT RESPONSIBLE FOR DAMAGE TO YOUR PLEX SERVER OR LIBRARIES! ***
 11 | #
 12 | #------------------------------------------------------------------------------
 13 | 
 14 | #############################################
 15 | ##### CODE BELOW - DON'T EDIT BELOW HERE#####
 16 | #############################################
 17 | import os
 18 | import sys
 19 | import requests
 20 | import time
 21 | import platform
 22 | from lxml import html
 23 | from plexapi.server import PlexServer
 24 | from tmdbv3api import TMDb
 25 | from tmdbv3api import Movie
 26 | import configparser
 27 | from bs4 import BeautifulSoup
 28 | import re
 29 | import traceback  # Added for error handling
 30 | 
 31 | # Define the path to your config.ini file
 32 | CONFIG_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'config.ini')
 33 | 
 34 | # Start with a nice clean screen
 35 | #os.system('cls' if os.name == 'nt' else 'clear')
 36 | 
 37 | # Hacky solution for Python 2.x & 3.x compatibility
 38 | if hasattr(__builtins__, 'raw_input'):
 39 |     input = raw_input
 40 | 
 41 | ### Header ###
 42 | print("===================================================================")
 43 | print(" Automated IMDB List to Collection script by /u/deva5610 - V1.2 ")
 44 | print(" Created by modifying the excellent  ")
 45 | print(" Automated IMDB Top 250 Plex collection script by /u/SwiftPanda16  ")
 46 | print("===================================================================")
 47 | print("\n")
 48 | 
 49 | def load_config(config_path):
 50 |     config = configparser.ConfigParser()
 51 |     config.read(config_path)
 52 |     return config
 53 | 
 54 | # Load configuration from config.ini
 55 | config_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'config.ini')
 56 | config = load_config(config_path)
 57 | 
 58 | PLEX_URL = config.get('plex', 'url')
 59 | PLEX_TOKEN = config.get('plex', 'token')
 60 | MOVIE_LIBRARIES = config.get('plex', 'library').split(',')
 61 | TMDB_API_KEY = config.get('tmdb', 'apikey')
 62 | 
 63 | #except Exception as e:
 64 | #    print(f"Error loading configuration from {config_path}: {str(e)}")
 65 | #    sys.exit(1)
 66 | 
 67 | def validate_input(imdb_url, page_numbers):
 68 |     # Validate user inputs for IMDb URL and page numbers
 69 |     imdb_url_pattern = r'^https:\/\/www\.imdb\.com\/list\/ls\d+\/$'
 70 |     if not re.match(imdb_url_pattern, imdb_url):
 71 |         raise ValueError("Invalid IMDb URL. It should be in the format 'https://www.imdb.com/list/ls<list_id>/'.")
 72 | 
 73 |     try:
 74 |         page_numbers = int(page_numbers)
 75 |         if page_numbers <= 0:
 76 |             raise ValueError()
 77 |     except ValueError:
 78 |         raise ValueError("Page numbers should be a positive integer.")
 79 | 
 80 | def add_collection(library_key, rating_key):
 81 |     # Add a movie to a Plex collection
 82 |     headers = {"X-Plex-Token": PLEX_TOKEN}
 83 |     params = {
 84 |         "type": 1,
 85 |         "id": rating_key,
 86 |         "collection[0].tag.tag": IMDB_COLLECTION_NAME,
 87 |         "collection.locked": 1
 88 |     }
 89 |     url = f"{PLEX_URL}/library/sections/{library_key}/all"
 90 | 
 91 |     try:
 92 |         response = requests.put(url, headers=headers, params=params)
 93 |         response.raise_for_status()  # Raise an exception for HTTP errors
 94 |         print(f"Added movie to collection: {rating_key}")
 95 |     except Exception as e:
 96 |         print(f"Failed to add movie to collection: {rating_key}")
 97 |         traceback.print_exc()  # Print the exception and its traceback
 98 | 
 99 | def retrieve_movies_from_plex(plex, movie_libraries):
100 |     # Retrieve movies from Plex libraries
101 |     all_movies = []
102 |     for movie_lib in movie_libraries:
103 |         try:
104 |             movie_library = plex.library.section(movie_lib)
105 |             all_movies.extend(movie_library.all())
106 |         except Exception as e:
107 |             print(f"Error retrieving movies from '{movie_lib}' library: {str(e)}")
108 |             traceback.print_exc()  # Print the exception and its traceback
109 |     return all_movies
110 | 
def extract_year(year_element_text):
    """Return the last four-digit run found in the text as an int.

    Taking the last run means a range such as "(1999-2004)" yields 2004.
    Returns None when the text contains no four-digit run.
    """
    candidates = re.findall(r'\d{4}', year_element_text)
    if not candidates:
        return None
    return int(candidates[-1])
117 |     
def retrieve_movies_from_imdb(imdb_url, page_numbers):
    """Scrape title, year and IMDb ID for each entry on the requested list pages.

    Args:
        imdb_url: Base URL of the IMDb list; "?page=<n>" is appended per page.
        page_numbers: Number of pages to fetch, starting at page 1.

    Returns:
        A list of dicts with the keys "title", "year" and "imdb_id". "year"
        is whatever extract_year returns, which may be None. Pages or entries
        that cannot be fetched or parsed are reported and skipped.
    """
    imdb_movies = []

    # Ask IMDb for English titles regardless of where the request comes from.
    headers = {
        "Accept-Language": "en-US,en;q=0.9",
    }

    for page in range(1, int(page_numbers) + 1):
        page_url = f"{imdb_url}?page={page}"

        try:
            response = requests.get(page_url, headers=headers)
            response.raise_for_status()  # Raise an exception for HTTP errors
        except Exception as e:
            print(f"Failed to retrieve page {page} from IMDb: {str(e)}")
            traceback.print_exc()  # Print the exception and its traceback
            continue  # Continue to the next page

        if response.status_code != 200:
            print(f"Failed to retrieve page {page} from IMDb.")
            continue

        soup = BeautifulSoup(response.text, 'html.parser')

        for movie_element in soup.find_all("div", class_="lister-item-content"):
            title_element = movie_element.find("h3", class_="lister-item-header")
            year_element = movie_element.find("span", class_="lister-item-year")
            imdb_link = movie_element.find("a", href=True)

            # All three pieces are required to build a usable record.
            if not (title_element and year_element and imdb_link):
                print(f"Failed to extract movie data from page {page}. Missing elements:")
                if not title_element:
                    print("- Title element not found.")
                if not year_element:
                    print("- Year element not found.")
                if not imdb_link:
                    print("- IMDb link element not found.")
                continue

            # Placeholders so the failure message below never reads an unbound
            # name or a value left over from the previous entry.
            title = "unknown"
            imdb_id = "unknown"
            try:
                title = title_element.find("a").text.strip()
                year = extract_year(year_element.text)
                imdb_id = imdb_link["href"].split("/title/")[1].split("/")[0]

                imdb_movies.append({
                    "title": title,
                    "year": year,
                    "imdb_id": imdb_id
                })
                print(f"Scraped Movie: '{title}' (IMDb ID: {imdb_id}, Year: {year})")
            except Exception as e:
                print(f"Failed to process movie data on page {page} for movie '{title}' (IMDb ID: {imdb_id}): {str(e)}")
                traceback.print_exc()  # Print the exception and its traceback

    return imdb_movies
174 |                     
def match_imdb_to_plex_movies(plex_movies, imdb_movies):
    """Build a dict from IMDb ID to the Plex movie matched for that entry.

    IMDb entries for which find_matching_plex_movie returns nothing are left
    out of the result.
    """
    matches = {}

    for entry in imdb_movies:
        plex_hit = find_matching_plex_movie(entry, plex_movies)
        if not plex_hit:
            continue
        matches[entry["imdb_id"]] = plex_hit

    return matches
185 | 
def find_matching_plex_movie(imdb_movie, plex_movies):
    """Return the first Plex movie that is_matching accepts for this IMDb entry.

    Returns None when no movie in ``plex_movies`` matches.
    """
    return next(
        (candidate for candidate in plex_movies if is_matching(imdb_movie, candidate)),
        None,
    )
192 | 
def is_matching(imdb_movie, plex_movie):
    """Decide whether a scraped IMDb entry and a Plex movie are the same film.

    They match when the titles are identical and the release years differ by
    at most one. If the titles agree but either year is missing or not
    numeric, the pair is treated as a non-match instead of raising.

    Args:
        imdb_movie: Dict with at least the keys "title" and "year".
        plex_movie: Object exposing ``title`` and ``year`` attributes.

    Returns:
        True for a match, False otherwise.
    """
    imdb_title = imdb_movie["title"]
    imdb_year = imdb_movie["year"]
    plex_title = plex_movie.title
    plex_year = plex_movie.year

    if imdb_title != plex_title:
        return False

    try:
        year_gap = abs(int(imdb_year) - int(plex_year))
    except (TypeError, ValueError):
        # A missing year (None) on either side cannot be compared.
        return False

    # Allow +/- 1 year for release-date differences between the two sources.
    return year_gap <= 1
205 | 
def run_imdb_sync():
    """Run one interactive sync from an IMDb list to a Plex collection.

    Prompts for the list URL, the number of pages to scrape and the
    collection name, scrapes the list, matches the entries against the
    configured Plex libraries and tags every match with the collection.

    Any error is printed and the process exits with status 1.
    """
    # add_collection reads the collection name from this module-level global.
    global IMDB_COLLECTION_NAME

    try:
        os.system('cls' if os.name == 'nt' else 'clear')

        # Strip pasted whitespace so an otherwise valid URL is not rejected.
        imdb_url = input("IMDB List URL (e.g., https://www.imdb.com/list/ls002400902/): ").strip()
        page_numbers = input("How many pages do you want to scrape on this IMDB list? (default: 1): ").strip() or "1"
        validate_input(imdb_url, page_numbers)

        # Input the collection name; refuse a blank one before touching Plex.
        collection_name = input("Collection Name (e.g., Disney Classics): ").strip()
        if not collection_name:
            raise ValueError("Collection name cannot be empty.")
        IMDB_COLLECTION_NAME = collection_name

        plex = PlexServer(PLEX_URL, PLEX_TOKEN)  # Use the global values defined earlier
        plex_movies = retrieve_movies_from_plex(plex, MOVIE_LIBRARIES)

        imdb_movies = retrieve_movies_from_imdb(imdb_url, page_numbers)
        imdb_to_plex_map = match_imdb_to_plex_movies(plex_movies, imdb_movies)

        print("Adding the collection '{0}' to matched movies.".format(IMDB_COLLECTION_NAME))
        for plex_movie in imdb_to_plex_map.values():
            add_collection(plex_movie.librarySectionID, plex_movie.ratingKey)

        print("Done!")
    except Exception as e:
        print("An error occurred:", str(e))
        sys.exit(1)
231 | 
def main():
    """Script entry point: run a single interactive IMDb-to-Plex sync."""
    run_imdb_sync()


# Only start the interactive sync when executed as a script, not on import.
if __name__ == "__main__":
    main()
237 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # IMDBList2PlexCollection
 2 | Simple script/[standalone build](https://github.com/deva5610/IMDBList2PlexCollection/releases/) to take an IMDB list, match the movies
 3 | in your Plex Library and turn them into a collection.
 4 | 
 5 | This script is a modified version of [this excellent script](https://gist.github.com/JonnyWong16/f5b9af386ea58e19bf18c09f2681df23).
 6 | 
 7 | Thanks to /u/SwiftPanda16 for the original.
 8 | 
 9 | # Disclaimer
10 | I'm not a developer.....at all. My modifications are probably quite slap happy, but they work fine for me and have on a few
11 | different installs now with 0 problems. **This doesn't mean it will for you.** I'm not responsible for any heartaches caused when you
12 | decide to mess with your Plex server. Maybe spin up a small test library before deploying it on a big library if you're concerned
13 | about my lack of ability!
14 | 
15 | # Configuration
16 | Create or edit config.ini with your favourite text editor. Keep config.ini in the same working directory as the script. 
17 | 
18 | **ONLY _"url="_, _"token="_ and _"library="_ underneath the [plex] header need to be set for the script to work**.
19 | 
20 | **url=** cannot end with a trailing slash - _**url=http://localhost:32400**_ & _**url=https://plex.woofwoof.wahoo**_ are both 
21 | examples of proper formatting, _**url=https://plex.woofwoof.wahoo/**_ is not.
22 | 
23 | **token=** can be found using [this guide.](https://support.plex.tv/articles/204059436-finding-an-authentication-token-x-plex-token/)
24 | A token can also be found in Tautulli or Ombi if you're using them. _**token=njkjdkHJJKAJKnjSAKJ**_ is an example of correct formatting.
25 | 
 26 | **library=** is pretty self-explanatory. Multiple libraries are supported, separated by a comma ",". _**library=Movies**_ and _**library=4K Movies,Movies,Kids Movies**_ are examples of correct formatting.
27 | 
28 | They are the three variables most people will have to fill in.
29 | 
30 | **_If, and only IF you're using_** 'The Movie Database' agent instead of Plex Movie you'll also need to edit the _**apikey=**_ variable
31 | located under the [tmdb] header.
32 | 
33 | **Once complete it should look like**
34 | 
35 |     [plex]
36 |     url=http://PLEXSERVERURL:32400
37 |     token=REPLACEmeWITHyourTOKEN
38 |     library=Movies,Test Library,Kids
39 | 
40 |     [tmdb]
41 |     apikey=Optional
42 | 
43 | # Usage
44 | If you are not using a [standalone binary](https://github.com/deva5610/IMDBList2PlexCollection/releases/) you'll need to install dependencies. Use pip to install the few listed requirements.
45 | 
 46 | pip install -r requirements.txt **_OR_** "pip install lxml" "pip install plexapi" "pip install requests" "pip install tmdbv3api" "pip install beautifulsoup4" in turn.
47 | 
 48 | Run the script with "python IMDBList2PlexCollection.py" and follow the instructions. You'll want three things: a URL to the IMDB list you want to match (eg - https://www.imdb.com/list/ls064646512/), the number of pages of that list to scrape (defaults to 1), and what you want the matching movies to be tagged as
 49 | (eg - Pixar, Pixar Movies, Pixar Animations, etc - all 3 are valid entries when asked).
50 | 
 51 | ***Note - You can only use the base URL (eg - https://www.imdb.com/list/ls064646512/). If there are any parameters after the final trailing slash, the program may not run properly. IMDB Searches are no longer supported.***
52 | 
53 | That's it. The script should (hopefully!) run, it'll match movies from the IMDB list to your Movies Library and tag them into the
54 | collection you specified.
55 | 
56 | # Issues
57 | Probably. Don't blame me. Fork, fix and merge.
58 | 
59 | # Enjoy
60 | This one is simple.
61 | 


--------------------------------------------------------------------------------
/config.ini:
--------------------------------------------------------------------------------
1 | [plex]
2 | url=http://PLEXSERVERURL:32400
3 | token=REPLACEmeWITHyourTOKEN
4 | library=My Movies,Movies2,Kids Movies,4KLibrary,1080P Content
5 | 
6 | [tmdb]
7 | apikey=
8 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | plexapi
2 | tmdbv3api
3 | lxml
4 | requests
5 | beautifulsoup4
6 | 


--------------------------------------------------------------------------------