├── .editorconfig ├── .gitignore ├── .isort.cfg ├── CHANGELOG.rst ├── LICENSE ├── README.rst ├── imdbparser ├── __init__.py ├── __main__.py ├── advancedsearchresult.py ├── base.py ├── chart.py ├── exceptions.py ├── generateadvancedsearchresult.py ├── imdb.py ├── movie.py ├── person.py └── searchresult.py ├── setup.cfg └── setup.py /.editorconfig: -------------------------------------------------------------------------------- 1 | # http://editorconfig.org 2 | 3 | root = true 4 | 5 | [*] 6 | indent_style = space 7 | indent_size = 4 8 | insert_final_newline = true 9 | trim_trailing_whitespace = true 10 | end_of_line = lf 11 | charset = utf-8 12 | 13 | # Docstrings and comments use max_line_length = 79 14 | [*.py] 15 | max_line_length = 119 16 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.mo 2 | *.egg-info 3 | *.egg 4 | *.EGG 5 | *.EGG-INFO 6 | bin 7 | build 8 | develop-eggs 9 | downloads 10 | eggs 11 | fake-eggs 12 | parts 13 | dist 14 | .installed.cfg 15 | .mr.developer.cfg 16 | .hg 17 | .bzr 18 | .svn 19 | *.pyc 20 | *.pyo 21 | *.tmp* 22 | dropin.cache 23 | _trial_temp 24 | *.komodoproject 25 | docs/_build* 26 | .env* 27 | autotorrent.conf 28 | .coverage 29 | -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- 1 | [settings] 2 | multi_line_output=3 3 | include_trailing_comma=True 4 | force_grid_wrap=0 5 | use_parentheses=True 6 | line_length=88 7 | -------------------------------------------------------------------------------- /CHANGELOG.rst: -------------------------------------------------------------------------------- 1 | Version 1.0.22 () 2 | =========================================================== 3 | 4 | * Feature: Added support for IMDb charts 5 | * Feature: Added support for advanced search. 
6 | 7 | 8 | Version 1.0.21 (21-12-2019) 9 | =========================================================== 10 | 11 | * Feature: Better CLI 12 | * Bugfix: TV search was not actually searching TV only 13 | * Bugfix: Movie search now searches only movies 14 | 15 | Version 1.0.20 (27-11-2019) 16 | =========================================================== 17 | 18 | * Feature: Added support for "more like this" 19 | 20 | Version 1.0.19 (13-04-2019) 21 | =========================================================== 22 | 23 | * Bugfix: Link in plot not properly parsed 24 | 25 | Version 1.0.18 (19-08-2018) 26 | =========================================================== 27 | 28 | * Change: Tagline, description and storyline moved around a little bit 29 | * Bugfix: Director now found if plural too 30 | 31 | Version 1.0.17 (12-05-2018) 32 | =========================================================== 33 | 34 | * Bugfix: Storyline changed tag type from div to span 35 | * Bugfix: Missing plot summary threw exception 36 | 37 | Version 1.0.14 (26-04-2018) 38 | =========================================================== 39 | 40 | * Bugfix: IMDb redirects to HTTPS if HTTP is called, changed standard URLs to HTTPS 41 | * Bugfix: Writer / Writers support (not always plural) 42 | * Bugfix: Storyline doesn't include writer anymore 43 | 44 | Version 1.0.9 (28-08-2017) 45 | =========================================================== 46 | 47 | * Bugfix: Movie search now also searches TV to include TV Movies 48 | 49 | Version 1.0.8 (13-05-2017) 50 | =========================================================== 51 | 52 | * Bugfix: Shows with missing rating and shows with no description 53 | no longer throw an exception.
54 | * Bugfix: Encoding error when searching for unicode 55 | 56 | Version 1.0.7 (30-04-2017) 57 | =========================================================== 58 | 59 | * Bugfix: small parsingbug with TV and ratings 60 | 61 | Version 1.0.4 (28-03-2017) 62 | =========================================================== 63 | 64 | * Feature: Added support for smart resolve of tv shows and movies 65 | 66 | Version 1.0.3 (20-10-2016) 67 | =========================================================== 68 | 69 | * Feature: Added support for tv show search 70 | * Change: Renamed search result variable from movies to results 71 | 72 | Version 1.0.2 (13-10-2016) 73 | =========================================================== 74 | 75 | * Fixing all the small bugs that slipped through! 76 | 77 | Version 1.0.1 (13-10-2016) 78 | =========================================================== 79 | 80 | * Initial release of rewrite 81 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (C) 2016 Anders Jensen 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | imdbparser 2 | ========== 3 | 4 | Search IMDb and get movie information. 5 | Incredibly basic, limited feature-set, somewhat fast. 6 | 7 | Usage 8 | ----- 9 | 10 | Get from ID 11 | ~~~~~~~~~~~ 12 | .. code-block:: python 13 | 14 | >>> from imdbparser import IMDb 15 | >>> imdb = IMDb() 16 | >>> movie = imdb.get_movie(1954470) 17 | >>> movie.fetched 18 | False 19 | >>> movie.fetch() 20 | >>> movie.fetched 21 | True 22 | >>> movie.__dict__ 23 | ... 24 | 25 | 26 | Search 27 | ~~~~~~ 28 | .. code-block:: python 29 | 30 | >>> from imdbparser import IMDb 31 | >>> imdb = IMDb() 32 | >>> search_result = imdb.search_movie('Matrix') 33 | >>> search_result.fetched 34 | False 35 | >>> search_result.fetch() 36 | >>> search_result.fetched 37 | True 38 | >>> search_result.results 39 | [, 40 | , ...] 41 | 42 | .. code-block:: python 43 | 44 | >>> from imdbparser import IMDb 45 | >>> imdb = IMDb() 46 | >>> search_result = imdb.search_tv_show('it crowd') 47 | >>> search_result.fetched 48 | False 49 | >>> search_result.fetch() 50 | >>> search_result.fetched 51 | True 52 | >>> search_result.results 53 | [, 54 | , ...] 
55 | 56 | 57 | 58 | 59 | 60 | License 61 | ------- 62 | 63 | MIT, see LICENSE -------------------------------------------------------------------------------- /imdbparser/__init__.py: -------------------------------------------------------------------------------- 1 | from .imdb import AS, IMDb # NOQA 2 | 3 | __version__ = "1.0.22" 4 | -------------------------------------------------------------------------------- /imdbparser/__main__.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | from pprint import pprint 4 | 5 | 6 | def main(): 7 | from .imdb import IMDb, CHART_TYPES, AS 8 | 9 | parser = argparse.ArgumentParser(description="Fetch info from IMDb") 10 | parser.add_argument("--debug", help="Enable debugging", action="store_true") 11 | 12 | subparsers = parser.add_subparsers(help="sub-command help", dest="command") 13 | 14 | fetch_parser = subparsers.add_parser(name="fetch") 15 | fetch_parser.add_argument("imdb_id", help="an IMDb id, e.g. tt0120737") 16 | 17 | search_parser = subparsers.add_parser( 18 | name="search", description="Search for a movie or tv show" 19 | ) 20 | search_parser.add_argument( 21 | "type", help="Type to search for", choices=["tv", "movie"] 22 | ) 23 | search_parser.add_argument("title", help="Title to search for") 24 | 25 | resolve_parser = subparsers.add_parser( 26 | name="resolve", description="Try to resolve a search into a specific entry" 27 | ) 28 | resolve_parser.add_argument( 29 | "type", help="Type to search-resolve for", choices=["tv", "movie"] 30 | ) 31 | resolve_parser.add_argument("title", help="Title to search-resolve for") 32 | resolve_parser.add_argument( 33 | "year", help="Year close to the entry", type=int, nargs="?" 
34 | ) 35 | 36 | chart_parser = subparsers.add_parser(name="chart", description="Fetch a chart") 37 | chart_parser.add_argument("type", help="Chart type", choices=CHART_TYPES) 38 | 39 | args = parser.parse_args() 40 | 41 | if args.debug: 42 | logging.basicConfig(level=logging.DEBUG) 43 | 44 | i = IMDb() 45 | movie = None 46 | movies = None 47 | 48 | if args.command == "fetch": 49 | movie = i.get_movie(args.imdb_id.lstrip("tt")) 50 | elif args.command == "search": 51 | if args.type == "tv": 52 | movies = i.search_tv_show(args.title) 53 | elif args.type == "movie": 54 | movies = i.search_movie(args.title) 55 | elif args.command == "resolve": 56 | if args.type == "tv": 57 | movie = i.resolve_tv_show(args.title, args.year) 58 | elif args.type == "movie": 59 | movie = i.resolve_movie(args.title, args.year) 60 | elif args.command == "chart": 61 | movies = i.get_chart(args.type) 62 | else: 63 | parser.print_help() 64 | 65 | if movie is not None: 66 | movie.fetch() 67 | pprint(movie.__dict__) 68 | print("") 69 | print("More like this") 70 | for recommended_movie in movie.more_like_this: 71 | pprint(recommended_movie.__dict__) 72 | 73 | if movies is not None: 74 | movies.fetch() 75 | if movies.results: 76 | for movie in movies.results: 77 | print(movie) 78 | print(movie.__dict__) 79 | else: 80 | print("Nothing found...") 81 | 82 | 83 | if __name__ == "__main__": 84 | main() 85 | -------------------------------------------------------------------------------- /imdbparser/advancedsearchresult.py: -------------------------------------------------------------------------------- 1 | import re 2 | import sys 3 | from decimal import Decimal 4 | 5 | from requests.compat import quote_plus, urlencode 6 | 7 | from .base import Base 8 | from .movie import Movie 9 | from .person import Person 10 | 11 | 12 | class Option: 13 | def __init__(self, label, value): 14 | self.label = label.strip() 15 | self.value = value.strip() 16 | 17 | def __str__(self): 18 | return f"{self.label} 
({self.value})" 19 | 20 | def __repr__(self): 21 | return f"Option({self.label!r}, {self.value!r})" 22 | 23 | 24 | class ParseBase(Base): 25 | base_url = "https://www.imdb.com/search/title/?" 26 | 27 | def _get_urls(self): 28 | return [self.base_url + urlencode(self.query)] 29 | 30 | def parse(self, htmls): 31 | super().parse(htmls) 32 | 33 | self.results = [] 34 | 35 | for row in self.trees[0].xpath( 36 | "//div[@class='lister-list']/div[contains(@class, 'lister-item')]" 37 | ): 38 | cover = row.xpath(".//img")[0] 39 | imdb_id = cover.attrib["data-tconst"] 40 | movie = Movie(imdb_id, self.imdb) 41 | movie.directors = [] 42 | movie.actors = [] 43 | cover = cover.attrib["src"] 44 | if "nopicture" not in cover: 45 | movie.cover = self.cleanup_photo_url(cover) 46 | 47 | header = row.xpath(".//h3[@class='lister-item-header']")[0] 48 | movie.title = header.xpath(".//a/text()")[0] 49 | year = re.findall( 50 | "\d+", 51 | header.xpath(".//span[contains(@class, 'lister-item-year')]/text()")[0], 52 | ) 53 | if year and len(year[0]) == 4: 54 | movie.year = int(year[0]) 55 | 56 | runtime = row.xpath(".//span[@class='runtime']") 57 | if runtime: 58 | runtime = re.findall("\d+", runtime[0].text) 59 | if runtime: 60 | movie.duration = int(runtime[0]) 61 | 62 | genres = row.xpath(".//span[@class='genre']") 63 | if genres: 64 | movie.genres = genres[0].text.split(", ") 65 | 66 | rating = row.xpath(".//div[contains(@class, 'ratings-imdb-rating')]") 67 | if rating: 68 | movie.rating = Decimal(rating[0].attrib["data-value"]) 69 | 70 | votes = row.xpath(".//span[@class='sort-num_votes-visible']/span[2]") 71 | if votes: 72 | movie.votes = int(votes[0].attrib["data-value"]) 73 | 74 | content = row.xpath("div[@class='lister-item-content']")[0] 75 | storyline = content.xpath("./p[2]") 76 | if storyline: 77 | movie.storyline = storyline[0].text.strip() 78 | 79 | people = content.xpath("./p[3]") 80 | if people: 81 | people = people[0] 82 | people_titles = iter( 83 | [ 84 | t.strip(" ,\n:s") 
85 | for t in people.xpath("./text()") 86 | if t.strip(" ,\n") 87 | ] 88 | ) 89 | current_title = next(people_titles) 90 | for e in people: 91 | if e.tag == "span": 92 | current_title = next(people_titles) 93 | else: 94 | p = Person( 95 | re.findall("/nm(\d+)", e.attrib["href"])[0], self.imdb 96 | ) 97 | p.name = e.text 98 | if current_title == "Director": 99 | movie.directors.append(p) 100 | elif current_title == "Star": 101 | movie.actors.append(p) 102 | 103 | self.results.append(movie) 104 | 105 | 106 | class AS: 107 | class TITLE_TYPE: 108 | FEATURE_FILM = Option("Feature Film", "feature") 109 | TV_MOVIE = Option("TV Movie", "tv_movie") 110 | TV_SERIES = Option("TV Series", "tv_series") 111 | TV_EPISODE = Option("TV Episode", "tv_episode") 112 | TV_SPECIAL = Option("TV Special", "tv_special") 113 | MINI_SERIES = Option("Mini-Series", "tv_miniseries") 114 | DOCUMENTARY = Option("Documentary", "documentary") 115 | VIDEO_GAME = Option("Video Game", "video_game") 116 | SHORT_FILM = Option("Short Film", "short") 117 | VIDEO = Option("Video", "video") 118 | TV_SHORT = Option("TV Short", "tv_short") 119 | 120 | class GENRES: 121 | ACTION = Option("Action", "action") 122 | ADVENTURE = Option("Adventure", "adventure") 123 | ANIMATION = Option("Animation", "animation") 124 | BIOGRAPHY = Option("Biography", "biography") 125 | COMEDY = Option("Comedy", "comedy") 126 | CRIME = Option("Crime", "crime") 127 | DOCUMENTARY = Option("Documentary", "documentary") 128 | DRAMA = Option("Drama", "drama") 129 | FAMILY = Option("Family", "family") 130 | FANTASY = Option("Fantasy", "fantasy") 131 | FILM_NOIR = Option("Film-Noir", "film_noir") 132 | GAME_SHOW = Option("Game-Show", "game_show") 133 | HISTORY = Option("History", "history") 134 | HORROR = Option("Horror", "horror") 135 | MUSIC = Option("Music", "music") 136 | MUSICAL = Option("Musical", "musical") 137 | MYSTERY = Option("Mystery", "mystery") 138 | NEWS = Option("News", "news") 139 | REALITY_TV = Option("Reality-TV", 
"reality_tv") 140 | ROMANCE = Option("Romance", "romance") 141 | SCI_FI = Option("Sci-Fi", "sci_fi") 142 | SPORT = Option("Sport", "sport") 143 | TALK_SHOW = Option("Talk-Show", "talk_show") 144 | THRILLER = Option("Thriller", "thriller") 145 | WAR = Option("War", "war") 146 | WESTERN = Option("Western", "western") 147 | 148 | class GROUPS: 149 | IMDB_TOP_100 = Option('IMDb "Top 100"', "top_100") 150 | IMDB_TOP_250 = Option('IMDb "Top 250"', "top_250") 151 | IMDB_TOP_1000 = Option('IMDb "Top 1000"', "top_1000") 152 | OSCAR_WINNING = Option("Oscar-Winning", "oscar_winners") 153 | EMMY_AWARD_WINNING = Option("Emmy Award-Winning", "emmy_winners") 154 | GOLDEN_GLOBE_WINNING = Option("Golden Globe-Winning", "golden_globe_winners") 155 | OSCAR_NOMINATED = Option("Oscar-Nominated", "oscar_nominees") 156 | EMMY_AWARD_NOMINATED = Option("Emmy Award-Nominated", "emmy_nominees") 157 | GOLDEN_GLOBE_NOMINATED = Option( 158 | "Golden Globe-Nominated", "golden_globe_nominees" 159 | ) 160 | BEST_PICTURE_WINNING = Option( 161 | "Best Picture-Winning", "oscar_best_picture_winners" 162 | ) 163 | BEST_DIRECTOR_WINNING = Option( 164 | "Best Director-Winning", "oscar_best_director_winners" 165 | ) 166 | NOW_PLAYING = Option("Now-Playing", "now-playing-us") 167 | BEST_PICTURE_NOMINATED = Option( 168 | "Best Picture-Nominated", "oscar_best_picture_nominees" 169 | ) 170 | BEST_DIRECTOR_NOMINATED = Option( 171 | "Best Director-Nominated", "oscar_best_director_nominees" 172 | ) 173 | NATIONAL_FILM_BOARD_PRESERVED = Option( 174 | "National Film Board Preserved", "national_film_registry" 175 | ) 176 | RAZZIE_WINNING = Option("Razzie-Winning", "razzie_winners") 177 | IMDB_BOTTOM_100 = Option('IMDb "Bottom 100"', "bottom_100") 178 | IMDB_BOTTOM_250 = Option('IMDb "Bottom 250"', "bottom_250") 179 | RAZZIE_NOMINATED = Option("Razzie-Nominated", "razzie_nominees") 180 | IMDB_BOTTOM_1000 = Option('IMDb "Bottom 1000"', "bottom_1000") 181 | 182 | class HAS: 183 | ALTERNATE_VERSIONS = Option("Alternate 
Versions", "alternate-versions") 184 | AWARDS = Option("Awards", "awards") 185 | BUSINESS_INFO = Option("Business Info", "business-info") 186 | CRAZY_CREDITS = Option("Crazy Credits", "crazy-credits") 187 | GOOFS = Option("Goofs", "goofs") 188 | LOCATIONS = Option("Locations", "locations") 189 | PLOT = Option("Plot", "plot") 190 | QUOTES = Option("Quotes", "quotes") 191 | SOUNDTRACKS = Option("Soundtracks", "soundtracks") 192 | TECHNICAL_INFO = Option("Technical Info", "technical") 193 | TRIVIA = Option("Trivia", "trivia") 194 | X_RAY = Option("X-Ray", "x-ray") 195 | 196 | class COMPANIES: 197 | TWENTIETH_CENTURY_FOX = Option("20th Century Fox", "fox") 198 | SONY = Option("Sony", "columbia") 199 | DREAMWORKS = Option("DreamWorks", "dreamworks") 200 | MGM = Option("MGM", "mgm") 201 | PARAMOUNT = Option("Paramount", "paramount") 202 | UNIVERSAL = Option("Universal", "universal") 203 | WALT_DISNEY = Option("Walt Disney", "disney") 204 | WARNER_BROS = Option("Warner Bros.", "warner") 205 | 206 | class CERTIFICATES: 207 | G = Option("G", "us:G") 208 | PG = Option("PG", "us:PG") 209 | PG_13 = Option("PG-13", "us:PG-13") 210 | R = Option("R", "us:R") 211 | NC_17 = Option("NC-17", "us:NC-17") 212 | 213 | class COLORS: 214 | COLOR = Option("Color", "color") 215 | BLACK_WHITE = Option("Black & White", "black_and_white") 216 | COLORIZED = Option("Colorized", "colorized") 217 | ACES = Option("ACES", "aces") 218 | 219 | class COUNTRIES: 220 | AFGHANISTAN = Option("Afghanistan", "af") 221 | LAND_ISLANDS = Option("Åland Islands", "ax") 222 | ALBANIA = Option("Albania", "al") 223 | ALGERIA = Option("Algeria", "dz") 224 | AMERICAN_SAMOA = Option("American Samoa", "as") 225 | ANDORRA = Option("Andorra", "ad") 226 | ANGOLA = Option("Angola", "ao") 227 | ANGUILLA = Option("Anguilla", "ai") 228 | ANTARCTICA = Option("Antarctica", "aq") 229 | ANTIGUA_AND_BARBUDA = Option("Antigua and Barbuda", "ag") 230 | ARGENTINA = Option("Argentina", "ar") 231 | ARMENIA = Option("Armenia", "am") 232 
| ARUBA = Option("Aruba", "aw") 233 | AUSTRALIA = Option("Australia", "au") 234 | AUSTRIA = Option("Austria", "at") 235 | AZERBAIJAN = Option("Azerbaijan", "az") 236 | BAHAMAS = Option("Bahamas", "bs") 237 | BAHRAIN = Option("Bahrain", "bh") 238 | BANGLADESH = Option("Bangladesh", "bd") 239 | BARBADOS = Option("Barbados", "bb") 240 | BELARUS = Option("Belarus", "by") 241 | BELGIUM = Option("Belgium", "be") 242 | BELIZE = Option("Belize", "bz") 243 | BENIN = Option("Benin", "bj") 244 | BERMUDA = Option("Bermuda", "bm") 245 | BHUTAN = Option("Bhutan", "bt") 246 | BOLIVIA = Option("Bolivia", "bo") 247 | BONAIRE_SINT_EUSTATIUS_AND_SABA = Option( 248 | "Bonaire, Sint Eustatius and Saba", "bq" 249 | ) 250 | BOSNIA_AND_HERZEGOVINA = Option("Bosnia and Herzegovina", "ba") 251 | BOTSWANA = Option("Botswana", "bw") 252 | BOUVET_ISLAND = Option("Bouvet Island", "bv") 253 | BRAZIL = Option("Brazil", "br") 254 | BRITISH_INDIAN_OCEAN_TERRITORY = Option("British Indian Ocean Territory", "io") 255 | BRITISH_VIRGIN_ISLANDS = Option("British Virgin Islands", "vg") 256 | BRUNEI_DARUSSALAM = Option("Brunei Darussalam", "bn") 257 | BULGARIA = Option("Bulgaria", "bg") 258 | BURKINA_FASO = Option("Burkina Faso", "bf") 259 | BURMA = Option("Burma", "bumm") 260 | BURUNDI = Option("Burundi", "bi") 261 | CAMBODIA = Option("Cambodia", "kh") 262 | CAMEROON = Option("Cameroon", "cm") 263 | CANADA = Option("Canada", "ca") 264 | CAPE_VERDE = Option("Cape Verde", "cv") 265 | CAYMAN_ISLANDS = Option("Cayman Islands", "ky") 266 | CENTRAL_AFRICAN_REPUBLIC = Option("Central African Republic", "cf") 267 | CHAD = Option("Chad", "td") 268 | CHILE = Option("Chile", "cl") 269 | CHINA = Option("China", "cn") 270 | CHRISTMAS_ISLAND = Option("Christmas Island", "cx") 271 | COCOS_KEELING_ISLANDS = Option("Cocos (Keeling) Islands", "cc") 272 | COLOMBIA = Option("Colombia", "co") 273 | COMOROS = Option("Comoros", "km") 274 | CONGO = Option("Congo", "cg") 275 | COOK_ISLANDS = Option("Cook Islands", "ck") 276 | 
COSTA_RICA = Option("Costa Rica", "cr") 277 | CTE_D_IVOIRE = Option("Côte d'Ivoire", "ci") 278 | CROATIA = Option("Croatia", "hr") 279 | CUBA = Option("Cuba", "cu") 280 | CYPRUS = Option("Cyprus", "cy") 281 | CZECH_REPUBLIC = Option("Czech Republic", "cz") 282 | CZECHOSLOVAKIA = Option("Czechoslovakia", "cshh") 283 | DEMOCRATIC_REPUBLIC_OF_THE_CONGO = Option( 284 | "Democratic Republic of the Congo", "cd" 285 | ) 286 | DENMARK = Option("Denmark", "dk") 287 | DJIBOUTI = Option("Djibouti", "dj") 288 | DOMINICA = Option("Dominica", "dm") 289 | DOMINICAN_REPUBLIC = Option("Dominican Republic", "do") 290 | EAST_GERMANY = Option("East Germany", "ddde") 291 | ECUADOR = Option("Ecuador", "ec") 292 | EGYPT = Option("Egypt", "eg") 293 | EL_SALVADOR = Option("El Salvador", "sv") 294 | EQUATORIAL_GUINEA = Option("Equatorial Guinea", "gq") 295 | ERITREA = Option("Eritrea", "er") 296 | ESTONIA = Option("Estonia", "ee") 297 | ETHIOPIA = Option("Ethiopia", "et") 298 | FALKLAND_ISLANDS = Option("Falkland Islands", "fk") 299 | FAROE_ISLANDS = Option("Faroe Islands", "fo") 300 | FEDERAL_REPUBLIC_OF_YUGOSLAVIA = Option( 301 | "Federal Republic of Yugoslavia", "yucs" 302 | ) 303 | FEDERATED_STATES_OF_MICRONESIA = Option("Federated States of Micronesia", "fm") 304 | FIJI = Option("Fiji", "fj") 305 | FINLAND = Option("Finland", "fi") 306 | FRANCE = Option("France", "fr") 307 | FRENCH_GUIANA = Option("French Guiana", "gf") 308 | FRENCH_POLYNESIA = Option("French Polynesia", "pf") 309 | FRENCH_SOUTHERN_TERRITORIES = Option("French Southern Territories", "tf") 310 | GABON = Option("Gabon", "ga") 311 | GAMBIA = Option("Gambia", "gm") 312 | GEORGIA = Option("Georgia", "ge") 313 | GERMANY = Option("Germany", "de") 314 | GHANA = Option("Ghana", "gh") 315 | GIBRALTAR = Option("Gibraltar", "gi") 316 | GREECE = Option("Greece", "gr") 317 | GREENLAND = Option("Greenland", "gl") 318 | GRENADA = Option("Grenada", "gd") 319 | GUADELOUPE = Option("Guadeloupe", "gp") 320 | GUAM = Option("Guam", "gu") 
321 | GUATEMALA = Option("Guatemala", "gt") 322 | GUERNSEY = Option("Guernsey", "gg") 323 | GUINEA = Option("Guinea", "gn") 324 | GUINEA_BISSAU = Option("Guinea-Bissau", "gw") 325 | GUYANA = Option("Guyana", "gy") 326 | HAITI = Option("Haiti", "ht") 327 | HEARD_ISLAND_AND_MCDONALD_ISLANDS = Option( 328 | "Heard Island and McDonald Islands", "hm" 329 | ) 330 | HOLY_SEE_VATICAN_CITY_STATE = Option("Holy See (Vatican City State)", "va") 331 | HONDURAS = Option("Honduras", "hn") 332 | HONG_KONG = Option("Hong Kong", "hk") 333 | HUNGARY = Option("Hungary", "hu") 334 | ICELAND = Option("Iceland", "is") 335 | INDIA = Option("India", "in") 336 | INDONESIA = Option("Indonesia", "id") 337 | IRAN = Option("Iran", "ir") 338 | IRAQ = Option("Iraq", "iq") 339 | IRELAND = Option("Ireland", "ie") 340 | ISLE_OF_MAN = Option("Isle of Man", "im") 341 | ISRAEL = Option("Israel", "il") 342 | ITALY = Option("Italy", "it") 343 | JAMAICA = Option("Jamaica", "jm") 344 | JAPAN = Option("Japan", "jp") 345 | JERSEY = Option("Jersey", "je") 346 | JORDAN = Option("Jordan", "jo") 347 | KAZAKHSTAN = Option("Kazakhstan", "kz") 348 | KENYA = Option("Kenya", "ke") 349 | KIRIBATI = Option("Kiribati", "ki") 350 | KOREA = Option("Korea", "xko") 351 | KOSOVO = Option("Kosovo", "xkv") 352 | KUWAIT = Option("Kuwait", "kw") 353 | KYRGYZSTAN = Option("Kyrgyzstan", "kg") 354 | LAOS = Option("Laos", "la") 355 | LATVIA = Option("Latvia", "lv") 356 | LEBANON = Option("Lebanon", "lb") 357 | LESOTHO = Option("Lesotho", "ls") 358 | LIBERIA = Option("Liberia", "lr") 359 | LIBYA = Option("Libya", "ly") 360 | LIECHTENSTEIN = Option("Liechtenstein", "li") 361 | LITHUANIA = Option("Lithuania", "lt") 362 | LUXEMBOURG = Option("Luxembourg", "lu") 363 | MACAO = Option("Macao", "mo") 364 | MADAGASCAR = Option("Madagascar", "mg") 365 | MALAWI = Option("Malawi", "mw") 366 | MALAYSIA = Option("Malaysia", "my") 367 | MALDIVES = Option("Maldives", "mv") 368 | MALI = Option("Mali", "ml") 369 | MALTA = Option("Malta", "mt") 370 | 
MARSHALL_ISLANDS = Option("Marshall Islands", "mh") 371 | MARTINIQUE = Option("Martinique", "mq") 372 | MAURITANIA = Option("Mauritania", "mr") 373 | MAURITIUS = Option("Mauritius", "mu") 374 | MAYOTTE = Option("Mayotte", "yt") 375 | MEXICO = Option("Mexico", "mx") 376 | MOLDOVA = Option("Moldova", "md") 377 | MONACO = Option("Monaco", "mc") 378 | MONGOLIA = Option("Mongolia", "mn") 379 | MONTENEGRO = Option("Montenegro", "me") 380 | MONTSERRAT = Option("Montserrat", "ms") 381 | MOROCCO = Option("Morocco", "ma") 382 | MOZAMBIQUE = Option("Mozambique", "mz") 383 | MYANMAR = Option("Myanmar", "mm") 384 | NAMIBIA = Option("Namibia", "na") 385 | NAURU = Option("Nauru", "nr") 386 | NEPAL = Option("Nepal", "np") 387 | NETHERLANDS = Option("Netherlands", "nl") 388 | NETHERLANDS_ANTILLES = Option("Netherlands Antilles", "an") 389 | NEW_CALEDONIA = Option("New Caledonia", "nc") 390 | NEW_ZEALAND = Option("New Zealand", "nz") 391 | NICARAGUA = Option("Nicaragua", "ni") 392 | NIGER = Option("Niger", "ne") 393 | NIGERIA = Option("Nigeria", "ng") 394 | NIUE = Option("Niue", "nu") 395 | NORFOLK_ISLAND = Option("Norfolk Island", "nf") 396 | NORTH_KOREA = Option("North Korea", "kp") 397 | NORTH_VIETNAM = Option("North Vietnam", "vdvn") 398 | NORTHERN_MARIANA_ISLANDS = Option("Northern Mariana Islands", "mp") 399 | NORWAY = Option("Norway", "no") 400 | OMAN = Option("Oman", "om") 401 | PAKISTAN = Option("Pakistan", "pk") 402 | PALAU = Option("Palau", "pw") 403 | PALESTINE = Option("Palestine", "xpi") 404 | PALESTINIAN_TERRITORY = Option("Palestinian Territory", "ps") 405 | PANAMA = Option("Panama", "pa") 406 | PAPUA_NEW_GUINEA = Option("Papua New Guinea", "pg") 407 | PARAGUAY = Option("Paraguay", "py") 408 | PERU = Option("Peru", "pe") 409 | PHILIPPINES = Option("Philippines", "ph") 410 | POLAND = Option("Poland", "pl") 411 | PORTUGAL = Option("Portugal", "pt") 412 | PITCAIRN = Option("Pitcairn", "pn") 413 | PUERTO_RICO = Option("Puerto Rico", "pr") 414 | QATAR = Option("Qatar", 
"qa") 415 | REPUBLIC_OF_MACEDONIA = Option("Republic of Macedonia", "mk") 416 | RUNION = Option("Réunion", "re") 417 | ROMANIA = Option("Romania", "ro") 418 | RUSSIA = Option("Russia", "ru") 419 | RWANDA = Option("Rwanda", "rw") 420 | SAINT_BARTHLEMY = Option("Saint Barthélemy", "bl") 421 | SAINT_HELENA = Option("Saint Helena", "sh") 422 | SAINT_KITTS_AND_NEVIS = Option("Saint Kitts and Nevis", "kn") 423 | SAINT_LUCIA = Option("Saint Lucia", "lc") 424 | SAINT_MARTIN_FRENCH_PART = Option("Saint Martin (French part)", "mf") 425 | SAINT_PIERRE_AND_MIQUELON = Option("Saint Pierre and Miquelon", "pm") 426 | SAINT_VINCENT_AND_THE_GRENADINES = Option( 427 | "Saint Vincent and the Grenadines", "vc" 428 | ) 429 | SAMOA = Option("Samoa", "ws") 430 | SAN_MARINO = Option("San Marino", "sm") 431 | SAO_TOME_AND_PRINCIPE = Option("Sao Tome and Principe", "st") 432 | SAUDI_ARABIA = Option("Saudi Arabia", "sa") 433 | SENEGAL = Option("Senegal", "sn") 434 | SERBIA = Option("Serbia", "rs") 435 | SERBIA_AND_MONTENEGRO = Option("Serbia and Montenegro", "csxx") 436 | SEYCHELLES = Option("Seychelles", "sc") 437 | SIAM = Option("Siam", "xsi") 438 | SIERRA_LEONE = Option("Sierra Leone", "sl") 439 | SINGAPORE = Option("Singapore", "sg") 440 | SLOVAKIA = Option("Slovakia", "sk") 441 | SLOVENIA = Option("Slovenia", "si") 442 | SOLOMON_ISLANDS = Option("Solomon Islands", "sb") 443 | SOMALIA = Option("Somalia", "so") 444 | SOUTH_AFRICA = Option("South Africa", "za") 445 | SOUTH_GEORGIA_AND_THE_SOUTH_SANDWICH_ISLANDS = Option( 446 | "South Georgia and the South Sandwich Islands", "gs" 447 | ) 448 | SOUTH_KOREA = Option("South Korea", "kr") 449 | SOVIET_UNION = Option("Soviet Union", "suhh") 450 | SPAIN = Option("Spain", "es") 451 | SRI_LANKA = Option("Sri Lanka", "lk") 452 | SUDAN = Option("Sudan", "sd") 453 | SURINAME = Option("Suriname", "sr") 454 | SVALBARD_AND_JAN_MAYEN = Option("Svalbard and Jan Mayen", "sj") 455 | SWAZILAND = Option("Swaziland", "sz") 456 | SWEDEN = Option("Sweden", "se") 
457 | SWITZERLAND = Option("Switzerland", "ch") 458 | SYRIA = Option("Syria", "sy") 459 | TAIWAN = Option("Taiwan", "tw") 460 | TAJIKISTAN = Option("Tajikistan", "tj") 461 | TANZANIA = Option("Tanzania", "tz") 462 | THAILAND = Option("Thailand", "th") 463 | TIMOR_LESTE = Option("Timor-Leste", "tl") 464 | TOGO = Option("Togo", "tg") 465 | TOKELAU = Option("Tokelau", "tk") 466 | TONGA = Option("Tonga", "to") 467 | TRINIDAD_AND_TOBAGO = Option("Trinidad and Tobago", "tt") 468 | TUNISIA = Option("Tunisia", "tn") 469 | TURKEY = Option("Turkey", "tr") 470 | TURKMENISTAN = Option("Turkmenistan", "tm") 471 | TURKS_AND_CAICOS_ISLANDS = Option("Turks and Caicos Islands", "tc") 472 | TUVALU = Option("Tuvalu", "tv") 473 | U_S_VIRGIN_ISLANDS = Option("U.S. Virgin Islands", "vi") 474 | UGANDA = Option("Uganda", "ug") 475 | UKRAINE = Option("Ukraine", "ua") 476 | UNITED_ARAB_EMIRATES = Option("United Arab Emirates", "ae") 477 | UNITED_KINGDOM = Option("United Kingdom", "gb") 478 | UNITED_STATES = Option("United States", "us") 479 | UNITED_STATES_MINOR_OUTLYING_ISLANDS = Option( 480 | "United States Minor Outlying Islands", "um" 481 | ) 482 | URUGUAY = Option("Uruguay", "uy") 483 | UZBEKISTAN = Option("Uzbekistan", "uz") 484 | VANUATU = Option("Vanuatu", "vu") 485 | VENEZUELA = Option("Venezuela", "ve") 486 | VIETNAM = Option("Vietnam", "vn") 487 | WALLIS_AND_FUTUNA = Option("Wallis and Futuna", "wf") 488 | WEST_GERMANY = Option("West Germany", "xwg") 489 | WESTERN_SAHARA = Option("Western Sahara", "eh") 490 | YEMEN = Option("Yemen", "ye") 491 | YUGOSLAVIA = Option("Yugoslavia", "xyu") 492 | ZAIRE = Option("Zaire", "zrcd") 493 | ZAMBIA = Option("Zambia", "zm") 494 | ZIMBABWE = Option("Zimbabwe", "zw") 495 | 496 | class LANGUAGES: 497 | ABKHAZIAN = Option("Abkhazian", "ab") 498 | ABORIGINAL = Option("Aboriginal", "qac") 499 | ACH = Option("Aché", "guq") 500 | ACHOLI = Option("Acholi", "qam") 501 | AFRIKAANS = Option("Afrikaans", "af") 502 | AIDOUKROU = Option("Aidoukrou", "qas") 
503 | AKAN = Option("Akan", "ak") 504 | ALBANIAN = Option("Albanian", "sq") 505 | ALGONQUIN = Option("Algonquin", "alg") 506 | AMERICAN_SIGN_LANGUAGE = Option("American Sign Language", "ase") 507 | AMHARIC = Option("Amharic", "am") 508 | APACHE_LANGUAGES = Option("Apache languages", "apa") 509 | ARABIC = Option("Arabic", "ar") 510 | ARAGONESE = Option("Aragonese", "an") 511 | ARAMAIC = Option("Aramaic", "arc") 512 | ARAPAHO = Option("Arapaho", "arp") 513 | ARMENIAN = Option("Armenian", "hy") 514 | ASSAMESE = Option("Assamese", "as") 515 | ASSYRIAN_NEO_ARAMAIC = Option("Assyrian Neo-Aramaic", "aii") 516 | ATHAPASCAN_LANGUAGES = Option("Athapascan languages", "ath") 517 | AUSTRALIAN_SIGN_LANGUAGE = Option("Australian Sign Language", "asf") 518 | AWADHI = Option("Awadhi", "awa") 519 | AYMARA = Option("Aymara", "ay") 520 | AZERBAIJANI = Option("Azerbaijani", "az") 521 | BABLE = Option("Bable", "ast") 522 | BAKA = Option("Baka", "qbd") 523 | BALINESE = Option("Balinese", "ban") 524 | BAMBARA = Option("Bambara", "bm") 525 | BASQUE = Option("Basque", "eu") 526 | BASSARI = Option("Bassari", "bsc") 527 | BELARUSIAN = Option("Belarusian", "be") 528 | BEMBA = Option("Bemba", "bem") 529 | BENGALI = Option("Bengali", "bn") 530 | BERBER_LANGUAGES = Option("Berber languages", "ber") 531 | BHOJPURI = Option("Bhojpuri", "bho") 532 | BICOLANO = Option("Bicolano", "qbi") 533 | BODO = Option("Bodo", "qbh") 534 | BOSNIAN = Option("Bosnian", "bs") 535 | BRAZILIAN_SIGN_LANGUAGE = Option("Brazilian Sign Language", "bzs") 536 | BRETON = Option("Breton", "br") 537 | BRITISH_SIGN_LANGUAGE = Option("British Sign Language", "bfi") 538 | BULGARIAN = Option("Bulgarian", "bg") 539 | BURMESE = Option("Burmese", "my") 540 | CANTONESE = Option("Cantonese", "yue") 541 | CATALAN = Option("Catalan", "ca") 542 | CENTRAL_KHMER = Option("Central Khmer", "km") 543 | CHAKMA = Option("Chakma", "ccp") 544 | CHAOZHOU = Option("Chaozhou", "qax") 545 | CHECHEN = Option("Chechen", "ce") 546 | CHEROKEE = 
Option("Cherokee", "chr") 547 | CHEYENNE = Option("Cheyenne", "chy") 548 | CHHATTISGARHI = Option("Chhattisgarhi", "hne") 549 | CHINESE = Option("Chinese", "zh") 550 | CORNISH = Option("Cornish", "kw") 551 | CORSICAN = Option("Corsican", "co") 552 | CREE = Option("Cree", "cr") 553 | CREEK = Option("Creek", "mus") 554 | CROATIAN = Option("Croatian", "hr") 555 | CROW = Option("Crow", "cro") 556 | CZECH = Option("Czech", "cs") 557 | DANISH = Option("Danish", "da") 558 | DARI = Option("Dari", "prs") 559 | DESIYA = Option("Desiya", "dso") 560 | DINKA = Option("Dinka", "din") 561 | DJERMA = Option("Djerma", "qaw") 562 | DOGRI = Option("Dogri", "doi") 563 | DUTCH = Option("Dutch", "nl") 564 | DYULA = Option("Dyula", "dyu") 565 | DZONGKHA = Option("Dzongkha", "dz") 566 | EAST_GREENLANDIC = Option("East-Greenlandic", "qbc") 567 | EASTERN_FRISIAN = Option("Eastern Frisian", "frs") 568 | EGYPTIAN_ANCIENT = Option("Egyptian (Ancient)", "egy") 569 | ENGLISH = Option("English", "en") 570 | ESPERANTO = Option("Esperanto", "eo") 571 | ESTONIAN = Option("Estonian", "et") 572 | EWE = Option("Ewe", "ee") 573 | FALIASCH = Option("Faliasch", "qbg") 574 | FAROESE = Option("Faroese", "fo") 575 | FILIPINO = Option("Filipino", "fil") 576 | FINNISH = Option("Finnish", "fi") 577 | FLEMISH = Option("Flemish", "qbn") 578 | FON = Option("Fon", "fon") 579 | FRENCH = Option("French", "fr") 580 | FRENCH_SIGN_LANGUAGE = Option("French Sign Language", "fsl") 581 | FULAH = Option("Fulah", "ff") 582 | FUR = Option("Fur", "fvr") 583 | GAELIC = Option("Gaelic", "gd") 584 | GALICIAN = Option("Galician", "gl") 585 | GEORGIAN = Option("Georgian", "ka") 586 | GERMAN = Option("German", "de") 587 | GERMAN_SIGN_LANGUAGE = Option("German Sign Language", "gsg") 588 | GREBO = Option("Grebo", "grb") 589 | GREEK = Option("Greek", "el") 590 | GREEK_ANCIENT_TO_1453 = Option("Greek, Ancient (to 1453)", "grc") 591 | GREENLANDIC = Option("Greenlandic", "kl") 592 | GUARANI = Option("Guarani", "gn") 593 | GUJARATI = 
Option("Gujarati", "gu") 594 | GUMATJ = Option("Gumatj", "gnn") 595 | GUNWINGGU = Option("Gunwinggu", "gup") 596 | HAITIAN = Option("Haitian", "ht") 597 | HAIDA = Option("Haida", "hai") 598 | HAKKA = Option("Hakka", "hak") 599 | HARYANVI = Option("Haryanvi", "bgc") 600 | HASSANYA = Option("Hassanya", "qav") 601 | HAUSA = Option("Hausa", "ha") 602 | HAWAIIAN = Option("Hawaiian", "haw") 603 | HEBREW = Option("Hebrew", "he") 604 | HINDI = Option("Hindi", "hi") 605 | HMONG = Option("Hmong", "hmn") 606 | HOKKIEN = Option("Hokkien", "qab") 607 | HOPI = Option("Hopi", "hop") 608 | HUNGARIAN = Option("Hungarian", "hu") 609 | IBAN = Option("Iban", "iba") 610 | IBO = Option("Ibo", "qag") 611 | ICELANDIC = Option("Icelandic", "is") 612 | ICELANDIC_SIGN_LANGUAGE = Option("Icelandic Sign Language", "icl") 613 | INDIAN_SIGN_LANGUAGE = Option("Indian Sign Language", "ins") 614 | INDONESIAN = Option("Indonesian", "id") 615 | INUKTITUT = Option("Inuktitut", "iu") 616 | INUPIAQ = Option("Inupiaq", "ik") 617 | IRISH_GAELIC = Option("Irish Gaelic", "ga") 618 | IRULA = Option("Irula", "iru") 619 | ITALIAN = Option("Italian", "it") 620 | JAPANESE = Option("Japanese", "ja") 621 | JAPANESE_SIGN_LANGUAGE = Option("Japanese Sign Language", "jsl") 622 | JOLA_FONYI = Option("Jola-Fonyi", "dyo") 623 | JU_HOAN = Option("Ju'hoan", "ktz") 624 | KAADO = Option("Kaado", "qbf") 625 | KABUVERDIANU = Option("Kabuverdianu", "kea") 626 | KABYLE = Option("Kabyle", "kab") 627 | KALMYK_OIRAT = Option("Kalmyk-Oirat", "xal") 628 | KANNADA = Option("Kannada", "kn") 629 | KARAJ = Option("Karajá", "kpj") 630 | KARBI = Option("Karbi", "mjw") 631 | KAREN = Option("Karen", "kar") 632 | KAZAKH = Option("Kazakh", "kk") 633 | KHANTY = Option("Khanty", "kca") 634 | KHASI = Option("Khasi", "kha") 635 | KIKUYU = Option("Kikuyu", "ki") 636 | KINYARWANDA = Option("Kinyarwanda", "rw") 637 | KIRUNDI = Option("Kirundi", "qar") 638 | KLINGON = Option("Klingon", "tlh") 639 | KODAVA = Option("Kodava", "kfa") 640 | KONKANI = 
Option("Konkani", "kok") 641 | KOREAN = Option("Korean", "ko") 642 | KOREAN_SIGN_LANGUAGE = Option("Korean Sign Language", "kvk") 643 | KOROWAI = Option("Korowai", "khe") 644 | KRIOLU = Option("Kriolu", "qaq") 645 | KRU = Option("Kru", "kro") 646 | KUDMALI = Option("Kudmali", "kyw") 647 | KUNA = Option("Kuna", "qbb") 648 | KURDISH = Option("Kurdish", "ku") 649 | KWAKIUTL = Option("Kwakiutl", "kwk") 650 | KYRGYZ = Option("Kyrgyz", "ky") 651 | LADAKHI = Option("Ladakhi", "lbj") 652 | LADINO = Option("Ladino", "lad") 653 | LAO = Option("Lao", "lo") 654 | LATIN = Option("Latin", "la") 655 | LATVIAN = Option("Latvian", "lv") 656 | LIMBU = Option("Limbu", "lif") 657 | LINGALA = Option("Lingala", "ln") 658 | LITHUANIAN = Option("Lithuanian", "lt") 659 | LOW_GERMAN = Option("Low German", "nds") 660 | LUXEMBOURGISH = Option("Luxembourgish", "lb") 661 | MACEDONIAN = Option("Macedonian", "mk") 662 | MACRO_J = Option("Macro-Jê", "qbm") 663 | MAGAHI = Option("Magahi", "mag") 664 | MAITHILI = Option("Maithili", "mai") 665 | MALAGASY = Option("Malagasy", "mg") 666 | MALAY = Option("Malay", "ms") 667 | MALAYALAM = Option("Malayalam", "ml") 668 | MALECITE_PASSAMAQUODDY = Option("Malecite-Passamaquoddy", "pqm") 669 | MALINKA = Option("Malinka", "qap") 670 | MALTESE = Option("Maltese", "mt") 671 | MANCHU = Option("Manchu", "mnc") 672 | MANDARIN = Option("Mandarin", "cmn") 673 | MANDINGO = Option("Mandingo", "man") 674 | MANIPURI = Option("Manipuri", "mni") 675 | MAORI = Option("Maori", "mi") 676 | MAPUDUNGUN = Option("Mapudungun", "arn") 677 | MARATHI = Option("Marathi", "mr") 678 | MARSHALLESE = Option("Marshallese", "mh") 679 | MASAI = Option("Masai", "mas") 680 | MASALIT = Option("Masalit", "mls") 681 | MAYA = Option("Maya", "myn") 682 | MENDE = Option("Mende", "men") 683 | MICMAC = Option("Micmac", "mic") 684 | MIDDLE_ENGLISH = Option("Middle English", "enm") 685 | MIN_NAN = Option("Min Nan", "nan") 686 | MINANGKABAU = Option("Minangkabau", "min") 687 | MIRANDESE = 
Option("Mirandese", "mwl") 688 | MIXTEC = Option("Mixtec", "qmt") 689 | MIZO = Option("Mizo", "lus") 690 | MOHAWK = Option("Mohawk", "moh") 691 | MONGOLIAN = Option("Mongolian", "mn") 692 | MONTAGNAIS = Option("Montagnais", "moe") 693 | MORE = Option("More", "qaf") 694 | MORISYEN = Option("Morisyen", "mfe") 695 | NAGPURI = Option("Nagpuri", "qbl") 696 | NAHUATL = Option("Nahuatl", "nah") 697 | NAMA = Option("Nama", "qba") 698 | NAVAJO = Option("Navajo", "nv") 699 | NAXI = Option("Naxi", "nbf") 700 | NDEBELE = Option("Ndebele", "nd") 701 | NEAPOLITAN = Option("Neapolitan", "nap") 702 | NENETS = Option("Nenets", "yrk") 703 | NEPALI = Option("Nepali", "ne") 704 | NISGA_A = Option("Nisga'a", "ncg") 705 | NONE = Option("None", "zxx") 706 | NORSE_OLD = Option("Norse, Old", "non") 707 | NORTH_AMERICAN_INDIAN = Option("North American Indian", "nai") 708 | NORWEGIAN = Option("Norwegian", "no") 709 | NUSHI = Option("Nushi", "qbk") 710 | NYANEKA = Option("Nyaneka", "nyk") 711 | NYANJA = Option("Nyanja", "ny") 712 | OCCITAN = Option("Occitan", "oc") 713 | OJIBWA = Option("Ojibwa", "oj") 714 | OJIHIMBA = Option("Ojihimba", "qaz") 715 | OLD_ENGLISH = Option("Old English", "ang") 716 | ORIYA = Option("Oriya", "or") 717 | PAPIAMENTO = Option("Papiamento", "pap") 718 | PARSEE = Option("Parsee", "qaj") 719 | PASHTU = Option("Pashtu", "ps") 720 | PAWNEE = Option("Pawnee", "paw") 721 | PERSIAN = Option("Persian", "fa") 722 | PEUL = Option("Peul", "qai") 723 | POLISH = Option("Polish", "pl") 724 | POLYNESIAN = Option("Polynesian", "qah") 725 | PORTUGUESE = Option("Portuguese", "pt") 726 | PULAR = Option("Pular", "fuf") 727 | PUNJABI = Option("Punjabi", "pa") 728 | PUREPECHA = Option("Purepecha", "tsz") 729 | QUECHUA = Option("Quechua", "qu") 730 | QUENYA = Option("Quenya", "qya") 731 | RAJASTHANI = Option("Rajasthani", "raj") 732 | RAWAN = Option("Rawan", "qbj") 733 | RHAETIAN = Option("Rhaetian", "xrr") 734 | ROMANIAN = Option("Romanian", "ro") 735 | ROMANSH = Option("Romansh", "rm") 
736 | ROMANY = Option("Romany", "rom") 737 | ROTUMAN = Option("Rotuman", "rtm") 738 | RUSSIAN = Option("Russian", "ru") 739 | RUSSIAN_SIGN_LANGUAGE = Option("Russian Sign Language", "rsl") 740 | RYUKYUAN = Option("Ryukyuan", "qao") 741 | SAAMI = Option("Saami", "qae") 742 | SAMOAN = Option("Samoan", "sm") 743 | SANSKRIT = Option("Sanskrit", "sa") 744 | SARDINIAN = Option("Sardinian", "sc") 745 | SCANIAN = Option("Scanian", "qay") 746 | SERBIAN = Option("Serbian", "sr") 747 | SERBO_CROATIAN = Option("Serbo-Croatian", "qbo") 748 | SERER = Option("Serer", "srr") 749 | SHANGHAINESE = Option("Shanghainese", "qad") 750 | SHANXI = Option("Shanxi", "qau") 751 | SHONA = Option("Shona", "sn") 752 | SHOSHONI = Option("Shoshoni", "shh") 753 | SICILIAN = Option("Sicilian", "scn") 754 | SINDARIN = Option("Sindarin", "sjn") 755 | SINDHI = Option("Sindhi", "sd") 756 | SINHALA = Option("Sinhala", "si") 757 | SIOUX = Option("Sioux", "sio") 758 | SLOVAK = Option("Slovak", "sk") 759 | SLOVENIAN = Option("Slovenian", "sl") 760 | SOMALI = Option("Somali", "so") 761 | SONGHAY = Option("Songhay", "son") 762 | SONINKE = Option("Soninke", "snk") 763 | SORBIAN_LANGUAGES = Option("Sorbian languages", "wen") 764 | SOTHO = Option("Sotho", "st") 765 | SOUSSON = Option("Sousson", "qbe") 766 | SPANISH = Option("Spanish", "es") 767 | SPANISH_SIGN_LANGUAGE = Option("Spanish Sign Language", "ssp") 768 | SRANAN = Option("Sranan", "srn") 769 | SWAHILI = Option("Swahili", "sw") 770 | SWEDISH = Option("Swedish", "sv") 771 | SWISS_GERMAN = Option("Swiss German", "gsw") 772 | SYLHETI = Option("Sylheti", "syl") 773 | TAGALOG = Option("Tagalog", "tl") 774 | TAJIK = Option("Tajik", "tg") 775 | TAMASHEK = Option("Tamashek", "tmh") 776 | TAMIL = Option("Tamil", "ta") 777 | TARAHUMARA = Option("Tarahumara", "tac") 778 | TATAR = Option("Tatar", "tt") 779 | TELUGU = Option("Telugu", "te") 780 | TEOCHEW = Option("Teochew", "qak") 781 | THAI = Option("Thai", "th") 782 | TIBETAN = Option("Tibetan", "bo") 783 | 
TIGRIGNA = Option("Tigrigna", "qan") 784 | TLINGIT = Option("Tlingit", "tli") 785 | TOK_PISIN = Option("Tok Pisin", "tpi") 786 | TONGA_TONGA_ISLANDS = Option("Tonga (Tonga Islands)", "to") 787 | TSONGA = Option("Tsonga", "ts") 788 | TSWA = Option("Tswa", "tsc") 789 | TSWANA = Option("Tswana", "tn") 790 | TULU = Option("Tulu", "tcy") 791 | TUPI = Option("Tupi", "tup") 792 | TURKISH = Option("Turkish", "tr") 793 | TURKMEN = Option("Turkmen", "tk") 794 | TUVINIAN = Option("Tuvinian", "tyv") 795 | TZOTZIL = Option("Tzotzil", "tzo") 796 | UKRAINIAN = Option("Ukrainian", "uk") 797 | UKRAINIAN_SIGN_LANGUAGE = Option("Ukrainian Sign Language", "ukl") 798 | UNGWATSI = Option("Ungwatsi", "qat") 799 | URDU = Option("Urdu", "ur") 800 | UZBEK = Option("Uzbek", "uz") 801 | VIETNAMESE = Option("Vietnamese", "vi") 802 | VISAYAN = Option("Visayan", "qaa") 803 | WASHOE = Option("Washoe", "was") 804 | WELSH = Option("Welsh", "cy") 805 | WOLOF = Option("Wolof", "wo") 806 | XHOSA = Option("Xhosa", "xh") 807 | YAKUT = Option("Yakut", "sah") 808 | YAPESE = Option("Yapese", "yap") 809 | YIDDISH = Option("Yiddish", "yi") 810 | YORUBA = Option("Yoruba", "yo") 811 | ZULU = Option("Zulu", "zu") 812 | 813 | class SOUND_MIXES: 814 | MONO = Option("Mono", "mono") 815 | SILENT = Option("Silent", "silent") 816 | STEREO = Option("Stereo", "stereo") 817 | DOLBY_DIGITAL = Option("Dolby Digital", "dolby_digital") 818 | DOLBY = Option("Dolby", "dolby") 819 | DOLBY_SR = Option("Dolby SR", "dolby_sr") 820 | DTS = Option("DTS", "dts") 821 | SDDS = Option("SDDS", "sdds") 822 | ULTRA_STEREO = Option("Ultra Stereo", "ultra_stereo") 823 | _TRACK_STEREO = Option("6-Track Stereo", "6_track_stereo") 824 | _MM_6_TRACK = Option("70 mm 6-Track", "70_mm_6_track") 825 | VITAPHONE = Option("Vitaphone", "vitaphone") 826 | DOLBY_DIGITAL_EX = Option("Dolby Digital EX", "dolby_digital_ex") 827 | DE_FOREST_PHONOFILM = Option("De Forest Phonofilm", "de_forest_phonofilm") 828 | DTS_STEREO = Option("DTS-Stereo", "dts_stereo") 
class AdvancedSearchResult(ParseBase):
    """Collect advanced-search criteria into the ``self.query`` dict.

    Plain text criteria are stored as given, ``(min, max)`` pairs are split
    into ``<name>-min`` / ``<name>-max`` entries, and multi-value criteria are
    stored as one comma-separated string.
    """

    @staticmethod
    def _join_values(values):
        """Return the comma-joined raw values of *values*.

        Each element may be a plain string or an object exposing ``.value``
        (such as the ``Option`` constants).  A conditional expression is used
        instead of ``isinstance(v, str) and v or v.value`` because the latter
        falls through to ``v.value`` for an empty string and raises
        ``AttributeError``.
        """
        return ",".join([v if isinstance(v, str) else v.value for v in values])

    def __init__(
        self,
        imdb,
        title="",
        title_type=(),
        release_date=("", ""),
        user_rating=("", ""),
        num_votes=("", ""),
        genres=(),
        groups=(),
        has=(),
        companies=(),
        certificates=(),
        colors=(),
        countries=(),
        keywords="",
        languages=(),
        locations="",
        moviemeter=("", ""),
        plot="",
        runtime=("", ""),
        sound_mixes=(),
        my_ratings=(),
        now_playing=(),
        adult=(),
    ):
        """Store *imdb* and build ``self.query`` from the given criteria.

        :param imdb: stored unchanged on ``self.imdb``.
        :param title, keywords, locations, plot: free-text criteria.
        :param release_date, user_rating, num_votes, moviemeter, runtime:
            two-item ``(min, max)`` sequences; ``""`` leaves a bound open.
        :param title_type, genres, groups, has, companies, certificates,
            colors, countries, languages, sound_mixes, my_ratings,
            now_playing, adult: iterables of strings and/or objects with a
            ``.value`` attribute.  The defaults are immutable empty tuples so
            no default object is shared between calls.
        """
        self.imdb = imdb

        join = self._join_values

        # Keys are inserted in the same order as before so iteration over
        # ``self.query`` is unchanged.
        self.query = {}
        self.query["title"] = title
        self.query["title_type"] = join(title_type)
        self.query["release_date-min"] = release_date[0]
        self.query["release_date-max"] = release_date[1]
        self.query["user_rating-min"] = user_rating[0]
        self.query["user_rating-max"] = user_rating[1]
        self.query["num_votes-min"] = num_votes[0]
        self.query["num_votes-max"] = num_votes[1]
        self.query["genres"] = join(genres)
        self.query["groups"] = join(groups)
        self.query["has"] = join(has)
        self.query["companies"] = join(companies)
        self.query["certificates"] = join(certificates)
        self.query["colors"] = join(colors)
        self.query["countries"] = join(countries)
        self.query["keywords"] = keywords
        self.query["languages"] = join(languages)
        self.query["locations"] = locations
        self.query["moviemeter-min"] = moviemeter[0]
        self.query["moviemeter-max"] = moviemeter[1]
        self.query["plot"] = plot
        self.query["runtime-min"] = runtime[0]
        self.query["runtime-max"] = runtime[1]
        self.query["sound_mixes"] = join(sound_mixes)
        self.query["my_ratings"] = join(my_ratings)
        self.query["now_playing"] = join(now_playing)
        self.query["adult"] = join(adult)
class Chart(Base):
    """An IMDb chart page parsed into a list of ``Movie`` objects.

    After ``fetch()`` (inherited from ``Base``) has run, ``self.results``
    holds one ``Movie`` per chart row with ``title``, ``year``, ``cover``,
    ``rating`` and ``votes`` pre-filled from the chart listing.
    """

    base_url = "https://www.imdb.com/chart/%s"

    def __init__(self, chart, imdb):
        """Remember the chart name and the object used to download pages.

        :param chart: chart name substituted into ``base_url``.
        :param imdb: object whose ``_get_data(url)`` is called by ``fetch()``.
        """
        self.chart = chart
        self.imdb = imdb

    def _get_urls(self):
        """Return the single URL of this chart as a one-element list."""
        return [self.base_url % (self.chart,)]

    def parse(self, htmls):
        """Parse the downloaded chart page into ``self.results``.

        :param htmls: list of HTML documents; only the first one is read.
        """
        super(Chart, self).parse(htmls)

        self.results = []
        for item_row in self.trees[0].xpath("//tbody[@class='lister-list']/tr"):
            poster_column = item_row.xpath(".//td[@class='posterColumn']")[0]

            cover = poster_column.xpath(".//img/@src")[0]
            if "/nopicture/" in cover:
                cover = None
            else:
                cover = self.cleanup_photo_url(cover)

            imdb_id = re.findall(r"/tt(\d+)/", poster_column.xpath(".//a/@href")[0])[0]

            # Reset per row: a row without a rating must not reuse the
            # previous row's values (or hit an unbound name on the first row).
            rating = None
            votes = None
            rating_text = item_row.xpath(
                ".//td[contains(@class, 'imdbRating')]/strong/@title"
            )
            if rating_text:
                # The title attribute is expected to contain exactly two
                # numbers: the rating followed by the vote count.
                rating, votes = re.findall("[0-9.,]+", rating_text[0])
                rating = Decimal(rating)
                votes = int(votes.replace(",", ""))

            year = None
            for base_element in item_row.xpath(
                ".//td[@class='titleColumn']//span[@class='secondaryInfo']/text()"
            ):
                years = re.findall(r"\((\d{4})\)", base_element)
                if years:
                    year = int(years[0])
                    break

            item = Movie(imdb_id, self.imdb)

            item.title = item_row.xpath(".//td[@class='titleColumn']//a/text()")[0]
            item.year = year
            item.cover = cover
            item.rating = rating
            item.votes = votes

            self.results.append(item)
def enumify(text):
    """Convert a human-readable label into an UPPER_SNAKE_CASE name.

    Runs of characters from space through "." become a single underscore,
    any remaining character that is not a letter, digit or underscore is
    dropped, a leading "20th" is spelled out, and leading digits are removed
    so the result does not start with a digit.
    """
    # NOTE: "[ -.]" is a character *range* (space .. "."), so it also covers
    # quotes, parentheses, commas and hyphens, e.g. "Ju'hoan" -> "JU_HOAN".
    text = re.sub("[ -.]+", "_", text)
    text = re.sub(r"[^a-zA-Z0-9_]+", "", text)
    text = text.strip("_")
    if text.startswith("20th"):
        text = "twentieth" + text[4:]
    # All ten digits are stripped; leaving one out would let labels that
    # start with it produce an invalid identifier.
    return text.lstrip("0123456789").upper()


def generate_function_and_enums(tree):
    """Generate source code for ``class AS`` and ``AdvancedSearchResult``.

    :param tree: parsed HTML document exposing ``xpath()``; every
        ``div`` with class ``clause`` is treated as one form section.
    :returns: the generated Python source as a single string.

    Sections are classified as:

    * ``normal`` - exactly one ``<input>``: a free-text keyword argument;
    * ``enum`` - several inputs sharing one name, or a single ``<select>``:
      a multi-value argument plus a nested class of ``Option`` constants;
    * ``minmax`` - fields named ``<name>-min`` / ``<name>-max``: a two-item
      argument split into two query keys.
    """
    enums = {}
    all_fields = []
    for section in tree.xpath("//div[@class='clause']"):
        section_title = section.xpath(".//h3/text()")[0]
        if section_title == "Instant Watch Options":
            # bugged and not all that interesting
            continue
        if section_title == "Cast/Crew":  # Skipped for now
            continue
        if section_title == "Display Options":  # sorting option, not part of the form
            continue

        input_fields = section.xpath(".//input")
        input_field_names = section.xpath(".//input/@name")
        select_fields = section.xpath(".//select")
        select_field_names = section.xpath(".//select/@name")
        is_min_max = (
            any(f.endswith("-min") for f in input_field_names)
            and any(f.endswith("-max") for f in input_field_names)
        ) or (
            any(f.endswith("-min") for f in select_field_names)
            and any(f.endswith("-max") for f in select_field_names)
        )

        if len(input_fields) == 1 and len(select_fields) == 0:
            all_fields.append((input_field_names[0], "normal", ""))
        elif len(set(input_field_names)) == 1 and len(select_fields) == 0:
            field_name = input_field_names[0]
            # An immutable default keeps the generated signature free of
            # shared mutable default arguments.
            all_fields.append((field_name, "enum", ()))
            for e in input_fields:
                label = section.xpath(f".//label[@for='{e.attrib['id']}']")[0]
                if label.text:
                    label = label.text
                else:
                    # Labels without text carry their caption in a child's
                    # title attribute.
                    label = label.xpath(".//*/@title")[0]
                value = e.attrib["value"]
                enums.setdefault(enumify(field_name), {})[enumify(label)] = Option(
                    label, value
                )
        elif len(select_fields) == 1 and len(input_fields) == 0:
            field_name = select_field_names[0]
            all_fields.append((field_name, "enum", ()))
            for field in select_fields[0].xpath(".//option"):
                label = field.text
                value = field.attrib["value"]
                enums.setdefault(enumify(field_name), {})[enumify(label)] = Option(
                    label, value
                )
        elif is_min_max:
            # Drop the 4-character "-min" / "-max" suffix to get the base name.
            if select_field_names:
                field_name = select_field_names[0][:-4]
            else:
                field_name = input_field_names[0][:-4]
            all_fields.append((field_name, "minmax", ("", "")))
        else:
            print("Unknown", section_title, input_fields, select_fields)

    code = []
    code.append("class AS:")
    for k, v in enums.items():
        code.append(f"    class {k}:")
        for label, option in v.items():
            code.append(f"        {label} = {option!r}")
        code.append("")
    code.append("")
    code.append("")

    func_args = ", ".join([f"{fn}={v!r}" for (fn, t, v) in all_fields])
    code.append("class AdvancedSearchResult(ParseBase):")
    code.append(f"    def __init__(self, imdb, {func_args}):")
    code.append("        self.imdb = imdb")
    code.append("")
    code.append("        self.query = {}")
    for fn, t, v in all_fields:
        if t == "normal":
            code.append(f"        self.query['{fn}'] = {fn}")
        elif t == "enum":
            # Emit a conditional expression: "isinstance(v, str) and v or
            # v.value" would call .value on an empty string.
            code.append(
                f"        self.query['{fn}'] = ','.join([v if isinstance(v, str) else v.value for v in {fn}])"
            )
        elif t == "minmax":
            code.append(f"        self.query['{fn}-min'] = {fn}[0]")
            code.append(f"        self.query['{fn}-max'] = {fn}[1]")

    return "\n".join(code)
class IMDb(object):
    """Entry point of the package: downloads pages and builds result objects."""

    def _get_data(self, url):
        """Download *url* and return the response body as text."""
        headers = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Accept-Language": "en",
        }
        r = requests.get(url, headers=headers)
        return r.text

    def _normalize_title(self, title):
        """Return *title* without non-ASCII characters and repeated spaces."""
        title = re.sub(r"[^\x00-\x7F]+", "", title)
        title = re.sub(r" +", " ", title)

        return title

    def resolve_movie(self, title, year=None):
        """Tries to find a movie with a given title and year"""
        r = self.search_movie(title)

        return self._match_results(r, title, year)

    def resolve_tv_show(self, title, year=None):
        """Tries to find a TV show with a given title and year"""
        r = self.search_tv_show(title)

        return self._match_results(r, title, year)

    def _match_results(self, results, title, year):
        """Pick the search result that best matches *title* and *year*.

        :param results: unfetched search object; ``fetch()`` is called on it
            and its ``results`` list is inspected.
        :param title: title being looked for.
        :param year: expected year, or ``None`` to ignore the year.
        :returns: ``None`` when there are no results; otherwise the first of
            the top 7 results with a matching title and a compatible year,
            else (when *year* is given) the first of the top 5 with a
            matching title whose year is at most one off, else the first
            result.
        """
        results.fetch()
        results = results.results

        if not results:
            return None

        normalized_title = self._normalize_title(title)

        def title_matches(result):
            # Compare every known title of the result (primary and
            # alternative), normalised the same way as the query, so "aka"
            # titles and titles with non-ASCII characters can match.
            return any(
                candidate and self._normalize_title(candidate) == normalized_title
                for candidate in result.get_titles()
            )

        for result in results[:7]:
            if title_matches(result) and (
                year is None or result.year is None or year == result.year
            ):
                return result

        if year:
            for result in results[:5]:
                if (
                    result.year is not None
                    and abs(year - result.year) <= 1
                    and title_matches(result)
                ):
                    return result

        return results[0]

    def search_movie(self, query):
        """Return an unfetched movie search for *query*."""
        return SearchResult("movie", query, self)

    def search_tv_show(self, query):
        """Return an unfetched TV search for *query*."""
        return SearchResult("tv", query, self)

    def get_movie(self, imdb_id):
        """Return an unfetched ``Movie`` for *imdb_id*."""
        return Movie(imdb_id, self)

    def get_person(self, imdb_id):
        """Return an unfetched ``Person`` for *imdb_id*."""
        return Person(imdb_id, self)

    def get_chart(self, chart_type):
        """Return an unfetched ``Chart`` for *chart_type*.

        :raises UnknownChartTypeException: if *chart_type* is not listed in
            ``CHART_TYPES``.
        """
        if chart_type not in CHART_TYPES:
            raise UnknownChartTypeException()

        return Chart(chart_type, self)

    def advanced_search(self, **kwargs):
        """Return an ``AdvancedSearchResult`` built from *kwargs*."""
        return AdvancedSearchResult(self, **kwargs)
title_extra: 62 | title_extra = title_extra[0] 63 | if title_extra != "Reference View": 64 | if self.trees[0].xpath( 65 | "//span[@class='titlereference-original-title-label']" 66 | ): 67 | self.alternative_titles.append(self.title) 68 | self.title = title_extra 69 | else: 70 | self.alternative_titles.append(title_extra) 71 | 72 | for t in titles[1:]: 73 | try: 74 | self.year = int(t.strip(u"()").split(u"\u2013")[0]) 75 | except ValueError: 76 | continue 77 | else: 78 | break 79 | else: 80 | header_title = self.trees[0].xpath("//meta[@name='title']/@content") 81 | if header_title: 82 | try: 83 | self.year = int( 84 | re.findall( 85 | r"\((?:TV Series )?(\d{4})(?:\u2013(?: |\d+))?\) - IMDb$", 86 | header_title[0], 87 | )[0] 88 | ) 89 | except (ValueError, IndexError): 90 | pass 91 | 92 | cover = self.trees[0].xpath("//link[@rel='image_src']/@href") 93 | if cover: 94 | self.cover = self.cleanup_photo_url(cover[0]) 95 | if "images/logos/imdb_fb_logo" in self.cover: 96 | self.cover = None 97 | 98 | rating = self.trees[0].xpath("//span[@class='ipl-rating-star__rating']/text()") 99 | if rating and rating[0]: 100 | self.rating = Decimal(rating[0]) 101 | 102 | votes = self.trees[0].xpath( 103 | "//span[@class='ipl-rating-star__total-votes']/text()" 104 | ) 105 | if votes and votes[0]: 106 | self.votes = int(votes[0].strip("()").replace(",", "")) 107 | 108 | rows = self.trees[0].xpath( 109 | "//table[@class='titlereference-list ipl-zebra-list']//tr" 110 | ) 111 | for row in rows: 112 | key, value = row.xpath("./td") 113 | key = str(key.text) 114 | 115 | if key == "Genres": 116 | self.genres = [ 117 | x.text for x in value.xpath(".//a") if "/genre/" in x.attrib["href"] 118 | ] 119 | elif key == "Taglines": 120 | self.tagline = value.text.strip() 121 | elif key == "Plot Summary": 122 | plot = value.xpath("./p") 123 | if plot: 124 | self.plot = plot[0].text.strip() 125 | elif key == "Plot Keywords": 126 | self.plot_keywords = [ 127 | x.text 128 | for x in value.xpath(".//a") 
129 | if "/keyword/" in x.attrib["href"] 130 | ] 131 | elif key == "Also Known As": 132 | self.alternative_titles += [ 133 | x.strip().split("\n")[0].strip() 134 | for x in value.xpath(".//li/text()") 135 | if x.strip() 136 | ] 137 | elif key == "Runtime": 138 | runtimes = re.findall("(\d+) min", " ".join(value.xpath(".//text()"))) 139 | if runtimes: 140 | self.duration = int(runtimes[0]) 141 | elif key == "Country": 142 | self.countries = [ 143 | x.text 144 | for x in value.xpath(".//a") 145 | if "/country/" in x.attrib["href"] 146 | ] 147 | elif key == "Language": 148 | self.languages = [ 149 | x.text 150 | for x in value.xpath(".//a") 151 | if "/language/" in x.attrib["href"] 152 | ] 153 | 154 | summary_texts = self.trees[1].xpath("//div[@class='summary_text']/text()") 155 | if summary_texts: 156 | self.description = summary_texts[0].strip() 157 | 158 | storylines = self.trees[1].xpath( 159 | "//h2[text()='Storyline']/../div/p/span//text()" 160 | ) 161 | if storylines: 162 | self.storyline = "".join(storylines).strip() 163 | if self.storyline.startswith("Add a Plot"): 164 | self.storyline = None 165 | 166 | release_dates = [ 167 | x.strip() 168 | for x in self.trees[1].xpath("//h4[text()='Release Date:']/../text()") 169 | if x.strip() 170 | ] 171 | if release_dates: 172 | self.release_date = release_dates[0] 173 | 174 | rows = self.trees[0].xpath("//div[@class='titlereference-overview-section']") 175 | for row in rows: 176 | key = row.xpath("./text()")[0].strip() 177 | if ( 178 | key == "Director:" 179 | or key == "Directors:" 180 | or key == "Writers:" 181 | or key == "Writer:" 182 | ): 183 | for elem in row.xpath(".//a"): 184 | if "/name/" not in elem.attrib["href"]: 185 | continue 186 | p = Person( 187 | re.findall("/nm(\d+)", elem.attrib["href"])[0], self.imdb 188 | ) 189 | p.name = elem.text 190 | if key == "Director:" or key == "Directors:": 191 | self.directors.append(p) 192 | elif key == "Writers:" or key == "Writer:": 193 | self.writers.append(p) 194 | 
195 | for row in self.trees[0].xpath( 196 | "//h4[@id='cast']/../../following-sibling::table[1]//tr//td[@itemprop='actor']" 197 | ): 198 | elem = row.xpath(".//a") 199 | if not elem: 200 | continue 201 | 202 | elem = elem[0] 203 | if "/name/" not in elem.attrib["href"]: 204 | continue 205 | 206 | p = Person(re.findall("/nm(\d+)", elem.attrib["href"])[0], self.imdb) 207 | p.name = elem.xpath("./span/text()")[0] 208 | self.actors.append(p) 209 | 210 | self.alternative_titles = list(set(self.alternative_titles)) 211 | 212 | for rec_details in self.trees[1].xpath("//div[@class='rec_details']"): 213 | self.more_like_this.append(self.parse_recommendation(rec_details)) 214 | 215 | def parse_recommendation(self, tree): 216 | tree_title = tree.xpath(".//div[@class='rec-title']")[0] 217 | imdb_id = tree_title.xpath("./a/@href")[0].split("/")[2][2:] 218 | movie = Movie(imdb_id, self.imdb) 219 | 220 | title = tree_title.xpath(".//a//text()") 221 | if title: 222 | movie.title = title[0].strip() 223 | 224 | year = tree_title.xpath("./span/text()") 225 | if year: 226 | try: 227 | movie.year = int(tree_title.xpath("./span/text()")[0].strip("()")) 228 | except ValueError: 229 | pass 230 | 231 | movie.genres = [ 232 | x.strip() 233 | for x in tree.xpath(".//div[contains(@class, 'rec-cert-genre')]/text()") 234 | if x.strip() 235 | ] 236 | 237 | ratings = tree.xpath(".//div[contains(@class, 'rating-list')]/@title") 238 | if ratings: 239 | ratings = re.findall(r"([\d.]+)/10 \(([\d,]+) votes\)", ratings[0]) 240 | if ratings: 241 | ratings = ratings[0] 242 | movie.rating = Decimal(ratings[0]) 243 | movie.votes = int(ratings[1].replace(",", "")) 244 | 245 | movie.tagline = "".join( 246 | tree.xpath(".//div[@class='rec-outline']/p/text()") 247 | ).strip() 248 | 249 | return movie 250 | 251 | def __repr__(self): 252 | return "" % ( 253 | self.fetched, 254 | self.imdb_id, 255 | self.title, 256 | self.year, 257 | ) 258 | 259 | def get_titles(self): 260 | yield self.title 261 | for title in 
class Person(Base):
    """A person entry, addressed through the ``/name/nm<id>/`` URL."""

    # Display name; stays None until something assigns it.
    name = None

    # The ``%s`` placeholder receives the id that follows the "nm" prefix.
    base_url = "http://www.imdb.com/name/nm%s/"

    def parse(self, html):
        """Delegate to the base parser; no person-specific fields are read."""
        super(Person, self).parse(html)

    def __repr__(self):
        """Return a short debugging representation.

        The format string must contain one placeholder per value: applying
        ``%`` with three arguments to an empty string raises ``TypeError``.
        """
        return "<Person fetched=%r imdb_id=%r name=%r>" % (
            self.fetched,
            self.imdb_id,
            self.name,
        )
import io
import os

from setuptools import setup


# Locate the README next to this file so the build works from any directory.
readme_path = os.path.join(os.path.dirname(__file__), "README.rst")

# Read with an explicit encoding: the plain built-in open() falls back to the
# locale's default on Python 3, which can fail on non-UTF-8 systems when the
# README contains non-ASCII text. io.open() takes ``encoding`` on Python 2 too.
with io.open(readme_path, encoding="utf-8") as fp:
    long_description = fp.read()

setup(
    name='imdbparser',
    version='1.0.22',
    url='https://github.com/JohnDoee/imdbparser',
    author='John Doee',
    author_email='johndoee@tidalstream.org',
    description='IMDB Parser',
    long_description=long_description,
    license='MIT',
    packages=['imdbparser'],
    install_requires=['lxml', 'requests'],
    classifiers=[
        'Development Status :: 4 - Beta',
        'Intended Audience :: Developers',
        'License :: OSI Approved :: MIT License',
        'Operating System :: OS Independent',
        'Programming Language :: Python',
        'Programming Language :: Python :: 2',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.4',
        'Programming Language :: Python :: 3.5',
        'Programming Language :: Python :: 3.6',
        'Topic :: Database :: Front-Ends',
        'Topic :: Software Development :: Libraries :: Python Modules',
    ]
)