├── .editorconfig
├── .gitignore
├── .isort.cfg
├── CHANGELOG.rst
├── LICENSE
├── README.rst
├── imdbparser
    ├── __init__.py
    ├── __main__.py
    ├── advancedsearchresult.py
    ├── base.py
    ├── chart.py
    ├── exceptions.py
    ├── generateadvancedsearchresult.py
    ├── imdb.py
    ├── movie.py
    ├── person.py
    └── searchresult.py
├── setup.cfg
└── setup.py


/.editorconfig:
--------------------------------------------------------------------------------
 1 | # http://editorconfig.org
 2 | 
 3 | root = true
 4 | 
 5 | [*]
 6 | indent_style = space
 7 | indent_size = 4
 8 | insert_final_newline = true
 9 | trim_trailing_whitespace = true
10 | end_of_line = lf
11 | charset = utf-8
12 | 
13 | # Docstrings and comments use max_line_length = 79
14 | [*.py]
15 | max_line_length = 119
16 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.mo
 2 | *.egg-info
 3 | *.egg
 4 | *.EGG
 5 | *.EGG-INFO
 6 | bin
 7 | build
 8 | develop-eggs
 9 | downloads
10 | eggs
11 | fake-eggs
12 | parts
13 | dist
14 | .installed.cfg
15 | .mr.developer.cfg
16 | .hg
17 | .bzr
18 | .svn
19 | *.pyc
20 | *.pyo
21 | *.tmp*
22 | dropin.cache
23 | _trial_temp
24 | *.komodoproject
25 | docs/_build*
26 | .env*
27 | autotorrent.conf
28 | .coverage
29 | 


--------------------------------------------------------------------------------
/.isort.cfg:
--------------------------------------------------------------------------------
1 | [settings]
2 | multi_line_output=3
3 | include_trailing_comma=True
4 | force_grid_wrap=0
5 | use_parentheses=True
6 | line_length=88
7 | 


--------------------------------------------------------------------------------
/CHANGELOG.rst:
--------------------------------------------------------------------------------
 1 | Version 1.0.22 ()
 2 | ===========================================================
 3 | 
 4 | *   Feature: Added support for IMDb charts
 5 | *   Feature: Added support for advanced search.
 6 | 
 7 | 
 8 | Version 1.0.21 (21-12-2019)
 9 | ===========================================================
10 | 
11 | *   Feature: Better CLI
12 | *   Bugfix: TV search was not actually searching TV only
13 | *   Bugfix: Movie search now searches only movies
14 | 
15 | Version 1.0.20 (27-11-2019)
16 | ===========================================================
17 | 
18 | *   Feature: Added support for "more like this"
19 | 
20 | Version 1.0.19 (13-04-2019)
21 | ===========================================================
22 | 
23 | *   Bugfix: Link in plot not properly parsed
24 | 
25 | Version 1.0.18 (19-08-2018)
26 | ===========================================================
27 | 
28 | *   Change: Tagline, description and storyline moved around a little bit
29 | *   Bugfix: Director now found if plural too
30 | 
31 | Version 1.0.17 (12-05-2018)
32 | ===========================================================
33 | 
34 | *   Bugfix: Storyline changed tagtype from div to span
35 | *   Bugfix: Missing plot summary threw exception
36 | 
37 | Version 1.0.14 (26-04-2018)
38 | ===========================================================
39 | 
40 | *   Bugfix: IMDb redirects to HTTPS if HTTP is called, changed standard URLs to HTTPS
41 | *   Bugfix: Writer / Writers support (not always plural)
42 | *   Bugfix: Storyline doesn't include writer anymore
43 | 
44 | Version 1.0.9 (28-08-2017)
45 | ===========================================================
46 | 
47 | *   Bugfix: Movie search now also searches TV to include TV Movies
48 | 
49 | Version 1.0.8 (13-05-2017)
50 | ===========================================================
51 | 
52 | *   Bugfix: Shows with a missing rating and shows with no description
53 |             no longer throw an exception.
54 | *   Bugfix: Encoding error when searching for unicode
55 | 
56 | Version 1.0.7 (30-04-2017)
57 | ===========================================================
58 | 
59 | *   Bugfix: Small parsing bug with TV and ratings
60 | 
61 | Version 1.0.4 (28-03-2017)
62 | ===========================================================
63 | 
64 | *   Feature: Added support for smart resolve of tv shows and movies
65 | 
66 | Version 1.0.3 (20-10-2016)
67 | ===========================================================
68 | 
69 | *   Feature: Added support for tv show search
70 | *   Change: Renamed search result variable from movies to results
71 | 
72 | Version 1.0.2 (13-10-2016)
73 | ===========================================================
74 | 
75 | *   Fixing all the small bugs that slipped through!
76 | 
77 | Version 1.0.1 (13-10-2016)
78 | ===========================================================
79 | 
80 | *   Initial release of rewrite
81 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (C) 2016 Anders Jensen
2 | 
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4 | 
5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6 | 
7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
 1 | imdbparser
 2 | ==========
 3 | 
 4 | Search IMDb and get movie information.
 5 | Incredibly basic, limited feature-set, somewhat fast.
 6 | 
 7 | Usage
 8 | -----
 9 | 
10 | Get from ID
11 | ~~~~~~~~~~~
12 | .. code-block:: python
13 | 
14 |     >>> from imdbparser import IMDb
15 |     >>> imdb = IMDb()
16 |     >>> movie = imdb.get_movie(1954470)
17 |     >>> movie.fetched
18 |     False
19 |     >>> movie.fetch()
20 |     >>> movie.fetched
21 |     True
22 |     >>> movie.__dict__
23 |     ...
24 | 
25 | 
26 | Search
27 | ~~~~~~
28 | .. code-block:: python
29 | 
30 |     >>> from imdbparser import IMDb
31 |     >>> imdb = IMDb()
32 |     >>> search_result = imdb.search_movie('Matrix')
33 |     >>> search_result.fetched
34 |     False
35 |     >>> search_result.fetch()
36 |     >>> search_result.fetched
37 |     True
38 |     >>> search_result.results
39 |     [<Movie fetched=False imdb_id='0133093' title='The Matrix' year=1999>,
40 |      <Movie fetched=False imdb_id='0234215' title='The Matrix Reloaded' year=2003>, ...]
41 | 
42 | .. code-block:: python
43 | 
44 |     >>> from imdbparser import IMDb
45 |     >>> imdb = IMDb()
46 |     >>> search_result = imdb.search_tv_show('it crowd')
47 |     >>> search_result.fetched
48 |     False
49 |     >>> search_result.fetch()
50 |     >>> search_result.fetched
51 |     True
52 |     >>> search_result.results
53 |     [<Movie fetched=False imdb_id='0487831' title='The IT Crowd' year=None>,
54 |      <Movie fetched=False imdb_id='0944954' title='The IT Crowd' year=None>, ...]
55 | 
56 | 
57 | 
58 | 
59 | 
60 | License
61 | -------
62 | 
63 | MIT, see LICENSE


--------------------------------------------------------------------------------
/imdbparser/__init__.py:
--------------------------------------------------------------------------------
1 | from .imdb import AS, IMDb  # NOQA
2 | 
3 | __version__ = "1.0.22"
4 | 


--------------------------------------------------------------------------------
/imdbparser/__main__.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import logging
 3 | from pprint import pprint
 4 | 
 5 | 
 6 | def main():
 7 |     from .imdb import IMDb, CHART_TYPES, AS
 8 | 
 9 |     parser = argparse.ArgumentParser(description="Fetch info from IMDb")
10 |     parser.add_argument("--debug", help="Enable debugging", action="store_true")
11 | 
12 |     subparsers = parser.add_subparsers(help="sub-command help", dest="command")
13 | 
14 |     fetch_parser = subparsers.add_parser(name="fetch")
15 |     fetch_parser.add_argument("imdb_id", help="an IMDb id, e.g. tt0120737")
16 | 
17 |     search_parser = subparsers.add_parser(
18 |         name="search", description="Search for a movie or tv show"
19 |     )
20 |     search_parser.add_argument(
21 |         "type", help="Type to search for", choices=["tv", "movie"]
22 |     )
23 |     search_parser.add_argument("title", help="Title to search for")
24 | 
25 |     resolve_parser = subparsers.add_parser(
26 |         name="resolve", description="Try to resolve a search into a specific entry"
27 |     )
28 |     resolve_parser.add_argument(
29 |         "type", help="Type to search-resolve for", choices=["tv", "movie"]
30 |     )
31 |     resolve_parser.add_argument("title", help="Title to search-resolve for")
32 |     resolve_parser.add_argument(
33 |         "year", help="Year close to the entry", type=int, nargs="?"
34 |     )
35 | 
36 |     chart_parser = subparsers.add_parser(name="chart", description="Fetch a chart")
37 |     chart_parser.add_argument("type", help="Chart type", choices=CHART_TYPES)
38 | 
39 |     args = parser.parse_args()
40 | 
41 |     if args.debug:
42 |         logging.basicConfig(level=logging.DEBUG)
43 | 
44 |     i = IMDb()
45 |     movie = None
46 |     movies = None
47 | 
48 |     if args.command == "fetch":
49 |         movie = i.get_movie(args.imdb_id.lstrip("tt"))
50 |     elif args.command == "search":
51 |         if args.type == "tv":
52 |             movies = i.search_tv_show(args.title)
53 |         elif args.type == "movie":
54 |             movies = i.search_movie(args.title)
55 |     elif args.command == "resolve":
56 |         if args.type == "tv":
57 |             movie = i.resolve_tv_show(args.title, args.year)
58 |         elif args.type == "movie":
59 |             movie = i.resolve_movie(args.title, args.year)
60 |     elif args.command == "chart":
61 |         movies = i.get_chart(args.type)
62 |     else:
63 |         parser.print_help()
64 | 
65 |     if movie is not None:
66 |         movie.fetch()
67 |         pprint(movie.__dict__)
68 |         print("")
69 |         print("More like this")
70 |         for recommended_movie in movie.more_like_this:
71 |             pprint(recommended_movie.__dict__)
72 | 
73 |     if movies is not None:
74 |         movies.fetch()
75 |         if movies.results:
76 |             for movie in movies.results:
77 |                 print(movie)
78 |                 print(movie.__dict__)
79 |         else:
80 |             print("Nothing found...")
81 | 
82 | 
83 | if __name__ == "__main__":
84 |     main()
85 | 


--------------------------------------------------------------------------------
/imdbparser/advancedsearchresult.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | import sys
  3 | from decimal import Decimal
  4 | 
  5 | from requests.compat import quote_plus, urlencode
  6 | 
  7 | from .base import Base
  8 | from .movie import Movie
  9 | from .person import Person
 10 | 
 11 | 
 12 | class Option:
 13 |     def __init__(self, label, value):
 14 |         self.label = label.strip()
 15 |         self.value = value.strip()
 16 | 
 17 |     def __str__(self):
 18 |         return f"{self.label} ({self.value})"
 19 | 
 20 |     def __repr__(self):
 21 |         return f"Option({self.label!r}, {self.value!r})"
 22 | 
 23 | 
 24 | class ParseBase(Base):
 25 |     base_url = "https://www.imdb.com/search/title/?"
 26 | 
 27 |     def _get_urls(self):
 28 |         return [self.base_url + urlencode(self.query)]
 29 | 
 30 |     def parse(self, htmls):
 31 |         super().parse(htmls)
 32 | 
 33 |         self.results = []
 34 | 
 35 |         for row in self.trees[0].xpath(
 36 |             "//div[@class='lister-list']/div[contains(@class, 'lister-item')]"
 37 |         ):
 38 |             cover = row.xpath(".//img")[0]
 39 |             imdb_id = cover.attrib["data-tconst"]
 40 |             movie = Movie(imdb_id, self.imdb)
 41 |             movie.directors = []
 42 |             movie.actors = []
 43 |             cover = cover.attrib["src"]
 44 |             if "nopicture" not in cover:
 45 |                 movie.cover = self.cleanup_photo_url(cover)
 46 | 
 47 |             header = row.xpath(".//h3[@class='lister-item-header']")[0]
 48 |             movie.title = header.xpath(".//a/text()")[0]
 49 |             year = re.findall(
 50 |                 "\d+",
 51 |                 header.xpath(".//span[contains(@class, 'lister-item-year')]/text()")[0],
 52 |             )
 53 |             if year and len(year[0]) == 4:
 54 |                 movie.year = int(year[0])
 55 | 
 56 |             runtime = row.xpath(".//span[@class='runtime']")
 57 |             if runtime:
 58 |                 runtime = re.findall("\d+", runtime[0].text)
 59 |                 if runtime:
 60 |                     movie.duration = int(runtime[0])
 61 | 
 62 |             genres = row.xpath(".//span[@class='genre']")
 63 |             if genres:
 64 |                 movie.genres = genres[0].text.split(", ")
 65 | 
 66 |             rating = row.xpath(".//div[contains(@class, 'ratings-imdb-rating')]")
 67 |             if rating:
 68 |                 movie.rating = Decimal(rating[0].attrib["data-value"])
 69 | 
 70 |             votes = row.xpath(".//span[@class='sort-num_votes-visible']/span[2]")
 71 |             if votes:
 72 |                 movie.votes = int(votes[0].attrib["data-value"])
 73 | 
 74 |             content = row.xpath("div[@class='lister-item-content']")[0]
 75 |             storyline = content.xpath("./p[2]")
 76 |             if storyline:
 77 |                 movie.storyline = storyline[0].text.strip()
 78 | 
 79 |             people = content.xpath("./p[3]")
 80 |             if people:
 81 |                 people = people[0]
 82 |                 people_titles = iter(
 83 |                     [
 84 |                         t.strip(" ,\n:s")
 85 |                         for t in people.xpath("./text()")
 86 |                         if t.strip(" ,\n")
 87 |                     ]
 88 |                 )
 89 |                 current_title = next(people_titles)
 90 |                 for e in people:
 91 |                     if e.tag == "span":
 92 |                         current_title = next(people_titles)
 93 |                     else:
 94 |                         p = Person(
 95 |                             re.findall("/nm(\d+)", e.attrib["href"])[0], self.imdb
 96 |                         )
 97 |                         p.name = e.text
 98 |                         if current_title == "Director":
 99 |                             movie.directors.append(p)
100 |                         elif current_title == "Star":
101 |                             movie.actors.append(p)
102 | 
103 |             self.results.append(movie)
104 | 
105 | 
106 | class AS:
107 |     class TITLE_TYPE:
108 |         FEATURE_FILM = Option("Feature Film", "feature")
109 |         TV_MOVIE = Option("TV Movie", "tv_movie")
110 |         TV_SERIES = Option("TV Series", "tv_series")
111 |         TV_EPISODE = Option("TV Episode", "tv_episode")
112 |         TV_SPECIAL = Option("TV Special", "tv_special")
113 |         MINI_SERIES = Option("Mini-Series", "tv_miniseries")
114 |         DOCUMENTARY = Option("Documentary", "documentary")
115 |         VIDEO_GAME = Option("Video Game", "video_game")
116 |         SHORT_FILM = Option("Short Film", "short")
117 |         VIDEO = Option("Video", "video")
118 |         TV_SHORT = Option("TV Short", "tv_short")
119 | 
120 |     class GENRES:
121 |         ACTION = Option("Action", "action")
122 |         ADVENTURE = Option("Adventure", "adventure")
123 |         ANIMATION = Option("Animation", "animation")
124 |         BIOGRAPHY = Option("Biography", "biography")
125 |         COMEDY = Option("Comedy", "comedy")
126 |         CRIME = Option("Crime", "crime")
127 |         DOCUMENTARY = Option("Documentary", "documentary")
128 |         DRAMA = Option("Drama", "drama")
129 |         FAMILY = Option("Family", "family")
130 |         FANTASY = Option("Fantasy", "fantasy")
131 |         FILM_NOIR = Option("Film-Noir", "film_noir")
132 |         GAME_SHOW = Option("Game-Show", "game_show")
133 |         HISTORY = Option("History", "history")
134 |         HORROR = Option("Horror", "horror")
135 |         MUSIC = Option("Music", "music")
136 |         MUSICAL = Option("Musical", "musical")
137 |         MYSTERY = Option("Mystery", "mystery")
138 |         NEWS = Option("News", "news")
139 |         REALITY_TV = Option("Reality-TV", "reality_tv")
140 |         ROMANCE = Option("Romance", "romance")
141 |         SCI_FI = Option("Sci-Fi", "sci_fi")
142 |         SPORT = Option("Sport", "sport")
143 |         TALK_SHOW = Option("Talk-Show", "talk_show")
144 |         THRILLER = Option("Thriller", "thriller")
145 |         WAR = Option("War", "war")
146 |         WESTERN = Option("Western", "western")
147 | 
148 |     class GROUPS:
149 |         IMDB_TOP_100 = Option('IMDb "Top 100"', "top_100")
150 |         IMDB_TOP_250 = Option('IMDb "Top 250"', "top_250")
151 |         IMDB_TOP_1000 = Option('IMDb "Top 1000"', "top_1000")
152 |         OSCAR_WINNING = Option("Oscar-Winning", "oscar_winners")
153 |         EMMY_AWARD_WINNING = Option("Emmy Award-Winning", "emmy_winners")
154 |         GOLDEN_GLOBE_WINNING = Option("Golden Globe-Winning", "golden_globe_winners")
155 |         OSCAR_NOMINATED = Option("Oscar-Nominated", "oscar_nominees")
156 |         EMMY_AWARD_NOMINATED = Option("Emmy Award-Nominated", "emmy_nominees")
157 |         GOLDEN_GLOBE_NOMINATED = Option(
158 |             "Golden Globe-Nominated", "golden_globe_nominees"
159 |         )
160 |         BEST_PICTURE_WINNING = Option(
161 |             "Best Picture-Winning", "oscar_best_picture_winners"
162 |         )
163 |         BEST_DIRECTOR_WINNING = Option(
164 |             "Best Director-Winning", "oscar_best_director_winners"
165 |         )
166 |         NOW_PLAYING = Option("Now-Playing", "now-playing-us")
167 |         BEST_PICTURE_NOMINATED = Option(
168 |             "Best Picture-Nominated", "oscar_best_picture_nominees"
169 |         )
170 |         BEST_DIRECTOR_NOMINATED = Option(
171 |             "Best Director-Nominated", "oscar_best_director_nominees"
172 |         )
173 |         NATIONAL_FILM_BOARD_PRESERVED = Option(
174 |             "National Film Board Preserved", "national_film_registry"
175 |         )
176 |         RAZZIE_WINNING = Option("Razzie-Winning", "razzie_winners")
177 |         IMDB_BOTTOM_100 = Option('IMDb "Bottom 100"', "bottom_100")
178 |         IMDB_BOTTOM_250 = Option('IMDb "Bottom 250"', "bottom_250")
179 |         RAZZIE_NOMINATED = Option("Razzie-Nominated", "razzie_nominees")
180 |         IMDB_BOTTOM_1000 = Option('IMDb "Bottom 1000"', "bottom_1000")
181 | 
182 |     class HAS:
183 |         ALTERNATE_VERSIONS = Option("Alternate Versions", "alternate-versions")
184 |         AWARDS = Option("Awards", "awards")
185 |         BUSINESS_INFO = Option("Business Info", "business-info")
186 |         CRAZY_CREDITS = Option("Crazy Credits", "crazy-credits")
187 |         GOOFS = Option("Goofs", "goofs")
188 |         LOCATIONS = Option("Locations", "locations")
189 |         PLOT = Option("Plot", "plot")
190 |         QUOTES = Option("Quotes", "quotes")
191 |         SOUNDTRACKS = Option("Soundtracks", "soundtracks")
192 |         TECHNICAL_INFO = Option("Technical Info", "technical")
193 |         TRIVIA = Option("Trivia", "trivia")
194 |         X_RAY = Option("X-Ray", "x-ray")
195 | 
196 |     class COMPANIES:
197 |         TWENTIETH_CENTURY_FOX = Option("20th Century Fox", "fox")
198 |         SONY = Option("Sony", "columbia")
199 |         DREAMWORKS = Option("DreamWorks", "dreamworks")
200 |         MGM = Option("MGM", "mgm")
201 |         PARAMOUNT = Option("Paramount", "paramount")
202 |         UNIVERSAL = Option("Universal", "universal")
203 |         WALT_DISNEY = Option("Walt Disney", "disney")
204 |         WARNER_BROS = Option("Warner Bros.", "warner")
205 | 
206 |     class CERTIFICATES:
207 |         G = Option("G", "us:G")
208 |         PG = Option("PG", "us:PG")
209 |         PG_13 = Option("PG-13", "us:PG-13")
210 |         R = Option("R", "us:R")
211 |         NC_17 = Option("NC-17", "us:NC-17")
212 | 
213 |     class COLORS:
214 |         COLOR = Option("Color", "color")
215 |         BLACK_WHITE = Option("Black & White", "black_and_white")
216 |         COLORIZED = Option("Colorized", "colorized")
217 |         ACES = Option("ACES", "aces")
218 | 
219 |     class COUNTRIES:
220 |         AFGHANISTAN = Option("Afghanistan", "af")
221 |         LAND_ISLANDS = Option("Åland Islands", "ax")
222 |         ALBANIA = Option("Albania", "al")
223 |         ALGERIA = Option("Algeria", "dz")
224 |         AMERICAN_SAMOA = Option("American Samoa", "as")
225 |         ANDORRA = Option("Andorra", "ad")
226 |         ANGOLA = Option("Angola", "ao")
227 |         ANGUILLA = Option("Anguilla", "ai")
228 |         ANTARCTICA = Option("Antarctica", "aq")
229 |         ANTIGUA_AND_BARBUDA = Option("Antigua and Barbuda", "ag")
230 |         ARGENTINA = Option("Argentina", "ar")
231 |         ARMENIA = Option("Armenia", "am")
232 |         ARUBA = Option("Aruba", "aw")
233 |         AUSTRALIA = Option("Australia", "au")
234 |         AUSTRIA = Option("Austria", "at")
235 |         AZERBAIJAN = Option("Azerbaijan", "az")
236 |         BAHAMAS = Option("Bahamas", "bs")
237 |         BAHRAIN = Option("Bahrain", "bh")
238 |         BANGLADESH = Option("Bangladesh", "bd")
239 |         BARBADOS = Option("Barbados", "bb")
240 |         BELARUS = Option("Belarus", "by")
241 |         BELGIUM = Option("Belgium", "be")
242 |         BELIZE = Option("Belize", "bz")
243 |         BENIN = Option("Benin", "bj")
244 |         BERMUDA = Option("Bermuda", "bm")
245 |         BHUTAN = Option("Bhutan", "bt")
246 |         BOLIVIA = Option("Bolivia", "bo")
247 |         BONAIRE_SINT_EUSTATIUS_AND_SABA = Option(
248 |             "Bonaire, Sint Eustatius and Saba", "bq"
249 |         )
250 |         BOSNIA_AND_HERZEGOVINA = Option("Bosnia and Herzegovina", "ba")
251 |         BOTSWANA = Option("Botswana", "bw")
252 |         BOUVET_ISLAND = Option("Bouvet Island", "bv")
253 |         BRAZIL = Option("Brazil", "br")
254 |         BRITISH_INDIAN_OCEAN_TERRITORY = Option("British Indian Ocean Territory", "io")
255 |         BRITISH_VIRGIN_ISLANDS = Option("British Virgin Islands", "vg")
256 |         BRUNEI_DARUSSALAM = Option("Brunei Darussalam", "bn")
257 |         BULGARIA = Option("Bulgaria", "bg")
258 |         BURKINA_FASO = Option("Burkina Faso", "bf")
259 |         BURMA = Option("Burma", "bumm")
260 |         BURUNDI = Option("Burundi", "bi")
261 |         CAMBODIA = Option("Cambodia", "kh")
262 |         CAMEROON = Option("Cameroon", "cm")
263 |         CANADA = Option("Canada", "ca")
264 |         CAPE_VERDE = Option("Cape Verde", "cv")
265 |         CAYMAN_ISLANDS = Option("Cayman Islands", "ky")
266 |         CENTRAL_AFRICAN_REPUBLIC = Option("Central African Republic", "cf")
267 |         CHAD = Option("Chad", "td")
268 |         CHILE = Option("Chile", "cl")
269 |         CHINA = Option("China", "cn")
270 |         CHRISTMAS_ISLAND = Option("Christmas Island", "cx")
271 |         COCOS_KEELING_ISLANDS = Option("Cocos (Keeling) Islands", "cc")
272 |         COLOMBIA = Option("Colombia", "co")
273 |         COMOROS = Option("Comoros", "km")
274 |         CONGO = Option("Congo", "cg")
275 |         COOK_ISLANDS = Option("Cook Islands", "ck")
276 |         COSTA_RICA = Option("Costa Rica", "cr")
277 |         CTE_D_IVOIRE = Option("Côte d'Ivoire", "ci")
278 |         CROATIA = Option("Croatia", "hr")
279 |         CUBA = Option("Cuba", "cu")
280 |         CYPRUS = Option("Cyprus", "cy")
281 |         CZECH_REPUBLIC = Option("Czech Republic", "cz")
282 |         CZECHOSLOVAKIA = Option("Czechoslovakia", "cshh")
283 |         DEMOCRATIC_REPUBLIC_OF_THE_CONGO = Option(
284 |             "Democratic Republic of the Congo", "cd"
285 |         )
286 |         DENMARK = Option("Denmark", "dk")
287 |         DJIBOUTI = Option("Djibouti", "dj")
288 |         DOMINICA = Option("Dominica", "dm")
289 |         DOMINICAN_REPUBLIC = Option("Dominican Republic", "do")
290 |         EAST_GERMANY = Option("East Germany", "ddde")
291 |         ECUADOR = Option("Ecuador", "ec")
292 |         EGYPT = Option("Egypt", "eg")
293 |         EL_SALVADOR = Option("El Salvador", "sv")
294 |         EQUATORIAL_GUINEA = Option("Equatorial Guinea", "gq")
295 |         ERITREA = Option("Eritrea", "er")
296 |         ESTONIA = Option("Estonia", "ee")
297 |         ETHIOPIA = Option("Ethiopia", "et")
298 |         FALKLAND_ISLANDS = Option("Falkland Islands", "fk")
299 |         FAROE_ISLANDS = Option("Faroe Islands", "fo")
300 |         FEDERAL_REPUBLIC_OF_YUGOSLAVIA = Option(
301 |             "Federal Republic of Yugoslavia", "yucs"
302 |         )
303 |         FEDERATED_STATES_OF_MICRONESIA = Option("Federated States of Micronesia", "fm")
304 |         FIJI = Option("Fiji", "fj")
305 |         FINLAND = Option("Finland", "fi")
306 |         FRANCE = Option("France", "fr")
307 |         FRENCH_GUIANA = Option("French Guiana", "gf")
308 |         FRENCH_POLYNESIA = Option("French Polynesia", "pf")
309 |         FRENCH_SOUTHERN_TERRITORIES = Option("French Southern Territories", "tf")
310 |         GABON = Option("Gabon", "ga")
311 |         GAMBIA = Option("Gambia", "gm")
312 |         GEORGIA = Option("Georgia", "ge")
313 |         GERMANY = Option("Germany", "de")
314 |         GHANA = Option("Ghana", "gh")
315 |         GIBRALTAR = Option("Gibraltar", "gi")
316 |         GREECE = Option("Greece", "gr")
317 |         GREENLAND = Option("Greenland", "gl")
318 |         GRENADA = Option("Grenada", "gd")
319 |         GUADELOUPE = Option("Guadeloupe", "gp")
320 |         GUAM = Option("Guam", "gu")
321 |         GUATEMALA = Option("Guatemala", "gt")
322 |         GUERNSEY = Option("Guernsey", "gg")
323 |         GUINEA = Option("Guinea", "gn")
324 |         GUINEA_BISSAU = Option("Guinea-Bissau", "gw")
325 |         GUYANA = Option("Guyana", "gy")
326 |         HAITI = Option("Haiti", "ht")
327 |         HEARD_ISLAND_AND_MCDONALD_ISLANDS = Option(
328 |             "Heard Island and McDonald Islands", "hm"
329 |         )
330 |         HOLY_SEE_VATICAN_CITY_STATE = Option("Holy See (Vatican City State)", "va")
331 |         HONDURAS = Option("Honduras", "hn")
332 |         HONG_KONG = Option("Hong Kong", "hk")
333 |         HUNGARY = Option("Hungary", "hu")
334 |         ICELAND = Option("Iceland", "is")
335 |         INDIA = Option("India", "in")
336 |         INDONESIA = Option("Indonesia", "id")
337 |         IRAN = Option("Iran", "ir")
338 |         IRAQ = Option("Iraq", "iq")
339 |         IRELAND = Option("Ireland", "ie")
340 |         ISLE_OF_MAN = Option("Isle of Man", "im")
341 |         ISRAEL = Option("Israel", "il")
342 |         ITALY = Option("Italy", "it")
343 |         JAMAICA = Option("Jamaica", "jm")
344 |         JAPAN = Option("Japan", "jp")
345 |         JERSEY = Option("Jersey", "je")
346 |         JORDAN = Option("Jordan", "jo")
347 |         KAZAKHSTAN = Option("Kazakhstan", "kz")
348 |         KENYA = Option("Kenya", "ke")
349 |         KIRIBATI = Option("Kiribati", "ki")
350 |         KOREA = Option("Korea", "xko")
351 |         KOSOVO = Option("Kosovo", "xkv")
352 |         KUWAIT = Option("Kuwait", "kw")
353 |         KYRGYZSTAN = Option("Kyrgyzstan", "kg")
354 |         LAOS = Option("Laos", "la")
355 |         LATVIA = Option("Latvia", "lv")
356 |         LEBANON = Option("Lebanon", "lb")
357 |         LESOTHO = Option("Lesotho", "ls")
358 |         LIBERIA = Option("Liberia", "lr")
359 |         LIBYA = Option("Libya", "ly")
360 |         LIECHTENSTEIN = Option("Liechtenstein", "li")
361 |         LITHUANIA = Option("Lithuania", "lt")
362 |         LUXEMBOURG = Option("Luxembourg", "lu")
363 |         MACAO = Option("Macao", "mo")
364 |         MADAGASCAR = Option("Madagascar", "mg")
365 |         MALAWI = Option("Malawi", "mw")
366 |         MALAYSIA = Option("Malaysia", "my")
367 |         MALDIVES = Option("Maldives", "mv")
368 |         MALI = Option("Mali", "ml")
369 |         MALTA = Option("Malta", "mt")
370 |         MARSHALL_ISLANDS = Option("Marshall Islands", "mh")
371 |         MARTINIQUE = Option("Martinique", "mq")
372 |         MAURITANIA = Option("Mauritania", "mr")
373 |         MAURITIUS = Option("Mauritius", "mu")
374 |         MAYOTTE = Option("Mayotte", "yt")
375 |         MEXICO = Option("Mexico", "mx")
376 |         MOLDOVA = Option("Moldova", "md")
377 |         MONACO = Option("Monaco", "mc")
378 |         MONGOLIA = Option("Mongolia", "mn")
379 |         MONTENEGRO = Option("Montenegro", "me")
380 |         MONTSERRAT = Option("Montserrat", "ms")
381 |         MOROCCO = Option("Morocco", "ma")
382 |         MOZAMBIQUE = Option("Mozambique", "mz")
383 |         MYANMAR = Option("Myanmar", "mm")
384 |         NAMIBIA = Option("Namibia", "na")
385 |         NAURU = Option("Nauru", "nr")
386 |         NEPAL = Option("Nepal", "np")
387 |         NETHERLANDS = Option("Netherlands", "nl")
388 |         NETHERLANDS_ANTILLES = Option("Netherlands Antilles", "an")
389 |         NEW_CALEDONIA = Option("New Caledonia", "nc")
390 |         NEW_ZEALAND = Option("New Zealand", "nz")
391 |         NICARAGUA = Option("Nicaragua", "ni")
392 |         NIGER = Option("Niger", "ne")
393 |         NIGERIA = Option("Nigeria", "ng")
394 |         NIUE = Option("Niue", "nu")
395 |         NORFOLK_ISLAND = Option("Norfolk Island", "nf")
396 |         NORTH_KOREA = Option("North Korea", "kp")
397 |         NORTH_VIETNAM = Option("North Vietnam", "vdvn")
398 |         NORTHERN_MARIANA_ISLANDS = Option("Northern Mariana Islands", "mp")
399 |         NORWAY = Option("Norway", "no")
400 |         OMAN = Option("Oman", "om")
401 |         PAKISTAN = Option("Pakistan", "pk")
402 |         PALAU = Option("Palau", "pw")
403 |         PALESTINE = Option("Palestine", "xpi")
404 |         PALESTINIAN_TERRITORY = Option("Palestinian Territory", "ps")
405 |         PANAMA = Option("Panama", "pa")
406 |         PAPUA_NEW_GUINEA = Option("Papua New Guinea", "pg")
407 |         PARAGUAY = Option("Paraguay", "py")
408 |         PERU = Option("Peru", "pe")
409 |         PHILIPPINES = Option("Philippines", "ph")
410 |         POLAND = Option("Poland", "pl")
411 |         PORTUGAL = Option("Portugal", "pt")
412 |         PITCAIRN = Option("Pitcairn", "pn")
413 |         PUERTO_RICO = Option("Puerto Rico", "pr")
414 |         QATAR = Option("Qatar", "qa")
415 |         REPUBLIC_OF_MACEDONIA = Option("Republic of Macedonia", "mk")
416 |         RUNION = Option("Réunion", "re")
417 |         ROMANIA = Option("Romania", "ro")
418 |         RUSSIA = Option("Russia", "ru")
419 |         RWANDA = Option("Rwanda", "rw")
420 |         SAINT_BARTHLEMY = Option("Saint Barthélemy", "bl")
421 |         SAINT_HELENA = Option("Saint Helena", "sh")
422 |         SAINT_KITTS_AND_NEVIS = Option("Saint Kitts and Nevis", "kn")
423 |         SAINT_LUCIA = Option("Saint Lucia", "lc")
424 |         SAINT_MARTIN_FRENCH_PART = Option("Saint Martin (French part)", "mf")
425 |         SAINT_PIERRE_AND_MIQUELON = Option("Saint Pierre and Miquelon", "pm")
426 |         SAINT_VINCENT_AND_THE_GRENADINES = Option(
427 |             "Saint Vincent and the Grenadines", "vc"
428 |         )
429 |         SAMOA = Option("Samoa", "ws")
430 |         SAN_MARINO = Option("San Marino", "sm")
431 |         SAO_TOME_AND_PRINCIPE = Option("Sao Tome and Principe", "st")
432 |         SAUDI_ARABIA = Option("Saudi Arabia", "sa")
433 |         SENEGAL = Option("Senegal", "sn")
434 |         SERBIA = Option("Serbia", "rs")
435 |         SERBIA_AND_MONTENEGRO = Option("Serbia and Montenegro", "csxx")
436 |         SEYCHELLES = Option("Seychelles", "sc")
437 |         SIAM = Option("Siam", "xsi")
438 |         SIERRA_LEONE = Option("Sierra Leone", "sl")
439 |         SINGAPORE = Option("Singapore", "sg")
440 |         SLOVAKIA = Option("Slovakia", "sk")
441 |         SLOVENIA = Option("Slovenia", "si")
442 |         SOLOMON_ISLANDS = Option("Solomon Islands", "sb")
443 |         SOMALIA = Option("Somalia", "so")
444 |         SOUTH_AFRICA = Option("South Africa", "za")
445 |         SOUTH_GEORGIA_AND_THE_SOUTH_SANDWICH_ISLANDS = Option(
446 |             "South Georgia and the South Sandwich Islands", "gs"
447 |         )
448 |         SOUTH_KOREA = Option("South Korea", "kr")
449 |         SOVIET_UNION = Option("Soviet Union", "suhh")
450 |         SPAIN = Option("Spain", "es")
451 |         SRI_LANKA = Option("Sri Lanka", "lk")
452 |         SUDAN = Option("Sudan", "sd")
453 |         SURINAME = Option("Suriname", "sr")
454 |         SVALBARD_AND_JAN_MAYEN = Option("Svalbard and Jan Mayen", "sj")
455 |         SWAZILAND = Option("Swaziland", "sz")
456 |         SWEDEN = Option("Sweden", "se")
457 |         SWITZERLAND = Option("Switzerland", "ch")
458 |         SYRIA = Option("Syria", "sy")
459 |         TAIWAN = Option("Taiwan", "tw")
460 |         TAJIKISTAN = Option("Tajikistan", "tj")
461 |         TANZANIA = Option("Tanzania", "tz")
462 |         THAILAND = Option("Thailand", "th")
463 |         TIMOR_LESTE = Option("Timor-Leste", "tl")
464 |         TOGO = Option("Togo", "tg")
465 |         TOKELAU = Option("Tokelau", "tk")
466 |         TONGA = Option("Tonga", "to")
467 |         TRINIDAD_AND_TOBAGO = Option("Trinidad and Tobago", "tt")
468 |         TUNISIA = Option("Tunisia", "tn")
469 |         TURKEY = Option("Turkey", "tr")
470 |         TURKMENISTAN = Option("Turkmenistan", "tm")
471 |         TURKS_AND_CAICOS_ISLANDS = Option("Turks and Caicos Islands", "tc")
472 |         TUVALU = Option("Tuvalu", "tv")
473 |         U_S_VIRGIN_ISLANDS = Option("U.S. Virgin Islands", "vi")
474 |         UGANDA = Option("Uganda", "ug")
475 |         UKRAINE = Option("Ukraine", "ua")
476 |         UNITED_ARAB_EMIRATES = Option("United Arab Emirates", "ae")
477 |         UNITED_KINGDOM = Option("United Kingdom", "gb")
478 |         UNITED_STATES = Option("United States", "us")
479 |         UNITED_STATES_MINOR_OUTLYING_ISLANDS = Option(
480 |             "United States Minor Outlying Islands", "um"
481 |         )
482 |         URUGUAY = Option("Uruguay", "uy")
483 |         UZBEKISTAN = Option("Uzbekistan", "uz")
484 |         VANUATU = Option("Vanuatu", "vu")
485 |         VENEZUELA = Option("Venezuela", "ve")
486 |         VIETNAM = Option("Vietnam", "vn")
487 |         WALLIS_AND_FUTUNA = Option("Wallis and Futuna", "wf")
488 |         WEST_GERMANY = Option("West Germany", "xwg")
489 |         WESTERN_SAHARA = Option("Western Sahara", "eh")
490 |         YEMEN = Option("Yemen", "ye")
491 |         YUGOSLAVIA = Option("Yugoslavia", "xyu")
492 |         ZAIRE = Option("Zaire", "zrcd")
493 |         ZAMBIA = Option("Zambia", "zm")
494 |         ZIMBABWE = Option("Zimbabwe", "zw")
495 | 
    class LANGUAGES:
        # Language choices for the advanced search. Each constant is built as
        # Option(label, value), the same positional form that
        # generateadvancedsearchresult.py emits: the first argument is the
        # human-readable name, the second the short code.
        # AdvancedSearchResult comma-joins the `.value` of the options it is
        # given (presumably the second argument -- Option is defined outside
        # this block, confirm there).
        #
        # Attribute names are ASCII-only identifiers derived from the label, so
        # accented letters are simply dropped (ACH for "Aché", KARAJ for
        # "Karajá", MACRO_J for "Macro-Jê") and punctuation becomes "_"
        # (JU_HOAN for "Ju'hoan"). Do not "fix" these names by hand: callers
        # reference them as they are.
        ABKHAZIAN = Option("Abkhazian", "ab")
        ABORIGINAL = Option("Aboriginal", "qac")
        ACH = Option("Aché", "guq")
        ACHOLI = Option("Acholi", "qam")
        AFRIKAANS = Option("Afrikaans", "af")
        AIDOUKROU = Option("Aidoukrou", "qas")
        AKAN = Option("Akan", "ak")
        ALBANIAN = Option("Albanian", "sq")
        ALGONQUIN = Option("Algonquin", "alg")
        AMERICAN_SIGN_LANGUAGE = Option("American Sign Language", "ase")
        AMHARIC = Option("Amharic", "am")
        APACHE_LANGUAGES = Option("Apache languages", "apa")
        ARABIC = Option("Arabic", "ar")
        ARAGONESE = Option("Aragonese", "an")
        ARAMAIC = Option("Aramaic", "arc")
        ARAPAHO = Option("Arapaho", "arp")
        ARMENIAN = Option("Armenian", "hy")
        ASSAMESE = Option("Assamese", "as")
        ASSYRIAN_NEO_ARAMAIC = Option("Assyrian Neo-Aramaic", "aii")
        ATHAPASCAN_LANGUAGES = Option("Athapascan languages", "ath")
        AUSTRALIAN_SIGN_LANGUAGE = Option("Australian Sign Language", "asf")
        AWADHI = Option("Awadhi", "awa")
        AYMARA = Option("Aymara", "ay")
        AZERBAIJANI = Option("Azerbaijani", "az")
        BABLE = Option("Bable", "ast")
        BAKA = Option("Baka", "qbd")
        BALINESE = Option("Balinese", "ban")
        BAMBARA = Option("Bambara", "bm")
        BASQUE = Option("Basque", "eu")
        BASSARI = Option("Bassari", "bsc")
        BELARUSIAN = Option("Belarusian", "be")
        BEMBA = Option("Bemba", "bem")
        BENGALI = Option("Bengali", "bn")
        BERBER_LANGUAGES = Option("Berber languages", "ber")
        BHOJPURI = Option("Bhojpuri", "bho")
        BICOLANO = Option("Bicolano", "qbi")
        BODO = Option("Bodo", "qbh")
        BOSNIAN = Option("Bosnian", "bs")
        BRAZILIAN_SIGN_LANGUAGE = Option("Brazilian Sign Language", "bzs")
        BRETON = Option("Breton", "br")
        BRITISH_SIGN_LANGUAGE = Option("British Sign Language", "bfi")
        BULGARIAN = Option("Bulgarian", "bg")
        BURMESE = Option("Burmese", "my")
        CANTONESE = Option("Cantonese", "yue")
        CATALAN = Option("Catalan", "ca")
        CENTRAL_KHMER = Option("Central Khmer", "km")
        CHAKMA = Option("Chakma", "ccp")
        CHAOZHOU = Option("Chaozhou", "qax")
        CHECHEN = Option("Chechen", "ce")
        CHEROKEE = Option("Cherokee", "chr")
        CHEYENNE = Option("Cheyenne", "chy")
        CHHATTISGARHI = Option("Chhattisgarhi", "hne")
        CHINESE = Option("Chinese", "zh")
        CORNISH = Option("Cornish", "kw")
        CORSICAN = Option("Corsican", "co")
        CREE = Option("Cree", "cr")
        CREEK = Option("Creek", "mus")
        CROATIAN = Option("Croatian", "hr")
        CROW = Option("Crow", "cro")
        CZECH = Option("Czech", "cs")
        DANISH = Option("Danish", "da")
        DARI = Option("Dari", "prs")
        DESIYA = Option("Desiya", "dso")
        DINKA = Option("Dinka", "din")
        DJERMA = Option("Djerma", "qaw")
        DOGRI = Option("Dogri", "doi")
        DUTCH = Option("Dutch", "nl")
        DYULA = Option("Dyula", "dyu")
        DZONGKHA = Option("Dzongkha", "dz")
        EAST_GREENLANDIC = Option("East-Greenlandic", "qbc")
        EASTERN_FRISIAN = Option("Eastern Frisian", "frs")
        EGYPTIAN_ANCIENT = Option("Egyptian (Ancient)", "egy")
        ENGLISH = Option("English", "en")
        ESPERANTO = Option("Esperanto", "eo")
        ESTONIAN = Option("Estonian", "et")
        EWE = Option("Ewe", "ee")
        FALIASCH = Option("Faliasch", "qbg")
        FAROESE = Option("Faroese", "fo")
        FILIPINO = Option("Filipino", "fil")
        FINNISH = Option("Finnish", "fi")
        FLEMISH = Option("Flemish", "qbn")
        FON = Option("Fon", "fon")
        FRENCH = Option("French", "fr")
        FRENCH_SIGN_LANGUAGE = Option("French Sign Language", "fsl")
        FULAH = Option("Fulah", "ff")
        FUR = Option("Fur", "fvr")
        GAELIC = Option("Gaelic", "gd")
        GALICIAN = Option("Galician", "gl")
        GEORGIAN = Option("Georgian", "ka")
        GERMAN = Option("German", "de")
        GERMAN_SIGN_LANGUAGE = Option("German Sign Language", "gsg")
        GREBO = Option("Grebo", "grb")
        GREEK = Option("Greek", "el")
        GREEK_ANCIENT_TO_1453 = Option("Greek, Ancient (to 1453)", "grc")
        GREENLANDIC = Option("Greenlandic", "kl")
        GUARANI = Option("Guarani", "gn")
        GUJARATI = Option("Gujarati", "gu")
        GUMATJ = Option("Gumatj", "gnn")
        GUNWINGGU = Option("Gunwinggu", "gup")
        HAITIAN = Option("Haitian", "ht")
        HAIDA = Option("Haida", "hai")
        HAKKA = Option("Hakka", "hak")
        HARYANVI = Option("Haryanvi", "bgc")
        HASSANYA = Option("Hassanya", "qav")
        HAUSA = Option("Hausa", "ha")
        HAWAIIAN = Option("Hawaiian", "haw")
        HEBREW = Option("Hebrew", "he")
        HINDI = Option("Hindi", "hi")
        HMONG = Option("Hmong", "hmn")
        HOKKIEN = Option("Hokkien", "qab")
        HOPI = Option("Hopi", "hop")
        HUNGARIAN = Option("Hungarian", "hu")
        IBAN = Option("Iban", "iba")
        IBO = Option("Ibo", "qag")
        ICELANDIC = Option("Icelandic", "is")
        ICELANDIC_SIGN_LANGUAGE = Option("Icelandic Sign Language", "icl")
        INDIAN_SIGN_LANGUAGE = Option("Indian Sign Language", "ins")
        INDONESIAN = Option("Indonesian", "id")
        INUKTITUT = Option("Inuktitut", "iu")
        INUPIAQ = Option("Inupiaq", "ik")
        IRISH_GAELIC = Option("Irish Gaelic", "ga")
        IRULA = Option("Irula", "iru")
        ITALIAN = Option("Italian", "it")
        JAPANESE = Option("Japanese", "ja")
        JAPANESE_SIGN_LANGUAGE = Option("Japanese Sign Language", "jsl")
        JOLA_FONYI = Option("Jola-Fonyi", "dyo")
        JU_HOAN = Option("Ju'hoan", "ktz")
        KAADO = Option("Kaado", "qbf")
        KABUVERDIANU = Option("Kabuverdianu", "kea")
        KABYLE = Option("Kabyle", "kab")
        KALMYK_OIRAT = Option("Kalmyk-Oirat", "xal")
        KANNADA = Option("Kannada", "kn")
        KARAJ = Option("Karajá", "kpj")
        KARBI = Option("Karbi", "mjw")
        KAREN = Option("Karen", "kar")
        KAZAKH = Option("Kazakh", "kk")
        KHANTY = Option("Khanty", "kca")
        KHASI = Option("Khasi", "kha")
        KIKUYU = Option("Kikuyu", "ki")
        KINYARWANDA = Option("Kinyarwanda", "rw")
        KIRUNDI = Option("Kirundi", "qar")
        KLINGON = Option("Klingon", "tlh")
        KODAVA = Option("Kodava", "kfa")
        KONKANI = Option("Konkani", "kok")
        KOREAN = Option("Korean", "ko")
        KOREAN_SIGN_LANGUAGE = Option("Korean Sign Language", "kvk")
        KOROWAI = Option("Korowai", "khe")
        KRIOLU = Option("Kriolu", "qaq")
        KRU = Option("Kru", "kro")
        KUDMALI = Option("Kudmali", "kyw")
        KUNA = Option("Kuna", "qbb")
        KURDISH = Option("Kurdish", "ku")
        KWAKIUTL = Option("Kwakiutl", "kwk")
        KYRGYZ = Option("Kyrgyz", "ky")
        LADAKHI = Option("Ladakhi", "lbj")
        LADINO = Option("Ladino", "lad")
        LAO = Option("Lao", "lo")
        LATIN = Option("Latin", "la")
        LATVIAN = Option("Latvian", "lv")
        LIMBU = Option("Limbu", "lif")
        LINGALA = Option("Lingala", "ln")
        LITHUANIAN = Option("Lithuanian", "lt")
        LOW_GERMAN = Option("Low German", "nds")
        LUXEMBOURGISH = Option("Luxembourgish", "lb")
        MACEDONIAN = Option("Macedonian", "mk")
        MACRO_J = Option("Macro-Jê", "qbm")
        MAGAHI = Option("Magahi", "mag")
        MAITHILI = Option("Maithili", "mai")
        MALAGASY = Option("Malagasy", "mg")
        MALAY = Option("Malay", "ms")
        MALAYALAM = Option("Malayalam", "ml")
        MALECITE_PASSAMAQUODDY = Option("Malecite-Passamaquoddy", "pqm")
        MALINKA = Option("Malinka", "qap")
        MALTESE = Option("Maltese", "mt")
        MANCHU = Option("Manchu", "mnc")
        MANDARIN = Option("Mandarin", "cmn")
        MANDINGO = Option("Mandingo", "man")
        MANIPURI = Option("Manipuri", "mni")
        MAORI = Option("Maori", "mi")
        MAPUDUNGUN = Option("Mapudungun", "arn")
        MARATHI = Option("Marathi", "mr")
        MARSHALLESE = Option("Marshallese", "mh")
        MASAI = Option("Masai", "mas")
        MASALIT = Option("Masalit", "mls")
        MAYA = Option("Maya", "myn")
        MENDE = Option("Mende", "men")
        MICMAC = Option("Micmac", "mic")
        MIDDLE_ENGLISH = Option("Middle English", "enm")
        MIN_NAN = Option("Min Nan", "nan")
        MINANGKABAU = Option("Minangkabau", "min")
        MIRANDESE = Option("Mirandese", "mwl")
        MIXTEC = Option("Mixtec", "qmt")
        MIZO = Option("Mizo", "lus")
        MOHAWK = Option("Mohawk", "moh")
        MONGOLIAN = Option("Mongolian", "mn")
        MONTAGNAIS = Option("Montagnais", "moe")
        MORE = Option("More", "qaf")
        MORISYEN = Option("Morisyen", "mfe")
        NAGPURI = Option("Nagpuri", "qbl")
        NAHUATL = Option("Nahuatl", "nah")
        NAMA = Option("Nama", "qba")
        NAVAJO = Option("Navajo", "nv")
        NAXI = Option("Naxi", "nbf")
        NDEBELE = Option("Ndebele", "nd")
        NEAPOLITAN = Option("Neapolitan", "nap")
        NENETS = Option("Nenets", "yrk")
        NEPALI = Option("Nepali", "ne")
        NISGA_A = Option("Nisga'a", "ncg")
        NONE = Option("None", "zxx")
        NORSE_OLD = Option("Norse, Old", "non")
        NORTH_AMERICAN_INDIAN = Option("North American Indian", "nai")
        NORWEGIAN = Option("Norwegian", "no")
        NUSHI = Option("Nushi", "qbk")
        NYANEKA = Option("Nyaneka", "nyk")
        NYANJA = Option("Nyanja", "ny")
        OCCITAN = Option("Occitan", "oc")
        OJIBWA = Option("Ojibwa", "oj")
        OJIHIMBA = Option("Ojihimba", "qaz")
        OLD_ENGLISH = Option("Old English", "ang")
        ORIYA = Option("Oriya", "or")
        PAPIAMENTO = Option("Papiamento", "pap")
        PARSEE = Option("Parsee", "qaj")
        PASHTU = Option("Pashtu", "ps")
        PAWNEE = Option("Pawnee", "paw")
        PERSIAN = Option("Persian", "fa")
        PEUL = Option("Peul", "qai")
        POLISH = Option("Polish", "pl")
        POLYNESIAN = Option("Polynesian", "qah")
        PORTUGUESE = Option("Portuguese", "pt")
        PULAR = Option("Pular", "fuf")
        PUNJABI = Option("Punjabi", "pa")
        PUREPECHA = Option("Purepecha", "tsz")
        QUECHUA = Option("Quechua", "qu")
        QUENYA = Option("Quenya", "qya")
        RAJASTHANI = Option("Rajasthani", "raj")
        RAWAN = Option("Rawan", "qbj")
        RHAETIAN = Option("Rhaetian", "xrr")
        ROMANIAN = Option("Romanian", "ro")
        ROMANSH = Option("Romansh", "rm")
        ROMANY = Option("Romany", "rom")
        ROTUMAN = Option("Rotuman", "rtm")
        RUSSIAN = Option("Russian", "ru")
        RUSSIAN_SIGN_LANGUAGE = Option("Russian Sign Language", "rsl")
        RYUKYUAN = Option("Ryukyuan", "qao")
        SAAMI = Option("Saami", "qae")
        SAMOAN = Option("Samoan", "sm")
        SANSKRIT = Option("Sanskrit", "sa")
        SARDINIAN = Option("Sardinian", "sc")
        SCANIAN = Option("Scanian", "qay")
        SERBIAN = Option("Serbian", "sr")
        SERBO_CROATIAN = Option("Serbo-Croatian", "qbo")
        SERER = Option("Serer", "srr")
        SHANGHAINESE = Option("Shanghainese", "qad")
        SHANXI = Option("Shanxi", "qau")
        SHONA = Option("Shona", "sn")
        SHOSHONI = Option("Shoshoni", "shh")
        SICILIAN = Option("Sicilian", "scn")
        SINDARIN = Option("Sindarin", "sjn")
        SINDHI = Option("Sindhi", "sd")
        SINHALA = Option("Sinhala", "si")
        SIOUX = Option("Sioux", "sio")
        SLOVAK = Option("Slovak", "sk")
        SLOVENIAN = Option("Slovenian", "sl")
        SOMALI = Option("Somali", "so")
        SONGHAY = Option("Songhay", "son")
        SONINKE = Option("Soninke", "snk")
        SORBIAN_LANGUAGES = Option("Sorbian languages", "wen")
        SOTHO = Option("Sotho", "st")
        SOUSSON = Option("Sousson", "qbe")
        SPANISH = Option("Spanish", "es")
        SPANISH_SIGN_LANGUAGE = Option("Spanish Sign Language", "ssp")
        SRANAN = Option("Sranan", "srn")
        SWAHILI = Option("Swahili", "sw")
        SWEDISH = Option("Swedish", "sv")
        SWISS_GERMAN = Option("Swiss German", "gsw")
        SYLHETI = Option("Sylheti", "syl")
        TAGALOG = Option("Tagalog", "tl")
        TAJIK = Option("Tajik", "tg")
        TAMASHEK = Option("Tamashek", "tmh")
        TAMIL = Option("Tamil", "ta")
        TARAHUMARA = Option("Tarahumara", "tac")
        TATAR = Option("Tatar", "tt")
        TELUGU = Option("Telugu", "te")
        TEOCHEW = Option("Teochew", "qak")
        THAI = Option("Thai", "th")
        TIBETAN = Option("Tibetan", "bo")
        TIGRIGNA = Option("Tigrigna", "qan")
        TLINGIT = Option("Tlingit", "tli")
        TOK_PISIN = Option("Tok Pisin", "tpi")
        TONGA_TONGA_ISLANDS = Option("Tonga (Tonga Islands)", "to")
        TSONGA = Option("Tsonga", "ts")
        TSWA = Option("Tswa", "tsc")
        TSWANA = Option("Tswana", "tn")
        TULU = Option("Tulu", "tcy")
        TUPI = Option("Tupi", "tup")
        TURKISH = Option("Turkish", "tr")
        TURKMEN = Option("Turkmen", "tk")
        TUVINIAN = Option("Tuvinian", "tyv")
        TZOTZIL = Option("Tzotzil", "tzo")
        UKRAINIAN = Option("Ukrainian", "uk")
        UKRAINIAN_SIGN_LANGUAGE = Option("Ukrainian Sign Language", "ukl")
        UNGWATSI = Option("Ungwatsi", "qat")
        URDU = Option("Urdu", "ur")
        UZBEK = Option("Uzbek", "uz")
        VIETNAMESE = Option("Vietnamese", "vi")
        VISAYAN = Option("Visayan", "qaa")
        WASHOE = Option("Washoe", "was")
        WELSH = Option("Welsh", "cy")
        WOLOF = Option("Wolof", "wo")
        XHOSA = Option("Xhosa", "xh")
        YAKUT = Option("Yakut", "sah")
        YAPESE = Option("Yapese", "yap")
        YIDDISH = Option("Yiddish", "yi")
        YORUBA = Option("Yoruba", "yo")
        ZULU = Option("Zulu", "zu")
812 | 
    class SOUND_MIXES:
        # Sound-mix choices for the advanced search, each built as
        # Option(label, value) where the value is the lower-case,
        # underscore-separated form of the label.
        #
        # NOTE: the names starting with "_" (_TRACK_STEREO, _MM_6_TRACK,
        # _CHANNEL_STEREO, _TRACK_DIGITAL_SOUND) are not private members. Their
        # labels begin with a number ("6-Track Stereo", "70 mm 6-Track",
        # "3 Channel Stereo", "12-Track Digital Sound") and the leading digits
        # are missing from the attribute name, leaving the separator behind
        # (enumify() in generateadvancedsearchresult.py strips leading digits
        # this way). The names are part of the public surface, so they are
        # kept as they are.
        MONO = Option("Mono", "mono")
        SILENT = Option("Silent", "silent")
        STEREO = Option("Stereo", "stereo")
        DOLBY_DIGITAL = Option("Dolby Digital", "dolby_digital")
        DOLBY = Option("Dolby", "dolby")
        DOLBY_SR = Option("Dolby SR", "dolby_sr")
        DTS = Option("DTS", "dts")
        SDDS = Option("SDDS", "sdds")
        ULTRA_STEREO = Option("Ultra Stereo", "ultra_stereo")
        _TRACK_STEREO = Option("6-Track Stereo", "6_track_stereo")
        _MM_6_TRACK = Option("70 mm 6-Track", "70_mm_6_track")
        VITAPHONE = Option("Vitaphone", "vitaphone")
        DOLBY_DIGITAL_EX = Option("Dolby Digital EX", "dolby_digital_ex")
        DE_FOREST_PHONOFILM = Option("De Forest Phonofilm", "de_forest_phonofilm")
        DTS_STEREO = Option("DTS-Stereo", "dts_stereo")
        CHRONOPHONE = Option("Chronophone", "chronophone")
        DTS_ES = Option("DTS-ES", "dts_es")
        PERSPECTA_STEREO = Option("Perspecta Stereo", "perspecta_stereo")
        CINEPHONE = Option("Cinephone", "cinephone")
        _CHANNEL_STEREO = Option("3 Channel Stereo", "3_channel_stereo")
        CINEMATOPHONE = Option("Cinematophone", "cinematophone")
        SONICS_DDP = Option("Sonics-DDP", "sonics_ddp")
        _TRACK_DIGITAL_SOUND = Option(
            "12-Track Digital Sound", "12_track_digital_sound"
        )
        DTS_70_MM = Option("DTS 70 mm", "dts_70_mm")
        IMAX_6_TRACK = Option("IMAX 6-Track", "imax_6_track")
        MATRIX_SURROUND = Option("Matrix Surround", "matrix_surround")
        SONIX = Option("Sonix", "sonix")
        SENSURROUND = Option("Sensurround", "sensurround")
        CINERAMA_7_TRACK = Option("Cinerama 7-Track", "cinerama_7_track")
        KINOPLASTICON = Option("Kinoplasticon", "kinoplasticon")
        DIGITRAC_DIGITAL_AUDIO_SYSTEM = Option(
            "Digitrac Digital Audio System", "digitrac_digital_audio_system"
        )
        CINESOUND = Option("Cinesound", "cinesound")
        PHONO_KINEMA = Option("Phono-Kinema", "phono_kinema")
        CDS = Option("CDS", "cds")
        LC_CONCEPT_DIGITAL_SOUND = Option(
            "LC-Concept Digital Sound", "lc_concept_digital_sound"
        )
855 | 
    class MY_RATINGS:
        # Choices for filtering on titles the user has already rated/seen.
        # The first option carries an empty value, i.e. it contributes nothing
        # to the comma-joined "my_ratings" query entry.
        INCLUDE_ALL_TITLES = Option("Include All Titles", "")
        EXCLUDE_TITLES_I_VE_SEEN = Option("Exclude Titles I've Seen", "exclude")
        RESTRICT_TO_TITLES_I_VE_SEEN = Option(
            "Restrict to Titles I've Seen", "restrict"
        )
862 | 
    class NOW_PLAYING:
        # Choices for the "now playing" filter. SHOW_ALL_TITLES has an empty
        # value; the other option uses the value "restrict".
        SHOW_ALL_TITLES = Option("Show All Titles", "")
        ONLY_SHOW_TITLES_CURRENTLY_PLAYING_NEAR_ME = Option(
            "Only Show Titles Currently Playing Near Me", "restrict"
        )
868 | 
    class ADULT:
        # Choices for the adult-titles filter. EXCLUDE has an empty value,
        # INCLUDE uses the value "include".
        EXCLUDE = Option("Exclude", "")
        INCLUDE = Option("Include", "include")
872 | 
873 | 
class AdvancedSearchResult(ParseBase):
    """Hold the criteria of an advanced title search as a flat query dict.

    Every keyword argument is translated into one or two entries of
    ``self.query``:

    * free-text criteria (``title``, ``keywords``, ``locations``, ``plot``)
      are stored unchanged under their own name;
    * range criteria (``release_date``, ``user_rating``, ``num_votes``,
      ``moviemeter``, ``runtime``) are ``(min, max)`` pairs stored under
      ``"<name>-min"`` and ``"<name>-max"``;
    * every other criterion is an iterable whose items are either plain
      strings or objects exposing a ``.value`` attribute (such as the
      ``Option`` constants used throughout this module); the values are
      stored comma-joined under the criterion's name.

    Note that a bare string passed for a multi-choice criterion is iterated
    character by character, so wrap single values in a list or tuple.
    """

    def __init__(
        self,
        imdb,
        title="",
        title_type=(),
        release_date=("", ""),
        user_rating=("", ""),
        num_votes=("", ""),
        genres=(),
        groups=(),
        has=(),
        companies=(),
        certificates=(),
        colors=(),
        countries=(),
        keywords="",
        languages=(),
        locations="",
        moviemeter=("", ""),
        plot="",
        runtime=("", ""),
        sound_mixes=(),
        my_ratings=(),
        now_playing=(),
        adult=(),
    ):
        """Store ``imdb`` and build ``self.query`` from the given criteria.

        The multi-choice defaults are empty tuples rather than lists so that
        no mutable object is shared between calls; any iterable is accepted.
        """
        self.imdb = imdb

        join = self._join_values
        # Keys are inserted in the same order as before, in case a consumer
        # serialises the dict in iteration order.
        self.query = {
            "title": title,
            "title_type": join(title_type),
            "release_date-min": release_date[0],
            "release_date-max": release_date[1],
            "user_rating-min": user_rating[0],
            "user_rating-max": user_rating[1],
            "num_votes-min": num_votes[0],
            "num_votes-max": num_votes[1],
            "genres": join(genres),
            "groups": join(groups),
            "has": join(has),
            "companies": join(companies),
            "certificates": join(certificates),
            "colors": join(colors),
            "countries": join(countries),
            "keywords": keywords,
            "languages": join(languages),
            "locations": locations,
            "moviemeter-min": moviemeter[0],
            "moviemeter-max": moviemeter[1],
            "plot": plot,
            "runtime-min": runtime[0],
            "runtime-max": runtime[1],
            "sound_mixes": join(sound_mixes),
            "my_ratings": join(my_ratings),
            "now_playing": join(now_playing),
            "adult": join(adult),
        }

    @staticmethod
    def _join_values(values):
        """Return the comma-joined query form of ``values``.

        Strings are used as they are; anything else contributes its
        ``.value``. A real conditional expression is used instead of the
        ``cond and a or b`` idiom, which fell through to ``"".value`` (an
        AttributeError) whenever an empty string was passed.
        """
        return ",".join(v if isinstance(v, str) else v.value for v in values)
955 | 


--------------------------------------------------------------------------------
/imdbparser/base.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | import lxml.html
 4 | 
 5 | logger = logging.getLogger(__name__)
 6 | 
 7 | 
 8 | class Base(object):
 9 |     fetched = False
10 | 
11 |     def __init__(self, imdb_id, imdb):
12 |         self.imdb_id = str(imdb_id).zfill(7)
13 |         self.imdb = imdb
14 | 
15 |     def _get_urls(self):
16 |         return [base_url % (self.imdb_id,) for base_url in self.base_urls]
17 | 
18 |     def fetch(self):
19 |         if not self.fetched:
20 |             urls = self._get_urls()
21 |             logger.debug("Fetching and parsing urls %s" % (urls,))
22 |             self.parse([self.imdb._get_data(url) for url in urls])
23 |             self.fetched = True
24 | 
25 |     def cleanup_photo_url(self, url):
26 |         if url:
27 |             if "title_addposter" in url or "imdb-share-logo" in url:
28 |                 return None
29 |             url = url.split(".")
30 |             url.pop(-2)
31 |             return ".".join(url)
32 | 
33 |     def parse(self, htmls):
34 |         self.trees = [lxml.html.fromstring(html) for html in htmls]
35 | 


--------------------------------------------------------------------------------
/imdbparser/chart.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | import sys
 3 | from decimal import Decimal
 4 | 
 5 | from requests.compat import quote_plus
 6 | 
 7 | from .base import Base
 8 | from .movie import Movie
 9 | 
10 | 
class Chart(Base):
    """One IMDb chart page, parsed into a list of pre-filled ``Movie`` objects.

    ``chart`` is substituted into ``base_url`` to build the page address.
    After ``parse`` has run, ``results`` holds one ``Movie`` per table row
    with ``title``, ``year``, ``cover``, ``rating`` and ``votes`` taken from
    the chart listing.
    """

    base_url = "https://www.imdb.com/chart/%s"

    def __init__(self, chart, imdb):
        # Base.__init__ is not invoked here: only the chart name and the imdb
        # handle are stored, there is no imdb_id.
        self.chart = chart
        self.imdb = imdb

    def _get_urls(self):
        """Return the chart URL as a one-element list."""
        return [self.base_url % (self.chart,)]

    def parse(self, htmls):
        """Build ``self.results`` from the first document in ``htmls``.

        ``rating`` and ``votes`` are None for rows without rating text, and
        ``year`` is None when no "(YYYY)" text is found.
        """
        super(Chart, self).parse(htmls)

        self.results = []
        for item_row in self.trees[0].xpath("//tbody[@class='lister-list']/tr"):
            poster_column = item_row.xpath(".//td[@class='posterColumn']")[0]

            cover = poster_column.xpath(".//img/@src")[0]
            if "/nopicture/" in cover:
                cover = None
            else:
                cover = self.cleanup_photo_url(cover)

            imdb_id = re.findall(r"/tt(\d+)/", poster_column.xpath(".//a/@href")[0])[0]

            # Reset for every row: otherwise a row without rating text either
            # raises UnboundLocalError (first row) or silently inherits the
            # values of the previous row.
            rating = None
            votes = None
            rating_text = item_row.xpath(
                ".//td[contains(@class, 'imdbRating')]/strong/@title"
            )
            if rating_text:
                # Expects exactly two numeric groups in the title text:
                # the rating followed by the vote count.
                rating, votes = re.findall(r"[0-9.,]+", rating_text[0])
                rating = Decimal(rating)
                votes = int(votes.replace(",", ""))

            year = None
            for base_element in item_row.xpath(
                ".//td[@class='titleColumn']//span[@class='secondaryInfo']/text()"
            ):
                years = re.findall(r"\((\d{4})\)", base_element)
                if years:
                    year = int(years[0])
                    break

            item = Movie(imdb_id, self.imdb)

            item.title = item_row.xpath(".//td[@class='titleColumn']//a/text()")[0]
            item.year = year
            item.cover = cover
            item.rating = rating
            item.votes = votes

            self.results.append(item)
63 | 


--------------------------------------------------------------------------------
/imdbparser/exceptions.py:
--------------------------------------------------------------------------------
class IMDbException(Exception):
    """Root of this package's exception hierarchy; adds nothing to Exception."""
3 | 
4 | 
class UnknownChartTypeException(IMDbException):
    """IMDbException subclass; presumably signals an unsupported chart name (raise site not shown here)."""
7 | 


--------------------------------------------------------------------------------
/imdbparser/generateadvancedsearchresult.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | 
 3 | from .advancedsearchresult import Option
 4 | 
 5 | 
 6 | def enumify(text):
 7 |     text = re.sub("[ -.]+", "_", text)
 8 |     text = re.sub(r"[^a-zA-Z0-9_]+", "", text)
 9 |     text = text.strip("_")
10 |     if text.startswith("20th"):
11 |         text = "twentieth" + text[4:]
12 |     return text.lstrip("012345679").upper()
13 | 
14 | 
def generate_function_and_enums(tree):
    """Generate Python source for the ``AS`` enums and ``AdvancedSearchResult``.

    Every ``div`` with class ``clause`` in ``tree`` is treated as one form
    section and classified by the inputs/selects it contains:

    * a single input              -> plain keyword argument (default ``''``)
    * inputs sharing one name     -> enum argument, one option per input
    * a single select             -> enum argument, one option per ``option``
    * ``-min``/``-max`` pair      -> tuple argument (default ``('', '')``)

    Sections that fit none of these are reported with ``print`` and ignored.

    :param tree: object exposing ``xpath`` (presumably an lxml tree of the
        advanced search form page -- confirm against the caller).
    :return: the generated source code as one string.
    """
    # Section titles that are deliberately left out of the generated API.
    skipped_titles = (
        "Instant Watch Options",  # bugged and not all that interesting
        "Cast/Crew",  # Skipped for now
        "Display Options",  # sorting option, not part of the form
    )

    def has_suffix(names, suffix):
        return any(name.endswith(suffix) for name in names)

    enums = {}
    all_fields = []
    for section in tree.xpath("//div[@class='clause']"):
        section_title = section.xpath(".//h3/text()")[0]
        if section_title in skipped_titles:
            continue

        input_fields = section.xpath(".//input")
        input_field_names = section.xpath(".//input/@name")
        select_fields = section.xpath(".//select")
        select_field_names = section.xpath(".//select/@name")

        inputs_are_range = has_suffix(input_field_names, "-min") and has_suffix(
            input_field_names, "-max"
        )
        is_min_max = inputs_are_range or (
            has_suffix(select_field_names, "-min")
            and has_suffix(select_field_names, "-max")
        )

        input_count = len(input_fields)
        select_count = len(select_fields)

        if input_count == 1 and select_count == 0:
            all_fields.append((input_field_names[0], "normal", ""))
        elif len(set(input_field_names)) == 1 and select_count == 0:
            # Several inputs sharing one name: each one is an enum option.
            field_name = input_field_names[0]
            all_fields.append((field_name, "enum", []))
            for input_elem in input_fields:
                label_elem = section.xpath(
                    f".//label[@for='{input_elem.attrib['id']}']"
                )[0]
                # Labels without text carry their name in a child's title attribute.
                label = label_elem.text or label_elem.xpath(".//*/@title")[0]
                value = input_elem.attrib["value"]
                option = Option(label, value)
                enums.setdefault(enumify(field_name), {})[enumify(label)] = option
        elif select_count == 1 and input_count == 0:
            field_name = select_field_names[0]
            all_fields.append((field_name, "enum", []))
            for option_elem in select_fields[0].xpath(".//option"):
                label = option_elem.text
                value = option_elem.attrib["value"]
                option = Option(label, value)
                enums.setdefault(enumify(field_name), {})[enumify(label)] = option
        elif is_min_max:
            # Drop the trailing "-min"/"-max" to get the shared field name.
            names = select_field_names if select_field_names else input_field_names
            all_fields.append((names[0][:-4], "minmax", ("", "")))
        else:
            print("Unknown", section_title, input_fields, select_fields)

    # Emit the nested enum classes.
    code = ["class AS:"]
    for enum_name, options in enums.items():
        code.append(f"    class {enum_name}:")
        code.extend(
            f"        {label} = {option!r}" for label, option in options.items()
        )
        code.append("")
    code.append("")

    # Emit the result class whose __init__ maps arguments onto query keys.
    func_args = ", ".join(
        f"{name}={default!r}" for name, _kind, default in all_fields
    )
    code.append("class AdvancedSearchResult(ParseBase):")
    code.append(f"    def __init__(self, imdb, {func_args}):")
    code.append("        self.imdb = imdb")
    code.append("")
    code.append("        self.query = {}")
    for name, kind, _default in all_fields:
        if kind == "normal":
            code.append(f"        self.query['{name}'] = {name}")
        elif kind == "enum":
            code.append(
                f"        self.query['{name}'] = ','.join([isinstance(v, str) and v or v.value for v in {name}])"
            )
        elif kind == "minmax":
            code.append(f"        self.query['{name}-min'] = {name}[0]")
            code.append(f"        self.query['{name}-max'] = {name}[1]")

    return "\n".join(code)
98 | 


--------------------------------------------------------------------------------
/imdbparser/imdb.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | 
  3 | import requests
  4 | 
  5 | from .advancedsearchresult import AS, AdvancedSearchResult
  6 | from .chart import Chart
  7 | from .exceptions import UnknownChartTypeException
  8 | from .movie import Movie
  9 | from .person import Person
 10 | from .searchresult import SearchResult
 11 | 
# Chart identifiers accepted by ``IMDb.get_chart``; any other value makes it
# raise ``UnknownChartTypeException``.
CHART_TYPES = [
    "tvmeter",
    "moviemeter",
    "top",
    "top-english-movies",
    "toptv",
    # 'top-rated-indian-movies',
    "bottom",
]
 21 | 
 22 | 
 23 | class IMDb(object):
 24 |     def _get_data(self, url):
 25 |         headers = {
 26 |             "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
 27 |             "Accept-Language": "en",
 28 |         }
 29 |         r = requests.get(url, headers=headers)
 30 |         return r.text
 31 | 
 32 |     def _normalize_title(self, title):
 33 |         title = re.sub(r"[^\x00-\x7F]+", "", title)
 34 |         title = re.sub(r" +", " ", title)
 35 | 
 36 |         return title
 37 | 
 38 |     def resolve_movie(self, title, year=None):
 39 |         """Tries to find a movie with a given title and year"""
 40 |         r = self.search_movie(title)
 41 | 
 42 |         return self._match_results(r, title, year)
 43 | 
 44 |     def resolve_tv_show(self, title, year=None):
 45 |         """Tries to find a movie with a given title and year"""
 46 |         r = self.search_tv_show(title)
 47 | 
 48 |         return self._match_results(r, title, year)
 49 | 
 50 |     def _match_results(self, results, title, year):
 51 |         results.fetch()
 52 |         results = results.results
 53 | 
 54 |         if not results:
 55 |             return None
 56 | 
 57 |         normalized_title = self._normalize_title(title)
 58 | 
 59 |         for result in results[:7]:
 60 |             for title in result.get_titles():
 61 |                 result_normalized_title = result.title
 62 |                 if result_normalized_title == normalized_title and (
 63 |                     year is None or result.year is None or year == result.year
 64 |                 ):
 65 |                     return result
 66 | 
 67 |         if year:
 68 |             for result in results[:5]:
 69 |                 for title in result.get_titles():
 70 |                     result_normalized_title = result.title
 71 |                     if (
 72 |                         result_normalized_title == normalized_title
 73 |                         and result.year is not None
 74 |                         and abs(year - result.year) <= 1
 75 |                     ):
 76 |                         return result
 77 | 
 78 |         return results[0]
 79 | 
 80 |     def search_movie(self, query):
 81 |         return SearchResult("movie", query, self)
 82 | 
 83 |     def search_tv_show(self, query):
 84 |         return SearchResult("tv", query, self)
 85 | 
 86 |     def get_movie(self, imdb_id):
 87 |         return Movie(imdb_id, self)
 88 | 
 89 |     def get_person(self, imdb_id):
 90 |         return Person(imdb_id, self)
 91 | 
 92 |     def get_chart(self, chart_type):
 93 |         if chart_type not in CHART_TYPES:
 94 |             raise UnknownChartTypeException()
 95 | 
 96 |         return Chart(chart_type, self)
 97 | 
 98 |     def advanced_search(self, **kwargs):
 99 |         return AdvancedSearchResult(self, **kwargs)
100 | 


--------------------------------------------------------------------------------
/imdbparser/movie.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import re
  3 | from decimal import Decimal
  4 | 
  5 | from .base import Base
  6 | from .person import Person
  7 | 
  8 | logger = logging.getLogger(__name__)
  9 | 
 10 | 
 11 | class Movie(Base):
 12 |     title = None
 13 |     year = None
 14 | 
 15 |     base_urls = [
 16 |         "https://www.imdb.com/title/tt%s/reference",
 17 |         "https://www.imdb.com/title/tt%s/",
 18 |     ]
 19 | 
 20 |     def parse(self, htmls):
 21 |         super(Movie, self).parse(htmls)
 22 | 
 23 |         self.alternative_titles = []
 24 |         self.actors = []
 25 |         self.directors = []
 26 |         self.writers = []
 27 |         self.more_like_this = []
 28 | 
 29 |         self.languages = []
 30 |         self.genres = []
 31 |         self.countries = []
 32 |         self.plot_keywords = []
 33 | 
 34 |         self.cover = None
 35 |         self.duration = None
 36 |         self.title = None
 37 |         self.year = None
 38 |         self.release_date = None
 39 |         self.description = None
 40 |         self.plot = None
 41 |         self.storyline = None
 42 |         self.tagline = None
 43 |         self.rating = None
 44 |         self.votes = None
 45 | 
 46 |         titles = [
 47 |             x.strip()
 48 |             for x in self.trees[0].xpath('//h3[@itemprop="name"]//text()')
 49 |             if x.strip() and x not in ["(", ")"]
 50 |         ]
 51 | 
 52 |         self.title = titles[0]
 53 | 
 54 |         title_extra = [
 55 |             x.strip()
 56 |             for x in self.trees[0].xpath(
 57 |                 "//div[@class='titlereference-header']/div/text()"
 58 |             )
 59 |             if x.strip()
 60 |         ]
 61 |         if title_extra:
 62 |             title_extra = title_extra[0]
 63 |             if title_extra != "Reference View":
 64 |                 if self.trees[0].xpath(
 65 |                     "//span[@class='titlereference-original-title-label']"
 66 |                 ):
 67 |                     self.alternative_titles.append(self.title)
 68 |                     self.title = title_extra
 69 |                 else:
 70 |                     self.alternative_titles.append(title_extra)
 71 | 
 72 |         for t in titles[1:]:
 73 |             try:
 74 |                 self.year = int(t.strip(u"()").split(u"\u2013")[0])
 75 |             except ValueError:
 76 |                 continue
 77 |             else:
 78 |                 break
 79 |         else:
 80 |             header_title = self.trees[0].xpath("//meta[@name='title']/@content")
 81 |             if header_title:
 82 |                 try:
 83 |                     self.year = int(
 84 |                         re.findall(
 85 |                             r"\((?:TV Series )?(\d{4})(?:\u2013(?: |\d+))?\) - IMDb$",
 86 |                             header_title[0],
 87 |                         )[0]
 88 |                     )
 89 |                 except (ValueError, IndexError):
 90 |                     pass
 91 | 
 92 |         cover = self.trees[0].xpath("//link[@rel='image_src']/@href")
 93 |         if cover:
 94 |             self.cover = self.cleanup_photo_url(cover[0])
 95 |             if "images/logos/imdb_fb_logo" in self.cover:
 96 |                 self.cover = None
 97 | 
 98 |         rating = self.trees[0].xpath("//span[@class='ipl-rating-star__rating']/text()")
 99 |         if rating and rating[0]:
100 |             self.rating = Decimal(rating[0])
101 | 
102 |         votes = self.trees[0].xpath(
103 |             "//span[@class='ipl-rating-star__total-votes']/text()"
104 |         )
105 |         if votes and votes[0]:
106 |             self.votes = int(votes[0].strip("()").replace(",", ""))
107 | 
108 |         rows = self.trees[0].xpath(
109 |             "//table[@class='titlereference-list ipl-zebra-list']//tr"
110 |         )
111 |         for row in rows:
112 |             key, value = row.xpath("./td")
113 |             key = str(key.text)
114 | 
115 |             if key == "Genres":
116 |                 self.genres = [
117 |                     x.text for x in value.xpath(".//a") if "/genre/" in x.attrib["href"]
118 |                 ]
119 |             elif key == "Taglines":
120 |                 self.tagline = value.text.strip()
121 |             elif key == "Plot Summary":
122 |                 plot = value.xpath("./p")
123 |                 if plot:
124 |                     self.plot = plot[0].text.strip()
125 |             elif key == "Plot Keywords":
126 |                 self.plot_keywords = [
127 |                     x.text
128 |                     for x in value.xpath(".//a")
129 |                     if "/keyword/" in x.attrib["href"]
130 |                 ]
131 |             elif key == "Also Known As":
132 |                 self.alternative_titles += [
133 |                     x.strip().split("\n")[0].strip()
134 |                     for x in value.xpath(".//li/text()")
135 |                     if x.strip()
136 |                 ]
137 |             elif key == "Runtime":
138 |                 runtimes = re.findall("(\d+) min", " ".join(value.xpath(".//text()")))
139 |                 if runtimes:
140 |                     self.duration = int(runtimes[0])
141 |             elif key == "Country":
142 |                 self.countries = [
143 |                     x.text
144 |                     for x in value.xpath(".//a")
145 |                     if "/country/" in x.attrib["href"]
146 |                 ]
147 |             elif key == "Language":
148 |                 self.languages = [
149 |                     x.text
150 |                     for x in value.xpath(".//a")
151 |                     if "/language/" in x.attrib["href"]
152 |                 ]
153 | 
154 |         summary_texts = self.trees[1].xpath("//div[@class='summary_text']/text()")
155 |         if summary_texts:
156 |             self.description = summary_texts[0].strip()
157 | 
158 |         storylines = self.trees[1].xpath(
159 |             "//h2[text()='Storyline']/../div/p/span//text()"
160 |         )
161 |         if storylines:
162 |             self.storyline = "".join(storylines).strip()
163 |             if self.storyline.startswith("Add a Plot"):
164 |                 self.storyline = None
165 | 
166 |         release_dates = [
167 |             x.strip()
168 |             for x in self.trees[1].xpath("//h4[text()='Release Date:']/../text()")
169 |             if x.strip()
170 |         ]
171 |         if release_dates:
172 |             self.release_date = release_dates[0]
173 | 
174 |         rows = self.trees[0].xpath("//div[@class='titlereference-overview-section']")
175 |         for row in rows:
176 |             key = row.xpath("./text()")[0].strip()
177 |             if (
178 |                 key == "Director:"
179 |                 or key == "Directors:"
180 |                 or key == "Writers:"
181 |                 or key == "Writer:"
182 |             ):
183 |                 for elem in row.xpath(".//a"):
184 |                     if "/name/" not in elem.attrib["href"]:
185 |                         continue
186 |                     p = Person(
187 |                         re.findall("/nm(\d+)", elem.attrib["href"])[0], self.imdb
188 |                     )
189 |                     p.name = elem.text
190 |                     if key == "Director:" or key == "Directors:":
191 |                         self.directors.append(p)
192 |                     elif key == "Writers:" or key == "Writer:":
193 |                         self.writers.append(p)
194 | 
195 |         for row in self.trees[0].xpath(
196 |             "//h4[@id='cast']/../../following-sibling::table[1]//tr//td[@itemprop='actor']"
197 |         ):
198 |             elem = row.xpath(".//a")
199 |             if not elem:
200 |                 continue
201 | 
202 |             elem = elem[0]
203 |             if "/name/" not in elem.attrib["href"]:
204 |                 continue
205 | 
206 |             p = Person(re.findall("/nm(\d+)", elem.attrib["href"])[0], self.imdb)
207 |             p.name = elem.xpath("./span/text()")[0]
208 |             self.actors.append(p)
209 | 
210 |         self.alternative_titles = list(set(self.alternative_titles))
211 | 
212 |         for rec_details in self.trees[1].xpath("//div[@class='rec_details']"):
213 |             self.more_like_this.append(self.parse_recommendation(rec_details))
214 | 
215 |     def parse_recommendation(self, tree):
216 |         tree_title = tree.xpath(".//div[@class='rec-title']")[0]
217 |         imdb_id = tree_title.xpath("./a/@href")[0].split("/")[2][2:]
218 |         movie = Movie(imdb_id, self.imdb)
219 | 
220 |         title = tree_title.xpath(".//a//text()")
221 |         if title:
222 |             movie.title = title[0].strip()
223 | 
224 |         year = tree_title.xpath("./span/text()")
225 |         if year:
226 |             try:
227 |                 movie.year = int(tree_title.xpath("./span/text()")[0].strip("()"))
228 |             except ValueError:
229 |                 pass
230 | 
231 |         movie.genres = [
232 |             x.strip()
233 |             for x in tree.xpath(".//div[contains(@class, 'rec-cert-genre')]/text()")
234 |             if x.strip()
235 |         ]
236 | 
237 |         ratings = tree.xpath(".//div[contains(@class, 'rating-list')]/@title")
238 |         if ratings:
239 |             ratings = re.findall(r"([\d.]+)/10 \(([\d,]+) votes\)", ratings[0])
240 |             if ratings:
241 |                 ratings = ratings[0]
242 |                 movie.rating = Decimal(ratings[0])
243 |                 movie.votes = int(ratings[1].replace(",", ""))
244 | 
245 |         movie.tagline = "".join(
246 |             tree.xpath(".//div[@class='rec-outline']/p/text()")
247 |         ).strip()
248 | 
249 |         return movie
250 | 
251 |     def __repr__(self):
252 |         return "<Movie fetched=%r imdb_id=%r title=%r year=%r>" % (
253 |             self.fetched,
254 |             self.imdb_id,
255 |             self.title,
256 |             self.year,
257 |         )
258 | 
259 |     def get_titles(self):
260 |         yield self.title
261 |         for title in self.alternative_titles:
262 |             yield title
263 | 


--------------------------------------------------------------------------------
/imdbparser/person.py:
--------------------------------------------------------------------------------
 1 | from .base import Base
 2 | 
 3 | 
 4 | class Person(Base):
 5 |     name = None
 6 | 
 7 |     base_url = "http://www.imdb.com/name/nm%s/"
 8 | 
 9 |     def parse(self, html):
10 |         super(Person, self).parse(html)
11 | 
12 |     def __repr__(self):
13 |         return "<Person fetched=%r imdb_id=%r name=%r>" % (
14 |             self.fetched,
15 |             self.imdb_id,
16 |             self.name,
17 |         )
18 | 


--------------------------------------------------------------------------------
/imdbparser/searchresult.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | import sys
 3 | 
 4 | from requests.compat import quote_plus
 5 | 
 6 | from .base import Base
 7 | from .movie import Movie
 8 | 
 9 | 
class SearchResult(Base):
    """Title search on IMDb's ``find`` page.

    After parsing, ``results`` holds one partially-filled ``Movie`` (title,
    year, cover, alternative titles) per result row.
    """

    base_url = "http://www.imdb.com/find?q=%s&s=tt"

    def __init__(self, search_type, query, imdb):
        """Store the search type and the URL-quoted query.

        :param search_type: ``"tv"`` or ``"movie"`` to restrict the title
            type; any other value leaves the search unrestricted.
        :param query: text to search for.
        :param imdb: object stored as ``self.imdb`` and handed to every
            ``Movie`` created by ``parse``.
        """
        self.search_type = search_type
        # Python 2 only: quote_plus needs bytes, not unicode.
        needs_encoding = sys.version_info[0] < 3 and isinstance(query, unicode)
        if needs_encoding:
            query = query.encode("utf-8")
        # The quoted query takes the place of an id in the URL template.
        self.imdb_id = quote_plus(query)
        self.imdb = imdb

    def _get_urls(self):
        """Return the single search URL, with a title-type filter if requested."""
        if self.search_type == "tv":
            type_filter = "&ttype=tv"
        elif self.search_type == "movie":
            type_filter = "&ttype=ft"
        else:
            type_filter = ""

        return [(self.base_url + type_filter) % (self.imdb_id,)]

    def parse(self, htmls):
        """Fill ``self.results`` from the rows of the ``findList`` table."""
        super(SearchResult, self).parse(htmls)

        self.results = []
        row_query = "//table[@class='findList']//tr[contains(@class, 'findResult')]"
        for movie_row in self.trees[0].xpath(row_query):
            photo_src = movie_row.xpath(".//td[@class='primary_photo']//img/@src")[0]
            # Placeholder images count as "no cover".
            cover = (
                None if "/nopicture/" in photo_src else self.cleanup_photo_url(photo_src)
            )

            text_cell = movie_row.xpath(".//td[@class='result_text']")[0]
            imdb_id = re.findall(r"/tt(\d+)/", text_cell.xpath(".//a/@href")[0])[0]
            main_title = text_cell.xpath(".//a")[0].text
            other_titles = []

            fragments = []
            for raw in text_cell.xpath("./text()"):
                stripped = raw.strip()
                if stripped:
                    fragments.append(stripped)

            if "aka" in fragments:
                fragments.remove("aka")

                # With an "aka" row the linked text becomes the alternative
                # and the quoted text in <i> becomes the main title.
                other_titles.append(main_title)
                main_title = text_cell.xpath("./i")[0].text.strip('"')

            year = None
            if fragments:
                year_match = re.search(r"\((\d{4})\)", fragments[0])
                if year_match is not None:
                    try:
                        year = int(year_match.group(1))
                    except ValueError:
                        pass

            movie = Movie(imdb_id, self.imdb)
            movie.title = main_title
            movie.year = year
            movie.cover = cover
            movie.alternative_titles = other_titles

            self.results.append(movie)
71 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [flake8]
2 | exclude = .git
3 | ignore = W601
4 | max-line-length = 119
5 | 
6 | [bdist_wheel]
7 | universal = 1
8 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from setuptools import setup
 4 | 
 5 | 
# Use the README next to this file as the package's long description.
readme_path = os.path.join(os.path.dirname(__file__), "README.rst")
with open(readme_path) as fp:
    long_description = fp.read()

# Package metadata; only the ``imdbparser`` package is shipped, and it
# depends on lxml and requests at runtime.
setup(
    name='imdbparser',
    version='1.0.22',
    url='https://github.com/JohnDoee/imdbparser',
    author='John Doee',
    author_email='johndoee@tidalstream.org',
    description='IMDB Parser',
    long_description=long_description,
    license='MIT',
    packages=['imdbparser'],
    install_requires=['lxml', 'requests'],
    classifiers=[
        'Development Status :: 4 - Beta',
        'Intended Audience :: Developers',
        'License :: OSI Approved :: MIT License',
        'Operating System :: OS Independent',
        'Programming Language :: Python',
        'Programming Language :: Python :: 2',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.4',
        'Programming Language :: Python :: 3.5',
        'Programming Language :: Python :: 3.6',
        'Topic :: Database :: Front-Ends',
        'Topic :: Software Development :: Libraries :: Python Modules',
   ]
)
37 | 


--------------------------------------------------------------------------------