├── database_settings.py.example ├── README.rst ├── importer.py ├── imdbapi.py ├── database.py └── index.tpl /database_settings.py.example: -------------------------------------------------------------------------------- 1 | CONNECTION_STRING = "sqlite:///imdbapi.db" 2 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | imdbapi 3 | ======= 4 | 5 | .. image:: https://www.codeshelter.co/static/badges/badge-flat.svg 6 | :target: www.codeshelter.co 7 | 8 | imdbapi is a web-based script that parses the IMDB datafiles and exposes 9 | an API to retrieve information from them. 10 | 11 | Dependencies 12 | ------------ 13 | 14 | To run imdbapi, you need the `bottle framework `_ 15 | and the `SQLAlchemy ORM `_. 16 | 17 | License 18 | ------- 19 | 20 | imdbapi is made available under the GPL (version 3 or later). 21 | 22 | -------------------------------------------------------------------------------- /importer.py: -------------------------------------------------------------------------------- 1 | from __future__ import with_statement 2 | from sqlalchemy.ext.declarative import declarative_base 3 | from sqlalchemy.orm import sessionmaker, relation, backref 4 | from database import Show, Episode, init_db 5 | 6 | import codecs 7 | import re 8 | import sqlalchemy 9 | 10 | session = init_db(transactional=True) 11 | 12 | def import_data(filename): 13 | """Import episode names and ratings from a file.""" 14 | regex = re.compile(""""(?P.*?)"\s+\((?P\d+)(?:|/.*?)\)\s+\{(?P.*?)\s?\(\#(?P\d+)\.(?P\d+)\)\}""") 15 | 16 | with codecs.open(filename, "r", "latin-1") as ratings: 17 | # Generate all the lines that matched. 
18 | matches = (match for match in (regex.search(line.strip()) for line in ratings) if match) 19 | counter = 0 20 | for match in matches: 21 | counter += 1 22 | if not counter % 100: 23 | print counter 24 | episode = {} 25 | for field in ["show_name", "year", "episode_name", "episode_no", "season_no"]: 26 | episode[field] = match.group(field) 27 | 28 | # If the episode has no name it is given the same name as on imdb.com for consistency. 29 | if not episode["episode_name"]: 30 | episode["episode_name"] = "Episode #%s.%s" % (episode["season_no"], episode["episode_no"]) 31 | 32 | try: 33 | show = session.query(Show).filter_by(name=episode["show_name"], year=episode["year"]).one() 34 | except sqlalchemy.orm.exc.NoResultFound: 35 | show = Show(episode["show_name"], episode["year"]) 36 | session.add(show) 37 | 38 | try: 39 | episode = session.query(Episode).filter_by(name=episode["episode_name"], show=show).one() 40 | except sqlalchemy.orm.exc.NoResultFound: 41 | episode = Episode(show, episode["episode_name"], episode["season_no"], episode["episode_no"]) 42 | session.add(episode) 43 | 44 | session.commit() 45 | 46 | if __name__ == "__main__": 47 | import_data("movies.list") 48 | 49 | -------------------------------------------------------------------------------- /imdbapi.py: -------------------------------------------------------------------------------- 1 | from database import Show, Episode, Stats, init_db 2 | from bottle import route, run, request, template, response, default_app 3 | import simplejson 4 | import sqlalchemy 5 | import urllib 6 | 7 | session = init_db() 8 | 9 | def get_data(show_name, show_year=None): 10 | if not show_name or len(show_name) <= 3 or (show_year and not show_year.isdigit()): 11 | return None 12 | 13 | show = session.query(Show).filter(Show.name.like(show_name)) 14 | if show_year: 15 | show = show.filter(Show.year==int(show_year)) 16 | try: 17 | single_show = show.one() 18 | except sqlalchemy.orm.exc.NoResultFound: 19 | return None 20 | 
except sqlalchemy.orm.exc.MultipleResultsFound: 21 | shows = show.order_by(Show.name)[:15] 22 | show_list = [{"name": show.name, "year": show.year} for show in shows] 23 | return {"shows": show_list} 24 | 25 | episodes = [] 26 | for episode in single_show.episodes: 27 | episodes.append({"name": episode.name, "number": episode.number, "season": episode.season}) 28 | return {single_show.name: {"year": single_show.year, "episodes": episodes}} 29 | 30 | @route('/json/') 31 | def json(): 32 | response.content_type = 'application/json' 33 | show_name = request.GET.get("name", None) 34 | show_year = request.GET.get("year", None) 35 | callback = request.GET.get("callback", None) 36 | data = simplejson.dumps(get_data(show_name, show_year)) 37 | session.close() 38 | if callback: 39 | data = "%s(%s)" % (callback, data) 40 | return data 41 | 42 | @route('/js/') 43 | def js(): 44 | show_name = request.GET.get("name", None) 45 | show_year = request.GET.get("year", None) 46 | callback = request.GET.get("callback", None) 47 | data = simplejson.dumps(get_data(show_name, show_year)) 48 | session.close() 49 | if callback: 50 | data = "%s(%s)" % (callback, data) 51 | return data 52 | 53 | @route('/') 54 | def index(): 55 | return template("index") 56 | 57 | app = application = default_app() 58 | 59 | if __name__ == "__main__": 60 | run(host='localhost', port=8000) 61 | -------------------------------------------------------------------------------- /database.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy.ext.declarative import declarative_base 2 | from sqlalchemy.orm import sessionmaker, relation, backref 3 | import sqlalchemy 4 | try: 5 | from database_settings import CONNECTION_STRING 6 | except ImportError: 7 | CONNECTION_STRING = "sqlite:///imdbapi.db" 8 | 9 | Base = declarative_base() 10 | 11 | class Show(Base): 12 | __tablename__ = 'shows' 13 | id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True) 14 | name = 
sqlalchemy.Column(sqlalchemy.Unicode(200), index=True) 15 | year = sqlalchemy.Column(sqlalchemy.Integer) 16 | 17 | def __init__(self, name, year): 18 | self.name = name 19 | self.year = year 20 | 21 | def __repr__(self): 22 | return "" % (self.name, self.year) 23 | 24 | sqlalchemy.Index('idx_show_name_year', Show.name, Show.year) 25 | 26 | class Episode(Base): 27 | __tablename__ = 'episodes' 28 | id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True) 29 | show_id = sqlalchemy.Column(sqlalchemy.Integer, sqlalchemy.ForeignKey('shows.id'), index=True) 30 | name = sqlalchemy.Column(sqlalchemy.Unicode(200), index=True) 31 | season = sqlalchemy.Column(sqlalchemy.Integer) 32 | number = sqlalchemy.Column(sqlalchemy.Integer) 33 | 34 | show = relation(Show, backref='episodes', order_by=id) 35 | 36 | def __init__(self, show, name, season, number): 37 | self.show = show 38 | self.name = name 39 | self.season = season 40 | self.number = number 41 | 42 | def __repr__(self): 43 | return "" % (self.season, self.number, self.name) 44 | 45 | class Stats(Base): 46 | __tablename__ = 'stats' 47 | key = sqlalchemy.Column(sqlalchemy.Unicode(200), index=True, primary_key=True) 48 | value = sqlalchemy.Column(sqlalchemy.Integer) 49 | 50 | def __init__(self, key, value): 51 | self.key = key 52 | self.value = value 53 | 54 | def __repr__(self): 55 | return "" % (self.key, self.value) 56 | 57 | 58 | def init_db(transactional=False): 59 | engine = sqlalchemy.create_engine(CONNECTION_STRING) 60 | Base.metadata.create_all(engine) 61 | Session = sessionmaker(bind=engine) 62 | session = Session() 63 | return session 64 | 65 | if __name__ == "__main__": 66 | init_db() 67 | -------------------------------------------------------------------------------- /index.tpl: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | IMDB API 5 | 15 | 16 | 17 |

IMDB API

18 |

by Stavros Korokithakis

19 |

20 | Well hello. I see you want to access IMDB programmatically. Well, this is your chance. 21 | This service is updated every day from the 22 | IMDB data files and you 23 | can access it with the ?name= URL parameter. For example, 24 | this is the episode list (in JSON) for 25 | How I Met Your Mother. 26 |

27 |

28 | This service only supports shows; it does not have any movies. 29 |

30 | 31 | Instructions: 32 |
    33 |
  1. Use the "name" GET parameter followed by the urlencoded show name. If there 34 | are multiple shows with the same name, you can pass the "year" parameter as well 35 | to select a year. The script can now return JSONP, you can pass the "callback" 36 | parameter to get the data wrapped in a javascript function whose name is the 37 | value of the callback parameter (example).
  2. 38 |
  3. The URL /js/ returns data as text/html and /json/ returns data as application/json. 39 | Both use exactly the same parameters.
  4. 40 |
  5. You can use wildcards by using the percent sign, for example 41 | "how i met % mother" will work.
  6. 42 |
  7. If there are multiple show names, you will get a list of the first 15 43 | that match your string (example).
  8. 44 |
  9. If the show was not found, you will get "null".
  10. 45 |
46 | 47 |
    48 |
  • The source code for this service is 49 | available on GitHub. 50 |
  • 51 |
  • I have also written a script that will rename your show's video files using 52 | this and other services, you can find it here: 53 | Episode renamer.
  • 54 |
  • If you need to contact me, my email can be found on my site.
  • 55 |
  • My site is stavros.io.
  • 56 |
57 | 58 |

59 | Have fun! 60 |

61 |
a stavros.io project
62 | 63 | 64 | --------------------------------------------------------------------------------