├── .gitignore
├── Readme.md
├── config.py.example
├── requirements.txt
├── scanner.py
├── screenshot.png
└── watch.py


/.gitignore:
--------------------------------------------------------------------------------
1 | # This file stores the configuration
2 | config.py
3 | 


--------------------------------------------------------------------------------
/Readme.md:
--------------------------------------------------------------------------------
 1 | # What to watch?
 2 | 
 3 | I have a whole lot of unwatched movies, and deciding what to watch is not an easy thing to do.
 4 | 
 5 | Let's automate!
 6 | 
 7 | ![Scanning all my movies](screenshot.png)
 8 | 
 9 | *Currently, it only scans folders for movies, fetches their data from the OMDB API, and saves the JSON files in a folder. I then use some `jq` magic to filter out movies. I'll probably code something custom that allows you to better filter out movies.*
10 | 
11 | # Usage
12 | 
13 | Copy the sample config file: `cp config.py.example config.py`
14 | 
15 | Add your movie folder paths to the `PATHS` list in the config file.
16 | 
17 | Run `python watch.py`
18 | 
19 | # Todo
20 | 
21 | * A Custom Filter Tool
22 |   * Takes in parameters like rating, genre, actors etc.
23 |   * Lists out the best OR top-n matches
24 | 
25 | *Currently I do stuff like: `jq ". | select(.imdb_rating != \"N/A\") | select(.imdb_rating | tonumber >= 8.0) | .title, .imdb_rating" < *.json`*
26 | 
27 | * This can seriously become a movie database 'tagger' like Picard - all it needs is a sleek GUI.
28 | 
29 | * Errors can be handled by searching rather than taking the top result; it'll require some human intervention, though.
30 | 
31 | * We might need to go back to the omdb package if we need to add searching etc. back.
32 | 


--------------------------------------------------------------------------------
/config.py.example:
--------------------------------------------------------------------------------
1 | # The paths to search for movies
2 | PATHS = []
3 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | guessit==0.10.3
2 | omdb==0.4.0
3 | 


--------------------------------------------------------------------------------
/scanner.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import logging
 3 | 
 4 | 
 5 | logger = logging.getLogger(__name__)
 6 | 
 7 | # All video extensions
 8 | EXT = (".3g2 .3gp .3gp2 .3gpp .60d .ajp .asf .asx .avchd .avi .bik .bix"
 9 |        ".box .cam .dat .divx .dmf .dv .dvr-ms .evo .flc .fli .flic .flv"
10 |        ".flx .gvi .gvp .h264 .m1v .m2p .m2ts .m2v .m4e .m4v .mjp .mjpeg"
11 |        ".mjpg .mkv .moov .mov .movhd .movie .movx .mp4 .mpe .mpeg .mpg"
12 |        ".mpv .mpv2 .mxf .nsv .nut .ogg .ogm .omf .ps .qt .ram .rm .rmvb"
13 |        ".swf .ts .vfw .vid .video .viv .vivo .vob .vro .wm .wmv .wmx"
14 |        ".wrap .wvx .wx .x264 .xvid")
15 | EXT = tuple(EXT.split())
16 | 
17 | 
def scan_video(path):
    """Check a single video file and return its path.

    :param string path: absolute path to the video
    :return: `path`, unchanged
    :rtype: string
    :raises ValueError: if the file is smaller than 25 MB

    Errors from :func:`os.path.getsize` (e.g. a missing file) are not
    caught here.
    """
    # 25 MB expressed in bytes. A file of exactly this size is accepted.
    min_size = 25 * 1024 * 1024

    if os.path.getsize(path) < min_size:
        raise ValueError("Size less than 25 MB")

    # Todo: Should some processing happen here?
    return path
29 | 
30 | 
def scan_videos(paths):
    """Scan `paths` for videos.

    Entries that are files are handed to :func:`scan_video` directly.
    Entries that are directories are walked recursively; hidden
    sub-directories, hidden files, symbolic links and files without a known
    video extension (matched case-insensitively against ``EXT``) are
    skipped. Entries that are neither a file nor a directory are ignored.

    :params paths: absolute paths to scan for videos
    :type paths: list of string
    :return: paths of the videos accepted by :func:`scan_video`
    :rtype: list of string

    """
    videos = []

    # scan files that were listed explicitly
    for filepath in paths:
        if not os.path.isfile(filepath):
            continue

        try:
            videos.append(scan_video(filepath))
        except ValueError as e:
            # Arguments are passed separately; a single tuple would not
            # satisfy the two placeholders.
            logger.error('Skipping video %s: %s', filepath, e)

    # scan directories
    for path in paths:
        if not os.path.isdir(path):
            continue

        logger.info('Scanning directory %r', path)

        for dirpath, dirnames, filenames in os.walk(path):

            # skip hidden sub directories; `dirnames` is pruned in place so
            # that os.walk does not descend into them
            for dirname in list(dirnames):
                if dirname.startswith('.'):
                    logger.debug('Skipping hidden dirname %r in %r', dirname, dirpath)
                    dirnames.remove(dirname)

            # scan for videos
            for filename in filenames:

                # filter videos; lower-cased so "MOVIE.MKV" is found too
                if not filename.lower().endswith(EXT):
                    continue

                # skip hidden files
                if filename.startswith('.'):
                    logger.debug('Skipping hidden filename %r in %r', filename, dirpath)
                    continue

                filepath = os.path.join(dirpath, filename)

                # skip links
                if os.path.islink(filepath):
                    logger.debug('Skipping link %r in %r', filename, dirpath)
                    continue

                try:
                    video = scan_video(filepath)
                except ValueError as e:
                    logger.error('Skipping video %s: %s', filepath, e)
                    continue
                videos.append(video)

    return videos
90 | 


--------------------------------------------------------------------------------
/screenshot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dufferzafar/what-to-watch/09fe820e9407b5f008140b5c484ba8bef8d05d4d/screenshot.png


--------------------------------------------------------------------------------
/watch.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import re
  3 | import json
  4 | from shutil import copy
  5 | import logging
  6 | 
  7 | from urllib import urlopen, urlencode
  8 | 
  9 | from guessit import guess_file_info
 10 | 
 11 | from config import PATHS
 12 | from scanner import scan_videos
 13 | 
 14 | # Disable logging for scanner
 15 | logger = logging.getLogger("scanner")
 16 | logger.addHandler(logging.NullHandler())
 17 | 
 18 | # Enable for this file
 19 | logger = logging.getLogger(__name__)
 20 | logger.setLevel(logging.INFO)
 21 | logger.addHandler(logging.StreamHandler())
 22 | 
 23 | OMDB_URL = 'http://www.omdbapi.com/?'
 24 | 
 25 | 
 26 | def camelcase_to_underscore(string):
 27 |     """Convert string from ``CamelCase`` to ``under_score``."""
 28 |     return (re.sub('((?<=[a-z0-9])[A-Z]|(?!^)[A-Z](?=[a-z]))', r'_\1', string)
 29 |             .lower())
 30 | 
 31 | 
 32 | def omdb(title, year=None):
 33 |     """ Fetch data from OMDB API. """
 34 |     params = {'t': title.encode('ascii', 'ignore'),
 35 |               'plot': 'full',
 36 |               'type': 'movie',
 37 |               'tomatoes': 'true'}
 38 |     if year:
 39 |         params['y'] = year
 40 | 
 41 |     url = OMDB_URL + urlencode(params)
 42 |     logger.info('\033[33m' + "Fetching URL: %s" % url + '\033[0m')
 43 | 
 44 |     data = json.load(urlopen(url))
 45 | 
 46 |     rv = {}
 47 |     for key, val in data.items():
 48 |         rv[camelcase_to_underscore(key)] = val
 49 | 
 50 |     if rv['response'] == 'False':
 51 |         rv = None
 52 | 
 53 |     return rv, url
 54 | 
 55 | 
 56 | def get_movie_info(path):
 57 |     """Find movie information from a `path` to file."""
 58 | 
 59 |     # I've added this string to files that don't exist on OMDB
 60 |     if 'omdb' in path:
 61 |         return None
 62 | 
 63 |     # Use the guessit module to find details of a movie from name
 64 |     file = guess_file_info(os.path.basename(path))
 65 | 
 66 |     # BUG: Use some heuristics here too?
 67 |     if 'title' not in file:
 68 |         return None
 69 | 
 70 |     if not file['title']:
 71 |         return None
 72 | 
 73 |     # Use omdb to find ratings, genre etc. from title and year
 74 |     data, url = omdb(file['title'], file.get('year'))
 75 | 
 76 |     # Use the longest word as a title
 77 |     if not data:
 78 |         logger.warning('\033[35m' +
 79 |                        "OMDB 404 - %s. Retrying with longest word!" % url +
 80 |                        '\033[0m')
 81 |         data, url = omdb(max(file['title'].split(), key=len), file.get('year'))
 82 | 
 83 |     # Use the first word as title
 84 |     if not data:
 85 |         logger.warning('\033[35m' +
 86 |                        "OMDB 404 - %s. Retrying with first word!" % url +
 87 |                        '\033[0m')
 88 |         data, url = omdb(file['title'].split()[0], file.get('year'))
 89 | 
 90 |     # Still no luck :'(
 91 |     if not data:
 92 |         logger.warning('\033[35m' + "OMDB 404 - %s." % url + '\033[0m')
 93 |         return data
 94 | 
 95 |     # BUG: What if we end up fetching data of some other movie?
 96 |     if file['title'] != data['title']:
 97 |         logger.warning('\033[32m' +
 98 |                        "Titles don't match: %s - %s" % (file['title'], data['title']) +
 99 |                        '\033[0m')
100 | 
101 |     # Save the path to this movie in the data
102 |     data['movie_path'] = path
103 | 
104 |     return data
105 | 
106 | 
if __name__ == '__main__':
    # Scan the configured paths, make sure every video has a JSON data file
    # next to it, and copy that file into ~/Movies.
    videos = scan_videos(PATHS)

    # Folder that collects a copy of every movie-data file
    movies_dir = os.path.expanduser("~/Movies")

    for video_path in videos:

        logger.info("Processing: %s" % video_path)

        movie_json = video_path + ".json"

        try:
            with open(movie_json) as inp:
                info = json.load(inp)

            # Todo: What if I move the folder containing the movie to somewhere else
            # the movie_path key in the json will then point to wrong place.
            #
            # Fix:
            #
            # if info['movie_path'] != video_path:
            #     raise IOError

        # IOError: no data file yet. ValueError: the data file exists but is
        # not valid JSON. Either way the data is fetched and written afresh.
        except (IOError, ValueError):
            info = get_movie_info(video_path)

            if not info:
                logger.error('\033[31m' +
                             "No info found for: %s" % video_path +
                             '\033[0m')
                continue

            with open(movie_json, "w") as out:
                json.dump(info, out, indent=2)

        # Copy these movie-data files to a folder
        # so I can have a list of all installed movies :)
        file_name = "%s - %s.json" % (info["year"], info["title"])

        # A path separator inside the title would point the copy at a
        # sub-directory that does not exist
        file_name = file_name.replace(os.sep, "-")

        # The destination folder may not exist yet
        if not os.path.isdir(movies_dir):
            os.makedirs(movies_dir)

        copy(movie_json, os.path.join(movies_dir, file_name))

        # Group the movies according to categories and sort the categories
        # according to ratings

        # You can now do shit like 'Play the best comedy movie I have.'
149 | 


--------------------------------------------------------------------------------