├── .gitignore ├── Readme.md ├── config.py.example ├── requirements.txt ├── scanner.py ├── screenshot.png └── watch.py /.gitignore: -------------------------------------------------------------------------------- 1 | # This file stores the configuration 2 | config.py 3 | -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | # What to watch? 2 | 3 | I have a whole lot of unwatched movies, and deciding what to watch is not an easy thing to do. 4 | 5 | Let's automate! 6 | 7 | ![Scanning all my movies](screenshot.png) 8 | 9 | *Currently, it only scans folders for movies, fetches their data from the OMDB API, and saves the json files in a folder. I then use some `jq` magic to filter out movies. I'll probably code something custom that allows you to better filter out movies.* 10 | 11 | # Usage 12 | 13 | Copy the sample config file: `cp config.py.example config.py` 14 | 15 | Add your movie folder paths to the `PATHS` list in the config file. 16 | 17 | Run `python watch.py` 18 | 19 | # Todo 20 | 21 | * A Custom Filter Tool 22 | * Takes in parameters like rating, genre, actors etc. 23 | * Lists out the best OR top-n matches 24 | 25 | *Currently, I do stuff like: `jq ". | select(.imdb_rating != \"N/A\") | select(.imdb_rating | tonumber >= 8.0) | .title, .imdb_rating" < *.json`* 26 | 27 | * This can seriously become a movie database 'tagger' like Picard - all it needs is a sleek GUI. 28 | 29 | * Errors can be handled by searching rather than taking the top result; it'll require some human intervention though. 30 | 31 | * We might need to get back to the omdb package if we need to add searching etc back.
"""Locate movie-sized video files on disk."""
import os
import logging


logger = logging.getLogger(__name__)

# Smallest size, in bytes, a file may have and still count as a movie (25 MB).
MIN_VIDEO_SIZE = 25 * 1024 * 1024

# All video extensions.
# Every fragment below ends with a space on purpose: adjacent string literals
# are joined without any separator, so a missing space fuses two extensions
# into one bogus entry (e.g. ".bix.box") and neither of them ever matches.
EXT = (".3g2 .3gp .3gp2 .3gpp .60d .ajp .asf .asx .avchd .avi .bik .bix "
       ".box .cam .dat .divx .dmf .dv .dvr-ms .evo .flc .fli .flic .flv "
       ".flx .gvi .gvp .h264 .m1v .m2p .m2ts .m2v .m4e .m4v .mjp .mjpeg "
       ".mjpg .mkv .moov .mov .movhd .movie .movx .mp4 .mpe .mpeg .mpg "
       ".mpv .mpv2 .mxf .nsv .nut .ogg .ogm .omf .ps .qt .ram .rm .rmvb "
       ".swf .ts .vfw .vid .video .viv .vivo .vob .vro .wm .wmv .wmx "
       ".wrap .wvx .wx .x264 .xvid")
EXT = tuple(EXT.split())


def scan_video(path):
    """Scan a video from a video `path`.

    :param string path: absolute path to the video
    :return: `path`, unchanged, when the file is large enough
    :rtype: string
    :raises ValueError: if the file is smaller than 25 MB
    :raises OSError: if the size of the file cannot be read

    """
    if os.path.getsize(path) < MIN_VIDEO_SIZE:
        raise ValueError("Size less than 25 MB")

    # Todo: Should some processing happen here?
    return path


def scan_videos(paths):
    """Scan `paths` for videos.

    Entries of `paths` that are files are checked directly (no extension
    filter is applied to them). Entries that are directories are walked
    recursively, skipping hidden directories, hidden files, symbolic links
    and files whose extension is not listed in `EXT`.

    :params paths: absolute paths to scan for videos
    :type paths: list of string
    :return: paths of the videos that passed :func:`scan_video`
    :rtype: list of string

    """
    # `paths` is traversed twice, so materialize it in case it is an iterator
    paths = list(paths)
    videos = []

    # scan files
    for filepath in paths:
        if not os.path.isfile(filepath):
            continue
        try:
            videos.append(scan_video(filepath))
        except (ValueError, OSError) as e:
            logger.error('Skipping video %s: %s', filepath, e)

    # scan directories
    for path in paths:
        if not os.path.isdir(path):
            continue

        logger.info('Scanning directory %r', path)

        for dirpath, dirnames, filenames in os.walk(path):

            # skip hidden sub directories; pruning `dirnames` in place keeps
            # os.walk from descending into them
            for dirname in list(dirnames):
                if dirname.startswith('.'):
                    logger.debug('Skipping hidden dirname %r in %r',
                                 dirname, dirpath)
                    dirnames.remove(dirname)

            # scan for videos
            for filename in filenames:

                # filter videos (extensions are matched case-insensitively)
                if not filename.lower().endswith(EXT):
                    continue

                # skip hidden files
                if filename.startswith('.'):
                    logger.debug('Skipping hidden filename %r in %r',
                                 filename, dirpath)
                    continue

                filepath = os.path.join(dirpath, filename)

                # skip links
                if os.path.islink(filepath):
                    logger.debug('Skipping link %r in %r', filename, dirpath)
                    continue

                # one small or unreadable file must not abort the whole scan
                try:
                    video = scan_video(filepath)
                except (ValueError, OSError) as e:
                    logger.error('Skipping video %s: %s', filepath, e)
                    continue
                videos.append(video)

    return videos
"""Fetch OMDB metadata for every movie found under the configured paths."""
import os
import re
import json
from contextlib import closing
from shutil import copy
import logging

try:  # Python 3
    from urllib.parse import urlencode
    from urllib.request import urlopen
except ImportError:  # Python 2
    from urllib import urlopen, urlencode

# Disable logging for scanner
logging.getLogger("scanner").addHandler(logging.NullHandler())

# Enable for this file
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logger.addHandler(logging.StreamHandler())

OMDB_URL = 'http://www.omdbapi.com/?'

# Matches the capital letter that starts a new word inside a CamelCase name
_CAMELCASE_BOUNDARY = re.compile(
    r'((?<=[a-z0-9])[A-Z]|(?!^)[A-Z](?=[a-z]))')


def camelcase_to_underscore(string):
    """Convert string from ``CamelCase`` to ``under_score``."""
    return _CAMELCASE_BOUNDARY.sub(r'_\1', string).lower()


def build_omdb_url(title, year=None):
    """Return the OMDB request URL for `title` (and `year`, when given)."""
    # Send the title as UTF-8 so accented characters survive; dropping them
    # ("Amélie" -> "Amlie") makes the lookup miss.
    if not isinstance(title, bytes):
        title = title.encode('utf-8')

    params = {'t': title,
              'plot': 'full',
              'type': 'movie',
              'tomatoes': 'true'}
    if year:
        params['y'] = year

    return OMDB_URL + urlencode(params)


def omdb(title, year=None):
    """Fetch data from OMDB API.

    :param title: movie title to look up
    :param year: optional release year used to narrow the lookup
    :return: ``(data, url)`` where `data` is the response with its keys
        converted to ``under_score`` form, or None when the request failed
        or the API answered ``Response: "False"``; `url` is the requested URL
    :rtype: tuple
    """
    url = build_omdb_url(title, year)
    logger.info('\033[33m' + "Fetching URL: %s" % url + '\033[0m')

    # A network hiccup or a garbled reply for one movie must not abort the
    # whole run, so report it and treat it like "not found".
    try:
        with closing(urlopen(url)) as response:
            data = json.load(response)
    except (IOError, ValueError) as e:
        logger.warning('\033[35m' +
                       "OMDB request failed - %s: %s" % (url, e) +
                       '\033[0m')
        return None, url

    rv = {}
    for key, val in data.items():
        rv[camelcase_to_underscore(key)] = val

    if rv.get('response') == 'False':
        rv = None

    return rv, url


def get_movie_info(path):
    """Find movie information from a `path` to file.

    :param string path: path to the movie file
    :return: the OMDB data with an added ``movie_path`` key, or None when no
        title could be guessed or OMDB has no match
    :rtype: dict or None
    """

    # I've added this string to files that don't exist on OMDB
    if 'omdb' in path:
        return None

    # Imported here so the rest of the module can be used without guessit
    from guessit import guess_file_info

    # Use the guessit module to find details of a movie from name
    guess = guess_file_info(os.path.basename(path))

    # BUG: Use some heuristics here too?
    title = guess.get('title')
    if not title:
        return None

    words = title.split()
    if not words:
        # whitespace-only title: nothing to query with
        return None

    year = guess.get('year')

    # Queries to try in order: the full title, then its longest word, then
    # its first word. The second element names the fallback for the log.
    attempts = [(title, None),
                (max(words, key=len), "longest word"),
                (words[0], "first word")]

    data, url = None, None
    tried = set()
    for query, fallback in attempts:
        # A one-word title makes all three queries identical; don't repeat
        # a request that is already known to have failed.
        if query in tried:
            continue
        tried.add(query)

        if fallback:
            logger.warning('\033[35m' +
                           "OMDB 404 - %s. Retrying with %s!" % (url, fallback) +
                           '\033[0m')

        # Use omdb to find ratings, genre etc. from title and year
        data, url = omdb(query, year)
        if data:
            break

    # Still no luck :'(
    if not data:
        logger.warning('\033[35m' + "OMDB 404 - %s." % url + '\033[0m')
        return None

    # BUG: What if we end up fetching data of some other movie?
    if title != data.get('title'):
        logger.warning('\033[32m' +
                       "Titles don't match: %s - %s" % (title, data.get('title')) +
                       '\033[0m')

    # Save the path to this movie in the data
    data['movie_path'] = path

    return data


def main():
    """Scan the configured paths and store OMDB data next to each movie."""
    # Imported here so the helpers above stay importable without a config.py
    from config import PATHS
    from scanner import scan_videos

    collection_dir = os.path.expanduser("~/Movies")

    for video_path in scan_videos(PATHS):

        logger.info("Processing: %s" % video_path)

        movie_json = video_path + ".json"

        try:
            with open(movie_json) as inp:
                info = json.load(inp)

            # Todo: What if I move the folder containing the movie to somewhere else
            # the movie_path key in the json will then point to wrong place.
            #
            # Fix:
            #
            # if info['movie_path'] != video_path:
            #     raise IOError

        except (IOError, ValueError):
            # No cached data yet (IOError) or the cache file is not valid
            # JSON (ValueError): fetch the data again.
            info = get_movie_info(video_path)

            if not info:
                logger.error('\033[31m' +
                             "No info found for: %s" % video_path +
                             '\033[0m')
                continue

            with open(movie_json, "w") as out:
                json.dump(info, out, indent=2)

        # Copy these movie-data files to a folder
        # so I can have a list of all installed movies :)
        name = "%s - %s.json" % (info["year"], info["title"])
        # A title such as "Face/Off" would otherwise be read as a directory
        name = name.replace("/", "-").replace(os.sep, "-")
        if not os.path.isdir(collection_dir):
            os.makedirs(collection_dir)
        copy(movie_json, os.path.join(collection_dir, name))

    # Group the movies according to categories and sort the categories
    # according to ratings

    # You can now do shit like 'Play the best comedy movie I have.'


if __name__ == '__main__':
    main()