├── requirements.txt ├── .gitignore ├── hlsdownload ├── debug.py ├── downloader.py └── __init__.py ├── setup.py ├── LICENSE └── README.md /requirements.txt: -------------------------------------------------------------------------------- 1 | m3u8>=0.3.0 2 | pycurl 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.egg-info 3 | build/ 4 | dist/ 5 | .coverage 6 | *.swp 7 | -------------------------------------------------------------------------------- /hlsdownload/debug.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import sys 3 | # Copyright 2016 Eyevinn Technology. All rights reserved 4 | # Use of this source code is governed by a MIT License 5 | # license that can be found in the LICENSE file. 6 | # Author: Jonas Birme (Eyevinn Technology) 7 | 8 | 9 | global doDebug 10 | doDebug = False 11 | 12 | def log(*args, **kwargs): 13 | if doDebug: 14 | print(*args, file=sys.stderr, **kwargs) 15 | 16 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from os.path import dirname, abspath, join, exists 2 | from setuptools import setup 3 | 4 | try: 5 | from pypandoc import convert 6 | read_md = lambda f: convert(f, 'rst') 7 | except ImportError: 8 | print("warning: pypandoc module not found, could not convert Markdown to RST") 9 | read_md = lambda f: open(f, 'r').read() 10 | 11 | long_description = None 12 | if exists("README.md"): 13 | long_description = read_md("README.md") 14 | 15 | install_reqs = [req for req in open(abspath(join(dirname(__file__), 'requirements.txt')))] 16 | 17 | setup( 18 | name = "hlsdownload", 19 | version = "0.0.17", 20 | author = "Jonas Birme", 21 | author_email = "jonas.birme@eyevinn.se", 22 | 
description = "Download HLS and convert to MP4", 23 | long_description=long_description, 24 | license = "MIT", 25 | install_requires=install_reqs, 26 | url = "https://github.com/Eyevinn/hls-downloader", 27 | packages = ['hlsdownload' ], 28 | entry_points = { 29 | 'console_scripts': [ 30 | 'hls-downloader=hlsdownload.downloader:main', 31 | ] 32 | } 33 | ) 34 | 35 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | hlsorigin is licensed under the MIT License: 2 | 3 | The MIT License 4 | 5 | Copyright (c) 2016- Eyevinn Technology info@eyevinn.se 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy of this 8 | software and associated documentation files (the "Software"), to deal in the Software 9 | without restriction, including without limitation the rights to use, copy, modify, 10 | merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 11 | permit persons to whom the Software is furnished to do so, subject to the following 12 | conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in all 15 | copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 18 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 19 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 20 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 21 | CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 22 | OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
23 | 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Description 2 | This package contains a script that, with the help of ffmpeg, downloads the segments of an 3 | HLS stream and converts and concatenates the video segments into MP4 files. HLS streams with 4 | discontinuities are supported. 5 | 6 | # Installation 7 | 8 | pip install hlsdownload 9 | 10 | # Usage 11 | 12 | hls-downloader "http://example.com/event/master.m3u8?t=2016-11-21T10:35:00Z-2016-11-21T10:45:00Z" outfile 13 | 14 | # Contribution 15 | We welcome contributions to this project. Just follow the normal procedure: fork 16 | this repository, create a topic branch for your fix and then submit a pull request. 17 | 18 | # License 19 | See LICENSE for details. 20 | 21 | ## About Eyevinn Technology 22 | 23 | Eyevinn Technology is an independent consultant firm specialized in video and streaming. Independent in a way that we are not commercially tied to any platform or technology vendor. 24 | 25 | At Eyevinn, every software developer consultant has a dedicated budget reserved for open source development and contribution to the open source community. This gives us room for innovation, team building and personal competence development. It also gives us, as a company, a way to contribute back to the open source community. 26 | 27 | Want to know more about Eyevinn and what it is like to work here? Contact us at work@eyevinn.se! 28 | -------------------------------------------------------------------------------- /hlsdownload/downloader.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Eyevinn Technology. All rights reserved 2 | # Use of this source code is governed by a MIT License 3 | # license that can be found in the LICENSE file. 
# Author: Jonas Birme (Eyevinn Technology)
import argparse
import logging
import sys
from hlsdownload import debug
from hlsdownload import HLSDownloader


def main():
    """Command-line entry point: download an HLS stream and convert it to MP4.

    Parses the command line, configures the ``hlsdownload`` logger to write
    to ``hls-downloader.log`` in the current directory, then runs the
    download / convert / concatenate pipeline.

    Returns:
        0 on success, 1 when the pipeline failed. The ``console_scripts``
        wrapper and the ``__main__`` guard pass this value to ``sys.exit``
        so that shell callers can detect failures.
    """
    parser = argparse.ArgumentParser(description='Download HLS and convert to MP4 files')
    parser.add_argument('hlsuri', metavar='HLSURI', default=None, help='URI to HLS master manifest')
    # nargs='?' makes the declared default reachable; without it argparse
    # treats the positional as mandatory and never uses the default.
    parser.add_argument('output', metavar='OUTPUT', nargs='?', default='out', help='Output name')
    parser.add_argument('--debug', dest='debug', action='store_true', default=False, help='Write debug info to stderr')
    parser.add_argument('--nocleanup', dest='nocleanup', action='store_true', default=False, help='Do not remove temp files')
    parser.add_argument('--nodownload', dest='nodownload', action='store_true', default=False, help='Do not download any segments')
    parser.add_argument('--singlebitrate', dest='bitrate', default=None, help='Download only one bitrate')
    # type=int lets argparse reject a non-numeric value with a usage error.
    parser.add_argument('--numretries', dest='retries', type=int, default=3, help='Number of times to retry downloading a failed segment. Default is 3')
    args = parser.parse_args()
    debug.doDebug = args.debug

    logger = logging.getLogger('hlsdownload')
    hdlr = logging.FileHandler('hls-downloader.log')
    formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
    hdlr.setFormatter(formatter)
    logger.addHandler(hdlr)
    logger.setLevel(logging.INFO)

    debug.log('Downloading HLS: %s' % args.hlsuri)
    logger.info("------------------------ NEW SESSION -------------------------")
    try:
        downloader = HLSDownloader(args.hlsuri, '.', not args.nocleanup, int(args.retries))
        downloader.writeDiscontinuityFile(args.output)
        downloader.toMP4(args.output, args.bitrate, not args.nodownload)
    except Exception as e:
        # Top-level boundary: record the failure in the log, tell the user
        # on stderr, and report it through the exit status instead of
        # exiting 0 as if everything had worked.
        logger.error('Unrecoverable error: ' + str(e))
        sys.stderr.write('Unrecoverable error: %s\n' % e)
        return 1
    return 0


if __name__ == '__main__':
    sys.exit(main())

# --------------------------------------------------------------------------
# /hlsdownload/__init__.py:
# --------------------------------------------------------------------------
# Copyright 2016 Eyevinn Technology. All rights reserved
# Use of this source code is governed by a MIT License
# license that can be found in the LICENSE file.
# Author: Jonas Birme (Eyevinn Technology)
"""Download the variants of an HLS stream and remux them to MP4 with ffmpeg."""

import m3u8
import shutil
import shlex
import ntpath
import os
import pycurl
import subprocess
import re
import logging
import operator
from collections import Counter
from threading import Thread

try:  # Python 3
    from queue import Queue
    from urllib.parse import urljoin, urlparse
except ImportError:  # Python 2
    from Queue import Queue
    from urlparse import urljoin, urlparse

# Imported explicitly: the module used ``debug`` without importing it and
# only worked when hlsdownload.downloader had imported the submodule first.
from . import debug

logger = logging.getLogger('hlsdownload')

# Captures the file name (without directory and ".ts" suffix) of a segment URI.
_SEGMENT_NAME_RE = re.compile(r'.*/(.*?)\.ts$')

# ffmpeg options used to remux a single TS segment to MP4 without re-encoding.
_REMUX_OPTS = '-acodec copy -avoid_negative_ts 1 -bsf:a aac_adtstoasc -vcodec copy -copyts'


def _removeQuietly(path):
    """Delete ``path`` if it exists; ignore the error if it cannot be removed."""
    try:
        os.remove(path)
    except OSError:
        pass


class HLSDownloader(object):
    """Download every variant of an HLS master manifest and convert it to MP4.

    Args:
        manifesturi: URI of the HLS master (variant) manifest.
        tmpdir: Directory under which one sub-directory per bitrate is
            created for the downloaded segments ('.' for the current one).
        cleanup: Remove the per-bitrate directories after ``toMP4``.
        retries: Number of times a failed segment download is retried.

    Raises:
        Exception: if the URI is not a master manifest or none of its media
            playlists could be loaded.
    """

    def __init__(self, manifesturi, tmpdir, cleanup=True, retries=3):
        self.manifesturi = manifesturi
        self.tmpdir = tmpdir
        self.bitrates = []
        self.cleanup = cleanup
        # Total number of attempts: the first try plus ``retries`` retries.
        self.retrylimit = retries + 1
        self._collectSegments()

    def _collectSegments(self):
        """Load the master manifest and build one SegmentList per variant."""
        logger.info('Downloading and parsing HLS manifest from %s' % self.manifesturi)
        m3u8_obj = m3u8.load(self.manifesturi)
        if not m3u8_obj.is_variant:
            raise Exception('%s is not a master manifest' % self.manifesturi)

        for mediaplaylist in m3u8_obj.playlists:
            # urljoin handles absolute, root-relative and plain relative
            # playlist URIs, and keeps the port of the manifest host.
            mediauri = urljoin(self.manifesturi, mediaplaylist.uri)
            debug.log('Building segment list from %s' % mediauri)
            try:
                logger.info('Downloading segment playlist from %s' % mediauri)
                bw = mediaplaylist.stream_info.average_bandwidth
                if not bw:
                    bw = mediaplaylist.stream_info.bandwidth
                segmentlist = SegmentList(mediauri, str(bw), self.tmpdir, self.retrylimit)
            except Exception as e:
                # A single broken variant must not abort the whole download.
                logger.error('Failed to download: %s' % str(e))
            else:
                logger.info('Segment playlist from %s downloaded and parsed' % mediauri)
                self.bitrates.append(segmentlist)
        if not self.bitrates:
            raise Exception('No segment playlists that could be downloaded was found')

        self._alignSegmentLists()

    def _alignSegmentLists(self):
        """Trim the segment lists so all variants start together and have equal length.

        This is to handle the edge case where the segment lists differ in
        length and start segment; a special case that should not happen.
        """
        listlengths = [segmentlist.getLength() for segmentlist in self.bitrates]
        debug.log('Shortest list length %d' % min(listlengths))
        debug.log('Longest list length %d' % max(listlengths))

        headsegments = Counter(segmentlist.getFirstSegment() for segmentlist in self.bitrates)
        debug.log(dict(headsegments))

        # The start segment shared by most variants wins.
        winner = headsegments.most_common(1)[0][0]
        if len(headsegments) > 1:
            debug.log('First segment differs and we have chosen %s as winner' % winner)
            for segmentlist in self.bitrates:
                if segmentlist.getFirstSegment() != winner:
                    segmentlist.removeFirstSegment()

        # Cut every list down to the shortest one. ``while`` rather than a
        # single removal, so lists that are more than one segment longer end
        # up with the same length too.
        shortestlength = min(segmentlist.getLength() for segmentlist in self.bitrates)
        debug.log(shortestlength)
        for segmentlist in self.bitrates:
            while segmentlist.getLength() > shortestlength:
                segmentlist.removeLastSegment()

        # Sanity check
        for segmentlist in self.bitrates:
            debug.log('First segment: %s of (%d)' % (segmentlist.getFirstSegment(), segmentlist.getLength()))
        firstsegments = Counter(segmentlist.getFirstSegment() for segmentlist in self.bitrates)
        debug.log('Keys %d' % len(firstsegments))
        if len(firstsegments) > 1:
            debug.log(dict(firstsegments))
            logger.warning("First segment in segment lists differs")

    def _selected(self, bitrate=None):
        """Return the segment lists matching ``bitrate`` (all of them when falsy)."""
        if not bitrate:
            return list(self.bitrates)
        # Bitrates are stored as strings; compare as strings so that an
        # integer argument matches as well.
        wanted = str(bitrate)
        return [sl for sl in self.bitrates if str(sl.getBitrate()) == wanted]

    def _downloadSegments(self, bitrate=None):
        """Download the segments of the selected variant(s)."""
        if bitrate:
            for segmentlist in self.bitrates:
                debug.log('Specified bitrate to download %s (%s)' % (bitrate, segmentlist.getBitrate()))
        for segmentlist in self._selected(bitrate):
            segmentlist.download()

    def _convertSegments(self, bitrate=None):
        """Remux the downloaded TS segments of the selected variant(s) to MP4."""
        for segmentlist in self._selected(bitrate):
            segmentlist.convert()

    def _concatSegments(self, output, bitrate=None):
        """Concatenate the MP4 segments of the selected variant(s)."""
        for segmentlist in self._selected(bitrate):
            segmentlist.concat(output)

    def _cleanup(self):
        """Remove the download directory of every variant."""
        for segmentlist in self.bitrates:
            segmentlist.cleanup()

    def writeDiscontinuityFile(self, output):
        """Write the discontinuity positions (seconds) to ``<output>.txt``, one per line."""
        # We can assume that all bitrates are aligned so we only
        # need to look at one of the bitrates
        segmentlist = self.bitrates[0]
        with open(output + '.txt', 'w') as f:
            for d in segmentlist.getDiscontinuities():
                f.write(str(d) + '\n')

    def toMP4(self, output, bitrate=None, download=True):
        """Download, convert and concatenate the stream into MP4 file(s).

        Args:
            output: Output name prefix; files are named
                ``<output>-<bitrate>.mp4``.
            bitrate: Restrict processing to the variant with this bitrate;
                ``None`` processes every variant.
            download: Set to False to skip the download step.

        Raises:
            Exception: if a segment could not be downloaded or converted, or
                if ffmpeg failed.
        """
        if bitrate and not self._selected(bitrate):
            logger.warning('No variant with bitrate %s found in manifest' % bitrate)
        if download:
            self._downloadSegments(bitrate)
        self._convertSegments(bitrate)
        self._concatSegments(output, bitrate)
        if self.cleanup:
            self._cleanup()


class SegmentList(object):
    """The segments of one media playlist (one bitrate) and their processing.

    Args:
        mediaplaylisturi: URI of the media playlist.
        bitrate: Bitrate label; used for the directory and output file names.
        downloaddir: Parent directory for the per-bitrate download directory.
        retrylimit: Maximum number of download attempts per segment.
    """

    def __init__(self, mediaplaylisturi, bitrate, downloaddir, retrylimit=4):
        self.mediaplaylisturi = mediaplaylisturi
        self.bitrate = bitrate
        # downloaddir is kept as a string with a trailing '/' because file
        # names are appended to it by plain concatenation.
        if not downloaddir == '.':
            self.downloaddir = downloaddir + '/' + str(self.bitrate) + '/'
        else:
            self.downloaddir = str(self.bitrate) + '/'
        self.downloadedsegs = []
        self.mp4segs = []
        self.m3u8_obj = m3u8.load(self.mediaplaylisturi)
        self.q = Queue()
        self.cq = Queue()
        self.num_worker_threads = 10
        self.failedDownloads = False
        self.failedConversions = False
        self.retrylimit = retrylimit

    def getFirstSegment(self):
        """Return the base name of the first segment, or None if it cannot be determined."""
        segments = self.m3u8_obj.segments
        if not segments:
            return None
        m = _SEGMENT_NAME_RE.match(segments[0].uri)
        if m:
            return m.group(1)
        return None

    def getLength(self):
        """Return the number of segments in the playlist."""
        return len(self.m3u8_obj.segments)

    def getBitrate(self):
        """Return the bitrate label given at construction."""
        return self.bitrate

    def removeFirstSegment(self):
        """Drop the first segment (no-op on an empty list)."""
        if self.m3u8_obj.segments:
            self.m3u8_obj.segments.pop(0)

    def removeLastSegment(self):
        """Drop the last segment (no-op on an empty list)."""
        if self.m3u8_obj.segments:
            self.m3u8_obj.segments.pop()

    def _startWorkers(self, target):
        """Start ``num_worker_threads`` daemon threads running ``target``."""
        for _ in range(self.num_worker_threads):
            t = Thread(target=target)
            t.daemon = True
            t.start()

    def _stopWorkers(self, workqueue):
        """Queue one ``None`` sentinel per worker so the threads exit."""
        for _ in range(self.num_worker_threads):
            workqueue.put(None)

    @staticmethod
    def _fetch(remoteurl, target):
        """Download ``remoteurl`` to the file ``target``.

        Raises:
            pycurl.error: on a transfer error or a non-200 HTTP status.
        """
        c = pycurl.Curl()
        try:
            with open(target, 'wb') as fp:
                c.setopt(c.URL, remoteurl)
                c.setopt(c.WRITEDATA, fp)
                c.perform()
            status = c.getinfo(pycurl.HTTP_CODE)
        finally:
            c.close()
        if status != 200:
            logger.error("FAILED to download %s: %d" % (remoteurl, status))
            raise pycurl.error()

    def _processDownload(self, item):
        """Download one queued item; requeue it or flag failure on error."""
        target = item['downloaddir'] + item['localfname']
        try:
            debug.log('Downloading %s to %s%s (retries=%d)' % (item['remoteurl'], item['downloaddir'], item['localfname'], self.retrylimit - 1))
            self._fetch(item['remoteurl'], target)
        except Exception as e:
            # Caught broadly on purpose: an uncaught error would kill the
            # worker thread and the segment would silently go missing.
            logger.error('Caught exception while downloading %s' % item['remoteurl'])
            debug.log(str(e))
            # A partial file would be mistaken for a finished download on
            # the next run.
            _removeQuietly(target)
            item['retries'] += 1
            if item['retries'] < self.retrylimit:
                logger.info('Retry counter is %d, will try again' % item['retries'])
                self.q.put(item)
            else:
                logger.error('Retry counter exceeded for %s' % item['localfname'])
                self.failedDownloads = True
        else:
            self.downloadedsegs.append((item['order'], item['localfname']))

    def downloadWorker(self):
        """Worker loop: download queued segments until a ``None`` sentinel arrives."""
        # Parentheses are required: ``'%d' % x - 1`` formats first and then
        # tries to subtract 1 from the resulting string.
        logger.info('Starting download worker (retries=%d)' % (self.retrylimit - 1))
        while True:
            item = self.q.get()
            try:
                if item is None:
                    break
                self._processDownload(item)
            finally:
                # A retry is re-queued before this runs, so join() cannot
                # return while a retry is still pending.
                self.q.task_done()

    def download(self):
        """Download all segments of the playlist into the download directory.

        Segments whose file already exists locally are not downloaded again
        but are still queued for conversion.

        Raises:
            Exception: if at least one segment could not be downloaded.
        """
        if not os.path.exists(self.downloaddir):
            os.makedirs(self.downloaddir)
        logger.info("Downloading segments from %s" % self.mediaplaylisturi)
        # Start from a clean slate so a repeated call does not duplicate
        # entries in the conversion and concat lists.
        self.downloadedsegs = []
        self.mp4segs = []
        self.failedDownloads = False
        self._startWorkers(self.downloadWorker)
        for order, seg in enumerate(self.m3u8_obj.segments):
            head, tail = ntpath.split(self.downloaddir + seg.uri)
            localfname = tail
            if os.path.isfile(self.downloaddir + localfname):
                self.downloadedsegs.append((order, localfname))
            else:
                item = {
                    # urljoin also copes with absolute and root-relative
                    # segment URIs, unlike plain concatenation.
                    'remoteurl': urljoin(self.m3u8_obj.base_uri, seg.uri),
                    'localfname': localfname,
                    'downloaddir': self.downloaddir,
                    'retries': 0,
                    'order': order
                }
                self.q.put(item)
            mp4fname = localfname + '.mp4'
            self.mp4segs.append(mp4fname)
        self.q.join()
        self._stopWorkers(self.q)
        if self.failedDownloads:
            logger.error('Some segments failed to download, raising exception')
            raise Exception('Some segments failed to download')
        else:
            logger.info("All segments downloaded")

    def convertWorker(self):
        """Worker loop: remux queued TS segments until a ``None`` sentinel arrives."""
        while True:
            item = self.cq.get()
            try:
                if item is None:
                    break
                source = item['downloaddir'] + item['localfname']
                target = item['downloaddir'] + item['mp4fname']
                debug.log('Converting %s%s to %s%s' % (item['downloaddir'], item['localfname'], item['downloaddir'], item['mp4fname']))
                if not os.path.isfile(target):
                    try:
                        FFMpegCommand(source, target, _REMUX_OPTS)
                    except Exception:
                        # Do not leave a truncated MP4 behind; it would be
                        # taken for a finished conversion on the next run.
                        _removeQuietly(target)
                        raise
            except Exception as e:
                logger.error('Failed to convert %s: %s' % (item['localfname'], str(e)))
                self.failedConversions = True
            finally:
                # Always acknowledge the item; otherwise cq.join() in
                # convert() would block forever after a failure.
                self.cq.task_done()

    def convert(self):
        """Remux every downloaded TS segment to an MP4 file next to it.

        Raises:
            Exception: if at least one segment could not be converted.
        """
        logger.info("Converting downloaded TS segments to MP4 files")
        self.failedConversions = False
        self._startWorkers(self.convertWorker)

        for segfname in sorted(self.downloadedsegs, key=operator.itemgetter(0)):
            debug.log('Processing %s (%s)' % (segfname[1], segfname[0]))
            mp4fname = segfname[1] + '.mp4'
            item = {
                'downloaddir': self.downloaddir,
                'localfname': segfname[1],
                'mp4fname': mp4fname
            }
            self.cq.put(item)
        self.cq.join()
        self._stopWorkers(self.cq)
        if self.failedConversions:
            logger.error('Some segments failed to convert, raising exception')
            raise Exception('Some segments failed to convert')

    def concat(self, outputname):
        """Concatenate the MP4 segments into ``<outputname>-<bitrate>.mp4``.

        Does nothing when the output file already exists or when there are
        no segments to concatenate.
        """
        output = outputname + '-' + str(self.bitrate) + '.mp4'
        logger.info("Converting segments and writing to %s" % output)
        if os.path.isfile(output):
            return
        if not self.mp4segs:
            logger.warning('No segments to concatenate for %s' % output)
            return
        # Only the base name goes into the list file name, so an output
        # name that contains a directory does not point into a
        # non-existent sub-directory of the download directory.
        lstpath = self.downloaddir + os.path.basename(output) + '.lst'
        with open(lstpath, 'w') as lstfile:
            for mp4fname in self.mp4segs:
                # Entries are resolved by ffmpeg relative to the list file.
                # Single quotes are escaped the way ffmpeg's concat script
                # syntax expects.
                lstfile.write("file '%s'\n" % mp4fname.replace("'", "'\\''"))
        FFMpegConcat(lstpath, output)
        logger.info("Segments converted")

    def getDiscontinuities(self):
        """Return the start positions (seconds) of segments flagged as discontinuities."""
        discont = []
        position = 0.0
        for seg in self.m3u8_obj.segments:
            if seg.discontinuity:
                discont.append(position)
            position += float(seg.duration)
        return discont

    def cleanup(self):
        """Remove the download directory and everything in it."""
        if os.path.exists(self.downloaddir):
            shutil.rmtree(self.downloaddir)


def runcmd(cmd, name):
    """Run the command ``cmd`` (argument list) and return its exit status.

    The command's output is discarded unless debug output is enabled.

    Args:
        cmd: Command and arguments as a list.
        name: Human-readable tool name used in the error message.

    Raises:
        Exception: if the tool exits with a non-zero status or cannot be
            started.
    """
    debug.log('COMMAND: %s' % cmd)
    try:
        # check_call (not call) so that a non-zero exit status actually
        # raises CalledProcessError and reaches the handler below.
        if debug.doDebug:
            return subprocess.check_call(cmd)
        with open(os.devnull, 'w') as devnull:
            return subprocess.check_call(cmd, stdout=devnull, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        message = "binary tool failed with error %d" % e.returncode
        raise Exception(message)
    except OSError:
        raise Exception('Command %s not found, ensure that it is in your path' % name)


def FFMpegCommand(infile, outfile, opts):
    """Run ``ffmpeg -i infile <opts> outfile``; ``opts`` is a shell-style option string."""
    cmd = ['ffmpeg', '-i', infile]
    cmd += shlex.split(opts)
    cmd.append(outfile)
    return runcmd(cmd, 'ffmpeg')


def FFMpegConcat(lstfile, outfile):
    """Concatenate the files listed in ``lstfile`` into ``outfile`` without re-encoding."""
    cmd = ['ffmpeg', '-f', 'concat', '-safe', '0', '-i', lstfile, '-c', 'copy', outfile]
    return runcmd(cmd, 'ffmpeg')