├── LICENSE ├── README.md ├── demo.lua └── torch-ffmpeg.lua /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Carl Vondrick 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # torch-ffmpeg 2 | This is a simple wrapper for FFmpeg in Torch7. There are a couple of other wrappers for FFmpeg already, but I found them difficult to install. 3 | 4 | This wrapper: 5 | - talks to FFmpeg via Unix pipes so it is easy to install 6 | - it is a single Lua file (and only 100 lines), so easy to modify 7 | - it doesn't write to disk, so it is reasonably fast 8 | 9 | ## Usage 10 | 11 | The `demo.lua` code shows how to use it. 
It's pretty easy: 12 | 13 | require 'torch-ffmpeg' 14 | vid = FFmpeg('video.mp4') 15 | frames = vid:read(10) 16 | vid:close() 17 | 18 | `frames` will be a T x 3 x H x W tensor, where T is the number of frames read, and H and W are the height and width. In the example above, T = 10. 19 | 20 | ## Options 21 | 22 | If you want to specify different options, such as a different starting point or a different frame rate, you can pass additional options to FFmpeg like so: 23 | 24 | vid = FFmpeg('video.mp4', '-r 10') -- 10 fps 25 | vid = FFmpeg('video.mp4', '-ss 00:00:07') -- seek to 7sec mark 26 | vid = FFmpeg('video.mp4', '-s 100x100') -- downsample resolution to 100x100 27 | vid = FFmpeg('video.mp4', '-r 10 -s 100x100') -- frame rate and downsample 28 | 29 | Note that seeking is approximate, but fast. 30 | -------------------------------------------------------------------------------- /demo.lua: -------------------------------------------------------------------------------- 1 | require 'torch-ffmpeg' 2 | 3 | -- Open up video 4 | -- If you have a recent enough FFmpeg, it can read from HTTP 5 | vid = FFmpeg('http://www.flickr.com/videos/29833755@N08/4415516615/play/orig/4d40e1ea19') 6 | 7 | -- Reads 10 frames into a tensor 8 | frames = vid:read(10) 9 | 10 | print('Frame tensor is size:') 11 | print(frames:size()) 12 | 13 | require 'image' 14 | image.save('foo.jpg', frames[1]) 15 | 16 | ---- Reads frame by frame until there are no more 17 | counter = 0 18 | while true do 19 | if vid:read(1) == nil then 20 | break 21 | end 22 | counter = counter + 1 23 | end 24 | print('There are '.. counter .. 
' more frames') 25 | 26 | -- Close the stream 27 | vid:close() 28 | 29 | -- Retrieve some meta-data 30 | print('Some stats:') 31 | print(vid:stats()) 32 | -------------------------------------------------------------------------------- /torch-ffmpeg.lua: --------------------------------------------------------------------------------
do
  -- `torch` is normally preloaded by the `th` interpreter, in which case this
  -- require is a cached no-op; stating it explicitly lets the file also load
  -- from an interpreter that has not pulled torch in yet.
  require 'torch'

  --- Video reader that decodes frames by piping binary PPM images out of an
  -- `ffmpeg` subprocess. `torch.class` publishes the class under the name
  -- 'FFmpeg'; the subprocess itself is only started by the first `read`.
  local FFmpeg = torch.class('FFmpeg')

  --- Split one `key=value` line (as printed by ffprobe) into its two halves.
  -- Uses a single anchored match instead of iterating `[^=]*` with gmatch,
  -- because the position of the value in a gmatch-built list depends on how
  -- the Lua version reports empty matches.
  -- @param line a line of text
  -- @return key, value strings, or nil when the line contains no `=`
  local function parse_stat_line(line)
    return line:match('^([^=]+)=(.*)$')
  end

  --- Convert an ffprobe frame-rate value to a number.
  -- Accepts the usual `num/den` form and also a bare number.
  -- @param value string such as "30000/1001" or "25"
  -- @return the rate as a number, or nil when it cannot be parsed
  local function parse_frame_rate(value)
    local num, den = value:match('^([^/]+)/([^/]+)$')
    num, den = tonumber(num), tonumber(den)
    if num ~= nil and den ~= nil then
      return num / den
    end
    return tonumber(value)
  end

  --- Remember which video to read; nothing is opened here.
  -- @param video_path path or URL of the video. NOTE: it is concatenated
  --   unquoted into a shell command line by `read` and `stats`, so the caller
  --   is responsible for escaping spaces and shell metacharacters.
  -- @param opts optional string of extra ffmpeg arguments, placed after the
  --   input on the command line (defaults to the empty string)
  function FFmpeg:__init(video_path, opts)
    self.video_path = video_path
    self.opts = opts or ''
    self.valid = true
    self.fd = nil
  end

  --- Read the next `nframes` frames from the video.
  -- The ffmpeg pipe is opened on the first call and kept open afterwards, so
  -- consecutive calls continue where the previous one stopped.
  -- @param nframes number of frames to read, >= 1 (defaults to 1)
  -- @return a ByteTensor viewed as nframes x 3 x height x width, or nil when
  --   the pipe reports an error (for example end of stream) before all
  --   `nframes` frames were read; frames decoded earlier in that same call are
  --   discarded and `self.valid` is set to false
  function FFmpeg:read(nframes)
    if nframes == nil then
      nframes = 1
    end
    -- A count below 1 would skip the loop and fail later on a nil tensor;
    -- reject it up front with a message that names the actual problem.
    if type(nframes) ~= 'number' or nframes < 1 then
      error('FFmpeg:read expects nframes to be a number >= 1', 2)
    end

    if self.fd == nil then
      -- open ffmpeg pipe
      -- this subprocess will send raw RGB values to us, corresponding to frames
      local cmd = 'ffmpeg -i ' .. self.video_path .. ' ' .. self.opts .. ' -f image2pipe -pix_fmt rgb24 -loglevel fatal -vcodec ppm -'
      self.fd = assert(torch.PipeFile(cmd))
      self.fd:binary()
      -- quiet mode: failures are reported through hasError() instead of raising
      self.fd:quiet()
    end

    local fd = self.fd
    local frames -- nframes x height x width x 3, allocated once the size is known
    local frame  -- height x width x 3 staging buffer for a single image

    for i = 1, nframes do
      -- binary PPM header: magic number, "width height", maximum sample value
      local magic_str = fd:readString('*l')
      local dim_str = fd:readString('*l')
      local max_str = fd:readString('*l')

      if fd:hasError() then
        self.valid = false
        return nil
      end

      assert(magic_str == 'P6', 'FFmpeg:read: expected a binary PPM (P6) header from ffmpeg')
      assert(tonumber(max_str) == 255, 'FFmpeg:read: expected 8-bit PPM samples (max value 255)')

      if i == 1 then
        -- the frame size is taken from the first header of this call only
        local dim = {}
        for k in string.gmatch(dim_str, '%d+') do
          dim[#dim + 1] = tonumber(k)
        end
        assert(#dim == 2, 'FFmpeg:read: could not parse PPM dimensions')

        local width, height = dim[1], dim[2]
        frames = torch.ByteTensor(nframes, height, width, 3)
        frame = torch.ByteTensor(height, width, 3)
      end

      -- fill the staging buffer's storage with the pixel bytes of this image
      fd:readByte(frame:storage())

      if fd:hasError() then
        self.valid = false
        return nil
      end

      frames[i]:copy(frame)
    end

    -- move the channel dimension in front of height and width
    return frames:permute(1, 4, 2, 3)
  end

  --- Close the ffmpeg pipe if one is open and mark the reader as not valid.
  -- Does nothing when no pipe is open.
  function FFmpeg:close()
    if self.fd ~= nil then
      self.fd:close()
      self.fd = nil
      self.valid = false
    end
  end

  --- Query video metadata with `ffprobe`.
  -- Runs a separate ffprobe process (independent of the frame pipe) and reads
  -- up to four `key=value` lines from it.
  -- @return a table with whichever of `width`, `height`, `duration` and
  --   `r_frame_rate` were reported with numeric values; `r_frame_rate` is
  --   converted from `num/den` to a single number. Nothing is stored on self.
  function FFmpeg:stats()
    local cmd = 'ffprobe -select_streams v -v error -show_entries stream=width,height,duration,r_frame_rate -of default=noprint_wrappers=1 ' .. self.video_path
    local fd = assert(torch.PipeFile(cmd))
    fd:quiet()

    local retval = {}

    for _ = 1, 4 do
      local line = fd:readString('*l')
      if fd:hasError() then
        -- kept from the original behaviour: a short or failed probe also
        -- clears the reader's `valid` flag
        self.valid = false
        break
      end

      local key, value = parse_stat_line(line)
      if key ~= nil then
        if key == 'r_frame_rate' then
          retval[key] = parse_frame_rate(value)
        else
          -- non-numeric values convert to nil, which leaves the key unset
          retval[key] = tonumber(value)
        end
      end
    end

    fd:close()

    return retval
  end
end
--------------------------------------------------------------------------------