├── .gitignore ├── decoder ├── artifacts │ ├── stream.h264 │ └── stream.h265 ├── decoder_test.go └── decoder.go ├── go.mod ├── README.md ├── go.sum ├── LICENSE └── rgb └── rgb.go /.gitignore: -------------------------------------------------------------------------------- 1 | decoder/artifacts/frames/ -------------------------------------------------------------------------------- /decoder/artifacts/stream.h264: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike1808/h264decoder/HEAD/decoder/artifacts/stream.h264 -------------------------------------------------------------------------------- /decoder/artifacts/stream.h265: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mike1808/h264decoder/HEAD/decoder/artifacts/stream.h265 -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/mike1808/h264decoder 2 | 3 | go 1.14 4 | 5 | require ( 6 | github.com/ailumiyana/goav-incr v0.1.0 7 | gocv.io/x/gocv v0.22.0 8 | ) 9 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Go H264/H265 decoder 2 | 3 | This is a decoder for H.264/H.265 encoded images, implemented using a fork of [goav](https://github.com/giorgisio/goav) called [ailumiyana/goav-incr](https://github.com/ailumiyana/goav-incr). 4 | 5 | To use this library you need to have `ffmpeg` installed. 
6 | 7 | See example in [decoder_test.go](decoder/decoder_test.go) -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/MikolajMGT/h264decoder v0.0.1 h1:Aykw6SlgoZZPHESErTy1qihDsoSBR8HOPWUVGkNl++c= 2 | github.com/ailumiyana/goav-incr v0.1.0 h1:i86JudKqDSnqgyMkIcSjcqfFxMxqT2J+z6jMX5llQuY= 3 | github.com/ailumiyana/goav-incr v0.1.0/go.mod h1:vE9FL56xPpmPygiA71OjUbbvTmXlMEiBLLICG3+z/yw= 4 | gocv.io/x/gocv v0.22.0 h1:pv+tcjcoW/xsaM/nfrzMK5PEEHYe2ND/LQRoyBpgjsg= 5 | gocv.io/x/gocv v0.22.0/go.mod h1:7Ju5KbPo+R85evmlhhKPVMwXtgDRNX/PtfVfbToSrLU= 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2020, Mikael Manukyan 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. 
// Image is an in-memory image whose pixels are stored as packed 8-bit
// R, G, B triples. Its layout mirrors image.RGBA without the alpha channel,
// which saves one byte per pixel.
type Image struct {
	// Pix holds the pixel bytes in R, G, B order.
	Pix []uint8
	// Stride is the distance in bytes between vertically adjacent pixels.
	Stride int
	// Rect is the image's bounds.
	Rect image.Rectangle
}

// RGB is an opaque 24-bit color with 8 bits per channel.
type RGB struct {
	R, G, B uint8
}

// ColorModel is the color model that converts any color to RGB.
var ColorModel = color.ModelFunc(rgbModel)

// Compile-time check that *Image satisfies image.Image.
var _ image.Image = (*Image)(nil)

// NewImage allocates a zeroed RGB image with the given bounds.
func NewImage(r image.Rectangle) *Image {
	stride := 3 * r.Dx()
	img := &Image{Stride: stride, Rect: r}
	img.Pix = make([]uint8, stride*r.Dy())
	return img
}

// ColorModel implements image.Image.ColorModel and returns the package-level
// RGB color model.
func (img *Image) ColorModel() color.Model {
	return ColorModel
}

// Bounds implements image.Image.Bounds.
func (img *Image) Bounds() image.Rectangle {
	return img.Rect
}

// At implements image.Image.At. The returned value is a color.RGBA with the
// alpha channel fully opaque.
func (img *Image) At(x, y int) color.Color {
	return img.RGBAAt(x, y)
}

// RGBAAt returns the pixel at (x, y) as an opaque color.RGBA. Points outside
// the image bounds yield the zero color.RGBA.
func (img *Image) RGBAAt(x, y int) color.RGBA {
	pt := image.Pt(x, y)
	if !pt.In(img.Rect) {
		return color.RGBA{}
	}
	// Byte offset of the pixel: whole rows first, then 3 bytes per column.
	rowStart := (y - img.Rect.Min.Y) * img.Stride
	off := rowStart + (x-img.Rect.Min.X)*3
	return color.RGBA{
		R: img.Pix[off],
		G: img.Pix[off+1],
		B: img.Pix[off+2],
		A: 0xFF,
	}
}

// rgbModel converts an arbitrary color to RGB by keeping the high 8 bits of
// each 16-bit channel. Values that already are RGB are returned unchanged.
func rgbModel(c color.Color) color.Color {
	switch c.(type) {
	case RGB:
		return c
	}
	r16, g16, b16, _ := c.RGBA()
	return RGB{
		R: uint8(r16 >> 8),
		G: uint8(g16 >> 8),
		B: uint8(b16 >> 8),
	}
}

// RGBA implements color.Color.RGBA. Every 8-bit channel is widened to 16 bits
// by replicating the byte, and alpha is always fully opaque.
func (c RGB) RGBA() (r, g, b, a uint32) {
	// For an 8-bit value v, v*0x101 equals v | v<<8.
	return uint32(c.R) * 0x101, uint32(c.G) * 0x101, uint32(c.B) * 0x101, 0xFFFF
}
if err != nil { 43 | b.Error(err) 44 | } 45 | } 46 | 47 | d.Close() 48 | } 49 | } 50 | 51 | func TestDecoderH264(t *testing.T) { 52 | d, err := decoder.New(decoder.PixelFormatBGR, decoder.H264) 53 | if err != nil { 54 | panic(err) 55 | } 56 | 57 | stream, err := os.Open("./artifacts/stream.h264") 58 | if err != nil { 59 | panic(err) 60 | } 61 | 62 | window := gocv.NewWindow("H.264 decoder") 63 | 64 | buf := make([]byte, 2048) 65 | 66 | for { 67 | nread, err := stream.Read(buf) 68 | 69 | if err != nil { 70 | if err == io.EOF { 71 | return 72 | } else { 73 | t.Error(err) 74 | } 75 | } 76 | frames, err := d.Decode(buf[:nread]) 77 | if err != nil { 78 | t.Error(err) 79 | } 80 | if len(frames) == 0 { 81 | t.Log("no frames") 82 | } else { 83 | for _, frame := range frames { 84 | img, _ := gocv.NewMatFromBytes(frame.Height, frame.Width, gocv.MatTypeCV8UC3, frame.Data) 85 | if img.Empty() { 86 | continue 87 | } 88 | 89 | window.IMShow(img) 90 | window.WaitKey(10) 91 | } 92 | 93 | t.Logf("found %d frames", len(frames)) 94 | } 95 | } 96 | } 97 | 98 | func TestDecoderImageH264(t *testing.T) { 99 | d, err := decoder.New(decoder.PixelFormatRGB, decoder.H264) 100 | if err != nil { 101 | panic(err) 102 | } 103 | 104 | stream, err := os.Open("./artifacts/stream.h264") 105 | if err != nil { 106 | panic(err) 107 | } 108 | 109 | buf := make([]byte, 2048) 110 | frameCounter := 0 111 | 112 | for { 113 | nread, err := stream.Read(buf) 114 | 115 | if err != nil { 116 | if err == io.EOF { 117 | return 118 | } else { 119 | t.Error(err) 120 | } 121 | } 122 | frames, err := d.Decode(buf[:nread]) 123 | if err != nil { 124 | t.Error(err) 125 | } 126 | if len(frames) == 0 { 127 | t.Log("no frames") 128 | } else { 129 | for _, frame := range frames { 130 | img := frame.ToRGB() 131 | f, err := os.Create(fmt.Sprintf("./artifacts/frames/frame_%d.jpg", frameCounter)) 132 | frameCounter++ 133 | if err != nil { 134 | t.Fatal(err) 135 | } 136 | err = jpeg.Encode(f, img, &jpeg.Options{Quality: 90}) 137 
| if err != nil { 138 | t.Fatal(err) 139 | } 140 | f.Close() 141 | } 142 | t.Logf("found %d frames", len(frames)) 143 | } 144 | } 145 | } 146 | 147 | func TestDecoderH265(t *testing.T) { 148 | d, err := decoder.New(decoder.PixelFormatBGR, decoder.H265) 149 | if err != nil { 150 | panic(err) 151 | } 152 | 153 | stream, err := os.Open("./artifacts/stream.h265") 154 | if err != nil { 155 | panic(err) 156 | } 157 | 158 | window := gocv.NewWindow("H.265 decoder") 159 | 160 | buf := make([]byte, 2048) 161 | 162 | for { 163 | nread, err := stream.Read(buf) 164 | 165 | if err != nil { 166 | if err == io.EOF { 167 | return 168 | } else { 169 | t.Error(err) 170 | } 171 | } 172 | frames, err := d.Decode(buf[:nread]) 173 | if err != nil { 174 | t.Error(err) 175 | } 176 | if len(frames) == 0 { 177 | t.Log("no frames") 178 | } else { 179 | for _, frame := range frames { 180 | img, _ := gocv.NewMatFromBytes(frame.Height, frame.Width, gocv.MatTypeCV8UC3, frame.Data) 181 | if img.Empty() { 182 | continue 183 | } 184 | 185 | window.IMShow(img) 186 | window.WaitKey(10) 187 | } 188 | 189 | t.Logf("found %d frames", len(frames)) 190 | } 191 | } 192 | } 193 | 194 | func TestDecoderImageH265(t *testing.T) { 195 | d, err := decoder.New(decoder.PixelFormatRGB, decoder.H265) 196 | if err != nil { 197 | panic(err) 198 | } 199 | 200 | stream, err := os.Open("./artifacts/stream.h265") 201 | if err != nil { 202 | panic(err) 203 | } 204 | 205 | buf := make([]byte, 2048) 206 | frameCounter := 0 207 | 208 | for { 209 | nread, err := stream.Read(buf) 210 | 211 | if err != nil { 212 | if err == io.EOF { 213 | return 214 | } else { 215 | t.Error(err) 216 | } 217 | } 218 | frames, err := d.Decode(buf[:nread]) 219 | if err != nil { 220 | t.Error(err) 221 | } 222 | if len(frames) == 0 { 223 | t.Log("no frames") 224 | } else { 225 | for _, frame := range frames { 226 | img := frame.ToRGB() 227 | f, err := os.Create(fmt.Sprintf("./artifacts/frames/frame_%d.jpg", frameCounter)) 228 | frameCounter++ 229 | 
if err != nil { 230 | t.Fatal(err) 231 | } 232 | err = jpeg.Encode(f, img, &jpeg.Options{Quality: 90}) 233 | if err != nil { 234 | t.Fatal(err) 235 | } 236 | f.Close() 237 | } 238 | t.Logf("found %d frames", len(frames)) 239 | } 240 | } 241 | } 242 | -------------------------------------------------------------------------------- /decoder/decoder.go: -------------------------------------------------------------------------------- 1 | package decoder 2 | 3 | import "C" 4 | import ( 5 | "errors" 6 | "github.com/ailumiyana/goav-incr/goav/avcodec" 7 | "github.com/ailumiyana/goav-incr/goav/avutil" 8 | "github.com/ailumiyana/goav-incr/goav/swscale" 9 | "github.com/mike1808/h264decoder/rgb" 10 | "image" 11 | "unsafe" 12 | ) 13 | 14 | type PixelFormat int 15 | 16 | const ( 17 | PixelFormatRGB = avcodec.AV_PIX_FMT_RGB24 18 | PixelFormatBGR = av_PIX_FMT_BGR24 19 | ) 20 | 21 | type Compression int 22 | 23 | const ( 24 | H264 = Compression(avcodec.AV_CODEC_ID_H264) 25 | H265 = Compression(avcodec.AV_CODEC_ID_H265) 26 | ) 27 | 28 | // our avcodec wrapper doesn't have this constant 29 | const av_PIX_FMT_BGR24 = 3 30 | 31 | type Decoder struct { 32 | context *avcodec.Context 33 | parser *avcodec.ParserContext 34 | frame *avutil.Frame 35 | pkt *avcodec.Packet 36 | converter *converter 37 | } 38 | 39 | // Frame represents decoded frame from H.264/H.265 stream 40 | // Data field will contain bitmap data in the pixel format specified in the decoder 41 | type Frame struct { 42 | Data []byte 43 | Width, Height, Stride int 44 | } 45 | 46 | // New creates new Decoder 47 | // It accepts expected pixel format for the output which 48 | func New(pxlFmt PixelFormat, cpr Compression) (*Decoder, error) { 49 | avcodec.AvcodecRegisterAll() 50 | codec := avcodec.AvcodecFindDecoder(avcodec.CodecId(cpr)) 51 | if codec == nil { 52 | return nil, errors.New("cannot find decoder") 53 | } 54 | context := codec.AvcodecAllocContext3() 55 | if context == nil { 56 | return nil, errors.New("cannot allocate 
context") 57 | } 58 | 59 | if context.AvcodecOpen2(codec, nil) < 0 { 60 | return nil, errors.New("cannot open content") 61 | } 62 | parser := avcodec.AvParserInit(int(cpr)) 63 | if parser == nil { 64 | return nil, errors.New("cannot init parser") 65 | } 66 | frame := avutil.AvFrameAlloc() 67 | if frame == nil { 68 | return nil, errors.New("cannot allocate frame") 69 | } 70 | pkt := avcodec.AvPacketAlloc() 71 | if pkt == nil { 72 | return nil, errors.New("cannot allocate packet") 73 | } 74 | pkt.AvInitPacket() 75 | pkt.SetFlags(pkt.Flags() | avcodec.AV_CODEC_FLAG_TRUNCATED) 76 | 77 | if cpr != H264 && cpr != H265 { 78 | return nil, errors.New("unsupported compression") 79 | } 80 | 81 | var converterPxlFmt swscale.PixelFormat 82 | switch pxlFmt { 83 | case PixelFormatRGB: 84 | converterPxlFmt = avcodec.AV_PIX_FMT_RGB24 85 | case PixelFormatBGR: 86 | converterPxlFmt = av_PIX_FMT_BGR24 87 | default: 88 | return nil, errors.New("unsupported pixel format") 89 | } 90 | 91 | converter, err := newConverter(converterPxlFmt) 92 | if err != nil { 93 | return nil, err 94 | } 95 | 96 | h := &Decoder{ 97 | context: context, 98 | parser: parser, 99 | frame: frame, 100 | pkt: pkt, 101 | converter: converter, 102 | } 103 | 104 | return h, nil 105 | } 106 | 107 | // Decode tries to parse the input data and return list of frames 108 | // If input data doesn't contain any H.264/H.265 frames the list will be empty 109 | func (h *Decoder) Decode(data []byte) ([]*Frame, error) { 110 | var frames []*Frame 111 | 112 | for len(data) > 0 { 113 | frame, nread, isFrameAvailable, err := h.decodeFrameImpl(data) 114 | 115 | if err != nil && nread < 0 { 116 | return nil, err 117 | } 118 | 119 | if isFrameAvailable && frame != nil { 120 | frames = append(frames, frame) 121 | } 122 | 123 | data = data[nread:] 124 | } 125 | 126 | return frames, nil 127 | } 128 | 129 | // Close free ups memory used for decoder structures 130 | // It needs to be called to prevent memory leaks 131 | func (h *Decoder) 
Close() { 132 | h.converter.Close() 133 | 134 | avcodec.AvParserClose(h.parser) 135 | h.context.AvcodecClose() 136 | avutil.AvFree(unsafe.Pointer(h.context)) 137 | avutil.AvFrameFree(h.frame) 138 | h.pkt.AvFreePacket() 139 | } 140 | 141 | // ToRGBA converts the frame into image.RGBA 142 | // The returned image share the same memory as the frame 143 | func (f *Frame) ToRGB() *rgb.Image { 144 | rect := image.Rect(0, 0, f.Width, f.Height) 145 | return &rgb.Image{ 146 | Pix: f.Data, 147 | Stride: f.Stride, 148 | Rect: rect, 149 | } 150 | } 151 | 152 | func (h *Decoder) parse(data []byte, bs int) int { 153 | return h.context.AvParserParse2( 154 | h.parser, 155 | h.pkt, 156 | data, 157 | bs, 158 | 0, 0, avcodec.AV_NOPTS_VALUE, 159 | ) 160 | } 161 | 162 | func (h *Decoder) isFrameAvailable() bool { 163 | return h.pkt.Size() > 0 164 | } 165 | 166 | func (h *Decoder) decodeFrame() (*avutil.Frame, error) { 167 | gotPicture := 0 168 | nread := h.context.AvcodecDecodeVideo2((*avcodec.Frame)(unsafe.Pointer(h.frame)), &gotPicture, h.pkt) 169 | if nread < 0 || gotPicture == 0 { 170 | return nil, errors.New("error decoding frame") 171 | } 172 | 173 | return h.frame, nil 174 | } 175 | 176 | func (h *Decoder) decodeFrameImpl(data []byte) (*Frame, int, bool, error) { 177 | size := len(data) 178 | nread := h.parse(data, size) 179 | 180 | if !h.isFrameAvailable() { 181 | return nil, nread, false, nil 182 | } 183 | 184 | frame, err := h.decodeFrame() 185 | if err != nil { 186 | return nil, nread, true, err 187 | } 188 | 189 | width, height := h.context.Width(), h.context.Height() 190 | bufferSize := uintptr(h.converter.PredictSize(width, height)) 191 | buffer := (*uint8)(avutil.AvMalloc(bufferSize)) 192 | defer avutil.AvFree(unsafe.Pointer(buffer)) 193 | rgbframe, err := h.converter.Convert(h.context, frame, buffer) 194 | 195 | if err != nil { 196 | return nil, nread, true, err 197 | } 198 | 199 | return newFrame(rgbframe), nread, true, nil 200 | } 201 | 202 | func newFrame(frame 
*avutil.Frame) *Frame { 203 | w, h, linesize := frame.Width(), frame.Height(), avutil.Linesize(frame) 204 | 205 | return &Frame{ 206 | Data: frameData(frame), 207 | Width: w, 208 | Height: h, 209 | Stride: int(linesize[0]), 210 | } 211 | } 212 | 213 | func frameData(frame *avutil.Frame) []byte { 214 | h, linesize, data := frame.Height(), avutil.Linesize(frame), avutil.Data(frame) 215 | size := int(linesize[0]) * h 216 | 217 | return C.GoBytes(unsafe.Pointer(data[0]), C.int(size)) 218 | } 219 | 220 | type converter struct { 221 | framergb *avutil.Frame 222 | context *swscale.Context 223 | pixFmt swscale.PixelFormat 224 | } 225 | 226 | func newConverter(pixelFormat swscale.PixelFormat) (*converter, error) { 227 | c := &converter{ 228 | pixFmt: pixelFormat, 229 | } 230 | 231 | c.framergb = avutil.AvFrameAlloc() 232 | if c.framergb == nil { 233 | return nil, errors.New("cannot allocate frame") 234 | } 235 | 236 | return c, nil 237 | } 238 | 239 | func (c *converter) Close() { 240 | swscale.SwsFreecontext(c.context) 241 | avutil.AvFrameFree(c.framergb) 242 | } 243 | 244 | func (c *converter) Convert(context *avcodec.Context, frame *avutil.Frame, out_rgb *uint8) (*avutil.Frame, error) { 245 | w, h, pixFmt := context.Width(), context.Height(), context.PixFmt() 246 | 247 | swsCtx := c.context 248 | 249 | if c.context == nil { 250 | swsCtx = swscale.SwsGetcontext( 251 | w, 252 | h, 253 | (swscale.PixelFormat)(pixFmt), 254 | w, 255 | h, 256 | c.pixFmt, 257 | avcodec.SWS_BILINEAR, 258 | nil, 259 | nil, 260 | nil, 261 | ) 262 | } else { 263 | swsCtx = swscale.SwsGetcachedcontext( 264 | swsCtx, 265 | w, 266 | h, 267 | (swscale.PixelFormat)(pixFmt), 268 | w, 269 | h, 270 | c.pixFmt, 271 | avcodec.SWS_BILINEAR, 272 | nil, 273 | nil, 274 | nil, 275 | ) 276 | } 277 | 278 | if context == nil { 279 | return nil, errors.New("cannot allocate context") 280 | } 281 | 282 | err := avutil.AvSetFrame(c.framergb, w, h, int(c.pixFmt)) 283 | if err != nil { 284 | return nil, err 285 | } 286 | 
287 | avp := (*avcodec.Picture)(unsafe.Pointer(c.framergb)) 288 | avp.AvpictureFill( 289 | (*uint8)(out_rgb), 290 | (avcodec.PixelFormat)(c.pixFmt), 291 | w, h, 292 | ) 293 | swscale.SwsScale2(swsCtx, avutil.Data(frame), 294 | avutil.Linesize(frame), 0, h, 295 | avutil.Data(c.framergb), avutil.Linesize(c.framergb)) 296 | 297 | return c.framergb, err 298 | } 299 | 300 | func (c *converter) PredictSize(w, h int) int { 301 | avp := (*avcodec.Picture)(unsafe.Pointer(c.framergb)) 302 | return avp.AvpictureFill(nil, (avcodec.PixelFormat)(c.pixFmt), w, h) 303 | } 304 | --------------------------------------------------------------------------------