├── .github └── workflows │ └── build.yml ├── .gitignore ├── .travis.yml ├── .vscode └── settings.json ├── LICENSE.txt ├── Makefile ├── README.md ├── elements.go ├── examples ├── mkvdir2mrss │ └── mkvdir2mrss.go ├── mkvinfo │ └── mkvinfo.go └── mkvtags │ └── mkvtags.go ├── examples_handlerchain_test.go ├── examples_section_test.go ├── examples_simple_test.go ├── examples_test.go ├── go.mod ├── go.sum ├── handlers.go ├── internal └── generate │ └── generate.go ├── mkvparse.go ├── mkvparse_test.go ├── sectionparser.go ├── tags.go ├── testdata ├── example-cover.mkv ├── example-live+junk.mkv └── example.mkv ├── vint.go └── vint_test.go /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build 2 | 3 | on: 4 | push: 5 | pull_request: 6 | workflow_dispatch: 7 | 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v2 13 | - uses: actions/setup-go@v2 14 | with: 15 | go-version: '^1.16' 16 | - run: make install-tools 17 | - run: make 18 | - run: make check COVERAGE=1 19 | - run: make lint 20 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | bin/ 3 | coverage.out 4 | examples/mkvinfo/mkvinfo 5 | examples/mkvcover/mkvcover 6 | examples/mkvtags/mkvtags 7 | examples/mkvdir2mrss/mkvdir2mrss 8 | testdata/matroska-test-files 9 | ebml.xml 10 | ebml_matroska.xml 11 | matroska_tags.xml 12 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | go: 3 | - "1.9" 4 | - "1.10" 5 | - "1.11" 6 | - "1.12" 7 | - master 8 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 
1 | { 2 | "editor.tabSize": 2, 3 | "files.exclude": { 4 | }, 5 | "search.exclude": { 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2017 Remko Tronçon 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 9 | of the Software, and to permit persons to whom the Software is furnished to do 10 | so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | GO_TEST_FLAGS:=-timeout 5s 2 | ifeq ($(V),1) 3 | GO_TEST_FLAGS:=$(GO_TEST_FLAGS) -v 4 | endif 5 | ifeq ($(COVERAGE),1) 6 | GO_TEST_FLAGS:=$(GO_TEST_FLAGS) -coverprofile=coverage.out 7 | endif 8 | 9 | .PHONY: all 10 | all: check examples 11 | 12 | .PHONY: examples 13 | examples: 14 | mkdir -p bin 15 | go build -o bin/ ./examples/... 
16 | 17 | .PHONY: matroska-test-files 18 | matroska-test-files: 19 | git clone https://github.com/ietf-wg-cellar/matroska-test-files.git testdata/matroska-test-files 20 | 21 | .PHONY: check 22 | check: 23 | go test $(GO_TEST_FLAGS) . 24 | ifeq ($(COVERAGE),1) 25 | go tool cover -html=coverage.out 26 | endif 27 | 28 | .PHONY: install-tools 29 | install-tools: 30 | go install honnef.co/go/tools/cmd/staticcheck@2023.1.3 31 | 32 | .PHONY: lint 33 | lint: 34 | go vet ./... 35 | staticcheck ./... 36 | 37 | example-live+junk.mkv: 38 | ffmpeg -t 1 -s 320x240 -f rawvideo -r 25 -pix_fmt rgb24 -i /dev/zero -metadata title="Live + Junk" -metadata author="John Doe" -c:v libx264 -pix_fmt yuv420p dirty.$@ 39 | mkclean --live dirty.$@ $@ 40 | -rm -rf dirty.$@ 41 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # `mkvparse`: Matroska parser in Go 2 | 3 | [![Go Reference](https://pkg.go.dev/badge/github.com/remko/go-mkvparse.svg)](https://pkg.go.dev/github.com/remko/go-mkvparse) 4 | 5 | Fast [Matroska](https://www.matroska.org) (`.mkv`, `.mka`, `.webm`) parser, written in Go. 6 | 7 | Features: 8 | 9 | - Supports [all Matroska elements](https://www.matroska.org/technical/specs/index.html) 10 | - Supports short-circuiting the parser, making it possible to 11 | read specific data (e.g. title, author) without reading the 12 | entire file (see the `mkvtags` example) 13 | - Also works with [WebM](https://www.webmproject.org) (`.webm`) files 14 | - Supports streaming 15 | - Event-based push API 16 | - No dependencies 17 | 18 | ## API 19 | 20 | See the [API Reference](https://godoc.org/github.com/remko/go-mkvparse). 
21 | 22 | ## Examples 23 | 24 | Besides the examples in the [API Reference](https://godoc.org/github.com/remko/go-mkvparse), 25 | there are some larger examples in the `examples/` dir: 26 | 27 | - `examples/mkvinfo`: Example using basic parser API to print all elements 28 | - `examples/mkvtags`: Example using the optimized 'sections' API to print all tags without 29 | parsing the entire file. 30 | -------------------------------------------------------------------------------- /examples/mkvdir2mrss/mkvdir2mrss.go: -------------------------------------------------------------------------------- 1 | // mkvdir2mrss parses all MKV files in a dir, and generates a (Media) RSS feed 2 | // for them. Cover art is extracted, and put in the same dir as the output feed. 3 | // 4 | // Usage: 5 | // ./mkvdir2mrss --baseURL http://localhost --out=feeds/feed.xml Movies/ 6 | package main 7 | 8 | import ( 9 | "bytes" 10 | "crypto/sha1" 11 | "encoding/xml" 12 | "flag" 13 | "fmt" 14 | "image" 15 | "image/jpeg" 16 | "io/ioutil" 17 | "net/url" 18 | "os" 19 | "path/filepath" 20 | "regexp" 21 | "strconv" 22 | "strings" 23 | "time" 24 | 25 | "github.com/remko/go-mkvparse" 26 | "golang.org/x/image/draw" 27 | ) 28 | 29 | //////////////////////////////////////////////////////////////////////////////// 30 | // Parsing 31 | //////////////////////////////////////////////////////////////////////////////// 32 | 33 | type MediaFile struct { 34 | Title string 35 | Artist string 36 | Duration time.Duration 37 | Cover []byte 38 | Channels int64 39 | HasVideo bool 40 | HasAudio bool 41 | SamplingFrequency float64 42 | } 43 | 44 | type MediaParser struct { 45 | mkvparse.DefaultHandler 46 | 47 | duration float64 48 | timecodeScale int64 49 | title string 50 | channels int64 51 | hasVideo bool 52 | hasAudio bool 53 | samplingFrequency float64 54 | } 55 | 56 | func (p *MediaParser) HandleString(id mkvparse.ElementID, value string, info mkvparse.ElementInfo) error { 57 | switch id { 58 | case 
mkvparse.TitleElement: 59 | p.title = value 60 | } 61 | return nil 62 | } 63 | 64 | func (p *MediaParser) HandleInteger(id mkvparse.ElementID, value int64, info mkvparse.ElementInfo) error { 65 | switch id { 66 | case mkvparse.TimecodeScaleElement: 67 | p.timecodeScale = value 68 | case mkvparse.ChannelsElement: 69 | if value > p.channels { 70 | p.channels = value 71 | } 72 | case mkvparse.TrackTypeElement: 73 | switch value { 74 | case mkvparse.TrackType_Video: 75 | p.hasVideo = true 76 | case mkvparse.TrackType_Audio: 77 | p.hasAudio = true 78 | } 79 | } 80 | return nil 81 | } 82 | 83 | func (p *MediaParser) HandleFloat(id mkvparse.ElementID, value float64, info mkvparse.ElementInfo) error { 84 | switch id { 85 | case mkvparse.DurationElement: 86 | p.duration = value 87 | case mkvparse.SamplingFrequencyElement: 88 | if value > p.samplingFrequency { 89 | p.samplingFrequency = value 90 | } 91 | } 92 | return nil 93 | } 94 | 95 | func parseFile(path string) (*MediaFile, error) { 96 | file, err := os.Open(path) 97 | if err != nil { 98 | return nil, err 99 | } 100 | defer file.Close() 101 | handler := MediaParser{ 102 | duration: -1.0, 103 | channels: 0, 104 | timecodeScale: 1000000, 105 | } 106 | tagsh := mkvparse.NewTagsHandler() 107 | coverh := mkvparse.CoverHandler{} 108 | err = mkvparse.ParseSections(file, mkvparse.NewHandlerChain(tagsh, &coverh, &handler), mkvparse.InfoElement, mkvparse.TagsElement, mkvparse.TracksElement, mkvparse.AttachmentsElement) 109 | if err != nil { 110 | return nil, err 111 | } 112 | 113 | mf := MediaFile{ 114 | Title: handler.title, 115 | Artist: tagsh.Tags()[mkvparse.Tag_Artist], 116 | Cover: coverh.Data, 117 | Channels: handler.channels, 118 | HasAudio: handler.hasAudio, 119 | HasVideo: handler.hasVideo, 120 | SamplingFrequency: handler.samplingFrequency, 121 | } 122 | if handler.duration >= 0 { 123 | mf.Duration = time.Duration(int64(handler.duration * float64(handler.timecodeScale))) 124 | } else { 125 | mf.Duration = -1 126 | } 127 
| return &mf, nil 128 | } 129 | 130 | //////////////////////////////////////////////////////////////////////////////// 131 | // RSS Generation 132 | //////////////////////////////////////////////////////////////////////////////// 133 | 134 | type RSSMediaContent struct { 135 | XMLName xml.Name `xml:"media:content"` 136 | URL string `xml:"url,attr"` 137 | FileSize int64 `xml:"fileSize,attr"` 138 | Duration int `xml:"duration,attr"` 139 | Channels int64 `xml:"channels,attr,omitempty"` 140 | Type string `xml:"type,attr,omitempty"` 141 | Medium string `xml:"medium,attr,omitempty"` 142 | SamplingRate string `xml:"samplingrate,attr,omitempty"` 143 | } 144 | 145 | type RSSEnclosure struct { 146 | XMLName xml.Name `xml:"enclosure"` 147 | URL string `xml:"url,attr"` 148 | Length int64 `xml:"length,attr"` 149 | Type string `xml:"type,attr"` 150 | } 151 | 152 | type RSSMediaTitle struct { 153 | XMLName xml.Name `xml:"media:title"` 154 | Type string `xml:"type,attr"` 155 | Value string `xml:",chardata"` 156 | } 157 | 158 | type RSSMediaCredit struct { 159 | XMLName xml.Name `xml:"media:credit"` 160 | Role string `xml:"role,attr"` 161 | Value string `xml:",chardata"` 162 | } 163 | 164 | type RSSMediaThumbnail struct { 165 | XMLName xml.Name `xml:"media:thumbnail"` 166 | URL string `xml:"url,attr"` 167 | } 168 | 169 | type RSSItem struct { 170 | XMLName xml.Name `xml:"item"` 171 | PubDate string `xml:"pubDate"` 172 | Title string `xml:"title"` 173 | Author string `xml:"author,omitempty"` 174 | Enclosure *RSSEnclosure 175 | 176 | MediaContent *RSSMediaContent 177 | MediaCredit *RSSMediaCredit 178 | MediaTitle *RSSMediaTitle 179 | MediaThumbnail *RSSMediaThumbnail 180 | 181 | ITunesDuration string `xml:"itunes:duration"` 182 | } 183 | 184 | type RSSChannel struct { 185 | XMLName xml.Name `xml:"channel"` 186 | Title string `xml:"title"` 187 | Items []*RSSItem `xml:"items"` 188 | } 189 | 190 | type RSSFeed struct { 191 | XMLName xml.Name `xml:"rss"` 192 | Version string 
`xml:"version,attr"` 193 | MediaNS string `xml:"xmlns:media,attr"` 194 | ITunesNS string `xml:"xmlns:itunes,attr"` 195 | Channel *RSSChannel `xml:"channel"` 196 | } 197 | 198 | func formatDuration(d time.Duration) string { 199 | d = d.Round(time.Second) 200 | h := d / time.Hour 201 | d -= h * time.Hour 202 | m := d / time.Minute 203 | d -= m * time.Minute 204 | s := d / time.Second 205 | return fmt.Sprintf("%02d:%02d:%02d", h, m, s) 206 | } 207 | 208 | var supportedMediaFileRE = regexp.MustCompile(`(?i)\.mk[av]$`) 209 | var unsupportedMediaFileRE = regexp.MustCompile(`(?i)\.(mp4|m4v|avi|mpg)$`) 210 | 211 | func run() error { 212 | var noDirs bool 213 | baseURL := flag.String("baseURL", "", "Base URL") 214 | outFile := flag.String("out", "", "Output RSS Feed file") 215 | flag.BoolVar(&noDirs, "noDirs", false, "Don't include dirnames in titles") 216 | flag.Parse() 217 | dirs := flag.Args() 218 | 219 | if baseURL == nil || outFile == nil || len(dirs) < 1 { 220 | return fmt.Errorf("missing parameters") 221 | } 222 | 223 | baseDir, _ := os.Getwd() 224 | outDir, outFilename := filepath.Split(*outFile) 225 | outDir, err := filepath.Abs(outDir) 226 | if err != nil { 227 | return err 228 | } 229 | extension := filepath.Ext(outFilename) 230 | title := outFilename[0 : len(outFilename)-len(extension)] 231 | 232 | feed := RSSFeed{ 233 | Version: "2.0", 234 | MediaNS: "http://search.yahoo.com/mrss/", 235 | ITunesNS: "http://www.itunes.com/dtds/podcast-1.0.dtd", 236 | Channel: &RSSChannel{ 237 | Title: title, 238 | }, 239 | } 240 | 241 | for _, dir := range dirs { 242 | absDir, err := filepath.Abs(dir) 243 | if err != nil { 244 | return err 245 | } 246 | err = filepath.Walk(absDir, func(path string, info os.FileInfo, err error) error { 247 | if err != nil { 248 | return fmt.Errorf("error walking %s: %v", path, err) 249 | } 250 | if !info.Mode().IsRegular() { 251 | return nil 252 | } 253 | 254 | publicPath, err := filepath.Rel(baseDir, path) 255 | if err != nil { 256 | return err 
257 | } 258 | filename, err := filepath.Rel(absDir, path) 259 | if err != nil { 260 | return err 261 | } 262 | extension := filepath.Ext(filename) 263 | name := filename[0 : len(filename)-len(extension)] 264 | if noDirs { 265 | name = filepath.Base(name) 266 | } 267 | mediaURL := fmt.Sprintf("%s/%s", *baseURL, strings.Replace(url.PathEscape(publicPath), "%2F", "/", -1)) 268 | 269 | if supportedMediaFileRE.MatchString(path) { 270 | file, err := parseFile(path) 271 | if len(file.Title) == 0 { 272 | file.Title = name 273 | } 274 | if err != nil { 275 | return fmt.Errorf("error loading %s: %v", path, err) 276 | } else { 277 | item := &RSSItem{ 278 | Title: file.Title, 279 | PubDate: info.ModTime().Format(time.RFC822), 280 | ITunesDuration: formatDuration(file.Duration), 281 | Author: file.Artist, 282 | MediaContent: &RSSMediaContent{ 283 | FileSize: info.Size(), 284 | Duration: int(file.Duration / time.Second), 285 | URL: mediaURL, 286 | }, 287 | MediaTitle: &RSSMediaTitle{ 288 | Type: "plain", 289 | Value: file.Title, 290 | }, 291 | } 292 | item.Enclosure = &RSSEnclosure{ 293 | URL: item.MediaContent.URL, 294 | Length: item.MediaContent.FileSize, 295 | Type: item.MediaContent.Type, 296 | } 297 | if len(file.Artist) > 0 { 298 | item.MediaCredit = &RSSMediaCredit{ 299 | Role: "author", 300 | Value: file.Artist, 301 | } 302 | } 303 | if len(file.Cover) > 0 { 304 | thumbFile := filepath.Join(outDir, fmt.Sprintf("%x.jpg", sha1.Sum(file.Cover))) 305 | if _, err := os.Stat(thumbFile); os.IsNotExist(err) { 306 | img, err := scale(file.Cover, 512) 307 | if err != nil { 308 | return err 309 | } 310 | if err := ioutil.WriteFile(thumbFile, img, 0644); err != nil { 311 | return err 312 | } 313 | } 314 | publicThumbFile, err := filepath.Rel(baseDir, thumbFile) 315 | if err != nil { 316 | return err 317 | } 318 | item.MediaThumbnail = &RSSMediaThumbnail{ 319 | URL: fmt.Sprintf("%s/%s", *baseURL, strings.Replace(url.PathEscape(publicThumbFile), "%2F", "/", -1)), 320 | } 321 | } 322 | 
if file.Channels > 0 { 323 | item.MediaContent.Channels = file.Channels 324 | } 325 | if file.HasVideo { 326 | item.MediaContent.Medium = "video" 327 | item.MediaContent.Type = "video/x-matroska" 328 | } else if file.HasAudio { 329 | item.MediaContent.Medium = "audio" 330 | item.MediaContent.Type = "audio/x-matroska" 331 | } 332 | if file.SamplingFrequency > 0 { 333 | item.MediaContent.SamplingRate = strconv.FormatFloat(float64(file.SamplingFrequency)/1000, 'f', -1, 64) 334 | } 335 | feed.Channel.Items = append(feed.Channel.Items, item) 336 | } 337 | } else if unsupportedMediaFileRE.MatchString(path) { 338 | // Fallback to basic information for unsupported media files 339 | item := &RSSItem{ 340 | Title: name, 341 | PubDate: info.ModTime().Format(time.RFC822), 342 | Enclosure: &RSSEnclosure{ 343 | URL: mediaURL, 344 | Length: info.Size(), 345 | Type: "video/mp4", 346 | }, 347 | } 348 | feed.Channel.Items = append(feed.Channel.Items, item) 349 | } 350 | return nil 351 | }) 352 | if err != nil { 353 | return err 354 | } 355 | } 356 | 357 | output, err := xml.Marshal(feed) 358 | if err != nil { 359 | return err 360 | } 361 | return ioutil.WriteFile(*outFile, output, 0644) 362 | } 363 | 364 | func scale(data []byte, size int) ([]byte, error) { 365 | img, _, err := image.Decode(bytes.NewReader(data)) 366 | if err != nil { 367 | return nil, err 368 | } 369 | width := size 370 | height := size 371 | aspect := float64(img.Bounds().Dx()) / float64(img.Bounds().Dy()) 372 | if aspect > float64(width)/float64(height) { 373 | height = int(float64(height) / aspect) 374 | } else { 375 | width = int(float64(width) * aspect) 376 | } 377 | dst := image.NewRGBA(image.Rect(0, 0, width, height)) 378 | draw.CatmullRom.Scale(dst, dst.Rect, img, img.Bounds(), draw.Over, nil) 379 | out := bytes.Buffer{} 380 | err = jpeg.Encode(&out, dst, &jpeg.Options{Quality: 75}) 381 | return out.Bytes(), err 382 | } 383 | 384 | func main() { 385 | if err := run(); err != nil { 386 | fmt.Printf("Error: 
%s\n", err) 387 | os.Exit(1) 388 | } 389 | } 390 | -------------------------------------------------------------------------------- /examples/mkvinfo/mkvinfo.go: -------------------------------------------------------------------------------- 1 | // Prints all information of an MKV file 2 | package main 3 | 4 | import ( 5 | "fmt" 6 | "os" 7 | "strings" 8 | "time" 9 | 10 | "github.com/remko/go-mkvparse" 11 | ) 12 | 13 | type MyParser struct { 14 | } 15 | 16 | func (p *MyParser) HandleMasterBegin(id mkvparse.ElementID, info mkvparse.ElementInfo) (bool, error) { 17 | switch id { 18 | default: 19 | fmt.Printf("%s- %s:\n", indent(info.Level), mkvparse.NameForElementID(id)) 20 | return true, nil 21 | } 22 | } 23 | 24 | func (p *MyParser) HandleMasterEnd(id mkvparse.ElementID, info mkvparse.ElementInfo) error { 25 | return nil 26 | } 27 | 28 | func (p *MyParser) HandleString(id mkvparse.ElementID, value string, info mkvparse.ElementInfo) error { 29 | fmt.Printf("%s- %v: %q\n", indent(info.Level), mkvparse.NameForElementID(id), value) 30 | return nil 31 | } 32 | 33 | func (p *MyParser) HandleInteger(id mkvparse.ElementID, value int64, info mkvparse.ElementInfo) error { 34 | fmt.Printf("%s- %v: %v\n", indent(info.Level), mkvparse.NameForElementID(id), value) 35 | return nil 36 | } 37 | 38 | func (p *MyParser) HandleFloat(id mkvparse.ElementID, value float64, info mkvparse.ElementInfo) error { 39 | fmt.Printf("%s- %v: %v\n", indent(info.Level), mkvparse.NameForElementID(id), value) 40 | return nil 41 | } 42 | 43 | func (p *MyParser) HandleDate(id mkvparse.ElementID, value time.Time, info mkvparse.ElementInfo) error { 44 | fmt.Printf("%s- %v: %v\n", indent(info.Level), mkvparse.NameForElementID(id), value) 45 | return nil 46 | } 47 | 48 | func (p *MyParser) HandleBinary(id mkvparse.ElementID, value []byte, info mkvparse.ElementInfo) error { 49 | switch id { 50 | case mkvparse.SeekIDElement: 51 | fmt.Printf("%s- %v: %x\n", indent(info.Level), mkvparse.NameForElementID(id), 
value) 52 | default: 53 | fmt.Printf("%s- %v: (%d)\n", indent(info.Level), mkvparse.NameForElementID(id), info.Size) 54 | } 55 | return nil 56 | } 57 | 58 | func main() { 59 | handler := MyParser{} 60 | err := mkvparse.ParsePath(os.Args[1], &handler) 61 | if err != nil { 62 | fmt.Printf("%v", err) 63 | os.Exit(-1) 64 | } 65 | } 66 | 67 | func indent(n int) string { 68 | return strings.Repeat(" ", n) 69 | } 70 | -------------------------------------------------------------------------------- /examples/mkvtags/mkvtags.go: -------------------------------------------------------------------------------- 1 | // Prints tags of an MKV file 2 | package main 3 | 4 | import ( 5 | "fmt" 6 | "os" 7 | "sort" 8 | 9 | "github.com/remko/go-mkvparse" 10 | ) 11 | 12 | type MyParser struct { 13 | mkvparse.DefaultHandler 14 | 15 | title *string 16 | } 17 | 18 | func (p *MyParser) HandleString(id mkvparse.ElementID, value string, info mkvparse.ElementInfo) error { 19 | switch id { 20 | case mkvparse.TitleElement: 21 | p.title = &value 22 | } 23 | return nil 24 | } 25 | 26 | func main() { 27 | file, err := os.Open(os.Args[1]) 28 | if err != nil { 29 | fmt.Printf("%v", err) 30 | os.Exit(-1) 31 | } 32 | defer file.Close() 33 | titleh := MyParser{} 34 | tagsh := mkvparse.NewTagsHandler() 35 | err = mkvparse.ParseSections(file, mkvparse.NewHandlerChain(&titleh, tagsh), mkvparse.InfoElement, mkvparse.TagsElement) 36 | if err != nil { 37 | fmt.Printf("%v", err) 38 | os.Exit(-1) 39 | } 40 | 41 | // Print (sorted) tags 42 | if titleh.title != nil { 43 | fmt.Printf("- title: %q\n", *titleh.title) 44 | } 45 | tags := tagsh.Tags() 46 | var tagNames []string 47 | for tagName := range tags { 48 | tagNames = append(tagNames, tagName) 49 | } 50 | sort.Strings(tagNames) 51 | for _, tagName := range tagNames { 52 | fmt.Printf("- %s: %q\n", tagName, tags[tagName]) 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /examples_handlerchain_test.go: 
-------------------------------------------------------------------------------- 1 | package mkvparse 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | ) 7 | 8 | func ExampleHandlerChain() { 9 | file, err := os.Open("testdata/example-cover.mkv") 10 | if err != nil { 11 | panic(err) 12 | } 13 | defer file.Close() 14 | 15 | coverh := CoverHandler{} 16 | titleh := TitleHandler{} 17 | if err := ParseSections(file, NewHandlerChain(&coverh, &titleh), InfoElement, AttachmentsElement); err != nil { 18 | panic(err) 19 | } 20 | fmt.Printf("parsed cover: %s (%d bytes)\n", coverh.MIMEType, len(coverh.Data)) 21 | 22 | // Output: 23 | // Title: Awesome Movie 24 | // parsed cover: image/jpeg (41363 bytes) 25 | } 26 | 27 | type TitleHandler struct { 28 | DefaultHandler 29 | } 30 | 31 | func (p *TitleHandler) HandleString(id ElementID, value string, info ElementInfo) error { 32 | switch id { 33 | case TitleElement: 34 | fmt.Printf("%s: %v\n", NameForElementID(id), value) 35 | } 36 | return nil 37 | } 38 | -------------------------------------------------------------------------------- /examples_section_test.go: -------------------------------------------------------------------------------- 1 | package mkvparse 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "os" 7 | ) 8 | 9 | func ExampleParseSections() { 10 | handler := myTitleHandler{} 11 | file, err := os.Open("testdata/example.mkv") 12 | if err != nil { 13 | log.Fatalf("%v", err) 14 | } 15 | defer file.Close() 16 | if err = ParseSections(file, &handler, InfoElement); err != nil { 17 | log.Fatalf("%v", err) 18 | } 19 | // Output: 20 | // Title: Awesome Movie 21 | } 22 | 23 | type myTitleHandler struct { 24 | DefaultHandler 25 | } 26 | 27 | func (p *myTitleHandler) HandleString(id ElementID, value string, info ElementInfo) error { 28 | switch id { 29 | case TitleElement: 30 | fmt.Printf("%s: %v\n", NameForElementID(id), value) 31 | } 32 | return nil 33 | } 34 | -------------------------------------------------------------------------------- 
/examples_simple_test.go: -------------------------------------------------------------------------------- 1 | package mkvparse 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | ) 7 | 8 | func Example() { 9 | handler := MyHandler{} 10 | if err := ParsePath("testdata/example.mkv", &handler); err != nil { 11 | log.Fatalf("%v", err) 12 | } 13 | // Output: 14 | // Title: Awesome Movie 15 | } 16 | 17 | type MyHandler struct { 18 | DefaultHandler 19 | } 20 | 21 | func (p *MyHandler) HandleString(id ElementID, value string, info ElementInfo) error { 22 | switch id { 23 | case TitleElement: 24 | fmt.Printf("%s: %v\n", NameForElementID(id), value) 25 | } 26 | return nil 27 | } 28 | -------------------------------------------------------------------------------- /examples_test.go: -------------------------------------------------------------------------------- 1 | package mkvparse 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "image" 7 | "image/jpeg" 8 | "image/png" 9 | "log" 10 | "os" 11 | ) 12 | 13 | func ExampleNameForElementID() { 14 | fmt.Println(NameForElementID(InfoElement)) 15 | // Output: 16 | // Info 17 | } 18 | 19 | func ExampleCoverHandler() { 20 | file, err := os.Open("testdata/example-cover.mkv") 21 | if err != nil { 22 | panic(err) 23 | } 24 | defer file.Close() 25 | 26 | handler := CoverHandler{} 27 | if err := ParseSections(file, &handler, AttachmentsElement); err != nil { 28 | panic(err) 29 | } 30 | 31 | fmt.Printf("parsed cover: %s (%d bytes)\n", handler.MIMEType, len(handler.Data)) 32 | 33 | // Output: 34 | // parsed cover: image/jpeg (41363 bytes) 35 | } 36 | 37 | func ExampleParseCover() { 38 | data, typ, err := ParseCover("testdata/example-cover.mkv") 39 | if err != nil { 40 | panic(err) 41 | } 42 | 43 | fmt.Printf("parsed cover: %s (%d bytes)\n", typ, len(data)) 44 | 45 | // Output: 46 | // parsed cover: image/jpeg (41363 bytes) 47 | } 48 | 49 | func ExampleParseCover_image() { 50 | data, typ, err := ParseCover("testdata/example-cover.mkv") 51 | if err != nil { 
52 | log.Panic(err) 53 | } 54 | if data == nil { 55 | log.Panic("no cover") 56 | return 57 | } 58 | var img image.Image 59 | switch typ { 60 | case "image/jpeg": 61 | img, err = jpeg.Decode(bytes.NewReader(data)) 62 | case "image/png": 63 | img, err = png.Decode(bytes.NewReader(data)) 64 | default: 65 | log.Panicf("unknown MIME type: %s", typ) 66 | } 67 | if err != nil { 68 | log.Panic(err) 69 | } 70 | 71 | fmt.Printf("parsed cover image: %dx%d\n", img.Bounds().Dx(), img.Bounds().Dy()) 72 | 73 | // Output: 74 | // parsed cover image: 265x377 75 | } 76 | 77 | func ExampleTagsHandler() { 78 | file, err := os.Open("testdata/example.mkv") 79 | if err != nil { 80 | panic(err) 81 | } 82 | defer file.Close() 83 | 84 | handler := NewTagsHandler() 85 | if err := ParseSections(file, handler, TagsElement); err != nil { 86 | panic(err) 87 | } 88 | 89 | fmt.Printf("Artist: %s\n", handler.Tags()[Tag_Artist]) 90 | 91 | // Output: 92 | // Artist: John Doe 93 | } 94 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/remko/go-mkvparse 2 | 3 | go 1.16 4 | 5 | require golang.org/x/image v0.0.0-20211028202545-6944b10bf410 6 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | golang.org/x/image v0.0.0-20211028202545-6944b10bf410 h1:hTftEOvwiOq2+O8k2D5/Q7COC7k5Qcrgc2TFURJYnvQ= 2 | golang.org/x/image v0.0.0-20211028202545-6944b10bf410/go.mod h1:023OzeP/+EPmXeapQh35lcL3II3LrY8Ic+EFFKVhULM= 3 | golang.org/x/text v0.3.6 h1:aRYxNxv6iGQlyVaZmk6ZgYEDa+Jg18DxebPSrd6bg1M= 4 | golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 5 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e h1:FDhOuMEY4JVRztM/gsbk+IKUQ8kj74bxZrgw87eMMVc= 6 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod 
h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 7 | -------------------------------------------------------------------------------- /handlers.go: -------------------------------------------------------------------------------- 1 | package mkvparse 2 | 3 | import ( 4 | "os" 5 | "time" 6 | ) 7 | 8 | //////////////////////////////////////////////////////////////////////////////// 9 | 10 | // Handler to extract cover image data 11 | // 12 | // Needs the section parser to handle `AttachmentsElement` 13 | type CoverHandler struct { 14 | DefaultHandler 15 | 16 | currentAttachmentData []byte 17 | currentAttachmentFileName string 18 | currentAttachmentMIMEType string 19 | 20 | Data []byte 21 | MIMEType string 22 | } 23 | 24 | func (p *CoverHandler) HandleMasterEnd(id ElementID, info ElementInfo) error { 25 | if id == AttachedFileElement && (p.currentAttachmentFileName == "cover.jpg" || p.currentAttachmentFileName == "cover.png") { 26 | p.Data = p.currentAttachmentData 27 | p.MIMEType = p.currentAttachmentMIMEType 28 | } 29 | return nil 30 | } 31 | 32 | func (p *CoverHandler) HandleString(id ElementID, value string, info ElementInfo) error { 33 | if id == FileNameElement { 34 | p.currentAttachmentFileName = value 35 | } else if id == FileMimeTypeElement { 36 | p.currentAttachmentMIMEType = value 37 | } 38 | return nil 39 | } 40 | 41 | func (p *CoverHandler) HandleBinary(id ElementID, value []byte, info ElementInfo) error { 42 | if id == FileDataElement { 43 | p.currentAttachmentData = value 44 | } 45 | return nil 46 | } 47 | 48 | func ParseCover(path string) ([]byte, string, error) { 49 | file, err := os.Open(path) 50 | if err != nil { 51 | return nil, "", err 52 | } 53 | defer file.Close() 54 | 55 | handler := CoverHandler{} 56 | err = ParseSections(file, &handler, AttachmentsElement) 57 | if err != nil { 58 | return nil, "", err 59 | } 60 | 61 | return handler.Data, handler.MIMEType, nil 62 | } 63 | 64 | 
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 65 | 66 | // Handler that chains multiple handlers. 67 | // 68 | // All handlers are called in sequence. If one of the handers requests to descend, the parser will descend. 69 | type HandlerChain struct { 70 | Handlers []Handler 71 | } 72 | 73 | func (c *HandlerChain) HandleMasterBegin(id ElementID, info ElementInfo) (bool, error) { 74 | descend := false 75 | for _, h := range c.Handlers { 76 | d, err := h.HandleMasterBegin(id, info) 77 | if err != nil { 78 | return descend, err 79 | } 80 | descend = descend || d 81 | } 82 | return descend, nil 83 | } 84 | 85 | func (c *HandlerChain) HandleMasterEnd(id ElementID, info ElementInfo) error { 86 | for _, h := range c.Handlers { 87 | if err := h.HandleMasterEnd(id, info); err != nil { 88 | return err 89 | } 90 | } 91 | return nil 92 | } 93 | 94 | func (c *HandlerChain) HandleString(id ElementID, value string, info ElementInfo) error { 95 | for _, h := range c.Handlers { 96 | if err := h.HandleString(id, value, info); err != nil { 97 | return err 98 | } 99 | } 100 | return nil 101 | } 102 | 103 | func (c *HandlerChain) HandleInteger(id ElementID, value int64, info ElementInfo) error { 104 | for _, h := range c.Handlers { 105 | if err := h.HandleInteger(id, value, info); err != nil { 106 | return err 107 | } 108 | } 109 | return nil 110 | } 111 | 112 | func (c *HandlerChain) HandleFloat(id ElementID, value float64, info ElementInfo) error { 113 | for _, h := range c.Handlers { 114 | if err := h.HandleFloat(id, value, info); err != nil { 115 | return err 116 | } 117 | } 118 | return nil 119 | } 120 | 121 | func (c *HandlerChain) HandleDate(id ElementID, value time.Time, info ElementInfo) error { 122 | for _, h := range c.Handlers { 123 | if err := h.HandleDate(id, value, info); err != nil { 124 | return err 125 | } 126 | } 127 | return nil 128 | } 129 | 130 | func (c *HandlerChain) 
HandleBinary(id ElementID, value []byte, info ElementInfo) error { 131 | for _, h := range c.Handlers { 132 | if err := h.HandleBinary(id, value, info); err != nil { 133 | return err 134 | } 135 | } 136 | return nil 137 | } 138 | 139 | // Creates a new handler that chains `handlers` 140 | func NewHandlerChain(handlers ...Handler) *HandlerChain { 141 | return &HandlerChain{ 142 | Handlers: handlers, 143 | } 144 | } 145 | 146 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 147 | 148 | // Handler that collects tags. 149 | // 150 | // Needs the section parser to handle `TagsElement`. 151 | type TagsHandler struct { 152 | DefaultHandler 153 | 154 | currentTagTrackUIDs []int64 155 | currentTagChapterUIDs []int64 156 | currentTagAttachmentUIDs []int64 157 | currentTagEditionUIDs []int64 158 | 159 | currentTags map[string]string 160 | currentTagName *string 161 | currentTagValue *string 162 | 163 | tags map[string]string 164 | trackTags map[int64]map[string]string 165 | chapterTags map[int64]map[string]string 166 | attachmentTags map[int64]map[string]string 167 | editionTags map[int64]map[string]string 168 | } 169 | 170 | func NewTagsHandler() *TagsHandler { 171 | return &TagsHandler{ 172 | tags: map[string]string{}, 173 | trackTags: map[int64]map[string]string{}, 174 | chapterTags: map[int64]map[string]string{}, 175 | attachmentTags: map[int64]map[string]string{}, 176 | editionTags: map[int64]map[string]string{}, 177 | } 178 | } 179 | 180 | // Retrieves all global tags 181 | func (h *TagsHandler) Tags() map[string]string { 182 | return h.tags 183 | } 184 | 185 | // Retrieves tags for track with UID `uid`. 186 | // 187 | // Returns nil if no tags were encountered for this track. 188 | func (h *TagsHandler) TrackTags(uid int64) map[string]string { 189 | return h.trackTags[uid] 190 | } 191 | 192 | // Retrieves tags for chapter with UID `uid`. 
193 | // 194 | // Returns nil if no tags were encountered for this chapter. 195 | func (h *TagsHandler) ChapterTags(uid int64) map[string]string { 196 | return h.chapterTags[uid] 197 | } 198 | 199 | // Retrieves tags for attachment with UID `uid`. 200 | // 201 | // Returns nil if no tags were encountered for this attachment. 202 | func (h *TagsHandler) AttachmentTags(uid int64) map[string]string { 203 | return h.attachmentTags[uid] 204 | } 205 | 206 | // Retrieves tags for edition with UID `uid`. 207 | // 208 | // Returns nil if no tags were encountered for this edition. 209 | func (h *TagsHandler) EditionTags(uid int64) map[string]string { 210 | return h.editionTags[uid] 211 | } 212 | 213 | func (p *TagsHandler) HandleMasterBegin(id ElementID, info ElementInfo) (bool, error) { 214 | switch id { 215 | case TagElement: 216 | p.resetTagState() 217 | p.currentTags = map[string]string{} 218 | 219 | case SimpleTagElement: 220 | p.resetSimpleTagState() 221 | } 222 | return true, nil 223 | } 224 | 225 | func (p *TagsHandler) HandleMasterEnd(id ElementID, info ElementInfo) error { 226 | switch id { 227 | case TagElement: 228 | if len(p.currentTagAttachmentUIDs) == 0 && len(p.currentTagChapterUIDs) == 0 && len(p.currentTagAttachmentUIDs) == 0 && len(p.currentTagEditionUIDs) == 0 { 229 | for k, v := range p.currentTags { 230 | p.tags[k] = v 231 | } 232 | } else { 233 | for _, uid := range p.currentTagTrackUIDs { 234 | t := p.trackTags[uid] 235 | if t == nil { 236 | p.trackTags[uid] = p.currentTags 237 | } else { 238 | for k, v := range p.currentTags { 239 | t[k] = v 240 | } 241 | } 242 | } 243 | for _, uid := range p.currentTagChapterUIDs { 244 | t := p.chapterTags[uid] 245 | if t == nil { 246 | p.chapterTags[uid] = p.currentTags 247 | } else { 248 | for k, v := range p.currentTags { 249 | t[k] = v 250 | } 251 | } 252 | } 253 | for _, uid := range p.currentTagAttachmentUIDs { 254 | t := p.attachmentTags[uid] 255 | if t == nil { 256 | p.attachmentTags[uid] = p.currentTags 257 
| } else { 258 | for k, v := range p.currentTags { 259 | t[k] = v 260 | } 261 | } 262 | } 263 | for _, uid := range p.currentTagEditionUIDs { 264 | t := p.editionTags[uid] 265 | if t == nil { 266 | p.editionTags[uid] = p.currentTags 267 | } else { 268 | for k, v := range p.currentTags { 269 | t[k] = v 270 | } 271 | } 272 | } 273 | } 274 | p.resetTagState() 275 | 276 | case SimpleTagElement: 277 | if p.currentTagName != nil && p.currentTagValue != nil { 278 | p.currentTags[*p.currentTagName] = *p.currentTagValue 279 | } 280 | p.resetSimpleTagState() 281 | } 282 | return nil 283 | } 284 | 285 | func (p *TagsHandler) HandleString(id ElementID, value string, info ElementInfo) error { 286 | switch id { 287 | case TagNameElement: 288 | p.currentTagName = &value 289 | case TagStringElement: 290 | p.currentTagValue = &value 291 | } 292 | return nil 293 | } 294 | 295 | func (p *TagsHandler) HandleInteger(id ElementID, value int64, info ElementInfo) error { 296 | switch id { 297 | case TagTrackUIDElement: 298 | p.currentTagTrackUIDs = append(p.currentTagTrackUIDs, value) 299 | case TagChapterUIDElement: 300 | p.currentTagChapterUIDs = append(p.currentTagChapterUIDs, value) 301 | case TagAttachmentUIDElement: 302 | p.currentTagAttachmentUIDs = append(p.currentTagAttachmentUIDs, value) 303 | case TagEditionUIDElement: 304 | p.currentTagEditionUIDs = append(p.currentTagEditionUIDs, value) 305 | } 306 | return nil 307 | } 308 | 309 | func (p *TagsHandler) resetTagState() { 310 | p.currentTags = nil 311 | p.currentTagTrackUIDs = nil 312 | p.currentTagChapterUIDs = nil 313 | p.currentTagAttachmentUIDs = nil 314 | p.currentTagEditionUIDs = nil 315 | } 316 | 317 | func (p *TagsHandler) resetSimpleTagState() { 318 | p.currentTagName = nil 319 | p.currentTagValue = nil 320 | } 321 | -------------------------------------------------------------------------------- /internal/generate/generate.go: -------------------------------------------------------------------------------- 1 | package 
main 2 | 3 | import ( 4 | "bytes" 5 | "encoding/xml" 6 | "fmt" 7 | "go/format" 8 | "io" 9 | "log" 10 | "net/http" 11 | "os" 12 | "path" 13 | "regexp" 14 | "sort" 15 | "strings" 16 | "text/template" 17 | "unicode" 18 | ) 19 | 20 | func main() { 21 | if err := generateElements(); err != nil { 22 | log.Fatalf("%v", err) 23 | } 24 | if err := generateTags(); err != nil { 25 | log.Fatalf("%v", err) 26 | } 27 | } 28 | 29 | //////////////////////////////////////////////////////////////////////////////// 30 | // Elements 31 | //////////////////////////////////////////////////////////////////////////////// 32 | 33 | type ElementsTable struct { 34 | XMLName xml.Name `xml:"EBMLSchema"` 35 | Elements []*EBMLSchemaElement `xml:"element"` 36 | } 37 | 38 | type EBMLSchemaElement struct { 39 | Name string `xml:"name,attr"` 40 | ID string `xml:"id,attr"` 41 | Type string `xml:"type,attr"` 42 | Path string `xml:"path,attr"` 43 | Deprecated bool `xml:"-"` 44 | IsRoot bool `xml:"-"` 45 | Restriction struct { 46 | Enums []*EBMLSchemaEnum `xml:"enum"` 47 | } `xml:"restriction"` 48 | Descendants []struct { 49 | Path string 50 | Name string 51 | } `xml:"-"` 52 | } 53 | 54 | type EBMLSchemaEnum struct { 55 | Value string `xml:"value,attr"` 56 | Label string `xml:"label,attr"` 57 | Name string 58 | Type string 59 | } 60 | 61 | var pathCountCleanRE = regexp.MustCompile(`\d*\*\d*\(|\(|\)`) 62 | var pathRE = regexp.MustCompile(`\\(\(\d*-\d*\\\))?(.*)`) 63 | 64 | func generateElements() error { 65 | var elements []*EBMLSchemaElement 66 | haveElement := map[string]bool{} 67 | for _, schema := range []string{ 68 | "https://raw.githubusercontent.com/ietf-wg-cellar/ebml-specification/master/ebml.xml", 69 | // "https://raw.githubusercontent.com/ietf-wg-cellar/matroska-specification/master/ebml_matroska.xml", 70 | "https://raw.githubusercontent.com/ietf-wg-cellar/matroska-specification/v03/ebml_matroska.xml", 71 | } { 72 | isLegacySchema := strings.HasSuffix(schema, "ebml_matroska.xml") 73 | sb, err 
:= loadSchema(schema) 74 | if err != nil { 75 | return err 76 | } 77 | defer sb.Close() 78 | data, err := io.ReadAll(sb) 79 | if err != nil { 80 | return err 81 | } 82 | table := ElementsTable{} 83 | err = xml.Unmarshal(data, &table) 84 | if err != nil { 85 | return err 86 | } 87 | for _, el := range table.Elements { 88 | if _, ok := haveElement[el.Name]; ok { 89 | continue 90 | } 91 | haveElement[el.Name] = true 92 | if isLegacySchema { 93 | el.Path = pathCountCleanRE.ReplaceAllString(el.Path, "") 94 | } 95 | 96 | var enums []*EBMLSchemaEnum 97 | enumNames := map[string]struct{}{} 98 | for i, e := range el.Restriction.Enums { 99 | e.Name = camelCase(e.Label) 100 | if e.Name == "Reserved" { 101 | e.Name = fmt.Sprintf("Reserved%d", i) 102 | } 103 | if _, ok := enumNames[e.Name]; ok { 104 | continue 105 | } 106 | if el.Type == "string" { 107 | e.Type = "string" 108 | e.Value = fmt.Sprintf("\"%s\"", e.Value) 109 | } else { 110 | e.Type = "int64" 111 | } 112 | enums = append(enums, e) 113 | enumNames[e.Name] = struct{}{} 114 | } 115 | el.Restriction.Enums = enums 116 | 117 | elements = append(elements, el) 118 | } 119 | } 120 | 121 | // Add legacy named fields 122 | // elements = append(elements, []*EBMLSchemaElement{ 123 | // {Name: "ChapterTrackNumber", ID: "ChapterTrackUIDElement", Deprecated: true, Type: "uinteger"}, 124 | // {Name: "ReferenceTimeCode", ID: "ReferenceTimestampElement", Deprecated: true, Type: "uinteger"}, 125 | // {Name: "TimeCode", ID: "TimestampElement", Deprecated: true, Type: "uinteger"}, 126 | // {Name: "TimeCodeScale", ID: "TimestampScaleElement", Deprecated: true, Type: "uinteger"}, 127 | // {Name: "TrackTimeCodeScale", ID: "TrackTimestampScaleElement", Deprecated: true, Type: "float"}, 128 | // }...) 
129 | 130 | log.Printf("Generating elements.go ...") 131 | 132 | for _, v := range elements { 133 | v.Name = elementName(v.Name) 134 | for _, del := range elements { 135 | if strings.Count(v.Path, "\\") == 1 { 136 | v.IsRoot = true 137 | } 138 | if isDescendantPath(del.Path, v.Path) { 139 | v.Descendants = append(v.Descendants, struct { 140 | Path string 141 | Name string 142 | }{del.Path, elementName(del.Name)}) 143 | } 144 | } 145 | } 146 | sort.Slice(elements, func(i, j int) bool { 147 | return strings.Compare(elements[i].Name+"Element", elements[j].Name+"Element") < 0 148 | }) 149 | 150 | var buf bytes.Buffer 151 | if err := elementsTemplate.Execute(&buf, elements); err != nil { 152 | return err 153 | } 154 | 155 | // log.Printf("Pre-format: %s", buf.String()) 156 | formatted, err := format.Source(buf.Bytes()) 157 | if err != nil { 158 | return err 159 | } 160 | 161 | return os.WriteFile("elements.go", formatted, 0644) 162 | } 163 | 164 | func elementName(n string) string { 165 | return strings.Replace(n, "-", "", -1) 166 | } 167 | 168 | func isDescendantPath(p1, p2 string) bool { 169 | if p1 == p2 { 170 | return false 171 | } 172 | m1 := pathRE.FindStringSubmatch(p1) 173 | if m1 == nil { 174 | panic(fmt.Sprintf("unable to match path: %v", p1)) 175 | } 176 | if m1[1] != "" { 177 | return true 178 | } 179 | 180 | m2 := pathRE.FindStringSubmatch(p2) 181 | if m2 == nil { 182 | panic(fmt.Sprintf("unable to match path: %v", p2)) 183 | } 184 | return strings.HasPrefix(m1[2], m2[2]) 185 | } 186 | 187 | var elementsTemplate = template.Must(template.New("").Parse(`// Code generated by generate.go. DO NOT EDIT. 188 | 189 | package mkvparse 190 | 191 | // Supported ElementIDs. See https://www.matroska.org/technical/elements.html 192 | const ( 193 | {{- range . }} 194 | {{ .Name }}Element ElementID = {{ .ID -}} {{- if .Deprecated -}}// Deprecated. 
Do not use.{{- end -}} 195 | {{end }} 196 | ) 197 | 198 | func getElementType(el ElementID) elementType { 199 | switch (el) { 200 | {{- range . -}} 201 | {{- if not .Deprecated }} 202 | case {{ .Name }}Element: 203 | {{- if eq .Type "master" }} 204 | return masterType 205 | {{- else if eq .Type "uinteger" }} 206 | return uintegerType 207 | {{- else if eq .Type "integer" }} 208 | return integerType 209 | {{- else if eq .Type "binary" }} 210 | return binaryType 211 | {{- else if eq .Type "utf-8" }} 212 | return utf8Type 213 | {{- else if eq .Type "string" }} 214 | return stringType 215 | {{- else if eq .Type "float" }} 216 | return floatType 217 | {{- else if eq .Type "date" }} 218 | return dateType 219 | {{- end -}} 220 | {{ end -}} 221 | {{ end }} 222 | default: 223 | return elementType(0) 224 | } 225 | } 226 | 227 | var elementNames = map[ElementID]string { 228 | {{- range . }} 229 | {{- if not .Deprecated }} 230 | {{ .Name }}Element: {{ printf "%q" .Name }}, 231 | {{- end -}} 232 | {{- end }} 233 | } 234 | 235 | func isDescendantElement(p1, p2 ElementID) bool { 236 | switch (p2) { 237 | {{ range . -}} 238 | {{ if eq .Type "master" -}} 239 | case {{ .Name }}Element: // {{ .Path }} 240 | switch(p1) { 241 | {{ range .Descendants -}} 242 | case {{ .Name }}Element: // {{ .Path }} 243 | return true 244 | {{ end -}} 245 | default: 246 | return false 247 | } 248 | {{ end -}} 249 | {{ end -}} 250 | default: 251 | return false 252 | } 253 | } 254 | 255 | func isRootElement(el ElementID) bool { 256 | switch (el) { 257 | {{ range . -}} 258 | {{ if .IsRoot -}} 259 | case {{ .Name }}Element: // {{ .Path }} 260 | return true 261 | {{ end -}} 262 | {{ end -}} 263 | default: 264 | return false 265 | } 266 | } 267 | {{- range . 
-}} 268 | {{- if .Restriction.Enums }} 269 | // Possible {{ .Name}}Element values 270 | const ( 271 | {{- $prefix := .Name -}} 272 | {{- range .Restriction.Enums -}} 273 | {{$prefix}}_{{.Name}} {{.Type}} = {{.Value}} // {{.Label}} 274 | {{ end -}} 275 | ) 276 | {{ end -}} 277 | {{ end -}} 278 | `)) 279 | 280 | //////////////////////////////////////////////////////////////////////////////// 281 | // Tags 282 | //////////////////////////////////////////////////////////////////////////////// 283 | 284 | type Tag struct { 285 | Name string `xml:"name,attr"` 286 | GoName string `xml:"-"` 287 | } 288 | 289 | type TagRegistry struct { 290 | XMLName xml.Name `xml:"matroska_tagging_registry"` 291 | Tags *struct { 292 | Tags []*Tag `xml:"tag"` 293 | } `xml:"tags"` 294 | } 295 | 296 | func generateTags() error { 297 | sb, err := loadSchema("https://raw.githubusercontent.com/ietf-wg-cellar/matroska-specification/master/matroska_tags.xml") 298 | if err != nil { 299 | return err 300 | } 301 | defer sb.Close() 302 | data, err := io.ReadAll(sb) 303 | // data, err := ioutil.ReadFile("specdata.xml") 304 | if err != nil { 305 | return err 306 | } 307 | registry := TagRegistry{} 308 | err = xml.Unmarshal(data, ®istry) 309 | if err != nil { 310 | return err 311 | } 312 | 313 | log.Printf("Generating tags.go ...") 314 | 315 | for _, v := range registry.Tags.Tags { 316 | switch v.Name { 317 | case "BPM", "BPS", "FPS", "IMDB", "ISBN", "ISRC", "LCCN", "MCDI", "TMDB", "TVDB", "URL": 318 | v.GoName = v.Name 319 | case "REPLAYGAIN_GAIN": 320 | v.GoName = "ReplayGainGain" 321 | case "REPLAYGAIN_PEAK": 322 | v.GoName = "ReplayGainPeak" 323 | default: 324 | v.GoName = strings.ReplaceAll(strings.Title(strings.ToLower(strings.ReplaceAll(v.Name, "_", " "))), " ", "") 325 | } 326 | } 327 | sort.Slice(registry.Tags.Tags, func(i, j int) bool { 328 | return strings.Compare("Tag"+registry.Tags.Tags[i].GoName, "Tag"+registry.Tags.Tags[j].GoName) < 0 329 | }) 330 | 331 | var buf bytes.Buffer 332 | if err 
////////////////////////////////////////////////////////////////////////////////

// loadSchema returns the contents of the schema at URL `schema`.
//
// A file in the current directory with the same base name as the URL is used
// when it can be opened; otherwise the schema is downloaded. The caller must
// close the returned reader.
func loadSchema(schema string) (io.ReadCloser, error) {
	_, fn := path.Split(schema)
	if f, err := os.Open(fn); err == nil {
		return f, nil
	}
	log.Printf("Downloading %s ...", schema)
	resp, err := http.Get(schema)
	if err != nil {
		return nil, err
	}
	// Without this check an error page (404, 5xx, ...) would be handed to the
	// XML decoder as if it were the schema.
	if resp.StatusCode != http.StatusOK {
		resp.Body.Close()
		return nil, fmt.Errorf("downloading %s: unexpected HTTP status %s", schema, resp.Status)
	}
	return resp.Body, nil
}

// space matches the text that camelCase turns into a word break.
// NOTE(review): "the" has no word boundaries, so it also matches inside longer
// words; left as is because changing it would rename generated constants.
var space = regexp.MustCompile(`([-.)]|the|\bis\b)`)

// underscore matches the characters that camelCase turns into a word break
// followed by a literal underscore.
var underscore = regexp.MustCompile(`[/(]`)

// camelCase converts a human-readable enum label into a Go identifier suffix,
// e.g. "side by side (left eye first)" becomes "SideBySide_LeftEyeFirst".
//
// Words that are entirely upper case or mixed case keep their letter casing;
// all other words are lower-cased before their first letter is capitalized.
// The words "tff" and "bff" are upper-cased.
func camelCase(text string) string {
	text = space.ReplaceAllString(text, " ")
	text = underscore.ReplaceAllString(text, " _")
	var b strings.Builder
	for _, f := range strings.Fields(text) {
		if strings.HasPrefix(f, "_") {
			b.WriteByte('_')
			f = f[1:]
		}
		switch {
		case f == "tff" || f == "bff":
			f = strings.ToUpper(f)
		case !isUpper(f) && !isMixed(f):
			f = strings.ToLower(f)
		}
		b.WriteString(strings.Title(f))
	}
	return b.String()
}

// isUpper reports whether `s` contains no letter that is not upper case.
// Strings without any letter (such as "170") therefore count as upper case.
func isUpper(s string) bool {
	for _, r := range s {
		if unicode.IsLetter(r) && !unicode.IsUpper(r) {
			return false
		}
	}
	return true
}

// isMixed reports whether `s` contains at least one upper-case letter and at
// least one lower-case letter. Letters without case do not count as either.
func isMixed(s string) bool {
	haveUpper, haveLower := false, false
	for _, r := range s {
		switch {
		case unicode.IsUpper(r):
			haveUpper = true
		case unicode.IsLower(r):
			haveLower = true
		}
	}
	return haveUpper && haveLower
}
53 | HandleMasterBegin(ElementID, ElementInfo) (bool, error) 54 | HandleMasterEnd(ElementID, ElementInfo) error 55 | HandleString(ElementID, string, ElementInfo) error 56 | HandleInteger(ElementID, int64, ElementInfo) error 57 | HandleFloat(ElementID, float64, ElementInfo) error 58 | HandleDate(ElementID, time.Time, ElementInfo) error 59 | HandleBinary(ElementID, []byte, ElementInfo) error 60 | } 61 | 62 | //////////////////////////////////////////////////////////////////////////////// 63 | 64 | // Parse the file pointed to by `path` 65 | func ParsePath(path string, handler Handler) error { 66 | file, err := os.Open(path) 67 | if err != nil { 68 | return err 69 | } 70 | defer file.Close() 71 | return Parse(file, handler) 72 | } 73 | 74 | // Parse the contents of `reader` 75 | func Parse(reader io.Reader, handler Handler) error { 76 | _, err := parseElements(reader, 0, -1, 0, handler) 77 | if err != nil && err != io.EOF { 78 | return err 79 | } 80 | return nil 81 | } 82 | 83 | // Parse all sibling elements on one level until 'size' bytes 84 | // have been read (or until EOF) 85 | func parseElements(reader io.Reader, currentOffset int64, size int64, level int, handler Handler) (count int64, err error) { 86 | for size < 0 || count < size { 87 | elementCount, _, _, err := parseElement(reader, currentOffset+count, level, -1, handler) 88 | if err != nil { 89 | return -1, err 90 | } 91 | count = count + elementCount 92 | } 93 | return count, nil 94 | } 95 | 96 | // Parse all sibling elements on one level until 'size' bytes 97 | // have been read (or until EOF) 98 | func parseUnknownSizeElements(reader io.Reader, currentOffset int64, unknownSizeParent ElementID, level int, handler Handler) (count int64, nextID ElementID, nextIDCount int64, err error) { 99 | for { 100 | elementCount, nextID, nextIDCount, err := parseElement(reader, currentOffset+count, level, unknownSizeParent, handler) 101 | if err != nil { 102 | return -1, -1, -1, err 103 | } 104 | count = count + 
elementCount 105 | if nextID != -1 { 106 | return count, nextID, nextIDCount, nil 107 | } 108 | } 109 | // return count, -1, -1, nil 110 | } 111 | 112 | func skipUnknownSizeElements(reader io.Reader, unknownSizeParent ElementID) (count int64, nextID ElementID, nextIDCount int64, err error) { 113 | for { 114 | id, idCount, err := readElementID(reader) 115 | if err != nil { 116 | return -1, -1, -1, err 117 | } 118 | if isFinishUnknownSizeBlock(id, unknownSizeParent) { 119 | return count, id, idCount, nil 120 | } 121 | size, sizeCount, all1, err := readVarInt(reader) 122 | if err != nil { 123 | return -1, -1, -1, err 124 | } 125 | if all1 { 126 | return -1, -1, -1, fmt.Errorf("nested unknown size not supported") 127 | } 128 | if err := skipData(reader, size); err != nil { 129 | return -1, -1, -1, err 130 | } 131 | count = count + idCount + sizeCount + size 132 | } 133 | } 134 | 135 | func isFinishUnknownSizeBlock(id, parentID ElementID) bool { 136 | // TODO: Known size + End of file 137 | return isDescendantElement(parentID, id) || !isDescendantElement(id, parentID) || isRootElement(id) 138 | } 139 | 140 | // Parse one complete element. 141 | // Recursively descends master elements. 
142 | // If unknownSizeParent is set, returns nextID and nextIDCount if it was read 143 | func parseElement(reader io.Reader, currentOffset int64, level int, unknownSizeParent ElementID, handler Handler) (count int64, nextID ElementID, nextIDCount int64, err error) { 144 | id, idCount, err := readElementID(reader) 145 | if err != nil { 146 | return -1, -1, -1, err 147 | } 148 | if unknownSizeParent != -1 && isFinishUnknownSizeBlock(id, unknownSizeParent) { 149 | return 0, id, idCount, nil 150 | } 151 | count, err = parseElementAfterID(reader, id, currentOffset, currentOffset+idCount, level, unknownSizeParent, handler) 152 | if err != nil { 153 | return -1, -1, -1, err 154 | } 155 | return count + idCount, -1, -1, nil 156 | } 157 | 158 | func parseElementAfterID(reader io.Reader, id ElementID, elementOffset int64, currentOffset int64, level int, unknownSizeParent ElementID, handler Handler) (count int64, err error) { 159 | size, sizeCount, all1, err := readVarInt(reader) 160 | if err != nil { 161 | return -1, err 162 | } 163 | typ := getElementType(id) 164 | // fmt.Printf("@%x %d %s %x %x\n", currentOffset, level, NameForElementID(id), size, typ) 165 | dataOffset := currentOffset + sizeCount 166 | count = sizeCount + size 167 | info := ElementInfo{ 168 | ElementOffset: elementOffset, 169 | Offset: dataOffset, 170 | Size: size, 171 | Level: level, 172 | } 173 | if typ == masterType { 174 | if all1 { 175 | info.Size = -1 176 | } 177 | descend, err := handler.HandleMasterBegin(id, info) 178 | if err != nil { 179 | return -1, err 180 | } 181 | if all1 { 182 | var ucount int64 183 | var nextID ElementID 184 | var nextIDCount int64 185 | if descend { 186 | ucount, nextID, nextIDCount, err = parseUnknownSizeElements(reader, dataOffset, id, level+1, handler) 187 | } else { 188 | ucount, nextID, nextIDCount, err = skipUnknownSizeElements(reader, id) 189 | } 190 | if err != nil { 191 | return -1, err 192 | } 193 | err = handler.HandleMasterEnd(id, info) 194 | if err != nil { 
195 | return -1, err 196 | } 197 | count = sizeCount + ucount 198 | if nextID == -1 { 199 | return count, nil 200 | } 201 | nextcount, err := parseElementAfterID(reader, nextID, dataOffset+count, dataOffset+count+nextIDCount, level, unknownSizeParent, handler) 202 | if err != nil { 203 | return -1, err 204 | } 205 | return count + nextcount + nextIDCount, nil 206 | } else { 207 | if descend { 208 | _, err := parseElements(reader, dataOffset, size, level+1, handler) 209 | if err != nil { 210 | return -1, err 211 | } 212 | } else { 213 | if err := skipData(reader, size); err != nil { 214 | return -1, err 215 | } 216 | } 217 | err = handler.HandleMasterEnd(id, info) 218 | if err != nil { 219 | return -1, err 220 | } 221 | } 222 | return count, nil 223 | } else { 224 | switch typ { 225 | case uintegerType: 226 | data, err := readDataN(reader, size, 8) 227 | if err != nil { 228 | return -1, err 229 | } 230 | err = handler.HandleInteger(id, int64(binary.BigEndian.Uint64(pad(data, 8))), info) 231 | if err != nil { 232 | return -1, err 233 | } 234 | case integerType: 235 | data, err := readDataN(reader, size, 8) 236 | if err != nil { 237 | return -1, err 238 | } 239 | err = handler.HandleInteger(id, convertBytesToSignedInt(data), info) 240 | if err != nil { 241 | return -1, err 242 | } 243 | case floatType: 244 | data, err := readDataN(reader, size, 8) 245 | if err != nil { 246 | return -1, err 247 | } 248 | var value float64 249 | if size == 4 { 250 | value = float64(math.Float32frombits(binary.BigEndian.Uint32(data))) 251 | } else if size == 8 { 252 | value = math.Float64frombits(binary.BigEndian.Uint64(data)) 253 | } else { 254 | return -1, fmt.Errorf("unexpected float size: %d", size) 255 | } 256 | err = handler.HandleFloat(id, value, info) 257 | if err != nil { 258 | return -1, err 259 | } 260 | case dateType: 261 | data, err := readDataN(reader, size, 8) 262 | if err != nil { 263 | return -1, err 264 | } 265 | err = handler.HandleDate(id, 
baseDate.Add(time.Duration(convertBytesToSignedInt(data))), info) 266 | if err != nil { 267 | return -1, err 268 | } 269 | case binaryType: 270 | data, err := readData(reader, size) 271 | if err != nil { 272 | return -1, err 273 | } 274 | err = handler.HandleBinary(id, data, info) 275 | if err != nil { 276 | return -1, err 277 | } 278 | case stringType, utf8Type: 279 | data, err := readData(reader, size) 280 | if err != nil { 281 | return -1, err 282 | } 283 | err = handler.HandleString(id, string(unpadString(data)), info) 284 | if err != nil { 285 | return -1, err 286 | } 287 | } 288 | return count, nil 289 | } 290 | } 291 | 292 | // Gives the human-readable name for the given element ID. 293 | func NameForElementID(id ElementID) string { 294 | name, ok := elementNames[id] 295 | if !ok { 296 | return fmt.Sprintf("UNKNOWN:%x", id) 297 | } 298 | return name 299 | } 300 | 301 | //////////////////////////////////////////////////////////////////////////////// 302 | // Utility 303 | //////////////////////////////////////////////////////////////////////////////// 304 | 305 | var baseDate = time.Date(2001, time.January, 1, 0, 0, 0, 0, time.UTC) 306 | 307 | // readData reads and returns size bytes from r. 308 | // An error is returned if EOF is encountered before the requested bytes have been read. 
309 | func readData(r io.Reader, size int64) ([]byte, error) { 310 | // Use bytes.Buffer to avoid allocating the full size until needed: 311 | // https://github.com/remko/go-mkvparse/issues/4 312 | var buf bytes.Buffer 313 | if _, err := io.CopyN(&buf, r, size); err != nil { 314 | return nil, err 315 | } 316 | return buf.Bytes(), nil 317 | } 318 | 319 | // Read data with a limited size 320 | func readDataN(reader io.Reader, size int64, limit int64) ([]byte, error) { 321 | if size > limit { 322 | return nil, fmt.Errorf("data too large: %d > %d", size, limit) 323 | } 324 | data := make([]byte, size) 325 | _, err := reader.Read(data) 326 | return data, err 327 | } 328 | 329 | func skipData(reader io.Reader, size int64) (err error) { 330 | switch reader := reader.(type) { 331 | case io.Seeker: 332 | _, err = reader.Seek(size, io.SeekCurrent) 333 | default: 334 | _, err = io.CopyN(ioutil.Discard, reader, size) 335 | } 336 | return 337 | } 338 | 339 | func convertBytesToSignedInt(data []byte) int64 { 340 | if data[0] >= 0x80 { 341 | result := make([]byte, len(data)) 342 | for i := range data { 343 | result[i] = ^data[i] 344 | } 345 | return -(int64(binary.BigEndian.Uint64(pad(result, 8))) + 1) 346 | } else { 347 | return int64(binary.BigEndian.Uint64(pad(data, 8))) 348 | } 349 | } 350 | 351 | func pad(b []byte, size int) []byte { 352 | if len(b) == size { 353 | return b 354 | } 355 | tmp := make([]byte, size) 356 | copy(tmp[size-len(b):], b) 357 | return tmp 358 | } 359 | 360 | func unpadString(b []byte) []byte { 361 | for i := len(b) - 1; i >= 0; i-- { 362 | if b[i] != 0x0 { 363 | return b[:i+1] 364 | } 365 | } 366 | return b[0:0] 367 | } 368 | 369 | func readElementID(reader io.Reader) (ElementID, int64, error) { 370 | rawID, count, _, err := readVarIntRaw(reader, false) 371 | return ElementID(rawID), count, err 372 | } 373 | 374 | //////////////////////////////////////////////////////////////////////////////// 375 | 376 | // A handler that does nothing (but recurses 
into master elements). 377 | // Can be embedded into other handler struct to avoid implementing all callbacks. 378 | type DefaultHandler struct{} 379 | 380 | // Returns `true` (recurses into the master element) 381 | func (h DefaultHandler) HandleMasterBegin(id ElementID, info ElementInfo) (bool, error) { 382 | return true, nil 383 | } 384 | 385 | func (h DefaultHandler) HandleMasterEnd(id ElementID, info ElementInfo) error { 386 | return nil 387 | } 388 | 389 | func (h DefaultHandler) HandleString(id ElementID, value string, info ElementInfo) error { 390 | return nil 391 | } 392 | 393 | func (h DefaultHandler) HandleInteger(id ElementID, value int64, info ElementInfo) error { 394 | return nil 395 | } 396 | 397 | func (h DefaultHandler) HandleFloat(id ElementID, value float64, info ElementInfo) error { 398 | return nil 399 | } 400 | 401 | func (h DefaultHandler) HandleDate(id ElementID, value time.Time, info ElementInfo) error { 402 | return nil 403 | } 404 | 405 | func (h DefaultHandler) HandleBinary(id ElementID, value []byte, info ElementInfo) error { 406 | return nil 407 | } 408 | -------------------------------------------------------------------------------- /mkvparse_test.go: -------------------------------------------------------------------------------- 1 | package mkvparse 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "io" 7 | "reflect" 8 | "testing" 9 | "time" 10 | ) 11 | 12 | func TestReadElementID(t *testing.T) { 13 | testIDs := map[ElementID][]byte{ 14 | TimecodeElement: {0xE7}, 15 | EBMLVersionElement: {0x42, 0x86}, 16 | DefaultDurationElement: {0x23, 0xE3, 0x83}, 17 | EBMLElement: {0x1A, 0x45, 0xDF, 0xA3}, 18 | } 19 | for id, encoding := range testIDs { 20 | encoding = append(encoding, 0xde, 0xad, 0xbe, 0xef) 21 | reader := bytes.NewReader(encoding) 22 | result, count, err := readElementID(reader) 23 | if err != nil { 24 | t.Errorf("%x: %v", encoding, err) 25 | } 26 | if count != int64(len(encoding))-4 { 27 | t.Errorf("%x: %d != %d", encoding, count, 
len(encoding)-4) 28 | } 29 | if result != id { 30 | t.Errorf("%x: %x != %x", encoding, result, id) 31 | } 32 | } 33 | } 34 | 35 | ////////////////////////////////////////////////////////////////////// 36 | 37 | type ParseEvent struct { 38 | id ElementID 39 | info ElementInfo 40 | value interface{} 41 | } 42 | 43 | type MasterBeginEvent struct{} 44 | type MasterEndEvent struct{} 45 | 46 | type ParseHandler struct { 47 | events []ParseEvent 48 | skipDescend bool 49 | } 50 | 51 | func (p *ParseHandler) HandleMasterBegin(id ElementID, info ElementInfo) (bool, error) { 52 | p.events = append(p.events, ParseEvent{id, info, MasterBeginEvent{}}) 53 | return !p.skipDescend, nil 54 | } 55 | 56 | func (p *ParseHandler) HandleMasterEnd(id ElementID, info ElementInfo) error { 57 | p.events = append(p.events, ParseEvent{id, info, MasterEndEvent{}}) 58 | return nil 59 | } 60 | 61 | func (p *ParseHandler) HandleString(id ElementID, value string, info ElementInfo) error { 62 | p.events = append(p.events, ParseEvent{id, info, value}) 63 | return nil 64 | } 65 | 66 | func (p *ParseHandler) HandleInteger(id ElementID, value int64, info ElementInfo) error { 67 | p.events = append(p.events, ParseEvent{id, info, value}) 68 | return nil 69 | } 70 | 71 | func (p *ParseHandler) HandleFloat(id ElementID, value float64, info ElementInfo) error { 72 | p.events = append(p.events, ParseEvent{id, info, value}) 73 | return nil 74 | } 75 | 76 | func (p *ParseHandler) HandleDate(id ElementID, value time.Time, info ElementInfo) error { 77 | p.events = append(p.events, ParseEvent{id, info, value}) 78 | return nil 79 | } 80 | 81 | func (p *ParseHandler) HandleBinary(id ElementID, value []byte, info ElementInfo) error { 82 | p.events = append(p.events, ParseEvent{id, info, value}) 83 | return nil 84 | } 85 | 86 | type ParseTest struct { 87 | data []byte 88 | events []ParseEvent 89 | fail bool // error is expected 90 | } 91 | 92 | func TestParseElement(t *testing.T) { 93 | tests := map[string]ParseTest{ 
94 | "time before millenium": { 95 | []byte{0x44, 0x61, 0x88, 0xf6, 0xd3, 0xc2, 0xb9, 0x1b, 0xee, 0x28, 0x00}, 96 | []ParseEvent{{ 97 | DateUTCElement, 98 | ElementInfo{ 99 | Offset: 3, 100 | Size: 8, 101 | Level: 0, 102 | }, 103 | time.Date(1980, time.January, 21, 21, 03, 0, 0, time.UTC), 104 | }}, 105 | false, 106 | }, 107 | "master": { 108 | data: []byte{ 109 | 0x1F, 0x43, 0xB6, 0x75, 0x80 | 0x3, 110 | 0xE7, 0x80 | 0x1, 0x3, 111 | }, 112 | events: []ParseEvent{ 113 | { 114 | ClusterElement, 115 | ElementInfo{ 116 | ElementOffset: 0, 117 | Offset: 5, 118 | Size: 3, 119 | Level: 0, 120 | }, 121 | MasterBeginEvent{}, 122 | }, 123 | { 124 | TimecodeElement, 125 | ElementInfo{ 126 | ElementOffset: 5, 127 | Offset: 7, 128 | Size: 1, 129 | Level: 1, 130 | }, 131 | int64(0x3), 132 | }, 133 | { 134 | ClusterElement, 135 | ElementInfo{ 136 | ElementOffset: 0, 137 | Offset: 5, 138 | Size: 3, 139 | Level: 0, 140 | }, 141 | MasterEndEvent{}, 142 | }, 143 | }, 144 | }, 145 | "unknown type": { 146 | data: []byte{ 147 | 0x1A, 0x45, 0xDF, 0xA4, 0x80 | 0x6, 148 | 0xE7, 0x80 | 0x1, 0x3, 149 | 0xE7, 0x80 | 0x1, 0x3, 150 | }, 151 | events: nil, 152 | }, 153 | "invalid integer size": { 154 | data: []byte{ 155 | 0xE7, 0x80 | 0xa, 0x10, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x02, 0x40, 0x02, 156 | }, 157 | events: nil, 158 | fail: true, 159 | }, 160 | 161 | "master (unknown size)": { 162 | data: []byte{ 163 | 0x1F, 0x43, 0xB6, 0x75, 0xFF, 164 | 0xE7, 0x80 | 0x1, 0x3, 165 | 0xE7, 0x80 | 0x1, 0x3, 166 | 0xE7, 0x80 | 0x1, 0x3, 167 | 0x1F, 0x43, 0xB6, 0x75, 0x80 | 0x3, 168 | 0xE7, 0x80 | 0x1, 0x3, 169 | }, 170 | events: []ParseEvent{ 171 | { 172 | ClusterElement, 173 | ElementInfo{ 174 | ElementOffset: 0, 175 | Offset: 5, 176 | Size: -1, 177 | Level: 0, 178 | }, 179 | MasterBeginEvent{}, 180 | }, 181 | { 182 | TimecodeElement, 183 | ElementInfo{ 184 | ElementOffset: 5, 185 | Offset: 7, 186 | Size: 1, 187 | Level: 1, 188 | }, 189 | int64(0x3), 190 | }, 191 | { 192 | TimecodeElement, 193 | 
ElementInfo{ 194 | ElementOffset: 8, 195 | Offset: 10, 196 | Size: 1, 197 | Level: 1, 198 | }, 199 | int64(0x3), 200 | }, 201 | { 202 | TimecodeElement, 203 | ElementInfo{ 204 | ElementOffset: 11, 205 | Offset: 13, 206 | Size: 1, 207 | Level: 1, 208 | }, 209 | int64(0x3), 210 | }, 211 | { 212 | ClusterElement, 213 | ElementInfo{ 214 | ElementOffset: 0, 215 | Offset: 5, 216 | Size: -1, 217 | Level: 0, 218 | }, 219 | MasterEndEvent{}, 220 | }, 221 | { 222 | ClusterElement, 223 | ElementInfo{ 224 | ElementOffset: 15, 225 | Offset: 20, 226 | Size: 3, 227 | Level: 0, 228 | }, 229 | MasterBeginEvent{}, 230 | }, 231 | { 232 | TimecodeElement, 233 | ElementInfo{ 234 | ElementOffset: 20, 235 | Offset: 22, 236 | Size: 1, 237 | Level: 1, 238 | }, 239 | int64(0x3), 240 | }, 241 | { 242 | ClusterElement, 243 | ElementInfo{ 244 | ElementOffset: 15, 245 | Offset: 20, 246 | Size: 3, 247 | Level: 0, 248 | }, 249 | MasterEndEvent{}, 250 | }, 251 | }, 252 | 253 | // TODO: Test unknown size in unknown size (e.g. \Segment\Cluster(unknown)\BlockGroup(unknown)\BlockDuration) 254 | }, 255 | 256 | // Avoid panicking with a too-large slice allocation when an element claims a 257 | // very large size: https://github.com/remko/go-mkvparse/issues/4 258 | "excessive size": { 259 | []byte{0xa3, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, 0x02, 0x03, 0x04}, 260 | []ParseEvent{}, 261 | true, 262 | }, 263 | } 264 | for name, test := range tests { 265 | name := name 266 | test := test 267 | t.Run(name, func(t *testing.T) { 268 | reader := bytes.NewReader(test.data) 269 | handler := ParseHandler{} 270 | count, nextID, _, err := parseElement(reader, 0, 0, -1, &handler) 271 | if nextID != -1 { 272 | t.Fatalf("unexpected next ID") 273 | } 274 | if test.fail { 275 | if err == nil { 276 | t.Fatal("Unexpectedly succeeded") 277 | } 278 | } else { 279 | if err != nil { 280 | t.Fatal(err) 281 | } 282 | if count != int64(len(test.data)) { 283 | t.Fatalf("Invalid #bytes read: %d != %d. 
Data: %v", count, len(test.data), test.data) 284 | } 285 | if !reflect.DeepEqual(test.events, handler.events) { 286 | t.Fatalf("Invalid events: %#v != %#v", test.events, handler.events) 287 | } 288 | } 289 | }) 290 | } 291 | } 292 | 293 | func TestParseMaster_Skips(t *testing.T) { 294 | tests := []struct { 295 | data []byte 296 | expectedEvents []ParseEvent 297 | }{ 298 | { 299 | data: []byte{ 300 | 0x1F, 0x43, 0xB6, 0x75, 0x80 | 0x3, 301 | 0xE7, 0x80 | 0x1, 0x3, 302 | }, 303 | expectedEvents: []ParseEvent{ 304 | { 305 | ClusterElement, 306 | ElementInfo{ 307 | ElementOffset: 0, 308 | Offset: 5, 309 | Size: 3, 310 | Level: 0, 311 | }, 312 | MasterBeginEvent{}, 313 | }, 314 | { 315 | ClusterElement, 316 | ElementInfo{ 317 | ElementOffset: 0, 318 | Offset: 5, 319 | Size: 3, 320 | Level: 0, 321 | }, 322 | MasterEndEvent{}, 323 | }, 324 | }, 325 | }, 326 | 327 | // Unknown size 328 | { 329 | data: []byte{ 330 | 0x1F, 0x43, 0xB6, 0x75, 0xFF, 331 | 0xE7, 0x80 | 0x1, 0x3, 332 | 0xE7, 0x80 | 0x1, 0x3, 333 | 0xE7, 0x80 | 0x1, 0x3, 334 | 0x1F, 0x43, 0xB6, 0x75, 0x80 | 0x3, 335 | 0xE7, 0x80 | 0x1, 0x3, 336 | }, 337 | expectedEvents: []ParseEvent{ 338 | { 339 | ClusterElement, 340 | ElementInfo{ 341 | ElementOffset: 0, 342 | Offset: 5, 343 | Size: -1, 344 | Level: 0, 345 | }, 346 | MasterBeginEvent{}, 347 | }, 348 | { 349 | ClusterElement, 350 | ElementInfo{ 351 | ElementOffset: 0, 352 | Offset: 5, 353 | Size: -1, 354 | Level: 0, 355 | }, 356 | MasterEndEvent{}, 357 | }, 358 | { 359 | ClusterElement, 360 | ElementInfo{ 361 | ElementOffset: 15, 362 | Offset: 20, 363 | Size: 3, 364 | Level: 0, 365 | }, 366 | MasterBeginEvent{}, 367 | }, 368 | { 369 | ClusterElement, 370 | ElementInfo{ 371 | ElementOffset: 15, 372 | Offset: 20, 373 | Size: 3, 374 | Level: 0, 375 | }, 376 | MasterEndEvent{}, 377 | }, 378 | }, 379 | 380 | // TODO: Test unknown size in unknown size (e.g. 
\Segment\Cluster(unknown)\BlockGroup(unknown)\BlockDuration) 381 | }, 382 | } 383 | for _, test := range tests { 384 | test := test 385 | t.Run(fmt.Sprintf("%v", test.data), func(t *testing.T) { 386 | readers := [](func([]byte) io.Reader){ 387 | func(b []byte) io.Reader { return bytes.NewReader(b) }, 388 | func(b []byte) io.Reader { return bytes.NewBuffer(b) }, 389 | } 390 | 391 | for _, reader := range readers { 392 | reader := reader(test.data) 393 | handler := ParseHandler{skipDescend: true} 394 | count, nextID, _, err := parseElement(reader, 0, 0, -1, &handler) 395 | if nextID != -1 { 396 | t.Fatalf("unexpected next ID") 397 | } 398 | if err != nil { 399 | t.Errorf("%v", err) 400 | continue 401 | } 402 | if count != int64(len(test.data)) { 403 | t.Errorf("Invalid #bytes read: %d != %d. Data: %v", count, len(test.data), test.data) 404 | } 405 | if !reflect.DeepEqual(test.expectedEvents, handler.events) { 406 | t.Errorf("Invalid events: %#v != %#v", test.expectedEvents, handler.events) 407 | } 408 | } 409 | }) 410 | } 411 | } 412 | 413 | func TestParseMaster_SkipsWithInsufficientData(t *testing.T) { 414 | data := []byte{0x1F, 0x43, 0xB6, 0x75, 0x80 | 0x3, 0xE7} 415 | reader := bytes.NewBuffer(data) 416 | handler := ParseHandler{skipDescend: true} 417 | _, nextID, _, err := parseElement(reader, 0, 0, -1, &handler) 418 | if err == nil { 419 | t.Errorf("unexpected success") 420 | } 421 | if nextID != -1 { 422 | t.Fatalf("unexpected next ID") 423 | } 424 | } 425 | -------------------------------------------------------------------------------- /sectionparser.go: -------------------------------------------------------------------------------- 1 | package mkvparse 2 | 3 | import ( 4 | "encoding/binary" 5 | "errors" 6 | "io" 7 | "time" 8 | ) 9 | 10 | var errAbort = errors.New("abort") 11 | 12 | type sectionsHandler struct { 13 | sections map[ElementID]bool 14 | seenSections map[int64]bool 15 | segmentInfo ElementInfo 16 | index map[ElementID]int64 17 | delegateHandler 
Handler 18 | inSeekHead bool 19 | currentSeekPosition int64 20 | currentSeekID ElementID 21 | } 22 | 23 | func (p *sectionsHandler) HandleMasterBegin(id ElementID, info ElementInfo) (bool, error) { 24 | if id == SegmentElement { 25 | p.segmentInfo = info 26 | return p.delegateHandler.HandleMasterBegin(id, info) 27 | } else if info.Level <= 1 { 28 | if id == SeekHeadElement { 29 | p.index = make(map[ElementID]int64) 30 | p.inSeekHead = true 31 | return true, nil 32 | } else if p.sections[id] && !p.seenSections[info.Offset] { 33 | return p.delegateHandler.HandleMasterBegin(id, info) 34 | } else { 35 | return false, nil 36 | } 37 | } else if p.inSeekHead { 38 | return true, nil 39 | } else { 40 | return p.delegateHandler.HandleMasterBegin(id, info) 41 | } 42 | } 43 | 44 | func (p *sectionsHandler) HandleMasterEnd(id ElementID, info ElementInfo) error { 45 | if id == SegmentElement { 46 | return p.delegateHandler.HandleMasterEnd(id, info) 47 | } else if info.Level <= 1 { 48 | if id == SeekHeadElement { 49 | return errAbort 50 | } else if p.sections[id] && !p.seenSections[info.Offset] { 51 | p.seenSections[info.Offset] = true 52 | return p.delegateHandler.HandleMasterEnd(id, info) 53 | } 54 | } else if p.inSeekHead { 55 | if id == SeekElement { 56 | p.index[p.currentSeekID] = p.segmentInfo.Offset + p.currentSeekPosition 57 | } 58 | return nil 59 | } else { 60 | return p.delegateHandler.HandleMasterEnd(id, info) 61 | } 62 | return nil 63 | } 64 | 65 | func (p *sectionsHandler) HandleString(id ElementID, value string, info ElementInfo) error { 66 | if p.inSeekHead { 67 | return nil 68 | } else { 69 | return p.delegateHandler.HandleString(id, value, info) 70 | } 71 | } 72 | 73 | func (p *sectionsHandler) HandleInteger(id ElementID, value int64, info ElementInfo) error { 74 | if p.inSeekHead { 75 | if id == SeekPositionElement { 76 | p.currentSeekPosition = value 77 | } 78 | return nil 79 | } else { 80 | return p.delegateHandler.HandleInteger(id, value, info) 81 | } 82 | } 
83 | 84 | func (p *sectionsHandler) HandleFloat(id ElementID, value float64, info ElementInfo) error { 85 | if p.inSeekHead { 86 | return nil 87 | } else { 88 | return p.delegateHandler.HandleFloat(id, value, info) 89 | } 90 | } 91 | 92 | func (p *sectionsHandler) HandleDate(id ElementID, value time.Time, info ElementInfo) error { 93 | if p.inSeekHead { 94 | return nil 95 | } else { 96 | return p.delegateHandler.HandleDate(id, value, info) 97 | } 98 | } 99 | 100 | func (p *sectionsHandler) HandleBinary(id ElementID, value []byte, info ElementInfo) error { 101 | if p.inSeekHead { 102 | if id == SeekIDElement { 103 | p.currentSeekID = ElementID(binary.BigEndian.Uint64(pad(value, 8))) 104 | } 105 | return nil 106 | } else { 107 | return p.delegateHandler.HandleBinary(id, value, info) 108 | } 109 | } 110 | 111 | // Parses only the given sections of `file`. 112 | // 113 | // When present, uses the seek index to avoid having to parse the entire file 114 | func ParseSections(file io.ReadSeeker, handler Handler, sections ...ElementID) error { 115 | sectionsHandler := sectionsHandler{ 116 | sections: make(map[ElementID]bool), 117 | seenSections: make(map[int64]bool), 118 | delegateHandler: handler, 119 | } 120 | for _, section := range sections { 121 | sectionsHandler.sections[section] = true 122 | } 123 | 124 | // First pass 125 | err := Parse(file, §ionsHandler) 126 | if err == errAbort { 127 | // Second pass 128 | for _, section := range sections { 129 | sectionOffset, ok := sectionsHandler.index[section] 130 | if ok && !sectionsHandler.seenSections[sectionOffset] { 131 | if _, err := file.Seek(sectionOffset, io.SeekStart); err != nil { 132 | return err 133 | } 134 | if _, _, _, err = parseElement(file, sectionOffset, 1, -1, handler); err != nil { 135 | return err 136 | } 137 | sectionsHandler.seenSections[sectionOffset] = true 138 | } 139 | } 140 | return handler.HandleMasterEnd(SegmentElement, sectionsHandler.segmentInfo) 141 | } else if err != nil { 142 | return err 
143 | } 144 | return nil 145 | } 146 | -------------------------------------------------------------------------------- /tags.go: -------------------------------------------------------------------------------- 1 | // Code generated by generate.go. DO NOT EDIT. 2 | 3 | package mkvparse 4 | 5 | // Official tags. See https://www.matroska.org/technical/tagging.html 6 | const ( 7 | Tag_Accompaniment string = "ACCOMPANIMENT" 8 | Tag_Actor string = "ACTOR" 9 | Tag_Address string = "ADDRESS" 10 | Tag_Arranger string = "ARRANGER" 11 | Tag_ArtDirector string = "ART_DIRECTOR" 12 | Tag_Artist string = "ARTIST" 13 | Tag_AssistantDirector string = "ASSISTANT_DIRECTOR" 14 | Tag_BPM string = "BPM" 15 | Tag_BPS string = "BPS" 16 | Tag_Barcode string = "BARCODE" 17 | Tag_CatalogNumber string = "CATALOG_NUMBER" 18 | Tag_Character string = "CHARACTER" 19 | Tag_Choregrapher string = "CHOREGRAPHER" 20 | Tag_Comment string = "COMMENT" 21 | Tag_Composer string = "COMPOSER" 22 | Tag_ComposerNationality string = "COMPOSER_NATIONALITY" 23 | Tag_CompositionLocation string = "COMPOSITION_LOCATION" 24 | Tag_Conductor string = "CONDUCTOR" 25 | Tag_ContentType string = "CONTENT_TYPE" 26 | Tag_Coproducer string = "COPRODUCER" 27 | Tag_Copyright string = "COPYRIGHT" 28 | Tag_CostumeDesigner string = "COSTUME_DESIGNER" 29 | Tag_Country string = "COUNTRY" 30 | Tag_DateDigitized string = "DATE_DIGITIZED" 31 | Tag_DateEncoded string = "DATE_ENCODED" 32 | Tag_DatePurchased string = "DATE_PURCHASED" 33 | Tag_DateRecorded string = "DATE_RECORDED" 34 | Tag_DateReleased string = "DATE_RELEASED" 35 | Tag_DateTagged string = "DATE_TAGGED" 36 | Tag_DateWritten string = "DATE_WRITTEN" 37 | Tag_Description string = "DESCRIPTION" 38 | Tag_Director string = "DIRECTOR" 39 | Tag_DirectorOfPhotography string = "DIRECTOR_OF_PHOTOGRAPHY" 40 | Tag_DistributedBy string = "DISTRIBUTED_BY" 41 | Tag_EditedBy string = "EDITED_BY" 42 | Tag_Email string = "EMAIL" 43 | Tag_EncodedBy string = "ENCODED_BY" 44 | Tag_Encoder 
string = "ENCODER" 45 | Tag_EncoderSettings string = "ENCODER_SETTINGS" 46 | Tag_ExecutiveProducer string = "EXECUTIVE_PRODUCER" 47 | Tag_FPS string = "FPS" 48 | Tag_Fax string = "FAX" 49 | Tag_Genre string = "GENRE" 50 | Tag_IMDB string = "IMDB" 51 | Tag_ISBN string = "ISBN" 52 | Tag_ISRC string = "ISRC" 53 | Tag_InitialKey string = "INITIAL_KEY" 54 | Tag_Instruments string = "INSTRUMENTS" 55 | Tag_Keywords string = "KEYWORDS" 56 | Tag_LCCN string = "LCCN" 57 | Tag_Label string = "LABEL" 58 | Tag_LabelCode string = "LABEL_CODE" 59 | Tag_LawRating string = "LAW_RATING" 60 | Tag_LeadPerformer string = "LEAD_PERFORMER" 61 | Tag_License string = "LICENSE" 62 | Tag_Lyricist string = "LYRICIST" 63 | Tag_Lyrics string = "LYRICS" 64 | Tag_MCDI string = "MCDI" 65 | Tag_MasteredBy string = "MASTERED_BY" 66 | Tag_Measure string = "MEASURE" 67 | Tag_MixedBy string = "MIXED_BY" 68 | Tag_Mood string = "MOOD" 69 | Tag_Original string = "ORIGINAL" 70 | Tag_OriginalMediaType string = "ORIGINAL_MEDIA_TYPE" 71 | Tag_PartNumber string = "PART_NUMBER" 72 | Tag_PartOffset string = "PART_OFFSET" 73 | Tag_Period string = "PERIOD" 74 | Tag_Phone string = "PHONE" 75 | Tag_PlayCounter string = "PLAY_COUNTER" 76 | Tag_Producer string = "PRODUCER" 77 | Tag_ProductionCopyright string = "PRODUCTION_COPYRIGHT" 78 | Tag_ProductionDesigner string = "PRODUCTION_DESIGNER" 79 | Tag_ProductionStudio string = "PRODUCTION_STUDIO" 80 | Tag_Publisher string = "PUBLISHER" 81 | Tag_PurchaseCurrency string = "PURCHASE_CURRENCY" 82 | Tag_PurchaseInfo string = "PURCHASE_INFO" 83 | Tag_PurchaseItem string = "PURCHASE_ITEM" 84 | Tag_PurchaseOwner string = "PURCHASE_OWNER" 85 | Tag_PurchasePrice string = "PURCHASE_PRICE" 86 | Tag_Rating string = "RATING" 87 | Tag_RecordingLocation string = "RECORDING_LOCATION" 88 | Tag_RemixedBy string = "REMIXED_BY" 89 | Tag_ReplayGainGain string = "REPLAYGAIN_GAIN" 90 | Tag_ReplayGainPeak string = "REPLAYGAIN_PEAK" 91 | Tag_Sample string = "SAMPLE" 92 | Tag_ScreenplayBy string 
= "SCREENPLAY_BY" 93 | Tag_SortWith string = "SORT_WITH" 94 | Tag_SoundEngineer string = "SOUND_ENGINEER" 95 | Tag_Subject string = "SUBJECT" 96 | Tag_Subtitle string = "SUBTITLE" 97 | Tag_Summary string = "SUMMARY" 98 | Tag_Synopsis string = "SYNOPSIS" 99 | Tag_TMDB string = "TMDB" 100 | Tag_TVDB string = "TVDB" 101 | Tag_TermsOfUse string = "TERMS_OF_USE" 102 | Tag_ThanksTo string = "THANKS_TO" 103 | Tag_Title string = "TITLE" 104 | Tag_TotalParts string = "TOTAL_PARTS" 105 | Tag_Tuning string = "TUNING" 106 | Tag_URL string = "URL" 107 | Tag_WrittenBy string = "WRITTEN_BY" 108 | ) 109 | -------------------------------------------------------------------------------- /testdata/example-cover.mkv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/remko/go-mkvparse/2180d03e7755c99e976cdf60c281265baa16eee5/testdata/example-cover.mkv -------------------------------------------------------------------------------- /testdata/example-live+junk.mkv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/remko/go-mkvparse/2180d03e7755c99e976cdf60c281265baa16eee5/testdata/example-live+junk.mkv -------------------------------------------------------------------------------- /testdata/example.mkv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/remko/go-mkvparse/2180d03e7755c99e976cdf60c281265baa16eee5/testdata/example.mkv -------------------------------------------------------------------------------- /vint.go: -------------------------------------------------------------------------------- 1 | package mkvparse 2 | 3 | import ( 4 | "encoding/binary" 5 | "fmt" 6 | "io" 7 | "math" 8 | ) 9 | 10 | func readVarInt(reader io.Reader) (int64, int64, bool, error) { 11 | return readVarIntRaw(reader, true) 12 | } 13 | 14 | func readVarIntRaw(reader io.Reader, doMask bool) (int64, int64, bool, 
error) { 15 | b := make([]byte, 1) 16 | _, err := reader.Read(b) 17 | if err != nil { 18 | return -1, -1, false, err 19 | } 20 | 21 | var mask byte 22 | var allmask uint64 23 | var length int 24 | if ((b[0] & 0x80) >> 7) == 1 { 25 | length = 1 26 | mask = 0x7f 27 | allmask = 0x7f 28 | } else if ((b[0] & 0x40) >> 6) == 1 { 29 | length = 2 30 | mask = 0x3f 31 | allmask = 0x3fff 32 | } else if ((b[0] & 0x20) >> 5) == 1 { 33 | length = 3 34 | mask = 0x1f 35 | allmask = 0x1fffff 36 | } else if ((b[0] & 0x10) >> 4) == 1 { 37 | length = 4 38 | mask = 0xf 39 | allmask = 0x0fffffff 40 | } else if ((b[0] & 0x08) >> 3) == 1 { 41 | length = 5 42 | mask = 0x7 43 | allmask = 0x07ffffffff 44 | } else if ((b[0] & 0x04) >> 2) == 1 { 45 | length = 6 46 | mask = 0x3 47 | allmask = 0x03ffffffffff 48 | } else if ((b[0] & 0x02) >> 1) == 1 { 49 | length = 7 50 | mask = 0x1 51 | allmask = 0x01ffffffffffff 52 | } else if ((b[0] & 0x01) >> 0) == 1 { 53 | length = 8 54 | mask = 0x0 55 | allmask = 0x00ffffffffffffff 56 | } else { 57 | return -1, -1, false, fmt.Errorf("invalid varint length") 58 | } 59 | 60 | result := make([]byte, 8) 61 | if doMask { 62 | result[8-length] = b[0] & mask 63 | } else { 64 | result[8-length] = b[0] 65 | } 66 | _, err = reader.Read(result[8-length+1:]) 67 | if err != nil { 68 | return -1, -1, false, err 69 | } 70 | 71 | uiresult := binary.BigEndian.Uint64(result) 72 | return int64(uiresult), int64(length), (uiresult & allmask) == (uint64(math.MaxUint64) & allmask), nil 73 | } 74 | -------------------------------------------------------------------------------- /vint_test.go: -------------------------------------------------------------------------------- 1 | package mkvparse 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "testing" 7 | ) 8 | 9 | func TestReadVarInt(t *testing.T) { 10 | tests := []struct { 11 | in []byte 12 | result int64 13 | size int64 14 | isAll1 bool 15 | isError bool 16 | }{ 17 | // All 1 encodings 18 | {in: []byte{0xff}, result: 127, size: 1, 
isAll1: true, isError: false}, 19 | {in: []byte{0x40, 0x7f}, result: 127, size: 2, isAll1: false, isError: false}, 20 | {in: []byte{0x7f, 0xff}, result: 16383, size: 2, isAll1: true, isError: false}, 21 | 22 | // Zero length 23 | {in: []byte{0x0}, result: 0, size: 0, isAll1: false, isError: true}, 24 | 25 | // Different encodings of '2' 26 | {in: []byte{0x82}, result: 2, size: 1, isAll1: false, isError: false}, 27 | {in: []byte{0x40, 0x02}, result: 2, size: 2, isAll1: false, isError: false}, 28 | {in: []byte{0x20, 0x00, 0x02}, result: 2, size: 3, isAll1: false, isError: false}, 29 | {in: []byte{0x10, 0x00, 0x00, 0x02}, result: 2, size: 4, isAll1: false, isError: false}, 30 | } 31 | for _, test := range tests { 32 | test := test 33 | t.Run(fmt.Sprintf("%v", test.in), func(t *testing.T) { 34 | in := make([]byte, len(test.in)) 35 | copy(in, test.in) 36 | in = append(in, 0xde, 0xad, 0xbe, 0xef) 37 | reader := bytes.NewReader(in) 38 | result, count, isAll1, err := readVarInt(reader) 39 | if test.isError { 40 | if err == nil { 41 | t.Fatalf("unexpected succes") 42 | } 43 | } else { 44 | if err != nil { 45 | t.Fatal(err) 46 | } 47 | if count != test.size { 48 | t.Fatalf("unexpected count: %d != %d", count, test.size) 49 | } 50 | if result != test.result { 51 | t.Fatalf("unexpected result: %d != %d", result, test.result) 52 | } 53 | if isAll1 != test.isAll1 { 54 | t.Fatalf("unexpected all1 %v != %v", isAll1, test.isAll1) 55 | } 56 | } 57 | 58 | }) 59 | } 60 | } 61 | --------------------------------------------------------------------------------