├── README.md
├── explainer.md
├── flipped-gif.gif
├── test-gif.gif
└── test-still.png
/README.md:
--------------------------------------------------------------------------------
1 | # image-decoder-api
2 | [Explainer](explainer.md) for an ImageDecoder API extension to [WebCodecs](https://w3c.github.io/webcodecs/#image-decoding).
3 |
--------------------------------------------------------------------------------
/explainer.md:
--------------------------------------------------------------------------------
1 | # ImageDecoder Explainer
2 |
3 | ## Authors:
4 |
5 | - Dale Curtis @ Google
6 |
7 | ## Participate:
8 |
9 | - [Issue Tracker](https://github.com/w3c/webcodecs/issues)
10 | - [Spec](https://w3c.github.io/webcodecs/#image-decoding)
11 |
12 | ## Introduction
13 | Today [`<img>`](https://developer.mozilla.org/en-US/docs/Web/API/HTMLImageElement) elements don't provide access to any frames beyond the first. They also provide no control over which frame is displayed in an animation. As we look to provide audio and video codecs through [WebCodecs](https://github.com/WICG/web-codecs/blob/master/explainer.md), we should consider similar interfaces for images as well.
14 |
15 | We propose a new ImageDecoder API to provide web authors access to an [ImageBitmap](https://developer.mozilla.org/en-US/docs/Web/API/ImageBitmap) of each frame given an arbitrary byte array as input. The returned ImageBitmaps can be used for drawing to canvas or WebGL (as well as any other future ImageBitmap use cases). Since the API is not bound to the DOM it may also be used in workers.
16 |
17 | ## Goals
18 | * Providing explicit control over decoded images and their display.
19 | * Extracting a given frame (or sequence of frames) from animated images.
20 | * Usage of image decoding in out of DOM scenarios (offscreen worker, etc).
21 |
22 | ## Non-Goals
23 | * Defining how authors may provide their own decoders for formats that are unsupported by the user agent.
24 | * E.g., `<img src="cats.pcx">`.
25 | * Defining an ImageEncoder API; that's left for another explainer and is already provided somewhat by the [Canvas.toBlob() API](https://developer.mozilla.org/en-US/docs/Web/API/HTMLCanvasElement/toBlob).
26 |
27 | ## ImageDecoder API
28 |
29 | ### Example 1: Animated GIF Renderer
30 |
31 | ```Javascript
32 | // This example renders an animated image to a canvas via ReadableStream.
33 |
34 | let canvas = document.createElement('canvas');
35 | let canvasContext = canvas.getContext('2d');
36 | let imageDecoder = null;
37 | let imageIndex = 0;
38 |
39 | function renderImage(result) {
40 | canvasContext.drawImage(result.image, 0, 0);
41 |
42 | let track = imageDecoder.tracks.selectedTrack;
43 | if (track.frameCount == 1)
44 | return;
45 |
46 | // Advance to the next frame, wrapping back to frame 0 after the last one.
47 | imageIndex = (imageIndex + 1) % track.frameCount;
48 |
49 | // Decode the next frame ahead of display so it's ready in time.
50 | imageDecoder.decode({frameIndex: imageIndex})
51 | .then(nextResult => setTimeout(_ => { renderImage(nextResult); },
52 | result.image.duration / 1000.0));
53 | }
54 |
55 | function logMetadata() {
56 | console.log('imageDecoder.type = ' + imageDecoder.type);
57 | console.log('imageDecoder.complete = ' + imageDecoder.complete);
58 | console.log('imageDecoder.tracks.length = ' + imageDecoder.tracks.length);
59 | for (var i = 0; i < imageDecoder.tracks.length; ++i) {
60 | let track = imageDecoder.tracks[i];
61 | console.log(`track[${i}].frameCount = ` + track.frameCount);
62 | console.log(`track[${i}].repetitionCount = ` + track.repetitionCount);
63 | console.log(`track[${i}].animated = ` + track.animated);
64 | console.log(`track[${i}].selected = ` + track.selected);
65 | }
66 | }
67 |
68 | function decodeImage(imageByteStream) {
69 | imageDecoder = new ImageDecoder({data: imageByteStream, type: "image/gif"});
70 | imageDecoder.tracks.ready.then(logMetadata);
71 | imageDecoder.decode({frameIndex : imageIndex}).then(renderImage);
72 | }
73 |
74 | fetch("animated.gif").then(response => decodeImage(response.body));
75 | ```
76 |
77 | Output:
78 | ```Text
79 | imageDecoder.type = image/gif
80 | imageDecoder.complete = false
81 | imageDecoder.tracks.length = 1
82 | track[0].frameCount = 20
83 | track[0].repetitionCount = Infinity
84 | track[0].animated = true
85 | track[0].selected = true
86 | ```
87 | 
88 |
89 |
90 | ### Example 2: MJPEG Renderer
91 | ```Javascript
92 | // This example renders a multipart/x-mixed-replace MJPEG stream to canvas.
93 |
94 | let canvas = document.createElement('canvas');
95 | let canvasContext = canvas.getContext('2d');
96 | let imageDecoder = null;
97 |
98 | function decodeImage(imageArrayBufferChunk) {
99 | // JPEG decoders don't have the concept of multiple frames, so we need a new
100 | // ImageDecoder instance for each frame.
101 | imageDecoder = new ImageDecoder({
102 | data: imageArrayBufferChunk,
103 | type: "image/jpeg",
104 | });
105 | imageDecoder.tracks.ready.then(logMetadata);
106 | imageDecoder.decode({frameIndex: 0})
107 | .then(result => canvasContext.drawImage(result.image, 0, 0));
108 | }
109 |
110 | fetch("https://mjpeg_server/mjpeg_stream").then(response => {
111 | const contentType = response.headers.get("Content-Type");
112 | if (!contentType.startsWith("multipart"))
113 | return;
114 |
115 | let boundary = contentType.split("=").pop();
116 |
117 | // See https://github.com/whatwg/fetch/issues/1021#issuecomment-614920327
118 | let parser = new MultipartParser(boundary);
119 | parser.onChunk = arrayBufferChunk => decodeImage(arrayBufferChunk);
120 |
121 | let reader = response.body.getReader();
122 | reader.read().then(function getNextImageChunk({done, value}) {
123 | if (done)
124 | return;
125 | parser.addBinaryData(value);
126 | return reader.read().then(getNextImageChunk);
127 | });
128 | });
129 | ```
130 |
131 | Output:
132 | ```Text
133 | imageDecoder.type = image/jpeg
134 | imageDecoder.complete = false
135 | imageDecoder.tracks.length = 1
136 | track[0].frameCount = 1
137 | track[0].repetitionCount = 0
138 | track[0].animated = false
139 | track[0].selected = true
140 | ...
141 | ```
142 | 
143 |
144 |
145 | ### Example 3: Multi-track image selection.
146 | ```Javascript
147 | // This example renders the animation track of a multi-track image after
148 | // initially selecting the still image.
149 |
150 | let canvas = document.createElement('canvas');
151 | let canvasContext = canvas.getContext('2d');
152 | let imageDecoder = null;
153 | let imageIndex = 0;
154 |
155 | function decodeImage(imageByteStream) {
156 | // preferAnimation=false ensures we select the still image instead of whatever
157 | // the container metadata might otherwise want us to select.
158 | imageDecoder = new ImageDecoder(
159 | {data: imageByteStream, type: "image/avif", preferAnimation: false});
160 |
161 | // This step isn't necessary, but shows how you can get metadata before any
162 | // frames have been decoded.
163 | imageDecoder.tracks.ready.then(logMetadata);
164 |
165 | // Start decoding of the first still image.
166 | imageDecoder.decode({frameIndex: imageIndex}).then(result => {
167 | renderImage(result);
168 |
169 | let track = imageDecoder.tracks.selectedTrack;
170 | if (track.frameCount > 1)
171 | return;
172 |
173 | // Identify the first animated track.
174 | var animationTrackId = -1;
175 | for (var i = 0; i < imageDecoder.tracks.length; ++i) {
176 | if (imageDecoder.tracks[i].animated) {
177 | animationTrackId = i;
178 | break;
179 | }
180 | }
181 |
182 | if (animationTrackId == -1)
183 | return;
184 |
185 | // Switch to the animation track.
186 | imageDecoder.tracks[animationTrackId].selected = true;
187 |
188 | // Start decoding loop for the animation track.
189 | imageIndex = 0;
190 | imageDecoder.decode({frameIndex: imageIndex}).then(renderImage);
191 | });
192 | }
193 |
194 | fetch("animated_and_still.avif").then(response => decodeImage(response.body));
195 | ```
196 |
197 | Output:
198 | ```Text
199 | imageDecoder.type = image/avif
200 | imageDecoder.complete = false
201 | imageDecoder.tracks.length = 2
202 | track[0].frameCount = 1
203 | track[0].repetitionCount = 0
204 | track[0].animated = false
205 | track[0].selected = true
206 | track[1].frameCount = 0
207 | track[1].repetitionCount = 0
208 | track[1].animated = true
209 | track[1].selected = false
210 | ...
211 | ```
212 |  
213 |
214 | ## Open Questions / Notes / Links
215 | * image/svg support is not currently possible in Chrome since it's bound to DOM.
216 | * Using a ReadableStream may over time accumulate enough data to cause OOM.
217 | * Is there more EXIF information that we'd want to expose?
218 |
219 | ## Considered alternatives
220 |
221 | ### Providing image decoders through the VideoDecoder API.
222 | The VideoDecoder API being designed for WebCodecs is intended for transforming demuxed encoded data chunks into decoded frames. This is problematic for image formats, since their containers and encoded data are generally tightly coupled. E.g., you don't generally have a gif demuxer and a gif decoder, just a decoder.
223 |
224 | If we allow VideoDecoder users to enqueue raw image blobs we'll have to output all contained frames at once. Without external knowledge of frame locations within the blob, users will have to decode batches of unknown size or decode everything at once. I.e., there is no piece-wise decoding of an arbitrarily long image sequence and users need to cache all decoded outputs. This feels bad from a utility and resource usage perspective.
225 |
226 | The current API allows users to provide as much or as little data as they want. Images are not decoded until needed. Users don't need to cache their decoded output since they have random access to arbitrary images.
227 |
228 | Other minor cumbersome details:
229 | * Image containers may define image specific fields like repetition count.
230 | * Image containers typically have complicated ICC profiles which need to be applied.
231 |
232 | ### Hang the API off Image/Picture elements
233 | This is precluded due to our goal of having the API work out of DOM.
234 |
235 | ## Stakeholder Feedback / Opposition
236 |
237 | - Chrome : Positive
238 | - Developers : Positive
239 | - Firefox : Positive (unofficially)
240 |
241 | ## Proposed IDL
242 |
243 | ```Javascript
244 | dictionary ImageDecodeResult {
245 | // Actual decoded image; includes resolution information.
246 | required VideoFrame image;
247 |
248 | // Indicates if the decoded image is actually complete.
249 | required boolean complete;
250 | };
251 |
252 | typedef (ArrayBuffer or ArrayBufferView or ReadableStream) ImageBufferSource;
253 | dictionary ImageDecoderInit {
254 | required ImageBufferSource data;
255 |
256 | // Mime type for |data|. Providing the wrong mime type will lead to a decoding
257 | // failure.
258 | required USVString type;
259 |
260 | // Options used for setting up the decoder. See ImageBitmapOptions.
261 | PremultiplyAlpha premultiplyAlpha = "default";
262 | ColorSpaceConversion colorSpaceConversion = "default";
263 |
264 | // Some image decoders support reduced resolution decoding. The desired size
265 | // can optionally be provided to trigger this behavior.
266 | [EnforceRange] unsigned long desiredWidth;
267 | [EnforceRange] unsigned long desiredHeight;
268 |
269 | // For multi-track images, indicates that the animation is preferred over any
270 | // still images that are present. When unspecified the decoder will use hints
271 | // from the data stream to make a decision.
272 | boolean preferAnimation;
273 | };
274 |
275 | dictionary ImageDecodeOptions {
276 | // The index of the frame to decode.
277 | unsigned long frameIndex = 0;
278 |
279 | // When |completeFramesOnly| is set to false, partial progressive frames will
280 | // be returned. When in this mode, decode() calls will resolve only once per
281 | // new partial image at |frameIndex| until the frame is complete.
282 | boolean completeFramesOnly = true;
283 | };
284 |
285 | interface ImageTrackList {
286 | getter ImageTrack(unsigned long index);
287 | readonly attribute unsigned long length;
288 |
289 | // Index of the currently selected track or -1 if no track is selected.
290 | readonly attribute long selectedIndex;
291 |
292 | // Helper accessor for the currently selected track.
293 | readonly attribute ImageTrack? selectedTrack;
294 |
295 | // Property-based promise for signaling initial metadata readiness (e.g.,
296 | // track count, animation status, etc).
297 | [CallWith=ScriptState] readonly attribute Promise ready;
298 | };
299 |
300 | interface ImageTrack {
301 | // The number of frames in the image.
302 | //
303 | // When decoding a ReadableStream with a format that has no fixed count, the
304 | // value will increase as frames are received by the decoder.
305 | readonly attribute unsigned long frameCount;
306 |
307 | // Indicates that this track has more than one frame.
308 | readonly attribute boolean animated;
309 |
310 | // The image's preferred repetition count. Zero means play through only once,
311 | // a value of N means play N + 1 times. Infinity means play forever.
312 | readonly attribute unrestricted float repetitionCount;
313 |
314 | // Modify to choose this track in the image. Identical track selections will
315 | // be ignored.
316 | //
317 | // Changing tracks will reject all outstanding decode requests
318 | // and reset any partially decoded frame state. Outstanding ImageDecodeResults
319 | // and metadata decode promises will remain valid.
320 | attribute boolean selected;
321 | };
322 |
323 | interface ImageDecoder {
324 | constructor(ImageDecoderInit init);
325 |
326 | // Resolves with true if ImageDecoder supports decoding of the given mime type.
327 | static Promise isTypeSupported(DOMString type);
328 |
329 | // Decodes a frame using the given |options| or the first frame if no options
330 | // are provided. If data is still being received, the promise won't be
331 | // resolved or rejected until the given |options.frameIndex| is available,
332 | // all data is received, or a decoding error occurs.
333 | Promise decode(optional ImageDecodeOptions options);
334 |
335 | // Aborts all pending metadata and decode promises.
336 | void reset();
337 |
338 | // Immediately shut down the decoder and free its resources. All pending
339 | // decode requests are aborted.
340 | //
341 | // Not recoverable: make a new ImageDecoder if needed.
342 | void close();
343 |
344 | // The mime type for the decoded image. This reflects the value provided
345 | // during construction.
346 | readonly attribute DOMString type;
347 |
348 | // True if all available data has been received by the decoder. Always true
349 | // when an ArrayBuffer is provided at construction. False for ReadableStream
350 | // usage until all data has been received.
351 | readonly attribute boolean complete;
352 |
353 | // List of tracks available in this image.
354 | //
355 | // When decoding a ReadableStream the array will be empty until enough data to
356 | // decode metadata has been received.
357 | [SameObject] readonly attribute ImageTrackList tracks;
358 | };
359 |
360 | ```
361 |
--------------------------------------------------------------------------------
/flipped-gif.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dalecurtis/image-decoder-api/09e84d235b3ec16db66bb3827b7f682bef5a7a07/flipped-gif.gif
--------------------------------------------------------------------------------
/test-gif.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dalecurtis/image-decoder-api/09e84d235b3ec16db66bb3827b7f682bef5a7a07/test-gif.gif
--------------------------------------------------------------------------------
/test-still.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dalecurtis/image-decoder-api/09e84d235b3ec16db66bb3827b7f682bef5a7a07/test-still.png
--------------------------------------------------------------------------------