├── .gitignore
├── LICENSE
├── README.md
├── go.mod
├── main.go
└── video.rgb24


/.gitignore:
--------------------------------------------------------------------------------
1 | decoded.rgb24
2 | decoded.yuv
3 | encoded.yuv


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Video Encoding from Scratch
 2 | 
 3 | Ever wondered how a video encoder works? This repository walks through building a simple
 4 | video encoder from scratch that achieves a 90% compression ratio!
 5 | 
 6 | https://user-images.githubusercontent.com/511342/203627486-611066cd-f8e5-48c1-863b-eab9529ff90d.mp4
 7 | 
 8 | Start by opening up `main.go`. You can run the code by running
 9 | `cat video.rgb24 | go run main.go` and you should see this as output
10 | 
11 | ```sh
12 | $ cat video.rgb24 | go run main.go
13 | 2022/11/23 13:54:03 Raw size: 53996544 bytes
14 | 2022/11/23 13:54:03 YUV420P size: 26998272 bytes (50.00% original size)
15 | 2022/11/23 13:54:03 RLE size: 13592946 bytes (25.17% original size)
16 | 2022/11/23 13:54:15 DEFLATE size: 5457415 bytes (10.11% original size)
17 | ```
18 | 
19 | The actual encoding is done in about 120 lines of code. This is meant
20 | to be a didactic exercise rather than a comprehensive guide, but maybe
21 | if there's interest we could add more features that appear in modern video
22 | codecs.
23 | 
24 | Sample video from [Ketut Subiyanto](https://www.pexels.com/video/a-little-girl-preparing-a-scramble-egg-meal-4823190/).
25 | 
26 | ## Other languages
27 | 
28 | * Rust: https://github.com/Carel155/codec-from-scratch-rust
29 | 


--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/kevmo314/codec-from-scratch
2 | 
3 | go 1.19
4 | 


--------------------------------------------------------------------------------
/main.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"bytes"
  5 | 	"compress/flate"
  6 | 	"flag"
  7 | 	"io"
  8 | 	"log"
  9 | 	"os"
 10 | )
 11 | 
 12 | // This script shows how to build a basic video encoder. In the real world, video encoders
 13 | // are a lot more complex than this, achieving upwards of 99.9% compression or more, but
 14 | // this guide will show how we can achieve 90% compression with a simple encoder.
 15 | //
 16 | // Fundamentally, video encoding is much like image encoding but with the ability to compress
 17 | // temporally. Image compression often takes advantage of the human eye's insensitivity to
 18 | // small changes in color, which we will also take advantage of in this encoder.
 19 | //
 20 | // Additionally, we will stick to older techniques and skip over more modern ones that involve
 21 | // a lot more math. This is to focus on the core concepts of video encoding instead of
 22 | // getting lost in the "optimal" encoding approach.
 23 | //
 24 | // Run this code with:
 25 | //   cat video.rgb24 | go run main.go
 26 | 
 27 | func main() {
 28 | 	var width, height int
 29 | 	flag.IntVar(&width, "width", 384, "width of the video")
 30 | 	flag.IntVar(&height, "height", 216, "height of the video")
 31 | 	flag.Parse()
 32 | 
 33 | 	frames := make([][]byte, 0)
 34 | 
 35 | 	for {
 36 | 		// Read raw video frames from stdin. In rgb24 format, each pixel (r, g, b) is one byte
 37 | 		// so the total size of the frame is width * height * 3.
 38 | 
 39 | 		frame := make([]byte, width*height*3)
 40 | 
 41 | 		// read the frame from stdin
 42 | 		if _, err := io.ReadFull(os.Stdin, frame); err != nil {
 43 | 			break
 44 | 		}
 45 | 
 46 | 		frames = append(frames, frame)
 47 | 	}
 48 | 
 49 | 	// Now we have our raw video, using a truly ridiculous amount of memory!
 50 | 
 51 | 	rawSize := size(frames)
 52 | 	log.Printf("Raw size: %d bytes", rawSize)
 53 | 
 54 | 	for i, frame := range frames {
 55 | 		// First, we will convert each frame to YUV420 format. Each pixel in RGB24 format
 56 | 		// looks like this:
 57 | 		//
 58 | 		// +-----------+-----------+-----------+-----------+
 59 | 		// |           |           |           |           |
 60 | 		// | (r, g, b) | (r, g, b) | (r, g, b) | (r, g, b) |
 61 | 		// |           |           |           |           |
 62 | 		// +-----------+-----------+-----------+-----------+
 63 | 		// |           |           |           |           |
 64 | 		// | (r, g, b) | (r, g, b) | (r, g, b) | (r, g, b) |
 65 | 		// |           |           |           |           |
 66 | 		// +-----------+-----------+-----------+-----------+  ...
 67 | 		// |           |           |           |           |
 68 | 		// | (r, g, b) | (r, g, b) | (r, g, b) | (r, g, b) |
 69 | 		// |           |           |           |           |
 70 | 		// +-----------+-----------+-----------+-----------+
 71 | 		// |           |           |           |           |
 72 | 		// | (r, g, b) | (r, g, b) | (r, g, b) | (r, g, b) |
 73 | 		// |           |           |           |           |
 74 | 		// +-----------+-----------+-----------+-----------+
 75 | 		//
 76 | 		//                        ...
 77 | 		//
 78 | 		// YUV420 format looks like this:
 79 | 		//
 80 | 		// +-----------+-----------+-----------+-----------+
 81 | 		// |  Y(0, 0)  |  Y(0, 1)  |  Y(0, 2)  |  Y(0, 3)  |
 82 | 		// |  U(0, 0)  |  U(0, 0)  |  U(0, 1)  |  U(0, 1)  |
 83 | 		// |  V(0, 0)  |  V(0, 0)  |  V(0, 1)  |  V(0, 1)  |
 84 | 		// +-----------+-----------+-----------+-----------+
 85 | 		// |  Y(1, 0)  |  Y(1, 1)  |  Y(1, 2)  |  Y(1, 3)  |
 86 | 		// |  U(0, 0)  |  U(0, 0)  |  U(0, 1)  |  U(0, 1)  |
 87 | 		// |  V(0, 0)  |  V(0, 0)  |  V(0, 1)  |  V(0, 1)  |
 88 | 		// +-----------+-----------+-----------+-----------+  ...
 89 | 		// |  Y(2, 0)  |  Y(2, 1)  |  Y(2, 2)  |  Y(2, 3)  |
 90 | 		// |  U(1, 0)  |  U(1, 0)  |  U(1, 1)  |  U(1, 1)  |
 91 | 		// |  V(1, 0)  |  V(1, 0)  |  V(1, 1)  |  V(1, 1)  |
 92 | 		// +-----------+-----------+-----------+-----------+
 93 | 		// |  Y(3, 0)  |  Y(3, 1)  |  Y(3, 2)  |  Y(3, 3)  |
 94 | 		// |  U(1, 0)  |  U(1, 0)  |  U(1, 1)  |  U(1, 1)  |
 95 | 		// |  V(1, 0)  |  V(1, 0)  |  V(1, 1)  |  V(1, 1)  |
 96 | 		// +-----------+-----------+-----------+-----------+
 97 | 		//					      ...
 98 | 		//
 99 | 		// The gist of this format is that instead of the components R, G, B which each
100 | 		// pixel needs, we first convert it to a different space, Y (luminance) and UV (chrominance).
101 | 		// The way to think about this is that the Y component is the brightness of the pixel,
102 | 		// and the UV components are the color of the pixel. The UV components are shared
103 | 		// between 4 adjacent pixels, so we only need to store them once for each 4 pixels.
104 | 		//
105 | 		// The intuition is that the human eye is more sensitive to brightness than color,
106 | 		// so we can store the brightness of each pixel and then store the color of each
107 | 		// 4 pixels. This is a huge space savings, since we only need to store 1/4 of the
108 | 		// pixels in the image.
109 | 		//
110 | 		// If you're seeking more resources, YUV format is also known as YCbCr.
111 | 		// Actually that's not completely true, but it's close enough and color space selection
112 | 		// is a whole other topic.
113 | 		//
114 | 		// By convention, in our byte slice, we store reading left to right then top to bottom.
115 | 		// That is, to find a pixel at row i, column j, we would find the byte at index
116 | 		// (i * width + j) * 3.
117 | 		//
118 | 		// In practice, this doesn't matter that much because our image will be transposed if
119 | 		// this is done backwards. The important thing is that we are consistent.
120 | 
121 | 		Y := make([]byte, width*height)
122 | 		U := make([]float64, width*height)
123 | 		V := make([]float64, width*height)
124 | 		for j := 0; j < width*height; j++ {
125 | 			// Convert the pixel from RGB to YUV
126 | 			r, g, b := float64(frame[3*j]), float64(frame[3*j+1]), float64(frame[3*j+2])
127 | 
128 | 			// These coefficients are from the ITU-R standard.
129 | 			// See https://en.wikipedia.org/wiki/YUV#Y%E2%80%B2UV444_to_RGB888_conversion
130 | 			//
131 | 			// In practice, the actual coefficients vary based on the standard.
132 | 			// For our example, it doesn't matter that much, the key insight is
133 | 			// more that converting to YUV allows us to downsample the color
134 | 			// space efficiently.
135 | 			y := +0.299*r + 0.587*g + 0.114*b
136 | 			u := -0.169*r - 0.331*g + 0.449*b + 128
137 | 			v := 0.499*r - 0.418*g - 0.0813*b + 128
138 | 
139 | 			// Store the YUV values in our byte slices. These are separated to make the
140 | 			// next step a bit easier.
141 | 			Y[j] = uint8(y)
142 | 			U[j] = u
143 | 			V[j] = v
144 | 		}
145 | 
146 | 		// Now, we will downsample the U and V components. This is a process where we
147 | 		// take the 4 pixels that share a U and V component and average them together.
148 | 
149 | 		// We will store the downsampled U and V components in these slices.
150 | 		uDownsampled := make([]byte, width*height/4)
151 | 		vDownsampled := make([]byte, width*height/4)
152 | 		for x := 0; x < height; x += 2 {
153 | 			for y := 0; y < width; y += 2 {
154 | 				// We will average the U and V components of the 4 pixels that share this
155 | 				// U and V component.
156 | 				u := (U[x*width+y] + U[x*width+y+1] + U[(x+1)*width+y] + U[(x+1)*width+y+1]) / 4
157 | 				v := (V[x*width+y] + V[x*width+y+1] + V[(x+1)*width+y] + V[(x+1)*width+y+1]) / 4
158 | 
159 | 				// Store the downsampled U and V components in our byte slices.
160 | 				uDownsampled[x/2*width/2+y/2] = uint8(u)
161 | 				vDownsampled[x/2*width/2+y/2] = uint8(v)
162 | 			}
163 | 		}
164 | 
165 | 		yuvFrame := make([]byte, len(Y)+len(uDownsampled)+len(vDownsampled))
166 | 
167 | 		// Now we need to store the YUV values in a byte slice. To make the data more
168 | 		// compressible, we will store all the Y values first, then all the U values,
169 | 		// then all the V values. This is called a planar format.
170 | 		//
171 | 		// The intuition is that adjacent Y, U, and V values are more likely to be
172 | 		// similar than Y, U, and V themselves. Therefore, storing the components
173 | 		// in a planar format will save more data later.
174 | 
175 | 		copy(yuvFrame, Y)
176 | 		copy(yuvFrame[len(Y):], uDownsampled)
177 | 		copy(yuvFrame[len(Y)+len(uDownsampled):], vDownsampled)
178 | 
179 | 		frames[i] = yuvFrame
180 | 	}
181 | 
182 | 	// Now we have our YUV-encoded video, which takes half the space!
183 | 
184 | 	yuvSize := size(frames)
185 | 	log.Printf("YUV420P size: %d bytes (%0.2f%% original size)", yuvSize, 100*float32(yuvSize)/float32(rawSize))
186 | 
187 | 	// We can also write this out to a file, which can be played with ffplay:
188 | 	//
189 | 	//   ffplay -f rawvideo -pixel_format yuv420p -video_size 384x216 -framerate 25 encoded.yuv
190 | 
191 | 	if err := os.WriteFile("encoded.yuv", bytes.Join(frames, nil), 0644); err != nil {
192 | 		log.Fatal(err)
193 | 	}
194 | 
195 | 	encoded := make([][]byte, len(frames))
196 | 	for i := range frames {
197 | 		// Next, we will simplify the data by computing the delta between each frame.
198 | 		// Observe that in many cases, pixels between frames don't change much. Therefore,
199 | 		// many of the deltas will be small. We can store these small deltas more efficiently.
200 | 		//
201 | 		// Of course, the first frame doesn't have a previous frame so we will store the entire thing.
202 | 		// This is called a keyframe. In the real world, keyframes are computed periodically and
203 | 		// demarcated in the metadata. Keyframes can also be compressed, but we will deal with that later.
204 | 		// In our encoder, we will (by convention) make frame 0 the keyframe.
205 | 		//
206 | 		// The rest of the frames will delta from the previous frame. These are called predicted frames,
207 | 		// also known as P-frames.
208 | 
209 | 		if i == 0 {
210 | 			// This is the keyframe, store the raw frame.
211 | 			encoded[i] = frames[i]
212 | 			continue
213 | 		}
214 | 
215 | 		delta := make([]byte, len(frames[i]))
216 | 		for j := 0; j < len(delta); j++ {
217 | 			delta[j] = frames[i][j] - frames[i-1][j]
218 | 		}
219 | 
220 | 		// Now we have our delta frame, which if we print out contains a bunch of zeroes (woah!).
221 | 		// These zeros are pretty compressible, so we will compress them with run length encoding.
222 | 		// This is a simple algorithm where we store the number of times a value repeats, then the value.
223 | 		//
224 | 		// For example, the sequence 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0
225 | 		// would be stored as 4, 0, 12, 1, 4, 0.
226 | 		//
227 | 		// Run length encoding is no longer used in modern codecs, but it's a good exercise and sufficient
228 | 		// to achieve our compression goals.
229 | 
230 | 		var rle []byte
231 | 		for j := 0; j < len(delta); {
232 | 			// Count the number of times the current value repeats.
233 | 			var count byte
234 | 			for count = 0; count < 255 && j+int(count) < len(delta) && delta[j+int(count)] == delta[j]; count++ {
235 | 			}
236 | 
237 | 			// Store the count and value.
238 | 			rle = append(rle, count)
239 | 			rle = append(rle, delta[j])
240 | 
241 | 			j += int(count)
242 | 		}
243 | 
244 | 		// Save the RLE frame.
245 | 		encoded[i] = rle
246 | 	}
247 | 
248 | 	rleSize := size(encoded)
249 | 	log.Printf("RLE size: %d bytes (%0.2f%% original size)", rleSize, 100*float32(rleSize)/float32(rawSize))
250 | 
251 | 	// This is good, we're at 1/4 the size of the original video. But we can do better.
252 | 	// Note that most of our longest runs are runs of zeros. This is because the delta
253 | 	// between frames is usually small. We have a bit of flexibility in choice of algorithm
254 | 	// here, so to keep the encoder simple, we will defer to using the DEFLATE algorithm
255 | 	// which is available in the standard library. The implementation is beyond the scope
256 | 	// of this demonstration.
257 | 
258 | 	var deflated bytes.Buffer
259 | 	w, err := flate.NewWriter(&deflated, flate.BestCompression)
260 | 	if err != nil {
261 | 		log.Fatal(err)
262 | 	}
263 | 	for i := range frames {
264 | 		if i == 0 {
265 | 			// This is the keyframe, write the raw frame.
266 | 			if _, err := w.Write(frames[i]); err != nil {
267 | 				log.Fatal(err)
268 | 			}
269 | 			continue
270 | 		}
271 | 
272 | 		delta := make([]byte, len(frames[i]))
273 | 		for j := 0; j < len(delta); j++ {
274 | 			delta[j] = frames[i][j] - frames[i-1][j]
275 | 		}
276 | 		if _, err := w.Write(delta); err != nil {
277 | 			log.Fatal(err)
278 | 		}
279 | 	}
280 | 	if err := w.Close(); err != nil {
281 | 		log.Fatal(err)
282 | 	}
283 | 
284 | 	deflatedSize := deflated.Len()
285 | 	log.Printf("DEFLATE size: %d bytes (%0.2f%% original size)", deflatedSize, 100*float32(deflatedSize)/float32(rawSize))
286 | 
287 | 	// You'll note that the DEFLATE step takes quite a while to run. In general, encoders tend to run
288 | 	// much slower than decoders. This is true for most compression algorithms, not just video codecs.
289 | 	// This is because the encoder needs to do a lot of work to analyze the data and make decisions
290 | 	// about how to compress it. The decoder, on the other hand, is just a simple loop that reads the
291 | 	// data and does the opposite of the encoder.
292 | 	//
293 | 	// At this point, we've achieved a 90% compression ratio!
294 | 	//
295 | 	// As an aside, you might be thinking that typical JPEG compression is 90%, so why not JPEG encode
296 | 	// every frame? While true, the algorithm we have supplied above is quite a bit simpler than JPEG.
297 | 	// We demonstrate that taking advantage of temporal locality can yield compression ratios just as
298 | 	// high as JPEG, but with a much simpler algorithm.
299 | 	//
300 | 	// Additionally, the DEFLATE algorithm does not take advantage of the two dimensionality of the data
301 | 	// and is therefore not as efficient as it could be. In the real world, video codecs are much more
302 | 	// complex than the one we have implemented here. They take advantage of the two dimensionality of
303 | 	// the data, they use more sophisticated algorithms, and they are optimized for the hardware they
304 | 	// run on. For example, the H.264 codec is implemented in hardware on many modern GPUs.
305 | 	//
306 | 	// Now we have our encoded video. Let's decode it and see what we get.
307 | 
308 | 	// First, we will decode the DEFLATE stream.
309 | 	var inflated bytes.Buffer
310 | 	r := flate.NewReader(&deflated)
311 | 	if _, err := io.Copy(&inflated, r); err != nil {
312 | 		log.Fatal(err)
313 | 	}
314 | 	if err := r.Close(); err != nil {
315 | 		log.Fatal(err)
316 | 	}
317 | 
318 | 	// Split the inflated stream into frames.
319 | 	decodedFrames := make([][]byte, 0)
320 | 	for {
321 | 		frame := make([]byte, width*height*3/2)
322 | 		if _, err := io.ReadFull(&inflated, frame); err != nil {
323 | 			if err == io.EOF {
324 | 				break
325 | 			}
326 | 			log.Fatal(err)
327 | 		}
328 | 		decodedFrames = append(decodedFrames, frame)
329 | 	}
330 | 
331 | 	// For every frame except the first one, we need to add the previous frame to the delta frame.
332 | 	// This is the opposite of what we did in the encoder.
333 | 	for i := range decodedFrames {
334 | 		if i == 0 {
335 | 			continue
336 | 		}
337 | 
338 | 		for j := 0; j < len(decodedFrames[i]); j++ {
339 | 			decodedFrames[i][j] += decodedFrames[i-1][j]
340 | 		}
341 | 	}
342 | 
343 | 	if err := os.WriteFile("decoded.yuv", bytes.Join(decodedFrames, nil), 0644); err != nil {
344 | 		log.Fatal(err)
345 | 	}
346 | 
347 | 	// Then convert each YUV frame into RGB.
348 | 	for i, frame := range decodedFrames {
349 | 		Y := frame[:width*height]
350 | 		U := frame[width*height : width*height+width*height/4]
351 | 		V := frame[width*height+width*height/4:]
352 | 
353 | 		rgb := make([]byte, 0, width*height*3)
354 | 		for j := 0; j < height; j++ {
355 | 			for k := 0; k < width; k++ {
356 | 				y := float64(Y[j*width+k])
357 | 				u := float64(U[(j/2)*(width/2)+(k/2)]) - 128
358 | 				v := float64(V[(j/2)*(width/2)+(k/2)]) - 128
359 | 
360 | 				r := clamp(y+1.402*v, 0, 255)
361 | 				g := clamp(y-0.344*u-0.714*v, 0, 255)
362 | 				b := clamp(y+1.772*u, 0, 255)
363 | 
364 | 				rgb = append(rgb, uint8(r), uint8(g), uint8(b))
365 | 			}
366 | 		}
367 | 		decodedFrames[i] = rgb
368 | 	}
369 | 
370 | 	// Finally, write the decoded video to a file.
371 | 	//
372 | 	// This video can be played with ffplay:
373 | 	//
374 | 	//   ffplay -f rawvideo -pixel_format rgb24 -video_size 384x216 -framerate 25 decoded.rgb24
375 | 	//
376 | 	out, err := os.Create("decoded.rgb24")
377 | 	if err != nil {
378 | 		log.Fatal(err)
379 | 	}
380 | 	defer out.Close()
381 | 
382 | 	for i := range decodedFrames {
383 | 		if _, err := out.Write(decodedFrames[i]); err != nil {
384 | 			log.Fatal(err)
385 | 		}
386 | 	}
387 | }
388 | 
389 | func size(frames [][]byte) int {
390 | 	var size int
391 | 	for _, frame := range frames {
392 | 		size += len(frame)
393 | 	}
394 | 	return size
395 | }
396 | 
397 | func clamp(x, min, max float64) float64 {
398 | 	if x < min {
399 | 		return min
400 | 	}
401 | 	if x > max {
402 | 		return max
403 | 	}
404 | 	return x
405 | }
406 | 


--------------------------------------------------------------------------------
/video.rgb24:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kevmo314/codec-from-scratch/f70c5e948d57b93a9081161955056758e3a980f8/video.rgb24


--------------------------------------------------------------------------------