├── LICENSE
├── LUT.npy
├── Makefile
├── colors.pkl
├── convert.py
├── imgToTextColor.py
├── playback.c
├── readme.md
├── screenshot.png
└── videoToTextColor.py


/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2022 The Science Elf
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/LUT.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheScienceElf/Video-to-Text/16b7075e6d3720d28cbd4545134e846a1ffa7abe/LUT.npy


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | CFLAGS += -Wall -I../include -I ./include -L../lib -os
 2 | 
 3 | all: playback.exe
 4 | 
 5 | data.h:
 6 | 	python3 convert.py
 7 | 	xxd -i data > data.h
 8 | 
 9 | playback: playback.c data.h
10 | 	gcc $(CFLAGS) -o playback playback.c
11 | 
12 | playback.exe: playback.c data.h
13 | 	i686-w64-mingw32-gcc $(CFLAGS) -o playback.exe playback.c
14 | 
15 | clean:
16 | 	rm -f data
17 | 	rm -f data.h
18 | 	rm -f playback
19 | 	rm -f playback.exe


--------------------------------------------------------------------------------
/colors.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheScienceElf/Video-to-Text/16b7075e6d3720d28cbd4545134e846a1ffa7abe/colors.pkl


--------------------------------------------------------------------------------
/convert.py:
--------------------------------------------------------------------------------
  1 | import cv2
  2 | import numpy as np
  3 | import time
  4 | import multiprocessing
  5 | from joblib import Parallel, delayed
  6 | 
  7 | aspect_ratio = 16 / 9
  8 | 
  9 | #Dimensions of the output in terminal characters
 10 | width = 80
 11 | height = int(width / (2 * aspect_ratio))
 12 | 
 13 | # Framerate of the source and output video
 14 | src_FPS = 30
 15 | dest_FPS = 15
 16 | 
 17 | 
 18 | num_cores = multiprocessing.cpu_count()
 19 | 
 20 | cap = cv2.VideoCapture('vid.mp4')
 21 | frames = []
 22 | 
 23 | 
 24 | #Our characters, and their approximate brightness values
 25 | charSet = " ,(S#g@"
 26 | levels = [0.000, 1.060, 2.167, 3.036, 3.977, 4.730, 6.000]
 27 | numChrs = len(charSet)
 28 | 
 29 | 
 30 | # Converts a greyscale video frame into a dithered 7-color frame
 31 | def processFrame(scaled):
 32 |   reduced = scaled * 6. / 255
 33 |   
 34 |   out = np.zeros((height, width), dtype= np.int8)
 35 |   
 36 |   line = ''
 37 |   for y in range(height):
 38 |     for x in range(width):
 39 |       level = min(6, max(0, int(reduced[y, x])))
 40 |       
 41 |       error = reduced[y, x] - levels[level]
 42 |   
 43 |       err16 = error / 16
 44 |   
 45 |       if (x + 1) < width:
 46 |         reduced[y    , x + 1] += 7 * err16
 47 |       if (y + 1) < height:
 48 |         reduced[y + 1, x    ] += 5 * err16
 49 |   
 50 |         if (x + 1) < width:
 51 |           reduced[y + 1, x + 1] += 1 * err16
 52 |         if (x - 1) > 0:
 53 |           reduced[y + 1, x - 1] += 3 * err16
 54 |       
 55 |       out[y, x] = level
 56 | 
 57 |   return out
 58 | 
 59 | # Prints out a frame in ASCII
 60 | def toStr(frame):
 61 |   line = ''
 62 |   
 63 |   for y in range(height):
 64 |     for x in range(width):
 65 |       line += charSet[frame[y, x]]
 66 |     line += '\n'
 67 |   
 68 |   return line
 69 | 
 70 | # Compute the prediction matrix for each character combination
 71 | # Each row in this matrix corresponds with a character, and lists
 72 | # in decreasing order, the next most likely character to follow this one
 73 | #
 74 | # We also convert the provided frame to this new markov encoding, and provide
 75 | # the count of each prediction rank to be passed to the huffman encoding
 76 | def computeMarkov(frame):
 77 |   mat = np.zeros((numChrs, numChrs)).astype(np.uint16)
 78 | 
 79 |   h, w = frame.shape
 80 | 
 81 |   prevChar = 0
 82 | 
 83 |   for y in range(h):
 84 |     for x in range(w):
 85 |       char = frame[y, x]
 86 | 
 87 |       mat[prevChar, char] += 1
 88 | 
 89 |       prevChar = char
 90 |   
 91 |   ranks = np.zeros((numChrs, numChrs)).astype(np.uint16)
 92 |   for i in range(numChrs):
 93 |     ranks[i][mat[i].argsort()] = 6 - np.arange(numChrs)
 94 | 
 95 |   cnt = np.zeros(numChrs).astype(np.uint16)
 96 | 
 97 |   out = np.zeros_like(frame)
 98 |   prevChar = 0
 99 |   for y in range(h):
100 |     for x in range(w):
101 |       char = frame[y, x]
102 | 
103 |       out[y, x] = ranks[prevChar, char]
104 |       cnt[out[y, x]] += 1
105 | 
106 |       prevChar = char
107 |   
108 |   return out, ranks, cnt
109 | 
# Computes Huffman encodings based on the counts of each number in the frame
#
# cnts: sequence where cnts[i] is the number of occurrences of symbol i
# Returns (codes, tree):
#   codes - codes[i] is the bit string ('0'/'1' characters) for symbol i
#   tree  - list of (left, right) node index pairs; entries 0..len(cnts)-1 are
#           the leaves, stored as (i, i), and the last entry is the root
def computeHuffman(cnts):
  symbolCount = len(cnts)

  codes = [''] * symbolCount
  tree = [(i, i) for i in range(symbolCount)]

  # Work list of (count, symbols below this node, tree index), kept sorted
  # with the most frequent entry first so the rarest ones sit at the end
  sizes = [(cnts[i], [i], i) for i in range(symbolCount)]
  sizes.sort(reverse = True)

  while len(sizes) > 1:
    # Take the two least frequent entries
    rnum, rchars, rtree = sizes.pop()
    lnum, lchars, ltree = sizes.pop()

    # Add a new tree node joining them
    tree.append((ltree, rtree))

    # Every symbol below the new node gains one leading bit
    for char in lchars:
      codes[char] = '0' + codes[char]
    for char in rchars:
      codes[char] = '1' + codes[char]

    # Merge these entries
    total = lnum + rnum
    merged = (total, lchars + rchars, len(tree) - 1)

    # Insert in front of the first entry that is not larger than the merged
    # count (or at the end if there is none) to keep the list sorted
    insertPos = 0
    while insertPos < len(sizes) and sizes[insertPos][0] > total:
      insertPos += 1
    sizes.insert(insertPos, merged)

  return codes, tree
156 | 
# Take a markov frame and an array of huffman encodings, and create an array of
# bytes corresponding to the compressed frame
#
# markovFrame: 2D array of symbols (indices into `codes`)
# codes:       codes[i] is the '0'/'1' bit string for symbol i
# Returns a list of integers 0..255. The bit stream is read row by row and
# padded with zero bits up to the next byte boundary.
def convertHuffman(markovFrame, codes):
  # Walk the frame that was passed in (not a global) in row-major order and
  # join all codes in one go
  bits = ''.join(codes[symbol] for symbol in markovFrame.flat)

  # Pad this bit-string to be byte-aligned
  bits += "0" * (-len(bits) % 8)

  # Convert each octet to a byte value, most significant bit first
  return [int(bits[i:i + 8], 2) for i in range(0, len(bits), 8)]
185 | 
# Converts a rank matrix into a binary format to be stored in the output file
#
# ranks: iterable of rows; row[c] is the rank of character c, and every row
#        is a permutation of 0..len(row)-1
# Returns a flat list with two values per row (high byte, then low byte).
#
# Each row is stored as a single number in a mixed-radix system: for rank
# 0, 1, 2, ... we record the position of the matching character within the
# list of characters not yet used, with radices len(row), len(row) - 1, ...
def encodeMatrix(ranks):
  out = []

  for row in ranks:
    rowList = list(row)
    # Size comes from the row itself instead of a module-level constant
    size = len(rowList)

    encoding = 0
    fact = 1
    idxs = list(range(size))

    for rank in range(size):
      # Character that holds this rank
      char = rowList.index(rank)
      encoding += idxs.index(char) * fact

      fact *= len(idxs)
      idxs.remove(char)

    high_byte, low_byte = divmod(encoding, 256)

    out.append(high_byte)
    out.append(low_byte)

  return out
210 | 
# Converts the huffman tree into a binary format to be stored in the output file
#
# tree: list of (left, right) node index pairs
# The first len(charSet) entries are skipped and every remaining pair is
# packed into one value as left * 16 + right.
def encodeTree(tree):
  leafCount = len(charSet)
  return [left * 16 + right for (left, right) in tree[leafCount:]]
221 | 
# Load all frames into memory, then convert them to greyscale and resize them to
# our terminal dimensions
vidFrames = []

# Number of source frames consumed per output frame. At least one frame must
# be read per iteration, otherwise `frame` below would never be assigned.
frameStep = max(1, int(src_FPS / dest_FPS))

while(cap.isOpened()):
  if (len(vidFrames) % 500) == 0:
    print('Loading frame %i' % len(vidFrames))

  # Skip frames to reach target framerate
  for i in range(frameStep):
    ret, frame = cap.read()

  # A failed read (end of the video) yields no frame
  if frame is None:
    break

  gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
  scaled = cv2.resize(gray, (width, height))

  vidFrames.append(scaled)

# The video file is no longer needed once every frame is in memory
cap.release()

# Compute dithering for all frames in parallel
print('Dithering Frames')
frames = Parallel(n_jobs=num_cores)(delayed(processFrame)(i) for i in vidFrames)

# Compute markov and huffman encoding for all frames
print('Encoding Frames')
size = 0

# Every frame is written as: rank matrix, huffman tree, compressed characters
with open('data', 'wb') as filehandle:
  for frame in frames:
    markovFrame, ranks, cnts = computeMarkov(frame)

    codes, tree = computeHuffman(cnts)
    chars = convertHuffman(markovFrame, codes)

    matrixData = encodeMatrix(ranks)
    treeData = encodeTree(tree)

    filehandle.write(bytearray(matrixData))
    filehandle.write(bytearray(treeData))
    filehandle.write(bytearray(chars))

    size += len(matrixData) + len(treeData) + len(chars)

# Print the size of the output file in human-readable form
if size > 1048576:
  print('%.1f MB' % (size / 1048576))
elif size > 1024:
  print('%.1f KB' % (size / 1024))
else:
  print('%i B' % (size))


--------------------------------------------------------------------------------
/imgToTextColor.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import cv2
 3 | import pickle
 4 | import sys
 5 | 
 6 | #Width of the output in terminal characters
 7 | width = 80
 8 | height = 1 
 9 | 
10 | 
11 | #Our characters, and their approximate brightness values
12 | charSet = " ,(S#g@@g#S(, "
13 | 
# Builds the ANSI escape sequence that selects a background (bg) and a
# foreground (fg) color from the 256-color palette
def setColor (bg, fg):
  template = "\u001b[48;5;%s;38;5;%sm"
  colorPair = (bg, fg)
  return template % colorPair
17 | 
18 | black = setColor(16, 16)
19 | 
# Load in color lookup table data
# NOTE: unpickling can run arbitrary code, so colors.pkl must come from a
# trusted source (it ships with this repository).
with open("colors.pkl", "rb") as colorFile:
  # The with-block closes the file as soon as the table has been read
  lerped = pickle.load(colorFile)
LUT = np.load("LUT.npy")
23 | 
# Convert an image to a stream of text with ANSI color codes
#
# img: array of pixel rows; the first three values of every pixel are used, in
#      that order, to index LUT (named b, g, r here)
# Returns one string: a colored character per pixel, a newline after every
# row, followed by escape codes that move the cursor back up.
def convertImg(img):
  # Convert the whole image once so every channel value is a plain integer
  pixels = np.round(img).astype(int)

  # Collect the pieces in a list and join once at the end instead of growing
  # a string piece by piece
  parts = []

  for row in pixels:
    for color in row:
      b, g, r = color[0], color[1], color[2]

      # Lookup the color index in the RGB lookup table
      idx = LUT[b, g, r]

      # Get the ANSI color codes and lerp character
      bg, fg, lerp, _ = lerped[idx]

      parts.append(setColor(bg, fg))
      parts.append(charSet[lerp])
    # End each line with a black background to avoid color fringe
    parts.append(black)
    parts.append("\n")

  # Move the cursor back to the top of the frame to prevent rolling
  parts.append("\u001b[%iD\u001b[%iA" % (width, height + 1))
  return "".join(parts)
49 | 
# Entry point: print the image named on the command line as colored text
if len(sys.argv) == 2:
  img = cv2.imread(sys.argv[1])

  # cv2.imread reports a missing or unreadable file by returning None
  if img is None:
    print("Could not read image file: %s" % sys.argv[1])
    sys.exit(1)

  # Match the aspect ratio to that of the provided image
  src_height, src_width, _ = img.shape

  aspect_ratio = src_width / src_height
  # Characters are roughly twice as tall as wide, hence the factor 2. Keep at
  # least one row so very wide images can still be resized.
  height = max(1, int(width / (2 * aspect_ratio)))

  img = cv2.resize(img, (width, height))
  print(convertImg(img))
else:
  print("Expected image file as argument.")


--------------------------------------------------------------------------------
/playback.c:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include "data.h"
  3 | 
  4 | #ifdef WIN32
  5 | #include <windows.h>
  6 | #else
  7 | #endif
  8 | #include <unistd.h>
  9 | 
 10 | #define CHAR_CNT 7
 11 | #define NODE_CNT (CHAR_CNT + CHAR_CNT - 1)
 12 | 
 13 | #define WIDTH 80
 14 | #define HEIGHT 22
 15 | #define FPS 15
 16 | 
 17 | #define FRAME_SIZE ((WIDTH + 1) * HEIGHT + 1)
 18 | 
 19 | const char* char_set = " ,(S#g@";
 20 | 
 21 | char frame[FRAME_SIZE];
 22 | 
 23 | int prediction_matrix[CHAR_CNT][CHAR_CNT];
 24 | int huffman_tree_left[NODE_CNT];
 25 | int huffman_tree_right[NODE_CNT];
 26 | 
 27 | int data_pos = 0;
 28 | 
 29 | int next_char() {
 30 |   if(data_pos < data_len) {
 31 |       return data[data_pos++];
 32 |   }
 33 | 
 34 |   return EOF;
 35 | }
 36 | 
 37 | int peek_char() {
 38 |   if(data_pos < data_len) {
 39 |       return data[data_pos];
 40 |   }
 41 | 
 42 |   return EOF;
 43 | }
 44 | 
 45 | void decode_matrix_row(int row, int row_data) {
 46 |     int idxs[CHAR_CNT];
 47 | 
 48 |     for(int i = 0; i < CHAR_CNT; i++) {
 49 |         idxs[i] = i;
 50 |     }
 51 |     
 52 |     for(int i = 0; i < CHAR_CNT; i++) {
 53 |         int idx = row_data % (CHAR_CNT - i);
 54 |         int rank = idxs[idx];
 55 |         row_data /= (CHAR_CNT - i);
 56 | 
 57 |         // Shift all the idxs over by one
 58 |         for(int j = idx; j < CHAR_CNT - 1; j++) {
 59 |             idxs[j] = idxs[j + 1];
 60 |         }
 61 | 
 62 |         prediction_matrix[row][i] = rank;
 63 |     }
 64 | }
 65 | 
 66 | void decode_tree() {
 67 |     for(int i = 0; i < CHAR_CNT - 1; i++) {
 68 |         int c = next_char();
 69 | 
 70 |         int left = c / 16;
 71 |         int right = c % 16;
 72 | 
 73 |         huffman_tree_left[i + CHAR_CNT] = left;
 74 |         huffman_tree_right[i + CHAR_CNT] = right;
 75 |     }
 76 | }
 77 | 
 78 | void decode_frame() {
 79 |     int bit_cnt = 0;
 80 |     int c = 0;
 81 | 
 82 |     // Start our node at the root of the tree
 83 |     int node = NODE_CNT - 1;
 84 |     int last_char = 0;
 85 | 
 86 |     int out_pos = 0;
 87 | 
 88 |     int out_cnt = 0;
 89 |     int line_cnt = 0;
 90 | 
 91 |     while(1) {
 92 |         if(bit_cnt == 0) {
 93 |             c = next_char();
 94 |             bit_cnt = 8;
 95 |         }
 96 | 
 97 |         if(c == EOF) return;
 98 | 
 99 |         int bit = c & 0b10000000;
100 |         c <<= 1;
101 |         bit_cnt -= 1;
102 |         
103 | 
104 |         node = (bit > 0) ? huffman_tree_right[node] : huffman_tree_left[node];
105 | 
106 |         if(node < CHAR_CNT) {
107 |             last_char = prediction_matrix[last_char][node];
108 |             
109 |             frame[out_pos++] = char_set[last_char];
110 | 
111 |             out_cnt++;
112 |             if(out_cnt >= WIDTH) {
113 |                 out_cnt = 0;
114 |                 
115 |                 line_cnt++;
116 | 
117 |                 if(line_cnt >= HEIGHT) {
118 |                     frame[out_pos++] = '\0';
119 |                     return;
120 |                 }
121 |             }
122 | 
123 |             node = NODE_CNT - 1;
124 |         }
125 |     }
126 |     
127 | 
128 | }
129 | 
130 | int main() {
131 | 
132 |     // Null terminate the string
133 |     frame[FRAME_SIZE - 1] = '\0';
134 | 
135 |     #ifdef WIN32
136 |     COORD coord;
137 |     coord.X = 0;
138 |     coord.Y = 0;
139 | 
140 |     HANDLE hConsole = GetStdHandle(STD_OUTPUT_HANDLE);
141 |     #endif
142 | 
143 |     do {
144 |         for(int row = 0; row < CHAR_CNT; row++) {
145 |             int row_data = (next_char() * 256) + next_char();
146 |             decode_matrix_row(row, row_data);
147 |         }
148 | 
149 |         decode_tree();
150 |         decode_frame();
151 | 
152 |         #ifdef WIN32
153 |         SetConsoleCursorPosition(hConsole, coord);
154 |         printf("%s", frame);
155 |         Sleep(1000 / FPS);
156 |         #else
157 |         printf("%s\n", frame);
158 |         usleep(1000000 / FPS);
159 |         #endif
160 | 
161 | 
162 |     } while(peek_char() != EOF);
163 | 
164 |     return 0;
165 | }


--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
 1 | # Color Video to Text Conversion
 2 | 
 3 | A few tools to convert video and images into ASCII art in an ANSI terminal. These tools support color output using the ANSI 256 color set, as well as the creation of a self-contained playback executable for video converted to text, with compression able to fit 4 minutes of 80 column 15 FPS video onto a single floppy disk!
 4 | 
 5 |  ## Check out [this video](https://www.youtube.com/watch?v=uGoR3ZYZqjc) for more information and to see sample output for video to text conversion.
 6 | 
 7 | ![Screenshot](screenshot.png)
 8 | 
 9 | A sample image converted to text and printed to the terminal.
10 | 
11 | ---
12 | 
13 | **Note:** To run these programs, you will need Python 3 installed, alongside NumPy and OpenCV (for image I/O). Building playback executables with convert.py additionally requires joblib.
14 | 
15 | ## Displaying Images as Text
16 | The python script imgToTextColor.py will print an image file provided as an argument as text to the terminal.
17 | 
18 | `python3 imgToTextColor.py your_image_here.jpg`
19 | 
20 | The width of the output can be configured in the header of the python file.
21 | 
22 | ## Displaying Videos as Text
23 | The python script videoToTextColor.py will play back a video provided as an argument as text to the terminal.
24 | 
25 | `python3 videoToTextColor.py your_video_here.mp4`
26 | 
27 | The width and aspect ratio of the output can be configured in the header of the python file.
28 | 
29 | 
30 | ## Creating Video Playback Executables
31 | The provided makefile allows building programs which will play the compressed text encoding of the video stored in the executable. The target video should be named `vid.mp4`, otherwise the path to the video can be changed in the header of convert.py.
32 | 
33 | To build for Linux targets (using GCC) run 
34 | 
35 | `make playback`
36 | 
37 | Otherwise to build for Windows targets (using MinGW) run 
38 | 
39 | `make playback.exe`
40 | 
41 | Other aspects of the video encoding, such as character width and framerate can be adjusted in both convert.py and playback.c. **Be sure to update these parameters in both files.**
42 | 


--------------------------------------------------------------------------------
/screenshot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheScienceElf/Video-to-Text/16b7075e6d3720d28cbd4545134e846a1ffa7abe/screenshot.png


--------------------------------------------------------------------------------
/videoToTextColor.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import cv2
 3 | import pickle
 4 | import sys
 5 | 
 6 | aspect_ratio = 16 / 9
 7 | 
 8 | #Dimensions of the output in terminal characters
 9 | width = 80
10 | height = int(width / (2 * aspect_ratio))
11 | 
12 | 
13 | 
14 | 
15 | #Our characters, and their approximate brightness values
16 | charSet = " ,(S#g@@g#S(, "
17 | 
# Builds the ANSI escape sequence that selects a background (bg) and a
# foreground (fg) color from the 256-color palette
def setColor (bg, fg):
  template = "\u001b[48;5;%s;38;5;%sm"
  colorPair = (bg, fg)
  return template % colorPair
21 | 
22 | black = setColor(16, 16)
23 | 
# Load in color lookup table data
# NOTE: unpickling can run arbitrary code, so colors.pkl must come from a
# trusted source (it ships with this repository).
with open("colors.pkl", "rb") as colorFile:
  # The with-block closes the file as soon as the table has been read
  lerped = pickle.load(colorFile)
LUT = np.load("LUT.npy")
27 | 
# Convert an image to a stream of text with ANSI color codes
#
# img: array of pixel rows; the first three values of every pixel are used, in
#      that order, to index LUT (named b, g, r here)
# Returns one string: a colored character per pixel, a newline after every
# row, followed by escape codes that move the cursor back up.
def convertImg(img):
  # Convert the whole frame once so every channel value is a plain integer
  pixels = np.round(img).astype(int)

  # Collect the pieces in a list and join once at the end instead of growing
  # a string piece by piece
  parts = []

  for row in pixels:
    for color in row:
      b, g, r = color[0], color[1], color[2]

      # Lookup the color index in the RGB lookup table
      idx = LUT[b, g, r]

      # Get the ANSI color codes and lerp character
      bg, fg, lerp, _ = lerped[idx]

      parts.append(setColor(bg, fg))
      parts.append(charSet[lerp])
    # End each line with a black background to avoid color fringe
    parts.append(black)
    parts.append("\n")

  # Move the cursor back to the top of the frame to prevent rolling
  parts.append("\u001b[%iD\u001b[%iA" % (width, height + 1))
  return "".join(parts)
53 | 
54 | 
# Entry point: play the video named on the command line as colored text
if len(sys.argv) == 2:
  cap = cv2.VideoCapture(sys.argv[1])

  # Tell the user when the file could not be opened instead of exiting silently
  if not cap.isOpened():
    print("Could not open video file: %s" % sys.argv[1])

  try:
    while(cap.isOpened()):
      ret, frame = cap.read()

      # A failed read (end of the video) yields no frame
      if frame is None:
        break

      img = cv2.resize(frame, (width, height))
      print(convertImg(img))
  finally:
    # Always give the capture back, even if playback is interrupted
    cap.release()
else:
  print("Expected video file as argument.")
68 | 
69 | 


--------------------------------------------------------------------------------