├── LICENSE ├── LUT.npy ├── Makefile ├── colors.pkl ├── convert.py ├── imgToTextColor.py ├── playback.c ├── readme.md ├── screenshot.png └── videoToTextColor.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 The Science Elf 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /LUT.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheScienceElf/Video-to-Text/16b7075e6d3720d28cbd4545134e846a1ffa7abe/LUT.npy -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CFLAGS += -Wall -I../include -I ./include -L../lib -os 2 | 3 | all: playback.exe 4 | 5 | data.h: 6 | python3 convert.py 7 | xxd -i data > data.h 8 | 9 | playback: playback.c data.h 10 | gcc $(CFLAGS) -o playback playback.c 11 | 12 | playback.exe: playback.c data.h 13 | i686-w64-mingw32-gcc $(CFLAGS) -o playback.exe playback.c 14 | 15 | clean: 16 | rm -f data 17 | rm -f data.h 18 | rm -f playback 19 | rm -f playback.exe -------------------------------------------------------------------------------- /colors.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheScienceElf/Video-to-Text/16b7075e6d3720d28cbd4545134e846a1ffa7abe/colors.pkl -------------------------------------------------------------------------------- /convert.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import time 4 | import multiprocessing 5 | from joblib import Parallel, delayed 6 | 7 | aspect_ratio = 16 / 9 8 | 9 | #Dimensions of the output in terminal characters 10 | width = 80 11 | height = int(width / (2 * aspect_ratio)) 12 | 13 | # Framerate of the source and output video 14 | src_FPS = 30 15 | dest_FPS = 15 16 | 17 | 18 | num_cores = multiprocessing.cpu_count() 19 | 20 | cap = cv2.VideoCapture('vid.mp4') 21 | frames = [] 22 | 23 | 24 | #Our characters, and their approximate brightness values 25 | charSet = " ,(S#g@" 26 | levels = [0.000, 1.060, 2.167, 3.036, 3.977, 4.730, 6.000] 27 | numChrs = len(charSet) 28 | 29 | 30 | # Converts a greyscale video frame into a dithered 7-color frame 31 | def processFrame(scaled): 32 | reduced = scaled * 6. / 255 33 | 34 | out = np.zeros((height, width), dtype= np.int8) 35 | 36 | line = '' 37 | for y in range(height): 38 | for x in range(width): 39 | level = min(6, max(0, int(reduced[y, x]))) 40 | 41 | error = reduced[y, x] - levels[level] 42 | 43 | err16 = error / 16 44 | 45 | if (x + 1) < width: 46 | reduced[y , x + 1] += 7 * err16 47 | if (y + 1) < height: 48 | reduced[y + 1, x ] += 5 * err16 49 | 50 | if (x + 1) < width: 51 | reduced[y + 1, x + 1] += 1 * err16 52 | if (x - 1) > 0: 53 | reduced[y + 1, x - 1] += 3 * err16 54 | 55 | out[y, x] = level 56 | 57 | return out 58 | 59 | # Prints out a frame in ASCII 60 | def toStr(frame): 61 | line = '' 62 | 63 | for y in range(height): 64 | for x in range(width): 65 | line += charSet[frame[y, x]] 66 | line += '\n' 67 | 68 | return line 69 | 70 | # Compute the prediction matrix for each character combination 71 | # Each row in this matrix corresponds with a character, and lists 72 | # in decreasing order, the next most likely character to follow this one 73 | # 74 | # We also convert the provided frame to this new markov encoding, and provide 75 | # the count of each prediction rank to be passed to the huffman encoding 76 | def computeMarkov(frame): 77 | mat = np.zeros((numChrs, numChrs)).astype(np.uint16) 78 | 79 | h, w = frame.shape 80 | 81 | prevChar = 0 82 | 83 | for y in range(h): 84 | for x in range(w): 85 | char = frame[y, x] 86 | 87 | mat[prevChar, char] += 1 88 | 89 | prevChar = char 90 | 91 | ranks = np.zeros((numChrs, numChrs)).astype(np.uint16) 92 | for i in range(numChrs): 93 | ranks[i][mat[i].argsort()] = 6 - np.arange(numChrs) 94 | 95 | cnt = np.zeros(numChrs).astype(np.uint16) 96 | 97 | out = np.zeros_like(frame) 98 | prevChar = 0 99 | for y in range(h): 100 | for x in range(w): 101 | char = frame[y, x] 102 | 103 | out[y, x] = ranks[prevChar, char] 104 | cnt[out[y, x]] += 1 105 | 106 | prevChar = char 107 | 108 | return out, ranks, cnt 109 | 110 | # Computes Huffman encodings based on the counts of each number in the frame 111 | def computeHuffman(cnts): 112 | codes = [] 113 | sizes = [] 114 | tree = [] 115 | for i in range(len(cnts)): 116 | codes.append('') 117 | sizes.append((cnts[i], [i], i)) 118 | tree.append((i, i)) 119 | 120 | sizes = sorted(sizes, reverse = True) 121 | 122 | while(len(sizes) > 1): 123 | # Take the two least frequent entries 124 | right = sizes.pop() 125 | left = sizes.pop() 126 | 127 | (lnum, lchars, ltree) = left 128 | (rnum, rchars, rtree) = right 129 | 130 | # Add a new tree node 131 | tree.append((ltree, rtree)) 132 | 133 | # Update the encodings 134 | for char in lchars: 135 | codes[char] = '0' + codes[char] 136 | for char in rchars: 137 | codes[char] = '1' + codes[char] 138 | 139 | # Merge these entries 140 | new = (lnum + rnum, lchars + rchars, len(tree) - 1) 141 | 142 | # Find the position in the list to inser these entries 143 | for insertPos in range(len(sizes) + 1): 144 | # Append if we hit the end of the list 145 | if(insertPos == len(sizes)): 146 | sizes.append(new) 147 | break 148 | 149 | cnt, _, _ = sizes[insertPos] 150 | 151 | if(cnt <= lnum + rnum): 152 | sizes.insert(insertPos, new) 153 | break 154 | 155 | return codes, tree 156 | 157 | # Take a markov frame and an array of huffman encodings, and create an array of 158 | # bytes corresponding to the compressed frame 159 | def convertHuffman(markovFrame, codes): 160 | out = '' 161 | 162 | h, w = frame.shape 163 | 164 | for y in range(h): 165 | for x in range(w): 166 | out = out + codes[markovFrame[y, x]] 167 | 168 | # Pad this bit-string to be byte-aligned 169 | padding = (8 - (len(out) % 8)) % 8 170 | out += ("0" * padding) 171 | 172 | # Convert each octet to a char 173 | compressed = [] 174 | for i in range(0, len(out), 8): 175 | byte = out[i:i+8] 176 | char = 0 177 | for bit in range(8): 178 | char *= 2 179 | if byte[bit] == "1": 180 | char += 1 181 | 182 | compressed.append(char) 183 | 184 | return compressed 185 | 186 | # Converts a rank matrix into a binary format to be stored in the output file 187 | def encodeMatrix(ranks): 188 | out = [] 189 | 190 | for row in ranks: 191 | encoding = 0 192 | 193 | fact = 1 194 | idxs = list(range(len(charSet))) 195 | 196 | for rank in range(len(charSet)): 197 | rank = list(row).index(rank) 198 | encoding += idxs.index(rank) * fact 199 | 200 | fact *= len(idxs) 201 | idxs.remove(rank) 202 | 203 | low_byte = int(encoding) % 256 204 | high_byte = (encoding - low_byte) // 256 205 | 206 | out.append(high_byte) 207 | out.append(low_byte) 208 | 209 | return out 210 | 211 | # Converts the huffman tree into a binary format to be stored in the output file 212 | def encodeTree(tree): 213 | tree = tree[len(charSet):] 214 | 215 | out = [] 216 | 217 | for (l, r) in tree: 218 | out.append(l * 16 + r) 219 | 220 | return out 221 | 222 | # Load all frames into memory, then convert them to greyscale and resize them to 223 | # our terminal dimensions 224 | vidFrames = [] 225 | while(cap.isOpened()): 226 | if (len(vidFrames) % 500) == 0: 227 | print('Loading frame %i' % len(vidFrames)) 228 | 229 | # Skip frames to reach target framerate 230 | for i in range(int(src_FPS / dest_FPS)): 231 | ret, frame = cap.read() 232 | 233 | if frame is None: 234 | break 235 | 236 | gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) 237 | scaled = cv2.resize(gray, (width, height)) 238 | 239 | vidFrames.append(scaled) 240 | 241 | # Compute dithering for all frames in parallel 242 | print('Dithering Frames') 243 | frames = Parallel(n_jobs=num_cores)(delayed(processFrame)(i) for i in vidFrames) 244 | 245 | # Compute markov and huffman encoding for all frames 246 | print('Encoding Frames') 247 | out = '' 248 | size = 0 249 | 250 | with open('data', 'wb') as filehandle: 251 | for frame in frames: 252 | markovFrame, ranks, cnts = computeMarkov(frame) 253 | 254 | codes, tree = computeHuffman(cnts) 255 | chars = convertHuffman(markovFrame, codes) 256 | 257 | matrixData = encodeMatrix(ranks) 258 | treeData = encodeTree(tree) 259 | 260 | filehandle.write(bytearray(matrixData)) 261 | filehandle.write(bytearray(treeData)) 262 | filehandle.write(bytearray(chars)) 263 | 264 | size += len(matrixData) + len(treeData) + len(chars) 265 | 266 | # Print the size of the output file in human-readable form 267 | if size > 1048576: 268 | print('%.1f MB' % (size / 1048576)) 269 | elif size > 1024: 270 | print('%.1f KB' % (size / 1024)) 271 | else: 272 | print('%i B' % (size)) -------------------------------------------------------------------------------- /imgToTextColor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import pickle 4 | import sys 5 | 6 | #Width of the output in terminal characters 7 | width = 80 8 | height = 1 9 | 10 | 11 | #Our characters, and their approximate brightness values 12 | charSet = " ,(S#g@@g#S(, " 13 | 14 | # Generates a character sequence to set the foreground and background colors 15 | def setColor (bg, fg): 16 | return "\u001b[48;5;%s;38;5;%sm" % (bg, fg) 17 | 18 | black = setColor(16, 16) 19 | 20 | # Load in color lookup table data 21 | lerped = pickle.load( open( "colors.pkl", "rb" ) ) 22 | LUT = np.load("LUT.npy") 23 | 24 | # Convert an RGB image to a stream of text with ANSI color codes 25 | def convertImg(img): 26 | line = "" 27 | 28 | for row in img: 29 | for color in row: 30 | color = np.round(color).astype(int) 31 | 32 | b, g, r = color[0], color[1], color[2] 33 | 34 | # Lookup the color index in the RGB lookup table 35 | idx = LUT[b, g, r] 36 | 37 | # Get the ANSI color codes and lerp character 38 | bg, fg, lerp, rgb = lerped[idx] 39 | 40 | char = charSet[lerp] 41 | 42 | line += "%s%c" % (setColor(bg, fg), char) 43 | # End each line with a black background to avoid color fringe 44 | line += "%s\n" % black 45 | 46 | # Move the cursor back to the top of the frame to prevent rolling 47 | line += "\u001b[%iD\u001b[%iA" % (width, height + 1) 48 | return line 49 | 50 | if len(sys.argv) == 2: 51 | img = cv2.imread(sys.argv[1]) 52 | 53 | # Match the aspect ratio to that of the provided image 54 | src_height, src_width, _ = img.shape 55 | 56 | aspect_ratio = src_width / src_height 57 | height = int(width / (2 * aspect_ratio)) 58 | 59 | img = cv2.resize(img, (width, height)) 60 | print(convertImg(img)) 61 | else: 62 | print("Expected image file as argument.") -------------------------------------------------------------------------------- /playback.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "data.h" 3 | 4 | #ifdef WIN32 5 | #include 6 | #else 7 | #endif 8 | #include 9 | 10 | #define CHAR_CNT 7 11 | #define NODE_CNT (CHAR_CNT + CHAR_CNT - 1) 12 | 13 | #define WIDTH 80 14 | #define HEIGHT 22 15 | #define FPS 15 16 | 17 | #define FRAME_SIZE ((WIDTH + 1) * HEIGHT + 1) 18 | 19 | const char* char_set = " ,(S#g@"; 20 | 21 | char frame[FRAME_SIZE]; 22 | 23 | int prediction_matrix[CHAR_CNT][CHAR_CNT]; 24 | int huffman_tree_left[NODE_CNT]; 25 | int huffman_tree_right[NODE_CNT]; 26 | 27 | int data_pos = 0; 28 | 29 | int next_char() { 30 | if(data_pos < data_len) { 31 | return data[data_pos++]; 32 | } 33 | 34 | return EOF; 35 | } 36 | 37 | int peek_char() { 38 | if(data_pos < data_len) { 39 | return data[data_pos]; 40 | } 41 | 42 | return EOF; 43 | } 44 | 45 | void decode_matrix_row(int row, int row_data) { 46 | int idxs[CHAR_CNT]; 47 | 48 | for(int i = 0; i < CHAR_CNT; i++) { 49 | idxs[i] = i; 50 | } 51 | 52 | for(int i = 0; i < CHAR_CNT; i++) { 53 | int idx = row_data % (CHAR_CNT - i); 54 | int rank = idxs[idx]; 55 | row_data /= (CHAR_CNT - i); 56 | 57 | // Shift all the idxs over by one 58 | for(int j = idx; j < CHAR_CNT - 1; j++) { 59 | idxs[j] = idxs[j + 1]; 60 | } 61 | 62 | prediction_matrix[row][i] = rank; 63 | } 64 | } 65 | 66 | void decode_tree() { 67 | for(int i = 0; i < CHAR_CNT - 1; i++) { 68 | int c = next_char(); 69 | 70 | int left = c / 16; 71 | int right = c % 16; 72 | 73 | huffman_tree_left[i + CHAR_CNT] = left; 74 | huffman_tree_right[i + CHAR_CNT] = right; 75 | } 76 | } 77 | 78 | void decode_frame() { 79 | int bit_cnt = 0; 80 | int c = 0; 81 | 82 | // Start our node at the root of the tree 83 | int node = NODE_CNT - 1; 84 | int last_char = 0; 85 | 86 | int out_pos = 0; 87 | 88 | int out_cnt = 0; 89 | int line_cnt = 0; 90 | 91 | while(1) { 92 | if(bit_cnt == 0) { 93 | c = next_char(); 94 | bit_cnt = 8; 95 | } 96 | 97 | if(c == EOF) return; 98 | 99 | int bit = c & 0b10000000; 100 | c <<= 1; 101 | bit_cnt -= 1; 102 | 103 | 104 | node = (bit > 0) ? huffman_tree_right[node] : huffman_tree_left[node]; 105 | 106 | if(node < CHAR_CNT) { 107 | last_char = prediction_matrix[last_char][node]; 108 | 109 | frame[out_pos++] = char_set[last_char]; 110 | 111 | out_cnt++; 112 | if(out_cnt >= WIDTH) { 113 | out_cnt = 0; 114 | 115 | line_cnt++; 116 | 117 | if(line_cnt >= HEIGHT) { 118 | frame[out_pos++] = '\0'; 119 | return; 120 | } 121 | } 122 | 123 | node = NODE_CNT - 1; 124 | } 125 | } 126 | 127 | 128 | } 129 | 130 | int main() { 131 | 132 | // Null terminate the string 133 | frame[FRAME_SIZE - 1] = '\0'; 134 | 135 | #ifdef WIN32 136 | COORD coord; 137 | coord.X = 0; 138 | coord.Y = 0; 139 | 140 | HANDLE hConsole = GetStdHandle(STD_OUTPUT_HANDLE); 141 | #endif 142 | 143 | do { 144 | for(int row = 0; row < CHAR_CNT; row++) { 145 | int row_data = (next_char() * 256) + next_char(); 146 | decode_matrix_row(row, row_data); 147 | } 148 | 149 | decode_tree(); 150 | decode_frame(); 151 | 152 | #ifdef WIN32 153 | SetConsoleCursorPosition(hConsole, coord); 154 | printf("%s", frame); 155 | Sleep(1000 / FPS); 156 | #else 157 | printf("%s\n", frame); 158 | usleep(1000000 / FPS); 159 | #endif 160 | 161 | 162 | } while(peek_char() != EOF); 163 | 164 | return 0; 165 | } -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # Color Video to Text Conversion 2 | 3 | A few tools to convert video and images into ASCII art in an ANSI terminal. These tools support color output using the ANSI 256 color set, as well as the creation of a self-contained playback executable for video converted to text, with compression able to fit 4 minutes of 80 column 15 FPS video onto a single floppy disk! 4 | 5 | ## Check out [this video](https://www.youtube.com/watch?v=uGoR3ZYZqjc) for more information and to see sample output for video to text conversion. 6 | 7 | ![Screenshot](screenshot.png) 8 | 9 | A sample image converted to text and printed to the terminal. 10 | 11 | --- 12 | 13 | **Note:** To run these programs, you will need Python 3 installed, alongside NumPy and OpenCV (for image io). 14 | 15 | ## Displaying Images as Text 16 | The python script imageToTextColor.py will print an image file provided as an argument as text to the terminal. 17 | 18 | `python3 imgToTextColor.py your_image_here.jpg` 19 | 20 | The width of the output can be configured in the header of the python file. 21 | 22 | ## Displaying Videos as Text 23 | The python script videoToTextColor.py will play back a video provided as an argument as text to the terminal. 24 | 25 | `python3 videoToTextColor.py your_video_here.mp4` 26 | 27 | The width and aspect ratio of the output can be configured in the header of the python file. 28 | 29 | 30 | ## Creating Video Playback Executables 31 | The provided makefile allows building programs which will play the compressed text encoding of the video stored in the executable. The target video should be named `vid.mp4`, otherwise the path to the video can be changed in the header of convert.py. 32 | 33 | To build for Linux targets (using GCC) run 34 | 35 | `make playback` 36 | 37 | Otherwise to build for Windows targets (using MinGW) run 38 | 39 | `make playback.exe` 40 | 41 | Other aspects of the video encoding, such as character width and framerate can be adjusted in both convert.py and playback.c. **Be sure to update these parameters in both files.** 42 | -------------------------------------------------------------------------------- /screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheScienceElf/Video-to-Text/16b7075e6d3720d28cbd4545134e846a1ffa7abe/screenshot.png -------------------------------------------------------------------------------- /videoToTextColor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import pickle 4 | import sys 5 | 6 | aspect_ratio = 16 / 9 7 | 8 | #Dimensions of the output in terminal characters 9 | width = 80 10 | height = int(width / (2 * aspect_ratio)) 11 | 12 | 13 | 14 | 15 | #Our characters, and their approximate brightness values 16 | charSet = " ,(S#g@@g#S(, " 17 | 18 | # Generates a character sequence to set the foreground and background colors 19 | def setColor (bg, fg): 20 | return "\u001b[48;5;%s;38;5;%sm" % (bg, fg) 21 | 22 | black = setColor(16, 16) 23 | 24 | # Load in color lookup table data 25 | lerped = pickle.load( open( "colors.pkl", "rb" ) ) 26 | LUT = np.load("LUT.npy") 27 | 28 | # Convert an RGB image to a stream of text with ANSI color codes 29 | def convertImg(img): 30 | line = "" 31 | 32 | for row in img: 33 | for color in row: 34 | color = np.round(color).astype(int) 35 | 36 | b, g, r = color[0], color[1], color[2] 37 | 38 | # Lookup the color index in the RGB lookup table 39 | idx = LUT[b, g, r] 40 | 41 | # Get the ANSI color codes and lerp character 42 | bg, fg, lerp, rgb = lerped[idx] 43 | 44 | char = charSet[lerp] 45 | 46 | line += "%s%c" % (setColor(bg, fg), char) 47 | # End each line with a black background to avoid color fringe 48 | line += "%s\n" % black 49 | 50 | # Move the cursor back to the top of the frame to prevent rolling 51 | line += "\u001b[%iD\u001b[%iA" % (width, height + 1) 52 | return line 53 | 54 | 55 | if len(sys.argv) == 2: 56 | cap = cv2.VideoCapture(sys.argv[1]) 57 | 58 | while(cap.isOpened()): 59 | ret, frame = cap.read() 60 | 61 | if frame is None: 62 | break 63 | 64 | img = cv2.resize(frame, (width, height)) 65 | print(convertImg(img)) 66 | else: 67 | print("Expected video file as argument.") 68 | 69 | --------------------------------------------------------------------------------