├── README.md ├── RLE2image.py ├── emma.png ├── harry.jpg ├── image2RLE.py └── zigzag.py /README.md: -------------------------------------------------------------------------------- 1 | # compression-DCT 2 | 3 | (for learning purposes, to understand RLE encoding) 4 | 5 | A basic implementation of image compression using the DCT. Note that JPEG compression exploits many other techniques to achieve 6 | higher compression. Here only quantization (the lossy step) and run-length encoding have been implemented. 7 | 8 | The workflow is: 9 | 1. image2RLE reads an image, performs the DCT, applies quantization (the Q-matrix is the standard JPEG matrix obtained from psycho-visual 10 | experiments) and encodes the result using run-length encoding. 11 | 2. The encoded data is written to a text file named image.txt {this text file has fewer bytes than the original image = compression} 12 | 3. RLE2image reads image.txt and decodes it back into an image, writing the reconstructed (lossy) image to disk. 13 | 14 | The compressed image has block artifacts which can be seen easily. 15 | 16 | One can read about DCT image compression here: https://www.youtube.com/watch?v=sckLJpjH5p8 NPTEL IMAGE PROCESSING SERIES 17 | 18 | Credits: 19 | 1. zigzag.py has been taken from: https://github.com/amzhang1/simple-JPEG-compression 20 | 2. 
"""Decode image.txt (run-length encoded, quantized DCT coefficients) into an image.

The input file is expected to hold whitespace-separated tokens:
``<height> <width> <value> <skip> <value> <skip> ... ;`` where each ``skip``
is the number of zero coefficients that precede its ``value``.
"""
import math

import numpy as np

# Quantization matrix used to de-quantize each block (same values as the encoder)
QUANTIZATION_MAT = np.array([
    [16, 11, 10, 16, 24, 40, 51, 61],
    [12, 12, 14, 19, 26, 58, 60, 55],
    [14, 13, 16, 24, 40, 57, 69, 56],
    [14, 17, 22, 29, 51, 87, 80, 62],
    [18, 22, 37, 56, 68, 109, 103, 77],
    [24, 35, 55, 64, 81, 104, 113, 92],
    [49, 64, 78, 87, 103, 121, 120, 101],
    [72, 92, 95, 98, 112, 100, 103, 99],
])

# defining block size
block_size = 8


def decode_run_length(tokens, size):
    """Expand run-length encoded (value, skip) token pairs into a flat array.

    Args:
        tokens: sequence of strings ``value, skip, value, skip, ..., ';'``.
            Each ``skip`` counts the zeros that come *before* its ``value``.
        size: length of the array to reconstruct.

    Returns:
        1-D integer numpy array of length ``size``; positions not named by
        the stream stay zero.
    """
    coefficients = np.zeros(size).astype(int)
    position = 0
    index = 0
    # Stop at the ';' terminator, or at the end of the tokens if it is missing.
    while index + 1 < len(tokens) and tokens[index] != ';':
        # Apply this pair's own skip before placing its value, so that a
        # leading run of zeros (first pair with skip > 0) is honoured too.
        position += int(tokens[index + 1])
        if position >= size:
            # Stream describes more coefficients than the image holds.
            break
        # int() handles the sign directly ("-3" -> -3).
        coefficients[position] = int(tokens[index])
        position += 1
        index += 2
    return coefficients


def main():
    """Read image.txt, rebuild the image block by block and write it to disk."""
    # Imported here so decode_run_length can be imported without OpenCV.
    import cv2
    from zigzag import inverse_zigzag

    # Reading image.txt to decode it as image
    with open('image.txt', 'r') as myfile:
        image = myfile.read()

    # split into tokens separated by whitespace
    details = image.split()

    # h and w are height and width of the image (first two tokens)
    h = int(details[0])
    w = int(details[1])

    # flat coefficient stream reshaped to the image grid; every
    # block_size x block_size tile holds one block in zigzag order
    array = np.reshape(decode_run_length(details[2:], h * w), (h, w))

    # initialisation of compressed image
    padded_img = np.zeros((h, w))

    # construct the intensity matrix from the frequency matrix
    # (inverse zigzag -> de-quantization -> IDCT), one block at a time
    for i in range(0, h, block_size):
        for j in range(0, w, block_size):
            temp_stream = array[i:i + block_size, j:j + block_size]
            block = inverse_zigzag(temp_stream.flatten(), int(block_size), int(block_size))
            de_quantized = np.multiply(block, QUANTIZATION_MAT)
            padded_img[i:i + block_size, j:j + block_size] = cv2.idct(de_quantized)

    # clamping to 8-bit max-min values
    padded_img[padded_img > 255] = 255
    padded_img[padded_img < 0] = 0

    # compressed image is written into compressed_image.bmp file
    cv2.imwrite("compressed_image.bmp", np.uint8(padded_img))


if __name__ == "__main__":
    main()
"""Encode harry.jpg as run-length encoded, quantized DCT coefficients (image.txt)."""
import math

import numpy as np


def get_run_length_encoding(image):
    """Run-length encode a 1-D array of coefficients.

    Every non-zero entry is emitted as ``"<value> <skip> "`` where ``skip`` is
    the number of zeros seen since the previous non-zero entry. Zeros after
    the last non-zero entry are not emitted.

    Args:
        image: 1-D numpy array; values are truncated to int before encoding.

    Returns:
        The concatenated ``"<value> <skip> "`` tokens as one string
        (empty string when every entry is zero).
    """
    image = image.astype(int)
    pieces = []
    skip = 0
    for value in image:
        if value != 0:
            pieces.append(str(value) + " " + str(skip) + " ")
            skip = 0
        else:
            skip = skip + 1
    # join once instead of growing the string inside the loop
    return "".join(pieces)


# defining block size
block_size = 8

# Quantization Matrix
QUANTIZATION_MAT = np.array([
    [16, 11, 10, 16, 24, 40, 51, 61],
    [12, 12, 14, 19, 26, 58, 60, 55],
    [14, 13, 16, 24, 40, 57, 69, 56],
    [14, 17, 22, 29, 51, 87, 80, 62],
    [18, 22, 37, 56, 68, 109, 103, 77],
    [24, 35, 55, 64, 81, 104, 113, 92],
    [49, 64, 78, 87, 103, 121, 120, 101],
    [72, 92, 95, 98, 112, 100, 103, 99],
])


def main():
    """Read the image, DCT + quantize each block, and write the RLE stream."""
    # Imported here so get_run_length_encoding can be imported without OpenCV.
    import cv2
    from zigzag import zigzag

    # reading image in grayscale style
    img = cv2.imread('harry.jpg', cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread signals an unreadable file by returning None
        raise FileNotFoundError("could not read image 'harry.jpg'")

    # You can try with this matrix to understand working of DCT
    # img = np.array([[255,255,227,204,204,203,192,217],
    #                 [215,189,167,166,160,135,167,244],
    #                 [169,115,99,99,99,82,127,220],
    #                 [146,90,86,88,84,63,195,189],
    #                 [255,255,231,239,240,182,251,232],
    #                 [255,255,21,245,226,169,229,247],
    #                 [255,255,222,251,174,209,174,163],
    #                 [255,255,221,184,205,248,249,220]])

    # get size of the image
    height, width = img.shape

    # number of blocks needed in each direction (rounded up)
    nbh = math.ceil(height / block_size)
    nbw = math.ceil(width / block_size)

    # Pad the image, because the image size may not be divisible by the block
    # size: padded size = block size * number of blocks.
    H = block_size * nbh
    W = block_size * nbw

    # zero matrix of the padded size with the image copied into the top-left
    padded_img = np.zeros((H, W))
    padded_img[0:height, 0:width] = img[0:height, 0:width]

    cv2.imwrite('uncompressed.bmp', np.uint8(padded_img))

    # start encoding:
    # divide image into block size by block size (here: 8-by-8) blocks,
    # apply the 2D discrete cosine transform to each block, quantize,
    # reorder the coefficients in zigzag order and store them back in place
    for i in range(nbh):
        # start and end row index of the block
        row_ind_1 = i * block_size
        row_ind_2 = row_ind_1 + block_size

        for j in range(nbw):
            # start and end column index of the block
            col_ind_1 = j * block_size
            col_ind_2 = col_ind_1 + block_size

            block = padded_img[row_ind_1:row_ind_2, col_ind_1:col_ind_2]

            # apply 2D discrete cosine transform to the selected block
            DCT = cv2.dct(block)

            # quantize; astype(int) truncates toward zero
            DCT_normalized = np.divide(DCT, QUANTIZATION_MAT).astype(int)

            # reorder DCT coefficients in zigzag order (one-dimensional array)
            reordered = zigzag(DCT_normalized)

            # reshape back to (block size by block size) and overwrite the
            # block; it has already been read, so in-place reuse is safe
            reshaped = np.reshape(reordered, (block_size, block_size))
            padded_img[row_ind_1:row_ind_2, col_ind_1:col_ind_2] = reshaped

    cv2.imshow('encoded image', np.uint8(padded_img))

    arranged = padded_img.flatten()

    # RLE encoded data is written to a text file. The compression here comes
    # only from quantization + RLE; it could be improved further with Huffman
    # codes or by zeroing more frequency coefficients.
    bitstream = get_run_length_encoding(arranged)

    # Two terms are prepended for the size; semicolon denotes end of image to the receiver
    bitstream = str(padded_img.shape[0]) + " " + str(padded_img.shape[1]) + " " + bitstream + ";"

    # Written to image.txt
    with open("image.txt", "w") as file1:
        file1.write(bitstream)

    cv2.waitKey(0)
    cv2.destroyAllWindows()


if __name__ == "__main__":
    main()
# Zigzag scan utilities.
# Python port of Matlab code by Alexey S. Sokolov a.k.a. nICKEL,
# Moscow, Russia, June 2007, alex.nickel@gmail.com

import numpy as np


def zigzag(input):
    """Return the items of a 2-D matrix scanned in zigzag order.

    The scan starts at the top-left item and walks the anti-diagonals,
    alternating direction: even diagonals are walked upwards (towards the
    first row), odd diagonals downwards (towards the first column).

    Args:
        input: two-dimensional array of any size, not necessarily square.

    Returns:
        1-D float numpy array of length ``m * n`` (``m``, ``n`` being the
        sizes of ``input``) holding the items in zigzag order.
    """
    vmax = input.shape[0]
    hmax = input.shape[1]

    output = np.zeros(vmax * hmax)

    i = 0
    # All items on one anti-diagonal share the same v + h.
    for diag in range(vmax + hmax - 1):
        # Rows of this diagonal that lie inside the matrix.
        v_first = max(0, diag - (hmax - 1))
        v_last = min(diag, vmax - 1)

        if diag % 2 == 0:   # going up
            rows = range(v_last, v_first - 1, -1)
        else:               # going down
            rows = range(v_first, v_last + 1)

        for v in rows:
            output[i] = input[v, diag - v]
            i = i + 1

    return output
# Inverse zigzag scan of a matrix.
# Python port of Matlab code by Alexey S. Sokolov a.k.a. nICKEL,
# Moscow, Russia, June 2007, alex.nickel@gmail.com
def inverse_zigzag(input, vmax, hmax):
    """Rebuild a 2-D matrix from items listed in zigzag order.

    Walks the anti-diagonals of the output, alternating direction (even
    diagonals upwards, odd diagonals downwards), and fills each cell with
    the next item of ``input``.

    Args:
        input: 1-D sequence with at least ``vmax * hmax`` items in zigzag order.
        vmax: number of rows of the output matrix.
        hmax: number of columns of the output matrix.

    Returns:
        2-D float numpy array of shape ``(vmax, hmax)``.
    """
    output = np.zeros((vmax, hmax))

    i = 0
    # All cells on one anti-diagonal share the same v + h.
    for diag in range(vmax + hmax - 1):
        # Rows of this diagonal that lie inside the matrix.
        v_first = max(0, diag - (hmax - 1))
        v_last = min(diag, vmax - 1)

        if diag % 2 == 0:   # going up
            rows = range(v_last, v_first - 1, -1)
        else:               # going down
            rows = range(v_first, v_last + 1)

        for v in rows:
            output[v, diag - v] = input[i]
            i = i + 1

    return output