├── .gitignore ├── README.md ├── Makefile ├── scripts └── create_image.sh └── src ├── pic_info.c ├── color_to_bw.h ├── pic_info.h ├── brightness.h ├── timer.c ├── brightness.c ├── bmp_write.h ├── color_to_bw.c ├── bmp_read.h ├── yuv422.h ├── timer.h ├── brightness_sse.asm ├── brightness_avx2.asm ├── yuv422.c ├── bmp_write.c ├── image_proc.c ├── yuv422_sse.asm └── bmp_read.c /.gitignore: -------------------------------------------------------------------------------- 1 | build/*.o 2 | *.swp 3 | image_proc 4 | 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | SIMD examples 3 | ============= 4 | 5 | 6 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | default: 3 | @$(MAKE) -C build 4 | 5 | clean: 6 | @rm -f build/*.o image_proc 7 | 8 | -------------------------------------------------------------------------------- /scripts/create_image.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | if (( $# != 1 )) 4 | then 5 | echo "Usage: bash create_image.sh " 6 | exit 0 7 | fi 8 | 9 | ffmpeg -i ${1} -pix_fmt yuv422p out.yuv 10 | 11 | -------------------------------------------------------------------------------- /src/pic_info.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Image processing routines. 3 | * Author: Michael Kohn 4 | * Email: mike@mikekohn.net 5 | * Web: http://www.mikekohn.net/ 6 | * License: BSD 7 | * 8 | * Copyright 2018 by Michael Kohn 9 | * 10 | */ 11 | 12 | #include 13 | #include 14 | 15 | #include "pic_info.h" 16 | 17 | void pic_info_free(struct _pic_info *pic_info) 18 | { 19 | free(pic_info->picture); 20 | } 21 | 22 | -------------------------------------------------------------------------------- /src/color_to_bw.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Image processing routines. 3 | * Author: Michael Kohn 4 | * Email: mike@mikekohn.net 5 | * Web: http://www.mikekohn.net/ 6 | * License: BSD 7 | * 8 | * Copyright 2018 by Michael Kohn 9 | * 10 | */ 11 | 12 | #ifndef _COLOR_TO_BW_H 13 | #define _COLOR_TO_BW_H 14 | 15 | #include 16 | 17 | #include "pic_info.h" 18 | 19 | void color_to_bw(uint8_t *image_bw, struct _pic_info *pic_info); 20 | 21 | #endif 22 | 23 | -------------------------------------------------------------------------------- /src/pic_info.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Image processing routines. 3 | * Author: Michael Kohn 4 | * Email: mike@mikekohn.net 5 | * Web: http://www.mikekohn.net/ 6 | * License: BSD 7 | * 8 | * Copyright 2007-2018 by Michael Kohn 9 | * 10 | */ 11 | 12 | #ifndef _PIC_INFO_H 13 | #define _PIC_INFO_H 14 | 15 | #include 16 | 17 | struct _pic_info 18 | { 19 | int width; 20 | int height; 21 | uint32_t *picture; 22 | }; 23 | 24 | void pic_info_free(struct _pic_info *pic_info); 25 | 26 | #endif 27 | 28 | -------------------------------------------------------------------------------- /src/brightness.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Image processing routines. 3 | * Author: Michael Kohn 4 | * Email: mike@mikekohn.net 5 | * Web: http://www.mikekohn.net/ 6 | * License: BSD 7 | * 8 | * Copyright 2018 by Michael Kohn 9 | * 10 | */ 11 | 12 | #ifndef _BRIGHTNESS_H 13 | #define _BRIGHTNESS_H 14 | 15 | #include 16 | 17 | void brightness(uint8_t *buffer, int length, int value); 18 | void brightness_sse(uint8_t *buffer, int length, int value); 19 | void brightness_avx2(uint8_t *buffer, int length, int value); 20 | 21 | #endif 22 | 23 | -------------------------------------------------------------------------------- /src/timer.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Image processing routines. 3 | * Author: Michael Kohn 4 | * Email: mike@mikekohn.net 5 | * Web: http://www.mikekohn.net/ 6 | * License: BSD 7 | * 8 | * Copyright 2018 by Michael Kohn 9 | * 10 | */ 11 | 12 | #include "timer.h" 13 | 14 | double diff_time(struct timespec *tp_start, struct timespec *tp_stop) 15 | { 16 | long nsec = tp_stop->tv_nsec - tp_start->tv_nsec; 17 | long sec = tp_stop->tv_sec - tp_start->tv_sec; 18 | 19 | if (nsec < 0) { sec--; nsec += 1000000000; } 20 | 21 | double t = (sec * 1000) + ((double)nsec / 1000000); 22 | 23 | return t; 24 | } 25 | 26 | -------------------------------------------------------------------------------- /src/brightness.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Image processing routines. 3 | * Author: Michael Kohn 4 | * Email: mike@mikekohn.net 5 | * Web: http://www.mikekohn.net/ 6 | * License: BSD 7 | * 8 | * Copyright 2018 by Michael Kohn 9 | * 10 | */ 11 | 12 | #include 13 | 14 | void brightness(uint8_t *buffer, int length, int value) 15 | { 16 | int t, r; 17 | 18 | if (value > 0) 19 | { 20 | for (t = 0; t < length; t++) 21 | { 22 | r = buffer[t] + value; 23 | 24 | buffer[t] = (r > 255) ? 255 : r; 25 | } 26 | } 27 | else 28 | { 29 | for (t = 0; t < length; t++) 30 | { 31 | r = buffer[t] + value; 32 | 33 | buffer[t] = (r < 0) ? 0 : r; 34 | } 35 | } 36 | } 37 | 38 | -------------------------------------------------------------------------------- /src/bmp_write.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Image processing routines. 3 | * Author: Michael Kohn 4 | * Email: mike@mikekohn.net 5 | * Web: http://www.mikekohn.net/ 6 | * License: BSD 7 | * 8 | * Copyright 2007-2018 by Michael Kohn 9 | * 10 | */ 11 | 12 | #ifndef _BMP_WRITE_H 13 | #define _BMP_WRITE_H 14 | 15 | #include 16 | 17 | #include "pic_info.h" 18 | 19 | int bmp_write(const char *filename, struct _pic_info *pic_info); 20 | int bmp_write_bw(const char *filename, uint8_t *image_bw, int width, int height); 21 | int bmp_write_rgb24(const char *filename, uint8_t *image_rgb24, int width, int height); 22 | int bmp_write_rgb32(const char *filename, uint32_t *image_rgb32, int width, int height); 23 | 24 | #endif 25 | 26 | -------------------------------------------------------------------------------- /src/color_to_bw.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Image processing routines. 3 | * Author: Michael Kohn 4 | * Email: mike@mikekohn.net 5 | * Web: http://www.mikekohn.net/ 6 | * License: BSD 7 | * 8 | * Copyright 2018 by Michael Kohn 9 | * 10 | */ 11 | 12 | #include 13 | 14 | #include "pic_info.h" 15 | 16 | void color_to_bw(uint8_t *image_bw, struct _pic_info *pic_info) 17 | { 18 | int x; 19 | int length = pic_info->width * pic_info->height; 20 | 21 | for (x = 0; x < length; x++) 22 | { 23 | float r = pic_info->picture[x] & 0xff; 24 | float g = (pic_info->picture[x] >> 8) & 0xff; 25 | float b = (pic_info->picture[x] >> 16) & 0xff; 26 | 27 | float y = (0.299 * r) + (0.587 * g) + (0.114 * b); 28 | int y1 = (int)y; 29 | 30 | image_bw[x] = (y1 > 255) ? 255 : y1; 31 | } 32 | } 33 | 34 | -------------------------------------------------------------------------------- /src/bmp_read.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Image processing routines. 3 | * Author: Michael Kohn 4 | * Email: mike@mikekohn.net 5 | * Web: http://www.mikekohn.net/ 6 | * License: BSD 7 | * 8 | * Copyright 2007-2018 by Michael Kohn 9 | * 10 | */ 11 | 12 | #ifndef _BMP_READ_H 13 | #define _BMP_READ_H 14 | 15 | #include 16 | 17 | #include "pic_info.h" 18 | 19 | struct _bitmap_file 20 | { 21 | uint8_t type[2]; 22 | uint32_t size; 23 | uint16_t reserved1; 24 | uint16_t reserved2; 25 | uint32_t offset; 26 | }; 27 | 28 | struct _bitmap_info 29 | { 30 | uint32_t size; 31 | uint32_t width; 32 | uint32_t height; 33 | uint16_t planes; 34 | uint16_t bit_count; 35 | uint32_t compression; 36 | uint32_t image_size; 37 | uint32_t x_pels_per_metre; 38 | uint32_t y_pels_per_metre; 39 | uint32_t colors_used; 40 | uint32_t colors_important; 41 | int colors[256]; 42 | }; 43 | 44 | int bmp_read(const char *filename, struct _pic_info *pic_info); 45 | 46 | #endif 47 | 48 | -------------------------------------------------------------------------------- /src/yuv422.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Image processing routines. 3 | * Author: Michael Kohn 4 | * Email: mike@mikekohn.net 5 | * Web: http://www.mikekohn.net/ 6 | * License: BSD 7 | * 8 | * Copyright 2007-2018 by Michael Kohn 9 | * 10 | * YUV422 is a planer format where the image_yuv422 is formatted as: 11 | * 12 | * Y bytes (length is width * height) 13 | * U bytes (length is width * height / 2) 14 | * V bytes (length is width * height / 2) 15 | * 16 | * Every U and V byte goes together with 2 Y bytes. There is twice as 17 | * much Y (brightness) information than there is UV (color). 18 | * 19 | */ 20 | 21 | #ifndef _YUV422_H 22 | #define _YUV422_H 23 | 24 | #include 25 | 26 | int yuv422_read(const char *filename, uint8_t **image_yuv); 27 | 28 | void yuv422_to_rgb32_float( 29 | uint32_t *image_rgb32, 30 | uint8_t *image_yuv422, 31 | int width, 32 | int height); 33 | 34 | void yuv422_to_rgb32_int( 35 | uint32_t *image_rgb32, 36 | uint8_t *image_yuv422, 37 | int width, 38 | int height); 39 | 40 | void yuv422_to_rgb32_float_sse( 41 | uint32_t *image_rgb32, 42 | uint8_t *image_yuv422, 43 | int width, 44 | int height); 45 | 46 | void yuv422_to_rgb32_int_sse( 47 | uint32_t *image_rgb32, 48 | uint8_t *image_yuv422, 49 | int width, 50 | int height); 51 | 52 | #endif 53 | 54 | -------------------------------------------------------------------------------- /src/timer.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Image processing routines. 3 | * Author: Michael Kohn 4 | * Email: mike@mikekohn.net 5 | * Web: http://www.mikekohn.net/ 6 | * License: BSD 7 | * 8 | * Copyright 2018 by Michael Kohn 9 | * 10 | */ 11 | 12 | #ifndef TIMER_H 13 | #define TIMER_H 14 | 15 | #include 16 | #include 17 | 18 | union _perftime 19 | { 20 | struct _split 21 | { 22 | uint32_t lo; 23 | uint32_t hi; 24 | } split; 25 | uint64_t count; 26 | }; 27 | 28 | #ifdef CYCLES_COUNT 29 | 30 | #define TIMER_START \ 31 | asm __volatile__ \ 32 | ( \ 33 | "rdtsc" : "=a" (perf_start.split.lo), "=d" (perf_start.split.hi) \ 34 | ); \ 35 | count = 0; 36 | 37 | #define TIMER_STOP \ 38 | asm __volatile__ \ 39 | ( \ 40 | "rdtsc" : "=a" (perf_end.split.lo), "=d" (perf_end.split.hi) \ 41 | ); \ 42 | printf("count=%d cpu=%ld\n", count, perf_end.count - perf_start.count); \ 43 | fflush(stdout); 44 | 45 | #else 46 | 47 | #define TIMER_START \ 48 | struct timespec tp_start; \ 49 | struct timespec tp_stop; \ 50 | clock_gettime(CLOCK_MONOTONIC, &tp_start); \ 51 | 52 | #define TIMER_STOP \ 53 | clock_gettime(CLOCK_MONOTONIC, &tp_stop); \ 54 | printf("msec=%f\n", diff_time(&tp_start, &tp_stop)); \ 55 | fflush(stdout); 56 | 57 | #endif 58 | 59 | double diff_time(struct timespec *tp_start, struct timespec *tp_stop); 60 | 61 | #endif 62 | 63 | -------------------------------------------------------------------------------- /src/brightness_sse.asm: -------------------------------------------------------------------------------- 1 | ;; 2 | ;; Image processing routines. 3 | ;; Author: Michael Kohn 4 | ;; Email: mike@mikekohn.net 5 | ;; Web: http://www.mikekohn.net/ 6 | ;; License: BSD 7 | ;; 8 | ;; Copyright 2018 by Michael Kohn 9 | ;; 10 | 11 | BITS 64 12 | 13 | global _brightness_sse 14 | global brightness_sse 15 | 16 | ;void brightness_sse(uint8_t *buffer, int length, int value) 17 | _brightness_sse: 18 | brightness_sse: 19 | test edx, edx 20 | jg brightness_sse_not_neg 21 | neg dl 22 | brightness_sse_not_neg: 23 | 24 | ;; Create a 16 byte vector setting each byte to value. Inserts a 25 | ;; dword of vvvv into vector element 0 and then uses shuffle dword 26 | ;; to copy that value to the other vector dword elements. 27 | mov dh, dl 28 | pinsrw xmm1, dx, 0 29 | pinsrw xmm1, dx, 1 30 | pshufd xmm1, xmm1, 0 31 | 32 | test edx, edx 33 | jg brightness_sse_pos_loop 34 | 35 | ;; Iterate over image 16 pixels at a time and sub value from each pixel. 36 | brightness_sse_neg_loop: 37 | movups xmm0, [rdi] 38 | psubusb xmm0, xmm1 39 | movups [rdi], xmm0 40 | add rdi, 16 41 | sub rsi, 16 42 | jnz brightness_sse_neg_loop 43 | ret 44 | 45 | ;; Iterate over image 16 pixels at a time and add value to each pixel. 46 | brightness_sse_pos_loop: 47 | movups xmm0, [rdi] 48 | paddusb xmm0, xmm1 49 | movups [rdi], xmm0 50 | add rdi, 16 51 | sub rsi, 16 52 | jnz brightness_sse_pos_loop 53 | ret 54 | 55 | -------------------------------------------------------------------------------- /src/brightness_avx2.asm: -------------------------------------------------------------------------------- 1 | ;; 2 | ;; Image processing routines. 3 | ;; Author: Michael Kohn 4 | ;; Email: mike@mikekohn.net 5 | ;; Web: http://www.mikekohn.net/ 6 | ;; License: BSD 7 | ;; 8 | ;; Copyright 2018 by Michael Kohn 9 | ;; 10 | 11 | BITS 64 12 | 13 | global _brightness_avx2 14 | global brightness_avx2 15 | 16 | ;void brightness_avx2(uint8_t *buffer, int length, int value) 17 | _brightness_avx2: 18 | brightness_avx2: 19 | test edx, edx 20 | jg brightness_sse_not_neg 21 | neg dl 22 | brightness_sse_not_neg: 23 | 24 | ;; Create a 16 byte vector setting each byte to value. Inserts a 25 | ;; dword of vvvv into vector element 0 and then uses shuffle dword 26 | ;; to copy that value to the other vector dword elements. 27 | mov dh, dl 28 | pinsrw xmm1, dx, 0 29 | pinsrw xmm1, dx, 1 30 | pshufd xmm1, xmm1, 0 31 | vinsertf128 ymm1, ymm1, xmm1, 1 32 | 33 | test eax, eax 34 | jg brightness_sse_pos_loop 35 | 36 | ;; Iterate over image 16 pixels at a time and sub value from each pixel. 37 | brightness_sse_neg_loop: 38 | vmovups ymm0, [rdi] 39 | vpsubusb ymm0, ymm1 40 | vmovups [rdi], ymm0 41 | add rdi, 32 42 | sub rsi, 32 43 | jnz brightness_sse_neg_loop 44 | ret 45 | 46 | ;; Iterate over image 16 pixels at a time and add value to each pixel. 47 | brightness_sse_pos_loop: 48 | vmovups ymm0, [rdi] 49 | vpaddusb ymm0, ymm1 50 | vmovups [rdi], ymm0 51 | add rdi, 32 52 | sub rsi, 32 53 | jnz brightness_sse_pos_loop 54 | ret 55 | 56 | -------------------------------------------------------------------------------- /src/yuv422.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Image processing routines. 3 | * Author: Michael Kohn 4 | * Email: mike@mikekohn.net 5 | * Web: http://www.mikekohn.net/ 6 | * License: BSD 7 | * 8 | * Copyright 2007-2018 by Michael Kohn 9 | * 10 | * YUV422 is a planer format where the image_yuv422 is formatted as: 11 | * 12 | * Y bytes (length is width * height) 13 | * U bytes (length is width * height / 2) 14 | * V bytes (length is width * height / 2) 15 | * 16 | * Every U and V byte goes together with 2 Y bytes. There is twice as 17 | * much Y (brightness) information than there is UV (color). 18 | * 19 | */ 20 | 21 | #include 22 | #include 23 | #include 24 | 25 | #include "yuv422.h" 26 | 27 | int yuv422_read(const char *filename, uint8_t **image_yuv422) 28 | { 29 | FILE *in; 30 | long length; 31 | 32 | *image_yuv422 = NULL; 33 | 34 | in = fopen(filename, "rb"); 35 | 36 | if (in == NULL) 37 | { 38 | printf("Could not open %s for reading.\n", filename); 39 | return 0; 40 | } 41 | 42 | fseek(in, 0, SEEK_END); 43 | length = ftell(in); 44 | fseek(in, 0, SEEK_SET); 45 | 46 | *image_yuv422 = (uint8_t *)malloc(length); 47 | 48 | length = fread(*image_yuv422, 1, length, in); 49 | fclose(in); 50 | 51 | return length; 52 | } 53 | 54 | void yuv422_to_rgb32_float( 55 | uint32_t *image_rgb32, 56 | uint8_t *image_yuv422, 57 | int width, 58 | int height) 59 | { 60 | uint8_t *u_buffer, *v_buffer; 61 | float u1, uv1, v1; 62 | float u, v; 63 | int length; 64 | int r, g, b; 65 | int rgb_ptr, y_ptr, n; 66 | 67 | length = width * height; 68 | 69 | u_buffer = image_yuv422 + length; 70 | length = length / 2; 71 | v_buffer = u_buffer + length; 72 | 73 | rgb_ptr = 0; 74 | y_ptr = 0; 75 | 76 | for (n = 0; n < length; n++) 77 | { 78 | // Process 2 pixels at a time. 79 | // Compute parts of the UV components. 80 | 81 | u = u_buffer[n] - 128; 82 | v = v_buffer[n] - 128; 83 | 84 | v1 = (1.13983 * (float)v); 85 | uv1 = -(0.39466 * (float)u) - (0.58060*(float)v); 86 | u1 = (2.03211 * (float)u); 87 | 88 | // Run even pixel through formula. 89 | 90 | r = image_yuv422[y_ptr] + v1; 91 | g = image_yuv422[y_ptr] + uv1; 92 | b = image_yuv422[y_ptr] + u1; 93 | 94 | // Saturate pixels to a value between 0 and 255. 95 | r = (r > 255) ? 255 : r; 96 | g = (g > 255) ? 255 : g; 97 | b = (b > 255) ? 255 : b; 98 | 99 | r = (r < 0) ? 0 : r; 100 | g = (g < 0) ? 0 : g; 101 | b = (b < 0) ? 0 : b; 102 | 103 | image_rgb32[rgb_ptr++] = (r << 16) | (g << 8) | b; 104 | 105 | // Run odd pixel through formula. 106 | 107 | r = image_yuv422[y_ptr + 1] + v1; 108 | g = image_yuv422[y_ptr + 1] + uv1; 109 | b = image_yuv422[y_ptr + 1] + u1; 110 | 111 | // Saturate pixels to a value between 0 and 255. 112 | r = (r > 255) ? 255 : r; 113 | g = (g > 255) ? 255 : g; 114 | b = (b > 255) ? 255 : b; 115 | 116 | r = (r < 0) ? 0 : r; 117 | g = (g < 0) ? 0 : g; 118 | b = (b < 0) ? 0 : b; 119 | 120 | image_rgb32[rgb_ptr++] = (r << 16) | (g << 8) | b; 121 | 122 | y_ptr += 2; 123 | } 124 | } 125 | 126 | void yuv422_to_rgb32_int( 127 | uint32_t *image_rgb32, 128 | uint8_t *image_yuv422, 129 | int width, 130 | int height) 131 | { 132 | uint8_t *u_buffer, *v_buffer; 133 | int u1, uv1, v1; 134 | int u, v; 135 | int length; 136 | int r, g, b; 137 | int rgb_ptr, y_ptr, n; 138 | int y1; 139 | 140 | length = width * height; 141 | 142 | u_buffer = image_yuv422 + length; 143 | length = length / 2; 144 | v_buffer =u_buffer + length; 145 | 146 | rgb_ptr = 0; 147 | y_ptr = 0; 148 | 149 | for (n = 0; n < length; n++) 150 | { 151 | // Process 2 pixels at a time. 152 | // Compute parts of the UV components. 153 | 154 | u = u_buffer[n] - 128; 155 | v = v_buffer[n] - 128; 156 | 157 | v1 = (5727 * v); 158 | uv1= - (1617 * u) - (2378 * v); 159 | u1 = (8324 * u); 160 | 161 | // Run even pixel through formula. 162 | 163 | y1 = image_yuv422[y_ptr] << 12; 164 | 165 | r = (y1 + v1) >> 12; 166 | g = (y1 + uv1) >> 12; 167 | b = (y1 + u1) >> 12; 168 | 169 | // Saturate pixels to a value between 0 and 255. 170 | r = (r > 255) ? 255 : r; 171 | g = (g > 255) ? 255 : g; 172 | b = (b > 255) ? 255 : b; 173 | 174 | r = (r < 0) ? 0 : r; 175 | g = (g < 0) ? 0 : g; 176 | b = (b < 0) ? 0 : b; 177 | 178 | image_rgb32[rgb_ptr++] = (r << 16) | (g << 8) | b; 179 | 180 | // Run odd pixel through formula. 181 | 182 | y1 = image_yuv422[y_ptr + 1] << 12; 183 | 184 | r = (y1 + v1) >> 12; 185 | g = (y1 + uv1) >> 12; 186 | b = (y1 + u1) >> 12; 187 | 188 | // Saturate pixels to a value between 0 and 255. 189 | r = (r > 255) ? 255 : r; 190 | g = (g > 255) ? 255 : g; 191 | b = (b > 255) ? 255 : b; 192 | 193 | r = (r < 0) ? 0 : r; 194 | g = (g < 0) ? 0 : g; 195 | b = (b < 0) ? 0 : b; 196 | 197 | image_rgb32[rgb_ptr++] = (r << 16) | (g << 8) | b; 198 | 199 | y_ptr += 2; 200 | } 201 | } 202 | 203 | -------------------------------------------------------------------------------- /src/bmp_write.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Image processing routines. 3 | * Author: Michael Kohn 4 | * Email: mike@mikekohn.net 5 | * Web: http://www.mikekohn.net/ 6 | * License: BSD 7 | * 8 | * Copyright 2007-2018 by Michael Kohn 9 | * 10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | 16 | #include "bmp_write.h" 17 | 18 | struct _bmp 19 | { 20 | FILE *out; 21 | int curr_x; 22 | int total_x; 23 | }; 24 | 25 | static int write_chars(FILE *out, char *s) 26 | { 27 | int t; 28 | 29 | t = 0; 30 | 31 | while(s[t] != 0 && t < 255) 32 | { 33 | putc(s[t++], out); 34 | } 35 | 36 | return 0; 37 | } 38 | 39 | static int write_int32(FILE *out, int n) 40 | { 41 | putc(n & 0xff, out); 42 | putc((n >> 8) & 0xff, out); 43 | putc((n >> 16) & 0xff, out); 44 | putc((n >> 24) & 0xff, out); 45 | 46 | return 0; 47 | } 48 | 49 | static int write_int16(FILE *out, int n) 50 | { 51 | putc(n & 255, out); 52 | putc((n >> 8) & 0xff, out); 53 | 54 | return 0; 55 | } 56 | 57 | static void write_bmp_header(struct _bmp *bmp, int width, int height) 58 | { 59 | int image_width; 60 | 61 | image_width = width; 62 | while ((image_width % 4) != 0) { image_width++; } 63 | bmp->total_x = width * 3; 64 | 65 | // size: 14 bytes 66 | 67 | write_chars(bmp->out, "BM"); 68 | write_int32(bmp->out, 0); 69 | write_int16(bmp->out, 0); 70 | write_int16(bmp->out, 0); 71 | write_int32(bmp->out, 54); 72 | 73 | // head1: 14 head2: 40 74 | 75 | write_int32(bmp->out, 40); 76 | write_int32(bmp->out, width); 77 | write_int32(bmp->out, height); 78 | write_int16(bmp->out, 1); 79 | write_int16(bmp->out, 24); 80 | write_int32(bmp->out, 0); // compression 81 | write_int32(bmp->out, image_width*height); 82 | write_int32(bmp->out, 0); // biXPelsperMetre 83 | write_int32(bmp->out, 0); // biYPelsperMetre 84 | write_int32(bmp->out, 0); 85 | write_int32(bmp->out, 0); 86 | 87 | /* 88 | for (t = 0; t < 256; t++) 89 | { 90 | write_int32(bmp->out, palette[t]); 91 | } 92 | */ 93 | 94 | bmp->curr_x = 0; 95 | } 96 | 97 | static void write_bmp_pixel(struct _bmp *bmp, uint8_t red, uint8_t green, uint8_t blue) 98 | { 99 | putc(blue, bmp->out); 100 | putc(green, bmp->out); 101 | putc(red, bmp->out); 102 | 103 | bmp->curr_x += 3; 104 | 105 | if (bmp->curr_x == bmp->total_x) 106 | { 107 | while ((bmp->curr_x % 4) != 0) 108 | { 109 | putc(0, bmp->out); 110 | bmp->curr_x++; 111 | } 112 | 113 | bmp->curr_x = 0; 114 | } 115 | } 116 | 117 | static int write_bmp_footer(struct _bmp *bmp) 118 | { 119 | long marker; 120 | 121 | marker = ftell(bmp->out); 122 | fseek(bmp->out, 2, SEEK_SET); 123 | write_int32(bmp->out, marker); 124 | fseek(bmp->out, marker, SEEK_SET); 125 | 126 | return 0; 127 | } 128 | 129 | int bmp_write(const char *filename, struct _pic_info *pic_info) 130 | { 131 | struct _bmp bmp; 132 | int x, y; 133 | const int width = pic_info->width; 134 | const int height = pic_info->height; 135 | const uint32_t *picture = pic_info->picture; 136 | 137 | bmp.out = fopen(filename, "wb"); 138 | 139 | if (bmp.out == NULL) 140 | { 141 | printf("Cannot open file %s for writing.\n", filename); 142 | return -1; 143 | } 144 | 145 | write_bmp_header(&bmp, width, height); 146 | 147 | for (y = height - 1; y >= 0; y--) 148 | { 149 | int ptr = y * width; 150 | 151 | for (x = 0; x < width; x++) 152 | { 153 | write_bmp_pixel(&bmp, 154 | picture[ptr] & 0xff, 155 | (picture[ptr] >> 8) & 0xff, 156 | (picture[ptr] >> 16) & 0xff); 157 | ptr++; 158 | } 159 | } 160 | 161 | write_bmp_footer(&bmp); 162 | 163 | fclose(bmp.out); 164 | 165 | return 0; 166 | } 167 | 168 | int bmp_write_bw(const char *filename, uint8_t *image_bw, int width, int height) 169 | { 170 | struct _bmp bmp; 171 | int x, y; 172 | 173 | bmp.out = fopen(filename, "wb"); 174 | 175 | if (bmp.out == NULL) 176 | { 177 | printf("Cannot open file %s for writing.\n", filename); 178 | return -1; 179 | } 180 | 181 | write_bmp_header(&bmp, width, height); 182 | 183 | for (y = height - 1; y >= 0; y--) 184 | { 185 | int ptr = y * width; 186 | 187 | for (x = 0; x < width; x++) 188 | { 189 | write_bmp_pixel(&bmp, image_bw[ptr], image_bw[ptr], image_bw[ptr]); 190 | ptr++; 191 | } 192 | } 193 | 194 | write_bmp_footer(&bmp); 195 | 196 | fclose(bmp.out); 197 | 198 | return 0; 199 | } 200 | 201 | int bmp_write_rgb24(const char *filename, uint8_t *image_rgb24, int width, int height) 202 | { 203 | struct _bmp bmp; 204 | int x, y; 205 | 206 | bmp.out = fopen(filename, "wb"); 207 | 208 | if (bmp.out == NULL) 209 | { 210 | printf("Cannot open file %s for writing.\n", filename); 211 | return -1; 212 | } 213 | 214 | write_bmp_header(&bmp, width, height); 215 | 216 | for (y = height - 1; y >= 0; y--) 217 | { 218 | int ptr = y * width * 3; 219 | 220 | for (x = 0; x < width; x++) 221 | { 222 | write_bmp_pixel(&bmp, image_rgb24[ptr + 0], 223 | image_rgb24[ptr + 1], 224 | image_rgb24[ptr + 2]); 225 | ptr += 3; 226 | } 227 | } 228 | 229 | write_bmp_footer(&bmp); 230 | 231 | fclose(bmp.out); 232 | 233 | return 0; 234 | } 235 | 236 | int bmp_write_rgb32(const char *filename, uint32_t *image_rgb32, int width, int height) 237 | { 238 | struct _bmp bmp; 239 | int x, y; 240 | 241 | bmp.out = fopen(filename, "wb"); 242 | 243 | if (bmp.out == NULL) 244 | { 245 | printf("Cannot open file %s for writing.\n", filename); 246 | return -1; 247 | } 248 | 249 | write_bmp_header(&bmp, width, height); 250 | 251 | for (y = height - 1; y >= 0; y--) 252 | { 253 | int ptr = y * width; 254 | 255 | for (x = 0; x < width; x++) 256 | { 257 | const uint32_t pixel = image_rgb32[ptr++]; 258 | 259 | write_bmp_pixel(&bmp, (pixel >> 16) & 0xff, 260 | (pixel >> 8) & 0xff, 261 | pixel & 0xff); 262 | #if 0 263 | write_bmp_pixel(&bmp, pixel & 0xff, 264 | (pixel >> 8) & 0xff, 265 | (pixel >> 16) & 0xff); 266 | #endif 267 | } 268 | } 269 | 270 | write_bmp_footer(&bmp); 271 | 272 | fclose(bmp.out); 273 | 274 | return 0; 275 | } 276 | 277 | -------------------------------------------------------------------------------- /src/image_proc.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Image processing routines. 3 | * Author: Michael Kohn 4 | * Email: mike@mikekohn.net 5 | * Web: http://www.mikekohn.net/ 6 | * License: BSD 7 | * 8 | * Copyright 2018 by Michael Kohn 9 | * 10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include "bmp_read.h" 18 | #include "bmp_write.h" 19 | #include "brightness.h" 20 | #include "color_to_bw.h" 21 | #include "pic_info.h" 22 | #include "timer.h" 23 | #include "yuv422.h" 24 | 25 | //int test_sse(void *dest, void *src); 26 | 27 | enum 28 | { 29 | INSTRUCTIONS_UNKNOWN, 30 | INSTRUCTIONS_NORMAL, 31 | INSTRUCTIONS_SSE, 32 | INSTRUCTIONS_AVX, 33 | INSTRUCTIONS_AVX512, 34 | }; 35 | 36 | enum 37 | { 38 | FUNCTION_UNKNOWN, 39 | FUNCTION_BRIGHTNESS, 40 | FUNCTION_YUV, 41 | }; 42 | 43 | enum 44 | { 45 | TYPE_UNKNOWN, 46 | TYPE_FLOAT, 47 | TYPE_INT, 48 | }; 49 | 50 | static int get_function(const char *function) 51 | { 52 | if (strcmp(function, "brightness") == 0) 53 | { 54 | return FUNCTION_BRIGHTNESS; 55 | } 56 | else if (strcmp(function, "yuv") == 0) 57 | { 58 | return FUNCTION_YUV; 59 | } 60 | 61 | return FUNCTION_UNKNOWN; 62 | } 63 | 64 | static int get_instructions(const char *instructions) 65 | { 66 | if (strcmp(instructions, "normal") == 0) 67 | { 68 | return INSTRUCTIONS_NORMAL; 69 | } 70 | else if (strcmp(instructions, "sse") == 0) 71 | { 72 | return INSTRUCTIONS_SSE; 73 | } 74 | else if (strcmp(instructions, "avx") == 0) 75 | { 76 | return INSTRUCTIONS_AVX; 77 | } 78 | 79 | return INSTRUCTIONS_UNKNOWN; 80 | } 81 | 82 | static int get_type(const char *type) 83 | { 84 | if (strcmp(type, "int") == 0) 85 | { 86 | return TYPE_INT; 87 | } 88 | else if (strcmp(type, "float") == 0) 89 | { 90 | return TYPE_FLOAT; 91 | } 92 | 93 | return TYPE_UNKNOWN; 94 | } 95 | 96 | static void process_brightness( 97 | struct _pic_info *pic_info, 98 | int instructions, 99 | int value) 100 | { 101 | uint8_t *image_bw; 102 | const int length = pic_info->width * pic_info->height; 103 | 104 | image_bw = (uint8_t *)malloc(length); 105 | color_to_bw(image_bw, pic_info); 106 | 107 | TIMER_START 108 | 109 | if (instructions == INSTRUCTIONS_NORMAL) 110 | { 111 | brightness(image_bw, length, value); 112 | } 113 | else if (instructions == INSTRUCTIONS_SSE) 114 | { 115 | brightness_sse(image_bw, length, value); 116 | } 117 | else if (instructions == INSTRUCTIONS_AVX) 118 | { 119 | brightness_avx2(image_bw, length, value); 120 | } 121 | else 122 | { 123 | printf("Error: Unsupported instruction set.\n"); 124 | } 125 | 126 | TIMER_STOP 127 | 128 | bmp_write_bw("out.bmp", image_bw, pic_info->width, pic_info->height); 129 | 130 | free(image_bw); 131 | } 132 | 133 | static void process_yuv(uint8_t *image_yuv422, int instructions, int type, int width, int height) 134 | { 135 | uint32_t *image_rgb32; 136 | 137 | image_rgb32 = (uint32_t *)malloc(width * height * sizeof(uint32_t)); 138 | 139 | TIMER_START 140 | 141 | if (instructions == INSTRUCTIONS_NORMAL) 142 | { 143 | if (type == TYPE_INT) 144 | { 145 | yuv422_to_rgb32_int(image_rgb32, image_yuv422, width, height); 146 | } 147 | else 148 | { 149 | yuv422_to_rgb32_float(image_rgb32, image_yuv422, width, height); 150 | } 151 | } 152 | else if (instructions == INSTRUCTIONS_SSE) 153 | { 154 | if (type == TYPE_INT) 155 | { 156 | yuv422_to_rgb32_int_sse(image_rgb32, image_yuv422, width, height); 157 | } 158 | else 159 | { 160 | yuv422_to_rgb32_float_sse(image_rgb32, image_yuv422, width, height); 161 | } 162 | } 163 | else 164 | { 165 | printf("Error: Unsupported instruction set.\n"); 166 | } 167 | 168 | TIMER_STOP 169 | 170 | bmp_write_rgb32("out.bmp", image_rgb32, width, height); 171 | 172 | free(image_rgb32); 173 | } 174 | 175 | #if 0 176 | int test() 177 | { 178 | int s[4]; 179 | int d[4]; 180 | 181 | s[0] = 1; 182 | s[1] = 2; 183 | s[2] = 3; 184 | s[3] = 4; 185 | 186 | d[0] = 5; 187 | d[1] = 6; 188 | d[2] = 7; 189 | d[3] = 8; 190 | 191 | test_sse(d, s); 192 | 193 | #if 0 194 | float k = -0.39466; 195 | 196 | printf("%08x\n", *((uint32_t *)((void *)&k))); 197 | #endif 198 | 199 | printf("test: %08x %08x %08x %08x\n", 200 | d[0], 201 | d[1], 202 | d[2], 203 | d[3]); 204 | 205 | float *f = (float *)d; 206 | 207 | printf("test: %f %f %f %f\n", 208 | f[0], 209 | f[1], 210 | f[2], 211 | f[3]); 212 | 213 | return 0; 214 | } 215 | #endif 216 | 217 | int main(int argc, char *argv[]) 218 | { 219 | struct _pic_info pic_info; 220 | const char *filename; 221 | int function; 222 | int value; 223 | int instructions; 224 | int type; 225 | 226 | memset(&pic_info, 0, sizeof(pic_info)); 227 | 228 | //test(); 229 | 230 | if (argc != 6) 231 | { 232 | printf("Usage: %s \n", argv[0]); 233 | printf( 234 | " brightness: Convert .bmp image to bw and change brightness.\n" 235 | " yuv: Convert yuv422 to rgb (value is image width).\n" 236 | " normal: Use straight C.\n" 237 | " sse: Use SSE/SSE2/SSE3/SSE4 instructions.\n" 238 | " avx: Use AVX/AVX2 instructions.\n" 239 | " int: Use all integer instructions.\n" 240 | " float: Use float instructions for main parts of code.\n"); 241 | exit(0); 242 | } 243 | 244 | filename = argv[1]; 245 | function = get_function(argv[2]); 246 | value = atoi(argv[3]); 247 | instructions = get_instructions(argv[4]); 248 | type = get_type(argv[5]); 249 | 250 | if (function == FUNCTION_UNKNOWN) 251 | { 252 | printf("Error: function %s is unknown\n", argv[2]); 253 | exit(1); 254 | } 255 | 256 | if (instructions == INSTRUCTIONS_UNKNOWN) 257 | { 258 | printf("Error: instruction set %s is unknown\n", argv[4]); 259 | exit(1); 260 | } 261 | 262 | if (type == TYPE_UNKNOWN) 263 | { 264 | printf("Error: Type %s is unknown\n", argv[5]); 265 | exit(1); 266 | } 267 | 268 | if (function == FUNCTION_BRIGHTNESS) 269 | { 270 | if (type == TYPE_FLOAT) 271 | { 272 | printf("Float isn't valid for brightness.\n"); 273 | exit(1); 274 | } 275 | 276 | bmp_read(filename, &pic_info); 277 | 278 | process_brightness(&pic_info, instructions, value); 279 | 280 | pic_info_free(&pic_info); 281 | } 282 | else if (function == FUNCTION_YUV) 283 | { 284 | uint8_t *image_yuv422; 285 | int length; 286 | int width, height; 287 | 288 | length = yuv422_read(filename, &image_yuv422); 289 | 290 | if (length != 0) 291 | { 292 | width = value; 293 | height = (length / 2) / width; 294 | 295 | process_yuv(image_yuv422, instructions, type, width, height); 296 | free(image_yuv422); 297 | } 298 | } 299 | 300 | return 0; 301 | } 302 | 303 | -------------------------------------------------------------------------------- /src/yuv422_sse.asm: -------------------------------------------------------------------------------- 1 | ;; 2 | ;; Image processing routines. 3 | ;; Author: Michael Kohn 4 | ;; Email: mike@mikekohn.net 5 | ;; Web: http://www.mikekohn.net/ 6 | ;; License: BSD 7 | ;; 8 | ;; Copyright 2018 by Michael Kohn 9 | ;; 10 | 11 | BITS 64 12 | 13 | global _yuv422_to_rgb32_float_sse 14 | global yuv422_to_rgb32_float_sse 15 | global _yuv422_to_rgb32_int_sse 16 | global yuv422_to_rgb32_int_sse 17 | global test_sse 18 | 19 | ;default rel 20 | 21 | ;const_shuffle_y: align 16 22 | ; db 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03 23 | 24 | ;const_128: align 16 25 | ; dd 128, 128, 128, 120 26 | 27 | ;const_v1: align 16 28 | ; dd 1.13983, 1.13983, 1.13983, 1.13983 29 | 30 | ;const_uv1: align 16 31 | ; dd -0.39466, -0.39466, -0.39466, -0.39466 32 | 33 | ;const_uv2: align 16 34 | ; dd 0.58060, 0.58060, 0.58060, 0.58060 35 | 36 | ;const_u1: align 16 37 | ; dd 2.03211, 2.03211, 2.03211, 2.03211 38 | 39 | ; v1 = (1.13983 * (float)v); 40 | ; uv1 = -(0.39466 * (float)u) - (0.58060*(float)v); 41 | ; u1 = (2.03211 * (float)u); 42 | 43 | ;void yuv422_to_rgb32_float_sse(uint8_t *image_rgb32, uint8_t *image_yuv422, int width, int height) 44 | _yuv422_to_rgb32_float_sse: 45 | yuv422_to_rgb32_float_sse: 46 | ;; rdi = image_rgb32 47 | ;; rsi = image_yuv422 48 | ;; rdx = width 49 | ;; rcx = height 50 | 51 | ;; ecx = length of Y buffer 52 | imul ecx, edx 53 | ;; rdx points to U buffer (y + length) 54 | mov rdx, rsi 55 | add rdx, rcx 56 | ;; r8 is length / 2 57 | mov r8d, ecx 58 | sar r8d, 1 59 | 60 | ;; Load constants 61 | pxor xmm0, xmm0 ; xmm0 = [ 0, 0, 0, 0 ] 62 | 63 | mov eax, 128 64 | movd xmm1, eax 65 | pshufd xmm1, xmm1, 0 ; xmm1 = [ 128, 128, 128, 128 ] 66 | 67 | mov eax, 255 68 | movd xmm15, eax 69 | pshufd xmm15, xmm15, 0 ; xmm15 = [ 255, 255, 255, 255 ] 70 | 71 | mov eax, 0x3f91e5f3 ; 1.13983 72 | movd xmm8, eax 73 | pshufd xmm8, xmm8, 0 ; xmm8 = [ 1.13983, 1.13983, 1.13983, 1.13983 ] 74 | 75 | mov eax, 0xbeca10e0 ; -0.39466 76 | movd xmm9, eax 77 | pshufd xmm9, xmm9, 0 ; xmm9 = [ -0.39466, -0.39466, -0.39466, -0.39466 ] 78 | 79 | mov eax, 0xbf14a234 ; -0.58060 80 | movd xmm10, eax 81 | pshufd xmm10, xmm10, 0 ; xmm10 = [ -0.58060, -0.58060, -0.58060, -0.58060 ] 82 | 83 | mov eax, 0x40020e17 ; 2.03211 84 | movd xmm11, eax 85 | pshufd xmm11, xmm11, 0 ; xmm11 = [ 2.03211, 2.03211, 2.03211, 2.03211 ] 86 | 87 | yuv422_to_rgb32_float_sse_loop: 88 | 89 | ;; Load YYYY data from memory. 90 | movss xmm2, [rsi] 91 | 92 | ;; Unpack YYYY into [ Y, Y, Y, Y ]. 93 | punpcklbw xmm2, xmm0 94 | punpcklwd xmm2, xmm0 95 | 96 | xor eax, eax 97 | 98 | ;; Load U0U1 into xmm3 [ U0, U0, U1, U1 ]. 99 | mov ax, [rdx] 100 | movd xmm3, eax 101 | punpcklbw xmm3, xmm0 102 | punpcklwd xmm3, xmm0 103 | pshufd xmm3, xmm3, 0x50 104 | 105 | ;; Load V0V1 into xmm4 [ V0, V0, V1, V1 ]. 106 | mov ax, [rdx+r8] 107 | movd xmm4, eax 108 | punpcklbw xmm4, xmm0 109 | punpcklwd xmm4, xmm0 110 | pshufd xmm4, xmm4, 0x50 111 | 112 | ;; [ U, U, U, U ] - [ 128, 128, 128, 128 ] 113 | ;; [ V, V, V, V ] - [ 128, 128, 128, 128 ] 114 | psubd xmm3, xmm1 115 | psubd xmm4, xmm1 116 | 117 | ;; Convert to float. 118 | cvtdq2ps xmm2, xmm2 ; xmm2 = (float)vecy 119 | cvtdq2ps xmm3, xmm3 ; xmm3 = (float)vecu 120 | cvtdq2ps xmm4, xmm4 ; xmm4 = (float)vecv 121 | 122 | ; xmm5 = uv1 = -(0.39466 * (float)u) - (0.58060*(float)v); 123 | movaps xmm5, xmm3 124 | mulps xmm5, xmm9 125 | movaps xmm6, xmm4 126 | mulps xmm6, xmm10 127 | addps xmm5, xmm6 128 | 129 | ; xmm3 = u1 = (2.03211 * (float)u); 130 | mulps xmm3, xmm11 131 | 132 | ; xmm4 = v1 = (1.13983 * (float)v); 133 | mulps xmm4, xmm8 134 | 135 | ; r = yuv_buffer[y_ptr] + v1; (xmm4) 136 | addps xmm4, xmm2 137 | 138 | ; g = yuv_buffer[y_ptr] + uv1; (xmm5) 139 | addps xmm5, xmm2 140 | 141 | ; b = yuv_buffer[y_ptr] + u1; (xmm3) 142 | addps xmm3, xmm2 143 | 144 | cvttps2dq xmm4, xmm4 ; float to int r 145 | cvttps2dq xmm5, xmm5 ; float to int b 146 | cvttps2dq xmm3, xmm3 ; float to int g 147 | 148 | pmaxsd xmm4, xmm0 ; if (r < 0) { r = 0; } 149 | pmaxsd xmm5, xmm0 ; if (g < 0) { g = 0; } 150 | pmaxsd xmm3, xmm0 ; if (b < 0) { b = 0; } 151 | 152 | pminsd xmm4, xmm15 ; if (r > 255) { r = 255; } 153 | pminsd xmm5, xmm15 ; if (g > 255) { g = 255; } 154 | pminsd xmm3, xmm15 ; if (b > 255) { b = 255; } 155 | 156 | pslld xmm5, 8 157 | pslld xmm4, 16 158 | 159 | por xmm4, xmm5 160 | por xmm4, xmm3 161 | 162 | movups [rdi], xmm4 163 | 164 | add rdi, 16 165 | add rsi, 4 166 | add rdx, 2 167 | sub ecx, 4 168 | jnz yuv422_to_rgb32_float_sse_loop 169 | ret 170 | 171 | ;void yuv422_to_rgb32_int_sse(uint8_t *image_rgb32, uint8_t *image_yuv422, int width, int height) 172 | _yuv422_to_rgb32_int_sse: 173 | yuv422_to_rgb32_int_sse: 174 | ;; rdi = image_rgb32 175 | ;; rsi = image_yuv422 176 | ;; rdx = width 177 | ;; rcx = height 178 | 179 | ;; ecx = length of Y buffer 180 | imul ecx, edx 181 | ;; rdx points to U buffer (y + length) 182 | mov rdx, rsi 183 | add rdx, rcx 184 | ;; r8 is length / 2 185 | mov r8d, ecx 186 | sar r8d, 1 187 | 188 | ;; Load constants 189 | pxor xmm0, xmm0 ; xmm0 = [ 0, 0, 0, 0 ] 190 | 191 | mov eax, 128 192 | movd xmm1, eax 193 | pshufd xmm1, xmm1, 0 ; xmm1 = [ 128, 128, 128, 128 ] 194 | 195 | mov eax, 255 196 | movd xmm15, eax 197 | pshufd xmm15, xmm15, 0 ; xmm15 = [ 255, 255, 255, 255 ] 198 | 199 | mov eax, 5727 200 | movd xmm8, eax 201 | pshufd xmm8, xmm8, 0 ; xmm8 = [ 5727, 5727, 5727, 5727 ] 202 | 203 | mov eax, 1617 204 | movd xmm9, eax 205 | pshufd xmm9, xmm9, 0 ; xmm9 = [ 1617, 1617, 1617, 1617 ] 206 | 207 | mov eax, 2378 208 | movd xmm10, eax 209 | pshufd xmm10, xmm10, 0 ; xmm10 = [ 2378, 2378, 2378, 2378 ] 210 | 211 | mov eax, 8324 212 | movd xmm11, eax 213 | pshufd xmm11, xmm11, 0 ; xmm11 = [ 8324, 8324, 8324, 8324 ] 214 | 215 | yuv422_to_rgb32_int_sse_loop: 216 | 217 | ;; Load YYYY data from memory. 218 | movss xmm2, [rsi] 219 | 220 | ;; Unpack YYYY into [ Y, Y, Y, Y ]. 221 | punpcklbw xmm2, xmm0 222 | punpcklwd xmm2, xmm0 223 | 224 | xor eax, eax 225 | 226 | ;; Load U0U1 into xmm3 [ U0, U0, U1, U1 ]. 227 | mov ax, [rdx] 228 | movd xmm3, eax 229 | punpcklbw xmm3, xmm0 230 | punpcklwd xmm3, xmm0 231 | pshufd xmm3, xmm3, 0x50 232 | 233 | ;; Load V0V1 into xmm4 [ V0, V0, V1, V1 ]. 234 | mov ax, [rdx+r8] 235 | movd xmm4, eax 236 | punpcklbw xmm4, xmm0 237 | punpcklwd xmm4, xmm0 238 | pshufd xmm4, xmm4, 0x50 239 | 240 | ;; [ U, U, U, U ] - [ 128, 128, 128, 128 ] 241 | ;; [ V, V, V, V ] - [ 128, 128, 128, 128 ] 242 | psubd xmm3, xmm1 243 | psubd xmm4, xmm1 244 | 245 | ; xmm5 = uv1 = -(1617 * u) - (2378 * v); 246 | movaps xmm5, xmm3 247 | pmuldq xmm5, xmm9 248 | movaps xmm6, xmm4 249 | pmuldq xmm6, xmm10 250 | paddd xmm5, xmm6 251 | 252 | ; xmm3 = u1 = (8324 * u); 253 | pmuldq xmm3, xmm11 254 | 255 | ; xmm4 = v1 = (5727 * v); 256 | pmuldq xmm4, xmm8 257 | 258 | pslld xmm2, 12 ; y = y << 12 259 | 260 | ; r = yuv_buffer[y_ptr] + v1; (xmm4) 261 | paddd xmm4, xmm2 262 | 263 | ; g = yuv_buffer[y_ptr] + uv1; (xmm5) 264 | paddd xmm5, xmm2 265 | 266 | ; b = yuv_buffer[y_ptr] + u1; (xmm3) 267 | paddd xmm3, xmm2 268 | 269 | psrad xmm4, 12 ; r = r >> 12 270 | psrad xmm5, 12 ; g = g >> 12 271 | psrad xmm3, 12 ; b = b >> 12 272 | 273 | pmaxsd xmm4, xmm0 ; if (r < 0) { r = 0; } 274 | pmaxsd xmm5, xmm0 ; if (g < 0) { g = 0; } 275 | pmaxsd xmm3, xmm0 ; if (b < 0) { b = 0; } 276 | 277 | pminsd xmm4, xmm15 ; if (r > 255) { r = 255; } 278 | pminsd xmm5, xmm15 ; if (g > 255) { g = 255; } 279 | pminsd xmm3, xmm15 ; if (b > 255) { b = 255; } 280 | 281 | pslld xmm5, 8 282 | pslld xmm4, 16 283 | 284 | por xmm4, xmm5 285 | por xmm4, xmm3 286 | 287 | movups [rdi], xmm4 288 | 289 | add rdi, 16 290 | add rsi, 4 291 | add rdx, 2 292 | sub ecx, 4 293 | jnz yuv422_to_rgb32_int_sse_loop 294 | ret 295 | 296 | test_sse: 297 | pxor xmm0, xmm0 298 | 299 | ret 300 | 301 | -------------------------------------------------------------------------------- /src/bmp_read.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Image processing routines. 3 | * Author: Michael Kohn 4 | * Email: mike@mikekohn.net 5 | * Web: http://www.mikekohn.net/ 6 | * License: BSD 7 | * 8 | * Copyright 2007-2018 by Michael Kohn 9 | * 10 | */ 11 | 12 | #include 13 | #include 14 | 15 | #include "bmp_read.h" 16 | 17 | static int read_int32(FILE *in) 18 | { 19 | int c; 20 | 21 | c = getc(in); 22 | c |= getc(in) << 8; 23 | c |= getc(in) << 16; 24 | c |= getc(in) << 24; 25 | 26 | return c; 27 | } 28 | 29 | static int read_int16(FILE *in) 30 | { 31 | int c; 32 | 33 | c = getc(in); 34 | c |= getc(in) << 8; 35 | 36 | return c; 37 | } 38 | 39 | static void bmp_read_file_header(FILE *in, struct _bitmap_file *bitmap_file) 40 | { 41 | bitmap_file->type[0] = getc(in); 42 | bitmap_file->type[1] = getc(in); 43 | bitmap_file->size = read_int32(in); 44 | bitmap_file->reserved1 = read_int16(in); 45 | bitmap_file->reserved2 = read_int16(in); 46 | bitmap_file->offset = read_int32(in); 47 | } 48 | 49 | static void bmp_read_info_header(FILE *in, struct _bitmap_info *bitmap_info) 50 | { 51 | bitmap_info->size = read_int32(in); 52 | bitmap_info->width = read_int32(in); 53 | bitmap_info->height = read_int32(in); 54 | bitmap_info->planes = read_int16(in); 55 | bitmap_info->bit_count = read_int16(in); 56 | bitmap_info->compression = read_int32(in); 57 | bitmap_info->image_size = read_int32(in); 58 | bitmap_info->x_pels_per_metre = read_int32(in); 59 | bitmap_info->y_pels_per_metre = read_int32(in); 60 | bitmap_info->colors_used = read_int32(in); 61 | bitmap_info->colors_important = read_int32(in); 62 | } 63 | 64 | static void bmp_raw_uncompressed(FILE *in, uint32_t *picture, struct _bitmap_info *bitmap_info) 65 | { 66 | int x, y; 67 | int c = 0,t; 68 | int byte_count; 69 | 70 | for (y = bitmap_info->height - 1; y >= 0; y--) 71 | { 72 | byte_count = 0; 73 | 74 | for (x = 0; x < bitmap_info->width; x++) 75 | { 76 | if (bitmap_info->bit_count == 8 || 77 | bitmap_info->bit_count == 24 || 78 | bitmap_info->bit_count == 32) 79 | { 80 | if (bitmap_info->bit_count==8) 81 | { 82 | c = getc(in); 83 | c = bitmap_info->colors[c]; 84 | byte_count++; 85 | } 86 | else 87 | if (bitmap_info->bit_count == 24) 88 | { 89 | c = getc(in) + (getc(in) << 8) + (getc(in) << 16); 90 | byte_count = byte_count + 3; 91 | } 92 | else if (bitmap_info->bit_count == 32) 93 | { 94 | c = getc(in) + (getc(in) << 8) + (getc(in) << 16); 95 | t = getc(in); 96 | 97 | byte_count = byte_count + 4; 98 | } 99 | 100 | picture[x + (y * bitmap_info->width)] = c; 101 | } 102 | else if (bitmap_info->bit_count == 4) 103 | { 104 | c = getc(in); 105 | byte_count++; 106 | 107 | picture[x + (y * bitmap_info->width)] = bitmap_info->colors[(( c >> 4) & 15)]; 108 | x++; 109 | 110 | if (x < bitmap_info->width) 111 | { 112 | picture[x + (y * bitmap_info->width)] = bitmap_info->colors[c & 15]; 113 | } 114 | } 115 | else if (bitmap_info->bit_count == 1) 116 | { 117 | c = getc(in); 118 | byte_count++; 119 | 120 | for (t = 7; t >= 0; t--) 121 | { 122 | if (x < bitmap_info->width) 123 | { 124 | const int index = x + (y * bitmap_info->width); 125 | if (((c >> t) & 1) == 0) 126 | { 127 | picture[index] = bitmap_info->colors[0]; 128 | } 129 | else 130 | { 131 | picture[index] = bitmap_info->colors[1]; 132 | } 133 | } 134 | 135 | x++; 136 | } 137 | 138 | x--; 139 | } 140 | } 141 | 142 | c = (byte_count % 4); 143 | 144 | if (c != 0) 145 | { 146 | for (t = c; t < 4; t++) 147 | { 148 | getc(in); 149 | } 150 | } 151 | } 152 | } 153 | 154 | static void bmp_raw_compressed(FILE *in, uint32_t *picture, struct _bitmap_info *bitmap_info) 155 | { 156 | int x, y; 157 | int c, t, r; 158 | 159 | y = bitmap_info->height - 1; 160 | x = 0; 161 | 162 | while (1) 163 | { 164 | c = getc(in); 165 | 166 | if (c == EOF) { return; } 167 | 168 | if (c != 0) 169 | { 170 | r = getc(in); 171 | 172 | for (t = 0; t < c; t++) 173 | { 174 | const int index = x + (y * bitmap_info->width); 175 | 176 | if (bitmap_info->bit_count == 4) 177 | { 178 | if ((t & 1) == 0) 179 | { 180 | picture[index] = bitmap_info->colors[r >> 4]; 181 | } 182 | else 183 | { 184 | picture[index] = bitmap_info->colors[r & 15]; 185 | } 186 | } 187 | else 188 | if (bitmap_info->bit_count == 8) 189 | { 190 | picture[index] = bitmap_info->colors[r]; 191 | } 192 | 193 | x++; 194 | } 195 | } 196 | else 197 | { 198 | r = getc(in); 199 | 200 | if (r == 0) 201 | { 202 | x = 0; 203 | y--; 204 | continue; 205 | } 206 | else if (r == 1) 207 | { 208 | break; 209 | } 210 | else if (r == 2) 211 | { 212 | x = x + getc(in); 213 | y = y - getc(in); 214 | 215 | return; 216 | } 217 | 218 | for (t = 0; t < r; t++) 219 | { 220 | c = getc(in); 221 | 222 | const int index = x + (y * bitmap_info->width); 223 | 224 | if (bitmap_info->bit_count == 8) 225 | { 226 | picture[index] = bitmap_info->colors[c]; 227 | } 228 | else if (bitmap_info->bit_count == 4) 229 | { 230 | picture[index] = bitmap_info->colors[c >> 4]; 231 | 232 | t++; 233 | 234 | if (tcolors[c & 15]; 238 | } 239 | } 240 | 241 | x++; 242 | } 243 | 244 | if (bitmap_info->bit_count == 8) 245 | { 246 | c = r % 2; 247 | } 248 | else if (bitmap_info->bit_count == 4) 249 | { 250 | t = (r / 2) + (r % 2); 251 | c = t % 2; 252 | } 253 | 254 | if (c != 0) 255 | { 256 | getc(in); 257 | } 258 | } 259 | } 260 | } 261 | 262 | int bmp_read(const char *filename, struct _pic_info *pic_info) 263 | { 264 | FILE *in; 265 | struct _bitmap_file bitmap_file; 266 | struct _bitmap_info bitmap_info; 267 | int t; 268 | 269 | in = fopen(filename, "rb"); 270 | 271 | if (in == NULL) 272 | { 273 | printf("Cannot open file %s for reading.\n", filename); 274 | return -1; 275 | } 276 | 277 | bmp_read_file_header(in, &bitmap_file); 278 | 279 | if (bitmap_file.type[0] != 'B' || bitmap_file.type[1] != 'M') 280 | { 281 | fclose(in); 282 | printf("Not a bitmap.\n"); 283 | return -1; 284 | } 285 | 286 | bmp_read_info_header(in, &bitmap_info); 287 | 288 | #ifdef DEBUG 289 | printf("Bitmap File Header\n"); 290 | printf("----------------------------------------------\n"); 291 | printf(" bfType: %c%c\n", bitmap_file.type[0], 292 | bitmap_file.type[1]); 293 | printf(" bfSize: %d\n", bitmap_file.size); 294 | printf(" reserved1: %d\n", bitmap_file.reserved1); 295 | printf(" reserved2: %d\n", bitmap_file.reserved2); 296 | printf(" bfOffs: %d\n", bitmap_file.offset); 297 | printf("----------------------------------------------\n"); 298 | printf("Bitmap Info Header\n"); 299 | printf("----------------------------------------------\n"); 300 | printf(" biSize: %d\n", bitmap_info.size); 301 | printf(" biWidth: %d\n", bitmap_info.width); 302 | printf(" biHeight: %d\n", bitmap_info.height); 303 | printf(" biPlanes: %d\n", bitmap_info.planes); 304 | printf(" biBitCount: %d\n", bitmap_info.bit_count); 305 | printf(" biCompression: %d\n", bitmap_info.compression); 306 | printf(" biSizeImage: %d\n", bitmap_info.image_size); 307 | printf("biXPelsPerMetre: %d\n", bitmap_info.x_pels_per_metre); 308 | printf("biYPelsPerMetre: %d\n", bitmap_info.y_pels_per_metre); 309 | printf(" biClrUsed: %d\n", bitmap_info.colors_used); 310 | printf(" biClrImportant: %d\n", bitmap_info.colors_important); 311 | printf("----------------------------------------------\n"); 312 | #endif 313 | 314 | bitmap_info.colors[0] = 0; 315 | bitmap_info.colors[1] = 0xffffff; 316 | bitmap_info.colors[255] = 0xffffff; 317 | 318 | if (bitmap_info.colors_important == 0 && bitmap_info.bit_count == 8) 319 | { 320 | bitmap_info.colors_important = 256; 321 | } 322 | 323 | for (t = 0; t < bitmap_info.colors_important; t++) 324 | { 325 | bitmap_info.colors[t] = read_int32(in); 326 | } 327 | 328 | pic_info->width = bitmap_info.width; 329 | pic_info->height = bitmap_info.height; 330 | 331 | pic_info->picture = malloc(bitmap_info.width * bitmap_info.height * sizeof(int)); 332 | // posix_memalign((void **)&pic_info->picture, 16, bitmap_info.width*bitmap_info.height * sizeof(int)); 333 | 334 | fseek(in, bitmap_file.offset, 0); 335 | 336 | if (bitmap_info.compression == 0) 337 | { 338 | bmp_raw_uncompressed(in, pic_info->picture, &bitmap_info); 339 | } 340 | else if (bitmap_info.compression == 1) 341 | { 342 | bmp_raw_compressed(in, pic_info->picture, &bitmap_info); 343 | } 344 | else if (bitmap_info.compression == 2) 345 | { 346 | bmp_raw_compressed(in, pic_info->picture, &bitmap_info); 347 | } 348 | else if (bitmap_info.compression == 3) 349 | { 350 | bmp_raw_uncompressed(in, pic_info->picture, &bitmap_info); 351 | } 352 | else 353 | { 354 | printf("This type of compression is not supported at this time.\n"); 355 | } 356 | 357 | fclose(in); 358 | 359 | return 0; 360 | } 361 | 362 | --------------------------------------------------------------------------------