├── dummy.cpp ├── test ├── extra │ ├── musl-libm │ │ ├── VERSION │ │ ├── __math_invalidf.c │ │ ├── __math_oflowf.c │ │ ├── __math_uflowf.c │ │ ├── __math_divzerof.c │ │ ├── __math_xflowf.c │ │ ├── mymath.h │ │ ├── logf_data.h │ │ ├── exp2f_data.h │ │ ├── powf_data.h │ │ ├── logf_data.c │ │ ├── exp2f_data.c │ │ ├── powf_data.c │ │ ├── fpu_wrapper.c │ │ ├── logf.c │ │ ├── expf.c │ │ ├── log10f.c │ │ ├── cos.c │ │ ├── sin.c │ │ ├── __sin.c │ │ └── __cos.c │ └── sha1 │ │ ├── config.h │ │ └── sha1.h ├── main.cpp ├── graph │ ├── filter_validator.h │ ├── audit_buffer.h │ └── mock_filter.h ├── depth │ ├── arm │ │ └── f16c_neon_test.cpp │ └── x86 │ │ ├── f16c_ivb_test.cpp │ │ ├── f16c_sse2_test.cpp │ │ └── dither_sse2_test.cpp ├── colorspace │ └── x86 │ │ ├── colorspace_avx_test.cpp │ │ ├── colorspace_sse_test.cpp │ │ ├── gamma_constants_avx512_test.cpp │ │ ├── colorspace_avx2_test.cpp │ │ └── colorspace_sse2_test.cpp ├── api │ └── api_test.cpp └── resize │ ├── filter_test.cpp │ └── x86 │ ├── resize_impl_sse_test.cpp │ └── resize_impl_avx_test.cpp ├── doc └── example │ ├── misc │ ├── mmap.h │ ├── mmap.cpp │ ├── argparse.h │ ├── argparse.cpp │ ├── aligned_malloc.h │ ├── win32_bitmap.cpp │ └── win32_bitmap.h │ └── Makefile ├── autogen.sh ├── .gitmodules ├── .coverity-prepare.sh ├── src ├── zimg │ ├── common │ │ ├── builder.h │ │ ├── libm_wrapper.cpp │ │ ├── arm │ │ │ ├── cpuinfo_arm.h │ │ │ ├── cpuinfo_arm.cpp │ │ │ └── neon_util.cpp │ │ ├── x86 │ │ │ ├── x86util.h │ │ │ ├── sse_util.h │ │ │ ├── avx_util.h │ │ │ └── cpuinfo_x86.h │ │ ├── zassert.h │ │ ├── ccdep.h │ │ ├── cpuinfo.cpp │ │ ├── cpuinfo.h │ │ ├── libm_wrapper.h │ │ ├── make_unique.h │ │ ├── align.h │ │ └── except.h │ ├── depth │ │ ├── blue.h │ │ ├── arm │ │ │ ├── f16c_arm.h │ │ │ ├── dither_arm.h │ │ │ ├── depth_convert_arm.h │ │ │ ├── f16c_neon.cpp │ │ │ └── dither_arm.cpp │ │ ├── x86 │ │ │ ├── f16c_x86.h │ │ │ ├── depth_convert_x86.h │ │ │ ├── f16c_ivb.cpp │ │ │ └── dither_x86.h │ │ ├── depth.h │ │ ├── dither.h 
│ │ ├── depth_convert.h │ │ ├── quantize.h │ │ └── depth.cpp │ ├── colorspace │ │ ├── graph.h │ │ ├── arm │ │ │ ├── operation_impl_arm.h │ │ │ └── operation_impl_arm.cpp │ │ ├── x86 │ │ │ ├── operation_impl_x86.h │ │ │ ├── gamma_constants_avx512.h │ │ │ └── operation_impl_x86.cpp │ │ ├── matrix3.h │ │ ├── operation_impl.h │ │ ├── matrix3.cpp │ │ ├── colorspace.h │ │ ├── gamma.h │ │ └── colorspace.cpp │ ├── resize │ │ ├── resize.h │ │ ├── arm │ │ │ ├── resize_impl_arm.h │ │ │ └── resize_impl_arm.cpp │ │ ├── x86 │ │ │ ├── resize_impl_avx512_vnni.cpp │ │ │ └── resize_impl_x86.h │ │ ├── resize_impl.h │ │ └── resize.cpp │ ├── unresize │ │ ├── unresize_impl.h │ │ ├── bilinear.h │ │ ├── unresize.cpp │ │ └── unresize.h │ └── graph │ │ └── basic_filter.h ├── testcommon │ ├── aligned_malloc.h │ ├── mmap.h │ ├── timer.h │ ├── win32_bitmap.h │ └── argparse.h └── testapp │ ├── utils.h │ ├── apps.h │ ├── table.h │ ├── pair_filter.h │ ├── frame.h │ ├── utils.cpp │ └── main.cpp ├── zimg.pc.in ├── _msvc ├── zimg.def ├── _example_api │ └── _example_api.vcxproj.filters ├── _example_hdr │ └── _example_hdr.vcxproj.filters ├── _example_tile │ └── _example_tile.vcxproj.filters ├── _example_api_c │ └── _example_api_c.vcxproj.filters ├── _example_interlace │ └── _example_interlace.vcxproj.filters ├── dll │ └── dll.vcxproj.filters ├── testcommon │ └── testcommon.vcxproj.filters └── testapp │ └── testapp.vcxproj.filters ├── COPYING ├── .travis-script.sh ├── .gitignore ├── .travis.yml ├── m4 └── ax_check_compile_flag.m4 └── README.md /dummy.cpp: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/extra/musl-libm/VERSION: -------------------------------------------------------------------------------- 1 | 1.2.0 2 | -------------------------------------------------------------------------------- /doc/example/misc/mmap.h: 
-------------------------------------------------------------------------------- 1 | ../../../src/testcommon/mmap.h -------------------------------------------------------------------------------- /doc/example/misc/mmap.cpp: -------------------------------------------------------------------------------- 1 | ../../../src/testcommon/mmap.cpp -------------------------------------------------------------------------------- /doc/example/misc/argparse.h: -------------------------------------------------------------------------------- 1 | ../../../src/testcommon/argparse.h -------------------------------------------------------------------------------- /doc/example/misc/argparse.cpp: -------------------------------------------------------------------------------- 1 | ../../../src/testcommon/argparse.cpp -------------------------------------------------------------------------------- /autogen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | autoreconf --verbose --install --force 4 | -------------------------------------------------------------------------------- /doc/example/misc/aligned_malloc.h: -------------------------------------------------------------------------------- 1 | ../../../src/testcommon/aligned_malloc.h -------------------------------------------------------------------------------- /doc/example/misc/win32_bitmap.cpp: -------------------------------------------------------------------------------- 1 | ../../../src/testcommon/win32_bitmap.cpp -------------------------------------------------------------------------------- /doc/example/misc/win32_bitmap.h: -------------------------------------------------------------------------------- 1 | ../../../src/testcommon/win32_bitmap.h -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "test/extra/googletest"] 2 | path 
= test/extra/googletest 3 | url = https://github.com/google/googletest.git 4 | -------------------------------------------------------------------------------- /test/extra/musl-libm/__math_invalidf.c: -------------------------------------------------------------------------------- 1 | #include "libm.h" 2 | 3 | float my__math_invalidf(float x) 4 | { 5 | return (x - x) / (x - x); 6 | } 7 | -------------------------------------------------------------------------------- /test/extra/musl-libm/__math_oflowf.c: -------------------------------------------------------------------------------- 1 | #include "libm.h" 2 | 3 | float my__math_oflowf(uint32_t sign) 4 | { 5 | return my__math_xflowf(sign, 0x1p97f); 6 | } 7 | -------------------------------------------------------------------------------- /test/extra/musl-libm/__math_uflowf.c: -------------------------------------------------------------------------------- 1 | #include "libm.h" 2 | 3 | float my__math_uflowf(uint32_t sign) 4 | { 5 | return my__math_xflowf(sign, 0x1p-95f); 6 | } 7 | -------------------------------------------------------------------------------- /test/extra/musl-libm/__math_divzerof.c: -------------------------------------------------------------------------------- 1 | #include "libm.h" 2 | 3 | float my__math_divzerof(uint32_t sign) 4 | { 5 | return fp_barrierf(sign ? 
-1.0f : 1.0f) / 0.0f; 6 | } 7 | -------------------------------------------------------------------------------- /.coverity-prepare.sh: -------------------------------------------------------------------------------- 1 | cov-configure --comptype gcc --compiler ${CC} --template 2 | ./autogen.sh 3 | ./configure --enable-simd --enable-testapp --enable-example 4 | make clean 5 | -------------------------------------------------------------------------------- /test/extra/musl-libm/__math_xflowf.c: -------------------------------------------------------------------------------- 1 | #include "libm.h" 2 | 3 | float my__math_xflowf(uint32_t sign, float y) 4 | { 5 | return eval_as_float(fp_barrierf(sign ? -y : y) * y); 6 | } 7 | -------------------------------------------------------------------------------- /test/extra/sha1/config.h: -------------------------------------------------------------------------------- 1 | #ifndef LITTLE_ENDIAN 2 | #define LITTLE_ENDIAN 4321 3 | #endif 4 | 5 | #ifndef BIG_ENDIAN 6 | #define BIG_ENDIAN 1234 7 | #endif 8 | 9 | #ifndef BYTE_ORDER 10 | #define BYTE_ORDER LITTLE_ENDIAN 11 | #endif 12 | -------------------------------------------------------------------------------- /src/zimg/common/builder.h: -------------------------------------------------------------------------------- 1 | #ifndef BUILDER_MEMBER 2 | #define BUILDER_MEMBER(type, name) \ 3 | template \ 4 | auto set_##name(T &&val) -> decltype(*this) { name = std::forward(val); return *this; } \ 5 | type name; 6 | #endif // BUILDER_MEMBER 7 | -------------------------------------------------------------------------------- /src/zimg/common/libm_wrapper.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "libm_wrapper.h" 3 | 4 | float (*zimg_x_expf)(float) = expf; 5 | float (*zimg_x_logf)(float) = logf; 6 | float (*zimg_x_log10f)(float) = log10f; 7 | 8 | float (*zimg_x_powf)(float, float) = powf; 9 | 10 | double 
(*zimg_x_sin)(double) = sin; 11 | double (*zimg_x_cos)(double) = cos; 12 | -------------------------------------------------------------------------------- /test/extra/musl-libm/mymath.h: -------------------------------------------------------------------------------- 1 | #ifndef MUSL_MYMATH_H_ 2 | #define MUSL_MYMATH_H_ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | float myexpf(float x); 9 | float mylogf(float x); 10 | float mylog10f(float x); 11 | 12 | float mypowf(float x, float y); 13 | 14 | double mycos(double x); 15 | double mysin(double x); 16 | 17 | #ifdef __cplusplus 18 | } /* extern "C" */ 19 | #endif 20 | 21 | #endif /* MUSL_MYMATH_H_ */ 22 | -------------------------------------------------------------------------------- /zimg.pc.in: -------------------------------------------------------------------------------- 1 | prefix=@prefix@ 2 | exec_prefix=@exec_prefix@ 3 | libdir=@libdir@ 4 | includedir=@includedir@ 5 | 6 | Name: zimg 7 | Description: Scaling, colorspace conversion, and dithering library 8 | Version: @VERSION@ 9 | 10 | # If building a static library against a C++ runtime other than libstdc++, 11 | # define STL_LIBS when running configure. 
12 | Libs: -L${libdir} -lzimg 13 | Libs.private: @STL_LIBS@ 14 | Cflags: -I${includedir} 15 | -------------------------------------------------------------------------------- /_msvc/zimg.def: -------------------------------------------------------------------------------- 1 | LIBRARY z 2 | EXPORTS 3 | zimg_get_version_info 4 | zimg_get_api_version 5 | zimg_get_last_error 6 | zimg_clear_last_error 7 | zimg_select_buffer_mask 8 | zimg_filter_graph_free 9 | zimg_filter_graph_get_tmp_size 10 | zimg_filter_graph_get_input_buffering 11 | zimg_filter_graph_get_output_buffering 12 | zimg_filter_graph_process 13 | zimg_image_format_default 14 | zimg_graph_builder_params_default 15 | zimg_filter_graph_build 16 | -------------------------------------------------------------------------------- /src/zimg/depth/blue.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef ZIMG_DEPTH_BLUE_H_ 4 | #define ZIMG_DEPTH_BLUE_H_ 5 | 6 | #include 7 | 8 | namespace zimg { 9 | namespace depth { 10 | 11 | constexpr unsigned BLUE_NOISE_LEN = 64; 12 | constexpr unsigned BLUE_NOISE_SCALE = 255; 13 | 14 | extern const uint8_t blue_noise_table[BLUE_NOISE_LEN][BLUE_NOISE_LEN]; 15 | 16 | } // namespace depth 17 | } // namespace zimg 18 | 19 | #endif // ZIMG_DEPTH_BLUE_H_ 20 | -------------------------------------------------------------------------------- /src/zimg/depth/arm/f16c_arm.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifdef ZIMG_ARM 4 | 5 | #ifndef ZIMG_DEPTH_ARM_F16C_ARM_H_ 6 | 7 | namespace zimg { 8 | namespace depth { 9 | 10 | void f16c_half_to_float_neon(const void *src, void *dst, unsigned left, unsigned right); 11 | void f16c_float_to_half_neon(const void *src, void *dst, unsigned left, unsigned right); 12 | 13 | } // namespace depth 14 | } // namespace zimg 15 | 16 | #endif // ZIMG_DEPTH_ARM_F16C_ARM_H_ 17 | 18 | #endif // ZIMG_ARM 19 | 
-------------------------------------------------------------------------------- /src/zimg/common/arm/cpuinfo_arm.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifdef ZIMG_ARM 4 | 5 | #ifndef ZIMG_X86_CPUINFO_ARM_H_ 6 | #define ZIMG_X86_CPUINFO_ARM_H_ 7 | 8 | namespace zimg { 9 | 10 | /** 11 | * Bitfield of selected ARM feature flags. 12 | */ 13 | struct ARMCapabilities { 14 | unsigned neon : 1; 15 | unsigned vfpv4 : 1; 16 | }; 17 | 18 | ARMCapabilities query_arm_capabilities() noexcept; 19 | 20 | } // namespace zimg 21 | 22 | #endif // ZIMG_X86_CPUINFO_ARM_H_ 23 | #endif // ZIMG_ARM 24 | -------------------------------------------------------------------------------- /src/zimg/common/x86/x86util.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifdef ZIMG_X86 4 | 5 | #ifndef ZIMG_X86_X86UTIL_H_ 6 | #define ZIMG_X86_X86UTIL_H_ 7 | 8 | #include 9 | #include "common/ccdep.h" 10 | 11 | namespace zimg { 12 | 13 | // The n-th mask vector has the lower n bytes set to all-ones. 
14 | extern const uint8_t xmm_mask_table alignas(16)[17][16]; 15 | extern const uint8_t ymm_mask_table alignas(32)[33][32]; 16 | 17 | } // namespace zimg 18 | 19 | #endif // ZIMG_X86_X86UTIL_H_ 20 | 21 | #endif // ZIMG_X86 22 | -------------------------------------------------------------------------------- /src/zimg/common/zassert.h: -------------------------------------------------------------------------------- 1 | #undef zassert 2 | #undef zassert_d 3 | 4 | #ifdef NDEBUG 5 | #define Z_NDEBUG 6 | #undef NDEBUG 7 | #endif 8 | 9 | #include 10 | 11 | #define zassert(x, msg) assert((x) && (msg)) 12 | 13 | #ifdef Z_NDEBUG 14 | #include "ccdep.h" 15 | #define zassert_d(x, msg) ASSUME_CONDITION(x) 16 | #define zassert_dfatal(msg) 17 | #undef Z_NDEBUG 18 | #define NDEBUG 19 | #else 20 | #define zassert_d(x, msg) zassert(x, msg) 21 | #define zassert_dfatal(msg) zassert(false, msg) 22 | #endif 23 | -------------------------------------------------------------------------------- /test/extra/musl-libm/logf_data.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017-2018, Arm Limited. 3 | * SPDX-License-Identifier: MIT 4 | */ 5 | #ifndef _LOGF_DATA_H 6 | #define _LOGF_DATA_H 7 | 8 | /* #include */ 9 | 10 | #define LOGF_TABLE_BITS 4 11 | #define LOGF_POLY_ORDER 4 12 | extern /* hidden */ const struct logf_data { 13 | struct { 14 | double invc, logc; 15 | } tab[1 << LOGF_TABLE_BITS]; 16 | double ln2; 17 | double poly[LOGF_POLY_ORDER - 1]; /* First order coefficient is 1. 
*/ 18 | } my__logf_data; 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE 2 | Version 2, December 2004 3 | 4 | Copyright (C) 2004 Sam Hocevar 5 | 6 | Everyone is permitted to copy and distribute verbatim or modified 7 | copies of this license document, and changing it is allowed as long 8 | as the name is changed. 9 | 10 | DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE 11 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 12 | 13 | 0. You just DO WHAT THE FUCK YOU WANT TO. 14 | 15 | -------------------------------------------------------------------------------- /test/extra/sha1/sha1.h: -------------------------------------------------------------------------------- 1 | /* ================ sha1.h ================ */ 2 | /* 3 | SHA-1 in C 4 | By Steve Reid 5 | 100% Public Domain 6 | */ 7 | 8 | typedef struct { 9 | uint32_t state[5]; 10 | uint32_t count[2]; 11 | unsigned char buffer[64]; 12 | } SHA1_CTX; 13 | 14 | void SHA1Transform(uint32_t state[5], const unsigned char buffer[64]); 15 | void SHA1Init(SHA1_CTX* context); 16 | void SHA1Update(SHA1_CTX* context, const unsigned char* data, uint32_t len); 17 | void SHA1Final(unsigned char digest[20], SHA1_CTX* context); 18 | -------------------------------------------------------------------------------- /src/testcommon/aligned_malloc.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef ALIGNED_MALLOC_H_ 4 | #define ALIGNED_MALLOC_H_ 5 | 6 | #ifdef _WIN32 7 | #include 8 | static void *aligned_malloc(size_t size, size_t alignment) { return _aligned_malloc(size, alignment); } 9 | static void aligned_free(void *ptr) { _aligned_free(ptr); } 10 | #else 11 | #include 12 | static void *aligned_malloc(size_t size, size_t alignment) { void *p; if 
(posix_memalign(&p, alignment, size)) return 0; else return p; } 13 | static void aligned_free(void *ptr) { free(ptr); } 14 | #endif 15 | 16 | #endif /* ALIGNED_MALLOC_H_ */ 17 | -------------------------------------------------------------------------------- /test/extra/musl-libm/exp2f_data.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017-2018, Arm Limited. 3 | * SPDX-License-Identifier: MIT 4 | */ 5 | #ifndef _EXP2F_DATA_H 6 | #define _EXP2F_DATA_H 7 | 8 | /* #include */ 9 | #include 10 | 11 | /* Shared between expf, exp2f and powf. */ 12 | #define EXP2F_TABLE_BITS 5 13 | #define EXP2F_POLY_ORDER 3 14 | extern /* hidden */ const struct exp2f_data { 15 | uint64_t tab[1 << EXP2F_TABLE_BITS]; 16 | double shift_scaled; 17 | double poly[EXP2F_POLY_ORDER]; 18 | double shift; 19 | double invln2_scaled; 20 | double poly_scaled[EXP2F_POLY_ORDER]; 21 | } my__exp2f_data; 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /src/zimg/depth/x86/f16c_x86.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifdef ZIMG_X86 4 | 5 | #ifndef ZIMG_DEPTH_X86_F16C_X86_H_ 6 | 7 | namespace zimg { 8 | namespace depth { 9 | 10 | void f16c_half_to_float_sse2(const void *src, void *dst, unsigned left, unsigned right); 11 | void f16c_float_to_half_sse2(const void *src, void *dst, unsigned left, unsigned right); 12 | 13 | void f16c_half_to_float_ivb(const void *src, void *dst, unsigned left, unsigned right); 14 | void f16c_float_to_half_ivb(const void *src, void *dst, unsigned left, unsigned right); 15 | 16 | } // namespace depth 17 | } // namespace zimg 18 | 19 | #endif // ZIMG_DEPTH_X86_F16C_X86_H_ 20 | 21 | #endif // ZIMG_X86 22 | -------------------------------------------------------------------------------- /test/extra/musl-libm/powf_data.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017-2018, Arm Limited. 3 | * SPDX-License-Identifier: MIT 4 | */ 5 | #ifndef _POWF_DATA_H 6 | #define _POWF_DATA_H 7 | 8 | #include "libm.h" 9 | #include "exp2f_data.h" 10 | 11 | #define POWF_LOG2_TABLE_BITS 4 12 | #define POWF_LOG2_POLY_ORDER 5 13 | #if TOINT_INTRINSICS 14 | #define POWF_SCALE_BITS EXP2F_TABLE_BITS 15 | #else 16 | #define POWF_SCALE_BITS 0 17 | #endif 18 | #define POWF_SCALE ((double)(1 << POWF_SCALE_BITS)) 19 | extern /* hidden */ const struct powf_log2_data { 20 | struct { 21 | double invc, logc; 22 | } tab[1 << POWF_LOG2_TABLE_BITS]; 23 | double poly[POWF_LOG2_POLY_ORDER]; 24 | } my__powf_log2_data; 25 | 26 | #endif 27 | -------------------------------------------------------------------------------- /src/zimg/common/ccdep.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef ZIMG_CCDEP_H_ 4 | #define ZIMG_CCDEP_H_ 5 | 6 | #if defined(_MSC_VER) 7 | #define FORCE_INLINE __forceinline 8 | #elif defined(__GNUC__) 9 | #define FORCE_INLINE __attribute__((always_inline)) 10 | #else 11 | #define FORCE_INLINE 12 | #endif 13 | 14 | #if defined(_MSC_VER) || defined(__GNUC__) 15 | #define RESTRICT __restrict 16 | #else 17 | #define RESTRICT 18 | #endif 19 | 20 | #if defined(_MSC_VER) 21 | #define ASSUME_CONDITION(x) __assume((x)) 22 | #elif defined(__GNUC__) 23 | #define ASSUME_CONDITION(x) do { if (!(x)) __builtin_unreachable(); } while (0) 24 | #else 25 | #define ASSUME_CONDITION(x) ((void)0) 26 | #endif 27 | 28 | #endif /* ZIMG_CCDEP_H_ */ 29 | -------------------------------------------------------------------------------- /test/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "common/libm_wrapper.h" 4 | 5 | #include "gtest/gtest.h" 6 | #include "musl-libm/mymath.h" 7 | 8 | namespace { 9 | 10 | void 
wrap_libm() 11 | { 12 | zimg_x_expf = myexpf; 13 | zimg_x_logf = mylogf; 14 | zimg_x_log10f = mylog10f; 15 | zimg_x_powf = mypowf; 16 | zimg_x_sin = mysin; 17 | zimg_x_cos = mycos; 18 | } 19 | 20 | } // namespace 21 | 22 | 23 | int main(int argc, char **argv) 24 | { 25 | int ret; 26 | 27 | wrap_libm(); 28 | 29 | ::testing::InitGoogleTest(&argc, argv); 30 | ret = RUN_ALL_TESTS(); 31 | 32 | if (getenv("INTERACTIVE") != nullptr) { 33 | puts("Press any key to continue..."); 34 | getc(stdin); 35 | } 36 | 37 | return ret; 38 | } 39 | -------------------------------------------------------------------------------- /.travis-script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -e 3 | 4 | NPROC=$(nproc --all) 5 | 6 | echo "PWD: $(pwd)" 7 | echo "CC: ${CC}" 8 | echo "CXX: ${CXX}" 9 | echo "TRAVIS_BRANCH: ${TRAVIS_BRANCH}" 10 | echo "NPROC: ${NPROC}" 11 | ls -l 12 | 13 | # Do not execute build on coverity branch. 14 | if [ "x$COVERITY_SCAN_BRANCH" = "x1" -o "x$TRAVIS_BRANCH" = "xcoverity_scan" ]; then 15 | test -f cov-int/build-log.txt && tail -n 100 cov-int/build-log.txt || true 16 | test -f cov-int/scm_log.txt && tail -n 100 cov-int/scm_log.txt || true 17 | exit 18 | fi 19 | 20 | ./autogen.sh 21 | ./configure --enable-simd --enable-testapp --enable-example --enable-unit-test 22 | make clean 23 | make "-j${NPROC}" 24 | make "-j${NPROC}" test/unit_test 25 | ./test/unit_test 26 | -------------------------------------------------------------------------------- /src/zimg/common/cpuinfo.cpp: -------------------------------------------------------------------------------- 1 | #include "cpuinfo.h" 2 | 3 | #ifdef ZIMG_X86 4 | #include "x86/cpuinfo_x86.h" 5 | #endif 6 | 7 | namespace zimg { 8 | 9 | unsigned long cpu_cache_size() noexcept 10 | { 11 | unsigned long ret = 0; 12 | #ifdef ZIMG_X86 13 | ret = cpu_cache_size_x86(); 14 | #endif 15 | return ret ? 
ret : 1024 * 1024UL; 16 | } 17 | 18 | bool cpu_has_fast_f16(CPUClass cpu) noexcept 19 | { 20 | bool ret = false; 21 | #ifdef ZIMG_X86 22 | ret = cpu_has_fast_f16_x86(cpu); 23 | #endif 24 | return ret; 25 | } 26 | 27 | bool cpu_requires_64b_alignment(CPUClass cpu) noexcept 28 | { 29 | bool ret = false; 30 | #ifdef ZIMG_X86 31 | ret = cpu_requires_64b_alignment_x86(cpu); 32 | #endif 33 | return ret; 34 | } 35 | 36 | } // namespace zimg 37 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Output files from testapp. 2 | *.bin 3 | *.bmp 4 | *.yuv 5 | 6 | # MSVC files. 7 | *.db 8 | *.opendb 9 | *.opensdf 10 | *.sdf 11 | *.suo 12 | *.user 13 | _msvc/.vs 14 | Debug 15 | Release 16 | 17 | # autotools files. 18 | *.o 19 | *.la 20 | *.lo 21 | *.pc 22 | *.so 23 | *.dll 24 | *.log 25 | *.trs 26 | .libs 27 | .deps/ 28 | .dirstamp 29 | doc/example/alpha_example 30 | doc/example/api_example 31 | doc/example/api_example_c 32 | doc/example/hdr_example 33 | doc/example/interlace_example 34 | doc/example/tile_example 35 | m4/libtool.m4 36 | m4/lt*.m4 37 | test/unit_test 38 | Makefile 39 | Makefile.in 40 | aclocal.m4 41 | autom4te.cache 42 | compile 43 | config.* 44 | configure 45 | depcomp 46 | install-sh 47 | libtool 48 | ltmain.sh 49 | missing 50 | testapp 51 | test-driver 52 | test-suite.log 53 | 54 | !doc/example/Makefile 55 | -------------------------------------------------------------------------------- /src/zimg/common/cpuinfo.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef ZIMG_CPUINFO_H_ 4 | #define ZIMG_CPUINFO_H_ 5 | 6 | namespace zimg { 7 | 8 | /** 9 | * Enum for CPU type. 
10 | */ 11 | enum class CPUClass { 12 | NONE, 13 | AUTO, 14 | AUTO_64B, 15 | #if defined(ZIMG_X86) 16 | X86_SSE, 17 | X86_SSE2, 18 | X86_AVX, 19 | X86_F16C, 20 | X86_AVX2, 21 | X86_AVX512, // F, CD, BW, DQ, VL 22 | X86_AVX512_CLX, // VNNI 23 | #elif defined(ZIMG_ARM) 24 | ARM_NEON, 25 | #endif 26 | }; 27 | 28 | constexpr bool cpu_is_autodetect(CPUClass cpu) noexcept 29 | { 30 | return cpu == CPUClass::AUTO || cpu == CPUClass::AUTO_64B; 31 | } 32 | 33 | unsigned long cpu_cache_size() noexcept; 34 | 35 | bool cpu_has_fast_f16(CPUClass cpu) noexcept; 36 | bool cpu_requires_64b_alignment(CPUClass cpu) noexcept; 37 | 38 | } // namespace zimg 39 | 40 | #endif // ZIMG_CPUINFO_H_ 41 | -------------------------------------------------------------------------------- /src/testapp/utils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef UTILS_H_ 4 | #define UTILS_H_ 5 | 6 | #include 7 | 8 | namespace zimg { 9 | namespace graph { 10 | 11 | class ImageFilter; 12 | 13 | } // namespace graph 14 | } // namespace zimg 15 | 16 | 17 | class ImageFrame; 18 | 19 | class FilterExecutor { 20 | struct data; 21 | 22 | std::shared_ptr m_data; 23 | const zimg::graph::ImageFilter *m_filter; 24 | const zimg::graph::ImageFilter *m_filter_uv; 25 | const ImageFrame *m_src_frame; 26 | ImageFrame *m_dst_frame; 27 | 28 | void exec_grey(const zimg::graph::ImageFilter *filter, unsigned plane); 29 | void exec_color(); 30 | public: 31 | FilterExecutor(const zimg::graph::ImageFilter *filter, const zimg::graph::ImageFilter *filter_uv, const ImageFrame *src_frame, ImageFrame *dst_frame); 32 | 33 | void operator()(); 34 | }; 35 | 36 | #endif // UTILS_H_ 37 | -------------------------------------------------------------------------------- /src/zimg/colorspace/graph.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef ZIMG_COLORSPACE_GRAPH_H_ 4 | #define ZIMG_COLORSPACE_GRAPH_H_ 5 | 
6 | #include 7 | #include 8 | #include 9 | 10 | namespace zimg { 11 | 12 | enum class CPUClass; 13 | 14 | namespace colorspace { 15 | 16 | struct ColorspaceDefinition; 17 | struct OperationParams; 18 | class Operation; 19 | 20 | typedef std::function(const OperationParams &, CPUClass)> OperationFactory; 21 | 22 | /** 23 | * Find the shortest path between two colorspaces. 24 | * 25 | * @param in input colorspace 26 | * @param out output colorspace 27 | * @return vector of factory functors for operations 28 | */ 29 | std::vector get_operation_path(const ColorspaceDefinition &in, const ColorspaceDefinition &out); 30 | 31 | } // namespace colorspace 32 | } // namespace zimg 33 | 34 | #endif // ZIMG_COLORSPACE_GRAPH_H_ 35 | -------------------------------------------------------------------------------- /src/testapp/apps.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef APPS_H_ 4 | #define APPS_H_ 5 | 6 | #define PIXFMT_SPECIFIER_HELP_STR \ 7 | "Pixel format specifier: type[:fullrange[chroma][:depth]]\n" \ 8 | "fullrange: f=fullrange, l=limited\n" \ 9 | "chroma: c=chroma, l=luma\n" 10 | 11 | struct ArgparseOption; 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | int arg_decode_cpu(const struct ArgparseOption *opt, void *out, const char *param, int negated); 18 | 19 | int arg_decode_pixfmt(const struct ArgparseOption *opt, void *out, const char *param, int negated); 20 | 21 | int colorspace_main(int argc, char **argv); 22 | int cpuinfo_main(int argc, char **argv); 23 | int depth_main(int argc, char **argv); 24 | int graph_main(int argc, char **argv); 25 | int resize_main(int argc, char **argv); 26 | int unresize_main(int argc, char **argv); 27 | 28 | #ifdef __cplusplus 29 | } /* extern "C" */ 30 | #endif 31 | 32 | #endif /* APPS_H_ */ 33 | -------------------------------------------------------------------------------- /src/zimg/depth/depth.h: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef ZIMG_DEPTH_DEPTH_H_ 4 | #define ZIMG_DEPTH_DEPTH_H_ 5 | 6 | #include 7 | #include "common/pixel.h" 8 | 9 | namespace zimg { 10 | 11 | enum class CPUClass; 12 | 13 | namespace graph { 14 | 15 | class ImageFilter; 16 | 17 | } // namespace graph 18 | 19 | 20 | namespace depth { 21 | 22 | enum class DitherType { 23 | NONE, 24 | ORDERED, 25 | RANDOM, 26 | ERROR_DIFFUSION, 27 | }; 28 | 29 | struct DepthConversion { 30 | unsigned width; 31 | unsigned height; 32 | 33 | #include "common/builder.h" 34 | BUILDER_MEMBER(PixelFormat, pixel_in) 35 | BUILDER_MEMBER(PixelFormat, pixel_out) 36 | BUILDER_MEMBER(DitherType, dither_type) 37 | BUILDER_MEMBER(CPUClass, cpu) 38 | #undef BUILDER_MEMBER 39 | 40 | DepthConversion(unsigned width, unsigned height); 41 | 42 | std::unique_ptr create() const; 43 | }; 44 | 45 | } // namespace depth 46 | } // namespace zimg 47 | 48 | #endif // ZIMG_DEPTH_DEPTH_H_ 49 | -------------------------------------------------------------------------------- /src/zimg/depth/dither.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef ZIMG_DEPTH_DITHER_H_ 4 | #define ZIMG_DEPTH_DITHER_H_ 5 | 6 | #include 7 | 8 | namespace zimg { 9 | 10 | enum class CPUClass; 11 | 12 | struct PixelFormat; 13 | 14 | namespace graph { 15 | 16 | class ImageFilter; 17 | 18 | } // namespace graph 19 | 20 | 21 | namespace depth { 22 | 23 | enum class DitherType; 24 | 25 | typedef void (*dither_convert_func)(const float *dither, unsigned dither_offset, unsigned dither_mask, 26 | const void *src, void *dst, float scale, float offset, unsigned bits, unsigned left, unsigned right); 27 | typedef void (*dither_f16c_func)(const void *src, void *dst, unsigned left, unsigned right); 28 | 29 | std::unique_ptr create_dither(DitherType type, unsigned width, unsigned height, const PixelFormat &pixel_in, const PixelFormat 
&pixel_out, CPUClass cpu); 30 | 31 | } // namespace depth 32 | } // namespace zimg 33 | 34 | #endif // ZIMG_DEPTH_DITHER_H_ 35 | -------------------------------------------------------------------------------- /_msvc/_example_api/_example_api.vcxproj.filters: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | -------------------------------------------------------------------------------- /_msvc/_example_hdr/_example_hdr.vcxproj.filters: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | -------------------------------------------------------------------------------- /_msvc/_example_tile/_example_tile.vcxproj.filters: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | 
-------------------------------------------------------------------------------- /_msvc/_example_api_c/_example_api_c.vcxproj.filters: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | -------------------------------------------------------------------------------- /_msvc/_example_interlace/_example_interlace.vcxproj.filters: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | -------------------------------------------------------------------------------- /src/zimg/common/libm_wrapper.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef ZIMG_LIBM_WRAPPER_H_ 4 | #define ZIMG_LIBM_WRAPPER_H_ 5 | 6 | /** 7 | * @file 8 | * 9 | * To ensure reproducible results during testing, the use of inexact math 10 | * library functions is avoided in the library. Instead, all calls to such 11 | * functions must be dispatched through one of the function pointers below. 
12 | */ 13 | 14 | #ifdef __cplusplus 15 | extern "C" { 16 | #endif 17 | 18 | extern float (*zimg_x_expf)(float x); 19 | extern float (*zimg_x_logf)(float x); 20 | extern float (*zimg_x_log10f)(float x); 21 | 22 | extern float (*zimg_x_powf)(float x, float y); 23 | 24 | /* Prevent MSVC from executing the legacy x87 FSQRT instruction if possible. */ 25 | #if defined(_M_IX86_FP) && _M_IX86_FP > 0 26 | #include 27 | #define zimg_x_sqrtf(x) (_mm_cvtss_f32(_mm_sqrt_ss(_mm_set_ss(x)))) 28 | #else 29 | #define zimg_x_sqrtf sqrtf 30 | #endif 31 | 32 | extern double (*zimg_x_sin)(double x); 33 | extern double (*zimg_x_cos)(double x); 34 | 35 | #ifdef __cplusplus 36 | } /* extern "C" */ 37 | #endif 38 | 39 | #endif /* ZIMG_LIBM_WRAPPER_H_ */ 40 | -------------------------------------------------------------------------------- /_msvc/dll/dll.vcxproj.filters: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | 23 | 24 | Source Files 25 | 26 | 27 | -------------------------------------------------------------------------------- /src/testcommon/mmap.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef MMAP_H_ 4 | #define MMAP_H_ 5 | 6 | #include 7 | #include 8 | 9 | class MemoryMappedFile { 10 | class impl; 11 | 12 | struct read_tag {}; 13 | struct write_tag {}; 14 | struct create_tag {}; 15 | public: 16 | static const read_tag READ_TAG; 17 | static const write_tag WRITE_TAG; 18 | static const create_tag CREATE_TAG; 19 | private: 20 | std::unique_ptr m_impl; 21 | 22 | impl *get_impl() { return m_impl.get(); 
} 23 | const impl *get_impl() const { return m_impl.get(); } 24 | public: 25 | MemoryMappedFile() noexcept; 26 | 27 | MemoryMappedFile(MemoryMappedFile &&other) noexcept; 28 | 29 | MemoryMappedFile(const char *path, read_tag); 30 | 31 | MemoryMappedFile(const char *path, write_tag); 32 | 33 | MemoryMappedFile(const char *path, size_t size, create_tag); 34 | 35 | ~MemoryMappedFile(); 36 | 37 | MemoryMappedFile &operator=(MemoryMappedFile &&other) noexcept; 38 | 39 | size_t size() const noexcept; 40 | 41 | const void *read_ptr() const noexcept; 42 | 43 | void *write_ptr() noexcept; 44 | 45 | void flush(); 46 | 47 | void close(); 48 | }; 49 | 50 | #endif // MMAP_H_ 51 | -------------------------------------------------------------------------------- /src/zimg/depth/depth_convert.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef ZIMG_DEPTH_DEPTH_CONVERT_H_ 4 | #define ZIMG_DEPTH_DEPTH_CONVERT_H_ 5 | 6 | #include 7 | 8 | namespace zimg { 9 | 10 | struct PixelFormat; 11 | 12 | enum class PixelType; 13 | enum class CPUClass; 14 | 15 | namespace graph { 16 | 17 | class ImageFilter; 18 | 19 | } // namespace graph 20 | 21 | 22 | namespace depth { 23 | 24 | typedef void (*left_shift_func)(const void *src, void *dst, unsigned shift, unsigned left, unsigned right); 25 | typedef void (*depth_convert_func)(const void *src, void *dst, float scale, float offset, unsigned left, unsigned right); 26 | typedef void (*depth_f16c_func)(const void *src, void *dst, unsigned left, unsigned right); 27 | 28 | std::unique_ptr create_left_shift(unsigned width, unsigned height, const PixelFormat &pixel_in, const PixelFormat &pixel_out, CPUClass cpu); 29 | 30 | std::unique_ptr create_convert_to_float(unsigned width, unsigned height, const PixelFormat &pixel_in, const PixelFormat &pixel_out, CPUClass cpu); 31 | 32 | } // namespace depth 33 | } // namespace zimg 34 | 35 | #endif // ZIMG_DEPTH_DEPTH_CONVERT_H_ 36 | 
-------------------------------------------------------------------------------- /test/extra/musl-libm/logf_data.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Data definition for logf. 3 | * 4 | * Copyright (c) 2017-2018, Arm Limited. 5 | * SPDX-License-Identifier: MIT 6 | */ 7 | 8 | #include "logf_data.h" 9 | 10 | const struct logf_data my__logf_data = { 11 | .tab = { 12 | { 0x1.661ec79f8f3bep+0, -0x1.57bf7808caadep-2 }, 13 | { 0x1.571ed4aaf883dp+0, -0x1.2bef0a7c06ddbp-2 }, 14 | { 0x1.49539f0f010bp+0, -0x1.01eae7f513a67p-2 }, 15 | { 0x1.3c995b0b80385p+0, -0x1.b31d8a68224e9p-3 }, 16 | { 0x1.30d190c8864a5p+0, -0x1.6574f0ac07758p-3 }, 17 | { 0x1.25e227b0b8eap+0, -0x1.1aa2bc79c81p-3 }, 18 | { 0x1.1bb4a4a1a343fp+0, -0x1.a4e76ce8c0e5ep-4 }, 19 | { 0x1.12358f08ae5bap+0, -0x1.1973c5a611cccp-4 }, 20 | { 0x1.0953f419900a7p+0, -0x1.252f438e10c1ep-5 }, 21 | { 0x1p+0, 0x0p+0 }, 22 | { 0x1.e608cfd9a47acp-1, 0x1.aa5aa5df25984p-5 }, 23 | { 0x1.ca4b31f026aap-1, 0x1.c5e53aa362eb4p-4 }, 24 | { 0x1.b2036576afce6p-1, 0x1.526e57720db08p-3 }, 25 | { 0x1.9c2d163a1aa2dp-1, 0x1.bc2860d22477p-3 }, 26 | { 0x1.886e6037841edp-1, 0x1.1058bc8a07ee1p-2 }, 27 | { 0x1.767dcf5534862p-1, 0x1.4043057b6ee09p-2 }, 28 | }, 29 | .ln2 = 0x1.62e42fefa39efp-1, 30 | .poly = { 31 | -0x1.00ea348b88334p-2, 0x1.5575b0be00b6ap-2, -0x1.ffffef20a4123p-2, 32 | } 33 | }; 34 | -------------------------------------------------------------------------------- /test/graph/filter_validator.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef ZIMG_UNIT_TEST_GRAPH_FILTER_VALIDATOR_H_ 4 | #define ZIMG_UNIT_TEST_GRAPH_FILTER_VALIDATOR_H_ 5 | 6 | #include "common/pixel.h" 7 | 8 | namespace zimg { 9 | namespace graph { 10 | 11 | class ImageFilter; 12 | 13 | } // namespace graph 14 | } // namespace zimg 15 | 16 | 17 | class FilterValidator { 18 | const zimg::graph::ImageFilter *m_test_filter; 19 | const 
zimg::graph::ImageFilter *m_ref_filter; 20 | 21 | zimg::PixelFormat m_src_format; 22 | unsigned m_src_width; 23 | unsigned m_src_height; 24 | 25 | const char * const *m_sha1_str; 26 | double m_snr_thresh; 27 | 28 | bool m_yuv; 29 | public: 30 | FilterValidator(const zimg::graph::ImageFilter *test_filter, unsigned src_width, unsigned src_height, const zimg::PixelFormat &src_format); 31 | 32 | FilterValidator &set_ref_filter(const zimg::graph::ImageFilter *ref_filter, double snr_thresh); 33 | FilterValidator &set_sha1(const char * const sha1_str[3]); 34 | FilterValidator &set_yuv(bool yuv); 35 | 36 | void validate(); 37 | }; 38 | 39 | 40 | bool assert_different_dynamic_type(const zimg::graph::ImageFilter *filter_a, const zimg::graph::ImageFilter *filter_b); 41 | 42 | #endif // ZIMG_UNIT_TEST_GRAPH_FILTER_VALIDATOR_H_ 43 | -------------------------------------------------------------------------------- /src/zimg/resize/resize.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef ZIMG_RESIZE_RESIZE_H_ 4 | #define ZIMG_RESIZE_RESIZE_H_ 5 | 6 | #include 7 | #include 8 | 9 | namespace zimg { 10 | 11 | enum class CPUClass; 12 | enum class PixelType; 13 | 14 | namespace graph { 15 | 16 | class ImageFilter; 17 | 18 | } // namespace graph 19 | 20 | 21 | namespace resize { 22 | 23 | class Filter; 24 | 25 | struct ResizeConversion { 26 | typedef std::pair, std::unique_ptr> filter_pair; 27 | 28 | unsigned src_width; 29 | unsigned src_height; 30 | PixelType type; 31 | 32 | #include "common/builder.h" 33 | BUILDER_MEMBER(unsigned, depth) 34 | BUILDER_MEMBER(const Filter *, filter) 35 | BUILDER_MEMBER(unsigned, dst_width) 36 | BUILDER_MEMBER(unsigned, dst_height) 37 | BUILDER_MEMBER(double, shift_w) 38 | BUILDER_MEMBER(double, shift_h) 39 | BUILDER_MEMBER(double, subwidth) 40 | BUILDER_MEMBER(double, subheight) 41 | BUILDER_MEMBER(CPUClass, cpu) 42 | #undef BUILDER_MEMBER 43 | 44 | ResizeConversion(unsigned src_width, 
unsigned src_height, PixelType type); 45 | 46 | filter_pair create() const; 47 | }; 48 | 49 | } // namespace resize 50 | } // namespace zimg 51 | 52 | #endif // ZIMG_RESIZE_RESIZE_H_ 53 | -------------------------------------------------------------------------------- /src/zimg/common/arm/cpuinfo_arm.cpp: -------------------------------------------------------------------------------- 1 | #ifdef ZIMG_ARM 2 | 3 | #if defined(_WIN32) 4 | #define NOMINMAX 5 | #define STRICT 6 | #define WIN32_LEAN_AND_MEAN 7 | #include 8 | #elif defined(__linux__) 9 | #include 10 | #include 11 | #endif 12 | 13 | #include "cpuinfo_arm.h" 14 | 15 | namespace zimg { 16 | 17 | namespace { 18 | 19 | ARMCapabilities do_query_arm_capabilities() noexcept 20 | { 21 | ARMCapabilities caps = { 0 }; 22 | 23 | // AArch32 feature bits. 24 | #if defined(_M_ARM64) || defined(__aarch64__) 25 | // NEON is required for ARMv8a. 26 | caps.neon = 1; 27 | caps.vfpv4 = 1; 28 | #elif defined(_WIN32) 29 | caps.neon = IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE); 30 | caps.vfpv4 = IsProcessorFeaturePresent(PF_ARM_FMAC_INSTRUCTIONS_AVAILABLE); 31 | #elif defined(__linux__) 32 | long hwcaps = getauxval(AT_HWCAP); 33 | caps.neon = !!(hwcaps & HWCAP_NEON); 34 | caps.vfpv4 = !!(hwcaps & HWCAP_VFPv4); 35 | #endif 36 | 37 | return caps; 38 | } 39 | 40 | } // namespace 41 | 42 | 43 | ARMCapabilities query_arm_capabilities() noexcept 44 | { 45 | static const ARMCapabilities caps = do_query_arm_capabilities(); 46 | return caps; 47 | } 48 | 49 | } // namespace zimg 50 | 51 | #endif // ZIMG_ARM 52 | -------------------------------------------------------------------------------- /src/testapp/table.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef TABLE_H_ 4 | #define TABLE_H_ 5 | 6 | #include 7 | #include "common/static_map.h" 8 | 9 | namespace zimg { 10 | 11 | enum class CPUClass; 12 | enum class PixelType; 13 | 14 | namespace colorspace { 
15 | 16 | enum class MatrixCoefficients; 17 | enum class TransferCharacteristics; 18 | enum class ColorPrimaries; 19 | 20 | } // namespace colorspace 21 | 22 | namespace depth { 23 | 24 | enum class DitherType; 25 | 26 | } // namespace depth 27 | 28 | namespace resize { 29 | 30 | class Filter; 31 | 32 | } // namespace resize 33 | 34 | } // namespace zimg 35 | 36 | 37 | extern const zimg::static_string_map g_cpu_table; 38 | extern const zimg::static_string_map g_pixel_table; 39 | extern const zimg::static_string_map g_matrix_table; 40 | extern const zimg::static_string_map g_transfer_table; 41 | extern const zimg::static_string_map g_primaries_table; 42 | extern const zimg::static_string_map g_dither_table; 43 | extern const zimg::static_string_map(*)(double, double), 8> g_resize_table; 44 | 45 | #endif // TABLE_H_ 46 | -------------------------------------------------------------------------------- /src/zimg/colorspace/arm/operation_impl_arm.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifdef ZIMG_ARM 4 | 5 | #ifndef ZIMG_COLORSPACE_ARM_OPERATION_IMPL_ARM_H_ 6 | #define ZIMG_COLORSPACE_ARM_OPERATION_IMPL_ARM_H_ 7 | 8 | #include 9 | 10 | namespace zimg { 11 | 12 | enum class CPUClass; 13 | 14 | namespace colorspace { 15 | 16 | struct Matrix3x3; 17 | struct OperationParams; 18 | struct TransferFunction; 19 | class Operation; 20 | 21 | std::unique_ptr create_matrix_operation_neon(const Matrix3x3 &m); 22 | 23 | std::unique_ptr create_matrix_operation_arm(const Matrix3x3 &m, CPUClass cpu); 24 | 25 | std::unique_ptr create_gamma_operation_neon(const TransferFunction &transfer, const OperationParams ¶ms); 26 | 27 | std::unique_ptr create_gamma_operation_arm(const TransferFunction &transfer, const OperationParams ¶ms, CPUClass cpu); 28 | 29 | std::unique_ptr create_inverse_gamma_operation_neon(const TransferFunction &transfer, const OperationParams ¶ms); 30 | 31 | std::unique_ptr 
create_inverse_gamma_operation_arm(const TransferFunction &transfer, const OperationParams ¶ms, CPUClass cpu); 32 | 33 | } // namespace colorspace 34 | } // namespace zimg 35 | 36 | #endif // ZIMG_COLORSPACE_ARM_OPERATION_IMPL_ARM_H_ 37 | 38 | #endif // ZIMG_ARM 39 | -------------------------------------------------------------------------------- /src/zimg/common/make_unique.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef ZIMG_MAKE_UNIQUE_H_ 4 | #define ZIMG_MAKE_UNIQUE_H_ 5 | 6 | #if __cplusplus >= 201402L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201402L) 7 | #include 8 | namespace ztd { 9 | using std::make_unique; 10 | } // namespace ztd 11 | #else 12 | #include 13 | #include 14 | #include 15 | 16 | namespace ztd { 17 | 18 | template 19 | std::unique_ptr make_unique_helper(std::false_type, Args&&... args) { 20 | return std::unique_ptr(new T(std::forward(args)...)); 21 | } 22 | 23 | template 24 | std::unique_ptr make_unique_helper(std::true_type, Args&&... args) { 25 | static_assert(std::extent::value == 0, 26 | "make_unique() is forbidden, please use make_unique()."); 27 | 28 | typedef typename std::remove_extent::type U; 29 | return std::unique_ptr(new U[sizeof...(Args)]{ std::forward(args)... }); 30 | } 31 | 32 | template 33 | std::unique_ptr make_unique(Args&&... 
args) { 34 | return make_unique_helper(std::is_array(), std::forward(args)...); 35 | } 36 | 37 | } // namespace ztd 38 | #endif // __cplusplus >= 201402L 39 | 40 | #endif // ZIMG_MAKE_UNIQUE_H_ 41 | -------------------------------------------------------------------------------- /src/testcommon/timer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef TIMER_H_ 4 | #define TIMER_H_ 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | class Timer { 11 | typedef std::chrono::high_resolution_clock hrclock; 12 | 13 | hrclock::time_point m_start; 14 | hrclock::time_point m_stop; 15 | public: 16 | void start() { m_start = hrclock::now(); } 17 | 18 | void stop() { m_stop = hrclock::now(); } 19 | 20 | double elapsed() 21 | { 22 | std::chrono::duration secs = m_stop - m_start; 23 | return secs.count(); 24 | } 25 | }; 26 | 27 | 28 | template 29 | std::pair measure_benchmark(unsigned times, T func, U callback) 30 | { 31 | Timer timer; 32 | double min_time = INFINITY; 33 | double sum_time = 0.0; 34 | 35 | for (unsigned n = 0; n < times; ++n) { 36 | double elapsed_cur; 37 | 38 | timer.start(); 39 | func(); 40 | timer.stop(); 41 | 42 | elapsed_cur = timer.elapsed(); 43 | callback(n, elapsed_cur); 44 | 45 | sum_time += elapsed_cur; 46 | min_time = min_time < elapsed_cur ? 
min_time : elapsed_cur; 47 | } 48 | return{ sum_time / times, min_time }; 49 | } 50 | 51 | template 52 | std::pair measure_benchmark(unsigned times, T func) 53 | { 54 | return measure_benchmark(times, func, [](unsigned, double) {}); 55 | } 56 | 57 | #endif // TIMER_H_ 58 | -------------------------------------------------------------------------------- /src/testcommon/win32_bitmap.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef WIN32_BITMAP_H_ 4 | #define WIN32_BITMAP_H_ 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | struct BitmapDataError : public std::runtime_error { 11 | using std::runtime_error::runtime_error; 12 | }; 13 | 14 | class WindowsBitmap { 15 | class impl; 16 | 17 | struct read_tag {}; 18 | struct write_tag {}; 19 | public: 20 | static const read_tag READ_TAG; 21 | static const write_tag WRITE_TAG; 22 | private: 23 | std::unique_ptr m_impl; 24 | 25 | impl *get_impl() noexcept { return m_impl.get(); } 26 | const impl *get_impl() const noexcept { return m_impl.get(); } 27 | public: 28 | WindowsBitmap(WindowsBitmap &&other) noexcept; 29 | 30 | WindowsBitmap(const char *path, read_tag); 31 | 32 | WindowsBitmap(const char *path, write_tag); 33 | 34 | WindowsBitmap(const char *path, int width, int height, int bit_count); 35 | 36 | ~WindowsBitmap(); 37 | 38 | WindowsBitmap &operator=(WindowsBitmap &&other) noexcept; 39 | 40 | ptrdiff_t stride() const noexcept; 41 | 42 | int width() const noexcept; 43 | 44 | int height() const noexcept; 45 | 46 | int bit_count() const noexcept; 47 | 48 | const unsigned char *read_ptr() const noexcept; 49 | 50 | unsigned char *write_ptr() noexcept; 51 | 52 | void flush(); 53 | 54 | void close(); 55 | }; 56 | 57 | #endif // WIN32_BITMAP_H_ 58 | -------------------------------------------------------------------------------- /src/zimg/resize/arm/resize_impl_arm.h: -------------------------------------------------------------------------------- 1 | #pragma 
once 2 | 3 | #ifdef ZIMG_ARM 4 | 5 | #ifndef ZIMG_RESIZE_ARM_RESIZE_IMPL_ARM_H_ 6 | #define ZIMG_RESIZE_ARM_RESIZE_IMPL_ARM_H_ 7 | 8 | #include 9 | 10 | namespace zimg { 11 | 12 | enum class CPUClass; 13 | enum class PixelType; 14 | 15 | namespace graph { 16 | 17 | class ImageFilter; 18 | 19 | } // namespace graph 20 | 21 | 22 | namespace resize { 23 | 24 | struct FilterContext; 25 | 26 | #define DECLARE_IMPL_H(cpu) \ 27 | std::unique_ptr create_resize_impl_h_##cpu(const FilterContext &context, unsigned height, PixelType type, unsigned depth) 28 | #define DECLARE_IMPL_V(cpu) \ 29 | std::unique_ptr create_resize_impl_v_##cpu(const FilterContext &context, unsigned width, PixelType type, unsigned depth) 30 | 31 | DECLARE_IMPL_H(neon); 32 | 33 | DECLARE_IMPL_V(neon); 34 | 35 | #undef DECLARE_IMPL_H 36 | #undef DECLARE_IMPL_V 37 | 38 | std::unique_ptr create_resize_impl_h_arm(const FilterContext &context, unsigned height, PixelType type, unsigned depth, CPUClass cpu); 39 | 40 | std::unique_ptr create_resize_impl_v_arm(const FilterContext &context, unsigned width, PixelType type, unsigned depth, CPUClass cpu); 41 | 42 | } // namespace resize 43 | } // namespace zimg 44 | 45 | #endif // ZIMG_RESIZE_ARM_RESIZE_IMPL_ARM_H_ 46 | 47 | #endif // ZIMG_ARM 48 | -------------------------------------------------------------------------------- /src/zimg/resize/x86/resize_impl_avx512_vnni.cpp: -------------------------------------------------------------------------------- 1 | #ifdef ZIMG_X86_AVX512 2 | 3 | #include 4 | #include 5 | #include "common/make_unique.h" 6 | #include "common/pixel.h" 7 | #include "resize_impl_x86.h" 8 | 9 | #define mm512_dpwssd_epi32(src, a, b) _mm512_dpwssd_epi32((src), (a), (b)) 10 | #include "resize_impl_avx512_common.h" 11 | 12 | namespace zimg { 13 | namespace resize { 14 | 15 | std::unique_ptr create_resize_impl_h_avx512_vnni(const FilterContext &context, unsigned height, PixelType type, unsigned depth) 16 | { 17 | std::unique_ptr ret; 18 | 19 | 
#ifndef ZIMG_RESIZE_NO_PERMUTE 20 | if (type == PixelType::WORD) 21 | ret = ResizeImplH_Permute_U16_AVX512::create(context, height, depth); 22 | #endif 23 | 24 | if (!ret) { 25 | if (type == PixelType::WORD) 26 | ret = ztd::make_unique(context, height, depth); 27 | } 28 | 29 | return ret; 30 | } 31 | 32 | std::unique_ptr create_resize_impl_v_avx512_vnni(const FilterContext &context, unsigned width, PixelType type, unsigned depth) 33 | { 34 | std::unique_ptr ret; 35 | 36 | if (type == PixelType::WORD) 37 | ret = ztd::make_unique(context, width, depth); 38 | 39 | return ret; 40 | } 41 | 42 | } // namespace resize 43 | } // namespace zimg 44 | 45 | #endif // ZIMG_X86_AVX512 46 | -------------------------------------------------------------------------------- /src/zimg/depth/arm/dither_arm.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifdef ZIMG_ARM 4 | 5 | #ifndef ZIMG_DEPTH_ARM_DITHER_ARM_H_ 6 | #define ZIMG_DEPTH_ARM_DITHER_ARM_H_ 7 | 8 | #include 9 | #include "depth/dither.h" 10 | 11 | namespace zimg { 12 | 13 | namespace graph { 14 | 15 | class ImageFilter; 16 | 17 | } // namespace graph 18 | 19 | 20 | namespace depth { 21 | 22 | #define DECLARE_ORDERED_DITHER(x, cpu) \ 23 | void ordered_dither_##x##_##cpu(const float *dither, unsigned dither_offset, unsigned dither_mask, \ 24 | const void *src, void *dst, float scale, float offset, unsigned bits, unsigned left, unsigned right) 25 | 26 | DECLARE_ORDERED_DITHER(b2b, neon); 27 | DECLARE_ORDERED_DITHER(b2w, neon); 28 | DECLARE_ORDERED_DITHER(w2b, neon); 29 | DECLARE_ORDERED_DITHER(w2w, neon); 30 | DECLARE_ORDERED_DITHER(h2b, neon); 31 | DECLARE_ORDERED_DITHER(h2w, neon); 32 | DECLARE_ORDERED_DITHER(f2b, neon); 33 | DECLARE_ORDERED_DITHER(f2w, neon); 34 | 35 | #undef DECLARE_ORDERED_DITHER 36 | 37 | dither_convert_func select_ordered_dither_func_arm(const PixelFormat &pixel_in, const PixelFormat &pixel_out, CPUClass cpu); 38 | 39 | dither_f16c_func 
select_dither_f16c_func_arm(CPUClass cpu); 40 | 41 | bool needs_dither_f16c_func_arm(CPUClass cpu); 42 | 43 | } // namespace depth 44 | } // namespace zimg 45 | 46 | #endif // ZIMG_DEPTH_ARM_DITHER_ARM_H_ 47 | 48 | #endif // ZIMG_ARM 49 | -------------------------------------------------------------------------------- /doc/example/Makefile: -------------------------------------------------------------------------------- 1 | ZIMG_INCLUDES := $(shell pkg-config --cflags zimg) 2 | ZIMG_LIBS := $(shell pkg-config --libs zimg) 3 | 4 | MY_CFLAGS := -std=c89 -pthread $(CFLAGS) 5 | MY_CXXFLAGS := -std=c++11 -pthread $(CXXFLAGS) 6 | MY_CPPFLAGS := $(ZIMG_INCLUDES) -Imisc $(CPPFLAGS) 7 | MY_LDFLAGS := $(LDFLAGS) 8 | MY_LIBS := -pthread $(ZIMG_LIBS) $(LIBS) 9 | 10 | misc_HDRS = \ 11 | misc/aligned_malloc.h \ 12 | misc/argparse.h \ 13 | misc/mmap.h \ 14 | misc/win32_bitmap.h 15 | 16 | misc_OBJS = \ 17 | misc/argparse.o \ 18 | misc/mmap.o \ 19 | misc/win32_bitmap.o 20 | 21 | all: api_example api_example_c hdr_example interlace_example tile_example 22 | 23 | api_example: api_example.o $(misc_OBJS) 24 | $(CXX) $(MY_LDFLAGS) $^ $(MY_LIBS) -o $@ 25 | api_example_c: api_example_c.o $(misc_OBJS) 26 | $(CXX) $(MY_LDFLAGS) $^ $(MY_LIBS) -o $@ 27 | hdr_example: hdr_example.o $(misc_OBJS) 28 | $(CXX) $(MY_LDFLAGS) $^ $(MY_LIBS) -o $@ 29 | interlace_example: interlace_example.o $(misc_OBJS) 30 | $(CXX) $(MY_LDFLAGS) $^ $(MY_LIBS) -o $@ 31 | tile_example: tile_example.o $(misc_OBJS) 32 | $(CXX) $(MY_LDFLAGS) $^ $(MY_LIBS) -o $@ 33 | 34 | %.o: %.c $(misc_HDRS) 35 | $(CC) -c $(MY_CFLAGS) $(MY_CPPFLAGS) $< -o $@ 36 | %.o: %.cpp $(misc_HDRS) 37 | $(CXX) -c $(MY_CXXFLAGS) $(MY_CPPFLAGS) $< -o $@ 38 | 39 | clean: 40 | rm -f *.a *.o misc/*.o api_example api_example_c hdr_example interlace_example tile_example 41 | 42 | .PHONY: clean 43 | -------------------------------------------------------------------------------- /src/zimg/common/align.h: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef ZIMG_ALIGN_H_ 4 | #define ZIMG_ALIGN_H_ 5 | 6 | namespace zimg { 7 | 8 | /** 9 | * 64-byte alignment allows the use of instructions up to AVX-512. 10 | */ 11 | #if defined(ZIMG_X86) 12 | constexpr int ALIGNMENT = 64; 13 | constexpr int ALIGNMENT_RELAXED = 32; 14 | #elif defined(ZIMG_ARM) 15 | constexpr int ALIGNMENT = 16; 16 | constexpr int ALIGNMENT_RELAXED = 16; 17 | #else 18 | constexpr int ALIGNMENT = alignof(long double); 19 | constexpr int ALIGNMENT_RELAXED = alignof(long double); 20 | #endif 21 | 22 | /** 23 | * Round up the argument x to the nearest multiple of n. 24 | * x must be non-negative and n must be positive and power-of-2. 25 | */ 26 | template 27 | constexpr T ceil_n(T x, unsigned n) { return (x + (n - 1)) & ~static_cast(n - 1); } 28 | 29 | /** 30 | * Round down the argument x to the nearest multiple of n. 31 | * x must be non-negative and n must be positive and power-of-2. 32 | */ 33 | template 34 | constexpr T floor_n(T x, unsigned n) { return x & ~static_cast(n - 1); } 35 | 36 | /** 37 | * Helper struct that computes alignment in units of object count. 38 | * 39 | * @tparam T type of object 40 | */ 41 | template 42 | struct AlignmentOf { 43 | static constexpr unsigned value = ALIGNMENT >= sizeof(T) ? 
ALIGNMENT / sizeof(T) : 1; 44 | }; 45 | 46 | } // namespace zimg 47 | 48 | #endif // ZIMG_ALIGN_H_ 49 | -------------------------------------------------------------------------------- /src/zimg/resize/arm/resize_impl_arm.cpp: -------------------------------------------------------------------------------- 1 | #ifdef ZIMG_ARM 2 | 3 | #include "common/cpuinfo.h" 4 | #include "common/arm/cpuinfo_arm.h" 5 | #include "graph/image_filter.h" 6 | #include "resize_impl_arm.h" 7 | 8 | namespace zimg { 9 | namespace resize { 10 | 11 | std::unique_ptr create_resize_impl_h_arm(const FilterContext &context, unsigned height, PixelType type, unsigned depth, CPUClass cpu) 12 | { 13 | ARMCapabilities caps = query_arm_capabilities(); 14 | std::unique_ptr ret; 15 | 16 | if (cpu_is_autodetect(cpu)) { 17 | if (!ret && caps.neon && caps.vfpv4) 18 | ret = create_resize_impl_h_neon(context, height, type, depth); 19 | } else { 20 | if (!ret && cpu >= CPUClass::ARM_NEON) 21 | ret = create_resize_impl_h_neon(context, height, type, depth); 22 | } 23 | 24 | return ret; 25 | } 26 | 27 | std::unique_ptr create_resize_impl_v_arm(const FilterContext &context, unsigned width, PixelType type, unsigned depth, CPUClass cpu) 28 | { 29 | ARMCapabilities caps = query_arm_capabilities(); 30 | std::unique_ptr ret; 31 | 32 | if (cpu_is_autodetect(cpu)) { 33 | if (!ret && caps.neon && caps.vfpv4) 34 | ret = create_resize_impl_v_neon(context, width, type, depth); 35 | } else { 36 | if (!ret && cpu >= CPUClass::ARM_NEON) 37 | ret = create_resize_impl_v_neon(context, width, type, depth); 38 | } 39 | 40 | return ret; 41 | } 42 | 43 | } // namespace resize 44 | } // namespace zimg 45 | 46 | #endif // ZIMG_ARM -------------------------------------------------------------------------------- /src/testcommon/argparse.h: -------------------------------------------------------------------------------- 1 | #ifndef ARGPARSE_H_ 2 | #define ARGPARSE_H_ 3 | 4 | #include 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 
9 | 10 | typedef enum OptionType { 11 | OPTION_NULL, 12 | OPTION_FLAG, 13 | OPTION_HELP, 14 | OPTION_INCREMENT, 15 | OPTION_DECREMENT, 16 | OPTION_INT, 17 | OPTION_UINT, 18 | OPTION_LONGLONG, 19 | OPTION_ULONGLONG, 20 | OPTION_FLOAT, 21 | OPTION_STRING, 22 | OPTION_USER0, 23 | OPTION_USER1, 24 | } OptionType; 25 | 26 | typedef struct ArgparseOption { 27 | OptionType type; 28 | const char *short_name; 29 | const char *long_name; 30 | size_t offset; 31 | int (*func)(const struct ArgparseOption *opt, void *out, const char *param, int negated); 32 | const char *description; 33 | } ArgparseOption; 34 | 35 | typedef struct ArgparseCommandLine { 36 | const ArgparseOption *switches; /* Terminated by OPTION_NULL. */ 37 | const ArgparseOption *positional; /* Terminated by OPTION_NULL. */ 38 | const char *program_name; 39 | const char *summary; 40 | const char *help_message; 41 | } ArgparseCommandLine; 42 | 43 | enum { 44 | ARGPARSE_HELP_MESSAGE = -1, 45 | ARGPARSE_INSUFFICIENT_ARGS = -2, 46 | ARGPARSE_INVALID_SWITCH = -3, 47 | ARGPARSE_BAD_PARAMETER = -4, 48 | ARGPARSE_FATAL = -128 49 | }; 50 | 51 | /* Returns number of arguments parsed, or negative error code. 
*/ 52 | int argparse_parse(const ArgparseCommandLine *cmd, void *out, int argc, char **argv); 53 | 54 | #ifdef __cplusplus 55 | } /* extern "C" */ 56 | #endif 57 | 58 | #endif /* ARGPARSE_H_ */ 59 | -------------------------------------------------------------------------------- /src/zimg/depth/arm/depth_convert_arm.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifdef ZIMG_ARM 4 | 5 | #ifndef ZIMG_DEPTH_ARM_DEPTH_CONVERT_ARM_H_ 6 | #define ZIMG_DEPTH_ARM_DEPTH_CONVERT_ARM_H_ 7 | 8 | #include "depth/depth_convert.h" 9 | 10 | namespace zimg { 11 | namespace depth { 12 | 13 | #define DECLARE_LEFT_SHIFT(x, cpu) \ 14 | void left_shift_##x##_##cpu(const void *src, void *dst, unsigned shift, unsigned left, unsigned right) 15 | #define DECLARE_DEPTH_CONVERT(x, cpu) \ 16 | void depth_convert_##x##_##cpu(const void *src, void *dst, float scale, float offset, unsigned left, unsigned right) 17 | 18 | DECLARE_LEFT_SHIFT(b2b, neon); 19 | DECLARE_LEFT_SHIFT(b2w, neon); 20 | DECLARE_LEFT_SHIFT(w2b, neon); 21 | DECLARE_LEFT_SHIFT(w2w, neon); 22 | 23 | DECLARE_DEPTH_CONVERT(b2h, neon); 24 | DECLARE_DEPTH_CONVERT(b2f, neon); 25 | DECLARE_DEPTH_CONVERT(w2h, neon); 26 | DECLARE_DEPTH_CONVERT(w2f, neon); 27 | 28 | #undef DECLARE_LEFT_SHIFT 29 | #undef DECLARE_DEPTH_CONVERT 30 | 31 | left_shift_func select_left_shift_func_arm(PixelType pixel_in, PixelType pixel_out, CPUClass cpu); 32 | 33 | depth_convert_func select_depth_convert_func_arm(const PixelFormat &format_in, const PixelFormat &format_out, CPUClass cpu); 34 | 35 | depth_f16c_func select_depth_f16c_func_arm(bool to_half, CPUClass cpu); 36 | 37 | bool needs_depth_f16c_func_arm(const PixelFormat &format_in, const PixelFormat &format_out, CPUClass cpu); 38 | 39 | } // namespace depth 40 | } // namespace zimg 41 | 42 | #endif // ZIMG_DEPTH_ARM_DEPTH_CONVERT_ARM_H_ 43 | 44 | #endif // ZIMG_ARM 45 | 
-------------------------------------------------------------------------------- /test/extra/musl-libm/exp2f_data.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Shared data between expf, exp2f and powf. 3 | * 4 | * Copyright (c) 2017-2018, Arm Limited. 5 | * SPDX-License-Identifier: MIT 6 | */ 7 | 8 | #include "exp2f_data.h" 9 | 10 | #ifdef _MSC_VER 11 | #pragma fenv_access(off) 12 | #endif 13 | 14 | #define N (1 << EXP2F_TABLE_BITS) 15 | 16 | const struct exp2f_data my__exp2f_data = { 17 | /* tab[i] = uint(2^(i/N)) - (i << 52-BITS) 18 | used for computing 2^(k/N) for an int |k| < 150 N as 19 | double(tab[k%N] + (k << 52-BITS)) */ 20 | .tab = { 21 | 0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f, 0x3fef9301d0125b51, 22 | 0x3fef72b83c7d517b, 0x3fef54873168b9aa, 0x3fef387a6e756238, 0x3fef1e9df51fdee1, 23 | 0x3fef06fe0a31b715, 0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d, 24 | 0x3feebfdad5362a27, 0x3feeb42b569d4f82, 0x3feeab07dd485429, 0x3feea47eb03a5585, 25 | 0x3feea09e667f3bcd, 0x3fee9f75e8ec5f74, 0x3feea11473eb0187, 0x3feea589994cce13, 26 | 0x3feeace5422aa0db, 0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d, 27 | 0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c, 0x3fef3720dcef9069, 28 | 0x3fef5818dcfba487, 0x3fef7c97337b9b5f, 0x3fefa4afa2a490da, 0x3fefd0765b6e4540, 29 | }, 30 | .shift_scaled = 0x1.8p+52 / N, 31 | .poly = { 32 | 0x1.c6af84b912394p-5, 0x1.ebfce50fac4f3p-3, 0x1.62e42ff0c52d6p-1, 33 | }, 34 | .shift = 0x1.8p+52, 35 | .invln2_scaled = 0x1.71547652b82fep+0 * N, 36 | .poly_scaled = { 37 | 0x1.c6af84b912394p-5/N/N/N, 0x1.ebfce50fac4f3p-3/N/N, 0x1.62e42ff0c52d6p-1/N, 38 | }, 39 | }; 40 | -------------------------------------------------------------------------------- /test/extra/musl-libm/powf_data.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Data definition for powf. 
3 | * 4 | * Copyright (c) 2017-2018, Arm Limited. 5 | * SPDX-License-Identifier: MIT 6 | */ 7 | 8 | #include "powf_data.h" 9 | 10 | #ifdef _MSC_VER 11 | #pragma fenv_access(off) 12 | #endif 13 | 14 | const struct powf_log2_data my__powf_log2_data = { 15 | .tab = { 16 | { 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 * POWF_SCALE }, 17 | { 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 * POWF_SCALE }, 18 | { 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 * POWF_SCALE }, 19 | { 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 * POWF_SCALE }, 20 | { 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 * POWF_SCALE }, 21 | { 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 * POWF_SCALE }, 22 | { 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 * POWF_SCALE }, 23 | { 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 * POWF_SCALE }, 24 | { 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 * POWF_SCALE }, 25 | { 0x1p+0, 0x0p+0 * POWF_SCALE }, 26 | { 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 * POWF_SCALE }, 27 | { 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 * POWF_SCALE }, 28 | { 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 * POWF_SCALE }, 29 | { 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 * POWF_SCALE }, 30 | { 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 * POWF_SCALE }, 31 | { 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 * POWF_SCALE }, 32 | }, 33 | .poly = { 34 | 0x1.27616c9496e0bp-2 * POWF_SCALE, -0x1.71969a075c67ap-2 * POWF_SCALE, 35 | 0x1.ec70a6ca7baddp-2 * POWF_SCALE, -0x1.7154748bef6c8p-1 * POWF_SCALE, 36 | 0x1.71547652ab82bp0 * POWF_SCALE, 37 | } 38 | }; 39 | -------------------------------------------------------------------------------- /src/zimg/common/x86/sse_util.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifdef ZIMG_X86 4 | 5 | #ifndef ZIMG_X86_SSE_UTIL_H_ 6 | #define ZIMG_X86_SSE_UTIL_H_ 7 | 8 | #include "common/ccdep.h" 9 | #include "x86util.h" 10 | 11 | namespace zimg { 12 | 13 | // Store from [x] into [dst] the 32-bit elements with index less 
than [idx]. 14 | static inline FORCE_INLINE void mm_store_idxlo_ps(float *dst, __m128 x, unsigned idx) 15 | { 16 | __m128 orig = _mm_load_ps(dst); 17 | __m128 mask = _mm_load_ps((const float *)(&xmm_mask_table[idx * 4])); 18 | 19 | orig = _mm_andnot_ps(mask, orig); 20 | x = _mm_and_ps(mask, x); 21 | x = _mm_or_ps(x, orig); 22 | 23 | _mm_store_ps(dst, x); 24 | } 25 | 26 | // Store from [x] into [dst] the 32-bit elements with index greater than or equal to [idx] 27 | static inline FORCE_INLINE void mm_store_idxhi_ps(float *dst, __m128 x, unsigned idx) 28 | { 29 | __m128 orig = _mm_load_ps(dst); 30 | __m128 mask = _mm_load_ps((const float *)(&xmm_mask_table[idx * 4])); 31 | 32 | orig = _mm_and_ps(mask, orig); 33 | x = _mm_andnot_ps(mask, x); 34 | x = _mm_or_ps(x, orig); 35 | 36 | _mm_store_ps(dst, x); 37 | } 38 | 39 | // Stores the elements of [x] into [dst0]-[dst3]. 40 | static inline FORCE_INLINE void mm_scatter_ps(float *dst0, float *dst1, float *dst2, float *dst3, __m128 x) 41 | { 42 | _mm_store_ss(dst0, x); 43 | _mm_store_ss(dst1, _mm_shuffle_ps(x, x, _MM_SHUFFLE(3, 2, 1, 1))); 44 | _mm_store_ss(dst2, _mm_shuffle_ps(x, x, _MM_SHUFFLE(3, 2, 1, 2))); 45 | _mm_store_ss(dst3, _mm_shuffle_ps(x, x, _MM_SHUFFLE(3, 2, 1, 3))); 46 | } 47 | 48 | } // namespace zimg 49 | 50 | #endif // ZIMG_X86_SSE_UTIL_H_ 51 | 52 | #endif // ZIMG_X86 53 | -------------------------------------------------------------------------------- /src/zimg/depth/quantize.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef ZIMG_DEPTH_QUANTIZE_H_ 4 | #define ZIMG_DEPTH_QUANTIZE_H_ 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "common/pixel.h" 11 | 12 | namespace zimg { 13 | namespace depth { 14 | 15 | constexpr int32_t numeric_max(int bits) noexcept 16 | { 17 | return (1L << bits) - 1; 18 | } 19 | 20 | constexpr int32_t integer_offset(const PixelFormat &format) noexcept 21 | { 22 | return pixel_is_float(format.type) 
? 0 23 | : format.chroma ? 1L << (format.depth - 1) 24 | : !format.fullrange ? 16L << (format.depth - 8) 25 | : 0; 26 | } 27 | 28 | constexpr int32_t integer_range(const PixelFormat &format) noexcept 29 | { 30 | return pixel_is_float(format.type) ? 1 31 | : format.fullrange ? numeric_max(format.depth) 32 | : format.chroma && !format.ycgco ? 224L << (format.depth - 8) 33 | : 219L << (format.depth - 8); 34 | } 35 | 36 | inline std::pair get_scale_offset(const PixelFormat &pixel_in, const PixelFormat &pixel_out) 37 | { 38 | double range_in = integer_range(pixel_in); 39 | double offset_in = integer_offset(pixel_in); 40 | double range_out = integer_range(pixel_out); 41 | double offset_out = integer_offset(pixel_out); 42 | 43 | float scale = static_cast(range_out / range_in); 44 | float offset = static_cast(-offset_in * range_out / range_in + offset_out); 45 | 46 | return{ scale, offset }; 47 | } 48 | 49 | float half_to_float(uint16_t f16w) noexcept; 50 | 51 | uint16_t float_to_half(float f32) noexcept; 52 | 53 | } // namespace depth 54 | } // namespace zimg 55 | 56 | #endif // ZIMG_DEPTH_QUANTIZE_H_ 57 | -------------------------------------------------------------------------------- /src/zimg/resize/x86/resize_impl_x86.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifdef ZIMG_X86 4 | 5 | #ifndef ZIMG_RESIZE_X86_RESIZE_IMPL_X86_H_ 6 | #define ZIMG_RESIZE_X86_RESIZE_IMPL_X86_H_ 7 | 8 | #include 9 | 10 | namespace zimg { 11 | 12 | enum class CPUClass; 13 | enum class PixelType; 14 | 15 | namespace graph { 16 | 17 | class ImageFilter; 18 | 19 | } // namespace graph 20 | 21 | 22 | namespace resize { 23 | 24 | struct FilterContext; 25 | 26 | #define DECLARE_IMPL_H(cpu) \ 27 | std::unique_ptr create_resize_impl_h_##cpu(const FilterContext &context, unsigned height, PixelType type, unsigned depth) 28 | #define DECLARE_IMPL_V(cpu) \ 29 | std::unique_ptr create_resize_impl_v_##cpu(const FilterContext &context, 
unsigned width, PixelType type, unsigned depth) 30 | 31 | DECLARE_IMPL_H(sse); 32 | DECLARE_IMPL_H(sse2); 33 | DECLARE_IMPL_H(avx); 34 | DECLARE_IMPL_H(avx2); 35 | DECLARE_IMPL_H(avx512); 36 | DECLARE_IMPL_H(avx512_vnni); 37 | 38 | DECLARE_IMPL_V(sse); 39 | DECLARE_IMPL_V(sse2); 40 | DECLARE_IMPL_V(avx); 41 | DECLARE_IMPL_V(avx2); 42 | DECLARE_IMPL_V(avx512); 43 | DECLARE_IMPL_V(avx512_vnni); 44 | 45 | #undef DECLARE_IMPL_H 46 | #undef DECLARE_IMPL_V 47 | 48 | std::unique_ptr create_resize_impl_h_x86(const FilterContext &context, unsigned height, PixelType type, unsigned depth, CPUClass cpu); 49 | 50 | std::unique_ptr create_resize_impl_v_x86(const FilterContext &context, unsigned width, PixelType type, unsigned depth, CPUClass cpu); 51 | 52 | } // namespace resize 53 | } // namespace zimg 54 | 55 | #endif // ZIMG_RESIZE_X86_RESIZE_IMPL_X86_H_ 56 | 57 | #endif // ZIMG_X86 58 | -------------------------------------------------------------------------------- /test/depth/arm/f16c_neon_test.cpp: -------------------------------------------------------------------------------- 1 | #ifdef ZIMG_ARM 2 | 3 | #include 4 | #include "common/cpuinfo.h" 5 | #include "common/pixel.h" 6 | #include "common/arm/cpuinfo_arm.h" 7 | #include "graph/image_filter.h" 8 | #include "depth/depth_convert.h" 9 | 10 | #include "gtest/gtest.h" 11 | #include "graph/filter_validator.h" 12 | 13 | namespace { 14 | 15 | void test_case(zimg::PixelType pixel_in, zimg::PixelType pixel_out, const char * const expected_sha1[3], double expected_snr) 16 | { 17 | const unsigned w = 640; 18 | const unsigned h = 480; 19 | 20 | if (!zimg::query_arm_capabilities().neon) { 21 | SUCCEED() << "neon not available, skipping"; 22 | return; 23 | } 24 | 25 | auto filter_c = zimg::depth::create_convert_to_float(w, h, pixel_in, pixel_out, zimg::CPUClass::NONE); 26 | auto filter_neon = zimg::depth::create_convert_to_float(w, h, pixel_in, pixel_out, zimg::CPUClass::ARM_NEON); 27 | 28 | FilterValidator validator{ 
filter_neon.get(), w, h, pixel_in }; 29 | validator.set_sha1(expected_sha1) 30 | .set_ref_filter(filter_c.get(), expected_snr) 31 | .validate(); 32 | } 33 | 34 | } // namespace 35 | 36 | 37 | TEST(F16CNeonTest, test_half_to_float) 38 | { 39 | const char *expected_sha1[3] = { 40 | "68442b2c5704fd2792d92b15fa2e259a51c601dc" 41 | }; 42 | 43 | test_case(zimg::PixelType::HALF, zimg::PixelType::FLOAT, expected_sha1, INFINITY); 44 | } 45 | 46 | TEST(F16CNeonTest, test_float_to_half) 47 | { 48 | const char *expected_sha1[3] = { 49 | "8907defd10af0b7c71abfb9c20147adc1b0a1f70" 50 | }; 51 | 52 | test_case(zimg::PixelType::FLOAT, zimg::PixelType::HALF, expected_sha1, INFINITY); 53 | } 54 | 55 | #endif // ZIMG_ARM 56 | -------------------------------------------------------------------------------- /test/depth/x86/f16c_ivb_test.cpp: -------------------------------------------------------------------------------- 1 | #ifdef ZIMG_X86 2 | 3 | #include 4 | #include "common/cpuinfo.h" 5 | #include "common/pixel.h" 6 | #include "common/x86/cpuinfo_x86.h" 7 | #include "graph/image_filter.h" 8 | #include "depth/depth_convert.h" 9 | 10 | #include "gtest/gtest.h" 11 | #include "graph/filter_validator.h" 12 | 13 | namespace { 14 | 15 | void test_case(zimg::PixelType pixel_in, zimg::PixelType pixel_out, const char * const expected_sha1[3], double expected_snr) 16 | { 17 | const unsigned w = 640; 18 | const unsigned h = 480; 19 | 20 | if (!zimg::query_x86_capabilities().f16c) { 21 | SUCCEED() << "f16c not available, skipping"; 22 | return; 23 | } 24 | 25 | auto filter_c = zimg::depth::create_convert_to_float(w, h, pixel_in, pixel_out, zimg::CPUClass::NONE); 26 | auto filter_f16c = zimg::depth::create_convert_to_float(w, h, pixel_in, pixel_out, zimg::CPUClass::X86_F16C); 27 | 28 | FilterValidator validator{ filter_f16c.get(), w, h, pixel_in }; 29 | validator.set_sha1(expected_sha1) 30 | .set_ref_filter(filter_c.get(), expected_snr) 31 | .validate(); 32 | } 33 | 34 | } // namespace 35 | 36 | 
37 | TEST(F16CIVBTest, test_half_to_float) 38 | { 39 | const char *expected_sha1[3] = { 40 | "68442b2c5704fd2792d92b15fa2e259a51c601dc" 41 | }; 42 | 43 | test_case(zimg::PixelType::HALF, zimg::PixelType::FLOAT, expected_sha1, INFINITY); 44 | } 45 | 46 | TEST(F16CIVBTest, test_float_to_half) 47 | { 48 | const char *expected_sha1[3] = { 49 | "8907defd10af0b7c71abfb9c20147adc1b0a1f70" 50 | }; 51 | 52 | test_case(zimg::PixelType::FLOAT, zimg::PixelType::HALF, expected_sha1, INFINITY); 53 | } 54 | 55 | #endif // ZIMG_X86 56 | -------------------------------------------------------------------------------- /test/depth/x86/f16c_sse2_test.cpp: -------------------------------------------------------------------------------- 1 | #ifdef ZIMG_X86 2 | 3 | #include 4 | #include "common/cpuinfo.h" 5 | #include "common/pixel.h" 6 | #include "common/x86/cpuinfo_x86.h" 7 | #include "graph/image_filter.h" 8 | #include "depth/depth_convert.h" 9 | 10 | #include "gtest/gtest.h" 11 | #include "graph/filter_validator.h" 12 | 13 | namespace { 14 | 15 | void test_case(zimg::PixelType pixel_in, zimg::PixelType pixel_out, const char * const expected_sha1[3], double expected_snr) 16 | { 17 | const unsigned w = 640; 18 | const unsigned h = 480; 19 | 20 | if (!zimg::query_x86_capabilities().sse2) { 21 | SUCCEED() << "sse2 not available, skipping"; 22 | return; 23 | } 24 | 25 | auto filter_c = zimg::depth::create_convert_to_float(w, h, pixel_in, pixel_out, zimg::CPUClass::NONE); 26 | auto filter_sse2 = zimg::depth::create_convert_to_float(w, h, pixel_in, pixel_out, zimg::CPUClass::X86_SSE2); 27 | 28 | FilterValidator validator{ filter_sse2.get(), w, h, pixel_in }; 29 | validator.set_sha1(expected_sha1) 30 | .set_ref_filter(filter_c.get(), expected_snr) 31 | .validate(); 32 | } 33 | 34 | } // namespace 35 | 36 | 37 | TEST(F16CSSE2Test, test_half_to_float) 38 | { 39 | const char *expected_sha1[3] = { 40 | "68442b2c5704fd2792d92b15fa2e259a51c601dc" 41 | }; 42 | 43 | test_case(zimg::PixelType::HALF, 
zimg::PixelType::FLOAT, expected_sha1, INFINITY); 44 | } 45 | 46 | TEST(F16CSSE2Test, test_float_to_half) 47 | { 48 | const char *expected_sha1[3] = { 49 | "4184caae2bd2a3f54722cba1d561cc8720b117ce" 50 | }; 51 | 52 | // The SSE2 approximation does not implement correct rounding. 53 | test_case(zimg::PixelType::FLOAT, zimg::PixelType::HALF, expected_sha1, 90.0); 54 | } 55 | 56 | #endif // ZIMG_X86 57 | -------------------------------------------------------------------------------- /src/zimg/common/arm/neon_util.cpp: -------------------------------------------------------------------------------- 1 | #ifdef ZIMG_ARM 2 | 3 | #include 4 | 5 | namespace zimg { 6 | 7 | #define REPEAT_1(x) x 8 | #define REPEAT_2(x) REPEAT_1(x), REPEAT_1(x) 9 | #define REPEAT_3(x) REPEAT_2(x), REPEAT_1(x) 10 | #define REPEAT_4(x) REPEAT_2(x), REPEAT_2(x) 11 | #define REPEAT_5(x) REPEAT_4(x), REPEAT_1(x) 12 | #define REPEAT_6(x) REPEAT_4(x), REPEAT_2(x) 13 | #define REPEAT_7(x) REPEAT_4(x), REPEAT_3(x) 14 | #define REPEAT_8(x) REPEAT_4(x), REPEAT_4(x) 15 | #define REPEAT_9(x) REPEAT_8(x), REPEAT_1(x) 16 | #define REPEAT_10(x) REPEAT_8(x), REPEAT_2(x) 17 | #define REPEAT_11(x) REPEAT_8(x), REPEAT_3(x) 18 | #define REPEAT_12(x) REPEAT_8(x), REPEAT_4(x) 19 | #define REPEAT_13(x) REPEAT_8(x), REPEAT_5(x) 20 | #define REPEAT_14(x) REPEAT_8(x), REPEAT_6(x) 21 | #define REPEAT_15(x) REPEAT_8(x), REPEAT_7(x) 22 | #define REPEAT_16(x) REPEAT_8(x), REPEAT_8(x) 23 | 24 | extern const uint8_t neon_mask_table alignas(16)[17][16]; 25 | 26 | const uint8_t neon_mask_table alignas(16)[17][16] = { 27 | { REPEAT_16(0x00) }, 28 | { REPEAT_1(0xFF), REPEAT_15(0x00) }, 29 | { REPEAT_2(0xFF), REPEAT_14(0x00) }, 30 | { REPEAT_3(0xFF), REPEAT_13(0x00) }, 31 | { REPEAT_4(0xFF), REPEAT_12(0x00) }, 32 | { REPEAT_5(0xFF), REPEAT_11(0x00) }, 33 | { REPEAT_6(0xFF), REPEAT_10(0x00) }, 34 | { REPEAT_7(0xFF), REPEAT_9(0x00) }, 35 | { REPEAT_8(0xFF), REPEAT_8(0x00) }, 36 | { REPEAT_9(0xFF), REPEAT_7(0x00) }, 37 | { 
REPEAT_10(0xFF), REPEAT_6(0x00) }, 38 | { REPEAT_11(0xFF), REPEAT_5(0x00) }, 39 | { REPEAT_12(0xFF), REPEAT_4(0x00) }, 40 | { REPEAT_13(0xFF), REPEAT_3(0x00) }, 41 | { REPEAT_14(0xFF), REPEAT_2(0x00) }, 42 | { REPEAT_15(0xFF), REPEAT_1(0x00) }, 43 | { REPEAT_16(0xFF) } 44 | }; 45 | 46 | } // namespace zimg 47 | 48 | #endif // ZIMG_ARM 49 | -------------------------------------------------------------------------------- /src/zimg/common/except.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef ZIMG_EXCEPT_H_ 4 | #define ZIMG_EXCEPT_H_ 5 | 6 | #include 7 | 8 | namespace zimg { 9 | namespace error { 10 | 11 | class Exception : private std::runtime_error { 12 | public: 13 | Exception() : std::runtime_error{ "" } {} 14 | 15 | using std::runtime_error::runtime_error; 16 | 17 | virtual ~Exception() = default; 18 | 19 | using std::runtime_error::what; 20 | }; 21 | 22 | #define DECLARE_EXCEPTION(x, base) class x : public base { public: using base::base; }; 23 | 24 | DECLARE_EXCEPTION(UnknownError, Exception) 25 | DECLARE_EXCEPTION(InternalError, Exception) 26 | 27 | DECLARE_EXCEPTION(OutOfMemory, Exception) 28 | DECLARE_EXCEPTION(UserCallbackFailed, Exception) 29 | 30 | DECLARE_EXCEPTION(LogicError, Exception) 31 | DECLARE_EXCEPTION(GreyscaleSubsampling, LogicError) 32 | DECLARE_EXCEPTION(ColorFamilyMismatch, LogicError) 33 | DECLARE_EXCEPTION(ImageNotDivisible, LogicError) 34 | DECLARE_EXCEPTION(BitDepthOverflow, LogicError) 35 | 36 | DECLARE_EXCEPTION(IllegalArgument, Exception) 37 | DECLARE_EXCEPTION(EnumOutOfRange, IllegalArgument) 38 | DECLARE_EXCEPTION(InvalidImageSize, IllegalArgument) 39 | 40 | DECLARE_EXCEPTION(UnsupportedOperation, Exception) 41 | DECLARE_EXCEPTION(UnsupportedSubsampling, UnsupportedOperation) 42 | DECLARE_EXCEPTION(NoColorspaceConversion, UnsupportedOperation) 43 | DECLARE_EXCEPTION(ResamplingNotAvailable, UnsupportedOperation) 44 | DECLARE_EXCEPTION(NoFieldParityConversion, 
UnsupportedOperation) 45 | 46 | #undef DECLARE_EXCEPTION 47 | 48 | template 49 | [[noreturn]] void throw_() 50 | { 51 | throw T{}; 52 | } 53 | 54 | template 55 | [[noreturn]] void throw_(const char *msg) 56 | { 57 | throw T{ msg }; 58 | } 59 | 60 | } // namespace error 61 | } // namespace zimg 62 | 63 | #endif // ZIMG_EXCEPT_H_ 64 | -------------------------------------------------------------------------------- /src/zimg/depth/depth.cpp: -------------------------------------------------------------------------------- 1 | #include "common/cpuinfo.h" 2 | #include "common/except.h" 3 | #include "common/make_unique.h" 4 | #include "common/pixel.h" 5 | #include "graph/basic_filter.h" 6 | #include "graph/image_filter.h" 7 | #include "depth.h" 8 | #include "depth_convert.h" 9 | #include "dither.h" 10 | 11 | namespace zimg { 12 | namespace depth { 13 | 14 | namespace { 15 | 16 | bool is_lossless_conversion(const PixelFormat &pixel_in, const PixelFormat &pixel_out) 17 | { 18 | return pixel_is_integer(pixel_in.type) && 19 | pixel_is_integer(pixel_out.type) && 20 | !pixel_in.fullrange && 21 | !pixel_out.fullrange && 22 | pixel_in.chroma == pixel_out.chroma && 23 | pixel_out.depth >= pixel_in.depth; 24 | } 25 | 26 | } // namespace 27 | 28 | 29 | DepthConversion::DepthConversion(unsigned width, unsigned height) : 30 | width{ width }, 31 | height{ height }, 32 | pixel_in{}, 33 | pixel_out{}, 34 | dither_type{ DitherType::NONE }, 35 | cpu{ CPUClass::NONE } 36 | {} 37 | 38 | std::unique_ptr DepthConversion::create() const try 39 | { 40 | if (width > pixel_max_width(pixel_in.type) || width > pixel_max_width(pixel_out.type)) 41 | error::throw_(); 42 | 43 | if (pixel_in == pixel_out) 44 | return ztd::make_unique(width, height, pixel_in.type); 45 | else if (is_lossless_conversion(pixel_in, pixel_out)) 46 | return create_left_shift(width, height, pixel_in, pixel_out, cpu); 47 | else if (pixel_is_float(pixel_out.type)) 48 | return create_convert_to_float(width, height, pixel_in, 
pixel_out, cpu); 49 | else 50 | return create_dither(dither_type, width, height, pixel_in, pixel_out, cpu); 51 | } catch (const std::bad_alloc &) { 52 | error::throw_(); 53 | } 54 | 55 | } // namespace depth 56 | } // namespace zimg 57 | -------------------------------------------------------------------------------- /test/extra/musl-libm/fpu_wrapper.c: -------------------------------------------------------------------------------- 1 | #ifdef _MSC_VER 2 | #include 3 | #endif 4 | 5 | extern float _myexpf(float x); 6 | extern float _mylogf(float x); 7 | extern float _mylog10f(float x); 8 | 9 | extern float _mypowf(float x, float y); 10 | 11 | extern double _mysin(double x); 12 | extern double _mycos(double y); 13 | 14 | #if defined(_MSC_VER) && defined(_M_IX86) 15 | #define fpu_save() _control87(0, 0) 16 | #define fpu_set_single() _control87(_PC_24, _MCW_PC) 17 | #define fpu_set_double() _control87(_PC_53, _MCW_PC) 18 | #define fpu_restore(x) _control87((x), _MCW_PC) 19 | #else 20 | #define fpu_save() 0 21 | #define fpu_set_single() (void)0 22 | #define fpu_set_double() (void)0 23 | #define fpu_restore(x) (void)x 24 | #endif /* _MSC_VER */ 25 | 26 | float myexpf(float x) 27 | { 28 | unsigned state = fpu_save(); 29 | float y; 30 | fpu_set_single(); 31 | y = _myexpf(x); 32 | fpu_restore(state); 33 | return y; 34 | } 35 | 36 | float mylogf(float x) 37 | { 38 | unsigned state = fpu_save(); 39 | float y; 40 | fpu_set_single(); 41 | y = _mylogf(x); 42 | fpu_restore(state); 43 | return y; 44 | } 45 | 46 | float mylog10f(float x) 47 | { 48 | unsigned state = fpu_save(); 49 | float y; 50 | fpu_set_single(); 51 | y = _mylog10f(x); 52 | fpu_restore(state); 53 | return y; 54 | } 55 | 56 | float mypowf(float x, float y) 57 | { 58 | unsigned state = fpu_save(); 59 | float z; 60 | fpu_set_single(); 61 | z = _mypowf(x, y); 62 | fpu_restore(state); 63 | return z; 64 | } 65 | 66 | double mysin(double x) 67 | { 68 | unsigned state = fpu_save(); 69 | double y; 70 | fpu_set_double(); 71 
| y = _mysin(x); 72 | fpu_restore(state); 73 | return y; 74 | } 75 | 76 | double mycos(double x) 77 | { 78 | unsigned state = fpu_save(); 79 | double y; 80 | fpu_set_double(); 81 | y = _mycos(x); 82 | fpu_restore(state); 83 | return y; 84 | } 85 | -------------------------------------------------------------------------------- /src/testapp/pair_filter.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef ZIMG_PAIR_FILTER_H_ 4 | #define ZIMG_PAIR_FILTER_H_ 5 | 6 | #include 7 | #include "graph/image_filter.h" 8 | 9 | enum class PixelType; 10 | 11 | class PairFilter final : public zimg::graph::ImageFilter { 12 | struct cache_context; 13 | private: 14 | std::unique_ptr m_first; 15 | std::unique_ptr m_second; 16 | 17 | filter_flags m_first_flags; 18 | filter_flags m_second_flags; 19 | 20 | image_attributes m_first_attr; 21 | image_attributes m_second_attr; 22 | 23 | unsigned m_first_step; 24 | unsigned m_second_step; 25 | unsigned m_second_buffering; 26 | 27 | bool m_has_state; 28 | bool m_in_place; 29 | bool m_color; 30 | 31 | ptrdiff_t get_cache_stride() const; 32 | 33 | unsigned get_cache_line_count() const; 34 | 35 | size_t get_cache_size_one_plane() const; 36 | 37 | unsigned get_num_planes() const; 38 | public: 39 | PairFilter(std::unique_ptr &&first, std::unique_ptr &&second); 40 | 41 | filter_flags get_flags() const override; 42 | 43 | image_attributes get_image_attributes() const override; 44 | 45 | pair_unsigned get_required_row_range(unsigned i) const override; 46 | 47 | pair_unsigned get_required_col_range(unsigned left, unsigned right) const override; 48 | 49 | unsigned get_simultaneous_lines() const override; 50 | 51 | unsigned get_max_buffering() const override; 52 | 53 | size_t get_context_size() const override; 54 | 55 | size_t get_tmp_size(unsigned left, unsigned right) const override; 56 | 57 | void init_context(void *ctx, unsigned seq) const override; 58 | 59 | void process(void *ctx, 
const zimg::graph::ImageBuffer src[], const zimg::graph::ImageBuffer dst[], void *tmp, unsigned i, unsigned left, unsigned right) const override; 60 | }; 61 | 62 | #endif // ZIMG_PAIR_FILTER_H_ 63 | -------------------------------------------------------------------------------- /test/graph/audit_buffer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef ZIMG_UNIT_TEST_GRAPH_AUDIT_BUFFER_H_ 4 | #define ZIMG_UNIT_TEST_GRAPH_AUDIT_BUFFER_H_ 5 | 6 | #include 7 | #include "common/alloc.h" 8 | #include "common/pixel.h" 9 | #include "graph/image_buffer.h" 10 | 11 | enum class AuditBufferType { 12 | PLANE, 13 | COLOR_RGB, 14 | COLOR_YUV, 15 | }; 16 | 17 | template 18 | class AuditBuffer { 19 | zimg::AlignedVector m_vector[3]; 20 | zimg::graph::ColorImageBuffer m_buffer; 21 | AuditBufferType m_buffer_type; 22 | zimg::PixelFormat m_format; 23 | unsigned m_width[3]; 24 | unsigned m_buffer_height[3]; 25 | unsigned m_subsample_w; 26 | unsigned m_subsample_h; 27 | T m_fill_val[3]; 28 | T m_guard_val; 29 | 30 | static T splat_byte(unsigned char b); 31 | 32 | void add_guard_bytes(); 33 | 34 | ptrdiff_t stride_T(unsigned p) const; 35 | 36 | unsigned planes() const; 37 | public: 38 | AuditBuffer(AuditBufferType buffer_type, unsigned width, unsigned height, const zimg::PixelFormat &format, 39 | unsigned lines, unsigned subsample_w, unsigned subsample_h); 40 | 41 | void set_fill_val(unsigned char x); 42 | 43 | void set_fill_val(unsigned char x, unsigned plane); 44 | 45 | bool detect_write(unsigned i, unsigned left, unsigned right) const; 46 | 47 | void assert_eq(const AuditBuffer &other, unsigned i, unsigned left, unsigned right) const; 48 | 49 | void assert_guard_bytes() const; 50 | 51 | void random_fill(unsigned first_row, unsigned last_row, unsigned first_col, unsigned last_col); 52 | 53 | void default_fill(); 54 | 55 | zimg::graph::ColorImageBuffer as_read_buffer() const; 56 | 57 | zimg::graph::ColorImageBuffer 
as_write_buffer() const; 58 | }; 59 | 60 | extern template class AuditBuffer; 61 | extern template class AuditBuffer; 62 | extern template class AuditBuffer; 63 | 64 | #endif // ZIMG_UNIT_TEST_GRAPH_AUDIT_BUFFER_H_ 65 | -------------------------------------------------------------------------------- /src/zimg/colorspace/arm/operation_impl_arm.cpp: -------------------------------------------------------------------------------- 1 | #ifdef ZIMG_ARM 2 | 3 | #include "common/cpuinfo.h" 4 | #include "common/arm/cpuinfo_arm.h" 5 | #include "colorspace/operation.h" 6 | #include "colorspace/operation_impl.h" 7 | #include "operation_impl_arm.h" 8 | 9 | namespace zimg { 10 | namespace colorspace { 11 | 12 | std::unique_ptr create_matrix_operation_arm(const Matrix3x3 &m, CPUClass cpu) 13 | { 14 | ARMCapabilities caps = query_arm_capabilities(); 15 | std::unique_ptr ret; 16 | 17 | if (cpu_is_autodetect(cpu)) { 18 | if (!ret && caps.neon && caps.vfpv4) 19 | ret = create_matrix_operation_neon(m); 20 | } else { 21 | if (!ret && cpu >= CPUClass::ARM_NEON) 22 | ret = create_matrix_operation_neon(m); 23 | } 24 | 25 | return ret; 26 | } 27 | 28 | std::unique_ptr create_gamma_operation_arm(const TransferFunction &transfer, const OperationParams ¶ms, CPUClass cpu) 29 | { 30 | ARMCapabilities caps = query_arm_capabilities(); 31 | std::unique_ptr ret; 32 | 33 | if (cpu_is_autodetect(cpu)) { 34 | if (!ret && caps.neon && caps.vfpv4) 35 | ret = create_gamma_operation_neon(transfer, params); 36 | } else { 37 | if (!ret && cpu >= CPUClass::ARM_NEON) 38 | ret = create_gamma_operation_neon(transfer, params); 39 | } 40 | 41 | return ret; 42 | } 43 | 44 | std::unique_ptr create_inverse_gamma_operation_arm(const TransferFunction &transfer, const OperationParams ¶ms, CPUClass cpu) 45 | { 46 | ARMCapabilities caps = query_arm_capabilities(); 47 | std::unique_ptr ret; 48 | 49 | if (cpu_is_autodetect(cpu)) { 50 | if (!ret && caps.neon && caps.vfpv4) 51 | ret = 
create_inverse_gamma_operation_neon(transfer, params); 52 | } else { 53 | if (!ret && cpu >= CPUClass::ARM_NEON) 54 | ret = create_inverse_gamma_operation_neon(transfer, params); 55 | } 56 | 57 | return ret; 58 | } 59 | 60 | } // namespace colorspace 61 | } // namespace zimg 62 | 63 | #endif // ZIMG_ARM 64 | -------------------------------------------------------------------------------- /src/zimg/colorspace/x86/operation_impl_x86.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifdef ZIMG_X86 4 | 5 | #ifndef ZIMG_COLORSPACE_X86_OPERATION_IMPL_X86_H_ 6 | #define ZIMG_COLORSPACE_X86_OPERATION_IMPL_X86_H_ 7 | 8 | #include 9 | 10 | namespace zimg { 11 | 12 | enum class CPUClass; 13 | 14 | namespace colorspace { 15 | 16 | struct Matrix3x3; 17 | struct OperationParams; 18 | struct TransferFunction; 19 | class Operation; 20 | 21 | std::unique_ptr create_matrix_operation_sse(const Matrix3x3 &m); 22 | std::unique_ptr create_matrix_operation_avx(const Matrix3x3 &m); 23 | std::unique_ptr create_matrix_operation_avx512(const Matrix3x3 &m); 24 | 25 | std::unique_ptr create_matrix_operation_x86(const Matrix3x3 &m, CPUClass cpu); 26 | 27 | std::unique_ptr create_gamma_operation_sse2(const TransferFunction &transfer, const OperationParams ¶ms); 28 | std::unique_ptr create_gamma_operation_avx2(const TransferFunction &transfer, const OperationParams ¶ms); 29 | std::unique_ptr create_gamma_operation_avx512(const TransferFunction &transfer, const OperationParams ¶ms); 30 | 31 | std::unique_ptr create_gamma_operation_x86(const TransferFunction &transfer, const OperationParams ¶ms, CPUClass cpu); 32 | 33 | std::unique_ptr create_inverse_gamma_operation_sse2(const TransferFunction &transfer, const OperationParams ¶ms); 34 | std::unique_ptr create_inverse_gamma_operation_avx2(const TransferFunction &transfer, const OperationParams ¶ms); 35 | std::unique_ptr create_inverse_gamma_operation_avx512(const TransferFunction &transfer, 
const OperationParams ¶ms); 36 | 37 | std::unique_ptr create_inverse_gamma_operation_x86(const TransferFunction &transfer, const OperationParams ¶ms, CPUClass cpu); 38 | 39 | } // namespace colorspace 40 | } // namespace zimg 41 | 42 | #endif // ZIMG_COLORSPACE_X86_OPERATION_IMPL_X86_H_ 43 | 44 | #endif // ZIMG_X86 45 | -------------------------------------------------------------------------------- /src/zimg/depth/x86/depth_convert_x86.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifdef ZIMG_X86 4 | 5 | #ifndef ZIMG_DEPTH_X86_DEPTH_CONVERT_X86_H_ 6 | #define ZIMG_DEPTH_X86_DEPTH_CONVERT_X86_H_ 7 | 8 | #include "depth/depth_convert.h" 9 | 10 | namespace zimg { 11 | namespace depth { 12 | 13 | #define DECLARE_LEFT_SHIFT(x, cpu) \ 14 | void left_shift_##x##_##cpu(const void *src, void *dst, unsigned shift, unsigned left, unsigned right) 15 | #define DECLARE_DEPTH_CONVERT(x, cpu) \ 16 | void depth_convert_##x##_##cpu(const void *src, void *dst, float scale, float offset, unsigned left, unsigned right) 17 | 18 | DECLARE_LEFT_SHIFT(b2b, sse2); 19 | DECLARE_LEFT_SHIFT(b2w, sse2); 20 | DECLARE_LEFT_SHIFT(w2b, sse2); 21 | DECLARE_LEFT_SHIFT(w2w, sse2); 22 | DECLARE_LEFT_SHIFT(b2b, avx2); 23 | DECLARE_LEFT_SHIFT(b2w, avx2); 24 | DECLARE_LEFT_SHIFT(w2b, avx2); 25 | DECLARE_LEFT_SHIFT(w2w, avx2); 26 | DECLARE_LEFT_SHIFT(b2b, avx512); 27 | DECLARE_LEFT_SHIFT(b2w, avx512); 28 | DECLARE_LEFT_SHIFT(w2b, avx512); 29 | DECLARE_LEFT_SHIFT(w2w, avx512); 30 | 31 | DECLARE_DEPTH_CONVERT(b2f, sse2); 32 | DECLARE_DEPTH_CONVERT(w2f, sse2); 33 | DECLARE_DEPTH_CONVERT(b2h, avx2); 34 | DECLARE_DEPTH_CONVERT(b2f, avx2); 35 | DECLARE_DEPTH_CONVERT(w2h, avx2); 36 | DECLARE_DEPTH_CONVERT(w2f, avx2); 37 | DECLARE_DEPTH_CONVERT(b2h, avx512); 38 | DECLARE_DEPTH_CONVERT(b2f, avx512); 39 | DECLARE_DEPTH_CONVERT(w2h, avx512); 40 | DECLARE_DEPTH_CONVERT(w2f, avx512); 41 | 42 | #undef DECLARE_LEFT_SHIFT 43 | #undef DECLARE_DEPTH_CONVERT 44 
| 45 | left_shift_func select_left_shift_func_x86(PixelType pixel_in, PixelType pixel_out, CPUClass cpu); 46 | 47 | depth_convert_func select_depth_convert_func_x86(const PixelFormat &format_in, const PixelFormat &format_out, CPUClass cpu); 48 | 49 | depth_f16c_func select_depth_f16c_func_x86(bool to_half, CPUClass cpu); 50 | 51 | bool needs_depth_f16c_func_x86(const PixelFormat &format_in, const PixelFormat &format_out, CPUClass cpu); 52 | 53 | } // namespace depth 54 | } // namespace zimg 55 | 56 | #endif // ZIMG_DEPTH_X86_DEPTH_CONVERT_X86_H_ 57 | 58 | #endif // ZIMG_X86 59 | -------------------------------------------------------------------------------- /test/colorspace/x86/colorspace_avx_test.cpp: -------------------------------------------------------------------------------- 1 | #ifdef ZIMG_X86 2 | 3 | #include 4 | #include "common/cpuinfo.h" 5 | #include "common/pixel.h" 6 | #include "common/x86/cpuinfo_x86.h" 7 | #include "graph/image_filter.h" 8 | #include "colorspace/colorspace.h" 9 | 10 | #include "gtest/gtest.h" 11 | #include "graph/filter_validator.h" 12 | 13 | namespace { 14 | 15 | void test_case(const zimg::colorspace::ColorspaceDefinition &csp_in, const zimg::colorspace::ColorspaceDefinition &csp_out, 16 | const char * const expected_sha1[3], double expected_snr) 17 | { 18 | const unsigned w = 640; 19 | const unsigned h = 480; 20 | 21 | if (!zimg::query_x86_capabilities().avx) { 22 | SUCCEED() << "avx not available, skipping"; 23 | return; 24 | } 25 | 26 | zimg::PixelFormat format = zimg::PixelType::FLOAT; 27 | auto builder = zimg::colorspace::ColorspaceConversion{ w, h } 28 | .set_csp_in(csp_in) 29 | .set_csp_out(csp_out); 30 | 31 | auto filter_c = builder.set_cpu(zimg::CPUClass::NONE).create(); 32 | auto filter_avx = builder.set_cpu(zimg::CPUClass::X86_AVX).create(); 33 | 34 | FilterValidator validator{ filter_avx.get(), w, h, format }; 35 | validator.set_sha1(expected_sha1) 36 | .set_ref_filter(filter_c.get(), expected_snr) 37 | 
.set_yuv(csp_in.matrix != zimg::colorspace::MatrixCoefficients::RGB) 38 | .validate(); 39 | } 40 | 41 | } // namespace 42 | 43 | 44 | TEST(ColorspaceConversionAVXTest, test_matrix) 45 | { 46 | using namespace zimg::colorspace; 47 | 48 | const char *expected_sha1[3] = { 49 | "1d559e4b2812a5940839b064f5bd74bc4fe0a2f9", 50 | "b32a33c4bbbf3901f89458f914e6d03cc81f2c1d", 51 | "4aadd644fae30cfd2098bb8d2b9f98483c8821fd" 52 | }; 53 | const double expected_snr = INFINITY; 54 | 55 | test_case({ MatrixCoefficients::RGB, TransferCharacteristics::UNSPECIFIED, ColorPrimaries::UNSPECIFIED }, 56 | { MatrixCoefficients::REC_709, TransferCharacteristics::UNSPECIFIED, ColorPrimaries::UNSPECIFIED }, 57 | expected_sha1, expected_snr); 58 | } 59 | 60 | #endif // ZIMG_X86 61 | -------------------------------------------------------------------------------- /test/colorspace/x86/colorspace_sse_test.cpp: -------------------------------------------------------------------------------- 1 | #ifdef ZIMG_X86 2 | 3 | #include 4 | #include "common/cpuinfo.h" 5 | #include "common/pixel.h" 6 | #include "common/x86/cpuinfo_x86.h" 7 | #include "graph/image_filter.h" 8 | #include "colorspace/colorspace.h" 9 | 10 | #include "gtest/gtest.h" 11 | #include "graph/filter_validator.h" 12 | 13 | namespace { 14 | 15 | void test_case(const zimg::colorspace::ColorspaceDefinition &csp_in, const zimg::colorspace::ColorspaceDefinition &csp_out, 16 | const char * const expected_sha1[3], double expected_snr) 17 | { 18 | const unsigned w = 640; 19 | const unsigned h = 480; 20 | 21 | if (!zimg::query_x86_capabilities().sse) { 22 | SUCCEED() << "sse not available, skipping"; 23 | return; 24 | } 25 | 26 | zimg::PixelFormat format = zimg::PixelType::FLOAT; 27 | auto builder = zimg::colorspace::ColorspaceConversion{ w, h } 28 | .set_csp_in(csp_in) 29 | .set_csp_out(csp_out); 30 | 31 | auto filter_c = builder.set_cpu(zimg::CPUClass::NONE).create(); 32 | auto filter_sse = builder.set_cpu(zimg::CPUClass::X86_SSE).create(); 33 
| 34 | FilterValidator validator{ filter_sse.get(), w, h, format }; 35 | validator.set_sha1(expected_sha1) 36 | .set_ref_filter(filter_c.get(), expected_snr) 37 | .set_yuv(csp_in.matrix != zimg::colorspace::MatrixCoefficients::RGB) 38 | .validate(); 39 | } 40 | 41 | } // namespace 42 | 43 | 44 | TEST(ColorspaceConversionSSETest, test_matrix) 45 | { 46 | using namespace zimg::colorspace; 47 | 48 | const char *expected_sha1[3] = { 49 | "1d559e4b2812a5940839b064f5bd74bc4fe0a2f9", 50 | "b32a33c4bbbf3901f89458f914e6d03cc81f2c1d", 51 | "4aadd644fae30cfd2098bb8d2b9f98483c8821fd" 52 | }; 53 | const double expected_snr = INFINITY; 54 | 55 | test_case({ MatrixCoefficients::RGB, TransferCharacteristics::UNSPECIFIED, ColorPrimaries::UNSPECIFIED }, 56 | { MatrixCoefficients::REC_709, TransferCharacteristics::UNSPECIFIED, ColorPrimaries::UNSPECIFIED }, 57 | expected_sha1, expected_snr); 58 | } 59 | 60 | #endif // ZIMG_X86 61 | -------------------------------------------------------------------------------- /test/extra/musl-libm/logf.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Single-precision log function. 3 | * 4 | * Copyright (c) 2017-2018, Arm Limited. 5 | * SPDX-License-Identifier: MIT 6 | */ 7 | 8 | #include 9 | #include 10 | #include "libm.h" 11 | #include "logf_data.h" 12 | 13 | /* 14 | LOGF_TABLE_BITS = 4 15 | LOGF_POLY_ORDER = 4 16 | 17 | ULP error: 0.818 (nearest rounding.) 18 | Relative error: 1.957 * 2^-26 (before rounding.) 19 | */ 20 | 21 | #define T my__logf_data.tab 22 | #define A my__logf_data.poly 23 | #define Ln2 my__logf_data.ln2 24 | #define N (1 << LOGF_TABLE_BITS) 25 | #define OFF 0x3f330000 26 | 27 | float _mylogf(float x) 28 | { 29 | double_t z, r, r2, y, y0, invc, logc; 30 | uint32_t ix, iz, tmp; 31 | int k, i; 32 | 33 | ix = asuint(x); 34 | /* Fix sign of zero with downward rounding when x==1. 
*/ 35 | if (WANT_ROUNDING && predict_false(ix == 0x3f800000)) 36 | return 0; 37 | if (predict_false(ix - 0x00800000 >= 0x7f800000 - 0x00800000)) { 38 | /* x < 0x1p-126 or inf or nan. */ 39 | if (ix * 2 == 0) 40 | return my__math_divzerof(1); 41 | if (ix == 0x7f800000) /* log(inf) == inf. */ 42 | return x; 43 | if ((ix & 0x80000000) || ix * 2 >= 0xff000000) 44 | return my__math_invalidf(x); 45 | /* x is subnormal, normalize it. */ 46 | ix = asuint(x * 0x1p23f); 47 | ix -= 23 << 23; 48 | } 49 | 50 | /* x = 2^k z; where z is in range [OFF,2*OFF] and exact. 51 | The range is split into N subintervals. 52 | The ith subinterval contains z and c is near its center. */ 53 | tmp = ix - OFF; 54 | i = (tmp >> (23 - LOGF_TABLE_BITS)) % N; 55 | k = (int32_t)tmp >> 23; /* arithmetic shift */ 56 | iz = ix - (tmp & 0x1ffu << 23); 57 | invc = T[i].invc; 58 | logc = T[i].logc; 59 | z = (double_t)asfloat(iz); 60 | 61 | /* log(x) = log1p(z/c-1) + log(c) + k*Ln2 */ 62 | r = z * invc - 1; 63 | y0 = logc + (double_t)k * Ln2; 64 | 65 | /* Pipelined polynomial evaluation to approximate log1p(r). 
*/ 66 | r2 = r * r; 67 | y = A[1] * r + A[2]; 68 | y = A[0] * r2 + y; 69 | y = y * r2 + (y0 + r); 70 | return eval_as_float(y); 71 | } 72 | -------------------------------------------------------------------------------- /src/zimg/resize/resize_impl.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef ZIMG_RESIZE_RESIZE_IMPL_H_ 4 | #define ZIMG_RESIZE_RESIZE_IMPL_H_ 5 | 6 | #include 7 | #include "graph/image_filter.h" 8 | #include "filter.h" 9 | 10 | namespace zimg { 11 | 12 | enum class CPUClass; 13 | enum class PixelType; 14 | 15 | namespace resize { 16 | 17 | class ResizeImplH : public graph::ImageFilterBase { 18 | protected: 19 | FilterContext m_filter; 20 | image_attributes m_attr; 21 | bool m_is_sorted; 22 | 23 | ResizeImplH(const FilterContext &filter, const image_attributes &attr); 24 | public: 25 | filter_flags get_flags() const override; 26 | 27 | image_attributes get_image_attributes() const override; 28 | 29 | pair_unsigned get_required_row_range(unsigned i) const override; 30 | 31 | pair_unsigned get_required_col_range(unsigned left, unsigned right) const override; 32 | 33 | unsigned get_max_buffering() const override; 34 | }; 35 | 36 | class ResizeImplV : public graph::ImageFilterBase { 37 | protected: 38 | FilterContext m_filter; 39 | image_attributes m_attr; 40 | bool m_is_sorted; 41 | 42 | ResizeImplV(const FilterContext &filter, const image_attributes &attr); 43 | public: 44 | filter_flags get_flags() const override; 45 | 46 | image_attributes get_image_attributes() const override; 47 | 48 | pair_unsigned get_required_row_range(unsigned i) const override; 49 | 50 | unsigned get_max_buffering() const override; 51 | }; 52 | 53 | struct ResizeImplBuilder { 54 | unsigned src_width; 55 | unsigned src_height; 56 | PixelType type; 57 | 58 | #include "common/builder.h" 59 | BUILDER_MEMBER(bool, horizontal) 60 | BUILDER_MEMBER(unsigned, dst_dim) 61 | BUILDER_MEMBER(unsigned, depth) 62 | 
BUILDER_MEMBER(const Filter *, filter) 63 | BUILDER_MEMBER(double, shift) 64 | BUILDER_MEMBER(double, subwidth) 65 | BUILDER_MEMBER(CPUClass, cpu) 66 | #undef BUILDER_MEMBER 67 | 68 | ResizeImplBuilder(unsigned src_width, unsigned src_height, PixelType type); 69 | 70 | std::unique_ptr create() const; 71 | }; 72 | 73 | } // namespace resize 74 | } // namespace zimg 75 | 76 | #endif // ZIMG_RESIZE_RESIZE_IMPL_H_ 77 | -------------------------------------------------------------------------------- /src/zimg/depth/x86/f16c_ivb.cpp: -------------------------------------------------------------------------------- 1 | #ifdef ZIMG_X86 2 | 3 | #include "common/ccdep.h" 4 | 5 | #include 6 | #include "common/align.h" 7 | #include "f16c_x86.h" 8 | 9 | #include "common/x86/sse2_util.h" 10 | #include "common/x86/avx_util.h" 11 | 12 | namespace zimg { 13 | namespace depth { 14 | 15 | void f16c_half_to_float_ivb(const void *src, void *dst, unsigned left, unsigned right) 16 | { 17 | const uint16_t *src_p = static_cast(src); 18 | float *dst_p = static_cast(dst); 19 | 20 | unsigned vec_left = ceil_n(left, 8); 21 | unsigned vec_right = floor_n(right, 8); 22 | 23 | if (left != vec_left) { 24 | __m256 x = _mm256_cvtph_ps(_mm_load_si128((const __m128i *)(src_p + vec_left - 8))); 25 | mm256_store_idxhi_ps(dst_p + vec_left - 8, x, left % 8); 26 | } 27 | 28 | for (unsigned j = vec_left; j < vec_right; j += 8) { 29 | __m256 x = _mm256_cvtph_ps(_mm_load_si128((const __m128i *)(src_p + j))); 30 | _mm256_store_ps(dst_p + j, x); 31 | } 32 | 33 | if (right != vec_right) { 34 | __m256 x = _mm256_cvtph_ps(_mm_load_si128((const __m128i *)(src_p + vec_right))); 35 | mm256_store_idxlo_ps(dst_p + vec_right, x, right % 8); 36 | } 37 | } 38 | 39 | void f16c_float_to_half_ivb(const void *src, void *dst, unsigned left, unsigned right) 40 | { 41 | const float *src_p = static_cast(src); 42 | uint16_t *dst_p = static_cast(dst); 43 | 44 | unsigned vec_left = ceil_n(left, 8); 45 | unsigned vec_right = 
floor_n(right, 8); 46 | 47 | if (left != vec_left) { 48 | __m128i x = _mm256_cvtps_ph(_mm256_load_ps(src_p + vec_left - 8), 0); 49 | mm_store_idxhi_epi16((__m128i *)(dst_p + vec_left - 8), x, left % 8); 50 | } 51 | 52 | for (unsigned j = vec_left; j < vec_right; j += 8) { 53 | __m128i x = _mm256_cvtps_ph(_mm256_load_ps(src_p + j), 0); 54 | _mm_store_si128((__m128i *)(dst_p + j), x); 55 | } 56 | 57 | if (right != vec_right) { 58 | __m128i x = _mm256_cvtps_ph(_mm256_load_ps(src_p + vec_right), 0); 59 | mm_store_idxlo_epi16((__m128i *)(dst_p + vec_right), x, right % 8); 60 | } 61 | } 62 | 63 | } // namespace depth 64 | } // namespace zimg 65 | 66 | #endif // ZIMG_X86 67 | -------------------------------------------------------------------------------- /src/zimg/unresize/unresize_impl.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef ZIMG_UNRESIZE_UNRESIZE_IMPL_H_ 4 | #define ZIMG_UNRESIZE_UNRESIZE_IMPL_H_ 5 | 6 | #include 7 | #include "graph/image_filter.h" 8 | #include "bilinear.h" 9 | 10 | namespace zimg { 11 | 12 | enum class CPUClass; 13 | enum class PixelType; 14 | 15 | namespace unresize { 16 | 17 | class UnresizeImplH : public graph::ImageFilterBase { 18 | protected: 19 | BilinearContext m_context; 20 | image_attributes m_attr; 21 | 22 | UnresizeImplH(const BilinearContext &context, const image_attributes &attr); 23 | public: 24 | filter_flags get_flags() const override; 25 | 26 | image_attributes get_image_attributes() const override; 27 | 28 | pair_unsigned get_required_row_range(unsigned i) const override; 29 | 30 | pair_unsigned get_required_col_range(unsigned left, unsigned right) const override; 31 | 32 | unsigned get_max_buffering() const override; 33 | }; 34 | 35 | class UnresizeImplV : public graph::ImageFilterBase { 36 | protected: 37 | BilinearContext m_context; 38 | image_attributes m_attr; 39 | 40 | UnresizeImplV(const BilinearContext &context, const image_attributes &attr); 41 | 
public: 42 | filter_flags get_flags() const override; 43 | 44 | image_attributes get_image_attributes() const override; 45 | 46 | pair_unsigned get_required_row_range(unsigned i) const override; 47 | 48 | pair_unsigned get_required_col_range(unsigned left, unsigned right) const override; 49 | 50 | unsigned get_simultaneous_lines() const override; 51 | 52 | unsigned get_max_buffering() const override; 53 | }; 54 | 55 | struct UnresizeImplBuilder { 56 | unsigned up_width; 57 | unsigned up_height; 58 | PixelType type; 59 | 60 | #include "common/builder.h" 61 | BUILDER_MEMBER(bool, horizontal) 62 | BUILDER_MEMBER(unsigned, orig_dim) 63 | BUILDER_MEMBER(double, shift) 64 | BUILDER_MEMBER(CPUClass, cpu) 65 | #undef BUILDER_MEMBER 66 | 67 | UnresizeImplBuilder(unsigned up_width, unsigned up_height, PixelType type); 68 | 69 | std::unique_ptr create() const; 70 | }; 71 | 72 | } // namespace unresize 73 | } // namespace zimg 74 | 75 | #endif // ZIMG_UNRESIZE_UNRESIZE_IMPL_H_ 76 | -------------------------------------------------------------------------------- /src/zimg/depth/arm/f16c_neon.cpp: -------------------------------------------------------------------------------- 1 | #ifdef ZIMG_ARM 2 | 3 | #include "common/ccdep.h" 4 | 5 | #include 6 | #include "common/align.h" 7 | #include "f16c_arm.h" 8 | 9 | #include "common/arm/neon_util.h" 10 | 11 | namespace zimg { 12 | namespace depth { 13 | 14 | #if !defined(_MSC_VER) || defined(_M_ARM64) 15 | void f16c_half_to_float_neon(const void *src, void *dst, unsigned left, unsigned right) 16 | { 17 | const __fp16 *src_p = static_cast(src); 18 | float *dst_p = static_cast(dst); 19 | 20 | unsigned vec_left = ceil_n(left, 4); 21 | unsigned vec_right = floor_n(right, 4); 22 | 23 | if (left != vec_left) { 24 | float32x4_t x = vcvt_f32_f16(vld1_f16(src_p + vec_left - 4)); 25 | neon_store_idxhi_f32(dst_p + vec_left - 4, x, left % 4); 26 | } 27 | 28 | for (unsigned j = vec_left; j < vec_right; j += 4) { 29 | float32x4_t x = 
vcvt_f32_f16(vld1_f16(src_p + j)); 30 | vst1q_f32(dst_p + j, x); 31 | } 32 | 33 | if (right != vec_right) { 34 | float32x4_t x = vcvt_f32_f16(vld1_f16(src_p + vec_right)); 35 | neon_store_idxlo_f32(dst_p + vec_right, x, right % 4); 36 | } 37 | } 38 | 39 | void f16c_float_to_half_neon(const void *src, void *dst, unsigned left, unsigned right) 40 | { 41 | const float *src_p = static_cast(src); 42 | __fp16 *dst_p = static_cast<__fp16 *>(dst); 43 | 44 | unsigned vec_left = ceil_n(left, 4); 45 | unsigned vec_right = floor_n(right, 4); 46 | 47 | if (left != vec_left) { 48 | float16x4_t x = vcvt_f16_f32(vld1q_f32(src_p + vec_left - 4)); 49 | neon_store_idxhi_f16(dst_p + vec_left - 8, vcombine_f16(vreinterpret_f16_u16(vdup_n_u16(0)), x), left % 4 + 4); 50 | } 51 | 52 | for (unsigned j = vec_left; j < vec_right; j += 4) { 53 | float16x4_t x = vcvt_f16_f32(vld1q_f32(src_p + j)); 54 | vst1_f16(dst_p + j, x); 55 | } 56 | 57 | if (right != vec_right) { 58 | float16x4_t x = vcvt_f16_f32(vld1q_f32(src_p + vec_right)); 59 | neon_store_idxlo_f16(dst_p + vec_right, vcombine_f16(x, vreinterpret_f16_u16(vdup_n_u16(0))), right % 4); 60 | } 61 | } 62 | #endif // !defined(_MSC_VER) || defined(_M_ARM64) 63 | 64 | } // namespace depth 65 | } // namespace zimg 66 | 67 | #endif // ZIMG_ARM 68 | -------------------------------------------------------------------------------- /_msvc/testcommon/testcommon.vcxproj.filters: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | Source Files 23 | 24 | 25 | Source Files 26 | 27 | 28 | Source Files 29 | 30 | 31 | 32 | 33 | Header Files 34 | 35 
| 36 | Header Files 37 | 38 | 39 | Header Files 40 | 41 | 42 | Header Files 43 | 44 | 45 | Header Files 46 | 47 | 48 | Header Files 49 | 50 | 51 | -------------------------------------------------------------------------------- /test/extra/musl-libm/expf.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Single-precision e^x function. 3 | * 4 | * Copyright (c) 2017-2018, Arm Limited. 5 | * SPDX-License-Identifier: MIT 6 | */ 7 | 8 | #include 9 | #include 10 | #include "libm.h" 11 | #include "exp2f_data.h" 12 | 13 | /* 14 | EXP2F_TABLE_BITS = 5 15 | EXP2F_POLY_ORDER = 3 16 | 17 | ULP error: 0.502 (nearest rounding.) 18 | Relative error: 1.69 * 2^-34 in [-ln2/64, ln2/64] (before rounding.) 19 | Wrong count: 170635 (all nearest rounding wrong results with fma.) 20 | Non-nearest ULP error: 1 (rounded ULP error) 21 | */ 22 | 23 | #define N (1 << EXP2F_TABLE_BITS) 24 | #define InvLn2N my__exp2f_data.invln2_scaled 25 | #define T my__exp2f_data.tab 26 | #define C my__exp2f_data.poly_scaled 27 | 28 | static inline uint32_t top12(float x) 29 | { 30 | return asuint(x) >> 20; 31 | } 32 | 33 | float _myexpf(float x) 34 | { 35 | uint32_t abstop; 36 | uint64_t ki, t; 37 | double_t kd, xd, z, r, r2, y, s; 38 | 39 | xd = (double_t)x; 40 | abstop = top12(x) & 0x7ff; 41 | if (predict_false(abstop >= top12(88.0f))) { 42 | /* |x| >= 88 or x is nan. */ 43 | if (asuint(x) == asuint(-INFINITY)) 44 | return 0.0f; 45 | if (abstop >= top12(INFINITY)) 46 | return x + x; 47 | if (x > 0x1.62e42ep6f) /* x > log(0x1p128) ~= 88.72 */ 48 | return my__math_oflowf(0); 49 | if (x < -0x1.9fe368p6f) /* x < log(0x1p-150) ~= -103.97 */ 50 | return my__math_uflowf(0); 51 | } 52 | 53 | /* x*N/Ln2 = k + r with r in [-1/2, 1/2] and int k. 
*/ 54 | z = InvLn2N * xd; 55 | 56 | /* Round and convert z to int, the result is in [-150*N, 128*N] and 57 | ideally ties-to-even rule is used, otherwise the magnitude of r 58 | can be bigger which gives larger approximation error. */ 59 | #if TOINT_INTRINSICS 60 | kd = roundtoint(z); 61 | ki = converttoint(z); 62 | #else 63 | # define SHIFT my__exp2f_data.shift 64 | kd = eval_as_double(z + SHIFT); 65 | ki = asuint64(kd); 66 | kd -= SHIFT; 67 | #endif 68 | r = z - kd; 69 | 70 | /* exp(x) = 2^(k/N) * 2^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */ 71 | t = T[ki % N]; 72 | t += ki << (52 - EXP2F_TABLE_BITS); 73 | s = asdouble(t); 74 | z = C[0] * r + C[1]; 75 | r2 = r * r; 76 | y = C[2] * r + 1; 77 | y = z * r2 + y; 78 | y = y * s; 79 | return eval_as_float(y); 80 | } 81 | -------------------------------------------------------------------------------- /test/extra/musl-libm/log10f.c: -------------------------------------------------------------------------------- 1 | /* origin: FreeBSD /usr/src/lib/msun/src/e_log10f.c */ 2 | /* 3 | * ==================================================== 4 | * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. 5 | * 6 | * Developed at SunPro, a Sun Microsystems, Inc. business. 7 | * Permission to use, copy, modify, and distribute this 8 | * software is freely granted, provided that this notice 9 | * is preserved. 10 | * ==================================================== 11 | */ 12 | /* 13 | * See comments in log10.c. 14 | */ 15 | 16 | #include 17 | #include 18 | 19 | static const float 20 | ivln10hi = 4.3432617188e-01, /* 0x3ede6000 */ 21 | ivln10lo = -3.1689971365e-05, /* 0xb804ead9 */ 22 | log10_2hi = 3.0102920532e-01, /* 0x3e9a2080 */ 23 | log10_2lo = 7.9034151668e-07, /* 0x355427db */ 24 | /* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). 
*/ 25 | Lg1 = 0xaaaaaa.0p-24, /* 0.66666662693 */ 26 | Lg2 = 0xccce13.0p-25, /* 0.40000972152 */ 27 | Lg3 = 0x91e9ee.0p-25, /* 0.28498786688 */ 28 | Lg4 = 0xf89e26.0p-26; /* 0.24279078841 */ 29 | 30 | float _mylog10f(float x) 31 | { 32 | union {float f; uint32_t i;} u = {x}; 33 | float_t hfsq,f,s,z,R,w,t1,t2,dk,hi,lo; 34 | uint32_t ix; 35 | int k; 36 | 37 | ix = u.i; 38 | k = 0; 39 | if (ix < 0x00800000 || ix>>31) { /* x < 2**-126 */ 40 | if (ix<<1 == 0) 41 | return -1/(x*x); /* log(+-0)=-inf */ 42 | if (ix>>31) 43 | return (x-x)/0.0f; /* log(-#) = NaN */ 44 | /* subnormal number, scale up x */ 45 | k -= 25; 46 | x *= 0x1p25f; 47 | u.f = x; 48 | ix = u.i; 49 | } else if (ix >= 0x7f800000) { 50 | return x; 51 | } else if (ix == 0x3f800000) 52 | return 0; 53 | 54 | /* reduce x into [sqrt(2)/2, sqrt(2)] */ 55 | ix += 0x3f800000 - 0x3f3504f3; 56 | k += (int)(ix>>23) - 0x7f; 57 | ix = (ix&0x007fffff) + 0x3f3504f3; 58 | u.i = ix; 59 | x = u.f; 60 | 61 | f = x - 1.0f; 62 | s = f/(2.0f + f); 63 | z = s*s; 64 | w = z*z; 65 | t1= w*(Lg2+w*Lg4); 66 | t2= z*(Lg1+w*Lg3); 67 | R = t2 + t1; 68 | hfsq = 0.5f*f*f; 69 | 70 | hi = f - hfsq; 71 | u.f = hi; 72 | u.i &= 0xfffff000; 73 | hi = u.f; 74 | lo = f - hi - hfsq + s*(hfsq+R); 75 | dk = k; 76 | return dk*log10_2lo + (lo+hi)*ivln10lo + lo*ivln10hi + hi*ivln10hi + dk*log10_2hi; 77 | } 78 | -------------------------------------------------------------------------------- /src/testapp/frame.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef FRAME_H_ 4 | #define FRAME_H_ 5 | 6 | #include "common/alloc.h" 7 | #include "graph/image_buffer.h" 8 | 9 | #define PATH_SPECIFIER_HELP_STR \ 10 | "Path specifier: spec@path\n" \ 11 | "BYTE: bmp, grey, yuy2, yv12, yv16, yv24, i420, i422, i444, rgbp, gbrp\n" \ 12 | "WORD: greyw, yv12w, yv16w, yv24w, i420w, i422w, i444w, rgbpw, gbrpw\n" \ 13 | "HALF: greyh, i420h, i422h, i444h, rgbph\n" \ 14 | "FLOAT: greys, i420s, i422s, i444s, 
rgbps\n" 15 | 16 | namespace zimg { 17 | 18 | enum class PixelType; 19 | 20 | } // namespace zimg 21 | 22 | 23 | class ImageFrame { 24 | zimg::AlignedVector m_vector[4]; 25 | ptrdiff_t m_offset[4]; 26 | unsigned m_width; 27 | unsigned m_height; 28 | zimg::PixelType m_pixel; 29 | unsigned m_planes; 30 | unsigned m_subsample_w; 31 | unsigned m_subsample_h; 32 | bool m_yuv; 33 | public: 34 | ImageFrame(unsigned width, unsigned height, zimg::PixelType pixel, unsigned planes, 35 | bool yuv = false, unsigned subsample_w = 0, unsigned subsample_h = 0); 36 | 37 | unsigned width(unsigned plane = 0) const noexcept; 38 | 39 | unsigned height(unsigned plane = 0) const noexcept; 40 | 41 | zimg::PixelType pixel_type() const noexcept; 42 | 43 | unsigned planes() const noexcept; 44 | 45 | unsigned subsample_w() const noexcept; 46 | 47 | unsigned subsample_h() const noexcept; 48 | 49 | bool is_yuv() const noexcept; 50 | 51 | zimg::graph::ImageBuffer as_read_buffer(unsigned plane) const noexcept; 52 | 53 | zimg::graph::ColorImageBuffer as_read_buffer() const noexcept; 54 | 55 | zimg::graph::ImageBuffer as_write_buffer(unsigned plane) noexcept; 56 | 57 | zimg::graph::ColorImageBuffer as_write_buffer() noexcept; 58 | }; 59 | 60 | 61 | namespace imageframe { 62 | 63 | ImageFrame read(const char *pathspec, const char *assumed, unsigned width, unsigned height); 64 | 65 | ImageFrame read(const char *pathspec, const char *assumed, unsigned width, unsigned height, zimg::PixelType type, bool fullrange); 66 | 67 | void write(const ImageFrame &frame, const char *pathspec, const char *assumed, bool fullrange = false); 68 | 69 | void write(const ImageFrame &frame, const char *pathspec, const char *assumed, unsigned depth_in, bool fullrange); 70 | 71 | } // namespace imageframe 72 | 73 | #endif // FRAME_H_ 74 | -------------------------------------------------------------------------------- /src/zimg/unresize/bilinear.h: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef ZIMG_UNRESIZE_BILINEAR_H_ 4 | #define ZIMG_UNRESIZE_BILINEAR_H_ 5 | 6 | #include "common/alloc.h" 7 | 8 | namespace zimg { 9 | namespace unresize { 10 | 11 | /** 12 | * Execution context for unresize algorithm. 13 | * 14 | * See unresize.h for description of algorithm. 15 | * 16 | * Note: Although the struct comments below use one-based indices, 17 | * all arrays are stored with conventional zero-based indexing. 18 | */ 19 | struct BilinearContext { 20 | /** 21 | * Dimension of upsampled image (M). 22 | */ 23 | unsigned input_width; 24 | 25 | /** 26 | * Dimension of unresized image (N). 27 | */ 28 | unsigned output_width; 29 | 30 | /** 31 | * Packed storage of (A') as row + offset. 32 | * The matrix is stored as a 2-D array of matrix_row_size rows 33 | * and dst_width columns. 34 | * 35 | * Each row is a contiguous portion of a row in the full matrix (A'). 36 | * matrix_row_offsets stores the original column index of row band in (A'). 37 | * 38 | * The relationship to the original matrix (A') is given by the following. 39 | * 40 | * matrix_coefficients(i, j) = A'(i, matrix_row_offsets(i) + j) 41 | * 42 | */ 43 | AlignedVector matrix_coefficients; 44 | AlignedVector matrix_row_offsets; 45 | unsigned matrix_row_size; 46 | unsigned matrix_row_stride; 47 | 48 | /** 49 | * LU decomposition of (A' A) stored as three arrays of dimension (N). 50 | * 51 | * The relationship to L and U is given by the following. 52 | * 53 | * lu_c(i) = L(i, i - 1) 54 | * lu_l(i) = 1 / L(i, i) 55 | * lu_u(i) = U(i, i + 1) 56 | * 57 | * lu_c(1) and lu_u(N) are set to 0 to simplify the execution loop. 58 | * lu_l is stored inverted as it is used in forward substitution as a divisor. 59 | */ 60 | AlignedVector lu_c; 61 | AlignedVector lu_l; 62 | AlignedVector lu_u; 63 | }; 64 | 65 | /** 66 | * Initialize a BilinearContext for a given scaling factor. 
67 | * 68 | * @param in dimension of original vector 69 | * @param out dimension of upscaled vector 70 | * @param shift center shift relative to upscaled vector 71 | * @return an initialized context 72 | */ 73 | BilinearContext create_bilinear_context(unsigned in, unsigned out, double shift); 74 | 75 | } // namespace unresize 76 | } // namespace zimg 77 | 78 | #endif // ZIMG_UNRESIZE_BILINEAR_H_ 79 | -------------------------------------------------------------------------------- /src/testapp/utils.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "common/alloc.h" 4 | #include "graph/image_filter.h" 5 | 6 | #include "frame.h" 7 | #include "utils.h" 8 | 9 | struct FilterExecutor::data { 10 | zimg::AlignedVector ctx; 11 | zimg::AlignedVector tmp; 12 | }; 13 | 14 | void FilterExecutor::exec_grey(const zimg::graph::ImageFilter *filter, unsigned plane) 15 | { 16 | auto src_buffer = m_src_frame->as_read_buffer(plane); 17 | auto dst_buffer = m_dst_frame->as_write_buffer(plane); 18 | 19 | auto attr = filter->get_image_attributes(); 20 | unsigned step = filter->get_simultaneous_lines(); 21 | 22 | filter->init_context(m_data->ctx.data(), plane); 23 | 24 | for (unsigned i = 0; i < attr.height; i += step) { 25 | filter->process(m_data->ctx.data(), &src_buffer, &dst_buffer, m_data->tmp.data(), i, 0, attr.width); 26 | } 27 | } 28 | 29 | void FilterExecutor::exec_color() 30 | { 31 | auto attr = m_filter->get_image_attributes(); 32 | unsigned step = m_filter->get_simultaneous_lines(); 33 | 34 | m_filter->init_context(m_data->ctx.data(), 0); 35 | 36 | for (unsigned i = 0; i < attr.height; i += step) { 37 | m_filter->process(m_data->ctx.data(), m_src_frame->as_read_buffer(), m_dst_frame->as_write_buffer(), m_data->tmp.data(), i, 0, attr.width); 38 | } 39 | } 40 | 41 | FilterExecutor::FilterExecutor(const zimg::graph::ImageFilter *filter, const zimg::graph::ImageFilter *filter_uv, const ImageFrame *src_frame, 
ImageFrame *dst_frame) : 42 | m_data{ std::make_shared() }, 43 | m_filter{ filter }, 44 | m_filter_uv{ filter_uv }, 45 | m_src_frame{ src_frame }, 46 | m_dst_frame{ dst_frame } 47 | { 48 | filter_uv = filter_uv ? filter_uv : filter; 49 | 50 | m_data->ctx.resize(std::max(filter->get_context_size(), filter_uv->get_context_size())); 51 | m_data->tmp.resize(std::max(filter->get_tmp_size(0, dst_frame->width()), 52 | filter_uv->get_tmp_size(0, dst_frame->width()))); 53 | } 54 | 55 | void FilterExecutor::operator()() 56 | { 57 | zimg::graph::ImageFilter::filter_flags flags = m_filter->get_flags(); 58 | 59 | if (!flags.color) { 60 | unsigned planes = m_dst_frame->planes(); 61 | 62 | for (unsigned p = 0; p < planes; ++p) { 63 | const zimg::graph::ImageFilter *filter = (m_filter_uv && (p == 1 || p == 2)) ? m_filter_uv : m_filter; 64 | exec_grey(filter, p); 65 | } 66 | } else { 67 | exec_color(); 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /test/extra/musl-libm/cos.c: -------------------------------------------------------------------------------- 1 | /* origin: FreeBSD /usr/src/lib/msun/src/s_cos.c */ 2 | /* 3 | * ==================================================== 4 | * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. 5 | * 6 | * Developed at SunPro, a Sun Microsystems, Inc. business. 7 | * Permission to use, copy, modify, and distribute this 8 | * software is freely granted, provided that this notice 9 | * is preserved. 10 | * ==================================================== 11 | */ 12 | /* cos(x) 13 | * Return cosine function of x. 14 | * 15 | * kernel function: 16 | * __sin ... sine function on [-pi/4,pi/4] 17 | * __cos ... cosine function on [-pi/4,pi/4] 18 | * __rem_pio2 ... argument reduction routine 19 | * 20 | * Method. 21 | * Let S,C and T denote the sin, cos and tan respectively on 22 | * [-PI/4, +PI/4]. 
Reduce the argument x to y1+y2 = x-k*pi/2 23 | * in [-pi/4 , +pi/4], and let n = k mod 4. 24 | * We have 25 | * 26 | * n sin(x) cos(x) tan(x) 27 | * ---------------------------------------------------------- 28 | * 0 S C T 29 | * 1 C -S -1/T 30 | * 2 -S -C T 31 | * 3 -C S -1/T 32 | * ---------------------------------------------------------- 33 | * 34 | * Special cases: 35 | * Let trig be any of sin, cos, or tan. 36 | * trig(+-INF) is NaN, with signals; 37 | * trig(NaN) is that NaN; 38 | * 39 | * Accuracy: 40 | * TRIG(x) returns trig(x) nearly rounded 41 | */ 42 | 43 | #include "libm.h" 44 | 45 | double _mycos(double x) 46 | { 47 | double y[2]; 48 | uint32_t ix; 49 | unsigned n; 50 | 51 | GET_HIGH_WORD(ix, x); 52 | ix &= 0x7fffffff; 53 | 54 | /* |x| ~< pi/4 */ 55 | if (ix <= 0x3fe921fb) { 56 | if (ix < 0x3e46a09e) { /* |x| < 2**-27 * sqrt(2) */ 57 | /* raise inexact if x!=0 */ 58 | FORCE_EVAL(x + 0x1p120f); 59 | return 1.0; 60 | } 61 | return my__cos(x, 0); 62 | } 63 | 64 | /* cos(Inf or NaN) is NaN */ 65 | if (ix >= 0x7ff00000) 66 | return x-x; 67 | 68 | /* argument reduction */ 69 | n = my__rem_pio2(x, y); 70 | switch (n&3) { 71 | case 0: return my__cos(y[0], y[1]); 72 | case 1: return -my__sin(y[0], y[1], 1); 73 | case 2: return -my__cos(y[0], y[1]); 74 | default: 75 | return my__sin(y[0], y[1], 1); 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: focal 2 | sudo: false 3 | language: cpp 4 | 5 | matrix: 6 | include: 7 | # GCC 4.9 8 | - compiler: gcc 9 | # GCC 4.9 no longer available in Bionic 10 | dist: xenial 11 | addons: 12 | apt: 13 | sources: 14 | - sourceline: 'ppa:ubuntu-toolchain-r/test' 15 | packages: 16 | - gcc-4.9 17 | - g++-4.9 18 | env: 19 | - MY_CC: gcc-4.9 20 | - MY_CXX: g++-4.9 21 | # GCC 10 22 | - compiler: gcc 23 | addons: 24 | apt: 25 | sources: 26 | - sourceline: 
'ppa:ubuntu-toolchain-r/test' 27 | packages: 28 | - gcc-10 29 | - g++-10 30 | coverity_scan: 31 | project: 32 | name: "sekrit-twc/zimg" 33 | description: "Build submitted via Travis CI" 34 | notification_email: noreply@example.com 35 | build_command_prepend: "source .coverity-prepare.sh" 36 | build_command: make 37 | branch_pattern: coverity_scan 38 | env: 39 | - MY_CC: gcc-10 40 | - MY_CXX: g++-10 41 | # The next declaration is the encrypted COVERITY_SCAN_TOKEN, created 42 | # via the "travis encrypt" command using the project repo's public key 43 | - secure: "ZPVIqS6exYthp6FL6XQYyzz3Y17BKUP60bMj3X6vHk0aJ3d5jOVE4OEOjEY2uOnEWQej1a1ea0kBqpvljnrDUNZZjKdG7Auv0N8Dmg1RKznyjHLbqoGPD2Yj+ooYHT8qE7thCZC8v7axoCf2sObViWtZg3AmeqAMVxHGoG3OxRU=" 44 | # Clang/LLVM 11.0 Sanitized 45 | - compiler: clang 46 | addons: 47 | apt: 48 | sources: 49 | - sourceline: 'ppa:ubuntu-toolchain-r/test' 50 | - sourceline: 'deb http://apt.llvm.org/focal/ llvm-toolchain-focal-11 main' 51 | key_url: 'https://apt.llvm.org/llvm-snapshot.gpg.key' 52 | packages: 53 | - clang-11 54 | # Needed for updated libstdc++ 55 | - g++-10 56 | env: 57 | - MY_CC: clang-11 58 | - MY_CXX: clang++-11 59 | - CFLAGS: -fsanitize=undefined -fsanitize=address 60 | - CXXFLAGS: -fsanitize=undefined -fsanitize=address 61 | - LSAN_OPTIONS: detect_leaks=0 62 | # Apple Clang 63 | - os: osx 64 | osx_image: xcode11.6 65 | compiler: clang 66 | env: 67 | - MY_CC: clang 68 | - MY_CXX: clang++ 69 | 70 | install: 71 | - export CC=$MY_CC 72 | - export CXX=$MY_CXX 73 | script: 74 | - ./.travis-script.sh 75 | -------------------------------------------------------------------------------- /src/zimg/depth/x86/dither_x86.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifdef ZIMG_X86 4 | 5 | #ifndef ZIMG_DEPTH_X86_DITHER_X86_H_ 6 | #define ZIMG_DEPTH_X86_DITHER_X86_H_ 7 | 8 | #include 9 | #include "depth/dither.h" 10 | 11 | namespace zimg { 12 | 13 | namespace graph { 14 | 15 | 
class ImageFilter; 16 | 17 | } // namespace graph 18 | 19 | 20 | namespace depth { 21 | 22 | #define DECLARE_ORDERED_DITHER(x, cpu) \ 23 | void ordered_dither_##x##_##cpu(const float *dither, unsigned dither_offset, unsigned dither_mask, \ 24 | const void *src, void *dst, float scale, float offset, unsigned bits, unsigned left, unsigned right) 25 | 26 | DECLARE_ORDERED_DITHER(b2b, sse2); 27 | DECLARE_ORDERED_DITHER(b2w, sse2); 28 | DECLARE_ORDERED_DITHER(w2b, sse2); 29 | DECLARE_ORDERED_DITHER(w2w, sse2); 30 | DECLARE_ORDERED_DITHER(f2b, sse2); 31 | DECLARE_ORDERED_DITHER(f2w, sse2); 32 | 33 | DECLARE_ORDERED_DITHER(b2b, avx2); 34 | DECLARE_ORDERED_DITHER(b2w, avx2); 35 | DECLARE_ORDERED_DITHER(w2b, avx2); 36 | DECLARE_ORDERED_DITHER(w2w, avx2); 37 | DECLARE_ORDERED_DITHER(h2b, avx2); 38 | DECLARE_ORDERED_DITHER(h2w, avx2); 39 | DECLARE_ORDERED_DITHER(f2b, avx2); 40 | DECLARE_ORDERED_DITHER(f2w, avx2); 41 | 42 | DECLARE_ORDERED_DITHER(b2b, avx512); 43 | DECLARE_ORDERED_DITHER(b2w, avx512); 44 | DECLARE_ORDERED_DITHER(w2b, avx512); 45 | DECLARE_ORDERED_DITHER(w2w, avx512); 46 | DECLARE_ORDERED_DITHER(h2b, avx512); 47 | DECLARE_ORDERED_DITHER(h2w, avx512); 48 | DECLARE_ORDERED_DITHER(f2b, avx512); 49 | DECLARE_ORDERED_DITHER(f2w, avx512); 50 | 51 | #undef DECLARE_ORDERED_DITHER 52 | 53 | dither_convert_func select_ordered_dither_func_x86(const PixelFormat &pixel_in, const PixelFormat &pixel_out, CPUClass cpu); 54 | 55 | dither_f16c_func select_dither_f16c_func_x86(CPUClass cpu); 56 | 57 | bool needs_dither_f16c_func_x86(CPUClass cpu); 58 | 59 | 60 | std::unique_ptr create_error_diffusion_sse2(unsigned width, unsigned height, const PixelFormat &pixel_in, const PixelFormat &pixel_out, CPUClass cpu); 61 | std::unique_ptr create_error_diffusion_avx2(unsigned width, unsigned height, const PixelFormat &pixel_in, const PixelFormat &pixel_out); 62 | 63 | std::unique_ptr create_error_diffusion_x86(unsigned width, unsigned height, const PixelFormat &pixel_in, const PixelFormat 
&pixel_out, CPUClass cpu); 64 | 65 | } // namespace depth 66 | } // namespace zimg 67 | 68 | #endif // ZIMG_DEPTH_X86_DITHER_X86_H_ 69 | 70 | #endif // ZIMG_X86 71 | -------------------------------------------------------------------------------- /test/extra/musl-libm/sin.c: -------------------------------------------------------------------------------- 1 | /* origin: FreeBSD /usr/src/lib/msun/src/s_sin.c */ 2 | /* 3 | * ==================================================== 4 | * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. 5 | * 6 | * Developed at SunPro, a Sun Microsystems, Inc. business. 7 | * Permission to use, copy, modify, and distribute this 8 | * software is freely granted, provided that this notice 9 | * is preserved. 10 | * ==================================================== 11 | */ 12 | /* sin(x) 13 | * Return sine function of x. 14 | * 15 | * kernel function: 16 | * __sin ... sine function on [-pi/4,pi/4] 17 | * __cos ... cose function on [-pi/4,pi/4] 18 | * __rem_pio2 ... argument reduction routine 19 | * 20 | * Method. 21 | * Let S,C and T denote the sin, cos and tan respectively on 22 | * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2 23 | * in [-pi/4 , +pi/4], and let n = k mod 4. 24 | * We have 25 | * 26 | * n sin(x) cos(x) tan(x) 27 | * ---------------------------------------------------------- 28 | * 0 S C T 29 | * 1 C -S -1/T 30 | * 2 -S -C T 31 | * 3 -C S -1/T 32 | * ---------------------------------------------------------- 33 | * 34 | * Special cases: 35 | * Let trig be any of sin, cos, or tan. 36 | * trig(+-INF) is NaN, with signals; 37 | * trig(NaN) is that NaN; 38 | * 39 | * Accuracy: 40 | * TRIG(x) returns trig(x) nearly rounded 41 | */ 42 | 43 | #include "libm.h" 44 | 45 | double _mysin(double x) 46 | { 47 | double y[2]; 48 | uint32_t ix; 49 | unsigned n; 50 | 51 | /* High word of x. 
*/ 52 | GET_HIGH_WORD(ix, x); 53 | ix &= 0x7fffffff; 54 | 55 | /* |x| ~< pi/4 */ 56 | if (ix <= 0x3fe921fb) { 57 | if (ix < 0x3e500000) { /* |x| < 2**-26 */ 58 | /* raise inexact if x != 0 and underflow if subnormal*/ 59 | FORCE_EVAL(ix < 0x00100000 ? x/0x1p120f : x+0x1p120f); 60 | return x; 61 | } 62 | return my__sin(x, 0.0, 0); 63 | } 64 | 65 | /* sin(Inf or NaN) is NaN */ 66 | if (ix >= 0x7ff00000) 67 | return x - x; 68 | 69 | /* argument reduction needed */ 70 | n = my__rem_pio2(x, y); 71 | switch (n&3) { 72 | case 0: return my__sin(y[0], y[1], 1); 73 | case 1: return my__cos(y[0], y[1]); 74 | case 2: return -my__sin(y[0], y[1], 1); 75 | default: 76 | return -my__cos(y[0], y[1]); 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /test/graph/mock_filter.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef ZIMG_GRAPH_MOCK_FILTER_H_ 4 | #define ZIMG_GRAPH_MOCK_FILTER_H_ 5 | 6 | #include 7 | #include "graph/image_filter.h" 8 | 9 | class MockFilter : public zimg::graph::ImageFilter { 10 | protected: 11 | struct context { 12 | unsigned last_line; 13 | unsigned last_left; 14 | unsigned last_right; 15 | unsigned seq; 16 | }; 17 | 18 | image_attributes m_attr; 19 | filter_flags m_flags; 20 | mutable unsigned m_total_calls; 21 | unsigned m_simultaneous_lines; 22 | unsigned m_horizontal_support; 23 | unsigned m_vertical_support; 24 | public: 25 | MockFilter(unsigned width, unsigned height, zimg::PixelType type, const filter_flags &flags = {}); 26 | 27 | unsigned get_total_calls() const; 28 | 29 | void set_simultaneous_lines(unsigned n); 30 | 31 | void set_horizontal_support(unsigned n); 32 | 33 | void set_vertical_support(unsigned n); 34 | 35 | // ImageFilter 36 | filter_flags get_flags() const override; 37 | 38 | image_attributes get_image_attributes() const override; 39 | 40 | pair_unsigned get_required_row_range(unsigned i) const override; 41 | 42 | 
pair_unsigned get_required_col_range(unsigned left, unsigned right) const override; 43 | 44 | unsigned get_simultaneous_lines() const override; 45 | 46 | unsigned get_max_buffering() const override; 47 | 48 | size_t get_context_size() const override; 49 | 50 | size_t get_tmp_size(unsigned left, unsigned right) const override; 51 | 52 | void init_context(void *ctx, unsigned seq) const override; 53 | 54 | void process(void *ctx, const zimg::graph::ImageBuffer *src, const zimg::graph::ImageBuffer *dst, void *tmp, unsigned i, unsigned left, unsigned right) const override; 55 | }; 56 | 57 | template 58 | class SplatFilter : public MockFilter { 59 | static T splat_byte(unsigned char b); 60 | 61 | T m_src_val; 62 | T m_dst_val; 63 | bool m_input_checking; 64 | public: 65 | SplatFilter(unsigned width, unsigned height, zimg::PixelType type, const zimg::graph::ImageFilter::filter_flags &flags = {}); 66 | 67 | void set_input_val(unsigned char x); 68 | 69 | void set_output_val(unsigned char x); 70 | 71 | void enable_input_checking(bool enabled); 72 | 73 | void process(void *ctx, const zimg::graph::ImageBuffer *src, const zimg::graph::ImageBuffer *dst, void *tmp, unsigned i, unsigned left, unsigned right) const override; 74 | }; 75 | 76 | extern template class SplatFilter; 77 | extern template class SplatFilter; 78 | extern template class SplatFilter; 79 | 80 | #endif // ZIMG_GRAPH_MOCK_FILTER_H_ 81 | -------------------------------------------------------------------------------- /test/api/api_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "api/zimg.h" 5 | 6 | #include "gtest/gtest.h" 7 | 8 | TEST(APITest, test_api_2_0_compat) 9 | { 10 | const unsigned API_2_0 = ZIMG_MAKE_API_VERSION(2, 0); 11 | const size_t extra_off = offsetof(zimg_image_format, active_region); 12 | const size_t extra_len = sizeof(zimg_image_format) - extra_off; 13 | 14 | zimg_image_format format; 15 | 
std::memset(reinterpret_cast(&format) + extra_off, 0xCC, extra_len); 16 | 17 | zimg_image_format_default(&format, API_2_0); 18 | EXPECT_EQ(API_2_0, format.version); 19 | for (size_t i = extra_off; i < extra_len; ++i) { 20 | EXPECT_EQ(0xCC, *(reinterpret_cast(&format) + i)); 21 | } 22 | 23 | format.width = 640; 24 | format.height = 480; 25 | format.pixel_type = ZIMG_PIXEL_BYTE; 26 | 27 | // Should trigger error in API 2.1+. 28 | format.active_region.left = 0; 29 | format.active_region.top = 0; 30 | format.active_region.width = -INFINITY; 31 | format.active_region.height = -INFINITY; 32 | 33 | zimg_graph_builder_params params; 34 | zimg_graph_builder_params_default(¶ms, API_2_0); 35 | EXPECT_EQ(API_2_0, params.version); 36 | 37 | zimg_filter_graph *graph = zimg_filter_graph_build(&format, &format, ¶ms); 38 | EXPECT_TRUE(graph); 39 | zimg_filter_graph_free(graph); 40 | } 41 | 42 | TEST(APITest, test_api_2_1_compat) 43 | { 44 | const unsigned API_2_1 = ZIMG_MAKE_API_VERSION(2, 1); 45 | const size_t extra_off = offsetof(zimg_graph_builder_params, nominal_peak_luminance); 46 | const size_t extra_len = sizeof(zimg_graph_builder_params) - extra_off; 47 | 48 | zimg_graph_builder_params params; 49 | std::memset(reinterpret_cast(¶ms) + extra_off, 0xCC, extra_len); 50 | 51 | zimg_graph_builder_params_default(¶ms, API_2_1); 52 | EXPECT_EQ(API_2_1, params.version); 53 | for (size_t i = extra_off; i < extra_len; ++i) { 54 | EXPECT_EQ(0xCC, *(reinterpret_cast(¶ms) + i)); 55 | } 56 | } 57 | 58 | TEST(APITest, test_api_2_3_compat) 59 | { 60 | const unsigned API_2_3 = ZIMG_MAKE_API_VERSION(2, 3); 61 | const size_t extra_off = offsetof(zimg_image_format, alpha); 62 | const size_t extra_len = sizeof(zimg_image_format) - extra_off; 63 | 64 | zimg_image_format format; 65 | std::memset(reinterpret_cast(&format) + extra_off, 0xCC, extra_len); 66 | 67 | zimg_image_format_default(&format, API_2_3); 68 | EXPECT_EQ(API_2_3, format.version); 69 | for (size_t i = extra_off; i < extra_len; ++i) { 
70 | EXPECT_EQ(0xCC, *(reinterpret_cast(&format) + i)); 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /src/zimg/colorspace/matrix3.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef ZIMG_COLORSPACE_MATRIX3_H_ 4 | #define ZIMG_COLORSPACE_MATRIX3_H_ 5 | 6 | #include 7 | 8 | namespace zimg { 9 | namespace colorspace { 10 | 11 | /** 12 | * Fixed size vector of 3 numbers. 13 | */ 14 | struct Vector3 : public std::array { 15 | Vector3() = default; 16 | 17 | constexpr Vector3(double a, double b, double c) : 18 | std::array{ { a, b, c } } 19 | {} 20 | }; 21 | 22 | /** 23 | * Fixed size 3x3 matrix. 24 | */ 25 | struct Matrix3x3 : public std::array { 26 | Matrix3x3() = default; 27 | 28 | constexpr Matrix3x3(const Vector3 &a, const Vector3 &b, const Vector3 &c) : 29 | std::array{ { a, b, c } } 30 | {} 31 | 32 | static constexpr Matrix3x3 identity() 33 | { 34 | return{ 35 | { 1.0, 0.0, 0.0 }, 36 | { 0.0, 1.0, 0.0 }, 37 | { 0.0, 0.0, 1.0 } 38 | }; 39 | } 40 | }; 41 | 42 | /** 43 | * Element-wise multiplication between vectors. 44 | * 45 | * @param v1 lhs 46 | * @param v2 rhs 47 | * @return element-wise product 48 | */ 49 | Vector3 operator*(const Vector3 &v1, const Vector3 &v2) noexcept; 50 | 51 | /** 52 | * Matrix-vector multiplication. 53 | * 54 | * @param m matrix 55 | * @param v vector 56 | * @return product 57 | */ 58 | Vector3 operator*(const Matrix3x3 &m, const Vector3 &v) noexcept; 59 | 60 | /** 61 | * Matrix-matrix multiplication. 62 | * 63 | * @param a lhs 64 | * @param b rhs 65 | * @return product 66 | */ 67 | Matrix3x3 operator*(const Matrix3x3 &a, const Matrix3x3 &b) noexcept; 68 | 69 | /** 70 | * Vector cross product. 71 | * 72 | * @param a lhs 73 | * @param b rhs 74 | * @return product 75 | */ 76 | Vector3 cross(const Vector3 &a, const Vector3 &b) noexcept; 77 | 78 | /** 79 | * Vector dot product. 
80 | * 81 | * @param a lhs 82 | * @param b rhs 83 | * @return product 84 | */ 85 | double dot(const Vector3 &a, const Vector3 &b) noexcept; 86 | 87 | /** 88 | * Determinant of matrix. 89 | * 90 | * @param m matrix 91 | * @return determinant 92 | */ 93 | double determinant(const Matrix3x3 &m) noexcept; 94 | 95 | /** 96 | * Inverse of matrix. 97 | * 98 | * @param m matrix 99 | * @return inverse 100 | */ 101 | Matrix3x3 inverse(const Matrix3x3 &m) noexcept; 102 | 103 | /** 104 | * Transpose of matrix. 105 | * 106 | * @param m matrix 107 | * @return transpose 108 | */ 109 | Matrix3x3 transpose(const Matrix3x3 &m) noexcept; 110 | 111 | } // namespace colorspace 112 | } // namespace zimg 113 | 114 | #endif // ZIMG_COLORSPACE_MATRIX3_H_ 115 | -------------------------------------------------------------------------------- /test/extra/musl-libm/__sin.c: -------------------------------------------------------------------------------- 1 | /* origin: FreeBSD /usr/src/lib/msun/src/k_sin.c */ 2 | /* 3 | * ==================================================== 4 | * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. 5 | * 6 | * Developed at SunSoft, a Sun Microsystems, Inc. business. 7 | * Permission to use, copy, modify, and distribute this 8 | * software is freely granted, provided that this notice 9 | * is preserved. 10 | * ==================================================== 11 | */ 12 | /* __sin( x, y, iy) 13 | * kernel sin function on ~[-pi/4, pi/4] (except on -0), pi/4 ~ 0.7854 14 | * Input x is assumed to be bounded by ~pi/4 in magnitude. 15 | * Input y is the tail of x. 16 | * Input iy indicates whether y is 0. (if iy=0, y assume to be 0). 17 | * 18 | * Algorithm 19 | * 1. Since sin(-x) = -sin(x), we need only to consider positive x. 20 | * 2. Callers must return sin(-0) = -0 without calling here since our 21 | * odd polynomial is not evaluated in a way that preserves -0. 22 | * Callers may do the optimization sin(x) ~ x for tiny x. 23 | * 3. 
sin(x) is approximated by a polynomial of degree 13 on 24 | * [0,pi/4] 25 | * 3 13 26 | * sin(x) ~ x + S1*x + ... + S6*x 27 | * where 28 | * 29 | * |sin(x) 2 4 6 8 10 12 | -58 30 | * |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2 31 | * | x | 32 | * 33 | * 4. sin(x+y) = sin(x) + sin'(x')*y 34 | * ~ sin(x) + (1-x*x/2)*y 35 | * For better accuracy, let 36 | * 3 2 2 2 2 37 | * r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6)))) 38 | * then 3 2 39 | * sin(x) = x + (S1*x + (x *(r-y/2)+y)) 40 | */ 41 | 42 | #include "libm.h" 43 | 44 | static const double 45 | S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */ 46 | S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */ 47 | S3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */ 48 | S4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */ 49 | S5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */ 50 | S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */ 51 | 52 | double my__sin(double x, double y, int iy) 53 | { 54 | double_t z,r,v,w; 55 | 56 | z = x*x; 57 | w = z*z; 58 | r = S2 + z*(S3 + z*S4) + z*w*(S5 + z*S6); 59 | v = z*x; 60 | if (iy == 0) 61 | return x + v*(S1 + z*r); 62 | else 63 | return x - ((z*(0.5*y - v*r) - y) - v*S1); 64 | } 65 | -------------------------------------------------------------------------------- /src/testapp/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "common/except.h" 5 | #include "common/pixel.h" 6 | 7 | #include "apps.h" 8 | #include "table.h" 9 | 10 | namespace { 11 | 12 | typedef int (*main_func)(int, char **); 13 | 14 | void usage() 15 | { 16 | std::cout << "TestApp subapp [args]\n"; 17 | std::cout << " colorspace - change colorspace\n"; 18 | std::cout << " cpuinfo - show CPU information\n"; 19 | std::cout << " depth - change depth\n"; 20 | std::cout << " graph - benchmark filter graph\n"; 21 | std::cout << " resize - resize images\n"; 22 | 
std::cout << " unresize - unresize images\n"; 23 | } 24 | 25 | main_func lookup_app(const char *name) 26 | { 27 | static const zimg::static_string_map map{ 28 | { "colorspace", colorspace_main }, 29 | { "cpuinfo", cpuinfo_main }, 30 | { "depth", depth_main }, 31 | { "graph", graph_main }, 32 | { "resize", resize_main }, 33 | { "unresize", unresize_main } 34 | }; 35 | 36 | auto it = map.find(name); 37 | return it == map.end() ? nullptr : it->second; 38 | } 39 | 40 | } // namespace 41 | 42 | 43 | int arg_decode_cpu(const struct ArgparseOption *, void *out, const char *param, int) 44 | { 45 | try { 46 | zimg::CPUClass *cpu = static_cast(out); 47 | *cpu = g_cpu_table[param]; 48 | } catch (const std::exception &e) { 49 | std::cerr << e.what() << '\n'; 50 | return -1; 51 | } 52 | 53 | return 0; 54 | } 55 | 56 | int arg_decode_pixfmt(const struct ArgparseOption *, void *out, const char *param, int) 57 | { 58 | try { 59 | zimg::PixelFormat *format = static_cast(out); 60 | std::regex format_regex{ R"(^(byte|word|half|float)(?::(f|l)(c|l)?(?::(\d+))?)?$)" }; 61 | std::cmatch match; 62 | 63 | if (!std::regex_match(param, match, format_regex)) 64 | throw std::runtime_error{ "bad format string" }; 65 | 66 | *format = g_pixel_table[match[1].str().c_str()]; 67 | 68 | if (match.size() >= 2 && match[2].length()) 69 | format->fullrange = (match[2] == "f"); 70 | if (match.size() >= 3 && match[3].length()) 71 | format->chroma = (match[3] == "c"); 72 | if (match.size() >= 4 && match[4].length()) 73 | format->depth = std::stoi(match[4]); 74 | } catch (const std::exception &e) { 75 | std::cerr << e.what() << '\n'; 76 | return -1; 77 | } 78 | 79 | return 0; 80 | } 81 | 82 | 83 | int main(int argc, char **argv) 84 | { 85 | if (argc < 2) { 86 | usage(); 87 | return 1; 88 | } 89 | 90 | main_func func = lookup_app(argv[1]); 91 | 92 | if (!func) { 93 | usage(); 94 | return 1; 95 | } 96 | 97 | return func(argc - 1, argv + 1); 98 | } 99 | 
-------------------------------------------------------------------------------- /src/zimg/colorspace/operation_impl.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef ZIMG_COLORSPACE_OPERATION_IMPL_H_ 4 | #define ZIMG_COLORSPACE_OPERATION_IMPL_H_ 5 | 6 | #include "common/libm_wrapper.h" 7 | #include "operation.h" 8 | 9 | namespace zimg { 10 | 11 | enum class CPUClass; 12 | 13 | namespace colorspace { 14 | 15 | struct Matrix3x3; 16 | struct TransferFunction; 17 | 18 | /** 19 | * Base class for matrix operation implementations. 20 | */ 21 | class MatrixOperationImpl : public Operation { 22 | protected: 23 | /** 24 | * Transformation matrix. 25 | */ 26 | float m_matrix[3][3]; 27 | 28 | /** 29 | * Initialize the implementation with the given matrix. 30 | * 31 | * @param matrix transformation matrix 32 | */ 33 | explicit MatrixOperationImpl(const Matrix3x3 &matrix); 34 | }; 35 | 36 | /** 37 | * Create operation consisting of applying a 3x3 matrix to each pixel triplet. 38 | * 39 | * @param m matrix 40 | * @param cpu create operation optimized for given cpu 41 | * @return concrete operation 42 | */ 43 | std::unique_ptr create_matrix_operation(const Matrix3x3 &m, CPUClass cpu); 44 | 45 | /** 46 | * Create operation consisting of converting linear light to non-linear ("gamma") encoding. 47 | * 48 | * @param func transfer function 49 | * @param params parameters 50 | * @param cpu create operation optimized for given cpu 51 | * @return concrete operation 52 | */ 53 | std::unique_ptr create_gamma_operation(const TransferFunction &func, const OperationParams &params, CPUClass cpu); 54 | 55 | /** 56 | * Create operation consisting of converting non-linear ("gamma") encoding to linear light.
57 | * 58 | * @see create_gamma_operation 59 | */ 60 | std::unique_ptr create_inverse_gamma_operation(const TransferFunction &func, const OperationParams &params, CPUClass cpu); 61 | 62 | /** 63 | * Create operation consisting of converting linear light to ARIB STD-B67 using display-referred EOTF. 64 | * 65 | * @param m RGB to YUV conversion matrix for color primaries 66 | * @param params parameters 67 | * @return concrete operation 68 | */ 69 | std::unique_ptr create_arib_b67_operation(const Matrix3x3 &m, const OperationParams &params); 70 | 71 | /** 72 | * Create operation consisting of converting ARIB STD-B67 to linear light using display-referred EOTF. 73 | * 74 | * @param m RGB to YUV conversion matrix for color primaries 75 | * @param params parameters 76 | * @return concrete operation 77 | */ 78 | std::unique_ptr create_inverse_arib_b67_operation(const Matrix3x3 &m, const OperationParams &params); 79 | 80 | } // namespace colorspace 81 | } // namespace zimg 82 | 83 | #endif // ZIMG_COLORSPACE_OPERATION_IMPL_H_ 84 | -------------------------------------------------------------------------------- /src/zimg/common/x86/avx_util.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifdef ZIMG_X86 4 | 5 | #ifndef ZIMG_X86_AVX_UTIL_H_ 6 | #define ZIMG_X86_AVX_UTIL_H_ 7 | 8 | #include "common/ccdep.h" 9 | #include "x86util.h" 10 | 11 | namespace zimg { 12 | 13 | // Store from [x] into [dst] the 32-bit elements with index less than [idx]. 14 | static inline FORCE_INLINE void mm256_store_idxlo_ps(float *dst, __m256 x, unsigned idx) 15 | { 16 | __m256i mask = _mm256_load_si256((const __m256i *)(&ymm_mask_table[idx * 4])); 17 | _mm256_maskstore_ps(dst, mask, x); 18 | } 19 | 20 | // Store from [x] into [dst] the 32-bit elements with index greater than or equal to [idx].
21 | static inline FORCE_INLINE void mm256_store_idxhi_ps(float *dst, __m256 x, unsigned idx) 22 | { 23 | __m256i mask = _mm256_load_si256((const __m256i *)(&ymm_mask_table[idx * 4])); 24 | mask = _mm256_castps_si256(_mm256_xor_ps(_mm256_castsi256_ps(mask), _mm256_castsi256_ps(_mm256_set1_epi32(-1)))); 25 | _mm256_maskstore_ps(dst, mask, x); 26 | } 27 | 28 | // Transpose in-place the 8x8 matrix stored in [row0]-[row7] 29 | static inline FORCE_INLINE void mm256_transpose8_ps(__m256 &row0, __m256 &row1, __m256 &row2, __m256 &row3, __m256 &row4, __m256 &row5, __m256 &row6, __m256 &row7) 30 | { 31 | __m256 t0, t1, t2, t3, t4, t5, t6, t7; 32 | __m256 tt0, tt1, tt2, tt3, tt4, tt5, tt6, tt7; 33 | 34 | t0 = _mm256_unpacklo_ps(row0, row1); 35 | t1 = _mm256_unpackhi_ps(row0, row1); 36 | t2 = _mm256_unpacklo_ps(row2, row3); 37 | t3 = _mm256_unpackhi_ps(row2, row3); 38 | t4 = _mm256_unpacklo_ps(row4, row5); 39 | t5 = _mm256_unpackhi_ps(row4, row5); 40 | t6 = _mm256_unpacklo_ps(row6, row7); 41 | t7 = _mm256_unpackhi_ps(row6, row7); 42 | 43 | tt0 = _mm256_shuffle_ps(t0, t2, _MM_SHUFFLE(1, 0, 1, 0)); 44 | tt1 = _mm256_shuffle_ps(t0, t2, _MM_SHUFFLE(3, 2, 3, 2)); 45 | tt2 = _mm256_shuffle_ps(t1, t3, _MM_SHUFFLE(1, 0, 1, 0)); 46 | tt3 = _mm256_shuffle_ps(t1, t3, _MM_SHUFFLE(3, 2, 3, 2)); 47 | tt4 = _mm256_shuffle_ps(t4, t6, _MM_SHUFFLE(1, 0, 1, 0)); 48 | tt5 = _mm256_shuffle_ps(t4, t6, _MM_SHUFFLE(3, 2, 3, 2)); 49 | tt6 = _mm256_shuffle_ps(t5, t7, _MM_SHUFFLE(1, 0, 1, 0)); 50 | tt7 = _mm256_shuffle_ps(t5, t7, _MM_SHUFFLE(3, 2, 3, 2)); 51 | 52 | row0 = _mm256_permute2f128_ps(tt0, tt4, 0x20); 53 | row1 = _mm256_permute2f128_ps(tt1, tt5, 0x20); 54 | row2 = _mm256_permute2f128_ps(tt2, tt6, 0x20); 55 | row3 = _mm256_permute2f128_ps(tt3, tt7, 0x20); 56 | row4 = _mm256_permute2f128_ps(tt0, tt4, 0x31); 57 | row5 = _mm256_permute2f128_ps(tt1, tt5, 0x31); 58 | row6 = _mm256_permute2f128_ps(tt2, tt6, 0x31); 59 | row7 = _mm256_permute2f128_ps(tt3, tt7, 0x31); 60 | } 61 | 62 | } // namespace 
zimg 63 | 64 | #endif // ZIMG_X86_AVX_UTIL_H_ 65 | 66 | #endif // ZIMG_X86 67 | -------------------------------------------------------------------------------- /src/zimg/colorspace/x86/gamma_constants_avx512.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifdef ZIMG_X86_AVX512 4 | 5 | #ifndef ZIMG_COLORSPACE_X86_GAMMA_CONSTANTS_H_ 6 | #define ZIMG_COLORSPACE_X86_GAMMA_CONSTANTS_H_ 7 | 8 | namespace zimg { 9 | namespace colorspace { 10 | namespace avx512constants { 11 | 12 | struct Rec1886EOTF { 13 | // 5-th order polynomial on domain [1, 2). 14 | static const float horner[6]; 15 | // Exponent lookup table for range reduction, [-15, +0]. 16 | static const float table alignas(64)[16]; 17 | }; 18 | 19 | struct Rec1886InverseEOTF { 20 | // 5-th order polynomial on domain [1, 2). 21 | static const float horner[6]; 22 | // Exponent lookup table for range reduction, [-31, +0]. 23 | static const float table alignas(64)[32]; 24 | }; 25 | 26 | struct SRGBEOTF : private Rec1886EOTF { 27 | static constexpr float knee = 12.92f * 0.003041282560128f; 28 | static constexpr float linear_scale = 1.0f / 12.92f; 29 | 30 | static constexpr float power_scale = 1.0f / 1.055010718947587f; 31 | static constexpr float power_offset = (1.055010718947587f - 1.0f) / 1.055010718947587f; 32 | 33 | using Rec1886EOTF::horner; 34 | using Rec1886EOTF::table; 35 | }; 36 | 37 | struct SRGBInverseEOTF : private Rec1886InverseEOTF { 38 | static constexpr float knee = 0.003041282560128f; 39 | static constexpr float linear_scale = 12.92f; 40 | 41 | static constexpr float power_scale = 1.055010718947587f; 42 | static constexpr float power_offset = -(1.055010718947587f - 1.0f); 43 | 44 | using Rec1886InverseEOTF::horner; 45 | using Rec1886InverseEOTF::table; 46 | }; 47 | 48 | struct ST2084EOTF { 49 | // 32 4-th order polynomials on uniform domain [i / 32, (i + 1) / 32). 
50 | static const float horner0 alignas(64)[32]; 51 | static const float horner1 alignas(64)[32]; 52 | static const float horner2 alignas(64)[32]; 53 | static const float horner3 alignas(64)[32]; 54 | static const float horner4 alignas(64)[32]; 55 | }; 56 | 57 | struct ST2084InverseEOTF { 58 | // 32 4-th order polynomials on logarithmic domain [2 ^ i, 2 ^ (i + 1)). 59 | static const float horner0 alignas(64)[32]; 60 | static const float horner1 alignas(64)[32]; 61 | static const float horner2 alignas(64)[32]; 62 | static const float horner3 alignas(64)[32]; 63 | static const float horner4 alignas(64)[32]; 64 | }; 65 | 66 | // Debug implementations. 67 | float rec_1886_eotf(float x); 68 | float rec_1886_inverse_eotf(float x); 69 | 70 | float srgb_eotf(float x); 71 | float srgb_inverse_eotf(float x); 72 | 73 | float st_2084_eotf(float x); 74 | float st_2084_inverse_eotf(float x); 75 | 76 | } // namespace avx512constants 77 | } // namespace colorspace 78 | } // namespace zimg 79 | 80 | #endif // ZIMG_COLORSPACE_X86_GAMMA_CONSTANTS_H_ 81 | 82 | #endif // ZIMG_X86_AVX512 83 | -------------------------------------------------------------------------------- /_msvc/testapp/testapp.vcxproj.filters: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Header Files 20 | 21 | 22 | Header Files 23 | 24 | 25 | Header Files 26 | 27 | 28 | Header Files 29 | 30 | 31 | Header Files 32 | 33 | 34 | 35 | 36 | Source Files 37 | 38 | 39 | Source Files 40 | 41 | 42 | Source Files 43 | 44 | 45 | Source Files 46 | 47 | 48 | Source Files 49 | 50 | 51 | Source Files 52 | 53 | 54 | Source Files 55 | 56 | 57 |
Source Files 58 | 59 | 60 | Source Files 61 | 62 | 63 | Source Files 64 | 65 | 66 | Source Files 67 | 68 | 69 | -------------------------------------------------------------------------------- /src/zimg/colorspace/matrix3.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "matrix3.h" 3 | 4 | namespace zimg { 5 | namespace colorspace { 6 | 7 | namespace { 8 | 9 | double det2(double a00, double a01, double a10, double a11) 10 | { 11 | return a00 * a11 - a01 * a10; 12 | } 13 | 14 | } // namespace 15 | 16 | 17 | Vector3 operator*(const Vector3 &v1, const Vector3 &v2) noexcept 18 | { 19 | Vector3 ret; 20 | 21 | for (size_t i = 0; i < 3; ++i) { 22 | ret[i] = v1[i] * v2[i]; 23 | } 24 | return ret; 25 | } 26 | 27 | Vector3 operator*(const Matrix3x3 &m, const Vector3 &v) noexcept 28 | { 29 | Vector3 ret; 30 | 31 | for (size_t i = 0; i < 3; ++i) { 32 | double accum = 0; 33 | 34 | for (size_t k = 0; k < 3; ++k) { 35 | accum += m[i][k] * v[k]; 36 | } 37 | ret[i] = accum; 38 | } 39 | return ret; 40 | } 41 | 42 | Matrix3x3 operator*(const Matrix3x3 &a, const Matrix3x3 &b) noexcept 43 | { 44 | Matrix3x3 ret; 45 | 46 | for (size_t i = 0; i < 3; ++i) { 47 | for (size_t j = 0; j < 3; ++j) { 48 | double accum = 0; 49 | 50 | for (size_t k = 0; k < 3; ++k) { 51 | accum += a[i][k] * b[k][j]; 52 | } 53 | ret[i][j] = accum; 54 | } 55 | } 56 | return ret; 57 | } 58 | 59 | Vector3 cross(const Vector3 &a, const Vector3 &b) noexcept 60 | { 61 | return { a[1] * b[2] - a[2] * b[1], a[2] * b[0] - a[0] * b[2], a[0] * b[1] - a[1] * b[0] }; 62 | } 63 | 64 | double dot(const Vector3 &a, const Vector3 &b) noexcept 65 | { 66 | return a[0] * b[0] + a[1] * b[1] + a[2] * b[2]; 67 | } 68 | 69 | double determinant(const Matrix3x3 &m) noexcept 70 | { 71 | double det = 0; 72 | 73 | det += m[0][0] * det2(m[1][1], m[1][2], m[2][1], m[2][2]); 74 | det -= m[0][1] * det2(m[1][0], m[1][2], m[2][0], m[2][2]); 75 | det += m[0][2] * det2(m[1][0], m[1][1], 
m[2][0], m[2][1]); 76 | 77 | return det; 78 | } 79 | 80 | Matrix3x3 inverse(const Matrix3x3 &m) noexcept 81 | { 82 | Matrix3x3 ret; 83 | double det = determinant(m); 84 | 85 | ret[0][0] = det2(m[1][1], m[1][2], m[2][1], m[2][2]) / det; 86 | ret[0][1] = det2(m[0][2], m[0][1], m[2][2], m[2][1]) / det; 87 | ret[0][2] = det2(m[0][1], m[0][2], m[1][1], m[1][2]) / det; 88 | ret[1][0] = det2(m[1][2], m[1][0], m[2][2], m[2][0]) / det; 89 | ret[1][1] = det2(m[0][0], m[0][2], m[2][0], m[2][2]) / det; 90 | ret[1][2] = det2(m[0][2], m[0][0], m[1][2], m[1][0]) / det; 91 | ret[2][0] = det2(m[1][0], m[1][1], m[2][0], m[2][1]) / det; 92 | ret[2][1] = det2(m[0][1], m[0][0], m[2][1], m[2][0]) / det; 93 | ret[2][2] = det2(m[0][0], m[0][1], m[1][0], m[1][1]) / det; 94 | 95 | return ret; 96 | } 97 | 98 | Matrix3x3 transpose(const Matrix3x3 &m) noexcept 99 | { 100 | Matrix3x3 ret; 101 | 102 | for (size_t i = 0; i < 3; ++i) { 103 | for (size_t j = 0; j < 3; ++j) { 104 | ret[i][j] = m[j][i]; 105 | } 106 | } 107 | return ret; 108 | } 109 | 110 | } // namespace colorspace 111 | } // namespace zimg 112 | -------------------------------------------------------------------------------- /src/zimg/depth/arm/dither_arm.cpp: -------------------------------------------------------------------------------- 1 | #ifdef ZIMG_ARM 2 | 3 | #include "common/cpuinfo.h" 4 | #include "common/pixel.h" 5 | #include "common/arm/cpuinfo_arm.h" 6 | #include "graph/image_filter.h" 7 | #include "dither_arm.h" 8 | #include "f16c_arm.h" 9 | 10 | namespace zimg { 11 | namespace depth { 12 | 13 | namespace { 14 | 15 | dither_convert_func select_ordered_dither_func_neon(PixelType pixel_in, PixelType pixel_out) 16 | { 17 | #if defined(_MSC_VER) && !defined(_M_ARM64) 18 | if (pixel_in == PixelType::HALF) 19 | pixel_in = PixelType::FLOAT; 20 | #endif 21 | 22 | if (pixel_in == PixelType::BYTE && pixel_out == PixelType::BYTE) 23 | return ordered_dither_b2b_neon; 24 | else if (pixel_in == PixelType::BYTE && pixel_out == 
PixelType::WORD) 25 | return ordered_dither_b2w_neon; 26 | else if (pixel_in == PixelType::WORD && pixel_out == PixelType::BYTE) 27 | return ordered_dither_w2b_neon; 28 | else if (pixel_in == PixelType::WORD && pixel_out == PixelType::WORD) 29 | return ordered_dither_w2w_neon; 30 | #if !defined(_MSC_VER) || defined(_M_ARM64) 31 | else if (pixel_in == PixelType::HALF && pixel_out == PixelType::BYTE) 32 | return ordered_dither_h2b_neon; 33 | else if (pixel_in == PixelType::HALF && pixel_out == PixelType::WORD) 34 | return ordered_dither_h2w_neon; 35 | #endif 36 | else if (pixel_in == PixelType::FLOAT && pixel_out == PixelType::BYTE) 37 | return ordered_dither_f2b_neon; 38 | else if (pixel_in == PixelType::FLOAT && pixel_out == PixelType::WORD) 39 | return ordered_dither_f2w_neon; 40 | else 41 | return nullptr; 42 | } 43 | 44 | } // namespace 45 | 46 | 47 | dither_convert_func select_ordered_dither_func_arm(const PixelFormat &pixel_in, const PixelFormat &pixel_out, CPUClass cpu) 48 | { 49 | ARMCapabilities caps = query_arm_capabilities(); 50 | dither_convert_func func = nullptr; 51 | 52 | if (cpu_is_autodetect(cpu)) { 53 | if (!func && caps.neon && caps.vfpv4) 54 | func = select_ordered_dither_func_neon(pixel_in.type, pixel_out.type); 55 | } else { 56 | if (!func && cpu >= CPUClass::ARM_NEON) 57 | func = select_ordered_dither_func_neon(pixel_in.type, pixel_out.type); 58 | } 59 | 60 | return func; 61 | } 62 | 63 | dither_f16c_func select_dither_f16c_func_arm(CPUClass cpu) 64 | { 65 | ARMCapabilities caps = query_arm_capabilities(); 66 | dither_f16c_func func = nullptr; 67 | 68 | #if !defined(_MSC_VER) || defined(_M_ARM64) 69 | if (cpu_is_autodetect(cpu)) { 70 | if (!func && caps.neon && caps.vfpv4) 71 | func = f16c_half_to_float_neon; 72 | } else { 73 | if (!func && cpu >= CPUClass::ARM_NEON) 74 | func = f16c_half_to_float_neon; 75 | } 76 | #endif 77 | 78 | return func; 79 | } 80 | 81 | bool needs_dither_f16c_func_arm(CPUClass cpu) 82 | { 83 | #if defined(_MSC_VER) && 
!defined(_M_ARM64) 84 | return true; 85 | #else 86 | ARMCapabilities caps = query_arm_capabilities(); 87 | 88 | if (cpu_is_autodetect(cpu)) 89 | return !caps.neon || !caps.vfpv4; 90 | else 91 | return cpu < CPUClass::ARM_NEON; 92 | #endif 93 | } 94 | 95 | } // namespace depth 96 | } // namespace zimg 97 | 98 | #endif // ZIMG_ARM 99 | -------------------------------------------------------------------------------- /src/zimg/common/x86/cpuinfo_x86.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifdef ZIMG_X86 4 | 5 | #ifndef ZIMG_X86_CPUINFO_X86_H_ 6 | #define ZIMG_X86_CPUINFO_X86_H_ 7 | 8 | namespace zimg { 9 | 10 | enum class CPUClass; 11 | 12 | /** 13 | * Bitfield of selected x86 feature flags. 14 | */ 15 | struct X86Capabilities { 16 | unsigned sse : 1; 17 | unsigned sse2 : 1; 18 | unsigned sse3 : 1; 19 | unsigned ssse3 : 1; 20 | unsigned fma : 1; 21 | unsigned sse41 : 1; 22 | unsigned sse42 : 1; 23 | unsigned avx : 1; 24 | unsigned f16c : 1; 25 | unsigned avx2 : 1; 26 | unsigned avx512f : 1; 27 | unsigned avx512dq : 1; 28 | unsigned avx512ifma : 1; 29 | unsigned avx512cd : 1; 30 | unsigned avx512bw : 1; 31 | unsigned avx512vl : 1; 32 | unsigned avx512vbmi : 1; 33 | unsigned avx512vbmi2 : 1; 34 | unsigned avx512vnni : 1; 35 | unsigned avx512bitalg : 1; 36 | unsigned avx512vpopcntdq : 1; 37 | /* AMD architectures needing workarounds. */ 38 | unsigned xop : 1; 39 | unsigned piledriver : 1; 40 | unsigned zen1 : 1; 41 | unsigned zen2 : 1; 42 | unsigned zen3 : 1; 43 | }; 44 | 45 | /* 17 cycles per store on Piledriver. */ 46 | constexpr bool cpu_has_slow_avx(const X86Capabilities &caps) { return caps.piledriver; } 47 | /* 2+ cycles per value on AMD. Still >1 cycle on Zen3, but usable. */ 48 | constexpr bool cpu_has_slow_gather(const X86Capabilities &caps) { return caps.xop || caps.zen1 || caps.zen2; } 49 | /* 4 cycles per vpermd on Zen. Higher throughput on Zen3, but still long latency. 
*/ 50 | constexpr bool cpu_has_slow_permute(const X86Capabilities &caps) { return caps.zen1 || caps.zen2 || caps.zen3; } 51 | 52 | constexpr bool cpu_has_avx512_f_dq_bw_vl(const X86Capabilities &caps) { return caps.avx512f && caps.avx512dq && caps.avx512bw && caps.avx512vl; } 53 | 54 | /** 55 | * Representation of processor cache topology. 56 | */ 57 | struct X86CacheHierarchy { 58 | unsigned long l1d; 59 | unsigned long l1d_threads; 60 | unsigned long l2; 61 | unsigned long l2_threads; 62 | unsigned long l3; 63 | unsigned long l3_threads; 64 | bool l2_inclusive; 65 | bool l3_inclusive; 66 | bool valid; 67 | }; 68 | 69 | /** 70 | * Get the x86 feature flags on the current CPU. 71 | * 72 | * @return capabilities 73 | */ 74 | X86Capabilities query_x86_capabilities() noexcept; 75 | 76 | /** 77 | * Get the cache topology of the current CPU. 78 | * 79 | * On a multi-processor system, the returned topology corresponds to the first 80 | * processor package on which the function is called. The behaviour is 81 | * undefined if the platform contains non-identical processors. 82 | * 83 | * @return cache hierarchy 84 | */ 85 | X86CacheHierarchy query_x86_cache_hierarchy() noexcept; 86 | 87 | unsigned long cpu_cache_size_x86() noexcept; 88 | 89 | bool cpu_has_fast_f16_x86(CPUClass cpu) noexcept; 90 | bool cpu_requires_64b_alignment_x86(CPUClass cpu) noexcept; 91 | 92 | } // namespace zimg 93 | 94 | #endif // ZIMG_X86_CPUINFO_X86_H_ 95 | 96 | #endif // ZIMG_X86 97 | -------------------------------------------------------------------------------- /test/extra/musl-libm/__cos.c: -------------------------------------------------------------------------------- 1 | /* origin: FreeBSD /usr/src/lib/msun/src/k_cos.c */ 2 | /* 3 | * ==================================================== 4 | * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. 5 | * 6 | * Developed at SunSoft, a Sun Microsystems, Inc. business. 
7 | * Permission to use, copy, modify, and distribute this 8 | * software is freely granted, provided that this notice 9 | * is preserved. 10 | * ==================================================== 11 | */ 12 | /* 13 | * __cos( x, y ) 14 | * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164 15 | * Input x is assumed to be bounded by ~pi/4 in magnitude. 16 | * Input y is the tail of x. 17 | * 18 | * Algorithm 19 | * 1. Since cos(-x) = cos(x), we need only to consider positive x. 20 | * 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0. 21 | * 3. cos(x) is approximated by a polynomial of degree 14 on 22 | * [0,pi/4] 23 | * 24 | * cos(x) ~ 1 - x*x/2 + C1*x^4 + ... + C6*x^14 25 | * where the remez error is 26 | * 27 | * 28 | * |cos(x)-(1-.5*x^2+C1*x^4+C2*x^6+C3*x^8+C4*x^10+C5*x^12+C6*x^14)| <= 2^-58 29 | * 30 | * 31 | * 32 | * 4. let r = C1*x^4+C2*x^6+C3*x^8+C4*x^10+C5*x^12+C6*x^14, then 33 | * cos(x) ~ 1 - x*x/2 + r 34 | * since cos(x+y) ~ cos(x) - sin(x)*y 35 | * ~ cos(x) - x*y, 36 | * a correction term is necessary in cos(x) and hence 37 | * cos(x+y) = 1 - (x*x/2 - (r - x*y)) 38 | * For better accuracy, rearrange to 39 | * cos(x+y) ~ w + (tmp + (r-x*y)) 40 | * where w = 1 - x*x/2 and tmp is a tiny correction term 41 | * (1 - x*x/2 == w + tmp exactly in infinite precision). 42 | * The exactness of w + tmp in infinite precision depends on w 43 | * and tmp having the same precision as x. If they have extra 44 | * precision due to compiler bugs, then the extra precision is 45 | * only good provided it is retained in all terms of the final 46 | * expression for cos(). Retention happens in all cases tested 47 | * under FreeBSD, so don't pessimize things by forcibly clipping 48 | * any extra precision in w. 
49 | */ 50 | 51 | #include "libm.h" 52 | 53 | static const double 54 | C1 = 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */ 55 | C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */ 56 | C3 = 2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */ 57 | C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */ 58 | C5 = 2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */ 59 | C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */ 60 | 61 | double my__cos(double x, double y) 62 | { 63 | double_t hz,z,r,w; 64 | 65 | z = x*x; 66 | w = z*z; 67 | r = z*(C1+z*(C2+z*C3)) + w*w*(C4+z*(C5+z*C6)); 68 | hz = 0.5*z; 69 | w = 1.0-hz; 70 | return w + (((1.0-w)-hz) + (z*r-x*y)); 71 | } 72 | -------------------------------------------------------------------------------- /src/zimg/unresize/unresize.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "common/cpuinfo.h" 3 | #include "common/except.h" 4 | #include "common/make_unique.h" 5 | #include "common/pixel.h" 6 | #include "graph/basic_filter.h" 7 | #include "unresize.h" 8 | #include "unresize_impl.h" 9 | 10 | namespace zimg { 11 | namespace unresize { 12 | 13 | namespace { 14 | 15 | bool unresize_h_first(double xscale, double yscale) noexcept 16 | { 17 | double h_first_cost = std::max(xscale, 1.0) * 2.0 + xscale * std::max(yscale, 1.0); 18 | double v_first_cost = std::max(yscale, 1.0) + yscale * std::max(xscale, 1.0) * 2.0; 19 | 20 | return h_first_cost < v_first_cost; 21 | } 22 | 23 | } // namespace 24 | 25 | 26 | UnresizeConversion::UnresizeConversion(unsigned up_width, unsigned up_height, PixelType type) : 27 | up_width{ up_width }, 28 | up_height{ up_height }, 29 | type{ type }, 30 | orig_width{ up_width }, 31 | orig_height{ up_height }, 32 | shift_w{}, 33 | shift_h{}, 34 | cpu{ CPUClass::NONE } 35 | {} 36 | 37 | auto UnresizeConversion::create() const -> filter_pair try 38 | { 39 | if (up_width > 
pixel_max_width(PixelType::FLOAT) || orig_width > pixel_max_width(PixelType::FLOAT)) 40 | error::throw_(); 41 | 42 | bool skip_h = (up_width == orig_width && shift_w == 0); 43 | bool skip_v = (up_height == orig_height && shift_h == 0); 44 | 45 | if (skip_h && skip_v) 46 | return{ ztd::make_unique(up_width, up_height, type), nullptr }; 47 | 48 | auto builder = UnresizeImplBuilder{ up_width, up_height, type }.set_cpu(cpu); 49 | filter_pair ret{}; 50 | 51 | if (skip_h) { 52 | ret.first = builder.set_horizontal(false) 53 | .set_orig_dim(orig_height) 54 | .set_shift(shift_h) 55 | .create(); 56 | } else if (skip_v) { 57 | ret.first = builder.set_horizontal(true) 58 | .set_orig_dim(orig_width) 59 | .set_shift(shift_w) 60 | .create(); 61 | } else { 62 | bool h_first = unresize_h_first(static_cast(orig_width) / up_width, static_cast(orig_height) / up_height); 63 | 64 | if (h_first) { 65 | ret.first = builder.set_horizontal(true) 66 | .set_orig_dim(orig_width) 67 | .set_shift(shift_w) 68 | .create(); 69 | 70 | builder.up_width = orig_width; 71 | ret.second = builder.set_horizontal(false) 72 | .set_orig_dim(orig_height) 73 | .set_shift(shift_h) 74 | .create(); 75 | } else { 76 | ret.first = builder.set_horizontal(false) 77 | .set_orig_dim(orig_height) 78 | .set_shift(shift_h) 79 | .create(); 80 | 81 | builder.up_height = orig_height; 82 | ret.second = builder.set_horizontal(true) 83 | .set_orig_dim(orig_width) 84 | .set_shift(shift_w) 85 | .create(); 86 | } 87 | } 88 | 89 | return ret; 90 | } catch (const std::bad_alloc &) { 91 | error::throw_(); 92 | } 93 | 94 | } // namespace unresize 95 | } // namespace zimg 96 | -------------------------------------------------------------------------------- /src/zimg/colorspace/colorspace.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef ZIMG_COLORSPACE_COLORSPACE_H_ 4 | #define ZIMG_COLORSPACE_COLORSPACE_H_ 5 | 6 | #include 7 | 8 | namespace zimg { 9 | 10 | enum class 
CPUClass; 11 | 12 | namespace graph { 13 | 14 | class ImageFilter; 15 | 16 | } // namespace graph 17 | 18 | 19 | namespace colorspace { 20 | 21 | enum class MatrixCoefficients { 22 | UNSPECIFIED, 23 | RGB, 24 | REC_601, 25 | REC_709, 26 | FCC, 27 | SMPTE_240M, 28 | YCGCO, 29 | REC_2020_NCL, 30 | REC_2020_CL, 31 | CHROMATICITY_DERIVED_NCL, 32 | CHROMATICITY_DERIVED_CL, 33 | REC_2100_LMS, 34 | REC_2100_ICTCP, 35 | }; 36 | 37 | enum class TransferCharacteristics { 38 | UNSPECIFIED, 39 | LINEAR, 40 | LOG_100, 41 | LOG_316, 42 | REC_709, 43 | REC_470_M, 44 | REC_470_BG, 45 | SMPTE_240M, 46 | XVYCC, 47 | SRGB, 48 | ST_2084, 49 | ST_428, 50 | ARIB_B67, 51 | }; 52 | 53 | enum class ColorPrimaries { 54 | UNSPECIFIED, 55 | REC_470_M, 56 | REC_470_BG, 57 | SMPTE_C, 58 | REC_709, 59 | FILM, 60 | REC_2020, 61 | XYZ, 62 | DCI_P3, 63 | DCI_P3_D65, 64 | JEDEC_P22, 65 | }; 66 | 67 | /** 68 | * Definition of a working colorspace. 69 | */ 70 | struct ColorspaceDefinition { 71 | MatrixCoefficients matrix; 72 | TransferCharacteristics transfer; 73 | ColorPrimaries primaries; 74 | 75 | // Helper functions to create modified colorspaces. 76 | constexpr ColorspaceDefinition to(MatrixCoefficients matrix_) const noexcept 77 | { 78 | return{ matrix_, transfer, primaries }; 79 | } 80 | 81 | constexpr ColorspaceDefinition to(TransferCharacteristics transfer_) const noexcept 82 | { 83 | return{ matrix, transfer_, primaries }; 84 | } 85 | 86 | constexpr ColorspaceDefinition to(ColorPrimaries primaries_) const noexcept 87 | { 88 | return{ matrix, transfer, primaries_ }; 89 | } 90 | 91 | constexpr ColorspaceDefinition to_rgb() const noexcept 92 | { 93 | return to(MatrixCoefficients::RGB); 94 | } 95 | 96 | constexpr ColorspaceDefinition to_linear() const noexcept 97 | { 98 | return to(TransferCharacteristics::LINEAR); 99 | } 100 | }; 101 | 102 | // Compare colorspaces by comparing each component. 
103 | constexpr bool operator==(const ColorspaceDefinition &a, const ColorspaceDefinition &b) noexcept 104 | { 105 | return a.matrix == b.matrix && a.transfer == b.transfer && a.primaries == b.primaries; 106 | } 107 | 108 | constexpr bool operator!=(const ColorspaceDefinition &a, const ColorspaceDefinition &b) noexcept 109 | { 110 | return !(a == b); 111 | } 112 | 113 | 114 | struct ColorspaceConversion { 115 | unsigned width; 116 | unsigned height; 117 | 118 | #include "common/builder.h" 119 | BUILDER_MEMBER(ColorspaceDefinition, csp_in) 120 | BUILDER_MEMBER(ColorspaceDefinition, csp_out) 121 | BUILDER_MEMBER(double, peak_luminance) 122 | BUILDER_MEMBER(bool, approximate_gamma) 123 | BUILDER_MEMBER(bool, scene_referred) 124 | BUILDER_MEMBER(CPUClass, cpu) 125 | #undef BUILDER_MEMBER 126 | 127 | ColorspaceConversion(unsigned width, unsigned height); 128 | 129 | std::unique_ptr create() const; 130 | }; 131 | 132 | } // namespace colorspace 133 | } // namespace zimg 134 | 135 | #endif // ZIMG_COLORSPACE_COLORSPACE_H_ 136 | -------------------------------------------------------------------------------- /src/zimg/colorspace/gamma.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef ZIMG_COLORSPACE_GAMMA_H_ 4 | #define ZIMG_COLORSPACE_GAMMA_H_ 5 | 6 | namespace zimg { 7 | namespace colorspace { 8 | 9 | enum class TransferCharacteristics; 10 | 11 | constexpr float ST2084_PEAK_LUMINANCE = 10000.0f; // Units of cd/m^2. 12 | 13 | typedef float (*gamma_func)(float); 14 | 15 | // Scene-referred transfer functions. 
16 | float rec_709_oetf(float x) noexcept; 17 | float rec_709_inverse_oetf(float x) noexcept; 18 | 19 | float log100_oetf(float x) noexcept; 20 | float log100_inverse_oetf(float x) noexcept; 21 | 22 | float log316_oetf(float x) noexcept; 23 | float log316_inverse_oetf(float x) noexcept; 24 | 25 | float rec_470m_oetf(float x) noexcept; 26 | float rec_470m_inverse_oetf(float x) noexcept; 27 | 28 | float rec_470bg_oetf(float x) noexcept; 29 | float rec_470bg_inverse_oetf(float x) noexcept; 30 | 31 | float smpte_240m_oetf(float x) noexcept; 32 | float smpte_240m_inverse_oetf(float x) noexcept; 33 | 34 | float xvycc_eotf(float x) noexcept; 35 | float xvycc_inverse_eotf(float x) noexcept; 36 | 37 | float arib_b67_oetf(float x) noexcept; 38 | float arib_b67_inverse_oetf(float x) noexcept; 39 | 40 | // Display-referred transfer functions. 41 | float rec_1886_eotf(float x) noexcept; 42 | float rec_1886_inverse_eotf(float x) noexcept; 43 | 44 | float xvycc_oetf(float x) noexcept; 45 | float xvycc_inverse_oetf(float x) noexcept; 46 | 47 | float srgb_eotf(float x) noexcept; 48 | float srgb_inverse_eotf(float x) noexcept; 49 | 50 | float st_2084_eotf(float x) noexcept; 51 | float st_2084_inverse_eotf(float x) noexcept; 52 | 53 | float st_428_eotf(float x) noexcept; 54 | float st_428_inverse_eotf(float x) noexcept; 55 | 56 | // Derived functions. 57 | float arib_b67_eotf(float x) noexcept; 58 | float arib_b67_inverse_eotf(float x) noexcept; 59 | 60 | float st_2084_oetf(float x) noexcept; 61 | float st_2084_inverse_oetf(float x) noexcept; 62 | 63 | 64 | struct TransferFunction { 65 | gamma_func to_linear; 66 | gamma_func to_gamma; 67 | float to_linear_scale; 68 | float to_gamma_scale; 69 | }; 70 | 71 | TransferFunction select_transfer_function(TransferCharacteristics transfer, double peak_luminance, bool scene_referred); 72 | 73 | 74 | // MSVC 32-bit compiler generates x87 instructions when operating on floats 75 | // returned from external functions. 
The caller must set the x87 precision to 76 | // 24-bit (single precision) to ensure reproducible results. 77 | #if defined(_MSC_VER) && defined(_M_IX86) 78 | class EnsureSinglePrecision { 79 | unsigned m_fpu_word; 80 | public: 81 | EnsureSinglePrecision() noexcept; 82 | EnsureSinglePrecision(const EnsureSinglePrecision &) = delete; 83 | 84 | ~EnsureSinglePrecision(); 85 | 86 | EnsureSinglePrecision &operator=(const EnsureSinglePrecision &) = delete; 87 | }; 88 | #else 89 | struct EnsureSinglePrecision { 90 | EnsureSinglePrecision() {} 91 | EnsureSinglePrecision(const EnsureSinglePrecision &) = delete; 92 | 93 | ~EnsureSinglePrecision() {} 94 | 95 | EnsureSinglePrecision &operator=(const EnsureSinglePrecision &) = delete; 96 | }; 97 | #endif 98 | 99 | } // namespace colorspace 100 | } // namespace zimg 101 | 102 | #endif // ZIMG_COLORSPACE_GAMMA_H_ 103 | -------------------------------------------------------------------------------- /test/resize/filter_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "resize/filter.h" 3 | 4 | #include "gtest/gtest.h" 5 | 6 | namespace { 7 | 8 | void check_interpolating(const zimg::resize::Filter& f) 9 | { 10 | unsigned support = f.support(); 11 | 12 | EXPECT_EQ(1.0, f(0)); 13 | for (unsigned i = 1; i <= support; ++i) { 14 | SCOPED_TRACE(i); 15 | EXPECT_NEAR(0.0, f(-static_cast(i)), 1e-15); 16 | EXPECT_NEAR(0.0, f(static_cast(i)), 1e-15); 17 | } 18 | EXPECT_EQ(0.0, f(std::nextafter(-static_cast(support), -INFINITY))); 19 | EXPECT_EQ(0.0, f(std::nextafter(static_cast(support), INFINITY))); 20 | } 21 | 22 | } // namespace 23 | 24 | 25 | TEST(FilterTest, test_bilinear) 26 | { 27 | zimg::resize::BilinearFilter f; 28 | EXPECT_EQ(1U, f.support()); 29 | check_interpolating(f); 30 | EXPECT_EQ(0.5, f(0.5)); 31 | EXPECT_EQ(0.5, f(-0.5)); 32 | } 33 | 34 | TEST(FilterTest, test_bicubic_interpolating) 35 | { 36 | zimg::resize::BicubicFilter catmull{ 0.0, 0.5 }; 37 | 
EXPECT_EQ(2U, catmull.support()); 38 | check_interpolating(catmull); 39 | EXPECT_EQ(0.5625, catmull(0.5)); 40 | EXPECT_EQ(0.5625, catmull(-0.5)); 41 | EXPECT_EQ(-0.0625, catmull(1.5)); 42 | EXPECT_EQ(-0.0625, catmull(-1.5)); 43 | } 44 | 45 | TEST(FilterTest, test_bicubic_noninterpolating) 46 | { 47 | zimg::resize::BicubicFilter mitchell{ 1.0 / 3.0, 1.0 / 3.0 }; 48 | EXPECT_DOUBLE_EQ(8.0 / 9.0, mitchell(0.0)); 49 | EXPECT_NEAR(1.0 / 18.0, mitchell(-1.0), 1e-15); 50 | EXPECT_NEAR(1.0 / 18.0, mitchell(1.0), 1e-15); 51 | EXPECT_EQ(0.0, mitchell(-2.0)); 52 | EXPECT_EQ(0.0, mitchell(2.0)); 53 | EXPECT_DOUBLE_EQ(77.0 / 144.0, mitchell(0.5)); 54 | EXPECT_DOUBLE_EQ(77.0 / 144.0, mitchell(-0.5)); 55 | EXPECT_NEAR(-5.0 / 144.0 , mitchell(1.5), 1e-15); 56 | EXPECT_NEAR(-5.0 / 144.0, mitchell(-1.5), 1e-15); 57 | } 58 | 59 | TEST(FilterTest, test_spline16) 60 | { 61 | zimg::resize::Spline16Filter f; 62 | EXPECT_EQ(2U, f.support()); 63 | check_interpolating(f); 64 | EXPECT_EQ(0.575, f(0.5)); 65 | EXPECT_EQ(0.575, f(-0.5)); 66 | EXPECT_DOUBLE_EQ(-0.075, f(1.5)); 67 | EXPECT_DOUBLE_EQ(-0.075, f(-1.5)); 68 | } 69 | 70 | TEST(FilterTest, test_spline36) 71 | { 72 | zimg::resize::Spline36Filter f; 73 | EXPECT_EQ(3U, f.support()); 74 | check_interpolating(f); 75 | EXPECT_NEAR(0.598684, f(0.5), 1e-6); 76 | EXPECT_NEAR(0.598684, f(-0.5), 1e-6); 77 | EXPECT_NEAR(-0.118421, f(1.5), 1e-6); 78 | EXPECT_NEAR(-0.118421, f(-1.5), 1e-6); 79 | EXPECT_NEAR(0.0197368, f(2.5), 1e-6); 80 | EXPECT_NEAR(0.0197368, f(-2.5), 1e-6); 81 | } 82 | 83 | TEST(FilterTest, test_spline64) 84 | { 85 | zimg::resize::Spline64Filter f; 86 | EXPECT_EQ(4U, f.support()); 87 | check_interpolating(f); 88 | EXPECT_NEAR(0.600352, f(0.5), 1e-6); 89 | EXPECT_NEAR(0.600352, f(-0.5), 1e-6); 90 | EXPECT_NEAR(-0.126760, f(1.5), 1e-6); 91 | EXPECT_NEAR(-0.126760, f(-1.5), 1e-6); 92 | EXPECT_NEAR(0.0316901, f(2.5), 1e-7); 93 | EXPECT_NEAR(0.0316901, f(-2.5), 1e-7); 94 | EXPECT_NEAR(-0.00528169, f(3.5), 1e-8); 95 | 
EXPECT_NEAR(-0.00528169, f(-3.5), 1e-8); 96 | } 97 | 98 | TEST(FilterTest, test_lanczos) 99 | { 100 | for (unsigned i = 1; i < 4; ++i) { 101 | SCOPED_TRACE(i); 102 | zimg::resize::LanczosFilter f{ i }; 103 | EXPECT_EQ(i, f.support()); 104 | check_interpolating(f); 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /src/zimg/unresize/unresize.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef ZIMG_UNRESIZE_UNRESIZE_H_ 4 | #define ZIMG_UNRESIZE_UNRESIZE_H_ 5 | 6 | /** 7 | * Unresize: reverses the effect of the bilinear scaling method. 8 | * 9 | * Linear interpolation in one dimension from an input dimension N to an 10 | * output dimension M can be represented as the matrix product: 11 | * 12 | * A x = y 13 | * 14 | * A is the interpolation function 15 | * x is the original vector 16 | * y is the resized vector 17 | * 18 | * 19 | * Unresize attempts to recover x given the resized vector y. 20 | * This is done by the method of least squares. 21 | * 22 | * A' A x = A' y 23 | * 24 | * A' is the transpose of A 25 | * 26 | * 27 | * The problem resolves to solving a linear system. 28 | * 29 | * P x = y' 30 | * 31 | * P is (A' A) 32 | * y' is (A' y) 33 | * 34 | * 35 | * Given the width of the bilinear filter, P is a tridiagonal matrix of 36 | * dimension N, and so the system can be solved by simple substitution after 37 | * LU factorization. 38 | * 39 | * Using a convention that U has a main diagonal of ones, the factorization is 40 | * given by the following. 41 | * 42 | * 43 | * The following names will be given to relevant diagonals. 44 | * 45 | * a(i) = P(i, i) 46 | * b(i) = P(i, i + 1) 47 | * c(i) = P(i, i - 1) 48 | * l(i) = L(i, i) 49 | * u(i) = U(i, i + 1) 50 | * 51 | * The computation of l and u can be described by the following procedure. 
52 | * 53 | * l(1) = a(1) 54 | * u(1) = b(1) / a(1) 55 | * 56 | * FOR (i = 2 : N - 1) 57 | * l(i) = a(i) - c(i) * u(i - 1) 58 | * u(i) = b(i) / l(i) 59 | * 60 | * l(N) = a(N) - c(N) * u(N - 1) 61 | * 62 | * 63 | * The solution to the system can be described by the procedure. 64 | * 65 | * L U x = y' 66 | * 67 | * z(1) = y'(1) / l(1) 68 | * FOR (i = 2 : N) 69 | * z(i) = (y'(i) - c(i) * z(i - 1)) / l(i) 70 | * 71 | * x(N) = z(N) 72 | * FOR (i = N - 1 : 1) 73 | * x(i) = z(i) - u(i) * x(i + 1) 74 | * 75 | * 76 | * The implementation of Unresize caches the values of P, l, u, and c for given 77 | * dimensions N and M. Execution is done by first computing y' and then 78 | * performing the tridiagonal algorithm to obtain x. 79 | * 80 | * Generalization to two dimensions is done by processing each dimension. 81 | */ 82 | 83 | #include 84 | #include 85 | 86 | namespace zimg { 87 | 88 | enum class CPUClass; 89 | enum class PixelType; 90 | 91 | namespace graph { 92 | 93 | class ImageFilter; 94 | 95 | } // namespace graph 96 | 97 | 98 | namespace unresize { 99 | 100 | struct UnresizeConversion { 101 | typedef std::pair, std::unique_ptr> filter_pair; 102 | 103 | unsigned up_width; 104 | unsigned up_height; 105 | PixelType type; 106 | 107 | #include "common/builder.h" 108 | BUILDER_MEMBER(unsigned, orig_width) 109 | BUILDER_MEMBER(unsigned, orig_height) 110 | BUILDER_MEMBER(double, shift_w) 111 | BUILDER_MEMBER(double, shift_h) 112 | BUILDER_MEMBER(CPUClass, cpu) 113 | #undef BUILDER_MEMBER 114 | 115 | UnresizeConversion(unsigned up_width, unsigned up_height, PixelType type); 116 | 117 | filter_pair create() const; 118 | }; 119 | 120 | } // namespace unresize 121 | } // namespace zimg 122 | 123 | #endif // ZIMG_UNRESIZE_UNRESIZE_H_ 124 | -------------------------------------------------------------------------------- /test/colorspace/x86/gamma_constants_avx512_test.cpp: -------------------------------------------------------------------------------- 1 | #ifdef ZIMG_X86_AVX512 2 
| 3 | #include 4 | 5 | #include "colorspace/x86/gamma_constants_avx512.h" 6 | #include "colorspace/gamma.h" 7 | #include "common/x86/cpuinfo_x86.h" 8 | #include "gtest/gtest.h" 9 | 10 | namespace { 11 | 12 | void test_gamma_to_linear(float (*f)(float), float (*g)(float), float min, float max, float errthr, float biasthr) 13 | { 14 | if (!zimg::query_x86_capabilities().avx512f) { 15 | SUCCEED() << "avx512 not available, skipping"; 16 | return; 17 | } 18 | 19 | zimg::colorspace::EnsureSinglePrecision x87; 20 | 21 | const unsigned long STEPS = 1UL << 16; 22 | float err = 0.0f; 23 | float bias = 0.0f; 24 | 25 | for (unsigned long i = 0; i <= STEPS; ++i) { 26 | float x = min + i * ((max - min) / STEPS); 27 | float ref = f(x); 28 | float test = g(x); 29 | float err_local = (test - ref) / (ref == 0.0f ? FLT_EPSILON : ref); 30 | 31 | err += std::fabs(err_local); 32 | bias += test - ref; 33 | } 34 | 35 | err /= (STEPS + 1); 36 | bias /= (STEPS + 1); 37 | 38 | EXPECT_LT(err, errthr); 39 | EXPECT_LT(std::fabs(bias), biasthr); 40 | } 41 | 42 | void test_linear_to_gamma(float (*f)(float), float (*g)(float), float min, float max, float errthr, float biasthr) 43 | { 44 | if (!zimg::query_x86_capabilities().avx512f) { 45 | SUCCEED() << "avx512 not available, skipping"; 46 | return; 47 | } 48 | 49 | zimg::colorspace::EnsureSinglePrecision x87; 50 | 51 | const unsigned long STEPS = 1UL << 16; 52 | float err = 0.0f; 53 | float bias = 0.0f; 54 | 55 | for (unsigned long i = 0; i <= STEPS; ++i) { 56 | float x = std::exp2(min + i * ((max - min) / STEPS)); 57 | float ref = f(x); 58 | float test = g(x); 59 | float err_local = test - ref; 60 | 61 | err += std::fabs(err_local); 62 | bias += test - ref; 63 | } 64 | 65 | err /= (STEPS + 1); 66 | bias /= (STEPS + 1); 67 | 68 | EXPECT_LT(err, errthr); 69 | EXPECT_LT(std::fabs(bias), biasthr); 70 | } 71 | 72 | } // namespace 73 | 74 | 75 | TEST(GammaConstantsAVX512Test, test_rec1886) 76 | { 77 | using namespace zimg::colorspace; 78 | 79 | 
SCOPED_TRACE("forward"); 80 | test_gamma_to_linear(rec_1886_eotf, avx512constants::rec_1886_eotf, ldexpf(1.0f, -14), 2.0f, 1e-6f, 1e-7f); 81 | SCOPED_TRACE("reverse"); 82 | test_linear_to_gamma(rec_1886_inverse_eotf, avx512constants::rec_1886_inverse_eotf, -30, 1, 1e-6f, 1e-7f); 83 | } 84 | 85 | TEST(GammaConstantsAVX512Test, test_srgb) 86 | { 87 | using namespace zimg::colorspace; 88 | 89 | SCOPED_TRACE("forward"); 90 | test_gamma_to_linear(srgb_eotf, avx512constants::srgb_eotf, avx512constants::SRGBEOTF::knee, 1.0f, 1e-6f, 1e-7f); 91 | SCOPED_TRACE("reverse"); 92 | test_linear_to_gamma(srgb_inverse_eotf, avx512constants::srgb_inverse_eotf, avx512constants::SRGBInverseEOTF::knee, 1.0f, 1e-6f, 1e-7f); 93 | } 94 | 95 | TEST(GammaConstantsAVX512Test, test_st_2084) 96 | { 97 | using namespace zimg::colorspace; 98 | 99 | SCOPED_TRACE("forward"); 100 | test_gamma_to_linear(st_2084_eotf, avx512constants::st_2084_eotf, 1.0f / 4096.0f, 1.0f / 32.0f, 0.15f, 1e-9f); 101 | test_gamma_to_linear(st_2084_eotf, avx512constants::st_2084_eotf, 1.0f / 32.0f, 1.0f, 1e-4f, 1e-6f); 102 | SCOPED_TRACE("reverse"); 103 | test_linear_to_gamma(st_2084_inverse_eotf, avx512constants::st_2084_inverse_eotf, -31, 0, 1e-5f, 1e-7f); 104 | } 105 | 106 | #endif // ZIMG_X86_AVX512 107 | -------------------------------------------------------------------------------- /test/resize/x86/resize_impl_sse_test.cpp: -------------------------------------------------------------------------------- 1 | #ifdef ZIMG_X86 2 | 3 | #include "common/cpuinfo.h" 4 | #include "common/pixel.h" 5 | #include "common/x86/cpuinfo_x86.h" 6 | #include "resize/filter.h" 7 | #include "resize/resize_impl.h" 8 | 9 | #include "gtest/gtest.h" 10 | #include "graph/filter_validator.h" 11 | 12 | namespace { 13 | 14 | void test_case(const zimg::resize::Filter &filter, bool horizontal, unsigned src_w, unsigned src_h, unsigned dst_w, unsigned dst_h, 15 | zimg::PixelType type, const char * const expected_sha1[3], double expected_snr) 16 | { 
17 | if (!zimg::query_x86_capabilities().sse) { 18 | SUCCEED() << "sse not available, skipping"; 19 | return; 20 | } 21 | 22 | SCOPED_TRACE(filter.support()); 23 | SCOPED_TRACE(horizontal ? static_cast(dst_w) / src_w : static_cast(dst_h) / src_h); 24 | 25 | auto builder = zimg::resize::ResizeImplBuilder{ src_w, src_h, type } 26 | .set_horizontal(horizontal) 27 | .set_dst_dim(horizontal ? dst_w : dst_h) 28 | .set_filter(&filter) 29 | .set_shift(0.0) 30 | .set_subwidth(horizontal ? src_w : src_h); 31 | 32 | auto filter_c = builder.set_cpu(zimg::CPUClass::NONE).create(); 33 | auto filter_sse = builder.set_cpu(zimg::CPUClass::X86_SSE).create(); 34 | 35 | ASSERT_FALSE(assert_different_dynamic_type(filter_c.get(), filter_sse.get())); 36 | 37 | FilterValidator validator{ filter_sse.get(), src_w, src_h, type }; 38 | validator.set_sha1(expected_sha1) 39 | .set_ref_filter(filter_c.get(), expected_snr) 40 | .validate(); 41 | } 42 | 43 | } // namespace 44 | 45 | 46 | TEST(ResizeImplSSETest, test_resize_h_f32) 47 | { 48 | const unsigned src_w = 640; 49 | const unsigned dst_w = 960; 50 | const unsigned h = 480; 51 | const zimg::PixelType format = zimg::PixelType::FLOAT; 52 | 53 | const char *expected_sha1[][3] = { 54 | { "1b2e37a345d315b0fa4d11e3532c70cb57b1e569" }, 55 | { "df391f7157d8c283abd408b35894139ca1903872" }, 56 | { "81fcfbdb9a3b31c625a3cdff1cf46da06f8af735" }, 57 | { "389b609ac62a8b9276e00fdcd39b921535196a07" } 58 | }; 59 | const double expected_snr = 120.0; 60 | 61 | test_case(zimg::resize::BilinearFilter{}, true, src_w, h, dst_w, h, format, expected_sha1[0], expected_snr); 62 | test_case(zimg::resize::Spline16Filter{}, true, src_w, h, dst_w, h, format, expected_sha1[1], expected_snr); 63 | test_case(zimg::resize::LanczosFilter{ 4 }, true, src_w, h, dst_w, h, format, expected_sha1[2], expected_snr); 64 | test_case(zimg::resize::LanczosFilter{ 4 }, true, dst_w, h, src_w, h, format, expected_sha1[3], expected_snr); 65 | } 66 | 67 | TEST(ResizeImplSSETest, 
test_resize_v_f32) 68 | { 69 | const unsigned w = 640; 70 | const unsigned src_h = 480; 71 | const unsigned dst_h = 720; 72 | const zimg::PixelType type = zimg::PixelType::FLOAT; 73 | 74 | const char *expected_sha1[][3] = { 75 | { "6b7507617dc89d5d3077f9cc4c832b261dea2be0" }, 76 | { "d07a8c6f3452ada7bd865a3283dc308176541db3" }, 77 | { "bda98bc253213d2e28a54c6ccb7496f0ca5a3b7d" }, 78 | { "6ba3876cd08a5b11ee646954b52b379a3d8b1228" } 79 | }; 80 | const double expected_snr = 120.0; 81 | 82 | test_case(zimg::resize::BilinearFilter{}, false, w, src_h, w, dst_h, type, expected_sha1[0], expected_snr); 83 | test_case(zimg::resize::Spline16Filter{}, false, w, src_h, w, dst_h, type, expected_sha1[1], expected_snr); 84 | test_case(zimg::resize::LanczosFilter{ 4 }, false, w, src_h, w, dst_h, type, expected_sha1[2], expected_snr); 85 | test_case(zimg::resize::LanczosFilter{ 4 }, false, w, dst_h, w, src_h, type, expected_sha1[3], expected_snr); 86 | } 87 | 88 | #endif 89 | -------------------------------------------------------------------------------- /test/resize/x86/resize_impl_avx_test.cpp: -------------------------------------------------------------------------------- 1 | #ifdef ZIMG_X86 2 | 3 | #include "common/cpuinfo.h" 4 | #include "common/pixel.h" 5 | #include "common/x86/cpuinfo_x86.h" 6 | #include "resize/filter.h" 7 | #include "resize/resize_impl.h" 8 | 9 | #include "gtest/gtest.h" 10 | #include "graph/filter_validator.h" 11 | 12 | namespace { 13 | 14 | void test_case(const zimg::resize::Filter &filter, bool horizontal, unsigned src_w, unsigned src_h, unsigned dst_w, unsigned dst_h, 15 | zimg::PixelType type, const char * const expected_sha1[3], double expected_snr) 16 | { 17 | if (!zimg::query_x86_capabilities().avx) { 18 | SUCCEED() << "avx not available, skipping"; 19 | return; 20 | } 21 | 22 | SCOPED_TRACE(filter.support()); 23 | SCOPED_TRACE(horizontal ? 
static_cast(dst_w) / src_w : static_cast(dst_h) / src_h); 24 | 25 | auto builder = zimg::resize::ResizeImplBuilder{ src_w, src_h, type } 26 | .set_horizontal(horizontal) 27 | .set_dst_dim(horizontal ? dst_w : dst_h) 28 | .set_filter(&filter) 29 | .set_shift(0.0) 30 | .set_subwidth(horizontal ? src_w : src_h); 31 | 32 | auto filter_c = builder.set_cpu(zimg::CPUClass::NONE).create(); 33 | auto filter_avx = builder.set_cpu(zimg::CPUClass::X86_AVX).create(); 34 | 35 | ASSERT_FALSE(assert_different_dynamic_type(filter_c.get(), filter_avx.get())); 36 | 37 | FilterValidator validator{ filter_avx.get(), src_w, src_h, type }; 38 | validator.set_sha1(expected_sha1) 39 | .set_ref_filter(filter_c.get(), expected_snr) 40 | .validate(); 41 | } 42 | 43 | } // namespace 44 | 45 | 46 | TEST(ResizeImplAVXTest, test_resize_h_f32) 47 | { 48 | const unsigned src_w = 640; 49 | const unsigned dst_w = 960; 50 | const unsigned h = 480; 51 | const zimg::PixelType format = zimg::PixelType::FLOAT; 52 | 53 | const char *expected_sha1[][3] = { 54 | { "1b2e37a345d315b0fa4d11e3532c70cb57b1e569" }, 55 | { "df391f7157d8c283abd408b35894139ca1903872" }, 56 | { "81fcfbdb9a3b31c625a3cdff1cf46da06f8af735" }, 57 | { "389b609ac62a8b9276e00fdcd39b921535196a07" } 58 | }; 59 | const double expected_snr = 120.0; 60 | 61 | test_case(zimg::resize::BilinearFilter{}, true, src_w, h, dst_w, h, format, expected_sha1[0], expected_snr); 62 | test_case(zimg::resize::Spline16Filter{}, true, src_w, h, dst_w, h, format, expected_sha1[1], expected_snr); 63 | test_case(zimg::resize::LanczosFilter{ 4 }, true, src_w, h, dst_w, h, format, expected_sha1[2], expected_snr); 64 | test_case(zimg::resize::LanczosFilter{ 4 }, true, dst_w, h, src_w, h, format, expected_sha1[3], expected_snr); 65 | } 66 | 67 | 68 | TEST(ResizeImplAVXTest, test_resize_v_f32) 69 | { 70 | const unsigned w = 640; 71 | const unsigned src_h = 480; 72 | const unsigned dst_h = 720; 73 | const zimg::PixelType type = zimg::PixelType::FLOAT; 74 | 75 | const char 
*expected_sha1[][3] = { 76 | { "6b7507617dc89d5d3077f9cc4c832b261dea2be0" }, 77 | { "d07a8c6f3452ada7bd865a3283dc308176541db3" }, 78 | { "127be47bf5124d8ed61f8da2a397d9f5eb14da4a" }, 79 | { "3113e07cb62b071a6ec71e41914e8a2f965020b6" } 80 | }; 81 | const double expected_snr = 120.0; 82 | 83 | test_case(zimg::resize::BilinearFilter{}, false, w, src_h, w, dst_h, type, expected_sha1[0], expected_snr); 84 | test_case(zimg::resize::Spline16Filter{}, false, w, src_h, w, dst_h, type, expected_sha1[1], expected_snr); 85 | test_case(zimg::resize::LanczosFilter{ 4 }, false, w, src_h, w, dst_h, type, expected_sha1[2], expected_snr); 86 | test_case(zimg::resize::LanczosFilter{ 4 }, false, w, dst_h, w, src_h, type, expected_sha1[3], expected_snr); 87 | } 88 | 89 | #endif // ZIMG_X86 90 | -------------------------------------------------------------------------------- /src/zimg/graph/basic_filter.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef ZIMG_GRAPH_BASIC_FILTER_H_ 4 | #define ZIMG_GRAPH_BASIC_FILTER_H_ 5 | 6 | #include 7 | #include "image_filter.h" 8 | 9 | namespace zimg { 10 | 11 | enum class PixelType; 12 | 13 | namespace graph { 14 | 15 | // Copies an image buffer. 16 | class CopyFilter : public ImageFilterBase { 17 | image_attributes m_attr; 18 | bool m_color; 19 | public: 20 | CopyFilter(unsigned width, unsigned height, PixelType type, bool color = false); 21 | 22 | filter_flags get_flags() const override; 23 | 24 | image_attributes get_image_attributes() const override; 25 | 26 | void process(void *, const ImageBuffer *src, const ImageBuffer *dst, void *, unsigned i, unsigned left, unsigned right) const override; 27 | }; 28 | 29 | // Converts greyscale to RGB image by replicating the luma plane. 30 | // 31 | // For any YUV system, a greyscale image is encoded by U=0 and V=0, which also 32 | // implies R=G=B. Since Y is a weighted sum of R, G, and B, this also implies 33 | // R=G=B=Y. 
34 | class RGBExtendFilter : public ImageFilterBase { 35 | image_attributes m_attr; 36 | public: 37 | RGBExtendFilter(unsigned width, unsigned height, PixelType type); 38 | 39 | filter_flags get_flags() const override; 40 | 41 | image_attributes get_image_attributes() const override; 42 | 43 | void process(void *, const ImageBuffer src[], const ImageBuffer dst[], void *, unsigned i, unsigned left, unsigned right) const override; 44 | }; 45 | 46 | // Initializes a plane to a constant value. 47 | class ValueInitializeFilter : public ImageFilterBase { 48 | public: 49 | union value_type { 50 | uint8_t b; 51 | uint16_t w; 52 | float f; 53 | }; 54 | private: 55 | image_attributes m_attr; 56 | value_type m_value; 57 | 58 | void fill_b(void *ptr, size_t n) const; 59 | void fill_w(void *ptr, size_t n) const; 60 | void fill_f(void *ptr, size_t n) const; 61 | public: 62 | ValueInitializeFilter(unsigned width, unsigned height, PixelType type, value_type val); 63 | 64 | filter_flags get_flags() const override; 65 | 66 | image_attributes get_image_attributes() const override; 67 | 68 | void process(void *, const ImageBuffer *, const ImageBuffer *dst, void *, unsigned i, unsigned left, unsigned right) const override; 69 | }; 70 | 71 | // Premultiplies an image. 72 | class PremultiplyFilter : public ImageFilterBase { 73 | unsigned m_width; 74 | unsigned m_height; 75 | bool m_color; 76 | public: 77 | PremultiplyFilter(unsigned width, unsigned height, bool color); 78 | 79 | filter_flags get_flags() const override; 80 | 81 | image_attributes get_image_attributes() const override; 82 | 83 | void process(void *, const ImageBuffer src[], const ImageBuffer dst[], void *, unsigned i, unsigned left, unsigned right) const override; 84 | }; 85 | 86 | // Unpremultiplies an image. 
87 | class UnpremultiplyFilter : public ImageFilterBase { 88 | unsigned m_width; 89 | unsigned m_height; 90 | bool m_color; 91 | public: 92 | UnpremultiplyFilter(unsigned width, unsigned height, bool color); 93 | 94 | filter_flags get_flags() const override; 95 | 96 | image_attributes get_image_attributes() const override; 97 | 98 | void process(void *, const ImageBuffer src[], const ImageBuffer dst[], void *, unsigned i, unsigned left, unsigned right) const override; 99 | }; 100 | 101 | } // namespace graph 102 | } // namespace zimg 103 | 104 | #endif // ZIMG_GRAPH_BASIC_FILTER_H_ 105 | -------------------------------------------------------------------------------- /test/depth/x86/dither_sse2_test.cpp: -------------------------------------------------------------------------------- 1 | #ifdef ZIMG_X86 2 | 3 | #include 4 | #include "common/cpuinfo.h" 5 | #include "common/pixel.h" 6 | #include "common/x86/cpuinfo_x86.h" 7 | #include "graph/image_filter.h" 8 | #include "depth/depth.h" 9 | #include "depth/dither.h" 10 | 11 | #include "gtest/gtest.h" 12 | #include "graph/filter_validator.h" 13 | 14 | namespace { 15 | 16 | void test_case(const zimg::PixelFormat &pixel_in, const zimg::PixelFormat &pixel_out, const char * const expected_sha1[3], double expected_snr) 17 | { 18 | const unsigned w = 640; 19 | const unsigned h = 480; 20 | const zimg::depth::DitherType dither = zimg::depth::DitherType::ORDERED; 21 | 22 | if (!zimg::query_x86_capabilities().sse2) { 23 | SUCCEED() << "sse2 not available, skipping"; 24 | return; 25 | } 26 | 27 | auto filter_c = zimg::depth::create_dither(dither, w, h, pixel_in, pixel_out, zimg::CPUClass::NONE); 28 | auto filter_sse2 = zimg::depth::create_dither(dither, w, h, pixel_in, pixel_out, zimg::CPUClass::X86_SSE2); 29 | 30 | FilterValidator validator{ filter_sse2.get(), w, h, pixel_in }; 31 | validator.set_sha1(expected_sha1) 32 | .set_ref_filter(filter_c.get(), expected_snr) 33 | .validate(); 34 | } 35 | 36 | } // namespace 37 | 38 | 39 
| TEST(DitherSSE2Test, test_ordered_dither_b2b) 40 | { 41 | zimg::PixelFormat pixel_in{ zimg::PixelType::BYTE, 8, true, false }; 42 | zimg::PixelFormat pixel_out{ zimg::PixelType::BYTE, 1, true, false }; 43 | 44 | const char *expected_sha1[3] = { 45 | "85ac9596d3e91f4f52c4b66c611509fbf891064d" 46 | }; 47 | 48 | test_case(pixel_in, pixel_out, expected_sha1, INFINITY); 49 | } 50 | 51 | TEST(DitherSSE2Test, test_ordered_dither_b2w) 52 | { 53 | 54 | zimg::PixelFormat pixel_in{ zimg::PixelType::BYTE, 8, true, false }; 55 | zimg::PixelFormat pixel_out{ zimg::PixelType::WORD, 9, true, false }; 56 | 57 | const char *expected_sha1[3] = { 58 | "267b1039372fab31c14ebf09911da9493ecea95e" 59 | }; 60 | 61 | test_case(pixel_in, pixel_out, expected_sha1, INFINITY); 62 | } 63 | 64 | TEST(DitherSSE2Test, test_ordered_dither_w2b) 65 | { 66 | zimg::PixelFormat pixel_in = zimg::PixelType::WORD; 67 | zimg::PixelFormat pixel_out = zimg::PixelType::BYTE; 68 | 69 | const char *expected_sha1[3] = { 70 | "49bb64a45e15aa87f7f85e6f9b4940ef97308c1b" 71 | }; 72 | 73 | test_case(pixel_in, pixel_out, expected_sha1, INFINITY); 74 | } 75 | 76 | TEST(DitherSSE2Test, test_ordered_dither_w2w) 77 | { 78 | zimg::PixelFormat pixel_in{ zimg::PixelType::WORD, 16, false, false }; 79 | zimg::PixelFormat pixel_out{ zimg::PixelType::WORD, 10, false, false }; 80 | 81 | const char *expected_sha1[3] = { 82 | "0495169ad8e289cf171553f1cf4f2c0599bce986" 83 | }; 84 | 85 | test_case(pixel_in, pixel_out, expected_sha1, INFINITY); 86 | } 87 | 88 | TEST(DitherSSE2Test, test_ordered_dither_f2b) 89 | { 90 | zimg::PixelFormat pixel_in = zimg::PixelType::FLOAT; 91 | zimg::PixelFormat pixel_out = zimg::PixelType::BYTE; 92 | 93 | const char *expected_sha1[3] = { 94 | "3bee9485fd5258fbd5e6ba1a361660bf9aaeaa3f" 95 | }; 96 | 97 | test_case(pixel_in, pixel_out, expected_sha1, INFINITY); 98 | } 99 | 100 | TEST(DitherSSE2Test, test_ordered_dither_f2w) 101 | { 102 | zimg::PixelFormat pixel_in = zimg::PixelType::FLOAT; 103 | 
zimg::PixelFormat pixel_out = zimg::PixelType::WORD; 104 | 105 | const char *expected_sha1[3] = { 106 | "5312234ac7d6198f138b2cded18b5bf48b6af568" 107 | }; 108 | 109 | test_case(pixel_in, pixel_out, expected_sha1, INFINITY); 110 | } 111 | 112 | #endif // ZIMG_X86 113 | -------------------------------------------------------------------------------- /test/colorspace/x86/colorspace_avx2_test.cpp: -------------------------------------------------------------------------------- 1 | #ifdef ZIMG_X86 2 | 3 | #include 4 | #include "common/cpuinfo.h" 5 | #include "common/pixel.h" 6 | #include "common/x86/cpuinfo_x86.h" 7 | #include "graph/image_filter.h" 8 | #include "colorspace/colorspace.h" 9 | 10 | #include "gtest/gtest.h" 11 | #include "graph/filter_validator.h" 12 | 13 | namespace { 14 | 15 | void test_case(const zimg::colorspace::ColorspaceDefinition &csp_in, const zimg::colorspace::ColorspaceDefinition &csp_out, 16 | const char * const expected_sha1[3], double expected_snr) 17 | { 18 | const unsigned w = 640; 19 | const unsigned h = 480; 20 | 21 | if (!zimg::query_x86_capabilities().avx2) { 22 | SUCCEED() << "avx2 not available, skipping"; 23 | return; 24 | } 25 | 26 | zimg::PixelFormat format = zimg::PixelType::FLOAT; 27 | auto builder = zimg::colorspace::ColorspaceConversion{ w, h } 28 | .set_csp_in(csp_in) 29 | .set_csp_out(csp_out) 30 | .set_approximate_gamma(true); 31 | 32 | auto filter_c = builder.set_cpu(zimg::CPUClass::NONE).create(); 33 | auto filter_avx2 = builder.set_cpu(zimg::CPUClass::X86_AVX2).create(); 34 | 35 | FilterValidator validator{ filter_avx2.get(), w, h, format }; 36 | validator.set_sha1(expected_sha1) 37 | .set_ref_filter(filter_c.get(), expected_snr) 38 | .set_yuv(csp_in.matrix != zimg::colorspace::MatrixCoefficients::RGB) 39 | .validate(); 40 | } 41 | 42 | } // namespace 43 | 44 | 45 | TEST(ColorspaceConversionAVX2Test, test_transfer_lut) 46 | { 47 | using namespace zimg::colorspace; 48 | 49 | const char *expected_sha1[][3] = { 50 | { 51 | 
"23d012fcb280f601e2e3c349229d0108e3cd632a", 52 | "7ae186215d5fa45065f7aeac74ab2dc74b556696", 53 | "bad84d4e0de8572c81df6d9f91fef05b1576f9e5" 54 | }, 55 | { 56 | "011ee645ad30bb6ad6d93d8980d89a3e3e073c19", 57 | "5ae0e075b3856d9f491954b477568b17daf7f147", 58 | "84b20f8fa27c23a668540566b9df26c4b42c9afa" 59 | }, 60 | { 61 | "8206be2ae5e8a0fc003daeec4178189eecf82a13", 62 | "24843f17600dd7bf9870f5c778549bd96c333427", 63 | "26a6b00801b41da17d849e02217bf69add6324a6" 64 | }, 65 | { 66 | "16f2274ffac90927de0438114f0ea22e650981a0", 67 | "b1c8b15b6159ab43e7bfc4e715fe3b621628d26e", 68 | "632ae07d6919533c87d2ed28560a60cf070498e2" 69 | }, 70 | }; 71 | const double expected_tolinear_snr = 80.0; 72 | const double expected_togamma_snr = 80.0; 73 | 74 | SCOPED_TRACE("tolinear 709"); 75 | test_case({ MatrixCoefficients::RGB, TransferCharacteristics::REC_709, ColorPrimaries::UNSPECIFIED }, 76 | { MatrixCoefficients::RGB, TransferCharacteristics::LINEAR, ColorPrimaries::UNSPECIFIED }, 77 | expected_sha1[0], expected_tolinear_snr); 78 | SCOPED_TRACE("togamma 709"); 79 | test_case({ MatrixCoefficients::RGB, TransferCharacteristics::LINEAR, ColorPrimaries::UNSPECIFIED }, 80 | { MatrixCoefficients::RGB, TransferCharacteristics::REC_709, ColorPrimaries::UNSPECIFIED }, 81 | expected_sha1[1], expected_togamma_snr); 82 | SCOPED_TRACE("tolinear st2084"); 83 | test_case({ MatrixCoefficients::RGB, TransferCharacteristics::ST_2084, ColorPrimaries::UNSPECIFIED }, 84 | { MatrixCoefficients::RGB, TransferCharacteristics::LINEAR, ColorPrimaries::UNSPECIFIED }, 85 | expected_sha1[2], expected_tolinear_snr); 86 | SCOPED_TRACE("togamma st2084"); 87 | test_case({ MatrixCoefficients::RGB, TransferCharacteristics::LINEAR, ColorPrimaries::UNSPECIFIED }, 88 | { MatrixCoefficients::RGB, TransferCharacteristics::ST_2084, ColorPrimaries::UNSPECIFIED }, 89 | expected_sha1[3], expected_togamma_snr); 90 | } 91 | 92 | #endif // ZIMG_X86 93 | 
-------------------------------------------------------------------------------- /m4/ax_check_compile_flag.m4: -------------------------------------------------------------------------------- 1 | # =========================================================================== 2 | # http://www.gnu.org/software/autoconf-archive/ax_check_compile_flag.html 3 | # =========================================================================== 4 | # 5 | # SYNOPSIS 6 | # 7 | # AX_CHECK_COMPILE_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS], [INPUT]) 8 | # 9 | # DESCRIPTION 10 | # 11 | # Check whether the given FLAG works with the current language's compiler 12 | # or gives an error. (Warnings, however, are ignored) 13 | # 14 | # ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on 15 | # success/failure. 16 | # 17 | # If EXTRA-FLAGS is defined, it is added to the current language's default 18 | # flags (e.g. CFLAGS) when the check is done. The check is thus made with 19 | # the flags: "CFLAGS EXTRA-FLAGS FLAG". This can for example be used to 20 | # force the compiler to issue an error when a bad flag is given. 21 | # 22 | # INPUT gives an alternative input source to AC_COMPILE_IFELSE. 23 | # 24 | # NOTE: Implementation based on AX_CFLAGS_GCC_OPTION. Please keep this 25 | # macro in sync with AX_CHECK_{PREPROC,LINK}_FLAG. 26 | # 27 | # LICENSE 28 | # 29 | # Copyright (c) 2008 Guido U. Draheim 30 | # Copyright (c) 2011 Maarten Bosmans 31 | # 32 | # This program is free software: you can redistribute it and/or modify it 33 | # under the terms of the GNU General Public License as published by the 34 | # Free Software Foundation, either version 3 of the License, or (at your 35 | # option) any later version. 36 | # 37 | # This program is distributed in the hope that it will be useful, but 38 | # WITHOUT ANY WARRANTY; without even the implied warranty of 39 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General 40 | # Public License for more details. 41 | # 42 | # You should have received a copy of the GNU General Public License along 43 | # with this program. If not, see <http://www.gnu.org/licenses/>. 44 | # 45 | # As a special exception, the respective Autoconf Macro's copyright owner 46 | # gives unlimited permission to copy, distribute and modify the configure 47 | # scripts that are the output of Autoconf when processing the Macro. You 48 | # need not follow the terms of the GNU General Public License when using 49 | # or distributing such scripts, even though portions of the text of the 50 | # Macro appear in them. The GNU General Public License (GPL) does govern 51 | # all other use of the material that constitutes the Autoconf Macro. 52 | # 53 | # This special exception to the GPL applies to versions of the Autoconf 54 | # Macro released by the Autoconf Archive. When you make and distribute a 55 | # modified version of the Autoconf Macro, you may extend this special 56 | # exception to the GPL to apply to your modified version as well. 
57 | 58 | #serial 4 59 | 60 | AC_DEFUN([AX_CHECK_COMPILE_FLAG], 61 | [AC_PREREQ(2.64)dnl for _AC_LANG_PREFIX and AS_VAR_IF 62 | AS_VAR_PUSHDEF([CACHEVAR],[ax_cv_check_[]_AC_LANG_ABBREV[]flags_$4_$1])dnl 63 | AC_CACHE_CHECK([whether _AC_LANG compiler accepts $1], CACHEVAR, [ 64 | ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS 65 | _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1" 66 | AC_COMPILE_IFELSE([m4_default([$5],[AC_LANG_PROGRAM()])], 67 | [AS_VAR_SET(CACHEVAR,[yes])], 68 | [AS_VAR_SET(CACHEVAR,[no])]) 69 | _AC_LANG_PREFIX[]FLAGS=$ax_check_save_flags]) 70 | AS_VAR_IF(CACHEVAR,yes, 71 | [m4_default([$2], :)], 72 | [m4_default([$3], :)]) 73 | AS_VAR_POPDEF([CACHEVAR])dnl 74 | ])dnl AX_CHECK_COMPILE_FLAGS 75 | -------------------------------------------------------------------------------- /test/colorspace/x86/colorspace_sse2_test.cpp: -------------------------------------------------------------------------------- 1 | #ifdef ZIMG_X86 2 | 3 | #include 4 | #include "common/cpuinfo.h" 5 | #include "common/pixel.h" 6 | #include "common/x86/cpuinfo_x86.h" 7 | #include "graph/image_filter.h" 8 | #include "colorspace/colorspace.h" 9 | 10 | #include "gtest/gtest.h" 11 | #include "graph/filter_validator.h" 12 | 13 | namespace { 14 | 15 | void test_case(const zimg::colorspace::ColorspaceDefinition &csp_in, const zimg::colorspace::ColorspaceDefinition &csp_out, 16 | const char * const expected_sha1[3], double expected_snr) 17 | { 18 | const unsigned w = 640; 19 | const unsigned h = 480; 20 | 21 | if (!zimg::query_x86_capabilities().sse2) { 22 | SUCCEED() << "sse2 not available, skipping"; 23 | return; 24 | } 25 | 26 | zimg::PixelFormat format = zimg::PixelType::FLOAT; 27 | auto builder = zimg::colorspace::ColorspaceConversion{ w, h } 28 | .set_csp_in(csp_in) 29 | .set_csp_out(csp_out) 30 | .set_approximate_gamma(true); 31 | 32 | auto filter_c = builder.set_cpu(zimg::CPUClass::NONE).create(); 33 | auto filter_sse2 = builder.set_cpu(zimg::CPUClass::X86_SSE2).create(); 
34 | 35 | FilterValidator validator{ filter_sse2.get(), w, h, format }; 36 | validator.set_sha1(expected_sha1) 37 | .set_ref_filter(filter_c.get(), expected_snr) 38 | .set_yuv(csp_in.matrix != zimg::colorspace::MatrixCoefficients::RGB) 39 | .validate(); 40 | } 41 | 42 | } // namespace 43 | 44 | 45 | TEST(ColorspaceConversionSSE2Test, test_transfer_lut) 46 | { 47 | using namespace zimg::colorspace; 48 | 49 | const char *expected_sha1[][3] = { 50 | { 51 | "23d012fcb280f601e2e3c349229d0108e3cd632a", 52 | "7ae186215d5fa45065f7aeac74ab2dc74b556696", 53 | "bad84d4e0de8572c81df6d9f91fef05b1576f9e5" 54 | }, 55 | { 56 | "74e3ebaea6ed216e6792a186592f70149616d2ca", 57 | "af7e809a82f9075d68696d155022a2b12c7260e5", 58 | "d2796151e5d9d01e6aea73d64ac11134424900e8" 59 | }, 60 | { 61 | "8206be2ae5e8a0fc003daeec4178189eecf82a13", 62 | "24843f17600dd7bf9870f5c778549bd96c333427", 63 | "26a6b00801b41da17d849e02217bf69add6324a6" 64 | }, 65 | { 66 | "a33cd49cc2cf605ef8e80d61133d35660ab0ca5a", 67 | "e411937485a414de43f0f67d2e0105efde153f96", 68 | "cd211d2b32dbbcb57c70f095f3e5f9170e468073" 69 | }, 70 | }; 71 | const double expected_tolinear_snr = 80.0; 72 | const double expected_togamma_snr = 60.0; 73 | 74 | SCOPED_TRACE("tolinear 709"); 75 | test_case({ MatrixCoefficients::RGB, TransferCharacteristics::REC_709, ColorPrimaries::UNSPECIFIED }, 76 | { MatrixCoefficients::RGB, TransferCharacteristics::LINEAR, ColorPrimaries::UNSPECIFIED }, 77 | expected_sha1[0], expected_tolinear_snr); 78 | SCOPED_TRACE("togamma 709"); 79 | test_case({ MatrixCoefficients::RGB, TransferCharacteristics::LINEAR, ColorPrimaries::UNSPECIFIED }, 80 | { MatrixCoefficients::RGB, TransferCharacteristics::REC_709, ColorPrimaries::UNSPECIFIED }, 81 | expected_sha1[1], expected_togamma_snr); 82 | SCOPED_TRACE("tolinear st2084"); 83 | test_case({ MatrixCoefficients::RGB, TransferCharacteristics::ST_2084, ColorPrimaries::UNSPECIFIED }, 84 | { MatrixCoefficients::RGB, TransferCharacteristics::LINEAR, 
ColorPrimaries::UNSPECIFIED }, 85 | expected_sha1[2], expected_tolinear_snr); 86 | SCOPED_TRACE("togamma st2084"); 87 | test_case({ MatrixCoefficients::RGB, TransferCharacteristics::LINEAR, ColorPrimaries::UNSPECIFIED }, 88 | { MatrixCoefficients::RGB, TransferCharacteristics::ST_2084, ColorPrimaries::UNSPECIFIED }, 89 | expected_sha1[3], expected_togamma_snr); 90 | } 91 | 92 | #endif // ZIMG_X86 93 | -------------------------------------------------------------------------------- /src/zimg/colorspace/x86/operation_impl_x86.cpp: -------------------------------------------------------------------------------- 1 | #ifdef ZIMG_X86 2 | 3 | #include "common/cpuinfo.h" 4 | #include "common/x86/cpuinfo_x86.h" 5 | #include "colorspace/operation.h" 6 | #include "colorspace/operation_impl.h" 7 | #include "operation_impl_x86.h" 8 | 9 | namespace zimg { 10 | namespace colorspace { 11 | 12 | std::unique_ptr create_matrix_operation_x86(const Matrix3x3 &m, CPUClass cpu) 13 | { 14 | X86Capabilities caps = query_x86_capabilities(); 15 | std::unique_ptr ret; 16 | 17 | if (cpu_is_autodetect(cpu)) { 18 | #ifdef ZIMG_X86_AVX512 19 | if (!ret && cpu == CPUClass::AUTO_64B && caps.avx512f) 20 | ret = create_matrix_operation_avx512(m); 21 | #endif 22 | if (!ret && caps.avx && !cpu_has_slow_avx(caps)) 23 | ret = create_matrix_operation_avx(m); 24 | if (!ret && caps.sse) 25 | ret = create_matrix_operation_sse(m); 26 | } else { 27 | #ifdef ZIMG_X86_AVX512 28 | if (!ret && cpu >= CPUClass::X86_AVX512) 29 | ret = create_matrix_operation_avx512(m); 30 | #endif 31 | if (!ret && cpu >= CPUClass::X86_AVX) 32 | ret = create_matrix_operation_avx(m); 33 | if (!ret && cpu >= CPUClass::X86_SSE) 34 | ret = create_matrix_operation_sse(m); 35 | } 36 | 37 | return ret; 38 | } 39 | 40 | std::unique_ptr create_gamma_operation_x86(const TransferFunction &transfer, const OperationParams ¶ms, CPUClass cpu) 41 | { 42 | X86Capabilities caps = query_x86_capabilities(); 43 | std::unique_ptr ret; 44 | 45 | if 
(cpu_is_autodetect(cpu)) { 46 | #ifdef ZIMG_X86_AVX512 47 | if (!ret && cpu == CPUClass::AUTO_64B && caps.avx512f && caps.avx512bw && caps.avx512dq) 48 | ret = create_gamma_operation_avx512(transfer, params); 49 | #endif 50 | if (!ret && caps.avx2 && !cpu_has_slow_gather(caps)) 51 | ret = create_gamma_operation_avx2(transfer, params); 52 | if (!ret && caps.sse2) 53 | ret = create_gamma_operation_sse2(transfer, params); 54 | } else { 55 | #ifdef ZIMG_X86_AVX512 56 | if (!ret && cpu >= CPUClass::X86_AVX512) 57 | ret = create_gamma_operation_avx512(transfer, params); 58 | #endif 59 | if (!ret && cpu >= CPUClass::X86_AVX2) 60 | ret = create_gamma_operation_avx2(transfer, params); 61 | if (!ret && cpu >= CPUClass::X86_SSE2) 62 | ret = create_gamma_operation_sse2(transfer, params); 63 | } 64 | 65 | return ret; 66 | } 67 | 68 | std::unique_ptr create_inverse_gamma_operation_x86(const TransferFunction &transfer, const OperationParams ¶ms, CPUClass cpu) 69 | { 70 | X86Capabilities caps = query_x86_capabilities(); 71 | std::unique_ptr ret; 72 | 73 | if (cpu_is_autodetect(cpu)) { 74 | #ifdef ZIMG_X86_AVX512 75 | if (!ret && cpu == CPUClass::AUTO_64B && caps.avx512f && caps.avx512bw && caps.avx512dq) 76 | ret = create_inverse_gamma_operation_avx512(transfer, params); 77 | #endif 78 | if (!ret && caps.avx2 && !cpu_has_slow_gather(caps)) 79 | ret = create_inverse_gamma_operation_avx2(transfer, params); 80 | if (!ret && caps.sse2) 81 | ret = create_inverse_gamma_operation_sse2(transfer, params); 82 | } else { 83 | #ifdef ZIMG_X86_AVX512 84 | if (!ret && cpu >= CPUClass::X86_AVX512) 85 | ret = create_inverse_gamma_operation_avx512(transfer, params); 86 | #endif 87 | if (!ret && cpu >= CPUClass::X86_AVX2) 88 | ret = create_inverse_gamma_operation_avx2(transfer, params); 89 | if (!ret && cpu >= CPUClass::X86_SSE2) 90 | ret = create_inverse_gamma_operation_sse2(transfer, params); 91 | } 92 | 93 | return ret; 94 | } 95 | 96 | } // namespace colorspace 97 | } // namespace zimg 98 | 99 
| #endif // ZIMG_X86 100 | -------------------------------------------------------------------------------- /src/zimg/colorspace/colorspace.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "common/cpuinfo.h" 4 | #include "common/except.h" 5 | #include "common/make_unique.h" 6 | #include "common/pixel.h" 7 | #include "common/zassert.h" 8 | #include "graph/basic_filter.h" 9 | #include "graph/image_filter.h" 10 | #include "colorspace.h" 11 | #include "graph.h" 12 | #include "operation.h" 13 | 14 | namespace zimg { 15 | namespace colorspace { 16 | 17 | namespace { 18 | 19 | class ColorspaceConversionImpl final : public graph::ImageFilterBase { 20 | std::array, 6> m_operations; 21 | unsigned m_width; 22 | unsigned m_height; 23 | public: 24 | ColorspaceConversionImpl(unsigned width, unsigned height, const ColorspaceDefinition &in, const ColorspaceDefinition &out, 25 | const OperationParams ¶ms, CPUClass cpu) : 26 | m_width{ width }, 27 | m_height{ height } 28 | { 29 | zassert_d(width <= pixel_max_width(PixelType::FLOAT), "overflow"); 30 | 31 | auto path = get_operation_path(in, out); 32 | zassert(!path.empty(), "empty path"); 33 | zassert(path.size() <= 6, "too many operations"); 34 | 35 | for (size_t i = 0; i < path.size(); ++i) { 36 | m_operations[i] = path[i](params, cpu); 37 | } 38 | } 39 | 40 | filter_flags get_flags() const override 41 | { 42 | filter_flags flags{}; 43 | 44 | flags.same_row = true; 45 | flags.in_place = true; 46 | flags.color = true; 47 | 48 | return flags; 49 | } 50 | 51 | image_attributes get_image_attributes() const override 52 | { 53 | return{ m_width, m_height, PixelType::FLOAT }; 54 | } 55 | 56 | void process(void *, const graph::ImageBuffer src[], const graph::ImageBuffer dst[], void *, unsigned i, unsigned left, unsigned right) const override 57 | { 58 | const float *src_ptr[3]; 59 | float *dst_ptr[3]; 60 | 61 | for (unsigned p = 0; p < 3; ++p) { 62 | src_ptr[p] = 
static_cast(src[p][i]); 63 | dst_ptr[p] = static_cast(dst[p][i]); 64 | } 65 | 66 | m_operations[0]->process(src_ptr, dst_ptr, left, right); 67 | 68 | if (!m_operations[1]) 69 | return; 70 | m_operations[1]->process(dst_ptr, dst_ptr, left, right); 71 | 72 | if (!m_operations[2]) 73 | return; 74 | m_operations[2]->process(dst_ptr, dst_ptr, left, right); 75 | 76 | if (!m_operations[3]) 77 | return; 78 | m_operations[3]->process(dst_ptr, dst_ptr, left, right); 79 | 80 | if (!m_operations[4]) 81 | return; 82 | m_operations[4]->process(dst_ptr, dst_ptr, left, right); 83 | 84 | if (!m_operations[5]) 85 | return; 86 | m_operations[5]->process(dst_ptr, dst_ptr, left, right); 87 | } 88 | }; 89 | 90 | } // namespace 91 | 92 | 93 | ColorspaceConversion::ColorspaceConversion(unsigned width, unsigned height) : 94 | width{ width }, 95 | height{ height }, 96 | csp_in{}, 97 | csp_out{}, 98 | peak_luminance{ 100.0 }, 99 | approximate_gamma{}, 100 | scene_referred{}, 101 | cpu{ CPUClass::NONE } 102 | {} 103 | 104 | std::unique_ptr ColorspaceConversion::create() const try 105 | { 106 | OperationParams params; 107 | params.set_peak_luminance(peak_luminance) 108 | .set_approximate_gamma(approximate_gamma) 109 | .set_scene_referred(scene_referred); 110 | 111 | if (csp_in == csp_out) 112 | return ztd::make_unique(width, height, PixelType::FLOAT, true); 113 | else 114 | return ztd::make_unique(width, height, csp_in, csp_out, params, cpu); 115 | } catch (const std::bad_alloc &) { 116 | error::throw_(); 117 | } 118 | 119 | } // namespace colorspace 120 | } // namespace zimg 121 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | z.lib 2 | ====== 3 | The "z" library implements the commonly required image processing basics of 4 | scaling, colorspace conversion, and depth conversion. 
A simple API enables 5 | conversion between any supported formats to operate with minimal knowledge 6 | from the programmer. All library routines were designed from the ground-up 7 | with correctness, flexibility, and thread-safety as first priorities. 8 | Allocation, buffering, and I/O are cleanly separated from processing, allowing 9 | the programmer to adapt "z" to many scenarios. 10 | 11 | Requirements 12 | ----- 13 | - Byte-addressable architecture 14 | - Two's complement integer encoding 15 | - 32-bit or greater machine word 16 | - C++11 compiler 17 | - Platforms: Microsoft Windows, POSIX 18 | 19 | Building 20 | ----- 21 | The officially supported build system is GNU autotools. Use the provided 22 | "autogen.sh" script to instantiate the familiar "configure" and "make" build 23 | system. Visual Studio project files are not stable and are subject to change. 24 | 25 | Capabilities 26 | ----- 27 | ### Colorspace 28 | 29 | Colorspaces: SMPTE-C (NTSC), Rec.709, Rec.2020 30 | 31 | The colorspace module provides for conversion between any combination of 32 | colorspaces, as defined by the commonly used triplet of matrix coefficients, 33 | transfer characteristics, and color primaries. Conversions are implemented 34 | with intelligent logic that minimizes the number of intermediate 35 | representations required for common scenarios, such as conversion between 36 | YCbCr and RGB. Support is also provided for the non-traditional YCbCr system 37 | of ITU-R BT.2020 constant luminance (CL), which retains higher fidelity with 38 | chroma subsampling. Note that "z" is not a color management system and should 39 | not be used to perform drastic contrast or gamut reduction, such as BT.2020 40 | to BT.709. 41 | 42 | ### Depth 43 | 44 | Formats: BYTE, WORD, HALF, FLOAT 45 | 46 | The depth module provides for conversion between any pixel (number) format, 47 | including one and two-byte integer formats as well as IEEE-754 binary16 48 | (OpenEXR) and binary32 formats. 
Limited range (16-235) and full range (0-255) 49 | integer formats are supported, including conversion between such formats. 50 | Multiple dithering methods are available when converting to integer formats, 51 | from basic rounding to high-quality error diffusion. 52 | 53 | ### Resize 54 | 55 | The resize module provides high-fidelity linear resamplers, including the 56 | popular Bicubic and Lanczos filters. Resampling ratios of up to 100x are 57 | supported for upsampling and downsampling. Full support is provided for 58 | various coordinate systems, including the chroma siting conventions 59 | (e.g. JPEG and MPEG2) as well as interlaced images. 60 | 61 | Performance 62 | ----- 63 | "z" is optimized for Intel(R) Architecture and features faster processing times 64 | than industry-standard swscale software. 65 | 66 | Time (ms) to resize an FHD image to UHD with Lanczos filter. 67 | 68 | | | z.lib 2.8 | swscale 4.0.2* | 69 | |--------------------------------|-----------|----------------| 70 | | Intel(R) Core(TM) i7-8565U | 7.7 | 15.2 | 71 | | Intel(R) Xeon(R) Platinum 8176 | 10.8 | 22.2 | 72 | 73 | Time (ms) to convert FHD BT.709 (YUV) to FHD BT.2020. 
74 | 75 | | | z.lib 2.8 | swscale 4.0.2** | 76 | |--------------------------------|-----------|-----------------| 77 | | Intel(R) Core(TM) i7-8565U | 8.3 | 17.5 | 78 | | Intel(R) Xeon(R) Platinum 8176 | 11.5 | 25.6 | 79 | 80 | \* `scale=3840:2160:sws_flags=lanczos+accurate_rnd:sws_dither=none` 81 | 82 | \** `colorspace=all=bt2020:iall=bt709:format=yuv420p10` 83 | 84 | -------------------------------------------------------------------------------- /src/zimg/resize/resize.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "common/cpuinfo.h" 3 | #include "common/except.h" 4 | #include "common/make_unique.h" 5 | #include "common/pixel.h" 6 | #include "graph/basic_filter.h" 7 | #include "graph/image_filter.h" 8 | #include "resize.h" 9 | #include "resize_impl.h" 10 | 11 | namespace zimg { 12 | namespace resize { 13 | 14 | namespace { 15 | 16 | /* Decides pass order for a two-pass resize: returns true when the estimated cost of running the horizontal pass first is strictly lower than running the vertical pass first (ties pick vertical-first). xscale and yscale are the destination/source ratios supplied by the caller. Each estimate is the first pass's cost plus the second pass's cost scaled by the first pass's ratio; the horizontal term carries a 2.0 weight in both estimates. NOTE(review): the 2.0 weight presumably models a higher per-sample cost for horizontal passes -- confirm. */ bool resize_h_first(double xscale, double yscale) noexcept 17 | { 18 | double h_first_cost = std::max(xscale, 1.0) * 2.0 + xscale * std::max(yscale, 1.0); 19 | double v_first_cost = std::max(yscale, 1.0) + yscale * std::max(xscale, 1.0) * 2.0; 20 | 21 | return h_first_cost < v_first_cost; 22 | } 23 | 24 | } // namespace 25 | 26 | 27 | /* Constructs a resize description for a source image of the given dimensions and pixel type. Defaults set here: depth comes from pixel_depth(type), filter is value-initialized, the destination dimensions equal the source dimensions, both shifts are zero, the subwindow (subwidth/subheight) equals the full source size, and cpu is CPUClass::NONE. */ ResizeConversion::ResizeConversion(unsigned src_width, unsigned src_height, PixelType type) : 28 | src_width{ src_width }, 29 | src_height{ src_height }, 30 | type{ type }, 31 | depth{ pixel_depth(type) }, 32 | filter{}, 33 | dst_width{ src_width }, 34 | dst_height{ src_height }, 35 | shift_w{}, 36 | shift_h{}, 37 | subwidth{ static_cast(src_width) }, 38 | subheight{ static_cast(src_height) }, 39 | cpu{ CPUClass::NONE } 40 | {} 41 | 42 | /* Builds the filter(s) implementing the configured resize and returns them as a filter_pair (first pass, optional second pass). Calls error::throw_ when src_width or dst_width exceeds pixel_max_width(type). An axis is skipped when its source and destination sizes match, its shift is 0 and its subwindow equals the full source size. Both axes skipped: one filter built from (src_width, src_height, type) and a null second entry. One axis skipped: a single pass on the other axis. Otherwise two passes, ordered by resize_h_first. The function-try-block catches std::bad_alloc and calls error::throw_. NOTE(review): template arguments of ztd::make_unique, static_cast and error::throw_ are missing from this text -- confirm the concrete types against the original file. */ auto ResizeConversion::create() const -> filter_pair try 43 | { 44 | if (src_width > pixel_max_width(type) || dst_width > pixel_max_width(type)) 45 | error::throw_(); 46 | 47 | bool skip_h = (src_width == dst_width && shift_w == 0 && subwidth == src_width); 48 | bool skip_v = (src_height == dst_height && 
shift_h == 0 && subheight == src_height); 49 | 50 | /* nothing to resample on either axis */ if (skip_h && skip_v) 51 | return{ ztd::make_unique(src_width, src_height, type), nullptr }; 52 | 53 | /* settings shared by every pass; per-pass settings are applied below */ auto builder = ResizeImplBuilder{ src_width, src_height, type } 54 | .set_depth(depth) 55 | .set_filter(filter) 56 | .set_cpu(cpu); 57 | filter_pair ret{}; 58 | 59 | /* vertical pass only; note the same set_subwidth setter receives subheight for vertical passes */ if (skip_h) { 60 | ret.first = builder.set_horizontal(false) 61 | .set_dst_dim(dst_height) 62 | .set_shift(shift_h) 63 | .set_subwidth(subheight) 64 | .create(); 65 | } else if (skip_v) { /* horizontal pass only */ 66 | ret.first = builder.set_horizontal(true) 67 | .set_dst_dim(dst_width) 68 | .set_shift(shift_w) 69 | .set_subwidth(subwidth) 70 | .create(); 71 | } else { 72 | /* both axes change: choose the pass order from the dst/subwindow ratios */ bool h_first = resize_h_first(static_cast(dst_width) / subwidth, static_cast(dst_height) / subheight); 73 | 74 | if (h_first) { 75 | ret.first = builder.set_horizontal(true) 76 | .set_dst_dim(dst_width) 77 | .set_shift(shift_w) 78 | .set_subwidth(subwidth) 79 | .create(); 80 | 81 | /* the second pass is built with the first pass's output width as its source width */ builder.src_width = dst_width; 82 | ret.second = builder.set_horizontal(false) 83 | .set_dst_dim(dst_height) 84 | .set_shift(shift_h) 85 | .set_subwidth(subheight) 86 | .create(); 87 | } else { 88 | ret.first = builder.set_horizontal(false) 89 | .set_dst_dim(dst_height) 90 | .set_shift(shift_h) 91 | .set_subwidth(subheight) 92 | .create(); 93 | 94 | /* the second pass is built with the first pass's output height as its source height */ builder.src_height = dst_height; 95 | ret.second = builder.set_horizontal(true) 96 | .set_dst_dim(dst_width) 97 | .set_shift(shift_w) 98 | .set_subwidth(subwidth) 99 | .create(); 100 | } 101 | } 102 | 103 | return ret; 104 | } catch (const std::bad_alloc &) { 105 | error::throw_(); 106 | } 107 | 108 | } // namespace resize 109 | } // namespace zimg 110 | --------------------------------------------------------------------------------