├── 978-1-4842-7917-5.jpg ├── AppendixA ├── Example1 │ ├── Example1.cpp │ ├── Example1.vcxproj │ ├── Example1.vcxproj.filters │ ├── Example1.vcxproj.user │ └── Example1_fasm.asm └── TestSolution.sln ├── Chapter01 ├── Ch01_01 │ ├── Ch01_01.cpp │ ├── Ch01_01.h │ ├── Ch01_01.vcxproj │ ├── Ch01_01.vcxproj.filters │ ├── Ch01_01.vcxproj.user │ ├── Ch01_01_fasm.asm │ └── Ch01_01_fcpp.cpp └── Chapter01.sln ├── Chapter02 ├── Ch02_01 │ ├── Ch02_01.cpp │ ├── Ch02_01.h │ ├── Ch02_01.vcxproj │ ├── Ch02_01.vcxproj.filters │ ├── Ch02_01.vcxproj.user │ ├── Ch02_01_fcpp.cpp │ └── makefile ├── Ch02_02 │ ├── Ch02_02.cpp │ ├── Ch02_02.h │ ├── Ch02_02.vcxproj │ ├── Ch02_02.vcxproj.filters │ ├── Ch02_02.vcxproj.user │ ├── Ch02_02_fcpp.cpp │ └── makefile ├── Ch02_03 │ ├── Ch02_03.cpp │ ├── Ch02_03.h │ ├── Ch02_03.vcxproj │ ├── Ch02_03.vcxproj.filters │ ├── Ch02_03.vcxproj.user │ ├── Ch02_03_fcpp.cpp │ └── makefile ├── Ch02_04 │ ├── Ch02_04.cpp │ ├── Ch02_04.h │ ├── Ch02_04.vcxproj │ ├── Ch02_04.vcxproj.filters │ ├── Ch02_04.vcxproj.user │ ├── Ch02_04_fcpp.cpp │ └── makefile ├── Ch02_05 │ ├── Ch02_05.cpp │ ├── Ch02_05.h │ ├── Ch02_05.vcxproj │ ├── Ch02_05.vcxproj.filters │ ├── Ch02_05.vcxproj.user │ ├── Ch02_05_fcpp.cpp │ └── makefile ├── Ch02_06 │ ├── Ch02_06.cpp │ ├── Ch02_06.h │ ├── Ch02_06.vcxproj │ ├── Ch02_06.vcxproj.filters │ ├── Ch02_06.vcxproj.user │ ├── Ch02_06_bm.cpp │ ├── Ch02_06_fcpp.cpp │ ├── Ch02_06_misc.cpp │ └── makefile ├── Ch02_07 │ ├── Ch02_07.cpp │ ├── Ch02_07.h │ ├── Ch02_07.vcxproj │ ├── Ch02_07.vcxproj.filters │ ├── Ch02_07.vcxproj.user │ ├── Ch02_07_bm.cpp │ ├── Ch02_07_fcpp.cpp │ ├── Ch02_07_misc.cpp │ └── makefile └── Chapter02.sln ├── Chapter03 ├── Ch03_01 │ ├── Ch03_01.cpp │ ├── Ch03_01.h │ ├── Ch03_01.vcxproj │ ├── Ch03_01.vcxproj.filters │ ├── Ch03_01.vcxproj.user │ ├── Ch03_01_fcpp.cpp │ └── makefile ├── Ch03_02 │ ├── Ch03_02.cpp │ ├── Ch03_02.h │ ├── Ch03_02.vcxproj │ ├── Ch03_02.vcxproj.filters │ ├── Ch03_02.vcxproj.user │ ├── Ch03_02_fcpp.cpp │ └── 
makefile ├── Ch03_03 │ ├── Ch03_03.cpp │ ├── Ch03_03.h │ ├── Ch03_03.vcxproj │ ├── Ch03_03.vcxproj.filters │ ├── Ch03_03.vcxproj.user │ ├── Ch03_03_fcpp.cpp │ └── makefile ├── Ch03_04 │ ├── Ch03_04.cpp │ ├── Ch03_04.h │ ├── Ch03_04.vcxproj │ ├── Ch03_04.vcxproj.filters │ ├── Ch03_04.vcxproj.user │ ├── Ch03_04_fcpp.cpp │ ├── Ch03_04_misc.cpp │ └── makefile ├── Ch03_05 │ ├── Ch03_05.cpp │ ├── Ch03_05.h │ ├── Ch03_05.vcxproj │ ├── Ch03_05.vcxproj.filters │ ├── Ch03_05.vcxproj.user │ ├── Ch03_05_fcpp.cpp │ ├── Ch03_05_misc.cpp │ └── makefile ├── Ch03_06 │ ├── Ch03_06.cpp │ ├── Ch03_06.h │ ├── Ch03_06.vcxproj │ ├── Ch03_06.vcxproj.filters │ ├── Ch03_06.vcxproj.user │ ├── Ch03_06_fcpp.cpp │ ├── Ch03_06_misc.cpp │ └── makefile ├── Ch03_07 │ ├── Ch03_07.cpp │ ├── Ch03_07.h │ ├── Ch03_07.vcxproj │ ├── Ch03_07.vcxproj.filters │ ├── Ch03_07.vcxproj.user │ ├── Ch03_07_fcpp.cpp │ ├── Ch03_07_misc.cpp │ └── makefile ├── Ch03_08 │ ├── Ch03_08.cpp │ ├── Ch03_08.h │ ├── Ch03_08.vcxproj │ ├── Ch03_08.vcxproj.filters │ ├── Ch03_08.vcxproj.user │ ├── Ch03_08_fcpp.cpp │ └── makefile ├── Ch03_09 │ ├── Ch03_09.cpp │ ├── Ch03_09.h │ ├── Ch03_09.vcxproj │ ├── Ch03_09.vcxproj.filters │ ├── Ch03_09.vcxproj.user │ ├── Ch03_09_fcpp.cpp │ └── makefile └── Chapter03.sln ├── Chapter04 ├── Ch04_01 │ ├── Ch04_01.cpp │ ├── Ch04_01.h │ ├── Ch04_01.vcxproj │ ├── Ch04_01.vcxproj.filters │ ├── Ch04_01.vcxproj.user │ ├── Ch04_01_fcpp.cpp │ └── makefile ├── Ch04_02 │ ├── Ch04_02.cpp │ ├── Ch04_02.h │ ├── Ch04_02.vcxproj │ ├── Ch04_02.vcxproj.filters │ ├── Ch04_02.vcxproj.user │ ├── Ch04_02_fcpp.cpp │ └── makefile ├── Ch04_03 │ ├── Ch04_03.cpp │ ├── Ch04_03.h │ ├── Ch04_03.vcxproj │ ├── Ch04_03.vcxproj.filters │ ├── Ch04_03.vcxproj.user │ ├── Ch04_03_fcpp.cpp │ └── makefile ├── Ch04_04 │ ├── Ch04_04.cpp │ ├── Ch04_04.h │ ├── Ch04_04.vcxproj │ ├── Ch04_04.vcxproj.filters │ ├── Ch04_04.vcxproj.user │ ├── Ch04_04_bm.cpp │ ├── Ch04_04_fcpp.cpp │ ├── Ch04_04_misc.cpp │ └── makefile ├── Ch04_05 │ ├── Ch04_05.cpp 
│ ├── Ch04_05.h │ ├── Ch04_05.vcxproj │ ├── Ch04_05.vcxproj.filters │ ├── Ch04_05.vcxproj.user │ ├── Ch04_05_bm.cpp │ ├── Ch04_05_fcpp.cpp │ ├── Ch04_05_misc.cpp │ └── makefile ├── Ch04_06 │ ├── Ch04_06.cpp │ ├── Ch04_06.h │ ├── Ch04_06.vcxproj │ ├── Ch04_06.vcxproj.filters │ ├── Ch04_06.vcxproj.user │ ├── Ch04_06_bm.cpp │ ├── Ch04_06_fcpp.cpp │ ├── Ch04_06_misc.cpp │ └── makefile ├── Ch04_07 │ ├── Ch04_07.cpp │ ├── Ch04_07.h │ ├── Ch04_07.vcxproj │ ├── Ch04_07.vcxproj.filters │ ├── Ch04_07.vcxproj.user │ ├── Ch04_07_bm.cpp │ ├── Ch04_07_fcpp.cpp │ ├── Ch04_07_misc.cpp │ └── makefile └── Chapter04.sln ├── Chapter05 ├── Ch05_01 │ ├── Ch05_01.cpp │ ├── Ch05_01.h │ ├── Ch05_01.vcxproj │ ├── Ch05_01.vcxproj.filters │ ├── Ch05_01.vcxproj.user │ ├── Ch05_01_fcpp.cpp │ ├── Ch05_01_misc.cpp │ └── makefile ├── Ch05_02 │ ├── Ch05_02.cpp │ ├── Ch05_02.h │ ├── Ch05_02.vcxproj │ ├── Ch05_02.vcxproj.filters │ ├── Ch05_02.vcxproj.user │ ├── Ch05_02_bm.cpp │ ├── Ch05_02_fcpp.cpp │ ├── Ch05_02_misc.cpp │ └── makefile ├── Ch05_03 │ ├── Ch05_03.cpp │ ├── Ch05_03.h │ ├── Ch05_03.vcxproj │ ├── Ch05_03.vcxproj.filters │ ├── Ch05_03.vcxproj.user │ ├── Ch05_03_bm.cpp │ ├── Ch05_03_fcpp.cpp │ ├── Ch05_03_misc.cpp │ └── makefile ├── Ch05_04 │ ├── Ch05_04.cpp │ ├── Ch05_04.h │ ├── Ch05_04.vcxproj │ ├── Ch05_04.vcxproj.filters │ ├── Ch05_04.vcxproj.user │ ├── Ch05_04_bm.cpp │ ├── Ch05_04_fcpp.cpp │ ├── Ch05_04_misc.cpp │ └── makefile ├── Ch05_05 │ ├── Ch05_05.cpp │ ├── Ch05_05.h │ ├── Ch05_05.vcxproj │ ├── Ch05_05.vcxproj.filters │ ├── Ch05_05.vcxproj.user │ ├── Ch05_05_bm.cpp │ ├── Ch05_05_fcpp.cpp │ ├── Ch05_05_misc.cpp │ └── makefile ├── Ch05_06 │ ├── Ch05_06.cpp │ ├── Ch05_06.h │ ├── Ch05_06.vcxproj │ ├── Ch05_06.vcxproj.filters │ ├── Ch05_06.vcxproj.user │ ├── Ch05_06_bm.cpp │ ├── Ch05_06_fcpp.cpp │ ├── Ch05_06_misc.cpp │ └── makefile ├── Ch05_07 │ ├── Ch05_07.cpp │ ├── Ch05_07.h │ ├── Ch05_07.vcxproj │ ├── Ch05_07.vcxproj.filters │ ├── Ch05_07.vcxproj.user │ ├── Ch05_07_bm.cpp │ ├── 
Ch05_07_fcpp.cpp │ ├── Ch05_07_misc.cpp │ └── makefile ├── Ch05_08 │ ├── Ch05_08.cpp │ ├── Ch05_08.h │ ├── Ch05_08.vcxproj │ ├── Ch05_08.vcxproj.filters │ ├── Ch05_08.vcxproj.user │ ├── Ch05_08_bm.cpp │ ├── Ch05_08_fcpp.cpp │ ├── Ch05_08_fcpp2.cpp │ ├── Ch05_08_misc.cpp │ └── makefile ├── Ch05_09 │ ├── Ch05_09.cpp │ ├── Ch05_09.h │ ├── Ch05_09.vcxproj │ ├── Ch05_09.vcxproj.filters │ ├── Ch05_09.vcxproj.user │ ├── Ch05_09_bm.cpp │ ├── Ch05_09_fcpp.cpp │ ├── Ch05_09_fcpp2.cpp │ ├── Ch05_09_misc.cpp │ └── makefile └── Chapter05.sln ├── Chapter06 ├── Ch06_01 │ ├── Ch06_01.cpp │ ├── Ch06_01.h │ ├── Ch06_01.vcxproj │ ├── Ch06_01.vcxproj.filters │ ├── Ch06_01.vcxproj.user │ ├── Ch06_01_bm.cpp │ ├── Ch06_01_fcpp.cpp │ ├── Ch06_01_misc.cpp │ └── makefile ├── Ch06_02 │ ├── Ch06_02.cpp │ ├── Ch06_02.h │ ├── Ch06_02.vcxproj │ ├── Ch06_02.vcxproj.filters │ ├── Ch06_02.vcxproj.user │ ├── Ch06_02_bm.cpp │ ├── Ch06_02_fcpp.cpp │ ├── Ch06_02_misc.cpp │ └── makefile ├── Ch06_03 │ ├── Ch06_03.cpp │ ├── Ch06_03.h │ ├── Ch06_03.vcxproj │ ├── Ch06_03.vcxproj.filters │ ├── Ch06_03.vcxproj.user │ ├── Ch06_03_bm.cpp │ ├── Ch06_03_fcpp.cpp │ ├── Ch06_03_misc.cpp │ ├── Ch06_03_misc2.cpp │ └── makefile ├── Ch06_04 │ ├── Ch06_04.cpp │ ├── Ch06_04.h │ ├── Ch06_04.vcxproj │ ├── Ch06_04.vcxproj.filters │ ├── Ch06_04.vcxproj.user │ ├── Ch06_04_bm.cpp │ ├── Ch06_04_fcpp.cpp │ ├── Ch06_04_misc.cpp │ ├── Ch06_04_misc2.cpp │ └── makefile ├── Ch06_05 │ ├── Ch06_05.cpp │ ├── Ch06_05.vcxproj │ ├── Ch06_05.vcxproj.filters │ ├── Ch06_05.vcxproj.user │ └── makefile └── Chapter06.sln ├── Chapter07 ├── Ch07_01 │ ├── Ch07_01.cpp │ ├── Ch07_01.h │ ├── Ch07_01.vcxproj │ ├── Ch07_01.vcxproj.filters │ ├── Ch07_01.vcxproj.user │ ├── Ch07_01_fcpp.cpp │ └── makefile ├── Ch07_02 │ ├── Ch07_02.cpp │ ├── Ch07_02.h │ ├── Ch07_02.vcxproj │ ├── Ch07_02.vcxproj.filters │ ├── Ch07_02.vcxproj.user │ ├── Ch07_02_fcpp.cpp │ └── makefile ├── Ch07_03 │ ├── Ch07_03.cpp │ ├── Ch07_03.h │ ├── Ch07_03.vcxproj │ ├── 
Ch07_03.vcxproj.filters │ ├── Ch07_03.vcxproj.user │ ├── Ch07_03_bm.cpp │ ├── Ch07_03_fcpp.cpp │ ├── Ch07_03_misc.cpp │ └── makefile ├── Ch07_04 │ ├── Ch07_04.cpp │ ├── Ch07_04.h │ ├── Ch07_04.vcxproj │ ├── Ch07_04.vcxproj.filters │ ├── Ch07_04.vcxproj.user │ ├── Ch07_04_fcpp.cpp │ ├── Ch07_04_misc.cpp │ └── makefile ├── Ch07_05 │ ├── Ch07_05.cpp │ ├── Ch07_05.h │ ├── Ch07_05.vcxproj │ ├── Ch07_05.vcxproj.filters │ ├── Ch07_05.vcxproj.user │ ├── Ch07_05_bm.cpp │ ├── Ch07_05_fcpp.cpp │ ├── Ch07_05_misc.cpp │ └── makefile └── Chapter07.sln ├── Chapter08 ├── Ch08_01 │ ├── Ch08_01.cpp │ ├── Ch08_01.h │ ├── Ch08_01.vcxproj │ ├── Ch08_01.vcxproj.filters │ ├── Ch08_01.vcxproj.user │ ├── Ch08_01_fcpp.cpp │ └── makefile ├── Ch08_02 │ ├── Ch08_02.cpp │ ├── Ch08_02.h │ ├── Ch08_02.vcxproj │ ├── Ch08_02.vcxproj.filters │ ├── Ch08_02.vcxproj.user │ ├── Ch08_02_fcpp.cpp │ └── makefile ├── Ch08_03 │ ├── Ch08_03.cpp │ ├── Ch08_03.h │ ├── Ch08_03.vcxproj │ ├── Ch08_03.vcxproj.filters │ ├── Ch08_03.vcxproj.user │ ├── Ch08_03_fcpp.cpp │ ├── Ch08_03_misc.cpp │ └── makefile ├── Ch08_04 │ ├── Ch08_04.cpp │ ├── Ch08_04.h │ ├── Ch08_04.vcxproj │ ├── Ch08_04.vcxproj.filters │ ├── Ch08_04.vcxproj.user │ ├── Ch08_04_fcpp.cpp │ ├── Ch08_04_misc.cpp │ ├── Ch08_04_misc2.cpp │ └── makefile ├── Ch08_05 │ ├── Ch08_05.cpp │ ├── Ch08_05.h │ ├── Ch08_05.vcxproj │ ├── Ch08_05.vcxproj.filters │ ├── Ch08_05.vcxproj.user │ ├── Ch08_05_bm.cpp │ ├── Ch08_05_fcpp.cpp │ ├── Ch08_05_misc.cpp │ └── makefile ├── Ch08_06 │ ├── Ch08_06.cpp │ ├── Ch08_06.h │ ├── Ch08_06.vcxproj │ ├── Ch08_06.vcxproj.filters │ ├── Ch08_06.vcxproj.user │ ├── Ch08_06_bm.cpp │ ├── Ch08_06_fcpp.cpp │ ├── Ch08_06_misc.cpp │ └── makefile ├── Ch08_07 │ ├── Ch08_07.cpp │ ├── Ch08_07.h │ ├── Ch08_07.vcxproj │ ├── Ch08_07.vcxproj.filters │ ├── Ch08_07.vcxproj.user │ ├── Ch08_07_bm.cpp │ ├── Ch08_07_fcpp.cpp │ ├── Ch08_07_misc.cpp │ └── makefile ├── Ch08_08 │ ├── Ch08_08.cpp │ ├── Ch08_08.h │ ├── Ch08_08.vcxproj │ ├── Ch08_08.vcxproj.filters 
│ ├── Ch08_08.vcxproj.user │ ├── Ch08_08_bm.cpp │ ├── Ch08_08_fcpp.cpp │ ├── Ch08_08_misc.cpp │ └── makefile ├── Ch08_09 │ ├── Ch08_09.cpp │ ├── Ch08_09.h │ ├── Ch08_09.vcxproj │ ├── Ch08_09.vcxproj.filters │ ├── Ch08_09.vcxproj.user │ ├── Ch08_09_bm.cpp │ ├── Ch08_09_fcpp.cpp │ ├── Ch08_09_misc.cpp │ ├── Ch08_09_test.cpp │ └── makefile └── Chapter08.sln ├── Chapter09 ├── Ch09_01 │ ├── CacheInfo.cpp │ ├── Ch09_01.cpp │ ├── Ch09_01.vcxproj │ ├── Ch09_01.vcxproj.filters │ ├── Ch09_01.vcxproj.user │ ├── CpuidInfo.cpp │ ├── CpuidInfo.h │ ├── Cpuid__.cpp │ ├── Cpuid__.h │ └── makefile ├── Ch09_02 │ ├── Ch09_02.cpp │ ├── Ch09_02.h │ ├── Ch09_02.vcxproj │ ├── Ch09_02.vcxproj.filters │ ├── Ch09_02.vcxproj.user │ ├── Ch09_02_fcpp.cpp │ ├── Ch09_02_misc.cpp │ └── makefile ├── Ch09_03 │ ├── Ch09_03.cpp │ ├── Ch09_03.h │ ├── Ch09_03.vcxproj │ ├── Ch09_03.vcxproj.filters │ ├── Ch09_03.vcxproj.user │ ├── Ch09_03_bm.cpp │ ├── Ch09_03_fcpp.cpp │ ├── Ch09_03_misc.cpp │ └── makefile └── Chapter09.sln ├── Chapter11 ├── Ch11_01 │ ├── Ch11_01.cpp │ ├── Ch11_01.h │ ├── Ch11_01.vcxproj │ ├── Ch11_01.vcxproj.filters │ ├── Ch11_01.vcxproj.user │ ├── Ch11_01_fasm.asm │ └── Ch11_01_misc.cpp ├── Ch11_02 │ ├── Ch11_02.cpp │ ├── Ch11_02.h │ ├── Ch11_02.vcxproj │ ├── Ch11_02.vcxproj.filters │ ├── Ch11_02.vcxproj.user │ ├── Ch11_02_fasm.asm │ └── Ch11_02_misc.cpp ├── Ch11_03 │ ├── Ch11_03.cpp │ ├── Ch11_03.h │ ├── Ch11_03.vcxproj │ ├── Ch11_03.vcxproj.filters │ ├── Ch11_03.vcxproj.user │ ├── Ch11_03_fasm.asm │ └── Ch11_03_misc.cpp ├── Ch11_04 │ ├── Ch11_04.cpp │ ├── Ch11_04.h │ ├── Ch11_04.vcxproj │ ├── Ch11_04.vcxproj.filters │ ├── Ch11_04.vcxproj.user │ └── Ch11_04_fasm.asm ├── Ch11_05 │ ├── Ch11_05.cpp │ ├── Ch11_05.h │ ├── Ch11_05.vcxproj │ ├── Ch11_05.vcxproj.filters │ ├── Ch11_05.vcxproj.user │ └── Ch11_05_fasm.asm ├── Ch11_06 │ ├── Ch11_06.cpp │ ├── Ch11_06.h │ ├── Ch11_06.vcxproj │ ├── Ch11_06.vcxproj.filters │ ├── Ch11_06.vcxproj.user │ ├── Ch11_06_fasm.asm │ ├── Ch11_06_fcpp.cpp │ └── 
Ch11_06_misc.cpp ├── Ch11_07 │ ├── Ch11_07.cpp │ ├── Ch11_07.h │ ├── Ch11_07.vcxproj │ ├── Ch11_07.vcxproj.filters │ ├── Ch11_07.vcxproj.user │ ├── Ch11_07_fasm.asm │ └── Ch11_07_misc.cpp ├── Ch11_08 │ ├── Ch11_08.cpp │ ├── Ch11_08.h │ ├── Ch11_08.vcxproj │ ├── Ch11_08.vcxproj.filters │ ├── Ch11_08.vcxproj.user │ └── Ch11_08_fasm.asm └── Chapter11.sln ├── Chapter12 ├── Ch12_01 │ ├── Ch12_01.cpp │ ├── Ch12_01.h │ ├── Ch12_01.vcxproj │ ├── Ch12_01.vcxproj.filters │ ├── Ch12_01.vcxproj.user │ └── Ch12_01_fasm.asm ├── Ch12_02 │ ├── Ch12_02.cpp │ ├── Ch12_02.h │ ├── Ch12_02.vcxproj │ ├── Ch12_02.vcxproj.filters │ ├── Ch12_02.vcxproj.user │ ├── Ch12_02_fasm.asm │ ├── Ch12_02_fcpp.cpp │ └── Ch12_02_misc.cpp ├── Ch12_03 │ ├── Ch12_03.cpp │ ├── Ch12_03.h │ ├── Ch12_03.vcxproj │ ├── Ch12_03.vcxproj.filters │ ├── Ch12_03.vcxproj.user │ ├── Ch12_03_fasm.asm │ └── Ch12_03_misc.cpp ├── Ch12_04 │ ├── Ch12_04.cpp │ ├── Ch12_04.h │ ├── Ch12_04.vcxproj │ ├── Ch12_04.vcxproj.filters │ ├── Ch12_04.vcxproj.user │ └── Ch12_04_fasm.asm ├── Ch12_05 │ ├── Ch12_05.cpp │ ├── Ch12_05.h │ ├── Ch12_05.vcxproj │ ├── Ch12_05.vcxproj.filters │ ├── Ch12_05.vcxproj.user │ ├── Ch12_05_fasm.asm │ └── Ch12_05_fcpp.cpp ├── Ch12_06 │ ├── Ch12_06.cpp │ ├── Ch12_06.h │ ├── Ch12_06.vcxproj │ ├── Ch12_06.vcxproj.filters │ ├── Ch12_06.vcxproj.user │ └── Ch12_06_fasm.asm ├── Ch12_07 │ ├── Ch12_07.cpp │ ├── Ch12_07.h │ ├── Ch12_07.vcxproj │ ├── Ch12_07.vcxproj.filters │ ├── Ch12_07.vcxproj.user │ └── Ch12_07_fasm.asm ├── Ch12_08 │ ├── Ch12_08.cpp │ ├── Ch12_08.h │ ├── Ch12_08.vcxproj │ ├── Ch12_08.vcxproj.filters │ ├── Ch12_08.vcxproj.user │ ├── Ch12_08_fasm.asm │ └── Ch12_08_fcpp.cpp ├── Ch12_09 │ ├── Ch12_09.cpp │ ├── Ch12_09.h │ ├── Ch12_09.vcxproj │ ├── Ch12_09.vcxproj.filters │ ├── Ch12_09.vcxproj.user │ ├── Ch12_09_fasm.asm │ └── Ch12_09_fcpp.cpp └── Chapter12.sln ├── Chapter13 ├── Ch13_01 │ ├── Ch13_01.cpp │ ├── Ch13_01.h │ ├── Ch13_01.vcxproj │ ├── Ch13_01.vcxproj.filters │ ├── Ch13_01.vcxproj.user │ 
└── Ch13_01_fasm.asm ├── Ch13_02 │ ├── Ch13_02.cpp │ ├── Ch13_02.h │ ├── Ch13_02.vcxproj │ ├── Ch13_02.vcxproj.filters │ ├── Ch13_02.vcxproj.user │ └── Ch13_02_fasm.asm ├── Ch13_03 │ ├── Ch13_03.cpp │ ├── Ch13_03.h │ ├── Ch13_03.vcxproj │ ├── Ch13_03.vcxproj.filters │ ├── Ch13_03.vcxproj.user │ └── Ch13_03_fasm.asm ├── Ch13_04 │ ├── Ch13_04.cpp │ ├── Ch13_04.h │ ├── Ch13_04.vcxproj │ ├── Ch13_04.vcxproj.filters │ ├── Ch13_04.vcxproj.user │ └── Ch13_04_fasm.asm ├── Ch13_05 │ ├── Ch13_05.cpp │ ├── Ch13_05.h │ ├── Ch13_05.vcxproj │ ├── Ch13_05.vcxproj.filters │ ├── Ch13_05.vcxproj.user │ ├── Ch13_05_bm.cpp │ ├── Ch13_05_fasm.asm │ ├── Ch13_05_fcpp.cpp │ └── Ch13_05_misc.cpp ├── Ch13_06 │ ├── Ch13_06.cpp │ ├── Ch13_06.h │ ├── Ch13_06.vcxproj │ ├── Ch13_06.vcxproj.filters │ ├── Ch13_06.vcxproj.user │ ├── Ch13_06_bm.cpp │ ├── Ch13_06_fasm.asm │ ├── Ch13_06_fcpp.cpp │ └── Ch13_06_misc.cpp └── Chapter13.sln ├── Chapter14 ├── Ch14_01 │ ├── Ch14_01.cpp │ ├── Ch14_01.h │ ├── Ch14_01.vcxproj │ ├── Ch14_01.vcxproj.filters │ ├── Ch14_01.vcxproj.user │ └── Ch14_01_fasm.asm ├── Ch14_02 │ ├── Ch14_02.cpp │ ├── Ch14_02.h │ ├── Ch14_02.vcxproj │ ├── Ch14_02.vcxproj.filters │ ├── Ch14_02.vcxproj.user │ └── Ch14_02_fasm.asm ├── Ch14_03 │ ├── Ch14_03.cpp │ ├── Ch14_03.h │ ├── Ch14_03.vcxproj │ ├── Ch14_03.vcxproj.filters │ ├── Ch14_03.vcxproj.user │ ├── Ch14_03_fasm.asm │ ├── Ch14_03_fcpp.cpp │ └── Ch14_03_misc.cpp ├── Ch14_04 │ ├── Ch14_04.cpp │ ├── Ch14_04.h │ ├── Ch14_04.vcxproj │ ├── Ch14_04.vcxproj.filters │ ├── Ch14_04.vcxproj.user │ ├── Ch14_04_fasm.asm │ ├── Ch14_04_fcpp.cpp │ └── Ch14_04_misc.cpp ├── Ch14_05 │ ├── Ch14_05.cpp │ ├── Ch14_05.h │ ├── Ch14_05.vcxproj │ ├── Ch14_05.vcxproj.filters │ ├── Ch14_05.vcxproj.user │ ├── Ch14_05_fasm.asm │ └── Ch14_05_fcpp.cpp └── Chapter14.sln ├── Chapter15 ├── Ch15_01 │ ├── Ch15_01.cpp │ ├── Ch15_01.h │ ├── Ch15_01.vcxproj │ ├── Ch15_01.vcxproj.filters │ ├── Ch15_01.vcxproj.user │ └── Ch15_01_fasm.asm ├── Ch15_02 │ ├── Ch15_02.cpp │ ├── 
Ch15_02.h │ ├── Ch15_02.vcxproj │ ├── Ch15_02.vcxproj.filters │ ├── Ch15_02.vcxproj.user │ └── Ch15_02_fasm.asm ├── Ch15_03 │ ├── Ch15_03.cpp │ ├── Ch15_03.h │ ├── Ch15_03.vcxproj │ ├── Ch15_03.vcxproj.filters │ ├── Ch15_03.vcxproj.user │ ├── Ch15_03_bm.cpp │ ├── Ch15_03_fasm.asm │ ├── Ch15_03_fcpp.cpp │ └── Ch15_03_misc.cpp ├── Ch15_04 │ ├── Ch15_04.cpp │ ├── Ch15_04.h │ ├── Ch15_04.vcxproj │ ├── Ch15_04.vcxproj.filters │ ├── Ch15_04.vcxproj.user │ ├── Ch15_04_bm.cpp │ ├── Ch15_04_fasm.asm │ ├── Ch15_04_fcpp.cpp │ └── Ch15_04_misc.cpp ├── Ch15_05 │ ├── Ch15_05.cpp │ ├── Ch15_05.h │ ├── Ch15_05.vcxproj │ ├── Ch15_05.vcxproj.filters │ ├── Ch15_05.vcxproj.user │ ├── Ch15_05_bm.cpp │ ├── Ch15_05_fasm.asm │ ├── Ch15_05_fcpp.cpp │ └── Ch15_05_misc.cpp └── Chapter15.sln ├── Chapter16 ├── Ch16_01 │ ├── Ch16_01.cpp │ ├── Ch16_01.h │ ├── Ch16_01.vcxproj │ ├── Ch16_01.vcxproj.filters │ ├── Ch16_01.vcxproj.user │ ├── Ch16_01_fasm.asm │ ├── Ch16_01_fcpp.cpp │ └── Ch16_01_misc.cpp ├── Ch16_02 │ ├── Ch16_02.cpp │ ├── Ch16_02.h │ ├── Ch16_02.vcxproj │ ├── Ch16_02.vcxproj.filters │ ├── Ch16_02.vcxproj.user │ ├── Ch16_02_bm.cpp │ ├── Ch16_02_fasm.asm │ ├── Ch16_02_fcpp.cpp │ ├── Ch16_02_misc.cpp │ └── Ch16_02_test.cpp ├── Ch16_03 │ ├── Ch16_03.cpp │ ├── Ch16_03.h │ ├── Ch16_03.vcxproj │ ├── Ch16_03.vcxproj.filters │ ├── Ch16_03.vcxproj.user │ ├── Ch16_03_bm.cpp │ ├── Ch16_03_fasm.asm │ ├── Ch16_03_fcpp.cpp │ └── Ch16_03_misc.cpp ├── Ch16_04 │ ├── Ch16_04.cpp │ ├── Ch16_04.h │ ├── Ch16_04.vcxproj │ ├── Ch16_04.vcxproj.filters │ ├── Ch16_04.vcxproj.user │ ├── Ch16_04_bm.cpp │ ├── Ch16_04_fasm.asm │ ├── Ch16_04_fcpp.cpp │ └── Ch16_04_misc.cpp ├── Ch16_05 │ ├── Ch16_05.cpp │ ├── Ch16_05.h │ ├── Ch16_05.vcxproj │ ├── Ch16_05.vcxproj.filters │ ├── Ch16_05.vcxproj.user │ ├── Ch16_05_bm.cpp │ ├── Ch16_05_fasm.asm │ ├── Ch16_05_fcpp.cpp │ └── Ch16_05_misc.cpp └── Chapter16.sln ├── Chapter17 ├── Ch17_01 │ ├── Ch17_01.cpp │ ├── Ch17_01.h │ ├── Ch17_01.vcxproj │ ├── Ch17_01.vcxproj.filters │ 
├── Ch17_01.vcxproj.user │ └── Ch17_01_fasm.asm ├── Ch17_02 │ ├── Ch17_02.cpp │ ├── Ch17_02.h │ ├── Ch17_02.vcxproj │ ├── Ch17_02.vcxproj.filters │ ├── Ch17_02.vcxproj.user │ └── Ch17_02_fasm.asm ├── Ch17_03 │ ├── Ch17_03.cpp │ ├── Ch17_03.h │ ├── Ch17_03.vcxproj │ ├── Ch17_03.vcxproj.filters │ ├── Ch17_03.vcxproj.user │ ├── Ch17_03_fasm.asm │ ├── Ch17_03_fcpp.cpp │ └── Ch17_03_misc.cpp ├── Ch17_04 │ ├── Ch17_04.cpp │ ├── Ch17_04.h │ ├── Ch17_04.vcxproj │ ├── Ch17_04.vcxproj.filters │ ├── Ch17_04.vcxproj.user │ ├── Ch17_04_bm.cpp │ ├── Ch17_04_fasm.asm │ ├── Ch17_04_fcpp.cpp │ └── Ch17_04_misc.cpp └── Chapter17.sln ├── Chapter18 ├── Ch18_01 │ ├── Ch18_01.cpp │ ├── Ch18_01.h │ ├── Ch18_01.vcxproj │ ├── Ch18_01.vcxproj.filters │ ├── Ch18_01.vcxproj.user │ └── Ch18_01_fasm.asm ├── Ch18_02 │ ├── Ch18_02.cpp │ ├── Ch18_02.h │ ├── Ch18_02.vcxproj │ ├── Ch18_02.vcxproj.filters │ ├── Ch18_02.vcxproj.user │ └── Ch18_02_fasm.asm ├── Ch18_03 │ ├── Ch18_03.cpp │ ├── Ch18_03.h │ ├── Ch18_03.vcxproj │ ├── Ch18_03.vcxproj.filters │ ├── Ch18_03.vcxproj.user │ ├── Ch18_03_fasm.asm │ ├── Ch18_03_fcpp.cpp │ └── Ch18_03_misc.cpp ├── Ch18_04 │ ├── Ch18_04.cpp │ ├── Ch18_04.h │ ├── Ch18_04.vcxproj │ ├── Ch18_04.vcxproj.filters │ ├── Ch18_04.vcxproj.user │ ├── Ch18_04_bm.cpp │ ├── Ch18_04_fasm.asm │ ├── Ch18_04_fcpp.cpp │ └── Ch18_04_misc.cpp ├── Ch18_05 │ ├── Ch18_05.cpp │ ├── Ch18_05.h │ ├── Ch18_05.vcxproj │ ├── Ch18_05.vcxproj.filters │ ├── Ch18_05.vcxproj.user │ ├── Ch18_05_bm.cpp │ ├── Ch18_05_fasm.asm │ ├── Ch18_05_fasm2.asm │ ├── Ch18_05_fcpp.cpp │ └── Ch18_05_misc.cpp ├── Ch18_06 │ ├── Ch18_06.cpp │ ├── Ch18_06.h │ ├── Ch18_06.vcxproj │ ├── Ch18_06.vcxproj.filters │ ├── Ch18_06.vcxproj.user │ ├── Ch18_06_bm.cpp │ ├── Ch18_06_fasm.asm │ ├── Ch18_06_fcpp.cpp │ └── Ch18_06_misc.cpp └── Chapter18.sln ├── Chapter19 ├── Ch19_01 │ ├── Ch19_01.cpp │ ├── Ch19_01.h │ ├── Ch19_01.vcxproj │ ├── Ch19_01.vcxproj.filters │ ├── Ch19_01.vcxproj.user │ ├── Ch19_01_bm.cpp │ ├── Ch19_01_fcpp.cpp │ 
├── Ch19_01_fcpp2.cpp │ ├── Ch19_01_misc.cpp │ └── makefile ├── Ch19_02 │ ├── Ch19_02.cpp │ ├── Ch19_02.h │ ├── Ch19_02.vcxproj │ ├── Ch19_02.vcxproj.filters │ ├── Ch19_02.vcxproj.user │ ├── Ch19_02_bm.cpp │ ├── Ch19_02_fcpp.cpp │ ├── Ch19_02_fcpp2.cpp │ ├── Ch19_02_misc.cpp │ ├── Ch19_02_test.cpp │ └── makefile └── Chapter19.sln ├── Contributing.md ├── Data ├── ImageA.png ├── ImageB.png ├── ImageC.png ├── ImageD.png ├── ImageE.png └── ImageF.png ├── Doc ├── ImportantNotes.txt ├── ReleaseHistory.txt ├── VS2022 Review Solution Actions Dialog Box.jpg └── VS2022Notes.txt ├── Include ├── AlignedMem.h ├── BmThreadTimer.h ├── ImageBuffer.h ├── ImageMatrix.h ├── ImageMatrixL.h ├── ImageMatrixW.h ├── ImageMisc.h ├── ImagePng.h ├── MF.h ├── MT.h ├── MT_Convolve.h ├── MacrosX86-64-AVX.asmh ├── MatrixF32.h ├── MatrixF64.h ├── MiscTypes.h ├── OS.h ├── SimdMath.h ├── XmmVal.h ├── YmmVal.h ├── ZmmVal.h └── cmpequ.asmh ├── LICENSE.txt ├── README.md └── errata.md /978-1-4842-7917-5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/modern-parallel-programming-cpp-assembly/608b328fe91d4afb91d144f4123a205a0b18242e/978-1-4842-7917-5.jpg -------------------------------------------------------------------------------- /AppendixA/Example1/Example1.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | 23 | 24 | Source Files 25 | 26 | 27 | -------------------------------------------------------------------------------- /AppendixA/Example1/Example1.vcxproj.user: 
-------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /AppendixA/TestSolution.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 16 4 | VisualStudioVersion = 16.0.31829.152 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Example1", "Example1\Example1.vcxproj", "{D5ADB351-2739-4853-AD8F-E8A28C5797F7}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|x64 = Debug|x64 11 | Release|x64 = Release|x64 12 | EndGlobalSection 13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 14 | {D5ADB351-2739-4853-AD8F-E8A28C5797F7}.Debug|x64.ActiveCfg = Debug|x64 15 | {D5ADB351-2739-4853-AD8F-E8A28C5797F7}.Debug|x64.Build.0 = Debug|x64 16 | {D5ADB351-2739-4853-AD8F-E8A28C5797F7}.Release|x64.ActiveCfg = Release|x64 17 | {D5ADB351-2739-4853-AD8F-E8A28C5797F7}.Release|x64.Build.0 = Release|x64 18 | EndGlobalSection 19 | GlobalSection(SolutionProperties) = preSolution 20 | HideSolutionNode = FALSE 21 | EndGlobalSection 22 | GlobalSection(ExtensibilityGlobals) = postSolution 23 | SolutionGuid = {30740DBB-52E9-43EF-BC5D-8C33B7137F4E} 24 | EndGlobalSection 25 | EndGlobal 26 | -------------------------------------------------------------------------------- /Chapter01/Ch01_01/Ch01_01.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch01_01.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | 7 | // Ch01_01_fcpp.cpp 8 | extern void CalcZ_Cpp(float* z, const float* x, const float* y, size_t n); 9 | extern void CalcZ_Iavx(float* z, const float* x, const float* y, size_t n); 10 | 11 | // Ch01_01_fasm.asm 
12 | extern "C" void CalcZ_Aavx(float* z, const float* x, const float* y, size_t n); 13 | -------------------------------------------------------------------------------- /Chapter01/Ch01_01/Ch01_01.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter01/Ch01_01/Ch01_01_fcpp.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch01_01_fcpp.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | #include "Ch01_01.h" 7 | 8 | void CalcZ_Cpp(float* z, const float* x, const float* y, size_t n) 9 | { 10 | for (size_t i = 0; i < n; i++) 11 | z[i] = x[i] + y[i]; 12 | } 13 | 14 | void CalcZ_Iavx(float* z, const float* x, const float* y, size_t n) 15 | { 16 | size_t i = 0; 17 | const size_t num_simd_elements = 8; 18 | 19 | for (; n - i >= num_simd_elements; i += num_simd_elements) 20 | { 21 | // Calculate z[i:i+7] = x[i:i+7] + y[i:i+7] 22 | __m256 x_vals = _mm256_loadu_ps(&x[i]); 23 | __m256 y_vals = _mm256_loadu_ps(&y[i]); 24 | __m256 z_vals = _mm256_add_ps(x_vals, y_vals); 25 | 26 | _mm256_storeu_ps(&z[i], z_vals); 27 | } 28 | 29 | // Calculate z[i] = x[i] + y[i] for any remaining elements 30 | for (; i < n; i += 1) 31 | z[i] = x[i] + y[i]; 32 | } 33 | -------------------------------------------------------------------------------- /Chapter01/Chapter01.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 16 4 | VisualStudioVersion = 16.0.31424.327 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Ch01_01", "Ch01_01\Ch01_01.vcxproj", "{467CFDA8-9F54-4C8A-945C-430284891B93}" 7 | EndProject 8 | Global 9 | 
GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|x64 = Debug|x64 11 | Release|x64 = Release|x64 12 | EndGlobalSection 13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 14 | {467CFDA8-9F54-4C8A-945C-430284891B93}.Debug|x64.ActiveCfg = Debug|x64 15 | {467CFDA8-9F54-4C8A-945C-430284891B93}.Debug|x64.Build.0 = Debug|x64 16 | {467CFDA8-9F54-4C8A-945C-430284891B93}.Release|x64.ActiveCfg = Release|x64 17 | {467CFDA8-9F54-4C8A-945C-430284891B93}.Release|x64.Build.0 = Release|x64 18 | EndGlobalSection 19 | GlobalSection(SolutionProperties) = preSolution 20 | HideSolutionNode = FALSE 21 | EndGlobalSection 22 | GlobalSection(ExtensibilityGlobals) = postSolution 23 | SolutionGuid = {3ACB9B7D-D21D-4217-A57F-E9955222E4F9} 24 | EndGlobalSection 25 | EndGlobal 26 | -------------------------------------------------------------------------------- /Chapter02/Ch02_01/Ch02_01.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch02_01.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include "XmmVal.h" 7 | 8 | // Ch02_01_fcpp.cpp 9 | extern void AddI16_Iavx(XmmVal* c1, XmmVal* c2, const XmmVal* a, const XmmVal* b); 10 | extern void AddU16_Iavx(XmmVal* c1, XmmVal* c2, const XmmVal* a, const XmmVal* b); 11 | -------------------------------------------------------------------------------- /Chapter02/Ch02_01/Ch02_01.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter02/Ch02_01/Ch02_01_fcpp.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch02_01_fcpp.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | #include "Ch02_01.h" 7 | 8 | void AddI16_Iavx(XmmVal* 
c1, XmmVal* c2, const XmmVal* a, const XmmVal* b) 9 | { 10 | __m128i a_vals = _mm_load_si128((__m128i*)a); 11 | __m128i b_vals = _mm_load_si128((__m128i*)b); 12 | 13 | __m128i c1_vals = _mm_add_epi16(a_vals, b_vals); 14 | __m128i c2_vals = _mm_adds_epi16(a_vals, b_vals); 15 | 16 | _mm_store_si128((__m128i*)c1, c1_vals); 17 | _mm_store_si128((__m128i*)c2, c2_vals); 18 | } 19 | 20 | void AddU16_Iavx(XmmVal* c1, XmmVal* c2, const XmmVal* a, const XmmVal* b) 21 | { 22 | __m128i a_vals = _mm_load_si128((__m128i*)a); 23 | __m128i b_vals = _mm_load_si128((__m128i*)b); 24 | 25 | __m128i c1_vals = _mm_add_epi16(a_vals, b_vals); 26 | __m128i c2_vals = _mm_adds_epu16(a_vals, b_vals); 27 | 28 | _mm_store_si128((__m128i*)c1, c1_vals); 29 | _mm_store_si128((__m128i*)c2, c2_vals); 30 | } 31 | -------------------------------------------------------------------------------- /Chapter02/Ch02_02/Ch02_02.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch02_02.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include "XmmVal.h" 7 | 8 | // Ch02_02_fcpp.cpp 9 | extern void SubI32_Iavx(XmmVal* c, const XmmVal* a, const XmmVal* b); 10 | extern void SubI64_Iavx(XmmVal* c, const XmmVal* a, const XmmVal* b); 11 | -------------------------------------------------------------------------------- /Chapter02/Ch02_02/Ch02_02.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter02/Ch02_02/Ch02_02_fcpp.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch02_02_fcpp.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | #include "Ch02_02.h" 7 | 8 | extern void SubI32_Iavx(XmmVal* c, const XmmVal* a, const 
XmmVal* b) 9 | { 10 | __m128i a_vals = _mm_load_si128((__m128i*)a); 11 | __m128i b_vals = _mm_load_si128((__m128i*)b); 12 | __m128i c_vals = _mm_sub_epi32(a_vals, b_vals); 13 | 14 | _mm_store_si128((__m128i*)c, c_vals); 15 | } 16 | 17 | extern void SubI64_Iavx(XmmVal* c, const XmmVal* a, const XmmVal* b) 18 | { 19 | __m128i a_vals = _mm_load_si128((__m128i*)a); 20 | __m128i b_vals = _mm_load_si128((__m128i*)b); 21 | __m128i c_vals = _mm_sub_epi64(a_vals, b_vals); 22 | 23 | _mm_store_si128((__m128i*)c, c_vals); 24 | } 25 | -------------------------------------------------------------------------------- /Chapter02/Ch02_03/Ch02_03.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch02_03.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include "XmmVal.h" 7 | 8 | // Ch02_03_fcpp.cpp 9 | extern void MulI16_Iavx(XmmVal c[2], const XmmVal* a, const XmmVal* b); 10 | extern void MulI32a_Iavx(XmmVal* c, const XmmVal* a, const XmmVal* b); 11 | extern void MulI32b_Iavx(XmmVal c[2], const XmmVal* a, const XmmVal* b); 12 | -------------------------------------------------------------------------------- /Chapter02/Ch02_03/Ch02_03.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter02/Ch02_04/Ch02_04.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch02_04.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include "XmmVal.h" 7 | 8 | // Ch02_04_fcpp.cpp 9 | extern void AndU16_Iavx(XmmVal* c, const XmmVal* a, const XmmVal* b); 10 | extern void OrU16_Iavx(XmmVal* c, const XmmVal* a, const XmmVal* b); 11 | extern void XorU16_Iavx(XmmVal* c, const XmmVal* a, const XmmVal* b); 12 | 
-------------------------------------------------------------------------------- /Chapter02/Ch02_04/Ch02_04.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter02/Ch02_04/Ch02_04_fcpp.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch02_04_fcpp.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | #include "Ch02_04.h" 7 | 8 | void AndU16_Iavx(XmmVal* c, const XmmVal* a, const XmmVal* b) 9 | { 10 | __m128i a_vals = _mm_load_si128((__m128i*)a); 11 | __m128i b_vals = _mm_load_si128((__m128i*)b); 12 | __m128i c_vals = _mm_and_si128(a_vals, b_vals); 13 | 14 | _mm_store_si128((__m128i*)c, c_vals); 15 | } 16 | 17 | void OrU16_Iavx(XmmVal* c, const XmmVal* a, const XmmVal* b) 18 | { 19 | __m128i a_vals = _mm_load_si128((__m128i*)a); 20 | __m128i b_vals = _mm_load_si128((__m128i*)b); 21 | __m128i c_vals = _mm_or_si128(a_vals, b_vals); 22 | 23 | _mm_store_si128((__m128i*)c, c_vals); 24 | } 25 | 26 | void XorU16_Iavx(XmmVal* c, const XmmVal* a, const XmmVal* b) 27 | { 28 | __m128i a_vals = _mm_load_si128((__m128i*)a); 29 | __m128i b_vals = _mm_load_si128((__m128i*)b); 30 | __m128i c_vals = _mm_xor_si128(a_vals, b_vals); 31 | 32 | _mm_store_si128((__m128i*)c, c_vals); 33 | } 34 | -------------------------------------------------------------------------------- /Chapter02/Ch02_05/Ch02_05.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch02_05.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include "XmmVal.h" 7 | 8 | // Ch02_05_fcpp.cpp 9 | extern void SllU16_Iavx(XmmVal* c, const XmmVal* a, int count); 10 | extern void SrlU16_Iavx(XmmVal* c, const XmmVal* a, int count); 11 | 
extern void SraU16_Iavx(XmmVal* c, const XmmVal* a, int count); 12 | -------------------------------------------------------------------------------- /Chapter02/Ch02_05/Ch02_05.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter02/Ch02_05/Ch02_05_fcpp.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch02_05_fcpp.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | #include "Ch02_05.h" 7 | 8 | void SllU16_Iavx(XmmVal* c, const XmmVal* a, int count) 9 | { 10 | __m128i a_vals = _mm_load_si128((__m128i*)a); 11 | __m128i b_vals = _mm_slli_epi16(a_vals, count); 12 | 13 | _mm_store_si128((__m128i*)c, b_vals); 14 | } 15 | 16 | void SrlU16_Iavx(XmmVal* c, const XmmVal* a, int count) 17 | { 18 | __m128i a_vals = _mm_load_si128((__m128i*)a); 19 | __m128i b_vals = _mm_srli_epi16(a_vals, count); 20 | 21 | _mm_store_si128((__m128i*)c, b_vals); 22 | } 23 | 24 | void SraU16_Iavx(XmmVal* c, const XmmVal* a, int count) 25 | { 26 | __m128i a_vals = _mm_load_si128((__m128i*)a); 27 | __m128i b_vals = _mm_srai_epi16(a_vals, count); 28 | 29 | _mm_store_si128((__m128i*)c, b_vals); 30 | } 31 | -------------------------------------------------------------------------------- /Chapter02/Ch02_06/Ch02_06.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch02_06.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | #include "Ch02_06.h" 7 | #include "AlignedMem.h" 8 | 9 | static void CalcMinMaxU8(); 10 | 11 | int main() 12 | { 13 | CalcMinMaxU8(); 14 | CalcMinMaxU8_bm(); 15 | } 16 | 17 | static void CalcMinMaxU8() 18 | { 19 | size_t n = c_NumElements; 20 | AlignedArray x_aa(n, 16); 21 | uint8_t* x = 
x_aa.Data(); 22 | 23 | InitArray(x, n, c_RngSeedVal); 24 | 25 | uint8_t x_min1 = 0, x_max1 = 0; 26 | uint8_t x_min2 = 0, x_max2 = 0; 27 | 28 | bool rc1 = CalcMinMaxU8_Cpp(&x_min1, &x_max1, x, n); 29 | bool rc2 = CalcMinMaxU8_Iavx(&x_min2, &x_max2, x, n); 30 | 31 | std::cout << "\nResults for CalcMinMaxU8_Cpp\n"; 32 | std::cout << "rc1: " << rc1 << " x_min1: " << (int)x_min1; 33 | std::cout << " x_max1: " << (int)x_max1 << '\n'; 34 | std::cout << "\nResults for CalcMinMaxU8_Iavx\n"; 35 | std::cout << "rc2: " << rc2 << " x_min2: " << (int)x_min2; 36 | std::cout << " x_max2: " << (int)x_max2 << '\n'; 37 | } 38 | -------------------------------------------------------------------------------- /Chapter02/Ch02_06/Ch02_06.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch02_06.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | #include 8 | 9 | // Ch02_06_fcpp.cpp 10 | extern bool CalcMinMaxU8_Cpp(uint8_t* x_min, uint8_t* x_max, const uint8_t* x, size_t n); 11 | extern bool CalcMinMaxU8_Iavx(uint8_t* x_min, uint8_t* x_max, const uint8_t* x, size_t n); 12 | 13 | // Ch02_06_misc.cpp 14 | extern void InitArray(uint8_t* x, size_t n, unsigned int rng_seed); 15 | 16 | // Ch02_06_BM.cpp 17 | extern void CalcMinMaxU8_bm(); 18 | 19 | // c_NumElements must be > 0 and even multiple of 16 20 | const size_t c_NumElements = 10000000; 21 | const unsigned int c_RngSeedVal = 23; 22 | -------------------------------------------------------------------------------- /Chapter02/Ch02_06/Ch02_06.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter02/Ch02_06/Ch02_06_bm.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | 
// Ch02_06_bm.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | #include "Ch02_06.h" 7 | #include "AlignedMem.h" 8 | #include "BmThreadTimer.h" 9 | // Times num_it runs each of CalcMinMaxU8_Cpp and CalcMinMaxU8_Iavx on the same array and saves the elapsed times to a file. 10 | void CalcMinMaxU8_bm(void) 11 | { 12 | std::cout << "\nRunning benchmark function CalcMinMaxU8_bm - please wait\n"; 13 | 14 | size_t n = c_NumElements; 15 | AlignedArray x_aa(n, 16); 16 | uint8_t* x = x_aa.Data(); 17 | 18 | InitArray(x, n, c_RngSeedVal); 19 | 20 | uint8_t x_min0 = 0, x_max0 = 0; 21 | uint8_t x_min1 = 0, x_max1 = 0; 22 | 23 | const size_t num_it = 500; 24 | const size_t num_alg = 2; 25 | BmThreadTimer bmtt(num_it, num_alg); 26 | 27 | for (size_t i = 0; i < num_it; i++) 28 | { 29 | bmtt.Start(i, 0); 30 | CalcMinMaxU8_Cpp(&x_min0, &x_max0, x, n); 31 | bmtt.Stop(i, 0); 32 | 33 | bmtt.Start(i, 1); 34 | CalcMinMaxU8_Iavx(&x_min1, &x_max1, x, n); 35 | bmtt.Stop(i, 1); 36 | } 37 | 38 | std::string fn = bmtt.BuildCsvFilenameString("Ch02_06_CalcMinMaxU8_bm"); 39 | bmtt.SaveElapsedTimes(fn, BmThreadTimer::EtUnit::MicroSec, 2); 40 | std::cout << "Benchmark times save to file " << fn << '\n'; 41 | } 42 | -------------------------------------------------------------------------------- /Chapter02/Ch02_06/Ch02_06_misc.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch02_06_misc.cpp 3 | //------------------------------------------------ 4 | 5 | #include "Ch02_06.h" 6 | #include "MT.h" 7 | 8 | // Fills x[0..n-1] via MT::FillArray (range arguments 5 and 250, seed rng_seed), 9 | // then plants six fixed values (2, 3, 4 and 252, 253, 254) so that the expected 10 | // minimum and maximum of the array are known in advance. 11 | void InitArray(uint8_t* x, size_t n, unsigned int rng_seed) 12 | { 13 | int rng_min_val = 5; 14 | int rng_max_val = 250; 15 | MT::FillArray(x, n, rng_min_val, rng_max_val, rng_seed); 16 | 17 | // Use known values for min & max (for test purposes). A value is planted 18 | // only when its index lies inside the array: the largest offset, n / 2 + 13, 19 | // would be out of bounds for any n below 27 (e.g. n == 16). 20 | const size_t idx[] = { (n / 4) * 3 + 1, n / 4 + 11, n / 2, n / 2 + 13, n / 8 + 5, n / 8 + 7 }; 21 | const uint8_t val[] = { 2, 3, 252, 253, 4, 254 }; 22 | 23 | for (size_t i = 0; i < sizeof(idx) / sizeof(idx[0]); i++) 24 | { 25 | if (idx[i] < n) 26 | x[idx[i]] = val[i]; 27 | } 28 | } 29 | --------------------------------------------------------------------------------
/Chapter02/Ch02_07/Ch02_07.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch02_07.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | #include 7 | #include "Ch02_07.h" 8 | #include "AlignedMem.h" 9 | 10 | static void CalcMeanU8(void); 11 | 12 | int main() 13 | { 14 | CalcMeanU8(); 15 | CalcMeanU8_bm(); 16 | } 17 | // Runs CalcMeanU8_Cpp and CalcMeanU8_Iavx on the same array and prints the status, sum and mean reported by each. 18 | static void CalcMeanU8(void) 19 | { 20 | const char nl = '\n'; 21 | const size_t n = c_NumElements; 22 | AlignedArray x_aa(n, c_Alignment); 23 | uint8_t* x = x_aa.Data(); 24 | 25 | InitArray(x, n, c_RngSeedVal); 26 | // Outputs are zero-initialized so the prints below never read indeterminate values if a call fails before writing them. 27 | bool rc0, rc1; 28 | uint64_t sum_x0 = 0, sum_x1 = 0; 29 | double mean_x0 = 0.0, mean_x1 = 0.0; 30 | 31 | rc0 = CalcMeanU8_Cpp(&mean_x0, &sum_x0, x, n); 32 | rc1 = CalcMeanU8_Iavx(&mean_x1, &sum_x1, x, n); 33 | 34 | std::cout << std::fixed << std::setprecision(6); 35 | 36 | std::cout << "\nResults for CalcMeanU8_Cpp\n"; 37 | std::cout << "rc0: " << rc0 << " "; 38 | std::cout << "sum_x0: " << sum_x0 << " "; 39 | std::cout << "mean_x0: " << mean_x0 << nl; 40 | 41 | std::cout << "\nResults for CalcMeanU8_Iavx\n"; 42 | std::cout << "rc1: " << rc1 << " "; 43 | std::cout << "sum_x1: " << sum_x1 << " "; 44 | std::cout << "mean_x1: " << mean_x1 << nl; 45 | } 46 | -------------------------------------------------------------------------------- /Chapter02/Ch02_07/Ch02_07.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch02_07.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | #include 8 | 9 | // Ch02_07_fcpp.cpp 10 | extern bool CalcMeanU8_Cpp(double* mean_x, uint64_t* sum_x, const uint8_t* x, size_t n); 11 | extern bool CalcMeanU8_Iavx(double* mean_x, uint64_t* sum_x, const uint8_t* x, size_t n); 12 | 13 | // Ch02_07_misc.cpp 14 | extern void InitArray(uint8_t* x, size_t n, unsigned int seed); 15 | extern
bool CheckArgs(const uint8_t* x, size_t n); 16 | 17 | // Ch02_07_bm.cpp 18 | extern void CalcMeanU8_bm(void); 19 | 20 | // Miscellaneous constants 21 | const size_t c_NumElements = 10000000; 22 | const size_t c_Alignment = 16; 23 | const unsigned int c_RngSeedVal = 29; 24 | extern "C" size_t g_NumElementsMax; 25 | -------------------------------------------------------------------------------- /Chapter02/Ch02_07/Ch02_07.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter02/Ch02_07/Ch02_07_bm.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch02_07_bm.cpp 3 | //------------------------------------------------ 4 | 5 | #include "Ch02_07.h" 6 | #include "AlignedMem.h" 7 | #include "BmThreadTimer.h" 8 | // Times num_it runs each of CalcMeanU8_Cpp and CalcMeanU8_Iavx on the same array and saves the elapsed times to a file. 9 | void CalcMeanU8_bm(void) 10 | { 11 | std::cout << "\nRunning benchmark function CalcMeanU8_bm - please wait\n"; 12 | 13 | size_t n = c_NumElements; 14 | AlignedArray x_aa(n, c_Alignment); 15 | uint8_t* x = x_aa.Data(); 16 | 17 | InitArray(x, n, c_RngSeedVal); 18 | 19 | uint64_t sum_x0 = 0, sum_x1 = 0; 20 | double mean_x0 = 0.0, mean_x1 = 0.0; 21 | 22 | const size_t num_it = 500; 23 | const size_t num_alg = 2; 24 | BmThreadTimer bmtt(num_it, num_alg); 25 | 26 | for (size_t i = 0; i < num_it; i++) 27 | { 28 | bmtt.Start(i, 0); 29 | CalcMeanU8_Cpp(&mean_x0, &sum_x0, x, n); 30 | bmtt.Stop(i, 0); 31 | 32 | bmtt.Start(i, 1); 33 | CalcMeanU8_Iavx(&mean_x1, &sum_x1, x, n); 34 | bmtt.Stop(i, 1); 35 | } 36 | 37 | std::string fn = bmtt.BuildCsvFilenameString("Ch02_07_CalcMeanU8_bm"); 38 | bmtt.SaveElapsedTimes(fn, BmThreadTimer::EtUnit::MicroSec, 2); 39 | std::cout << "Benchmark times save to file " << fn << '\n'; 40 | } 41 | -------------------------------------------------------------------------------- /Chapter02/Ch02_07/Ch02_07_misc.cpp: 
-------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch02_07_misc.cpp 3 | //------------------------------------------------ 4 | 5 | #include "Ch02_07.h" 6 | #include "MT.h" 7 | #include "AlignedMem.h" 8 | 9 | size_t g_NumElementsMax = 64 * 1024 * 1024; 10 | 11 | bool CheckArgs(const uint8_t* x, size_t n) 12 | { 13 | if (n == 0 || n > g_NumElementsMax) 14 | return false; 15 | 16 | if ((n % 64) != 0) 17 | return false; 18 | 19 | if (!AlignedMem::IsAligned(x, c_Alignment)) 20 | return false; 21 | 22 | return true; 23 | } 24 | 25 | void InitArray(uint8_t* x, size_t n, unsigned int rng_seed) 26 | { 27 | int rng_min_val = 0; 28 | int rng_max_val = 255; 29 | MT::FillArray(x, n, rng_min_val, rng_max_val, rng_seed); 30 | } 31 | -------------------------------------------------------------------------------- /Chapter03/Ch03_01/Ch03_01.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch03_01.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include "YmmVal.h" 7 | 8 | // Ch03_01_fcpp.cpp 9 | extern void PackedMathF32_Iavx(YmmVal c[8], const YmmVal* a, const YmmVal* b); 10 | extern void PackedMathF64_Iavx(YmmVal c[8], const YmmVal* a, const YmmVal* b); 11 | -------------------------------------------------------------------------------- /Chapter03/Ch03_01/Ch03_01.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter03/Ch03_02/Ch03_02.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch03_02.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include "YmmVal.h" 7 | 8 | // Ch03_02_fcpp.cpp 9 | 
extern void PackedCompareF32_Iavx(YmmVal c[8], const YmmVal* a, const YmmVal* b); 10 | extern void PackedCompareF64_Iavx(YmmVal c[8], const YmmVal* a, const YmmVal* b); 11 | -------------------------------------------------------------------------------- /Chapter03/Ch03_02/Ch03_02.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter03/Ch03_03/Ch03_03.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch03_03.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include "XmmVal.h" 7 | 8 | enum class CvtOp : unsigned int 9 | { 10 | I32_TO_F32, F32_TO_I32, I32_TO_F64, F64_TO_I32, F32_TO_F64, F64_TO_F32, 11 | }; 12 | 13 | // Ch03_03_fcpp.cpp 14 | extern void PackedConvertFP_Iavx(XmmVal* c, const XmmVal* a, CvtOp cvt_op); 15 | -------------------------------------------------------------------------------- /Chapter03/Ch03_03/Ch03_03.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter03/Ch03_04/Ch03_04.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch03_04.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | 8 | // Ch03_04_fcpp.cpp 9 | extern bool CalcMeanF32_Cpp(float* mean, const float* x, size_t n); 10 | extern bool CalcStDevF32_Cpp(float* st_dev, const float* x, size_t n, float mean); 11 | extern bool CalcMeanF32_Iavx(float* mean, const float* x, size_t n); 12 | extern bool CalcStDevF32_Iavx(float* st_dev, const float* x, size_t n, float mean); 13 | 14 | // Ch03_04_misc.cpp 15 | extern bool CheckArgs(const float* 
x, size_t n); 16 | extern void InitArray(float* x, size_t n); 17 | 18 | // Miscellaneous constants 19 | const size_t c_NumElements = 91; 20 | const unsigned int c_RngSeed = 13; 21 | const float c_ArrayFillMin = 1.0f; 22 | const float c_ArrayFillMax = 100.0f; 23 | const size_t c_Alignment = 32; 24 | -------------------------------------------------------------------------------- /Chapter03/Ch03_04/Ch03_04.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter03/Ch03_04/Ch03_04_misc.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch03_04_misc.cpp 3 | //------------------------------------------------ 4 | 5 | #include "Ch03_04.h" 6 | #include "AlignedMem.h" 7 | #include "MT.h" 8 | 9 | bool CheckArgs(const float* x, size_t n) 10 | { 11 | return ((n >= 2) && AlignedMem::IsAligned(x, c_Alignment)); 12 | } 13 | 14 | void InitArray(float* x, size_t n) 15 | { 16 | MT::FillArrayFP(x, n, c_ArrayFillMin, c_ArrayFillMax, c_RngSeed); 17 | } 18 | -------------------------------------------------------------------------------- /Chapter03/Ch03_05/Ch03_05.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch03_05.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | 8 | // Ch03_05_fcpp.cpp 9 | extern bool CalcMeanF64_Cpp(double* mean, const double* x, size_t n); 10 | extern bool CalcMeanF64_Iavx(double* mean, const double* x, size_t n); 11 | extern bool CalcStDevF64_Cpp(double* st_dev, const double* x, size_t n, double mean); 12 | extern bool CalcStDevF64_Iavx(double* st_dev, const double* x, size_t n, double mean); 13 | 14 | // Ch03_05_misc.cpp 15 | extern bool CheckArgs(const double* x, size_t n); 16 | 
extern void InitArray(double* x, size_t n); 17 | 18 | // Miscellaneous constants 19 | const size_t c_NumElements = 91; 20 | const unsigned int c_RngSeed = 13; 21 | const double c_ArrayFillMin = 1.0; 22 | const double c_ArrayFillMax = 100.0; 23 | const size_t c_Alignment = 32; 24 | -------------------------------------------------------------------------------- /Chapter03/Ch03_05/Ch03_05.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter03/Ch03_05/Ch03_05_misc.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch03_05_misc.cpp 3 | //------------------------------------------------ 4 | 5 | #include "Ch03_05.h" 6 | #include "AlignedMem.h" 7 | #include "MT.h" 8 | 9 | bool CheckArgs(const double* x, size_t n) 10 | { 11 | return ((n >= 2) && AlignedMem::IsAligned(x, c_Alignment)); 12 | } 13 | 14 | void InitArray(double* x, size_t n) 15 | { 16 | MT::FillArrayFP(x, n, c_ArrayFillMin, c_ArrayFillMax, c_RngSeed); 17 | } 18 | 19 | -------------------------------------------------------------------------------- /Chapter03/Ch03_06/Ch03_06.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch03_06.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | 8 | struct PointArrays 9 | { 10 | float* X1; 11 | float* Y1; 12 | float* X2; 13 | float* Y2; 14 | float* Dist1; 15 | float* Dist2; 16 | float* DistCmp1; 17 | float* DistCmp2; 18 | size_t NumPoints; 19 | }; 20 | 21 | // Ch03_06_fcpp.cpp 22 | extern bool CalcDistancesF32_Cpp(PointArrays& pa); 23 | extern bool CalcDistancesF32_Iavx(PointArrays& pa); 24 | extern void CompareDistancesF32_Cpp(PointArrays& pa, float cmp_val); 25 | extern void 
CompareDistancesF32_Iavx(PointArrays& pa, float cmp_val); 26 | 27 | // Ch03_06_misc.cpp 28 | extern bool CheckArgs(PointArrays& pa); 29 | extern void FillPointArraysF32(PointArrays& pa, float min_val, float max_val, unsigned int rng_seed); 30 | 31 | // Miscellaneous constants 32 | const size_t c_NumPoints = 21; 33 | const unsigned int c_RngSeed = 39; 34 | const float c_ArrayFillMin = 1.0f; 35 | const float c_ArrayFillMax = 75.0f; 36 | const float c_CmpVal = 50.0f; 37 | const size_t c_Alignment = 32; 38 | -------------------------------------------------------------------------------- /Chapter03/Ch03_06/Ch03_06.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter03/Ch03_07/Ch03_07.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch03_07.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | 8 | struct PointArrays 9 | { 10 | double* X1; 11 | double* Y1; 12 | double* X2; 13 | double* Y2; 14 | double* Dist1; 15 | double* Dist2; 16 | double* DistCmp1; 17 | double* DistCmp2; 18 | size_t NumPoints; 19 | }; 20 | 21 | // Ch03_07_fcpp.cpp 22 | extern bool CalcDistancesF64_Cpp(PointArrays& pa); 23 | extern bool CalcDistancesF64_Iavx(PointArrays& pa); 24 | extern void CompareDistancesF64_Cpp(PointArrays& pa, double cmp_val); 25 | extern void CompareDistancesF64_Iavx(PointArrays& pa, double cmp_val); 26 | 27 | // Ch03_07_misc.cpp 28 | extern bool CheckArgs(PointArrays& pa); 29 | extern void FillPointArraysF64(PointArrays& pa, double min_val, double max_val, unsigned int rng_seed); 30 | 31 | // Miscellaneous constants 32 | const size_t c_NumPoints = 21; 33 | const unsigned int c_RngSeed = 39; 34 | const double c_ArrayFillMin = 1.0; 35 | const double c_ArrayFillMax = 75.0; 36 | const double c_CmpVal 
= 50.0; 37 | const size_t c_Alignment = 32; 38 | -------------------------------------------------------------------------------- /Chapter03/Ch03_07/Ch03_07.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter03/Ch03_08/Ch03_08.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch03_08.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | #include "MatrixF32.h" 8 | 9 | // Ch03_08_fcpp.cpp 10 | extern std::vector CalcColumnMeansF32_Cpp(const MatrixF32& x); 11 | extern std::vector CalcColumnMeansF32_Iavx(const MatrixF32& x); 12 | 13 | // Miscellaneous constants 14 | const unsigned int c_RngSeed = 41; 15 | const float c_MatrixFillMin = 1.0f; 16 | const float c_MatrixFillMax = 80.0f; 17 | 18 | -------------------------------------------------------------------------------- /Chapter03/Ch03_08/Ch03_08.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter03/Ch03_09/Ch03_09.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch03_09.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | #include "MatrixF64.h" 8 | 9 | // Ch03_09_fcpp.cpp 10 | extern std::vector CalcColumnMeansF64_Cpp(const MatrixF64& x); 11 | extern std::vector CalcColumnMeansF64_Iavx(const MatrixF64& x); 12 | 13 | // Miscellaneous constants 14 | const unsigned int c_RngSeed = 41; 15 | const double c_MatrixFillMin = 1.0; 16 | const double c_MatrixFillMax = 80.0; 17 | -------------------------------------------------------------------------------- 
/Chapter03/Ch03_09/Ch03_09.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter04/Ch04_01/Ch04_01.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch04_01.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include "YmmVal.h" 7 | 8 | // Ch04_01_fcpp.cpp 9 | extern void MathI16_Iavx2(YmmVal c[6], const YmmVal* a, const YmmVal* b); 10 | extern void MathI32_Iavx2(YmmVal c[6], const YmmVal* a, const YmmVal* b); 11 | -------------------------------------------------------------------------------- /Chapter04/Ch04_01/Ch04_01.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter04/Ch04_02/Ch04_02.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch04_02.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include "YmmVal.h" 7 | 8 | // Ch04_02_fcpp.cpp 9 | extern void UnpackU32_U64_Iavx2(YmmVal c[2], const YmmVal* a, const YmmVal* b); 10 | extern void PackI32_I16_Iavx2(YmmVal* c, const YmmVal* a, const YmmVal* b); 11 | -------------------------------------------------------------------------------- /Chapter04/Ch04_02/Ch04_02.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | 
rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | Source Files 23 | 24 | 25 | 26 | 27 | Header Files 28 | 29 | 30 | -------------------------------------------------------------------------------- /Chapter04/Ch04_02/Ch04_02.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter04/Ch04_02/Ch04_02_fcpp.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch04_02_fcpp.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | #include "Ch04_02.h" 7 | 8 | void UnpackU32_U64_Iavx2(YmmVal c[2], const YmmVal* a, const YmmVal* b) 9 | { 10 | __m256i a_vals = _mm256_load_si256((__m256i*)a); 11 | __m256i b_vals = _mm256_load_si256((__m256i*)b); 12 | 13 | __m256i c_vals0 = _mm256_unpacklo_epi32(a_vals, b_vals); 14 | __m256i c_vals1 = _mm256_unpackhi_epi32(a_vals, b_vals); 15 | 16 | _mm256_store_si256((__m256i*)&c[0], c_vals0); 17 | _mm256_store_si256((__m256i*)&c[1], c_vals1); 18 | } 19 | 20 | void PackI32_I16_Iavx2(YmmVal* c, const YmmVal* a, const YmmVal* b) 21 | { 22 | __m256i a_vals = _mm256_load_si256((__m256i*)a); 23 | __m256i b_vals = _mm256_load_si256((__m256i*)b); 24 | 25 | __m256i c_vals = _mm256_packs_epi32(a_vals, b_vals); 26 | 27 | _mm256_store_si256((__m256i*)c, c_vals); 28 | } 29 | -------------------------------------------------------------------------------- /Chapter04/Ch04_03/Ch04_03.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch04_03.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include "YmmVal.h" 7 | 8 | // Ch04_03_fcpp.cpp 9 | extern void ZeroExtU8_U16_Iavx2(YmmVal c[2], 
YmmVal* a); 10 | extern void ZeroExtU8_U32_Iavx2(YmmVal c[4], YmmVal* a); 11 | extern void SignExtI16_I32_Iavx2(YmmVal c[2], YmmVal* a); 12 | extern void SignExtI16_I64_Iavx2(YmmVal c[4], YmmVal* a); 13 | 14 | -------------------------------------------------------------------------------- /Chapter04/Ch04_03/Ch04_03.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter04/Ch04_04/Ch04_04.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch04_04.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | #include 8 | 9 | // Data structure for pixel clipping algorithms 10 | struct ClipData 11 | { 12 | uint8_t* m_PbSrc; // source buffer pointer 13 | uint8_t* m_PbDes; // destination buffer pointer 14 | size_t m_NumPixels; // number of pixels 15 | size_t m_NumClippedPixels; // number of clipped pixels 16 | uint8_t m_ThreshLo; // low threshold 17 | uint8_t m_ThreshHi; // high threshold 18 | }; 19 | 20 | // Ch04_04_fcpp.cpp 21 | extern void ClipPixels_Cpp(ClipData* clip_data); 22 | extern void ClipPixels_Iavx2(ClipData* clip_data); 23 | 24 | // Ch04_04_misc.cpp 25 | extern bool CheckArgs(const ClipData* clip_data); 26 | 27 | // Ch04_04_bm.cpp 28 | extern void ClipPixels_bm(void); 29 | 30 | // Miscellaneous constants 31 | const size_t c_Alignment = 32; 32 | const int c_RngMinVal = 0; 33 | const int c_RngMaxVal = 255; 34 | const unsigned int c_RngSeed = 157; 35 | const uint8_t c_ThreshLo = 10; 36 | const uint8_t c_ThreshHi = 245; 37 | const size_t c_NumPixels = 8 * 1024 * 1024 + 31; 38 | const size_t c_NumPixelsBM = 10000000; 39 | -------------------------------------------------------------------------------- /Chapter04/Ch04_04/Ch04_04.vcxproj.user: 
-------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter04/Ch04_04/Ch04_04_misc.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch04_04_misc.cpp 3 | //------------------------------------------------ 4 | 5 | #include "Ch04_04.h" 6 | #include "AlignedMem.h" 7 | 8 | bool CheckArgs(const ClipData* clip_data) 9 | { 10 | if (clip_data->m_NumPixels == 0) 11 | return false; 12 | if (!AlignedMem::IsAligned(clip_data->m_PbSrc, c_Alignment)) 13 | return false; 14 | if (!AlignedMem::IsAligned(clip_data->m_PbDes, c_Alignment)) 15 | return false; 16 | return true; 17 | } 18 | 19 | -------------------------------------------------------------------------------- /Chapter04/Ch04_05/Ch04_05.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch04_05.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | #include 8 | #include "ImageMisc.h" 9 | 10 | // Ch04_05.cpp 11 | extern const float c_Coef[4]; 12 | extern const char* c_TestImageFileName; 13 | 14 | // Ch04_05_fcpp.cpp 15 | extern void ConvertRgbToGs_Cpp(uint8_t* pb_gs, const RGB32* pb_rgb, size_t num_pixels, const float coef[4]); 16 | extern void ConvertRgbToGs_Iavx2(uint8_t* pb_gs, const RGB32* pb_rgb, size_t num_pixels, const float coef[4]); 17 | 18 | // Ch04_05_misc.cpp 19 | extern bool CheckArgs(const uint8_t* pb_gs, const RGB32* pb_rgb, size_t num_pixels, const float coef[4]); 20 | 21 | // Ch04_05_bm.cpp 22 | extern void ConvertRgbToGs_bm(void); 23 | extern bool CompareGsPixelBuffers(const uint8_t* pb_gs1, const uint8_t* pb_gs2, size_t num_pixels); 24 | 25 | // Miscellaneous constants 26 | const size_t c_Alignment = 32; 27 | const size_t c_NumPixelsMax = 256 * 1024 * 
1024; 28 | -------------------------------------------------------------------------------- /Chapter04/Ch04_05/Ch04_05.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter04/Ch04_05/Ch04_05_misc.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch04_05_misc.cpp 3 | //------------------------------------------------ 4 | 5 | #include "Ch04_05.h" 6 | #include "AlignedMem.h" 7 | 8 | bool CheckArgs(const uint8_t* pb_gs, const RGB32* pb_rgb, size_t num_pixels, const float coef[4]) 9 | { 10 | if (num_pixels > c_NumPixelsMax) 11 | return false; 12 | if (num_pixels % 8 != 0) 13 | return false; 14 | if (!AlignedMem::IsAligned(pb_gs, c_Alignment)) 15 | return false; 16 | if (!AlignedMem::IsAligned(pb_rgb, c_Alignment)) 17 | return false; 18 | if (coef[0] < 0.0f || coef[1] < 0.0f || coef[2] < 0.0f) 19 | return false; 20 | return true; 21 | } 22 | 23 | bool CompareGsPixelBuffers(const uint8_t* pb_gs1, const uint8_t* pb_gs2, size_t num_pixels) 24 | { 25 | for (size_t i = 0; i < num_pixels; i++) 26 | { 27 | if (abs((int)pb_gs1[i] - (int)pb_gs2[i]) > 1) 28 | return false; 29 | } 30 | return true; 31 | } 32 | -------------------------------------------------------------------------------- /Chapter04/Ch04_06/Ch04_06.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter04/Ch04_06/Ch04_06_misc.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch04_06_misc.cpp 3 | //------------------------------------------------ 4 | 5 | #include "Ch04_06.h" 6 | #include "AlignedMem.h" 7 | 8 | bool CheckArgs(size_t 
num_pixels, const uint8_t* pb_src, const uint8_t* pb_mask) 9 | { 10 | if ((num_pixels == 0) || (num_pixels > c_NumPixelsMax)) 11 | return false; 12 | if ((num_pixels % c_NumSimdElements) != 0) 13 | return false; 14 | if (!AlignedMem::IsAligned(pb_src, c_Alignment)) 15 | return false; 16 | if (!AlignedMem::IsAligned(pb_mask, c_Alignment)) 17 | return false; 18 | return true; 19 | } 20 | -------------------------------------------------------------------------------- /Chapter04/Ch04_07/Ch04_07.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch04_07.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | #include 8 | 9 | // Ch04_07_fcpp.cpp 10 | extern void ConvertU8ToF32_Cpp(float* pb_des, const uint8_t* pb_src, size_t num_pixels); 11 | extern void ConvertU8ToF32_Iavx2(float* pb_des, const uint8_t* pb_src, size_t num_pixels); 12 | 13 | // Ch04_07_misc.cpp 14 | extern void BuildLUT_U8ToF32(void); 15 | extern bool CheckArgs(const void* pb1, const void* pb2, size_t num_pixels); 16 | extern size_t CompareArraysF32(const float* pb_src1, const float* pb_src2, size_t num_pixels); 17 | 18 | // Ch04_07_bm.cpp 19 | extern void ConvertU8ToF32_bm(void); 20 | 21 | // Miscellaneous constants 22 | const size_t c_Alignment = 32; 23 | const size_t c_NumPixels = 1024 * 1024 + 19; 24 | const size_t c_NumPixelsBM = 10000000; 25 | const size_t c_NumPixelsMax = 16 * 1024 * 1024; 26 | const int c_FillMinVal = 0; 27 | const int c_FillMaxVal = 255; 28 | const unsigned int c_RngSeed = 71; 29 | extern float g_LUT_U8ToF32[]; 30 | -------------------------------------------------------------------------------- /Chapter04/Ch04_07/Ch04_07.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- 
/Chapter04/Ch04_07/Ch04_07_misc.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch04_07_misc.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | #include "Ch04_07.h" 7 | #include "AlignedMem.h" 8 | 9 | float g_LUT_U8ToF32[256]; 10 | 11 | void BuildLUT_U8ToF32(void) 12 | { 13 | size_t n = sizeof(g_LUT_U8ToF32) / sizeof(float); 14 | 15 | for (size_t i = 0; i < n; i++) 16 | g_LUT_U8ToF32[i] = (float)i / 255.0f; 17 | } 18 | 19 | bool CheckArgs(const void* pb1, const void* pb2, size_t num_pixels) 20 | { 21 | if (num_pixels == 0 || num_pixels > c_NumPixelsMax) 22 | return false; 23 | if (!AlignedMem::IsAligned(pb1, c_Alignment)) 24 | return false; 25 | if (!AlignedMem::IsAligned(pb2, c_Alignment)) 26 | return false; 27 | return true; 28 | } 29 | 30 | size_t CompareArraysF32(const float* pb_src1, const float* pb_src2, size_t num_pixels) 31 | { 32 | size_t num_diff = 0; 33 | 34 | for (size_t i = 0; i < num_pixels; i++) 35 | { 36 | if (pb_src1[i] != pb_src2[i]) 37 | { 38 | std::cout << i << ", " << pb_src1[i] << ", " << pb_src2[i] << '\n'; 39 | num_diff++; 40 | } 41 | } 42 | 43 | return num_diff; 44 | } 45 | -------------------------------------------------------------------------------- /Chapter05/Ch05_01/Ch05_01.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch05_01.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | 7 | // Ch05_01_fcpp.cpp 8 | extern void CalcLeastSquares_Cpp(double* m, double* b, const double* x, 9 | const double* y, size_t n); 10 | extern void CalcLeastSquares_Iavx2(double* m, double* b, const double* x, 11 | const double* y, size_t n); 12 | 13 | // Ch05_01_misc.cpp 14 | extern bool CheckArgs(const double* x, const double* y, size_t n); 15 | extern void FillArrays(double* x, double* y, size_t n); 16 
| 17 | // Miscellaneous constants 18 | const size_t c_Alignment = 32; 19 | const double c_LsEpsilon = 1.0e-12; 20 | -------------------------------------------------------------------------------- /Chapter05/Ch05_01/Ch05_01.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter05/Ch05_01/Ch05_01_misc.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch05_01_misc.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | #include "Ch05_01.h" 7 | #include "AlignedMem.h" 8 | #include "MT.h" 9 | 10 | bool CheckArgs(const double* x, const double* y, size_t n) 11 | { 12 | if (n < 2) 13 | return false; 14 | if (!AlignedMem::IsAligned(x, c_Alignment)) 15 | return false; 16 | if (!AlignedMem::IsAligned(y, c_Alignment)) 17 | return false; 18 | 19 | return true; 20 | } 21 | 22 | void FillArrays(double* x, double* y, size_t n) 23 | { 24 | const unsigned int rng_seed1 = 73; 25 | const unsigned int rng_seed2 = 83; 26 | const double fill_min_val = -25.0; 27 | const double fill_max_val = 25.0; 28 | 29 | MT::FillArrayFP(x, n, fill_min_val, fill_max_val, rng_seed1); 30 | MT::FillArrayFP(y, n, fill_min_val, fill_max_val, rng_seed2); 31 | 32 | for (size_t i = 0; i < n; i++) 33 | y[i] = y[i] * y[i]; 34 | } 35 | -------------------------------------------------------------------------------- /Chapter05/Ch05_02/Ch05_02.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch05_02.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include "MatrixF32.h" 7 | 8 | // Ch05_02_fcpp.cpp 9 | void MatrixMulF32_Cpp(MatrixF32& c, const MatrixF32& a, const MatrixF32& b); 10 | void MatrixMulF32_Iavx2(MatrixF32& c, 
const MatrixF32& a, const MatrixF32& b); 11 | 12 | // Ch05_02_misc.cpp 13 | bool CheckArgs(const MatrixF32& c, const MatrixF32& a, const MatrixF32& b); 14 | void InitMat(MatrixF32& c1, MatrixF32& c2, MatrixF32& a, MatrixF32& b); 15 | void SaveResults(const MatrixF32& c1, const MatrixF32& c2, const MatrixF32& a, 16 | const MatrixF32& b); 17 | 18 | // Ch05_02_bm.cpp 19 | void MatrixMulF32_bm(void); 20 | -------------------------------------------------------------------------------- /Chapter05/Ch05_02/Ch05_02.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter05/Ch05_03/Ch05_03.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch05_03.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include "MatrixF64.h" 7 | 8 | // Ch05_03_fcpp.cpp 9 | void MatrixMulF64_Cpp(MatrixF64& c, const MatrixF64& a, const MatrixF64& b); 10 | void MatrixMulF64_Iavx2(MatrixF64& c, const MatrixF64& a, const MatrixF64& b); 11 | 12 | // Ch05_03_misc.cpp 13 | bool CheckArgs(const MatrixF64& c, const MatrixF64& a, const MatrixF64& b); 14 | void InitMat(MatrixF64& c1, MatrixF64& c2, MatrixF64& a, MatrixF64& b); 15 | void SaveResults(const MatrixF64& c1, const MatrixF64& c2, const MatrixF64& a, 16 | const MatrixF64& b); 17 | 18 | // Ch05_03_bm.cpp 19 | void MatrixMulF64_bm(void); 20 | -------------------------------------------------------------------------------- /Chapter05/Ch05_03/Ch05_03.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter05/Ch05_04/Ch05_04.h: -------------------------------------------------------------------------------- 1 | 
//------------------------------------------------ 2 | // Ch05_04.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include "MatrixF32.h" 7 | 8 | // Ch05_04_fcpp.cpp 9 | extern void MatrixMul4x4F32_Cpp(MatrixF32& c, const MatrixF32& a, 10 | const MatrixF32& b); 11 | extern void MatrixMul4x4F32_Iavx2(MatrixF32& c, const MatrixF32& a, 12 | const MatrixF32& b); 13 | 14 | // Ch05_04_misc.cpp 15 | extern void InitMat(MatrixF32& c1, MatrixF32& c2, MatrixF32& a, MatrixF32& b); 16 | 17 | // Ch05_04_bm.cpp 18 | extern void MatrixMul4x4F32_bm(void); 19 | -------------------------------------------------------------------------------- /Chapter05/Ch05_04/Ch05_04.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter05/Ch05_04/Ch05_04_misc.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch05_04_misc.cpp 3 | //------------------------------------------------ 4 | 5 | #include "Ch05_04.h" 6 | 7 | void InitMat(MatrixF32& c1, MatrixF32& c2, MatrixF32& a, MatrixF32& b) 8 | { 9 | const float a_row0[] = { 10, 11, 12, 13 }; 10 | const float a_row1[] = { 20, 21, 22, 23 }; 11 | const float a_row2[] = { 30, 31, 32, 33 }; 12 | const float a_row3[] = { 40, 41, 42, 43 }; 13 | 14 | const float b_row0[] = { 100, 101, 102, 103 }; 15 | const float b_row1[] = { 200, 201, 202, 203 }; 16 | const float b_row2[] = { 300, 301, 302, 303 }; 17 | const float b_row3[] = { 400, 401, 402, 403 }; 18 | 19 | a.SetRow(0, a_row0); 20 | a.SetRow(1, a_row1); 21 | a.SetRow(2, a_row2); 22 | a.SetRow(3, a_row3); 23 | 24 | b.SetRow(0, b_row0); 25 | b.SetRow(1, b_row1); 26 | b.SetRow(2, b_row2); 27 | b.SetRow(3, b_row3); 28 | 29 | const int w = 12; 30 | const char* delim = " "; 31 | c1.SetOstream(w, delim); 32 | c2.SetOstream(w, delim); 33 | 
a.SetOstream(w, delim); 34 | b.SetOstream(w, delim); 35 | } 36 | -------------------------------------------------------------------------------- /Chapter05/Ch05_05/Ch05_05.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch05_05.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include "MatrixF64.h" 7 | 8 | // Ch05_05_fcpp.cpp 9 | extern void MatrixMul4x4F64_Cpp(MatrixF64& c, const MatrixF64& a, 10 | const MatrixF64& b); 11 | extern void MatrixMul4x4F64_Iavx2(MatrixF64& c, const MatrixF64& a, 12 | const MatrixF64& b); 13 | 14 | // Ch05_05_misc.cpp 15 | extern void InitMat(MatrixF64& c1, MatrixF64& c2, MatrixF64& a, MatrixF64& b); 16 | 17 | // Ch05_05_bm.cpp 18 | extern void MatrixMul4x4F64_bm(void); 19 | -------------------------------------------------------------------------------- /Chapter05/Ch05_05/Ch05_05.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter05/Ch05_05/Ch05_05_misc.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch05_05_misc.cpp 3 | //------------------------------------------------ 4 | 5 | #include "Ch05_05.h" 6 | 7 | void InitMat(MatrixF64& c1, MatrixF64& c2, MatrixF64& a, MatrixF64& b) 8 | { 9 | const double a_row0[] = { 10, 11, 12, 13 }; 10 | const double a_row1[] = { 20, 21, 22, 23 }; 11 | const double a_row2[] = { 30, 31, 32, 33 }; 12 | const double a_row3[] = { 40, 41, 42, 43 }; 13 | 14 | const double b_row0[] = { 100, 101, 102, 103 }; 15 | const double b_row1[] = { 200, 201, 202, 203 }; 16 | const double b_row2[] = { 300, 301, 302, 303 }; 17 | const double b_row3[] = { 400, 401, 402, 403 }; 18 | 19 | a.SetRow(0, a_row0); 20 | a.SetRow(1, a_row1); 21 | a.SetRow(2, 
a_row2); 22 | a.SetRow(3, a_row3); 23 | 24 | b.SetRow(0, b_row0); 25 | b.SetRow(1, b_row1); 26 | b.SetRow(2, b_row2); 27 | b.SetRow(3, b_row3); 28 | 29 | const int w = 12; 30 | const char* delim = " "; 31 | c1.SetOstream(w, delim); 32 | c2.SetOstream(w, delim); 33 | a.SetOstream(w, delim); 34 | b.SetOstream(w, delim); 35 | } 36 | -------------------------------------------------------------------------------- /Chapter05/Ch05_06/Ch05_06.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch05_06.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | #include "MatrixF32.h" 8 | 9 | struct Vec4x1_F32 10 | { 11 | float W, X, Y, Z; 12 | }; 13 | 14 | // Ch05_06_fcpp.cpp 15 | extern void MatVecMulF32_Cpp(Vec4x1_F32* vec_b, const MatrixF32& m, 16 | const Vec4x1_F32* vec_a, size_t num_vec); 17 | extern void MatVecMulF32_Iavx2(Vec4x1_F32* vec_b, const MatrixF32& m, 18 | const Vec4x1_F32* vec_a, size_t num_vec); 19 | 20 | // Ch05_06_misc.cpp 21 | extern bool CheckArgs(const Vec4x1_F32* vec_b, const MatrixF32& m, 22 | const Vec4x1_F32* vec_a, size_t num_vec); 23 | extern void Init(MatrixF32& m, Vec4x1_F32* va, size_t num_vec); 24 | extern bool VecCompare(const Vec4x1_F32* v1, const Vec4x1_F32* v2); 25 | 26 | // Ch05_06_bm.cpp 27 | extern void MatrixVecMulF32_bm(void); 28 | 29 | // Miscellaneous constants 30 | const size_t c_Alignment = 32; 31 | const int c_RngMinVal = 1; 32 | const int c_RngMaxVal = 500; 33 | const unsigned int c_RngSeedVal = 187; 34 | -------------------------------------------------------------------------------- /Chapter05/Ch05_06/Ch05_06.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter05/Ch05_07/Ch05_07.h:
-------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch05_07.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | #include "MatrixF64.h" 8 | 9 | struct Vec4x1_F64 10 | { 11 | double W, X, Y, Z; 12 | }; 13 | 14 | // Ch05_07_fcpp.cpp 15 | extern void MatVecMulF64_Cpp(Vec4x1_F64* vec_b, MatrixF64& m, 16 | Vec4x1_F64* vec_a, size_t num_vec); 17 | extern void MatVecMulF64_Iavx2(Vec4x1_F64* vec_b, MatrixF64& m, 18 | Vec4x1_F64* vec_a, size_t num_vec); 19 | 20 | // Ch05_07_misc.cpp 21 | extern bool CheckArgs(const Vec4x1_F64* vec_b, const MatrixF64& m, 22 | const Vec4x1_F64* vec_a, size_t num_vec); 23 | extern void Init(MatrixF64& m, Vec4x1_F64* va, size_t num_vec); 24 | extern bool VecCompare(const Vec4x1_F64* v1, const Vec4x1_F64* v2); 25 | 26 | // Ch05_07_bm.cpp 27 | extern void MatrixVecMulF64_bm(void); 28 | 29 | // Miscellaneous constants 30 | const size_t c_Alignment = 32; 31 | const int c_RngMinVal = 1; 32 | const int c_RngMaxVal = 500; 33 | const unsigned int c_RngSeedVal = 187; 34 | -------------------------------------------------------------------------------- /Chapter05/Ch05_07/Ch05_07.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter05/Ch05_08/Ch05_08.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch05_08.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | #include "MatrixF32.h" 8 | 9 | // Ch05_08_fcpp.cpp 10 | bool MatrixInvF32_Cpp(MatrixF32& a_inv, const MatrixF32& a, float epsilon); 11 | bool MatrixInvF32_Iavx2(MatrixF32& a_inv, const MatrixF32& a, float epsilon); 12 | 13 | // Ch05_08_fcpp2.cpp 14 | MatrixF32 MatrixAddF32_Iavx2(const
MatrixF32& a, const MatrixF32& b); 15 | MatrixF32 MatrixMulF32_Iavx2(const MatrixF32& a, const MatrixF32& b); 16 | void MatrixMulScalarF32_Iavx2(MatrixF32& c, const MatrixF32& a, float s_val); 17 | 18 | // Ch05_08_misc.cpp 19 | MatrixF32 GetTestMatrix(size_t id, float* epsilon); 20 | size_t GetNumTestMatrices(void); 21 | 22 | // Ch05_08_bm.cpp 23 | void CalcMatrixInvF32_bm(void); 24 | 25 | // Miscellaneous constants 26 | const float c_DefaultEpsilon = 1.0e-5f; 27 | -------------------------------------------------------------------------------- /Chapter05/Ch05_08/Ch05_08.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter05/Ch05_09/Ch05_09.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch05_09.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | #include "MatrixF64.h" 8 | 9 | // Ch05_09_fcpp.cpp 10 | bool MatrixInvF64_Cpp(MatrixF64& a_inv, const MatrixF64& a, double epsilon); 11 | bool MatrixInvF64_Iavx2(MatrixF64& a_inv, const MatrixF64& a, double epsilon); 12 | 13 | // Ch05_09_fcpp2.cpp 14 | MatrixF64 MatrixAddF64_Iavx2(const MatrixF64& a, const MatrixF64& b); 15 | MatrixF64 MatrixMulF64_Iavx2(const MatrixF64& a, const MatrixF64& b); 16 | void MatrixMulScalarF64_Iavx2(MatrixF64& c, const MatrixF64& a, double s_val); 17 | 18 | // Ch05_09_misc.cpp 19 | MatrixF64 GetTestMatrix(size_t id, double* epsilon); 20 | size_t GetNumTestMatrices(void); 21 | 22 | // Ch05_09_bm.cpp 23 | void CalcMatrixInvF64_bm(void); 24 | 25 | // Miscellaneous constants 26 | const double c_DefaultEpsilon = 1.0e-5; 27 | -------------------------------------------------------------------------------- /Chapter05/Ch05_09/Ch05_09.vcxproj.user:
-------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter06/Ch06_01/Ch06_01.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch06_01.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | 8 | // Ch06_01_fcpp.cpp 9 | extern void Convolve1D_F32_Cpp(std::vector& y, 10 | const std::vector& x, const std::vector& kernel); 11 | extern void Convolve1D_F32_Iavx2(std::vector& y, 12 | const std::vector& x, const std::vector& kernel); 13 | extern void Convolve1DKs5_F32_Iavx2(std::vector& y, 14 | const std::vector& x, const std::vector& kernel); 15 | 16 | // Ch06_01_misc.cpp 17 | extern bool CheckArgs(std::vector& y, 18 | const std::vector& x, const std::vector& kernel); 19 | 20 | // Ch06_01_bm.cpp 21 | extern void Convolve1D_F32_bm(void); 22 | 23 | // Miscellaneous constants 24 | const unsigned int c_RngSeed = 97; 25 | -------------------------------------------------------------------------------- /Chapter06/Ch06_01/Ch06_01.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter06/Ch06_01/Ch06_01_misc.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch06_01_misc.cpp 3 | //------------------------------------------------ 4 | 5 | #include "Ch06_01.h" 6 | 7 | bool CheckArgs(std::vector& y, const std::vector& x, 8 | const std::vector& kernel) 9 | { 10 | if ((kernel.size() & 1) == 0) 11 | return false; 12 | if (y.size() != x.size()) 13 | return false; 14 | if (y.size() < kernel.size()) 15 | return false; 16 | return true; 17 | } 18 | 19 | 
-------------------------------------------------------------------------------- /Chapter06/Ch06_02/Ch06_02.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch06_02.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | 8 | // Ch06_02_fcpp.cpp 9 | extern void Convolve1D_F64_Cpp(std::vector& y, 10 | const std::vector& x, const std::vector& kernel); 11 | extern void Convolve1D_F64_Iavx2(std::vector& y, 12 | const std::vector& x, const std::vector& kernel); 13 | extern void Convolve1DKs5_F64_Iavx2(std::vector& y, 14 | const std::vector& x, const std::vector& kernel); 15 | 16 | // Ch06_02_misc.cpp 17 | extern bool CheckArgs(std::vector& y, 18 | const std::vector& x, const std::vector& kernel); 19 | 20 | // Ch06_02_bm.cpp 21 | extern void Convolve1D_F64_bm(void); 22 | 23 | // Miscellaneous constants 24 | const unsigned int c_RngSeed = 97; 25 | -------------------------------------------------------------------------------- /Chapter06/Ch06_02/Ch06_02.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter06/Ch06_02/Ch06_02_misc.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch06_02_misc.cpp 3 | //------------------------------------------------ 4 | 5 | #include "Ch06_02.h" 6 | 7 | bool CheckArgs(std::vector& y, const std::vector& x, const std::vector& kernel) 8 | { 9 | if ((kernel.size() & 1) == 0) 10 | return false; 11 | if (y.size() != x.size()) 12 | return false; 13 | if (y.size() < kernel.size()) 14 | return false; 15 | return true; 16 | } 17 | 18 | -------------------------------------------------------------------------------- /Chapter06/Ch06_03/Ch06_03.h: 
-------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch06_03.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | #include 8 | 9 | struct CD_2D 10 | { 11 | size_t m_ImH = 0; 12 | size_t m_ImW = 0; 13 | size_t m_KernelSize = 0; 14 | std::vector m_ImSrc; 15 | std::vector m_ImDes; 16 | std::vector m_Kernel2D; 17 | }; 18 | 19 | enum class KERNEL_ID : unsigned int 20 | { 21 | LowPass2D_3x3, LowPass2D_5x5, LowPass2D_7x7, LowPass2D_9x9, LowPass2D_15x15 22 | }; 23 | 24 | // Ch06_03_fcpp.cpp 25 | extern void Convolve2D_F32_Cpp(CD_2D& cd); 26 | extern void Convolve2D_F32_Iavx2(CD_2D& cd); 27 | 28 | // Ch06_03_misc.cpp 29 | extern bool CheckArgs2D(const CD_2D& cd); 30 | extern void Init2D(std::array& cd, const char* fn, KERNEL_ID id); 31 | 32 | // Ch06_03_misc2.cpp 33 | extern void DisplayKernel2D(float sigma, size_t ks); 34 | extern void GetKernel2D(CD_2D& cd, KERNEL_ID id); 35 | 36 | // Ch06_03_bm.cpp 37 | extern void Convolve2D_F32_bm(void); 38 | 39 | // Miscellaneous constants 40 | const KERNEL_ID c_KernelID = KERNEL_ID::LowPass2D_15x15; 41 | const KERNEL_ID c_KernelID_BM = KERNEL_ID::LowPass2D_9x9; 42 | -------------------------------------------------------------------------------- /Chapter06/Ch06_03/Ch06_03.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter06/Ch06_03/Ch06_03_bm.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch06_03_bm.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | #include "Ch06_03.h" 7 | #include "BmThreadTimer.h" 8 | 9 | void Convolve2D_F32_bm(void) 10 | { 11 | std::cout << "\nRunning benchmark function Convolve2D_F32_bm - please wait\n"; 12 
| 13 | const char* fn_src = "../../Data/ImageE.png"; 14 | 15 | std::array cd; 16 | Init2D(cd, fn_src, c_KernelID_BM); 17 | 18 | const size_t num_it = 500; 19 | const size_t num_alg = 2; 20 | BmThreadTimer bmtt(num_it, num_alg); 21 | 22 | for (size_t i = 0; i < num_it; i++) 23 | { 24 | bmtt.Start(i, 0); 25 | Convolve2D_F32_Cpp(cd[0]); 26 | bmtt.Stop(i, 0); 27 | 28 | bmtt.Start(i, 1); 29 | Convolve2D_F32_Iavx2(cd[1]); 30 | bmtt.Stop(i, 1); 31 | 32 | if ((i % 10) == 0) 33 | std::cout << '.' << std::flush; 34 | } 35 | 36 | std::cout << '\n'; 37 | std::string fn = bmtt.BuildCsvFilenameString("Ch06_03_Convolve2D_F32_bm"); 38 | bmtt.SaveElapsedTimes(fn, BmThreadTimer::EtUnit::MicroSec, 2); 39 | std::cout << "Benchmark times saved to file " << fn << '\n'; 40 | } 41 | -------------------------------------------------------------------------------- /Chapter06/Ch06_04/Ch06_04.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter06/Ch06_04/Ch06_04_bm.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch06_04_bm.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | #include 7 | #include "Ch06_04.h" 8 | #include "BmThreadTimer.h" 9 | 10 | void Convolve1Dx2_F32_bm(void) 11 | { 12 | std::cout << "\nRunning benchmark function Convolve1Dx2_F32_bm - please wait\n"; 13 | 14 | const char* fn_src = "../../Data/ImageE.png"; 15 | 16 | std::array cd; 17 | Init1Dx2(cd, fn_src, c_KernelID_BM); 18 | 19 | const size_t num_it = 500; 20 | const size_t num_alg = 2; 21 | BmThreadTimer bmtt(num_it, num_alg); 22 | 23 | for (size_t i = 0; i < num_it; i++) 24 | { 25 | bmtt.Start(i, 0); 26 | Convolve1Dx2_F32_Cpp(cd[0]); 27 | bmtt.Stop(i, 0); 28 | 29 | bmtt.Start(i, 1); 30 | Convolve1Dx2_F32_Iavx2(cd[1]); 31 | bmtt.Stop(i, 1); 32 | 33 | 
if ((i % 10) == 0) 34 | std::cout << '.' << std::flush; 35 | } 36 | 37 | std::cout << '\n'; 38 | std::string fn = bmtt.BuildCsvFilenameString("Ch06_04_Convolve1Dx2_F32_bm"); 39 | bmtt.SaveElapsedTimes(fn, BmThreadTimer::EtUnit::MicroSec, 2); 40 | std::cout << "Benchmark times saved to file " << fn << '\n'; 41 | } 42 | -------------------------------------------------------------------------------- /Chapter06/Ch06_05/Ch06_05.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter07/Ch07_01/Ch07_01.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch07_01.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | #include "ZmmVal.h" 8 | 9 | // Ch07_01_fcpp.cpp 10 | extern void MathI16_Iavx512(ZmmVal c[6], const ZmmVal* a, const ZmmVal* b); 11 | extern void MathI64_Iavx512(ZmmVal c[6], const ZmmVal* a, const ZmmVal* b); 12 | -------------------------------------------------------------------------------- /Chapter07/Ch07_01/Ch07_01.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter07/Ch07_02/Ch07_02.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch07_02.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | #include "ZmmVal.h" 8 | 9 | // Ch07_02_fcpp.cpp 10 | extern void MaskOpI64a_Iavx512(ZmmVal c[5], uint8_t mask, const ZmmVal* a, 11 | const ZmmVal* b); 12 | extern void MaskOpI64b_Iavx512(ZmmVal c[5], uint8_t mask, const ZmmVal* a, 13 | const ZmmVal* b1, const ZmmVal* b2); 14 | extern void 
MaskOpI64c_Iavx512(ZmmVal* c, const ZmmVal* a, int64_t x1, int64_t x2); 15 | -------------------------------------------------------------------------------- /Chapter07/Ch07_02/Ch07_02.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter07/Ch07_03/Ch07_03.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch07_03.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | #include 8 | #include "ImageMisc.h" 9 | 10 | // Ch07_03.cpp 11 | extern const float c_Coef[4]; 12 | extern const char* c_TestImageFileName; 13 | 14 | // Ch07_03_fcpp.cpp 15 | extern void ConvertRgbToGs_Cpp(uint8_t* pb_gs, const RGB32* pb_rgb, size_t num_pixels, const float coef[4]); 16 | extern void ConvertRgbToGs_Iavx512(uint8_t* pb_gs, const RGB32* pb_rgb, size_t num_pixels, const float coef[4]); 17 | 18 | // Ch07_03_misc.cpp 19 | extern bool CheckArgs(const uint8_t* pb_gs, const RGB32* pb_rgb, size_t num_pixels, const float coef[4]); 20 | 21 | // Ch07_03_bm.cpp 22 | extern void ConvertRgbToGs_bm(void); 23 | extern bool CompareGsPixelBuffers(const uint8_t* pb_gs1, const uint8_t* pb_gs2, size_t num_pixels); 24 | 25 | // Miscellaneous constants 26 | const size_t c_Alignment = 64; 27 | const size_t c_NumPixelsMax = 256 * 1024 * 1024; 28 | -------------------------------------------------------------------------------- /Chapter07/Ch07_03/Ch07_03.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter07/Ch07_03/Ch07_03_misc.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // 
Ch07_03_misc.cpp 3 | //------------------------------------------------ 4 | 5 | #include "Ch07_03.h" 6 | #include "AlignedMem.h" 7 | 8 | bool CheckArgs(const uint8_t* pb_gs, const RGB32* pb_rgb, size_t num_pixels, const float coef[4]) 9 | { 10 | if (num_pixels > c_NumPixelsMax) 11 | return false; 12 | if (num_pixels % 16 != 0) 13 | return false; 14 | if (!AlignedMem::IsAligned(pb_gs, c_Alignment)) 15 | return false; 16 | if (!AlignedMem::IsAligned(pb_rgb, c_Alignment)) 17 | return false; 18 | if (coef[0] < 0.0f || coef[1] < 0.0f || coef[2] < 0.0f) 19 | return false; 20 | return true; 21 | } 22 | 23 | bool CompareGsPixelBuffers(const uint8_t* pb_gs1, const uint8_t* pb_gs2, size_t num_pixels) 24 | { 25 | for (size_t i = 0; i < num_pixels; i++) 26 | { 27 | if (abs((int)pb_gs1[i] - (int)pb_gs2[i]) > 1) 28 | return false; 29 | } 30 | return true; 31 | } 32 | -------------------------------------------------------------------------------- /Chapter07/Ch07_04/Ch07_04.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch07_04.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | #include 8 | 9 | // Compare operators 10 | enum class CmpOp { EQ, NE, LT, LE, GT, GE }; 11 | 12 | // Ch07_04_fcpp.cpp 13 | extern void ComparePixels_Cpp(uint8_t* pb_des, const uint8_t* pb_src, 14 | size_t num_pixels, CmpOp cmp_op, uint8_t cmp_val); 15 | extern void ComparePixels_Iavx512(uint8_t* pb_des, const uint8_t* pb_src, 16 | size_t num_pixels, CmpOp cmp_op, uint8_t cmp_val); 17 | 18 | // Ch07_04_misc.cpp 19 | extern bool CheckArgs(const uint8_t* pb_des, const uint8_t* pb_src, 20 | size_t num_pixels); 21 | extern void DisplayResults(const uint8_t* pb_des1, const uint8_t* pb_des2, 22 | size_t num_pixels, CmpOp cmp_op, uint8_t cmp_val, size_t test_id); 23 | extern void InitArray(uint8_t* x, size_t n, unsigned int seed); 24 | 25 | // Miscellaneous constants 26 | 
const size_t c_Alignment = 64; 27 | const size_t c_NumPixelsMax = 16 * 1024 * 1024; 28 | -------------------------------------------------------------------------------- /Chapter07/Ch07_04/Ch07_04.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter07/Ch07_05/Ch07_05.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch07_05.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | #include 8 | 9 | struct ImageStats 10 | { 11 | uint8_t* m_PixelBuffer; 12 | uint32_t m_PixelMinVal; 13 | uint32_t m_PixelMaxVal; 14 | size_t m_NumPixels; 15 | size_t m_NumPixelsInRange; 16 | uint64_t m_PixelSum; 17 | uint64_t m_PixelSumSquares; 18 | double m_PixelMean; 19 | double m_PixelStDev; 20 | }; 21 | 22 | // Ch07_05.cpp 23 | extern const char* c_ImageFileName; 24 | 25 | // Ch07_05_fcpp.cpp 26 | extern void CalcImageStats_Cpp(ImageStats& im_stats); 27 | extern void CalcImageStats_Iavx512(ImageStats& im_stats); 28 | 29 | // Ch07_05_misc.cpp 30 | extern bool CheckArgs(const ImageStats& im_stats); 31 | 32 | // Ch07_05_bm.cpp 33 | extern void CalcImageStats_bm(void); 34 | 35 | // Miscellaneous constants 36 | const size_t c_Alignment = 64; 37 | const size_t c_NumPixelsMax = 64 * 1024 * 1024; 38 | const uint32_t c_PixelMinVal = 40; 39 | const uint32_t c_PixelMaxVal = 230; 40 | -------------------------------------------------------------------------------- /Chapter07/Ch07_05/Ch07_05.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter07/Ch07_05/Ch07_05_misc.cpp: -------------------------------------------------------------------------------- 1 | 
//------------------------------------------------ 2 | // Ch07_05_misc.cpp 3 | //------------------------------------------------ 4 | 5 | #include "Ch07_05.h" 6 | #include "AlignedMem.h" 7 | 8 | bool CheckArgs(const ImageStats& im_stats) 9 | { 10 | if (im_stats.m_NumPixels == 0) 11 | return false; 12 | if (im_stats.m_NumPixels % 64 != 0) 13 | return false; 14 | if (im_stats.m_NumPixels > c_NumPixelsMax) 15 | return false; 16 | if (!AlignedMem::IsAligned(im_stats.m_PixelBuffer, c_Alignment)) 17 | return false; 18 | return true; 19 | } 20 | -------------------------------------------------------------------------------- /Chapter08/Ch08_01/Ch08_01.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch08_01.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include "ZmmVal.h" 7 | 8 | // Ch08_01_fcpp.cpp 9 | extern void PackedMathF32_Iavx512(ZmmVal c[8], const ZmmVal* a, const ZmmVal* b); 10 | extern void PackedMathF64_Iavx512(ZmmVal c[8], const ZmmVal* a, const ZmmVal* b); 11 | -------------------------------------------------------------------------------- /Chapter08/Ch08_01/Ch08_01.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter08/Ch08_02/Ch08_02.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch08_02.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | #include "ZmmVal.h" 8 | 9 | // Ch08_02_fcpp.cpp 10 | extern void PackedCompareF32_Iavx512(uint16_t c[8], const ZmmVal* a, const ZmmVal* b); 11 | extern void PackedCompareF64_Iavx512(ZmmVal* c, const ZmmVal* a, const ZmmVal* b, 12 | double x1, double x2, double x3); 13 | 
-------------------------------------------------------------------------------- /Chapter08/Ch08_02/Ch08_02.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter08/Ch08_03/Ch08_03.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch08_03.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | 8 | // Ch08_03_fcpp.cpp 9 | extern void CalcMeanF32_Cpp(float* mean, const float* x, size_t n); 10 | extern void CalcMeanF32_Iavx512(float* mean, const float* x, size_t n); 11 | extern void CalcStDevF32_Cpp(float* st_dev, const float* x, size_t n, float mean); 12 | extern void CalcStDevF32_Iavx512(float* st_dev, const float* x, size_t n, 13 | float mean); 14 | 15 | // Ch08_03_misc.cpp 16 | extern bool CheckArgs(const float* x, size_t n); 17 | extern void InitArray(float* x, size_t n); 18 | 19 | // Miscellaneous constants 20 | const size_t c_NumElements = 91; 21 | const unsigned int c_RngSeed = 13; 22 | const float c_ArrayFillMin = 1.0f; 23 | const float c_ArrayFillMax = 100.0f; 24 | const size_t c_Alignment = 64; 25 | -------------------------------------------------------------------------------- /Chapter08/Ch08_03/Ch08_03.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter08/Ch08_03/Ch08_03_misc.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch08_03_misc.cpp 3 | //------------------------------------------------ 4 | 5 | #include "Ch08_03.h" 6 | #include "AlignedMem.h" 7 | #include "MT.h" 8 | 9 | bool CheckArgs(const float* x, size_t n) 10 | { 11 | 
return ((n >= 2) && AlignedMem::IsAligned(x, c_Alignment)); 12 | } 13 | 14 | void InitArray(float* x, size_t n) 15 | { 16 | MT::FillArrayFP(x, n, c_ArrayFillMin, c_ArrayFillMax, c_RngSeed); 17 | } 18 | -------------------------------------------------------------------------------- /Chapter08/Ch08_04/Ch08_04.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch08_04.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | #include "MatrixF64.h" 8 | 9 | // Note: In this example, CMD stands for covariance matrix data 10 | struct CMD 11 | { 12 | MatrixF64 m_X; // Data matrix 13 | MatrixF64 m_CovMat; // Covariance matrix 14 | std::vector m_VarMeans; // Variable (row) means 15 | 16 | CMD(size_t n_vars, size_t n_obvs) : 17 | m_X(n_vars, n_obvs), m_CovMat(n_vars, n_vars), m_VarMeans(n_vars) { } 18 | }; 19 | 20 | // Ch08_04_fcpp.cpp 21 | extern void CalcCovMatF64_Cpp(CMD& cmd); 22 | extern void CalcCovMatF64_Iavx512(CMD& cmd); 23 | 24 | // Ch08_04_misc.cpp 25 | extern bool CheckArgs(const CMD& cmd); 26 | extern bool CompareResults(CMD& cmd1, CMD& cmd2); 27 | extern void InitCMD(CMD& cmd1, CMD& cmd2); 28 | 29 | // Ch08_04_misc2.cpp 30 | extern void DisplayData(const CMD& cmd); 31 | -------------------------------------------------------------------------------- /Chapter08/Ch08_04/Ch08_04.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter08/Ch08_04/Ch08_04_misc2.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch08_04_misc2.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | #include 7 | #include "Ch08_04.h" 8 | 9 | void DisplayData(const CMD& cmd) 10 | { 11 | std::cout 
<< "----- Data matrix ----\n"; 12 | std::cout << cmd.m_X << std::endl; 13 | } 14 | -------------------------------------------------------------------------------- /Chapter08/Ch08_05/Ch08_05.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch08_05.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include "MatrixF32.h" 7 | 8 | // Ch08_05_fcpp.cpp 9 | void MatrixMulF32_Cpp(MatrixF32& c, const MatrixF32& a, const MatrixF32& b); 10 | void MatrixMulF32_Iavx512(MatrixF32& c, const MatrixF32& a, const MatrixF32& b); 11 | 12 | // Ch08_05_misc.cpp 13 | bool CheckArgs(const MatrixF32& c, const MatrixF32& a, const MatrixF32& b); 14 | void InitMat(MatrixF32& c1, MatrixF32& c2, MatrixF32& a, MatrixF32& b); 15 | void SaveResults(const MatrixF32& c1, const MatrixF32& c2, const MatrixF32& a, 16 | const MatrixF32& b); 17 | 18 | // Ch08_05_bm.cpp 19 | void MatrixMulF32_bm(void); 20 | -------------------------------------------------------------------------------- /Chapter08/Ch08_05/Ch08_05.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter08/Ch08_06/Ch08_06.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch08_06.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include "MatrixF64.h" 7 | 8 | // Ch08_06_fcpp.cpp 9 | void MatrixMulF64_Cpp(MatrixF64& c, const MatrixF64& a, const MatrixF64& b); 10 | void MatrixMulF64_Iavx512(MatrixF64& c, const MatrixF64& a, const MatrixF64& b); 11 | 12 | // Ch08_06_misc.cpp 13 | bool CheckArgs(const MatrixF64& c, const MatrixF64& a, const MatrixF64& b); 14 | void InitMat(MatrixF64& c1, MatrixF64& c2, MatrixF64& a, MatrixF64& b); 15 | void 
// Miscellaneous constants
const size_t c_Alignment = 64;              // NOTE(review): presumably the byte alignment CheckArgs() expects of the vector arrays - confirm in Ch08_07_misc.cpp
const int c_RngMinVal = 1;                  // NOTE(review): presumably the smallest random test value produced by Init() - confirm
const int c_RngMaxVal = 500;                // NOTE(review): presumably the largest random test value produced by Init() - confirm
const unsigned int c_RngSeedVal = 187;      // NOTE(review): presumably the seed of the random number generator used by Init() - confirm
// Validates the arguments of the 1D convolution functions.
// Returns true only when all of the following hold:
//   - kernel has an odd number of elements (an empty kernel is rejected),
//   - y and x have the same number of elements,
//   - y has at least as many elements as kernel.
// NOTE(review): the element type is written as float to match the F32
// convolution functions declared in Ch08_08.h - confirm against the callers.
bool CheckArgs(std::vector<float>& y, const std::vector<float>& x, const std::vector<float>& kernel)
{
    const size_t kernel_size = kernel.size();

    // Bit 0 clear means an even size; this also covers a size of zero.
    if ((kernel_size & 1) == 0)
        return false;
    if (y.size() != x.size())
        return false;
    if (y.size() < kernel_size)
        return false;
    return true;
}
// Kernel selector passed to GetKernel1Dx2().
// NOTE(review): going by the enumerator names, each value appears to select a
// low-pass kernel of the stated size (3x3 through 9x9) - confirm in
// Ch08_09_misc.cpp.
enum class KERNEL_ID : unsigned int
{
    LowPass1Dx2_3x3, LowPass1Dx2_5x5, LowPass1Dx2_7x7, LowPass1Dx2_9x9
};
InitConvData1Dx2(cd, fn_src); 18 | 19 | const size_t num_it = 500; 20 | const size_t num_alg = 2; 21 | BmThreadTimer bmtt(num_it, num_alg); 22 | 23 | for (size_t i = 0; i < num_it; i++) 24 | { 25 | bmtt.Start(i, 0); 26 | Convolve1Dx2_F32_Cpp(cd[0]); 27 | bmtt.Stop(i, 0); 28 | 29 | bmtt.Start(i, 1); 30 | Convolve1Dx2_F32_Iavx512(cd[1]); 31 | bmtt.Stop(i, 1); 32 | 33 | if ((i % 10) == 0) 34 | std::cout << '.' << std::flush; 35 | } 36 | 37 | std::cout << '\n'; 38 | std::string fn = bmtt.BuildCsvFilenameString("Ch08_09_Convolve1Dx2_F32_bm"); 39 | bmtt.SaveElapsedTimes(fn, BmThreadTimer::EtUnit::MicroSec, 2); 40 | std::cout << "Benchmark times saved to file " << fn << '\n'; 41 | } 42 | -------------------------------------------------------------------------------- /Chapter08/Ch08_09/Ch08_09_test.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch08_09_test.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | #include 7 | #include 8 | #include "Ch08_09.h" 9 | #include "MT_Convolve.h" 10 | 11 | void DisplayKernel1Dx2(float sigma, size_t ks) 12 | { 13 | std::vector gk = GenGaussianKernel1D(sigma, ks); 14 | 15 | std::cout << std::fixed << std::setprecision(6); 16 | 17 | float sum = 0.0f; 18 | for (size_t i = 0; i < ks; i++) 19 | { 20 | sum += gk[i]; 21 | std::cout << std::setw(10) << gk[i] << ' '; 22 | } 23 | 24 | std::cout << '\n'; 25 | std::cout << " sum = " << sum << "\n\n"; 26 | } 27 | -------------------------------------------------------------------------------- /Chapter09/Ch09_01/Ch09_01.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter09/Ch09_01/Cpuid__.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 
// Four 32-bit fields named after the registers EAX, EBX, ECX and EDX.
// Cpuid__() takes a pointer to this struct as its r_out argument.
// NOTE(review): presumably receives the register values produced by the CPUID
// instruction - confirm in the Cpuid__ implementation.
struct CpuidRegs
{
    uint32_t EAX;
    uint32_t EBX;
    uint32_t ECX;
    uint32_t EDX;
};
-------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch09_03.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | 8 | // Ch09_03_fcpp.cpp 9 | extern void CalcBSA_F64_Cpp(std::vector& bsa, const std::vector& ht, 10 | const std::vector& wt); 11 | extern void CalcBSA_F64_Iavx(std::vector& bsa, const std::vector& ht, 12 | const std::vector& wt); 13 | 14 | // Ch09_03_misc.cpp 15 | extern bool CheckArgs(const std::vector& bsa, 16 | const std::vector& ht, const std::vector& wt); 17 | extern bool CompareResults(const std::vector& bsa1, 18 | const std::vector& bsa2); 19 | extern void FillHeightWeightVectors(std::vector& ht, 20 | std::vector& wt); 21 | 22 | // Ch09_03_bm.cpp 23 | void CalcBSA_bm(void); 24 | -------------------------------------------------------------------------------- /Chapter09/Ch09_03/Ch09_03.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter09/Ch09_03/Ch09_03_bm.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch09_03_bm.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | #include "Ch09_03.h" 7 | #include "BmThreadTimer.h" 8 | 9 | void CalcBSA_bm(void) 10 | { 11 | std::cout << "\nRunning benchmark function CalcBSA_bm - please wait\n"; 12 | 13 | const size_t n = 200000; 14 | std::vector heights(n); 15 | std::vector weights(n); 16 | std::vector bsa1(n * 3); 17 | std::vector bsa2(n * 3); 18 | 19 | FillHeightWeightVectors(heights, weights); 20 | 21 | const size_t num_it = 500; 22 | const size_t num_alg = 2; 23 | BmThreadTimer bmtt(num_it, num_alg); 24 | 25 | for (size_t i = 0; i < num_it; i++) 26 | { 27 | bmtt.Start(i, 0); 28 | CalcBSA_F64_Cpp(bsa1, 
heights, weights); 29 | bmtt.Stop(i, 0); 30 | 31 | bmtt.Start(i, 1); 32 | CalcBSA_F64_Iavx(bsa2, heights, weights); 33 | bmtt.Stop(i, 1); 34 | } 35 | 36 | std::cout << '\n'; 37 | std::string fn = bmtt.BuildCsvFilenameString("Ch09_03_CalcBSA_bm"); 38 | bmtt.SaveElapsedTimes(fn, BmThreadTimer::EtUnit::MicroSec, 2); 39 | std::cout << "Benchmark times save to file " << fn << '\n'; 40 | } 41 | -------------------------------------------------------------------------------- /Chapter11/Ch11_01/Ch11_01.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch11_01.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | #include "Ch11_01.h" 7 | 8 | static void AddI32(void); 9 | static void SubI64(void); 10 | 11 | int main() 12 | { 13 | AddI32(); 14 | SubI64(); 15 | return 0; 16 | } 17 | 18 | static void AddI32(void) 19 | { 20 | int a = 10; 21 | int b = 20; 22 | int c = 30; 23 | int d = AddI32_A(a, b, c); 24 | 25 | DisplayResultsAddI32(a, b, c, d); 26 | } 27 | 28 | static void SubI64(void) 29 | { 30 | long long a = 10; 31 | long long b = 20; 32 | long long c = 30; 33 | long long d = SubI64_A(a, b, c); 34 | 35 | DisplayResultsSubI64(a, b, c, d); 36 | } 37 | -------------------------------------------------------------------------------- /Chapter11/Ch11_01/Ch11_01.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch11_01.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | 7 | // Ch11_01_misc.cpp 8 | extern void DisplayResultsAddI32(int a, int b, int c, int d); 9 | extern void DisplayResultsSubI64(long long a, long long b, long long c, long long d); 10 | 11 | // Ch11_01_fasm.asm 12 | extern "C" int AddI32_A(int a, int b, int c); 13 | extern "C" long long SubI64_A(long long a, long long b, long long c); 14 | 
// Writes the operands a, b, c and the result d of AddI32_A() to std::cout,
// one "name = value" pair per line, followed by a blank line.
void DisplayResultsAddI32(int a, int b, int c, int d)
{
    std::cout << "Results for AddI32_A()\n"
              << "a = " << a << '\n'
              << "b = " << b << '\n'
              << "c = " << c << '\n'
              << "d = " << d << '\n'
              << '\n';
}
// Writes the operands a, b, c and the result d of MulI32_A() to std::cout,
// one "name = value" pair per line, followed by a blank line.
void DisplayResultsMulI32(int32_t a, int32_t b, int32_t c, int32_t d)
{
    const char* const labels[4] = { "a = ", "b = ", "c = ", "d = " };
    const int32_t values[4] = { a, b, c, d };

    std::cout << "Results for MulI32_A()\n";

    for (size_t k = 0; k < 4; ++k)
        std::cout << labels[k] << values[k] << '\n';

    std::cout << '\n';
}
// Prints one signed 32-bit division test case to std::cout: the test number,
// the operands a and b, and then either the quotient and remainder
// (rc != 0) or "undefined" for both (rc == 0), followed by a blank line.
// rc is declared int32_t so that this definition matches the prototype in
// Ch11_03.h exactly instead of relying on int32_t being an alias of int.
// NOTE(review): rc is presumably the value returned by DivI32_A() - confirm
// in the caller.
void DisplayResultsDivI32(size_t test_id, int32_t rc, int32_t a,
    int32_t b, int32_t quo, int32_t rem)
{
    const char nl = '\n';
    std::cout << "Test #" << test_id << " | ";
    std::cout << "a: " << a << " b: " << b << nl;

    if (rc != 0)
        std::cout << "quo: " << quo << " rem: " << rem << nl;
    else
        std::cout << "quo: undefined rem: undefined" << nl;

    std::cout << nl;
}
-------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter11/Ch11_05/Ch11_05.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch11_05.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | #include 7 | #include 8 | #include "Ch11_05.h" 9 | 10 | static void MemAddressing(void); 11 | 12 | int main() 13 | { 14 | MemAddressing(); 15 | return 0; 16 | } 17 | 18 | static void MemAddressing() 19 | { 20 | const int w = 5; 21 | const char nl = '\n'; 22 | const char* delim = ", "; 23 | 24 | int n = g_NumPrimes_A; 25 | 26 | g_SumPrimes_A = 0; 27 | 28 | for (int i = -1; i < n + 1; i++) 29 | { 30 | int v1 = -1, v2 = -1, v3 = -1, v4 = -1; 31 | int rc = MemAddressing_A(i, &v1, &v2, &v3, &v4); 32 | 33 | std::cout << "i = " << std::setw(w - 1) << i << delim; 34 | std::cout << "rc = " << std::setw(w - 1) << rc << delim; 35 | std::cout << "v1 = " << std::setw(w) << v1 << delim; 36 | std::cout << "v2 = " << std::setw(w) << v2 << delim; 37 | std::cout << "v3 = " << std::setw(w) << v3 << delim; 38 | std::cout << "v4 = " << std::setw(w) << v4 << delim; 39 | std::cout << nl; 40 | } 41 | 42 | std::cout << "\ng_SumPrimes_A = " << g_SumPrimes_A << nl; 43 | } 44 | -------------------------------------------------------------------------------- /Chapter11/Ch11_05/Ch11_05.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch11_05.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | 7 | // Ch11_05_fasm.asm 8 | extern "C" int MemAddressing_A(int i, int* v1, int* v2, int* v3, int* v4); 9 | 10 | extern "C" int g_NumPrimes_A; 11 | extern "C" int g_SumPrimes_A; 12 | 
// Test driver: fills a 20-element int array with FillArray(), sums it with
// both SumElementsI32_Cpp() and SumElementsI32_A() (the latter is declared
// extern "C" under the Ch11_06_fasm.asm heading in Ch11_06.h), and passes the
// array and both sums to DisplayResults().
static void SumElementsI32(void)
{
    const size_t n = 20;
    int x[n];

    FillArray(x, n);

    int sum1 = SumElementsI32_Cpp(x, n);
    int sum2 = SumElementsI32_A(x, n);

    DisplayResults(x, n, sum1, sum2);
}
once 6 | 7 | // Ch11_06_fcpp.cpp 8 | extern int SumElementsI32_Cpp(const int* x, size_t n); 9 | 10 | // Ch11_06_fasm.asm 11 | extern "C" int SumElementsI32_A(const int* x, size_t n); 12 | 13 | // Ch11_06_misc.cpp 14 | extern void FillArray(int* x, size_t n); 15 | extern void DisplayResults(const int* x, size_t n, int sum1, int sum2); 16 | -------------------------------------------------------------------------------- /Chapter11/Ch11_06/Ch11_06.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter11/Ch11_06/Ch11_06_fasm.asm: -------------------------------------------------------------------------------- 1 | ;------------------------------------------------- 2 | ; Ch11_06_fasm.asm 3 | ;------------------------------------------------- 4 | 5 | ;------------------------------------------------------------------------------ 6 | ; extern "C" int SumElementsI32_A(const int* x, size_t n); 7 | ;------------------------------------------------------------------------------ 8 | 9 | .code 10 | SumElementsI32_A proc 11 | 12 | ; Initialize sum to zero 13 | xor eax,eax ;sum = 0 14 | mov r10,-1 ;i = -1 15 | 16 | ; Sum the elements of the array 17 | Loop1: inc r10 ;i += 1 18 | cmp r10,rdx ;is i >= n? 
// Returns the sum of the n int values starting at x; returns 0 when n == 0.
//
// The running total is kept in an unsigned int so that an overflowing sum
// wraps modulo 2^32 instead of invoking undefined behaviour. This matches
// SumElementsI32_A, which accumulates with a plain 32-bit "add eax, ...".
// The final unsigned -> int conversion of an out-of-range value is
// implementation-defined before C++20, but yields the two's-complement
// result on the compilers this project targets.
int SumElementsI32_Cpp(const int* x, size_t n)
{
    unsigned int sum = 0;

    for (size_t i = 0; i < n; i++)
        sum += (unsigned int)x[i];

    return (int)sum;
}
// Prints one result line of the form
//     <s1>(<a>, <b>, <c>) = <result>
// to std::cout, with every integer right-aligned in a 4-character field.
void DisplayResult(const char* s1, int a, int b, int c, int result)
{
    const int field_width = 4;

    std::cout << s1 << "("
              << std::setw(field_width) << a << ", "
              << std::setw(field_width) << b << ", "
              << std::setw(field_width) << c << ") = "
              << std::setw(field_width) << result << '\n';
}
-------------------------------------------------------------------------------- /Chapter11/Ch11_08/Ch11_08.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | 23 | 24 | Source Files 25 | 26 | 27 | 28 | 29 | Header Files 30 | 31 | 32 | -------------------------------------------------------------------------------- /Chapter11/Ch11_08/Ch11_08.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter12/Ch12_01/Ch12_01.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch12_01.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | 7 | // Ch12_01_fasm.asm 8 | extern "C" float ConvertFtoC_Aavx(float deg_f); 9 | extern "C" float ConvertCtoF_Aavx(float deg_c); 10 | -------------------------------------------------------------------------------- /Chapter12/Ch12_01/Ch12_01.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 
// Returns the Euclidean distance between the 3-D points (x1, y1, z1) and
// (x2, y2, z2).
double CalcDistance_Cpp(double x1, double y1, double z1, double x2, double y2, double z2)
{
    const double dx = x2 - x1;
    const double dy = y2 - y1;
    const double dz = z2 - z1;

    // Squares are summed in x, y, z order, the same order as before.
    return sqrt(dx * dx + dy * dy + dz * dz);
}
-------------------------------------------------------------------------------- /Chapter12/Ch12_02/Ch12_02_misc.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch12_02_misc.cpp 3 | //------------------------------------------------ 4 | 5 | #include "Ch12_02.h" 6 | #include "MT.h" 7 | 8 | void InitArrays(double* x, double* y, double* z, size_t n, unsigned int rng_seed) 9 | { 10 | const int rng_min = 1; 11 | const int rng_max = 99; 12 | 13 | MT::FillArray(x, n, rng_min, rng_max, rng_seed); 14 | MT::FillArray(y, n, rng_min, rng_max, rng_seed + 1); 15 | MT::FillArray(z, n, rng_min, rng_max, rng_seed + 2); 16 | } 17 | -------------------------------------------------------------------------------- /Chapter12/Ch12_03/Ch12_03.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch12_03.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "Ch12_03.h" 10 | 11 | static void CompareF32(void); 12 | 13 | int main() 14 | { 15 | CompareF32(); 16 | return 0; 17 | } 18 | 19 | static void CompareF32(void) 20 | { 21 | const size_t n = 6; 22 | float a[n] {120.0, 250.0, 300.0, -18.0, -81.0, 42.0}; 23 | float b[n] {130.0, 240.0, 300.0, 32.0, -100.0, 0.0}; 24 | 25 | // Set NAN test value 26 | b[n - 1] = std::numeric_limits::quiet_NaN(); 27 | 28 | std::cout << "\n----- Results for CompareF32 -----\n"; 29 | 30 | for (size_t i = 0; i < n; i++) 31 | { 32 | uint8_t cmp_results[c_NumCmpOps]; 33 | 34 | CompareF32_Aavx(a[i], b[i], cmp_results); 35 | DisplayResults(a[i], b[i], cmp_results); 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /Chapter12/Ch12_03/Ch12_03.h: -------------------------------------------------------------------------------- 1 | 
//------------------------------------------------ 2 | // Ch12_03.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | 8 | // Ch12_03_fasm.asm 9 | extern "C" void CompareF32_Aavx(float a, float b, uint8_t* results); 10 | 11 | // Ch12_03_misc.cpp 12 | extern void DisplayResults(float a, float b, const uint8_t* cmp_results); 13 | 14 | // Miscellaenous constants 15 | const size_t c_NumCmpOps = 7; 16 | -------------------------------------------------------------------------------- /Chapter12/Ch12_03/Ch12_03.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter12/Ch12_03/Ch12_03_misc.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch12_03_misc.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | #include 7 | #include "Ch12_03.h" 8 | 9 | static const char* c_OpStrings[c_NumCmpOps] = 10 | { "UO", "LT", "LE", "EQ", "NE", "GT", "GE" }; 11 | 12 | void DisplayResults(float a, float b, const uint8_t* cmp_results) 13 | { 14 | std::cout << "a = " << a << ", "; 15 | std::cout << "b = " << b << '\n'; 16 | 17 | for (size_t i = 0; i < c_NumCmpOps; i++) 18 | { 19 | std::cout << c_OpStrings[i] << '='; 20 | std::cout << std::boolalpha << std::left; 21 | std::cout << std::setw(6) << (int)cmp_results[i] << ' '; 22 | } 23 | 24 | std::cout << "\n\n"; 25 | } 26 | -------------------------------------------------------------------------------- /Chapter12/Ch12_04/Ch12_04.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch12_04.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | 7 | // Simple union for data exchange 8 | union Uval 9 | { 10 | int32_t m_I32; 
11 | int64_t m_I64; 12 | float m_F32; 13 | double m_F64; 14 | }; 15 | 16 | // The order of values in enum CvtOp must match the jump table 17 | // that's defined in the .asm file. 18 | enum class CvtOp : unsigned int 19 | { 20 | I32_F32, // int32_t to float 21 | F32_I32, // float to int32_t 22 | I32_F64, // int32_t to double 23 | F64_I32, // double to int32_t 24 | I64_F32, // int64_t to float 25 | F32_I64, // float to int64_t 26 | I64_F64, // int64_t to double 27 | F64_I64, // double to int64_t 28 | F32_F64, // float to double 29 | F64_F32, // double to float 30 | }; 31 | 32 | // Enumerated type for rounding control 33 | enum class RC : unsigned int 34 | { 35 | Nearest, Down, Up, Zero // Do not change order 36 | }; 37 | 38 | // Ch12_04_fasm.asm 39 | extern "C" bool ConvertScalar_Aavx(Uval* a, Uval* b, CvtOp cvt_op, RC rc); 40 | -------------------------------------------------------------------------------- /Chapter12/Ch12_04/Ch12_04.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter12/Ch12_05/Ch12_05.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch12_05.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | 7 | // Ch12_05_fcpp.cpp 8 | extern bool CalcMeanF32_Cpp(float* mean, const float* x, size_t n); 9 | extern bool CalcStDevF32_Cpp(float* st_dev, const float* x, size_t n, float mean); 10 | 11 | // Ch12_05_fasm.asm 12 | extern "C" bool CalcMeanF32_Aavx(float* mean, const float* x, size_t n); 13 | extern "C" bool CalcStDevF32_Aavx(float* st_dev, const float* x, size_t n, float mean); 14 | 15 | // Miscellaneous constants 16 | const size_t c_NumElements = 91; 17 | const unsigned int c_RngSeed = 13; 18 | const float c_ArrayFillMin = 1.0f; 19 | const float c_ArrayFillMax = 100.0f; 20 | 
// Computes the sample standard deviation of x[0..n-1] about the supplied
// mean and stores it in *st_dev:
//     sqrt( sum((x[i] - mean)^2) / (n - 1) )
// Returns false, leaving *st_dev untouched, when n < 2; returns true
// otherwise.
bool CalcStDevF32_Cpp(float* st_dev, const float* x, size_t n, float mean)
{
    if (n < 2)
        return false;

    float acc = 0.0f;

    for (const float* p = x; p != x + n; ++p)
    {
        const float delta = *p - mean;
        acc += delta * delta;
    }

    *st_dev = sqrt(acc / (n - 1));
    return true;
}
29 | std::cout << "----- Results for SumIntegers_A ----- \n"; 30 | std::cout << "a: " << std::setw(w) << (int)a << nl; 31 | std::cout << "b: " << std::setw(w) << b << nl; 32 | std::cout << "c: " << std::setw(w) << c << nl; 33 | std::cout << "d: " << std::setw(w) << d << nl; 34 | std::cout << "e: " << std::setw(w) << (int)e << nl; 35 | std::cout << "f: " << std::setw(w) << f << nl; 36 | std::cout << "g: " << std::setw(w) << g << nl; 37 | std::cout << "h: " << std::setw(w) << h << nl; 38 | std::cout << "sum: " << std::setw(w) << sum << nl; 39 | } 40 | -------------------------------------------------------------------------------- /Chapter12/Ch12_06/Ch12_06.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch12_06.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | 8 | // Ch12_06_fasm.asm 9 | extern "C" int64_t SumIntegers_A(int8_t a, int16_t b, int32_t c, int64_t d, 10 | int8_t e, int16_t f, int32_t g, int64_t h); 11 | -------------------------------------------------------------------------------- /Chapter12/Ch12_06/Ch12_06.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter12/Ch12_07/Ch12_07.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch12_07.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | 8 | // Ch12_07_fasm.asm 9 | extern "C" void CalcSumProd_A(const int64_t* a, const int64_t* b, int32_t n, 10 | int64_t* sum_a, int64_t* sum_b, int64_t* prod_a, int64_t* prod_b); 11 | -------------------------------------------------------------------------------- /Chapter12/Ch12_07/Ch12_07.vcxproj.user: 
// For each of the n cones described by radius r[i] and height h[i], stores
//     sa_cone[i]  = pi * r * (r + sqrt(r^2 + h^2))
//     vol_cone[i] = pi * r^2 * h / 3
// Returns false without writing anything when n <= 0, true otherwise.
bool CalcConeAreaVol_Cpp(const double* r, const double* h, int n, double* sa_cone, double* vol_cone)
{
    if (n <= 0)
        return false;

    for (int i = 0; i < n; ++i)
    {
        // sqrt(r^2 + h^2) term of the surface-area formula.
        const double slant = sqrt(r[i] * r[i] + h[i] * h[i]);

        sa_cone[i] = M_PI * r[i] * (r[i] + slant);
        vol_cone[i] = M_PI * r[i] * r[i] * h[i] / 3.0;
    }

    return true;
}
// Computes three body-surface-area estimates for each of the n
// (ht[i], wt[i]) pairs:
//     bsa1[i] = 0.007184 * ht^0.725   * wt^0.425
//     bsa2[i] = 0.0235   * ht^0.42246 * wt^0.51456
//     bsa3[i] = sqrt(ht * wt / 3600)
// NOTE(review): the coefficients match the published DuBois, Gehan-George
// and Mosteller formulas, which would imply ht in cm and wt in kg - confirm
// with the caller.
// Returns false without writing anything when n <= 0, true otherwise.
bool CalcBSA_Cpp(const double* ht, const double* wt, int n, double* bsa1, double* bsa2, double* bsa3)
{
    if (n <= 0)
        return false;

    const double k1 = 0.007184, ht_exp1 = 0.725, wt_exp1 = 0.425;
    const double k2 = 0.0235, ht_exp2 = 0.42246, wt_exp2 = 0.51456;
    const double k3 = 3600.0;

    for (int i = 0; i < n; ++i)
    {
        bsa1[i] = k1 * pow(ht[i], ht_exp1) * pow(wt[i], wt_exp1);
        bsa2[i] = k2 * pow(ht[i], ht_exp2) * pow(wt[i], wt_exp2);
        bsa3[i] = sqrt(ht[i] * wt[i] / k3);
    }

    return true;
}
-------------------------------------------------------------------------------- /Chapter13/Ch13_01/Ch13_01.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter13/Ch13_02/Ch13_02.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch13_02.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include "XmmVal.h" 7 | 8 | // Ch13_02_fasm.asm 9 | extern "C" void MulI16_Aavx(XmmVal c[2], const XmmVal* a, const XmmVal* b); 10 | extern "C" void MulI32a_Aavx(XmmVal* c, const XmmVal* a, const XmmVal* b); 11 | extern "C" void MulI32b_Aavx(XmmVal c[2], const XmmVal* a, const XmmVal* b); 12 | -------------------------------------------------------------------------------- /Chapter13/Ch13_02/Ch13_02.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter13/Ch13_03/Ch13_03.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch13_03.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include "XmmVal.h" 7 | 8 | // Ch13_03_fasm.asm 9 | extern "C" void AndU16_Aavx(XmmVal* c, const XmmVal* a, const XmmVal* b); 10 | extern "C" void OrU16_Aavx(XmmVal* c, const XmmVal* a, const XmmVal* b); 11 | extern "C" void XorU16_Aavx(XmmVal* c, const XmmVal* a, const XmmVal* b); 12 | -------------------------------------------------------------------------------- /Chapter13/Ch13_03/Ch13_03.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 
-------------------------------------------------------------------------------- /Chapter13/Ch13_04/Ch13_04.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch13_04.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include "XmmVal.h" 7 | 8 | // Ch13_04_fasm.asm 9 | extern "C" void SllU16_Aavx(XmmVal* c, const XmmVal* a, int count); 10 | extern "C" void SrlU16_Aavx(XmmVal* c, const XmmVal* a, int count); 11 | extern "C" void SraU16_Aavx(XmmVal* c, const XmmVal* a, int count); 12 | -------------------------------------------------------------------------------- /Chapter13/Ch13_04/Ch13_04.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter13/Ch13_05/Ch13_05.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch13_05.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | #include "Ch13_05.h" 7 | #include "AlignedMem.h" 8 | 9 | static void CalcMinMaxU8(); 10 | 11 | int main() 12 | { 13 | CalcMinMaxU8(); 14 | CalcMinMaxU8_bm(); 15 | } 16 | 17 | static void CalcMinMaxU8() 18 | { 19 | const char nl = '\n'; 20 | size_t n = c_NumElements; 21 | AlignedArray x_aa(n, 16); 22 | uint8_t* x = x_aa.Data(); 23 | 24 | InitArray(x, n, c_RngSeedVal); 25 | 26 | uint8_t x_min0 = 0, x_max0 = 0; 27 | uint8_t x_min1 = 0, x_max1 = 0; 28 | 29 | bool rc0 = CalcMinMaxU8_Cpp(&x_min0, &x_max0, x, n); 30 | bool rc1 = CalcMinMaxU8_Aavx(&x_min1, &x_max1, x, n); 31 | 32 | std::cout << "\nResults for CalcMinMaxU8_Cpp\n"; 33 | std::cout << "rc0: " << rc0 << " x_min0: " << (int)x_min0; 34 | std::cout << " x_max0: " << (int)x_max0 << nl; 35 | 36 | std::cout << "\nResults for CalcMinMaxU8_Aavx\n"; 37 | std::cout 
<< "rc1: " << rc1 << " x_min1: " << (int)x_min1; 38 | std::cout << " x_max1: " << (int)x_max1 << nl; 39 | } 40 | -------------------------------------------------------------------------------- /Chapter13/Ch13_05/Ch13_05.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch13_05.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | #include 8 | 9 | // Ch13_05_fcpp.cpp 10 | extern bool CalcMinMaxU8_Cpp(uint8_t* x_min, uint8_t* x_max, 11 | const uint8_t* x, size_t n); 12 | 13 | // Ch13_05_fasm.asm 14 | extern "C" bool CalcMinMaxU8_Aavx(uint8_t* x_min, uint8_t* x_max, 15 | const uint8_t* x, size_t n); 16 | 17 | // Ch13_05_misc.cpp 18 | extern void InitArray(uint8_t* x, size_t n, unsigned int rng_seed); 19 | 20 | // Ch13_05_BM.cpp 21 | extern void CalcMinMaxU8_bm(); 22 | 23 | // c_NumElements must be > 0 and even multiple of 16 24 | const size_t c_NumElements = 10000000; 25 | const unsigned int c_RngSeedVal = 23; 26 | -------------------------------------------------------------------------------- /Chapter13/Ch13_05/Ch13_05.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter13/Ch13_05/Ch13_05_bm.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch13_05_bm.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | #include "Ch13_05.h" 7 | #include "AlignedMem.h" 8 | #include "BmThreadTimer.h" 9 | 10 | void CalcMinMaxU8_bm(void) 11 | { 12 | std::cout << "\nRunning benchmark function CalcMinMaxU8_bm - please wait\n"; 13 | 14 | size_t n = c_NumElements; 15 | AlignedArray x_aa(n, 16); 16 | uint8_t* x = x_aa.Data(); 17 | 18 | InitArray(x, n, c_RngSeedVal); 19 | 20 | uint8_t 
x_min0 = 0, x_max0 = 0; 21 | uint8_t x_min1 = 0, x_max1 = 0; 22 | 23 | const size_t num_it = 500; 24 | const size_t num_alg = 2; 25 | BmThreadTimer bmtt(num_it, num_alg); 26 | 27 | for (size_t i = 0; i < num_it; i++) 28 | { 29 | bmtt.Start(i, 0); 30 | CalcMinMaxU8_Cpp(&x_min0, &x_max0, x, n); 31 | bmtt.Stop(i, 0); 32 | 33 | bmtt.Start(i, 1); 34 | CalcMinMaxU8_Aavx(&x_min1, &x_max1, x, n); 35 | bmtt.Stop(i, 1); 36 | } 37 | 38 | std::string fn = bmtt.BuildCsvFilenameString("Ch13_05_CalcMinMaxU8_bm"); 39 | bmtt.SaveElapsedTimes(fn, BmThreadTimer::EtUnit::MicroSec, 2); 40 | std::cout << "Benchmark times save to file " << fn << '\n'; 41 | } 42 | -------------------------------------------------------------------------------- /Chapter13/Ch13_05/Ch13_05_fcpp.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch13_05_fcpp.cpp 3 | //------------------------------------------------ 4 | 5 | #include "Ch13_05.h" 6 | #include "AlignedMem.h" 7 | 8 | bool CalcMinMaxU8_Cpp(uint8_t* x_min, uint8_t* x_max, const uint8_t* x, size_t n) 9 | { 10 | if (n == 0 || (n & 0xf) != 0) 11 | return false; 12 | 13 | if (!AlignedMem::IsAligned(x, 16)) 14 | return false; 15 | 16 | uint8_t min_val = 0xff; 17 | uint8_t max_val = 0; 18 | 19 | for (size_t i = 0; i < n; i++) 20 | { 21 | uint8_t val = *x++; 22 | 23 | if (val < min_val) 24 | min_val = val; 25 | else if (val > max_val) 26 | max_val = val; 27 | } 28 | 29 | *x_min = min_val; 30 | *x_max = max_val; 31 | return true; 32 | } 33 | -------------------------------------------------------------------------------- /Chapter13/Ch13_05/Ch13_05_misc.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch13_05_misc.cpp 3 | //------------------------------------------------ 4 | 5 | #include "Ch13_05.h" 6 | #include "MT.h" 7 | 8 | void InitArray(uint8_t* x, size_t n, 
unsigned int rng_seed) 9 | { 10 | int rng_min_val = 5; 11 | int rng_max_val = 250; 12 | MT::FillArray(x, n, rng_min_val, rng_max_val, rng_seed); 13 | 14 | // Use known values for min & max (for validation) 15 | x[(n / 4) * 3 + 1] = 2; 16 | x[n / 4 + 11] = 3; 17 | x[n / 2] = 252; 18 | x[n / 2 + 13] = 253; 19 | x[n / 8 + 5] = 4; 20 | x[n / 8 + 7] = 254; 21 | } 22 | -------------------------------------------------------------------------------- /Chapter13/Ch13_06/Ch13_06.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch13_06.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | #include 8 | 9 | // Ch13_06_fcpp.cpp 10 | extern bool CalcMeanU8_Cpp(double* mean_x, uint64_t* sum_x, const uint8_t* x, size_t n); 11 | 12 | // Ch13_06_fasm.asm 13 | extern "C" bool CalcMeanU8_Aavx(double* mean_x, uint64_t* sum_x, const uint8_t* x, size_t n); 14 | 15 | // Ch13_06_misc.cpp 16 | extern void InitArray(uint8_t* x, size_t n, unsigned int seed); 17 | extern bool CheckArgs(const uint8_t* x, size_t n); 18 | 19 | // Ch13_06_bm.cpp 20 | extern void CalcMeanU8_bm(void); 21 | 22 | // Miscellaneous constants 23 | const size_t c_NumElements = 10000000; 24 | const size_t c_Alignment = 16; 25 | const unsigned int c_RngSeedVal = 29; 26 | extern "C" size_t g_NumElementsMax; 27 | -------------------------------------------------------------------------------- /Chapter13/Ch13_06/Ch13_06.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter13/Ch13_06/Ch13_06_bm.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch13_06_bm.cpp 3 | //------------------------------------------------ 4 | 5 | #include "Ch13_06.h" 6 | 
#include "AlignedMem.h" 7 | #include "BmThreadTimer.h" 8 | 9 | void CalcMeanU8_bm(void) 10 | { 11 | std::cout << "\nRunning benchmark function CalcMeanU8_bm - please wait\n"; 12 | 13 | size_t n = c_NumElements; 14 | AlignedArray x_aa(n, c_Alignment); 15 | uint8_t* x = x_aa.Data(); 16 | 17 | InitArray(x, n, c_RngSeedVal); 18 | 19 | uint64_t sum_x0, sum_x1; 20 | double mean_x0, mean_x1; 21 | 22 | const size_t num_it = 500; 23 | const size_t num_alg = 2; 24 | BmThreadTimer bmtt(num_it, num_alg); 25 | 26 | for (size_t i = 0; i < num_it; i++) 27 | { 28 | bmtt.Start(i, 0); 29 | CalcMeanU8_Cpp(&mean_x0, &sum_x0, x, n); 30 | bmtt.Stop(i, 0); 31 | 32 | bmtt.Start(i, 1); 33 | CalcMeanU8_Aavx(&mean_x1, &sum_x1, x, n); 34 | bmtt.Stop(i, 1); 35 | } 36 | 37 | std::string fn = bmtt.BuildCsvFilenameString("Ch13_06_CalcMeanU8_bm"); 38 | bmtt.SaveElapsedTimes(fn, BmThreadTimer::EtUnit::MicroSec, 2); 39 | std::cout << "Benchmark times save to file " << fn << '\n'; 40 | } 41 | -------------------------------------------------------------------------------- /Chapter13/Ch13_06/Ch13_06_fcpp.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch13_06_fcpp.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | #include "Ch13_06.h" 7 | 8 | bool CalcMeanU8_Cpp(double* mean_x, uint64_t* sum_x, const uint8_t* x, size_t n) 9 | { 10 | if (!CheckArgs(x, n)) 11 | return false; 12 | 13 | uint64_t sum_x_temp = 0; 14 | 15 | for (size_t i = 0; i < n; i++) 16 | sum_x_temp += x[i]; 17 | 18 | *sum_x = sum_x_temp; 19 | *mean_x = (double)sum_x_temp / n; 20 | return true; 21 | } 22 | -------------------------------------------------------------------------------- /Chapter13/Ch13_06/Ch13_06_misc.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch13_06_misc.cpp 3 | 
//------------------------------------------------ 4 | 5 | #include "Ch13_06.h" 6 | #include "MT.h" 7 | #include "AlignedMem.h" 8 | 9 | extern size_t g_NumElementsMax = 64 * 1024 * 1024; 10 | 11 | bool CheckArgs(const uint8_t* x, size_t n) 12 | { 13 | if (n == 0 || n > g_NumElementsMax) 14 | return false; 15 | 16 | if ((n % 64) != 0) 17 | return false; 18 | 19 | if (!AlignedMem::IsAligned(x, c_Alignment)) 20 | return false; 21 | 22 | return true; 23 | } 24 | 25 | void InitArray(uint8_t* x, size_t n, unsigned int rng_seed) 26 | { 27 | int rng_min_val = 0; 28 | int rng_max_val = 255; 29 | MT::FillArray(x, n, rng_min_val, rng_max_val, rng_seed); 30 | } 31 | -------------------------------------------------------------------------------- /Chapter14/Ch14_01/Ch14_01.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch14_01.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include "YmmVal.h" 7 | 8 | // Ch14_01_fcpp.cpp 9 | extern "C" void PackedMathF32_Aavx(YmmVal c[8], const YmmVal* a, const YmmVal* b); 10 | extern "C" void PackedMathF64_Aavx(YmmVal c[8], const YmmVal* a, const YmmVal* b); 11 | 12 | -------------------------------------------------------------------------------- /Chapter14/Ch14_01/Ch14_01.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter14/Ch14_02/Ch14_02.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch14_02.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include "YmmVal.h" 7 | 8 | // Ch14_02_fasm.cpp 9 | extern "C" void PackedCompareF32_Aavx(YmmVal c[8], const YmmVal* a, const YmmVal* b); 10 | extern "C" void PackedCompareF64_Aavx(YmmVal c[8], 
const YmmVal* a, const YmmVal* b); 11 | -------------------------------------------------------------------------------- /Chapter14/Ch14_02/Ch14_02.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter14/Ch14_03/Ch14_03.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch14_03.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | 7 | // Ch14_03_fcpp.cpp 8 | extern bool CalcMeanF32_Cpp(float* mean, const float* x, size_t n); 9 | extern bool CalcStDevF32_Cpp(float* st_dev, const float* x, size_t n, float mean); 10 | 11 | // Ch14_03_fasm.asm 12 | extern "C" bool CalcMeanF32_Aavx(float* mean, const float* x, size_t n); 13 | extern "C" bool CalcStDevF32_Aavx(float* st_dev, const float* x, size_t n, 14 | float mean); 15 | 16 | // Ch14_03_misc.cpp 17 | extern bool CheckArgs(const float* x, size_t n); 18 | extern void InitArray(float* x, size_t n); 19 | 20 | // Miscellaneous constants 21 | const size_t c_NumElements = 91; 22 | const unsigned int c_RngSeed = 13; 23 | const float c_ArrayFillMin = 1.0f; 24 | const float c_ArrayFillMax = 100.0f; 25 | const size_t c_Alignment = 32; 26 | -------------------------------------------------------------------------------- /Chapter14/Ch14_03/Ch14_03.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter14/Ch14_03/Ch14_03_fcpp.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch14_03_fcpp.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | #include "Ch14_03.h" 7 | 8 | bool CalcMeanF32_Cpp(float* mean, 
const float* x, size_t n) 9 | { 10 | if (!CheckArgs(x, n)) 11 | return false; 12 | 13 | float sum = 0.0f; 14 | 15 | for (size_t i = 0; i < n; i++) 16 | sum += x[i]; 17 | 18 | *mean = sum / n; 19 | return true; 20 | } 21 | 22 | bool CalcStDevF32_Cpp(float* st_dev, const float* x, size_t n, float mean) 23 | { 24 | if (!CheckArgs(x, n)) 25 | return false; 26 | 27 | float sum_squares = 0.0f; 28 | 29 | for (size_t i = 0; i < n; i++) 30 | { 31 | float temp = x[i] - mean; 32 | sum_squares += temp * temp; 33 | } 34 | 35 | *st_dev = sqrt(sum_squares / (n - 1)); 36 | return true; 37 | } 38 | -------------------------------------------------------------------------------- /Chapter14/Ch14_03/Ch14_03_misc.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch14_03_misc.cpp 3 | //------------------------------------------------ 4 | 5 | #include "Ch14_03.h" 6 | #include "AlignedMem.h" 7 | #include "MT.h" 8 | 9 | bool CheckArgs(const float* x, size_t n) 10 | { 11 | return ((n >= 2) && AlignedMem::IsAligned(x, c_Alignment)); 12 | } 13 | 14 | void InitArray(float* x, size_t n) 15 | { 16 | MT::FillArrayFP(x, n, c_ArrayFillMin, c_ArrayFillMax, c_RngSeed); 17 | } 18 | -------------------------------------------------------------------------------- /Chapter14/Ch14_04/Ch14_04.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch14_04.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | 7 | // The members of PA below must match the PA structure 8 | // that's declared in Ch14_04_fasm.asm 9 | 10 | struct PA 11 | { 12 | double* X1; 13 | double* Y1; 14 | double* X2; 15 | double* Y2; 16 | double* Dist1; 17 | double* Dist2; 18 | double* DistCmp1; 19 | double* DistCmp2; 20 | size_t NumPoints; 21 | }; 22 | 23 | // Ch14_04_fcpp.cpp 24 | extern bool CalcDistancesF64(PA& pa); 25 | 
// Computes the Euclidean distance between corresponding points of two 2-D
// point sets held in a PA structure.
//
// For each i in [0, pa.NumPoints):
//   pa.Dist1[i] = sqrt((X1[i] - X2[i])^2 + (Y1[i] - Y2[i])^2)
//
// Returns false without touching pa.Dist1 when CheckArgs(pa) fails, true
// otherwise.
bool CalcDistancesF64(PA& pa)
{
    if (!CheckArgs(pa))
        return false;

    const double* x1 = pa.X1;
    const double* y1 = pa.Y1;
    const double* x2 = pa.X2;
    const double* y2 = pa.Y2;
    double* dist = pa.Dist1;
    const size_t count = pa.NumPoints;

    for (size_t k = 0; k != count; ++k)
    {
        const double dx = x1[k] - x2[k];
        const double dy = y1[k] - y2[k];

        dist[k] = sqrt(dx * dx + dy * dy);
    }

    return true;
}
// Computes the mean of every column of a row-major nrows x ncols matrix.
//
//   col_means  output array of ncols doubles; fully overwritten.
//   x          matrix elements, element (i, j) stored at x[i * ncols + j].
//   nrows      number of rows; also the divisor of each column sum.
//   ncols      number of columns.
//
// No argument validation is performed.
void CalcColumnMeansF64_Cpp(double* col_means, const double* x, size_t nrows,
    size_t ncols)
{
    size_t c;

    // Start every column total at zero.
    for (c = 0; c < ncols; c++)
        col_means[c] = 0.0;

    // Add the matrix one row at a time; row_ptr always addresses column 0
    // of the current row.
    const double* row_ptr = x;

    for (size_t r = 0; r < nrows; r++, row_ptr += ncols)
    {
        for (c = 0; c < ncols; c++)
            col_means[c] += row_ptr[c];
    }

    // Turn the totals into means.
    for (c = 0; c < ncols; c++)
        col_means[c] /= (double)nrows;
}
-------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch15_01.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include "YmmVal.h" 7 | 8 | // Ch15_01_fasm.asm 9 | extern "C" void MathI16_Aavx2(YmmVal c[6], const YmmVal* a, const YmmVal* b); 10 | extern "C" void MathI32_Aavx2(YmmVal c[6], const YmmVal* a, const YmmVal* b); 11 | -------------------------------------------------------------------------------- /Chapter15/Ch15_01/Ch15_01.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter15/Ch15_02/Ch15_02.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch15_02.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include "YmmVal.h" 7 | 8 | // Ch15_02_fcpp.cpp 9 | extern "C" void ZeroExtU8_U16_Aavx2(YmmVal c[2], YmmVal* a); 10 | extern "C" void ZeroExtU8_U32_Aavx2(YmmVal c[4], YmmVal* a); 11 | extern "C" void SignExtI16_I32_Aavx2(YmmVal c[2], YmmVal* a); 12 | extern "C" void SignExtI16_I64_Aavx2(YmmVal c[4], YmmVal* a); 13 | 14 | -------------------------------------------------------------------------------- /Chapter15/Ch15_02/Ch15_02.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter15/Ch15_03/Ch15_03.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter15/Ch15_03/Ch15_03_fcpp.cpp: -------------------------------------------------------------------------------- 1 | 
//------------------------------------------------ 2 | // Ch15_03_fcpp.cpp 3 | //------------------------------------------------ 4 | 5 | #include "Ch15_03.h" 6 | #include "AlignedMem.h" 7 | 8 | void ClipPixels_Cpp(ClipData* clip_data) 9 | { 10 | if (!CheckArgs(clip_data)) 11 | throw std::runtime_error("ClipPixels_Cpp() - CheckArgs failed"); 12 | 13 | uint8_t* pb_src = clip_data->m_PbSrc; 14 | uint8_t* pb_des = clip_data->m_PbDes; 15 | size_t num_pixels = clip_data->m_NumPixels; 16 | size_t num_clipped_pixels = 0; 17 | uint8_t thresh_lo = clip_data->m_ThreshLo; 18 | uint8_t thresh_hi = clip_data->m_ThreshHi; 19 | 20 | for (size_t i = 0; i < num_pixels; i++) 21 | { 22 | uint8_t pixel = pb_src[i]; 23 | 24 | if (pixel < thresh_lo) 25 | { 26 | pb_des[i] = thresh_lo; 27 | num_clipped_pixels++; 28 | } 29 | else if (pixel > thresh_hi) 30 | { 31 | pb_des[i] = thresh_hi; 32 | num_clipped_pixels++; 33 | } 34 | else 35 | pb_des[i] = pb_src[i]; 36 | } 37 | 38 | clip_data->m_NumClippedPixels = num_clipped_pixels; 39 | } 40 | -------------------------------------------------------------------------------- /Chapter15/Ch15_03/Ch15_03_misc.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch15_03_misc.cpp 3 | //------------------------------------------------ 4 | 5 | #include "Ch15_03.h" 6 | #include "AlignedMem.h" 7 | 8 | bool CheckArgs(const ClipData* clip_data) 9 | { 10 | if (clip_data->m_NumPixels == 0) 11 | return false; 12 | 13 | if (!AlignedMem::IsAligned(clip_data->m_PbSrc, c_Alignment)) 14 | return false; 15 | 16 | if (!AlignedMem::IsAligned(clip_data->m_PbDes, c_Alignment)) 17 | return false; 18 | 19 | return true; 20 | } 21 | 22 | -------------------------------------------------------------------------------- /Chapter15/Ch15_04/Ch15_04.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 
| // Ch15_04.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | #include 8 | #include "ImageMisc.h" 9 | 10 | // Ch15_04.cpp 11 | extern const float c_Coef[4]; 12 | extern const char* c_TestImageFileName; 13 | 14 | // Ch15_04_fcpp.cpp 15 | extern void ConvertRgbToGs_Cpp(uint8_t* pb_gs, const RGB32* pb_rgb, size_t num_pixels, const float coef[4]); 16 | 17 | // Ch15_04_fasm.asm 18 | extern "C" void ConvertRgbToGs_Aavx2(uint8_t* pb_gs, const RGB32* pb_rgb, size_t num_pixels, const float coef[4]); 19 | 20 | // Ch15_04_misc.cpp 21 | extern bool CheckArgs(const uint8_t* pb_gs, const RGB32* pb_rgb, size_t num_pixels, const float coef[4]); 22 | 23 | // Ch15_04_bm.cpp 24 | extern void ConvertRgbToGs_bm(void); 25 | extern bool CompareGsPixelBuffers(const uint8_t* pb_gs1, const uint8_t* pb_gs2, size_t num_pixels); 26 | 27 | // Miscellaneous constants 28 | const size_t c_Alignment = 32; 29 | const size_t c_NumPixelsMax = 256 * 1024 * 1024; 30 | extern "C" size_t g_NumPixelsMax; 31 | 32 | -------------------------------------------------------------------------------- /Chapter15/Ch15_04/Ch15_04.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter15/Ch15_04/Ch15_04_fcpp.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch15_04_fcpp.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | #include 7 | #include "Ch15_04.h" 8 | #include "ImageMisc.h" 9 | 10 | void ConvertRgbToGs_Cpp(uint8_t* pb_gs, const RGB32* pb_rgb, size_t num_pixels, const float coef[4]) 11 | { 12 | if (!CheckArgs(pb_gs, pb_rgb, num_pixels, coef)) 13 | throw std::runtime_error("ConvertRgbToGs_Cpp() - CheckArgs failed"); 14 | 15 | for (size_t i = 0; i < num_pixels; i++) 16 | { 17 | uint8_t r = 
// Compares two grayscale pixel buffers with a tolerance of one gray level.
//
// Returns false as soon as a pair of corresponding pixels differs by more
// than 1; returns true when every pair is within tolerance, which includes
// the case num_pixels == 0.
bool CompareGsPixelBuffers(const uint8_t* pb_gs1, const uint8_t* pb_gs2, size_t num_pixels)
{
    size_t k = 0;

    while (k < num_pixels)
    {
        // Widen to int so the subtraction cannot wrap.
        const int delta = (int)pb_gs1[k] - (int)pb_gs2[k];

        if (delta > 1 || delta < -1)
            return false;

        ++k;
    }

    return true;
}
num_pixels); 11 | 12 | // Ch15_05_fasm.asm 13 | extern "C" void ConvertU8ToF32_Aavx2(float* pb_des, const uint8_t* pb_src, 14 | size_t num_pixels); 15 | 16 | // Ch15_05_misc.cpp 17 | extern void BuildLUT_U8ToF32(void); 18 | extern bool CheckArgs(const void* pb1, const void* pb2, size_t num_pixels); 19 | extern size_t CompareArraysF32(const float* pb_src1, const float* pb_src2, 20 | size_t num_pixels); 21 | 22 | // Ch15_05_bm.cpp 23 | extern void ConvertU8ToF32_bm(void); 24 | 25 | // Miscellaneous constants 26 | const size_t c_Alignment = 32; 27 | const size_t c_NumPixels = 1024 * 1024 + 19; 28 | const size_t c_NumPixelsBM = 10000000; 29 | const size_t c_NumPixelsMax = 16 * 1024 * 1024; 30 | const int c_FillMinVal = 0; 31 | const int c_FillMaxVal = 255; 32 | const unsigned int c_RngSeed = 71; 33 | 34 | extern "C" float g_LUT_U8ToF32[]; 35 | extern "C" size_t g_NumPixelsMax; 36 | -------------------------------------------------------------------------------- /Chapter15/Ch15_05/Ch15_05.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter15/Ch15_05/Ch15_05_fcpp.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch15_05_fcpp.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | #include "Ch15_05.h" 7 | 8 | void ConvertU8ToF32_Cpp(float* pb_des, const uint8_t* pb_src, size_t num_pixels) 9 | { 10 | if (!CheckArgs(pb_des, pb_src, num_pixels)) 11 | throw std::runtime_error("ConvertU8ToF32_Cpp() CheckArgs failed"); 12 | 13 | const float* lut = g_LUT_U8ToF32; 14 | 15 | for (size_t i = 0; i < num_pixels; i++) 16 | pb_des[i] = lut[pb_src[i]]; 17 | } 18 | -------------------------------------------------------------------------------- /Chapter16/Ch16_01/Ch16_01.h: 
-------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch16_01.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | 7 | // Ch16_01_fcpp.cpp 8 | extern void CalcLeastSquares_Cpp(double* m, double* b, const double* x, const double* y, size_t n); 9 | 10 | // Ch16_01_fasm.asm 11 | extern "C" void CalcLeastSquares_Aavx2(double* m, double* b, const double* x, const double* y, size_t n); 12 | 13 | // Ch16_01_misc.cpp 14 | extern bool CheckArgs(const double* x, const double* y, size_t n); 15 | extern void FillArrays(double* x, double* y, size_t n); 16 | 17 | // Miscellaneous constants 18 | const size_t c_Alignment = 32; 19 | const double c_LsEpsilon = 1.0e-12; 20 | extern "C" double g_LsEpsilon; 21 | -------------------------------------------------------------------------------- /Chapter16/Ch16_01/Ch16_01.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter16/Ch16_01/Ch16_01_fcpp.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch16_01_fcpp.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | #include 7 | #include "Ch16_01.h" 8 | 9 | void CalcLeastSquares_Cpp(double* m, double* b, const double* x, const double* y, size_t n) 10 | { 11 | *m = 0.0; 12 | *b = 0.0; 13 | 14 | if (!CheckArgs(x, y, n)) 15 | throw std::runtime_error("CalcLeastSquares_cpp() CheckArgs failed"); 16 | 17 | double sum_x = 0.0, sum_y = 0.0, sum_xx = 0.0, sum_xy = 0.0; 18 | 19 | for (size_t i = 0; i < n; i++) 20 | { 21 | sum_x += x[i]; 22 | sum_y += y[i]; 23 | sum_xx += x[i] * x[i]; 24 | sum_xy += x[i] * y[i]; 25 | } 26 | 27 | double denom = n * sum_xx - sum_x * sum_x; 28 | 29 | if (fabs(denom) >= c_LsEpsilon) 30 | { 31 
| *m = (n * sum_xy - sum_x * sum_y) / denom; 32 | *b = (sum_xx * sum_y - sum_x * sum_xy) / denom; 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /Chapter16/Ch16_01/Ch16_01_misc.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch16_01_misc.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | #include "Ch16_01.h" 7 | #include "AlignedMem.h" 8 | #include "MT.h" 9 | 10 | extern "C" double g_LsEpsilon = c_LsEpsilon; 11 | 12 | bool CheckArgs(const double* x, const double* y, size_t n) 13 | { 14 | if (n < 2) 15 | return false; 16 | 17 | if (!AlignedMem::IsAligned(x, c_Alignment)) 18 | return false; 19 | 20 | if (!AlignedMem::IsAligned(y, c_Alignment)) 21 | return false; 22 | 23 | return true; 24 | } 25 | 26 | void FillArrays(double* x, double* y, size_t n) 27 | { 28 | const unsigned int rng_seed1 = 73; 29 | const unsigned int rng_seed2 = 83; 30 | const double fill_min_val = -25.0; 31 | const double fill_max_val = 25.0; 32 | 33 | MT::FillArrayFP(x, n, fill_min_val, fill_max_val, rng_seed1); 34 | MT::FillArrayFP(y, n, fill_min_val, fill_max_val, rng_seed2); 35 | 36 | for (size_t i = 0; i < n; i++) 37 | y[i] = y[i] * y[i]; 38 | } 39 | -------------------------------------------------------------------------------- /Chapter16/Ch16_02/Ch16_02.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch16_02.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include "MatrixF32.h" 7 | 8 | // Ch16_02_fcpp.cpp 9 | extern void MatrixMulF32_Cpp(MatrixF32& c, const MatrixF32& a, const MatrixF32& b); 10 | 11 | // Ch16_02_fasm2.asm 12 | extern "C" void MatrixMulF32_Aavx2(float* c, const float* a, const float* b, 13 | const size_t* sizes); 14 | 15 | // Ch16_02_misc.cpp 16 | extern bool 
// C++ entry point for the Ch16_02 matrix multiplication example.
//
// Forwards its three arguments unchanged to the static member function
// MatrixF32::Mul(c, a, b); all work, including any size validation, happens
// there.
// NOTE(review): presumably c receives the product a * b - confirm against
// MatrixF32::Mul.
void MatrixMulF32_Cpp(MatrixF32& c, const MatrixF32& a, const MatrixF32& b)
{
    MatrixF32::Mul(c, a, b);
}
MatrixF32& b); 17 | 18 | // Ch16_03_bm.cpp 19 | extern void MatrixMul4x4F32_bm(void); 20 | -------------------------------------------------------------------------------- /Chapter16/Ch16_03/Ch16_03.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter16/Ch16_03/Ch16_03_fcpp.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch16_03_fcpp.cpp 3 | //------------------------------------------------ 4 | 5 | #include "Ch16_03.h" 6 | 7 | void MatrixMul4x4F32_Cpp(MatrixF32& c, const MatrixF32& a, const MatrixF32& b) 8 | { 9 | MatrixF32::Mul4x4(c, a, b); 10 | } 11 | -------------------------------------------------------------------------------- /Chapter16/Ch16_03/Ch16_03_misc.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch16_03_misc.cpp 3 | //------------------------------------------------ 4 | 5 | #include "Ch16_03.h" 6 | 7 | void InitMat(MatrixF32& c1, MatrixF32& c2, MatrixF32& c3, MatrixF32& a, MatrixF32& b) 8 | { 9 | const float a_row0[] = { 10, 11, 12, 13 }; 10 | const float a_row1[] = { 20, 21, 22, 23 }; 11 | const float a_row2[] = { 30, 31, 32, 33 }; 12 | const float a_row3[] = { 40, 41, 42, 43 }; 13 | 14 | const float b_row0[] = { 100, 101, 102, 103 }; 15 | const float b_row1[] = { 200, 201, 202, 203 }; 16 | const float b_row2[] = { 300, 301, 302, 303 }; 17 | const float b_row3[] = { 400, 401, 402, 403 }; 18 | 19 | a.SetRow(0, a_row0); 20 | a.SetRow(1, a_row1); 21 | a.SetRow(2, a_row2); 22 | a.SetRow(3, a_row3); 23 | 24 | b.SetRow(0, b_row0); 25 | b.SetRow(1, b_row1); 26 | b.SetRow(2, b_row2); 27 | b.SetRow(3, b_row3); 28 | 29 | const int w = 12; 30 | const char* delim = " "; 31 | c1.SetOstream(w, delim); 32 | 
c2.SetOstream(w, delim); 33 | c3.SetOstream(w, delim); 34 | a.SetOstream(w, delim); 35 | b.SetOstream(w, delim); 36 | } 37 | -------------------------------------------------------------------------------- /Chapter16/Ch16_04/Ch16_04.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch16_04.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | #include "MatrixF32.h" 8 | 9 | // Simple 4x1 vector structure 10 | struct Vec4x1_F32 11 | { 12 | float W, X, Y, Z; 13 | }; 14 | 15 | // Ch16_04_fcpp.cpp 16 | extern void MatVecMulF32_Cpp(Vec4x1_F32* vec_b, const MatrixF32& m, const Vec4x1_F32* vec_a, size_t num_vec); 17 | 18 | // Ch16_04_fasm.asm 19 | extern "C" void MatVecMulF32_Aavx2(Vec4x1_F32* vec_b, const float* m, const Vec4x1_F32* vec_a, size_t num_vec); 20 | 21 | // Ch16_04_misc.cpp 22 | extern "C" bool CheckArgs(const Vec4x1_F32* vec_b, const MatrixF32& m, const Vec4x1_F32* vec_a, size_t num_vec); 23 | extern void Init(MatrixF32& m, Vec4x1_F32* va, size_t num_vec); 24 | extern bool VecCompare(const Vec4x1_F32* v1, const Vec4x1_F32* v2); 25 | 26 | // Ch16_04_bm.cpp 27 | extern void MatrixVecMulF32_bm(void); 28 | 29 | // Miscellaenous constants 30 | const size_t c_Alignment = 32; 31 | const int c_RngMinVal = 1; 32 | const int c_RngMaxVal = 500; 33 | const unsigned int c_RngSeedVal = 187; 34 | 35 | -------------------------------------------------------------------------------- /Chapter16/Ch16_04/Ch16_04.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter16/Ch16_04/Ch16_04_fcpp.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch16_04_fcpp.cpp 3 | 
//------------------------------------------------ 4 | 5 | #include 6 | #include 7 | #include 8 | #include "Ch16_04.h" 9 | #include "MatrixF32.h" 10 | #include "AlignedMem.h" 11 | 12 | void MatVecMulF32_Cpp(Vec4x1_F32* vec_b, const MatrixF32& m, const Vec4x1_F32* vec_a, size_t num_vec) 13 | { 14 | if (!CheckArgs(vec_b, m, vec_a, num_vec)) 15 | throw std::runtime_error("MatVecMulF32_Cpp() - CheckArgs failed"); 16 | 17 | const float* mm = m.Data(); 18 | 19 | // Calculate matrix-vector products 20 | for (size_t i = 0; i < num_vec; i++) 21 | { 22 | vec_b[i].W = mm[0] * vec_a[i].W + mm[1] * vec_a[i].X; 23 | vec_b[i].W += mm[2] * vec_a[i].Y + mm[3] * vec_a[i].Z; 24 | 25 | vec_b[i].X = mm[4] * vec_a[i].W + mm[5] * vec_a[i].X; 26 | vec_b[i].X += mm[6] * vec_a[i].Y + mm[7] * vec_a[i].Z; 27 | 28 | vec_b[i].Y = mm[8] * vec_a[i].W + mm[9] * vec_a[i].X; 29 | vec_b[i].Y += mm[10] * vec_a[i].Y + mm[11] * vec_a[i].Z; 30 | 31 | vec_b[i].Z = mm[12] * vec_a[i].W + mm[13] * vec_a[i].X; 32 | vec_b[i].Z += mm[14] * vec_a[i].Y + mm[15] * vec_a[i].Z; 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /Chapter16/Ch16_05/Ch16_05.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch16_05.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | 8 | // Ch16_05_fcpp.cpp 9 | extern void Convolve1D_F32_Cpp(std::vector& y, const std::vector& x, const std::vector& kernel); 10 | 11 | // Ch16_05_fasm.asm 12 | extern "C" void Convolve1D_F32_Aavx2(float* y, const float* x, const float* kernel, size_t num_pts, size_t kernel_size); 13 | extern "C" void Convolve1DKs5_F32_Aavx2(float* y, const float* x, const float* kernel, size_t num_pts); 14 | 15 | // Ch16_05_misc.cpp 16 | extern bool CheckArgs(std::vector& y, const std::vector& x, const std::vector& kernel); 17 | 18 | // Ch16_05_bm.cpp 19 | extern void
Convolve1D_F32_bm(void); 20 | 21 | // Miscellaneous constants 22 | const unsigned int c_RngSeed = 97; 23 | -------------------------------------------------------------------------------- /Chapter16/Ch16_05/Ch16_05.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter16/Ch16_05/Ch16_05_fcpp.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch16_05_fcpp.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | #include "Ch16_05.h" 7 | #include "MiscTypes.h" 8 | 9 | void Convolve1D_F32_Cpp(std::vector& y, const std::vector& x, const std::vector& kernel) 10 | { 11 | if (!CheckArgs(y, x, kernel)) 12 | throw std::runtime_error("Convolve1D_F32_Cpp() - CheckArgs failed"); 13 | 14 | indx_t num_pts = (indx_t)y.size(); 15 | indx_t ks2 = kernel.size() / 2; 16 | 17 | for (indx_t i = ks2; i < num_pts - ks2; i++) 18 | { 19 | float y_val = 0; 20 | 21 | for (indx_t k = -ks2; k <= ks2; k++) 22 | y_val += x[i - k] * kernel[k + ks2]; 23 | 24 | y[i] = y_val; 25 | } 26 | } 27 | 28 | -------------------------------------------------------------------------------- /Chapter16/Ch16_05/Ch16_05_misc.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch16_05_misc.cpp 3 | //------------------------------------------------ 4 | 5 | #include "Ch16_05.h" 6 | 7 | bool CheckArgs(std::vector& y, const std::vector& x, const std::vector& kernel) 8 | { 9 | if ((kernel.size() & 1) == 0) 10 | return false; 11 | 12 | if (y.size() != x.size()) 13 | return false; 14 | 15 | if (y.size() < kernel.size()) 16 | return false; 17 | 18 | return true; 19 | } 20 | -------------------------------------------------------------------------------- 
/Chapter17/Ch17_01/Ch17_01.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch17_01.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | #include "ZmmVal.h" 8 | 9 | // Ch17_01_fasm.asm 10 | extern "C" void MathI16_Aavx512(ZmmVal c[6], const ZmmVal* a, const ZmmVal* b); 11 | extern "C" void MathI64_Aavx512(ZmmVal c[6], const ZmmVal* a, const ZmmVal* b); 12 | 13 | -------------------------------------------------------------------------------- /Chapter17/Ch17_01/Ch17_01.vcxproj.user: -------------------------------------------------------------------------------- 1 | ﻿ 2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter17/Ch17_02/Ch17_02.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch17_02.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | #include "ZmmVal.h" 8 | 9 | // Ch17_02_fasm.asm 10 | extern "C" void MaskOpI64a_Aavx512(ZmmVal c[5], uint8_t mask, const ZmmVal* a, const ZmmVal* b); 11 | extern "C" void MaskOpI64b_Aavx512(ZmmVal c[5], uint8_t mask, const ZmmVal* a, const ZmmVal* b1, const ZmmVal* b2); 12 | extern "C" void MaskOpI64c_Aavx512(ZmmVal* c, const ZmmVal* a, int64_t x1, int64_t x2); 13 | -------------------------------------------------------------------------------- /Chapter17/Ch17_02/Ch17_02.vcxproj.user: -------------------------------------------------------------------------------- 1 | ﻿ 2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter17/Ch17_03/Ch17_03.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch17_03.h 3 | //------------------------------------------------ 4 | 5
| #pragma once 6 | #include 7 | #include 8 | 9 | // Note: any changes to CmpOp must also be reflected in Ch17_03_fasm.asm 10 | enum class CmpOp : uint64_t { EQ, NE, LT, LE, GT, GE }; 11 | 12 | // Ch17_03_fcpp.cpp 13 | extern void ComparePixels_Cpp(uint8_t* des, const uint8_t* src, size_t num_pixels, CmpOp cmp_op, uint8_t cmp_val); 14 | 15 | // Ch17_03_fasm.asm 16 | extern "C" void ComparePixels_Aavx512(uint8_t* des, const uint8_t* src, size_t num_pixels, CmpOp cmp_op, uint8_t cmp_val); 17 | 18 | // Ch17_03_misc.cpp 19 | extern bool CheckArgs(const uint8_t* des, const uint8_t* src, size_t num_pixels); 20 | extern void DisplayResults(const uint8_t* des1, const uint8_t* des2, size_t num_pixels, CmpOp cmp_op, uint8_t cmp_val, size_t test_id); 21 | extern void InitArray(uint8_t* x, size_t n, unsigned int seed); 22 | 23 | // Miscellaneous constants 24 | const size_t c_Alignment = 64; 25 | -------------------------------------------------------------------------------- /Chapter17/Ch17_03/Ch17_03.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter17/Ch17_04/Ch17_04.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch17_04.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | #include 8 | 9 | // Simple image statistics structure. 
This must match the structure that's 10 | // defined in Ch17_04_fasm.asm 11 | struct ImageStats 12 | { 13 | uint8_t* m_PixelBuffer; 14 | uint32_t m_PixelMinVal; 15 | uint32_t m_PixelMaxVal; 16 | size_t m_NumPixels; 17 | size_t m_NumPixelsInRange; 18 | uint64_t m_PixelSum; 19 | uint64_t m_PixelSumSquares; 20 | double m_PixelMean; 21 | double m_PixelStDev; 22 | }; 23 | 24 | // Ch17_04.cpp 25 | extern const char* c_ImageFileName; 26 | 27 | // Ch17_04_fcpp.cpp 28 | extern void CalcImageStats_Cpp(ImageStats& im_stats); 29 | 30 | // Ch17_04_fasm.cpp 31 | extern "C" void CalcImageStats_Aavx512(ImageStats& im_stats); 32 | 33 | // Ch17_04_misc.cpp 34 | extern "C" bool CheckArgs(const ImageStats& im_stats); 35 | 36 | // Ch17_04_bm.cpp 37 | extern void CalcImageStats_bm(void); 38 | 39 | // Miscellaneous constants 40 | const size_t c_Alignment = 64; 41 | const size_t c_NumPixelsMax = 64 * 1024 * 1024; 42 | const uint32_t c_PixelMinVal = 40; 43 | const uint32_t c_PixelMaxVal = 230; 44 | -------------------------------------------------------------------------------- /Chapter17/Ch17_04/Ch17_04.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter17/Ch17_04/Ch17_04_misc.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch17_04_misc.cpp 3 | //------------------------------------------------ 4 | 5 | #include "Ch17_04.h" 6 | #include "AlignedMem.h" 7 | 8 | bool CheckArgs(const ImageStats& im_stats) 9 | { 10 | if (im_stats.m_NumPixels == 0) 11 | return false; 12 | 13 | if (im_stats.m_NumPixels % 64 != 0) 14 | return false; 15 | 16 | if (im_stats.m_NumPixels > c_NumPixelsMax) 17 | return false; 18 | 19 | if (!AlignedMem::IsAligned(im_stats.m_PixelBuffer, c_Alignment)) 20 | return false; 21 | 22 | return true; 23 | } 24 | 
-------------------------------------------------------------------------------- /Chapter18/Ch18_01/Ch18_01.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch18_01.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include "ZmmVal.h" 7 | 8 | // Ch18_01_fcpp.cpp 9 | extern "C" void PackedMathF32_Aavx512(ZmmVal c[8], const ZmmVal* a, const ZmmVal* b); 10 | extern "C" void PackedMathF64_Aavx512(ZmmVal c[8], const ZmmVal* a, const ZmmVal* b); 11 | -------------------------------------------------------------------------------- /Chapter18/Ch18_01/Ch18_01.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter18/Ch18_02/Ch18_02.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch18_02.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include "ZmmVal.h" 7 | 8 | // Ch18_02_fcpp.cpp 9 | extern "C" void PackedCompareF32_Aavx512(uint16_t c[8], const ZmmVal* a, const ZmmVal* b); 10 | extern "C" void PackedCompareF64_Aavx512(uint8_t c[8], const ZmmVal* a, const ZmmVal* b); 11 | -------------------------------------------------------------------------------- /Chapter18/Ch18_02/Ch18_02.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter18/Ch18_03/Ch18_03.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch18_03.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | #include "MatrixF64.h" 8 | 9 | // 
Covariance matrix data 10 | struct CMD 11 | { 12 | MatrixF64 m_X; 13 | MatrixF64 m_CovMat; 14 | std::vector m_VarMeans; 15 | 16 | CMD(size_t n_vars, size_t n_obvs) : 17 | m_X(n_vars, n_obvs), m_CovMat(n_vars, n_vars), m_VarMeans(n_vars) { } 18 | }; 19 | 20 | // Ch18_03_fcpp.cpp 21 | extern void CalcCovMatF64_Cpp(CMD& cmd); 22 | extern void CalcCovMatF64_Aavx512(CMD& cmd); 23 | 24 | // Ch18_03_fasm.asm 25 | extern "C" void CalcCovMatF64_Aavx512(double* cov_mat, double* var_means, const double* x, size_t n_vars, size_t n_obs); 26 | 27 | // Ch18_03_misc.cpp 28 | extern bool CheckArgs(const CMD& cmd); 29 | extern bool CompareResults(CMD& cmd1, CMD& cmd2); 30 | extern void InitCMD(CMD& cmd1, CMD& cmd2); 31 | -------------------------------------------------------------------------------- /Chapter18/Ch18_03/Ch18_03.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter18/Ch18_04/Ch18_04.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch18_04.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include "MatrixF32.h" 7 | 8 | // Ch18_04_fcpp.cpp 9 | void MatrixMulF32_Cpp(MatrixF32& c, const MatrixF32& a, const MatrixF32& b); 10 | 11 | // Ch18_04_fasm.asm 12 | extern "C" void MatrixMulF32_Aavx512(float* c, const float* a, const float* b, 13 | const size_t* sizes); 14 | 15 | // Ch18_04_misc.cpp 16 | bool CheckArgs(const MatrixF32& c, const MatrixF32& a, const MatrixF32& b); 17 | void InitMat(MatrixF32& c1, MatrixF32& c2, MatrixF32& a, MatrixF32& b); 18 | void SaveResults(const MatrixF32& c1, const MatrixF32& c2, const MatrixF32& a, 19 | const MatrixF32& b); 20 | 21 | // Ch18_04_bm.cpp 22 | void MatrixMulF32_bm(void); 23 | -------------------------------------------------------------------------------- 
/Chapter18/Ch18_04/Ch18_04.vcxproj.user: -------------------------------------------------------------------------------- 1 | ﻿ 2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter18/Ch18_04/Ch18_04_fcpp.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch18_04_fcpp.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | #include "Ch18_04.h" 7 | #include "MF.h" 8 | 9 | void MatrixMulF32_Cpp(MatrixF32& c, const MatrixF32& a, const MatrixF32& b) 10 | { 11 | MatrixF32::Mul(c, a, b); 12 | } 13 | -------------------------------------------------------------------------------- /Chapter18/Ch18_05/Ch18_05.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch18_05.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | #include "MatrixF32.h" 8 | 9 | // Simple 4x1 vector structure 10 | struct Vec4x1_F32 11 | { 12 | float W, X, Y, Z; 13 | }; 14 | 15 | // Ch18_05_fcpp.cpp 16 | extern void MatVecMulF32_Cpp(Vec4x1_F32* vec_b, MatrixF32& m, Vec4x1_F32* vec_a, size_t num_vec); 17 | 18 | // Ch18_05_fasm.asm 19 | extern "C" void MatVecMulF32a_Aavx512(Vec4x1_F32* vec_b, const float* m, const Vec4x1_F32* vec_a, size_t num_vec); 20 | 21 | // Ch18_05_fasm2.asm 22 | extern "C" void MatVecMulF32b_Aavx512(Vec4x1_F32* vec_b, const float* m, const Vec4x1_F32* vec_a, size_t num_vec); 23 | 24 | // Ch18_05_misc.cpp 25 | extern bool CheckArgs(const Vec4x1_F32* vec_b, const MatrixF32& m, const Vec4x1_F32* vec_a, size_t num_vec); 26 | extern void Init(MatrixF32& m, Vec4x1_F32* va, size_t num_vec); 27 | extern bool VecCompare(const Vec4x1_F32* v1, const Vec4x1_F32* v2); 28 | 29 | // Ch18_05_bm.cpp 30 | extern void MatrixVecMulF32_bm(void); 31 | 32 | // Miscellaneous constants 33 | const size_t
c_Alignment = 64; 34 | const int c_RngMinVal = 1; 35 | const int c_RngMaxVal = 500; 36 | const unsigned int c_RngSeedVal = 187; 37 | -------------------------------------------------------------------------------- /Chapter18/Ch18_05/Ch18_05.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter18/Ch18_06/Ch18_06.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch18_06.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | 8 | // Ch18_06_fcpp.cpp 9 | extern void Convolve1D_F32_Cpp(std::vector& y, const std::vector& x, const std::vector& kernel); 10 | 11 | // Ch18_06_fasm.cpp 12 | extern "C" void Convolve1D_F32_Aavx512(float* y, const float* x, const float* kernel, size_t num_pts, size_t kernel_size); 13 | extern "C" void Convolve1DKs5_F32_Aavx512(float* y, const float* x, const float* kernel, size_t num_pts); 14 | 15 | // Ch18_06_misc.cpp 16 | extern bool CheckArgs(std::vector& y, const std::vector& x, const std::vector& kernel); 17 | 18 | // Ch18_06_bm.cpp 19 | extern void Convolve1D_F32_bm(void); 20 | 21 | // Miscellaneous constants 22 | const unsigned int c_RngSeed = 97; 23 | -------------------------------------------------------------------------------- /Chapter18/Ch18_06/Ch18_06.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter18/Ch18_06/Ch18_06_misc.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch18_06_misc.cpp 3 | //------------------------------------------------ 4 | 5 | #include "Ch18_06.h" 6 | 7 | bool CheckArgs(std::vector& y, 
const std::vector& x, const std::vector& kernel) 8 | { 9 | if ((kernel.size() & 1) == 0) 10 | return false; 11 | 12 | if (y.size() != x.size()) 13 | return false; 14 | 15 | if (y.size() < kernel.size()) 16 | return false; 17 | 18 | return true; 19 | } 20 | 21 | -------------------------------------------------------------------------------- /Chapter19/Ch19_01/Ch19_01.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch19_01.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | #include 8 | #include "MatrixF32.h" 9 | 10 | // Ch19_01_fcpp.cpp 11 | extern void CalcRowStatsF32a_Iavx512(MatrixF32& x, std::vector& row_means, std::vector& row_sds); 12 | extern void CalcRowStatsF32b_Iavx512(MatrixF32& x, std::vector& row_means, std::vector& row_sds); 13 | 14 | // Ch19_01_fcpp2.cpp 15 | extern void CalcRowStatsF32_Cpp(MatrixF32& x, std::vector& row_means, std::vector& row_sds); 16 | 17 | // Ch19_01_misc.cpp 18 | extern bool CheckArgs(const MatrixF32& x, const std::vector& v1, const std::vector& v2); 19 | extern bool CompareResults(const std::vector& v1, const std::vector& v2); 20 | extern void Init(MatrixF32& x); 21 | 22 | extern void SaveResults(const char* bn, const MatrixF32& x, 23 | const std::vector& row_means1, const std::vector& row_means2, const std::vector& row_means3, 24 | const std::vector& row_sds1, const std::vector& row_sds2, const std::vector& row_sds3); 25 | 26 | // Ch19_01_bm.cpp 27 | extern void CalcRowStatsF32_bm(void); 28 | -------------------------------------------------------------------------------- /Chapter19/Ch19_01/Ch19_01.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter19/Ch19_01/Ch19_01_fcpp2.cpp: 
-------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch19_01_fcpp2.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | #include "Ch19_01.h" 7 | #include "MatrixF32.h" 8 | 9 | void CalcRowStatsF32_Cpp(MatrixF32& x, std::vector& row_means, 10 | std::vector& row_sds) 11 | { 12 | if (!CheckArgs(x, row_means, row_sds)) 13 | throw std::runtime_error("CalcRowStatsF32_Cpp() - CheckArgs failed"); 14 | 15 | size_t nrows = x.GetNumRows(); 16 | size_t ncols = x.GetNumCols(); 17 | const float* xx = x.Data(); 18 | 19 | for (size_t i = 0; i < nrows; i++) 20 | { 21 | float sum = 0.0f; 22 | 23 | for (size_t j = 0; j < ncols; j++) 24 | sum += xx[i * ncols + j]; 25 | row_means[i] = sum / ncols; 26 | 27 | float sum_sqs = 0.0f; 28 | 29 | for (size_t j = 0; j < ncols; j++) 30 | { 31 | float temp = xx[i * ncols + j] - row_means[i]; 32 | sum_sqs += temp * temp; 33 | } 34 | row_sds[i] = sqrt(sum_sqs / (ncols - 1)); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /Chapter19/Ch19_02/Ch19_02.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Chapter19/Ch19_02/Ch19_02_test.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch19_02_test.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | #include 7 | #include 8 | #include "Ch19_02.h" 9 | #include "MT_Convolve.h" 10 | 11 | void DisplayKernel1Dx2(float sigma, size_t ks) 12 | { 13 | std::vector gk = GenGaussianKernel1D(sigma, ks); 14 | 15 | std::cout << std::fixed << std::setprecision(6); 16 | 17 | float sum = 0.0f; 18 | for (size_t i = 0; i < ks; i++) 19 | { 20 | sum += gk[i]; 21 | std::cout << std::setw(10) << gk[i] 
<< ' '; 22 | } 23 | 24 | std::cout << '\n'; 25 | std::cout << " sum = " << sum << "\n\n"; 26 | } 27 | -------------------------------------------------------------------------------- /Contributing.md: -------------------------------------------------------------------------------- 1 | # Contributing to Apress Source Code 2 | 3 | Copyright for Apress source code belongs to the author(s). However, under fair use you are encouraged to fork and contribute minor corrections and updates for the benefit of the author(s) and other readers. 4 | 5 | ## How to Contribute 6 | 7 | 1. Make sure you have a GitHub account. 8 | 2. Fork the repository for the relevant book. 9 | 3. Create a new branch on which to make your change, e.g. 10 | `git checkout -b my_code_contribution` 11 | 4. Commit your change. Include a commit message describing the correction. Please note that if your commit message is not clear, the correction will not be accepted. 12 | 5. Submit a pull request. 13 | 14 | Thank you for your contribution! 
-------------------------------------------------------------------------------- /Data/ImageA.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/modern-parallel-programming-cpp-assembly/608b328fe91d4afb91d144f4123a205a0b18242e/Data/ImageA.png -------------------------------------------------------------------------------- /Data/ImageB.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/modern-parallel-programming-cpp-assembly/608b328fe91d4afb91d144f4123a205a0b18242e/Data/ImageB.png -------------------------------------------------------------------------------- /Data/ImageC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/modern-parallel-programming-cpp-assembly/608b328fe91d4afb91d144f4123a205a0b18242e/Data/ImageC.png -------------------------------------------------------------------------------- /Data/ImageD.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/modern-parallel-programming-cpp-assembly/608b328fe91d4afb91d144f4123a205a0b18242e/Data/ImageD.png -------------------------------------------------------------------------------- /Data/ImageE.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/modern-parallel-programming-cpp-assembly/608b328fe91d4afb91d144f4123a205a0b18242e/Data/ImageE.png -------------------------------------------------------------------------------- /Data/ImageF.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/modern-parallel-programming-cpp-assembly/608b328fe91d4afb91d144f4123a205a0b18242e/Data/ImageF.png -------------------------------------------------------------------------------- 
/Doc/ImportantNotes.txt: -------------------------------------------------------------------------------- 1 | The sole purpose of the source code is to elucidate programming examples 2 | that are directly related to the topics discussed in this book. 3 | Minimal attention is given to essential software engineering concerns 4 | such as robust error handling, security risks, numerical stability, 5 | rounding errors, or ill-conditioned functions. You are responsible 6 | for addressing these concerns should you decide to use any of the 7 | source code in your own programs. 8 | 9 | The Include folder contains shared C++ header files. These files 10 | incorporate classes, templates, methods, and macros that are intended 11 | for use with this book's source code. This code should not be used for 12 | other purposes without proper modifications. 13 | -------------------------------------------------------------------------------- /Doc/ReleaseHistory.txt: -------------------------------------------------------------------------------- 1 | 2022-02-22 2 | ---------- 3 | Initial release for publication. 4 | -------------------------------------------------------------------------------- /Doc/VS2022 Review Solution Actions Dialog Box.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/modern-parallel-programming-cpp-assembly/608b328fe91d4afb91d144f4123a205a0b18242e/Doc/VS2022 Review Solution Actions Dialog Box.jpg -------------------------------------------------------------------------------- /Doc/VS2022Notes.txt: -------------------------------------------------------------------------------- 1 | The source code can be used with Visual Studio 2022. The first time a chapter 2 | solution is opened with Visual Studio 2022, a “Review Solution Actions” dialog 3 | will appear on the screen (see accompanying JPG file for an example). 
When 4 | this dialog box appears, click on “OK” to upgrade the project for use with 5 | Visual Studio 2022. 6 | -------------------------------------------------------------------------------- /Include/ImageMisc.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // ImageMisc.h 3 | //------------------------------------------------- 4 | 5 | #pragma once 6 | #include 7 | 8 | struct RGB32 9 | { 10 | // Do not change order of elements below 11 | uint8_t m_R; 12 | uint8_t m_G; 13 | uint8_t m_B; 14 | uint8_t m_A; 15 | }; 16 | 17 | enum class PixelType : unsigned int 18 | { 19 | Undefined, 20 | Gray8, 21 | Rgb32 22 | }; 23 | 24 | enum class ImageFileType : unsigned int 25 | { 26 | Undefined, 27 | BMP, 28 | PNG, 29 | JPEG, 30 | TIFF 31 | }; 32 | 33 | enum class Channel : unsigned int 34 | { 35 | // Do not change order of R, G, B, A 36 | R, G, B, A, 37 | None 38 | }; 39 | -------------------------------------------------------------------------------- /Include/MiscTypes.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // MiscTypes.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | 8 | #if defined(_M_AMD64) || defined(__x86_64__) 9 | typedef int64_t indx_t; 10 | #elif defined(_M_IX86) || defined(__i386__) 11 | typedef int32_t indx_t; 12 | #else 13 | #error "typedef indx_t is undefined" 14 | #endif 15 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Apress Source Code 2 | 3 | This repository accompanies [*Modern Parallel Programming with C++ and Assembly Language*](https://link.springer.com/book/10.1007/978-1-4842-7918-2) by Daniel Kusswurm (Apress, 2022). 
4 | 5 | [comment]: #cover 6 | ![Cover image](978-1-4842-7917-5.jpg) 7 | 8 | Download the files as a zip using the green button, or clone the repository to your machine using Git. 9 | 10 | ## Releases 11 | 12 | Release v1.0 corresponds to the code in the published book, without corrections or updates. 13 | 14 | ## Contributions 15 | 16 | See the file Contributing.md for more information on how you can contribute to this repository. -------------------------------------------------------------------------------- /errata.md: -------------------------------------------------------------------------------- 1 | # Errata for *Modern Parallel Programming with C++ and Assembly Language* 2 | 3 | On **page xx** [Summary of error]: 4 | 5 | Details of error here. Highlight key pieces in **bold**. 6 | 7 | *** 8 | 9 | On **page xx** [Summary of error]: 10 | 11 | Details of error here. Highlight key pieces in **bold**. 12 | 13 | *** --------------------------------------------------------------------------------