├── 978-1-4842-7917-5.jpg
├── AppendixA
├── Example1
│ ├── Example1.cpp
│ ├── Example1.vcxproj
│ ├── Example1.vcxproj.filters
│ ├── Example1.vcxproj.user
│ └── Example1_fasm.asm
└── TestSolution.sln
├── Chapter01
├── Ch01_01
│ ├── Ch01_01.cpp
│ ├── Ch01_01.h
│ ├── Ch01_01.vcxproj
│ ├── Ch01_01.vcxproj.filters
│ ├── Ch01_01.vcxproj.user
│ ├── Ch01_01_fasm.asm
│ └── Ch01_01_fcpp.cpp
└── Chapter01.sln
├── Chapter02
├── Ch02_01
│ ├── Ch02_01.cpp
│ ├── Ch02_01.h
│ ├── Ch02_01.vcxproj
│ ├── Ch02_01.vcxproj.filters
│ ├── Ch02_01.vcxproj.user
│ ├── Ch02_01_fcpp.cpp
│ └── makefile
├── Ch02_02
│ ├── Ch02_02.cpp
│ ├── Ch02_02.h
│ ├── Ch02_02.vcxproj
│ ├── Ch02_02.vcxproj.filters
│ ├── Ch02_02.vcxproj.user
│ ├── Ch02_02_fcpp.cpp
│ └── makefile
├── Ch02_03
│ ├── Ch02_03.cpp
│ ├── Ch02_03.h
│ ├── Ch02_03.vcxproj
│ ├── Ch02_03.vcxproj.filters
│ ├── Ch02_03.vcxproj.user
│ ├── Ch02_03_fcpp.cpp
│ └── makefile
├── Ch02_04
│ ├── Ch02_04.cpp
│ ├── Ch02_04.h
│ ├── Ch02_04.vcxproj
│ ├── Ch02_04.vcxproj.filters
│ ├── Ch02_04.vcxproj.user
│ ├── Ch02_04_fcpp.cpp
│ └── makefile
├── Ch02_05
│ ├── Ch02_05.cpp
│ ├── Ch02_05.h
│ ├── Ch02_05.vcxproj
│ ├── Ch02_05.vcxproj.filters
│ ├── Ch02_05.vcxproj.user
│ ├── Ch02_05_fcpp.cpp
│ └── makefile
├── Ch02_06
│ ├── Ch02_06.cpp
│ ├── Ch02_06.h
│ ├── Ch02_06.vcxproj
│ ├── Ch02_06.vcxproj.filters
│ ├── Ch02_06.vcxproj.user
│ ├── Ch02_06_bm.cpp
│ ├── Ch02_06_fcpp.cpp
│ ├── Ch02_06_misc.cpp
│ └── makefile
├── Ch02_07
│ ├── Ch02_07.cpp
│ ├── Ch02_07.h
│ ├── Ch02_07.vcxproj
│ ├── Ch02_07.vcxproj.filters
│ ├── Ch02_07.vcxproj.user
│ ├── Ch02_07_bm.cpp
│ ├── Ch02_07_fcpp.cpp
│ ├── Ch02_07_misc.cpp
│ └── makefile
└── Chapter02.sln
├── Chapter03
├── Ch03_01
│ ├── Ch03_01.cpp
│ ├── Ch03_01.h
│ ├── Ch03_01.vcxproj
│ ├── Ch03_01.vcxproj.filters
│ ├── Ch03_01.vcxproj.user
│ ├── Ch03_01_fcpp.cpp
│ └── makefile
├── Ch03_02
│ ├── Ch03_02.cpp
│ ├── Ch03_02.h
│ ├── Ch03_02.vcxproj
│ ├── Ch03_02.vcxproj.filters
│ ├── Ch03_02.vcxproj.user
│ ├── Ch03_02_fcpp.cpp
│ └── makefile
├── Ch03_03
│ ├── Ch03_03.cpp
│ ├── Ch03_03.h
│ ├── Ch03_03.vcxproj
│ ├── Ch03_03.vcxproj.filters
│ ├── Ch03_03.vcxproj.user
│ ├── Ch03_03_fcpp.cpp
│ └── makefile
├── Ch03_04
│ ├── Ch03_04.cpp
│ ├── Ch03_04.h
│ ├── Ch03_04.vcxproj
│ ├── Ch03_04.vcxproj.filters
│ ├── Ch03_04.vcxproj.user
│ ├── Ch03_04_fcpp.cpp
│ ├── Ch03_04_misc.cpp
│ └── makefile
├── Ch03_05
│ ├── Ch03_05.cpp
│ ├── Ch03_05.h
│ ├── Ch03_05.vcxproj
│ ├── Ch03_05.vcxproj.filters
│ ├── Ch03_05.vcxproj.user
│ ├── Ch03_05_fcpp.cpp
│ ├── Ch03_05_misc.cpp
│ └── makefile
├── Ch03_06
│ ├── Ch03_06.cpp
│ ├── Ch03_06.h
│ ├── Ch03_06.vcxproj
│ ├── Ch03_06.vcxproj.filters
│ ├── Ch03_06.vcxproj.user
│ ├── Ch03_06_fcpp.cpp
│ ├── Ch03_06_misc.cpp
│ └── makefile
├── Ch03_07
│ ├── Ch03_07.cpp
│ ├── Ch03_07.h
│ ├── Ch03_07.vcxproj
│ ├── Ch03_07.vcxproj.filters
│ ├── Ch03_07.vcxproj.user
│ ├── Ch03_07_fcpp.cpp
│ ├── Ch03_07_misc.cpp
│ └── makefile
├── Ch03_08
│ ├── Ch03_08.cpp
│ ├── Ch03_08.h
│ ├── Ch03_08.vcxproj
│ ├── Ch03_08.vcxproj.filters
│ ├── Ch03_08.vcxproj.user
│ ├── Ch03_08_fcpp.cpp
│ └── makefile
├── Ch03_09
│ ├── Ch03_09.cpp
│ ├── Ch03_09.h
│ ├── Ch03_09.vcxproj
│ ├── Ch03_09.vcxproj.filters
│ ├── Ch03_09.vcxproj.user
│ ├── Ch03_09_fcpp.cpp
│ └── makefile
└── Chapter03.sln
├── Chapter04
├── Ch04_01
│ ├── Ch04_01.cpp
│ ├── Ch04_01.h
│ ├── Ch04_01.vcxproj
│ ├── Ch04_01.vcxproj.filters
│ ├── Ch04_01.vcxproj.user
│ ├── Ch04_01_fcpp.cpp
│ └── makefile
├── Ch04_02
│ ├── Ch04_02.cpp
│ ├── Ch04_02.h
│ ├── Ch04_02.vcxproj
│ ├── Ch04_02.vcxproj.filters
│ ├── Ch04_02.vcxproj.user
│ ├── Ch04_02_fcpp.cpp
│ └── makefile
├── Ch04_03
│ ├── Ch04_03.cpp
│ ├── Ch04_03.h
│ ├── Ch04_03.vcxproj
│ ├── Ch04_03.vcxproj.filters
│ ├── Ch04_03.vcxproj.user
│ ├── Ch04_03_fcpp.cpp
│ └── makefile
├── Ch04_04
│ ├── Ch04_04.cpp
│ ├── Ch04_04.h
│ ├── Ch04_04.vcxproj
│ ├── Ch04_04.vcxproj.filters
│ ├── Ch04_04.vcxproj.user
│ ├── Ch04_04_bm.cpp
│ ├── Ch04_04_fcpp.cpp
│ ├── Ch04_04_misc.cpp
│ └── makefile
├── Ch04_05
│ ├── Ch04_05.cpp
│ ├── Ch04_05.h
│ ├── Ch04_05.vcxproj
│ ├── Ch04_05.vcxproj.filters
│ ├── Ch04_05.vcxproj.user
│ ├── Ch04_05_bm.cpp
│ ├── Ch04_05_fcpp.cpp
│ ├── Ch04_05_misc.cpp
│ └── makefile
├── Ch04_06
│ ├── Ch04_06.cpp
│ ├── Ch04_06.h
│ ├── Ch04_06.vcxproj
│ ├── Ch04_06.vcxproj.filters
│ ├── Ch04_06.vcxproj.user
│ ├── Ch04_06_bm.cpp
│ ├── Ch04_06_fcpp.cpp
│ ├── Ch04_06_misc.cpp
│ └── makefile
├── Ch04_07
│ ├── Ch04_07.cpp
│ ├── Ch04_07.h
│ ├── Ch04_07.vcxproj
│ ├── Ch04_07.vcxproj.filters
│ ├── Ch04_07.vcxproj.user
│ ├── Ch04_07_bm.cpp
│ ├── Ch04_07_fcpp.cpp
│ ├── Ch04_07_misc.cpp
│ └── makefile
└── Chapter04.sln
├── Chapter05
├── Ch05_01
│ ├── Ch05_01.cpp
│ ├── Ch05_01.h
│ ├── Ch05_01.vcxproj
│ ├── Ch05_01.vcxproj.filters
│ ├── Ch05_01.vcxproj.user
│ ├── Ch05_01_fcpp.cpp
│ ├── Ch05_01_misc.cpp
│ └── makefile
├── Ch05_02
│ ├── Ch05_02.cpp
│ ├── Ch05_02.h
│ ├── Ch05_02.vcxproj
│ ├── Ch05_02.vcxproj.filters
│ ├── Ch05_02.vcxproj.user
│ ├── Ch05_02_bm.cpp
│ ├── Ch05_02_fcpp.cpp
│ ├── Ch05_02_misc.cpp
│ └── makefile
├── Ch05_03
│ ├── Ch05_03.cpp
│ ├── Ch05_03.h
│ ├── Ch05_03.vcxproj
│ ├── Ch05_03.vcxproj.filters
│ ├── Ch05_03.vcxproj.user
│ ├── Ch05_03_bm.cpp
│ ├── Ch05_03_fcpp.cpp
│ ├── Ch05_03_misc.cpp
│ └── makefile
├── Ch05_04
│ ├── Ch05_04.cpp
│ ├── Ch05_04.h
│ ├── Ch05_04.vcxproj
│ ├── Ch05_04.vcxproj.filters
│ ├── Ch05_04.vcxproj.user
│ ├── Ch05_04_bm.cpp
│ ├── Ch05_04_fcpp.cpp
│ ├── Ch05_04_misc.cpp
│ └── makefile
├── Ch05_05
│ ├── Ch05_05.cpp
│ ├── Ch05_05.h
│ ├── Ch05_05.vcxproj
│ ├── Ch05_05.vcxproj.filters
│ ├── Ch05_05.vcxproj.user
│ ├── Ch05_05_bm.cpp
│ ├── Ch05_05_fcpp.cpp
│ ├── Ch05_05_misc.cpp
│ └── makefile
├── Ch05_06
│ ├── Ch05_06.cpp
│ ├── Ch05_06.h
│ ├── Ch05_06.vcxproj
│ ├── Ch05_06.vcxproj.filters
│ ├── Ch05_06.vcxproj.user
│ ├── Ch05_06_bm.cpp
│ ├── Ch05_06_fcpp.cpp
│ ├── Ch05_06_misc.cpp
│ └── makefile
├── Ch05_07
│ ├── Ch05_07.cpp
│ ├── Ch05_07.h
│ ├── Ch05_07.vcxproj
│ ├── Ch05_07.vcxproj.filters
│ ├── Ch05_07.vcxproj.user
│ ├── Ch05_07_bm.cpp
│ ├── Ch05_07_fcpp.cpp
│ ├── Ch05_07_misc.cpp
│ └── makefile
├── Ch05_08
│ ├── Ch05_08.cpp
│ ├── Ch05_08.h
│ ├── Ch05_08.vcxproj
│ ├── Ch05_08.vcxproj.filters
│ ├── Ch05_08.vcxproj.user
│ ├── Ch05_08_bm.cpp
│ ├── Ch05_08_fcpp.cpp
│ ├── Ch05_08_fcpp2.cpp
│ ├── Ch05_08_misc.cpp
│ └── makefile
├── Ch05_09
│ ├── Ch05_09.cpp
│ ├── Ch05_09.h
│ ├── Ch05_09.vcxproj
│ ├── Ch05_09.vcxproj.filters
│ ├── Ch05_09.vcxproj.user
│ ├── Ch05_09_bm.cpp
│ ├── Ch05_09_fcpp.cpp
│ ├── Ch05_09_fcpp2.cpp
│ ├── Ch05_09_misc.cpp
│ └── makefile
└── Chapter05.sln
├── Chapter06
├── Ch06_01
│ ├── Ch06_01.cpp
│ ├── Ch06_01.h
│ ├── Ch06_01.vcxproj
│ ├── Ch06_01.vcxproj.filters
│ ├── Ch06_01.vcxproj.user
│ ├── Ch06_01_bm.cpp
│ ├── Ch06_01_fcpp.cpp
│ ├── Ch06_01_misc.cpp
│ └── makefile
├── Ch06_02
│ ├── Ch06_02.cpp
│ ├── Ch06_02.h
│ ├── Ch06_02.vcxproj
│ ├── Ch06_02.vcxproj.filters
│ ├── Ch06_02.vcxproj.user
│ ├── Ch06_02_bm.cpp
│ ├── Ch06_02_fcpp.cpp
│ ├── Ch06_02_misc.cpp
│ └── makefile
├── Ch06_03
│ ├── Ch06_03.cpp
│ ├── Ch06_03.h
│ ├── Ch06_03.vcxproj
│ ├── Ch06_03.vcxproj.filters
│ ├── Ch06_03.vcxproj.user
│ ├── Ch06_03_bm.cpp
│ ├── Ch06_03_fcpp.cpp
│ ├── Ch06_03_misc.cpp
│ ├── Ch06_03_misc2.cpp
│ └── makefile
├── Ch06_04
│ ├── Ch06_04.cpp
│ ├── Ch06_04.h
│ ├── Ch06_04.vcxproj
│ ├── Ch06_04.vcxproj.filters
│ ├── Ch06_04.vcxproj.user
│ ├── Ch06_04_bm.cpp
│ ├── Ch06_04_fcpp.cpp
│ ├── Ch06_04_misc.cpp
│ ├── Ch06_04_misc2.cpp
│ └── makefile
├── Ch06_05
│ ├── Ch06_05.cpp
│ ├── Ch06_05.vcxproj
│ ├── Ch06_05.vcxproj.filters
│ ├── Ch06_05.vcxproj.user
│ └── makefile
└── Chapter06.sln
├── Chapter07
├── Ch07_01
│ ├── Ch07_01.cpp
│ ├── Ch07_01.h
│ ├── Ch07_01.vcxproj
│ ├── Ch07_01.vcxproj.filters
│ ├── Ch07_01.vcxproj.user
│ ├── Ch07_01_fcpp.cpp
│ └── makefile
├── Ch07_02
│ ├── Ch07_02.cpp
│ ├── Ch07_02.h
│ ├── Ch07_02.vcxproj
│ ├── Ch07_02.vcxproj.filters
│ ├── Ch07_02.vcxproj.user
│ ├── Ch07_02_fcpp.cpp
│ └── makefile
├── Ch07_03
│ ├── Ch07_03.cpp
│ ├── Ch07_03.h
│ ├── Ch07_03.vcxproj
│ ├── Ch07_03.vcxproj.filters
│ ├── Ch07_03.vcxproj.user
│ ├── Ch07_03_bm.cpp
│ ├── Ch07_03_fcpp.cpp
│ ├── Ch07_03_misc.cpp
│ └── makefile
├── Ch07_04
│ ├── Ch07_04.cpp
│ ├── Ch07_04.h
│ ├── Ch07_04.vcxproj
│ ├── Ch07_04.vcxproj.filters
│ ├── Ch07_04.vcxproj.user
│ ├── Ch07_04_fcpp.cpp
│ ├── Ch07_04_misc.cpp
│ └── makefile
├── Ch07_05
│ ├── Ch07_05.cpp
│ ├── Ch07_05.h
│ ├── Ch07_05.vcxproj
│ ├── Ch07_05.vcxproj.filters
│ ├── Ch07_05.vcxproj.user
│ ├── Ch07_05_bm.cpp
│ ├── Ch07_05_fcpp.cpp
│ ├── Ch07_05_misc.cpp
│ └── makefile
└── Chapter07.sln
├── Chapter08
├── Ch08_01
│ ├── Ch08_01.cpp
│ ├── Ch08_01.h
│ ├── Ch08_01.vcxproj
│ ├── Ch08_01.vcxproj.filters
│ ├── Ch08_01.vcxproj.user
│ ├── Ch08_01_fcpp.cpp
│ └── makefile
├── Ch08_02
│ ├── Ch08_02.cpp
│ ├── Ch08_02.h
│ ├── Ch08_02.vcxproj
│ ├── Ch08_02.vcxproj.filters
│ ├── Ch08_02.vcxproj.user
│ ├── Ch08_02_fcpp.cpp
│ └── makefile
├── Ch08_03
│ ├── Ch08_03.cpp
│ ├── Ch08_03.h
│ ├── Ch08_03.vcxproj
│ ├── Ch08_03.vcxproj.filters
│ ├── Ch08_03.vcxproj.user
│ ├── Ch08_03_fcpp.cpp
│ ├── Ch08_03_misc.cpp
│ └── makefile
├── Ch08_04
│ ├── Ch08_04.cpp
│ ├── Ch08_04.h
│ ├── Ch08_04.vcxproj
│ ├── Ch08_04.vcxproj.filters
│ ├── Ch08_04.vcxproj.user
│ ├── Ch08_04_fcpp.cpp
│ ├── Ch08_04_misc.cpp
│ ├── Ch08_04_misc2.cpp
│ └── makefile
├── Ch08_05
│ ├── Ch08_05.cpp
│ ├── Ch08_05.h
│ ├── Ch08_05.vcxproj
│ ├── Ch08_05.vcxproj.filters
│ ├── Ch08_05.vcxproj.user
│ ├── Ch08_05_bm.cpp
│ ├── Ch08_05_fcpp.cpp
│ ├── Ch08_05_misc.cpp
│ └── makefile
├── Ch08_06
│ ├── Ch08_06.cpp
│ ├── Ch08_06.h
│ ├── Ch08_06.vcxproj
│ ├── Ch08_06.vcxproj.filters
│ ├── Ch08_06.vcxproj.user
│ ├── Ch08_06_bm.cpp
│ ├── Ch08_06_fcpp.cpp
│ ├── Ch08_06_misc.cpp
│ └── makefile
├── Ch08_07
│ ├── Ch08_07.cpp
│ ├── Ch08_07.h
│ ├── Ch08_07.vcxproj
│ ├── Ch08_07.vcxproj.filters
│ ├── Ch08_07.vcxproj.user
│ ├── Ch08_07_bm.cpp
│ ├── Ch08_07_fcpp.cpp
│ ├── Ch08_07_misc.cpp
│ └── makefile
├── Ch08_08
│ ├── Ch08_08.cpp
│ ├── Ch08_08.h
│ ├── Ch08_08.vcxproj
│ ├── Ch08_08.vcxproj.filters
│ ├── Ch08_08.vcxproj.user
│ ├── Ch08_08_bm.cpp
│ ├── Ch08_08_fcpp.cpp
│ ├── Ch08_08_misc.cpp
│ └── makefile
├── Ch08_09
│ ├── Ch08_09.cpp
│ ├── Ch08_09.h
│ ├── Ch08_09.vcxproj
│ ├── Ch08_09.vcxproj.filters
│ ├── Ch08_09.vcxproj.user
│ ├── Ch08_09_bm.cpp
│ ├── Ch08_09_fcpp.cpp
│ ├── Ch08_09_misc.cpp
│ ├── Ch08_09_test.cpp
│ └── makefile
└── Chapter08.sln
├── Chapter09
├── Ch09_01
│ ├── CacheInfo.cpp
│ ├── Ch09_01.cpp
│ ├── Ch09_01.vcxproj
│ ├── Ch09_01.vcxproj.filters
│ ├── Ch09_01.vcxproj.user
│ ├── CpuidInfo.cpp
│ ├── CpuidInfo.h
│ ├── Cpuid__.cpp
│ ├── Cpuid__.h
│ └── makefile
├── Ch09_02
│ ├── Ch09_02.cpp
│ ├── Ch09_02.h
│ ├── Ch09_02.vcxproj
│ ├── Ch09_02.vcxproj.filters
│ ├── Ch09_02.vcxproj.user
│ ├── Ch09_02_fcpp.cpp
│ ├── Ch09_02_misc.cpp
│ └── makefile
├── Ch09_03
│ ├── Ch09_03.cpp
│ ├── Ch09_03.h
│ ├── Ch09_03.vcxproj
│ ├── Ch09_03.vcxproj.filters
│ ├── Ch09_03.vcxproj.user
│ ├── Ch09_03_bm.cpp
│ ├── Ch09_03_fcpp.cpp
│ ├── Ch09_03_misc.cpp
│ └── makefile
└── Chapter09.sln
├── Chapter11
├── Ch11_01
│ ├── Ch11_01.cpp
│ ├── Ch11_01.h
│ ├── Ch11_01.vcxproj
│ ├── Ch11_01.vcxproj.filters
│ ├── Ch11_01.vcxproj.user
│ ├── Ch11_01_fasm.asm
│ └── Ch11_01_misc.cpp
├── Ch11_02
│ ├── Ch11_02.cpp
│ ├── Ch11_02.h
│ ├── Ch11_02.vcxproj
│ ├── Ch11_02.vcxproj.filters
│ ├── Ch11_02.vcxproj.user
│ ├── Ch11_02_fasm.asm
│ └── Ch11_02_misc.cpp
├── Ch11_03
│ ├── Ch11_03.cpp
│ ├── Ch11_03.h
│ ├── Ch11_03.vcxproj
│ ├── Ch11_03.vcxproj.filters
│ ├── Ch11_03.vcxproj.user
│ ├── Ch11_03_fasm.asm
│ └── Ch11_03_misc.cpp
├── Ch11_04
│ ├── Ch11_04.cpp
│ ├── Ch11_04.h
│ ├── Ch11_04.vcxproj
│ ├── Ch11_04.vcxproj.filters
│ ├── Ch11_04.vcxproj.user
│ └── Ch11_04_fasm.asm
├── Ch11_05
│ ├── Ch11_05.cpp
│ ├── Ch11_05.h
│ ├── Ch11_05.vcxproj
│ ├── Ch11_05.vcxproj.filters
│ ├── Ch11_05.vcxproj.user
│ └── Ch11_05_fasm.asm
├── Ch11_06
│ ├── Ch11_06.cpp
│ ├── Ch11_06.h
│ ├── Ch11_06.vcxproj
│ ├── Ch11_06.vcxproj.filters
│ ├── Ch11_06.vcxproj.user
│ ├── Ch11_06_fasm.asm
│ ├── Ch11_06_fcpp.cpp
│ └── Ch11_06_misc.cpp
├── Ch11_07
│ ├── Ch11_07.cpp
│ ├── Ch11_07.h
│ ├── Ch11_07.vcxproj
│ ├── Ch11_07.vcxproj.filters
│ ├── Ch11_07.vcxproj.user
│ ├── Ch11_07_fasm.asm
│ └── Ch11_07_misc.cpp
├── Ch11_08
│ ├── Ch11_08.cpp
│ ├── Ch11_08.h
│ ├── Ch11_08.vcxproj
│ ├── Ch11_08.vcxproj.filters
│ ├── Ch11_08.vcxproj.user
│ └── Ch11_08_fasm.asm
└── Chapter11.sln
├── Chapter12
├── Ch12_01
│ ├── Ch12_01.cpp
│ ├── Ch12_01.h
│ ├── Ch12_01.vcxproj
│ ├── Ch12_01.vcxproj.filters
│ ├── Ch12_01.vcxproj.user
│ └── Ch12_01_fasm.asm
├── Ch12_02
│ ├── Ch12_02.cpp
│ ├── Ch12_02.h
│ ├── Ch12_02.vcxproj
│ ├── Ch12_02.vcxproj.filters
│ ├── Ch12_02.vcxproj.user
│ ├── Ch12_02_fasm.asm
│ ├── Ch12_02_fcpp.cpp
│ └── Ch12_02_misc.cpp
├── Ch12_03
│ ├── Ch12_03.cpp
│ ├── Ch12_03.h
│ ├── Ch12_03.vcxproj
│ ├── Ch12_03.vcxproj.filters
│ ├── Ch12_03.vcxproj.user
│ ├── Ch12_03_fasm.asm
│ └── Ch12_03_misc.cpp
├── Ch12_04
│ ├── Ch12_04.cpp
│ ├── Ch12_04.h
│ ├── Ch12_04.vcxproj
│ ├── Ch12_04.vcxproj.filters
│ ├── Ch12_04.vcxproj.user
│ └── Ch12_04_fasm.asm
├── Ch12_05
│ ├── Ch12_05.cpp
│ ├── Ch12_05.h
│ ├── Ch12_05.vcxproj
│ ├── Ch12_05.vcxproj.filters
│ ├── Ch12_05.vcxproj.user
│ ├── Ch12_05_fasm.asm
│ └── Ch12_05_fcpp.cpp
├── Ch12_06
│ ├── Ch12_06.cpp
│ ├── Ch12_06.h
│ ├── Ch12_06.vcxproj
│ ├── Ch12_06.vcxproj.filters
│ ├── Ch12_06.vcxproj.user
│ └── Ch12_06_fasm.asm
├── Ch12_07
│ ├── Ch12_07.cpp
│ ├── Ch12_07.h
│ ├── Ch12_07.vcxproj
│ ├── Ch12_07.vcxproj.filters
│ ├── Ch12_07.vcxproj.user
│ └── Ch12_07_fasm.asm
├── Ch12_08
│ ├── Ch12_08.cpp
│ ├── Ch12_08.h
│ ├── Ch12_08.vcxproj
│ ├── Ch12_08.vcxproj.filters
│ ├── Ch12_08.vcxproj.user
│ ├── Ch12_08_fasm.asm
│ └── Ch12_08_fcpp.cpp
├── Ch12_09
│ ├── Ch12_09.cpp
│ ├── Ch12_09.h
│ ├── Ch12_09.vcxproj
│ ├── Ch12_09.vcxproj.filters
│ ├── Ch12_09.vcxproj.user
│ ├── Ch12_09_fasm.asm
│ └── Ch12_09_fcpp.cpp
└── Chapter12.sln
├── Chapter13
├── Ch13_01
│ ├── Ch13_01.cpp
│ ├── Ch13_01.h
│ ├── Ch13_01.vcxproj
│ ├── Ch13_01.vcxproj.filters
│ ├── Ch13_01.vcxproj.user
│ └── Ch13_01_fasm.asm
├── Ch13_02
│ ├── Ch13_02.cpp
│ ├── Ch13_02.h
│ ├── Ch13_02.vcxproj
│ ├── Ch13_02.vcxproj.filters
│ ├── Ch13_02.vcxproj.user
│ └── Ch13_02_fasm.asm
├── Ch13_03
│ ├── Ch13_03.cpp
│ ├── Ch13_03.h
│ ├── Ch13_03.vcxproj
│ ├── Ch13_03.vcxproj.filters
│ ├── Ch13_03.vcxproj.user
│ └── Ch13_03_fasm.asm
├── Ch13_04
│ ├── Ch13_04.cpp
│ ├── Ch13_04.h
│ ├── Ch13_04.vcxproj
│ ├── Ch13_04.vcxproj.filters
│ ├── Ch13_04.vcxproj.user
│ └── Ch13_04_fasm.asm
├── Ch13_05
│ ├── Ch13_05.cpp
│ ├── Ch13_05.h
│ ├── Ch13_05.vcxproj
│ ├── Ch13_05.vcxproj.filters
│ ├── Ch13_05.vcxproj.user
│ ├── Ch13_05_bm.cpp
│ ├── Ch13_05_fasm.asm
│ ├── Ch13_05_fcpp.cpp
│ └── Ch13_05_misc.cpp
├── Ch13_06
│ ├── Ch13_06.cpp
│ ├── Ch13_06.h
│ ├── Ch13_06.vcxproj
│ ├── Ch13_06.vcxproj.filters
│ ├── Ch13_06.vcxproj.user
│ ├── Ch13_06_bm.cpp
│ ├── Ch13_06_fasm.asm
│ ├── Ch13_06_fcpp.cpp
│ └── Ch13_06_misc.cpp
└── Chapter13.sln
├── Chapter14
├── Ch14_01
│ ├── Ch14_01.cpp
│ ├── Ch14_01.h
│ ├── Ch14_01.vcxproj
│ ├── Ch14_01.vcxproj.filters
│ ├── Ch14_01.vcxproj.user
│ └── Ch14_01_fasm.asm
├── Ch14_02
│ ├── Ch14_02.cpp
│ ├── Ch14_02.h
│ ├── Ch14_02.vcxproj
│ ├── Ch14_02.vcxproj.filters
│ ├── Ch14_02.vcxproj.user
│ └── Ch14_02_fasm.asm
├── Ch14_03
│ ├── Ch14_03.cpp
│ ├── Ch14_03.h
│ ├── Ch14_03.vcxproj
│ ├── Ch14_03.vcxproj.filters
│ ├── Ch14_03.vcxproj.user
│ ├── Ch14_03_fasm.asm
│ ├── Ch14_03_fcpp.cpp
│ └── Ch14_03_misc.cpp
├── Ch14_04
│ ├── Ch14_04.cpp
│ ├── Ch14_04.h
│ ├── Ch14_04.vcxproj
│ ├── Ch14_04.vcxproj.filters
│ ├── Ch14_04.vcxproj.user
│ ├── Ch14_04_fasm.asm
│ ├── Ch14_04_fcpp.cpp
│ └── Ch14_04_misc.cpp
├── Ch14_05
│ ├── Ch14_05.cpp
│ ├── Ch14_05.h
│ ├── Ch14_05.vcxproj
│ ├── Ch14_05.vcxproj.filters
│ ├── Ch14_05.vcxproj.user
│ ├── Ch14_05_fasm.asm
│ └── Ch14_05_fcpp.cpp
└── Chapter14.sln
├── Chapter15
├── Ch15_01
│ ├── Ch15_01.cpp
│ ├── Ch15_01.h
│ ├── Ch15_01.vcxproj
│ ├── Ch15_01.vcxproj.filters
│ ├── Ch15_01.vcxproj.user
│ └── Ch15_01_fasm.asm
├── Ch15_02
│ ├── Ch15_02.cpp
│ ├── Ch15_02.h
│ ├── Ch15_02.vcxproj
│ ├── Ch15_02.vcxproj.filters
│ ├── Ch15_02.vcxproj.user
│ └── Ch15_02_fasm.asm
├── Ch15_03
│ ├── Ch15_03.cpp
│ ├── Ch15_03.h
│ ├── Ch15_03.vcxproj
│ ├── Ch15_03.vcxproj.filters
│ ├── Ch15_03.vcxproj.user
│ ├── Ch15_03_bm.cpp
│ ├── Ch15_03_fasm.asm
│ ├── Ch15_03_fcpp.cpp
│ └── Ch15_03_misc.cpp
├── Ch15_04
│ ├── Ch15_04.cpp
│ ├── Ch15_04.h
│ ├── Ch15_04.vcxproj
│ ├── Ch15_04.vcxproj.filters
│ ├── Ch15_04.vcxproj.user
│ ├── Ch15_04_bm.cpp
│ ├── Ch15_04_fasm.asm
│ ├── Ch15_04_fcpp.cpp
│ └── Ch15_04_misc.cpp
├── Ch15_05
│ ├── Ch15_05.cpp
│ ├── Ch15_05.h
│ ├── Ch15_05.vcxproj
│ ├── Ch15_05.vcxproj.filters
│ ├── Ch15_05.vcxproj.user
│ ├── Ch15_05_bm.cpp
│ ├── Ch15_05_fasm.asm
│ ├── Ch15_05_fcpp.cpp
│ └── Ch15_05_misc.cpp
└── Chapter15.sln
├── Chapter16
├── Ch16_01
│ ├── Ch16_01.cpp
│ ├── Ch16_01.h
│ ├── Ch16_01.vcxproj
│ ├── Ch16_01.vcxproj.filters
│ ├── Ch16_01.vcxproj.user
│ ├── Ch16_01_fasm.asm
│ ├── Ch16_01_fcpp.cpp
│ └── Ch16_01_misc.cpp
├── Ch16_02
│ ├── Ch16_02.cpp
│ ├── Ch16_02.h
│ ├── Ch16_02.vcxproj
│ ├── Ch16_02.vcxproj.filters
│ ├── Ch16_02.vcxproj.user
│ ├── Ch16_02_bm.cpp
│ ├── Ch16_02_fasm.asm
│ ├── Ch16_02_fcpp.cpp
│ ├── Ch16_02_misc.cpp
│ └── Ch16_02_test.cpp
├── Ch16_03
│ ├── Ch16_03.cpp
│ ├── Ch16_03.h
│ ├── Ch16_03.vcxproj
│ ├── Ch16_03.vcxproj.filters
│ ├── Ch16_03.vcxproj.user
│ ├── Ch16_03_bm.cpp
│ ├── Ch16_03_fasm.asm
│ ├── Ch16_03_fcpp.cpp
│ └── Ch16_03_misc.cpp
├── Ch16_04
│ ├── Ch16_04.cpp
│ ├── Ch16_04.h
│ ├── Ch16_04.vcxproj
│ ├── Ch16_04.vcxproj.filters
│ ├── Ch16_04.vcxproj.user
│ ├── Ch16_04_bm.cpp
│ ├── Ch16_04_fasm.asm
│ ├── Ch16_04_fcpp.cpp
│ └── Ch16_04_misc.cpp
├── Ch16_05
│ ├── Ch16_05.cpp
│ ├── Ch16_05.h
│ ├── Ch16_05.vcxproj
│ ├── Ch16_05.vcxproj.filters
│ ├── Ch16_05.vcxproj.user
│ ├── Ch16_05_bm.cpp
│ ├── Ch16_05_fasm.asm
│ ├── Ch16_05_fcpp.cpp
│ └── Ch16_05_misc.cpp
└── Chapter16.sln
├── Chapter17
├── Ch17_01
│ ├── Ch17_01.cpp
│ ├── Ch17_01.h
│ ├── Ch17_01.vcxproj
│ ├── Ch17_01.vcxproj.filters
│ ├── Ch17_01.vcxproj.user
│ └── Ch17_01_fasm.asm
├── Ch17_02
│ ├── Ch17_02.cpp
│ ├── Ch17_02.h
│ ├── Ch17_02.vcxproj
│ ├── Ch17_02.vcxproj.filters
│ ├── Ch17_02.vcxproj.user
│ └── Ch17_02_fasm.asm
├── Ch17_03
│ ├── Ch17_03.cpp
│ ├── Ch17_03.h
│ ├── Ch17_03.vcxproj
│ ├── Ch17_03.vcxproj.filters
│ ├── Ch17_03.vcxproj.user
│ ├── Ch17_03_fasm.asm
│ ├── Ch17_03_fcpp.cpp
│ └── Ch17_03_misc.cpp
├── Ch17_04
│ ├── Ch17_04.cpp
│ ├── Ch17_04.h
│ ├── Ch17_04.vcxproj
│ ├── Ch17_04.vcxproj.filters
│ ├── Ch17_04.vcxproj.user
│ ├── Ch17_04_bm.cpp
│ ├── Ch17_04_fasm.asm
│ ├── Ch17_04_fcpp.cpp
│ └── Ch17_04_misc.cpp
└── Chapter17.sln
├── Chapter18
├── Ch18_01
│ ├── Ch18_01.cpp
│ ├── Ch18_01.h
│ ├── Ch18_01.vcxproj
│ ├── Ch18_01.vcxproj.filters
│ ├── Ch18_01.vcxproj.user
│ └── Ch18_01_fasm.asm
├── Ch18_02
│ ├── Ch18_02.cpp
│ ├── Ch18_02.h
│ ├── Ch18_02.vcxproj
│ ├── Ch18_02.vcxproj.filters
│ ├── Ch18_02.vcxproj.user
│ └── Ch18_02_fasm.asm
├── Ch18_03
│ ├── Ch18_03.cpp
│ ├── Ch18_03.h
│ ├── Ch18_03.vcxproj
│ ├── Ch18_03.vcxproj.filters
│ ├── Ch18_03.vcxproj.user
│ ├── Ch18_03_fasm.asm
│ ├── Ch18_03_fcpp.cpp
│ └── Ch18_03_misc.cpp
├── Ch18_04
│ ├── Ch18_04.cpp
│ ├── Ch18_04.h
│ ├── Ch18_04.vcxproj
│ ├── Ch18_04.vcxproj.filters
│ ├── Ch18_04.vcxproj.user
│ ├── Ch18_04_bm.cpp
│ ├── Ch18_04_fasm.asm
│ ├── Ch18_04_fcpp.cpp
│ └── Ch18_04_misc.cpp
├── Ch18_05
│ ├── Ch18_05.cpp
│ ├── Ch18_05.h
│ ├── Ch18_05.vcxproj
│ ├── Ch18_05.vcxproj.filters
│ ├── Ch18_05.vcxproj.user
│ ├── Ch18_05_bm.cpp
│ ├── Ch18_05_fasm.asm
│ ├── Ch18_05_fasm2.asm
│ ├── Ch18_05_fcpp.cpp
│ └── Ch18_05_misc.cpp
├── Ch18_06
│ ├── Ch18_06.cpp
│ ├── Ch18_06.h
│ ├── Ch18_06.vcxproj
│ ├── Ch18_06.vcxproj.filters
│ ├── Ch18_06.vcxproj.user
│ ├── Ch18_06_bm.cpp
│ ├── Ch18_06_fasm.asm
│ ├── Ch18_06_fcpp.cpp
│ └── Ch18_06_misc.cpp
└── Chapter18.sln
├── Chapter19
├── Ch19_01
│ ├── Ch19_01.cpp
│ ├── Ch19_01.h
│ ├── Ch19_01.vcxproj
│ ├── Ch19_01.vcxproj.filters
│ ├── Ch19_01.vcxproj.user
│ ├── Ch19_01_bm.cpp
│ ├── Ch19_01_fcpp.cpp
│ ├── Ch19_01_fcpp2.cpp
│ ├── Ch19_01_misc.cpp
│ └── makefile
├── Ch19_02
│ ├── Ch19_02.cpp
│ ├── Ch19_02.h
│ ├── Ch19_02.vcxproj
│ ├── Ch19_02.vcxproj.filters
│ ├── Ch19_02.vcxproj.user
│ ├── Ch19_02_bm.cpp
│ ├── Ch19_02_fcpp.cpp
│ ├── Ch19_02_fcpp2.cpp
│ ├── Ch19_02_misc.cpp
│ ├── Ch19_02_test.cpp
│ └── makefile
└── Chapter19.sln
├── Contributing.md
├── Data
├── ImageA.png
├── ImageB.png
├── ImageC.png
├── ImageD.png
├── ImageE.png
└── ImageF.png
├── Doc
├── ImportantNotes.txt
├── ReleaseHistory.txt
├── VS2022 Review Solution Actions Dialog Box.jpg
└── VS2022Notes.txt
├── Include
├── AlignedMem.h
├── BmThreadTimer.h
├── ImageBuffer.h
├── ImageMatrix.h
├── ImageMatrixL.h
├── ImageMatrixW.h
├── ImageMisc.h
├── ImagePng.h
├── MF.h
├── MT.h
├── MT_Convolve.h
├── MacrosX86-64-AVX.asmh
├── MatrixF32.h
├── MatrixF64.h
├── MiscTypes.h
├── OS.h
├── SimdMath.h
├── XmmVal.h
├── YmmVal.h
├── ZmmVal.h
└── cmpequ.asmh
├── LICENSE.txt
├── README.md
└── errata.md
/978-1-4842-7917-5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Apress/modern-parallel-programming-cpp-assembly/608b328fe91d4afb91d144f4123a205a0b18242e/978-1-4842-7917-5.jpg
--------------------------------------------------------------------------------
/AppendixA/Example1/Example1.vcxproj.filters:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
6 | cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx
7 |
8 |
9 | {93995380-89BD-4b04-88EB-625FBE52EBFB}
10 | h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd
11 |
12 |
13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
15 |
16 |
17 |
18 |
19 | Source Files
20 |
21 |
22 |
23 |
24 | Source Files
25 |
26 |
27 |
--------------------------------------------------------------------------------
/AppendixA/Example1/Example1.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/AppendixA/TestSolution.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio Version 16
4 | VisualStudioVersion = 16.0.31829.152
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Example1", "Example1\Example1.vcxproj", "{D5ADB351-2739-4853-AD8F-E8A28C5797F7}"
7 | EndProject
8 | Global
9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
10 | Debug|x64 = Debug|x64
11 | Release|x64 = Release|x64
12 | EndGlobalSection
13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
14 | {D5ADB351-2739-4853-AD8F-E8A28C5797F7}.Debug|x64.ActiveCfg = Debug|x64
15 | {D5ADB351-2739-4853-AD8F-E8A28C5797F7}.Debug|x64.Build.0 = Debug|x64
16 | {D5ADB351-2739-4853-AD8F-E8A28C5797F7}.Release|x64.ActiveCfg = Release|x64
17 | {D5ADB351-2739-4853-AD8F-E8A28C5797F7}.Release|x64.Build.0 = Release|x64
18 | EndGlobalSection
19 | GlobalSection(SolutionProperties) = preSolution
20 | HideSolutionNode = FALSE
21 | EndGlobalSection
22 | GlobalSection(ExtensibilityGlobals) = postSolution
23 | SolutionGuid = {30740DBB-52E9-43EF-BC5D-8C33B7137F4E}
24 | EndGlobalSection
25 | EndGlobal
26 |
--------------------------------------------------------------------------------
/Chapter01/Ch01_01/Ch01_01.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch01_01.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 |
7 | // Ch01_01_fcpp.cpp
8 | extern void CalcZ_Cpp(float* z, const float* x, const float* y, size_t n);
9 | extern void CalcZ_Iavx(float* z, const float* x, const float* y, size_t n);
10 |
11 | // Ch01_01_fasm.asm
12 | extern "C" void CalcZ_Aavx(float* z, const float* x, const float* y, size_t n);
13 |
--------------------------------------------------------------------------------
/Chapter01/Ch01_01/Ch01_01.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter01/Ch01_01/Ch01_01_fcpp.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch01_01_fcpp.cpp
3 | //------------------------------------------------
4 |
5 | #include <immintrin.h>
6 | #include "Ch01_01.h"
7 |
8 | void CalcZ_Cpp(float* z, const float* x, const float* y, size_t n) // Scalar version: writes z[i] = x[i] + y[i] for every i in [0, n)
9 | {
10 | for (size_t i = 0; i < n; i++) // n == 0 performs no reads or writes
11 | z[i] = x[i] + y[i]; // one element per iteration, no intrinsics
12 | }
13 |
14 | void CalcZ_Iavx(float* z, const float* x, const float* y, size_t n) // Intrinsics version: writes z[i] = x[i] + y[i] for i in [0, n), eight floats per SIMD iteration
15 | {
16 | size_t i = 0; // shared by the SIMD loop and the scalar tail loop below
17 | const size_t num_simd_elements = 8; // a __m256 holds eight 32-bit floats
18 |
19 | for (; n - i >= num_simd_elements; i += num_simd_elements) // i only advances while at least 8 elements remain, so the unsigned n - i cannot wrap
20 | {
21 | // Calculate z[i:i+7] = x[i:i+7] + y[i:i+7]
22 | __m256 x_vals = _mm256_loadu_ps(&x[i]); // unaligned load: &x[i] needs no 32-byte alignment
23 | __m256 y_vals = _mm256_loadu_ps(&y[i]); // unaligned load
24 | __m256 z_vals = _mm256_add_ps(x_vals, y_vals); // eight packed single-precision adds
25 |
26 | _mm256_storeu_ps(&z[i], z_vals); // unaligned store of all eight results
27 | }
28 |
29 | // Calculate z[i] = x[i] + y[i] for any remaining elements
30 | for (; i < n; i += 1) // fewer than num_simd_elements elements are left here
31 | z[i] = x[i] + y[i];
32 | }
33 |
--------------------------------------------------------------------------------
/Chapter01/Chapter01.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio Version 16
4 | VisualStudioVersion = 16.0.31424.327
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Ch01_01", "Ch01_01\Ch01_01.vcxproj", "{467CFDA8-9F54-4C8A-945C-430284891B93}"
7 | EndProject
8 | Global
9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
10 | Debug|x64 = Debug|x64
11 | Release|x64 = Release|x64
12 | EndGlobalSection
13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
14 | {467CFDA8-9F54-4C8A-945C-430284891B93}.Debug|x64.ActiveCfg = Debug|x64
15 | {467CFDA8-9F54-4C8A-945C-430284891B93}.Debug|x64.Build.0 = Debug|x64
16 | {467CFDA8-9F54-4C8A-945C-430284891B93}.Release|x64.ActiveCfg = Release|x64
17 | {467CFDA8-9F54-4C8A-945C-430284891B93}.Release|x64.Build.0 = Release|x64
18 | EndGlobalSection
19 | GlobalSection(SolutionProperties) = preSolution
20 | HideSolutionNode = FALSE
21 | EndGlobalSection
22 | GlobalSection(ExtensibilityGlobals) = postSolution
23 | SolutionGuid = {3ACB9B7D-D21D-4217-A57F-E9955222E4F9}
24 | EndGlobalSection
25 | EndGlobal
26 |
--------------------------------------------------------------------------------
/Chapter02/Ch02_01/Ch02_01.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch02_01.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include "XmmVal.h"
7 |
8 | // Ch02_01_fcpp.cpp
9 | extern void AddI16_Iavx(XmmVal* c1, XmmVal* c2, const XmmVal* a, const XmmVal* b);
10 | extern void AddU16_Iavx(XmmVal* c1, XmmVal* c2, const XmmVal* a, const XmmVal* b);
11 |
--------------------------------------------------------------------------------
/Chapter02/Ch02_01/Ch02_01.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter02/Ch02_01/Ch02_01_fcpp.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch02_01_fcpp.cpp
3 | //------------------------------------------------
4 |
5 | #include <immintrin.h>
6 | #include "Ch02_01.h"
7 |
8 | void AddI16_Iavx(XmmVal* c1, XmmVal* c2, const XmmVal* a, const XmmVal* b) // Packed 16-bit add of *a and *b: wraparound result to *c1, signed-saturated result to *c2
9 | {
10 | __m128i a_vals = _mm_load_si128((__m128i*)a); // aligned 128-bit load; presumably XmmVal (XmmVal.h) is 16-byte aligned - confirm. NOTE(review): the C-style cast also drops const
11 | __m128i b_vals = _mm_load_si128((__m128i*)b); // same aligned load for the second operand
12 |
13 | __m128i c1_vals = _mm_add_epi16(a_vals, b_vals); // eight 16-bit lanes, wraparound on overflow
14 | __m128i c2_vals = _mm_adds_epi16(a_vals, b_vals); // eight 16-bit lanes, signed saturation
15 |
16 | _mm_store_si128((__m128i*)c1, c1_vals); // aligned 128-bit store of the wraparound sums
17 | _mm_store_si128((__m128i*)c2, c2_vals); // aligned 128-bit store of the saturated sums
18 | }
19 |
20 | void AddU16_Iavx(XmmVal* c1, XmmVal* c2, const XmmVal* a, const XmmVal* b) // Packed 16-bit add of *a and *b: wraparound result to *c1, unsigned-saturated result to *c2
21 | {
22 | __m128i a_vals = _mm_load_si128((__m128i*)a); // aligned 128-bit load; presumably XmmVal (XmmVal.h) is 16-byte aligned - confirm. NOTE(review): the C-style cast also drops const
23 | __m128i b_vals = _mm_load_si128((__m128i*)b); // same aligned load for the second operand
24 |
25 | __m128i c1_vals = _mm_add_epi16(a_vals, b_vals); // eight 16-bit lanes, wraparound on overflow
26 | __m128i c2_vals = _mm_adds_epu16(a_vals, b_vals); // eight 16-bit lanes, unsigned saturation
27 |
28 | _mm_store_si128((__m128i*)c1, c1_vals); // aligned 128-bit store of the wraparound sums
29 | _mm_store_si128((__m128i*)c2, c2_vals); // aligned 128-bit store of the saturated sums
30 | }
31 |
--------------------------------------------------------------------------------
/Chapter02/Ch02_02/Ch02_02.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch02_02.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include "XmmVal.h"
7 |
8 | // Ch02_02_fcpp.cpp
9 | extern void SubI32_Iavx(XmmVal* c, const XmmVal* a, const XmmVal* b);
10 | extern void SubI64_Iavx(XmmVal* c, const XmmVal* a, const XmmVal* b);
11 |
--------------------------------------------------------------------------------
/Chapter02/Ch02_02/Ch02_02.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter02/Ch02_02/Ch02_02_fcpp.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch02_02_fcpp.cpp
3 | //------------------------------------------------
4 |
5 | #include <immintrin.h>
6 | #include "Ch02_02.h"
7 |
8 | extern void SubI32_Iavx(XmmVal* c, const XmmVal* a, const XmmVal* b) // Packed 32-bit subtract: stores *a - *b lane by lane into *c. NOTE(review): extern is redundant on a definition
9 | {
10 | __m128i a_vals = _mm_load_si128((__m128i*)a); // aligned 128-bit load; presumably XmmVal (XmmVal.h) is 16-byte aligned - confirm. NOTE(review): the C-style cast also drops const
11 | __m128i b_vals = _mm_load_si128((__m128i*)b); // same aligned load for the subtrahend
12 | __m128i c_vals = _mm_sub_epi32(a_vals, b_vals); // four 32-bit lanes, a - b, wraparound on overflow
13 |
14 | _mm_store_si128((__m128i*)c, c_vals); // aligned 128-bit store of the differences
15 | }
16 |
17 | extern void SubI64_Iavx(XmmVal* c, const XmmVal* a, const XmmVal* b) // Packed 64-bit subtract: stores *a - *b lane by lane into *c. NOTE(review): extern is redundant on a definition
18 | {
19 | __m128i a_vals = _mm_load_si128((__m128i*)a); // aligned 128-bit load; presumably XmmVal (XmmVal.h) is 16-byte aligned - confirm. NOTE(review): the C-style cast also drops const
20 | __m128i b_vals = _mm_load_si128((__m128i*)b); // same aligned load for the subtrahend
21 | __m128i c_vals = _mm_sub_epi64(a_vals, b_vals); // two 64-bit lanes, a - b, wraparound on overflow
22 |
23 | _mm_store_si128((__m128i*)c, c_vals); // aligned 128-bit store of the differences
24 | }
25 |
--------------------------------------------------------------------------------
/Chapter02/Ch02_03/Ch02_03.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch02_03.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include "XmmVal.h"
7 |
8 | // Ch02_03_fcpp.cpp
9 | extern void MulI16_Iavx(XmmVal c[2], const XmmVal* a, const XmmVal* b);
10 | extern void MulI32a_Iavx(XmmVal* c, const XmmVal* a, const XmmVal* b);
11 | extern void MulI32b_Iavx(XmmVal c[2], const XmmVal* a, const XmmVal* b);
12 |
--------------------------------------------------------------------------------
/Chapter02/Ch02_03/Ch02_03.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter02/Ch02_04/Ch02_04.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch02_04.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include "XmmVal.h"
7 |
8 | // Ch02_04_fcpp.cpp
9 | extern void AndU16_Iavx(XmmVal* c, const XmmVal* a, const XmmVal* b);
10 | extern void OrU16_Iavx(XmmVal* c, const XmmVal* a, const XmmVal* b);
11 | extern void XorU16_Iavx(XmmVal* c, const XmmVal* a, const XmmVal* b);
12 |
--------------------------------------------------------------------------------
/Chapter02/Ch02_04/Ch02_04.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter02/Ch02_04/Ch02_04_fcpp.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch02_04_fcpp.cpp
3 | //------------------------------------------------
4 |
5 | #include <immintrin.h>
6 | #include "Ch02_04.h"
7 |
8 | void AndU16_Iavx(XmmVal* c, const XmmVal* a, const XmmVal* b)
9 | {
10 |     // Bitwise AND of the two 128-bit operands; the result is stored to c.
11 |     const __m128i lhs = _mm_load_si128(reinterpret_cast<const __m128i*>(a));
12 |     const __m128i rhs = _mm_load_si128(reinterpret_cast<const __m128i*>(b));
13 |
14 |     _mm_store_si128(reinterpret_cast<__m128i*>(c), _mm_and_si128(lhs, rhs));
15 | }
16 |
17 | void OrU16_Iavx(XmmVal* c, const XmmVal* a, const XmmVal* b)
18 | {
19 |     // Bitwise OR of the two 128-bit operands; the result is stored to c.
20 |     const __m128i lhs = _mm_load_si128(reinterpret_cast<const __m128i*>(a));
21 |     const __m128i rhs = _mm_load_si128(reinterpret_cast<const __m128i*>(b));
22 |
23 |     _mm_store_si128(reinterpret_cast<__m128i*>(c), _mm_or_si128(lhs, rhs));
24 | }
25 |
26 | void XorU16_Iavx(XmmVal* c, const XmmVal* a, const XmmVal* b)
27 | {
28 |     // Bitwise XOR of the two 128-bit operands; the result is stored to c.
29 |     const __m128i lhs = _mm_load_si128(reinterpret_cast<const __m128i*>(a));
30 |     const __m128i rhs = _mm_load_si128(reinterpret_cast<const __m128i*>(b));
31 |
32 |     _mm_store_si128(reinterpret_cast<__m128i*>(c), _mm_xor_si128(lhs, rhs));
33 | }
34 |
--------------------------------------------------------------------------------
/Chapter02/Ch02_05/Ch02_05.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch02_05.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include "XmmVal.h"
7 |
8 | // Functions defined in Ch02_05_fcpp.cpp: shift the 16-bit elements of *a by count bits and store to *c.
9 | extern void SllU16_Iavx(XmmVal* c, const XmmVal* a, int count); // uses _mm_slli_epi16 (left shift)
10 | extern void SrlU16_Iavx(XmmVal* c, const XmmVal* a, int count); // uses _mm_srli_epi16 (logical right shift)
11 | extern void SraU16_Iavx(XmmVal* c, const XmmVal* a, int count); // uses _mm_srai_epi16 (arithmetic right shift)
12 |
--------------------------------------------------------------------------------
/Chapter02/Ch02_05/Ch02_05.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter02/Ch02_05/Ch02_05_fcpp.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch02_05_fcpp.cpp
3 | //------------------------------------------------
4 |
5 | #include <immintrin.h>
6 | #include "Ch02_05.h"
7 |
8 | void SllU16_Iavx(XmmVal* c, const XmmVal* a, int count)
9 | {
10 |     // Left shift of every 16-bit element of a by count bits; the result is stored to c.
11 |     const __m128i src = _mm_load_si128(reinterpret_cast<const __m128i*>(a));
12 |
13 |     _mm_store_si128(reinterpret_cast<__m128i*>(c), _mm_slli_epi16(src, count));
14 | }
15 |
16 | void SrlU16_Iavx(XmmVal* c, const XmmVal* a, int count)
17 | {
18 |     // Logical right shift of every 16-bit element of a by count bits; the result is stored to c.
19 |     const __m128i src = _mm_load_si128(reinterpret_cast<const __m128i*>(a));
20 |
21 |     _mm_store_si128(reinterpret_cast<__m128i*>(c), _mm_srli_epi16(src, count));
22 | }
23 |
24 | void SraU16_Iavx(XmmVal* c, const XmmVal* a, int count)
25 | {
26 |     // Arithmetic right shift of every 16-bit element of a by count bits; the result is stored to c.
27 |     const __m128i src = _mm_load_si128(reinterpret_cast<const __m128i*>(a));
28 |
29 |     _mm_store_si128(reinterpret_cast<__m128i*>(c), _mm_srai_epi16(src, count));
30 | }
31 |
--------------------------------------------------------------------------------
/Chapter02/Ch02_06/Ch02_06.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch02_06.cpp
3 | //------------------------------------------------
4 |
5 | #include <iostream>
6 | #include "Ch02_06.h"
7 | #include "AlignedMem.h"
8 |
9 | static void CalcMinMaxU8();
10 |
11 | int main() // Entry point of the Ch02_06 example.
12 | {
13 | CalcMinMaxU8(); // run both min/max implementations once and print their results
14 | CalcMinMaxU8_bm(); // then run the benchmark declared in Ch02_06.h
15 | }
16 |
17 | static void CalcMinMaxU8()
18 | {
19 |     // Runs CalcMinMaxU8_Cpp and CalcMinMaxU8_Iavx on the same test array and prints both results.
20 |     const size_t n = c_NumElements;
21 |     AlignedArray<uint8_t> x_aa(n, 16);   // element type follows from the uint8_t* returned by Data()
22 |     uint8_t* x = x_aa.Data();
23 |     InitArray(x, n, c_RngSeedVal);
24 |
25 |     uint8_t x_min1 = 0, x_max1 = 0;
26 |     uint8_t x_min2 = 0, x_max2 = 0;
27 |
28 |     const bool rc1 = CalcMinMaxU8_Cpp(&x_min1, &x_max1, x, n);
29 |     const bool rc2 = CalcMinMaxU8_Iavx(&x_min2, &x_max2, x, n);
30 |
31 |     std::cout << "\nResults for CalcMinMaxU8_Cpp\n";
32 |     std::cout << "rc1: " << rc1 << " x_min1: " << (int)x_min1;   // cast so the byte prints as a number
33 |     std::cout << " x_max1: " << (int)x_max1 << '\n';
34 |     std::cout << "\nResults for CalcMinMaxU8_Iavx\n";
35 |     std::cout << "rc2: " << rc2 << " x_min2: " << (int)x_min2;
36 |     std::cout << " x_max2: " << (int)x_max2 << '\n';
37 | }
38 |
--------------------------------------------------------------------------------
/Chapter02/Ch02_06/Ch02_06.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch02_06.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include <cstddef>
7 | #include <cstdint>
8 |
9 | // Ch02_06_fcpp.cpp - both take output pointers x_min / x_max, the input array x and its length n
10 | extern bool CalcMinMaxU8_Cpp(uint8_t* x_min, uint8_t* x_max, const uint8_t* x, size_t n);
11 | extern bool CalcMinMaxU8_Iavx(uint8_t* x_min, uint8_t* x_max, const uint8_t* x, size_t n);
12 |
13 | // Ch02_06_misc.cpp - fills x with random values, then plants known minimum / maximum markers
14 | extern void InitArray(uint8_t* x, size_t n, unsigned int rng_seed);
15 |
16 | // Ch02_06_bm.cpp
17 | extern void CalcMinMaxU8_bm();
18 |
19 | // c_NumElements must be > 0 and an integral multiple of 16
20 | const size_t c_NumElements = 10000000;
21 | const unsigned int c_RngSeedVal = 23; // seed passed to InitArray by Ch02_06.cpp and Ch02_06_bm.cpp
22 |
--------------------------------------------------------------------------------
/Chapter02/Ch02_06/Ch02_06.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter02/Ch02_06/Ch02_06_bm.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch02_06_bm.cpp
3 | //------------------------------------------------
4 |
5 | #include <iostream>
6 | #include "Ch02_06.h"
7 | #include "AlignedMem.h"
8 | #include "BmThreadTimer.h"
9 |
10 | void CalcMinMaxU8_bm(void)
11 | {
12 |     std::cout << "\nRunning benchmark function CalcMinMaxU8_bm - please wait\n";
13 |     // Times CalcMinMaxU8_Cpp (algorithm 0) against CalcMinMaxU8_Iavx (algorithm 1) and saves the timings to a CSV file.
14 |     const size_t n = c_NumElements;
15 |     AlignedArray<uint8_t> x_aa(n, 16);   // element type follows from the uint8_t* returned by Data()
16 |     uint8_t* x = x_aa.Data();
17 |
18 |     InitArray(x, n, c_RngSeedVal);
19 |
20 |     uint8_t x_min0 = 0, x_max0 = 0;
21 |     uint8_t x_min1 = 0, x_max1 = 0;
22 |
23 |     const size_t num_it = 500;
24 |     const size_t num_alg = 2;
25 |     BmThreadTimer bmtt(num_it, num_alg);
26 |
27 |     for (size_t i = 0; i < num_it; i++)
28 |     {
29 |         bmtt.Start(i, 0);
30 |         CalcMinMaxU8_Cpp(&x_min0, &x_max0, x, n);
31 |         bmtt.Stop(i, 0);
32 |
33 |         bmtt.Start(i, 1);
34 |         CalcMinMaxU8_Iavx(&x_min1, &x_max1, x, n);
35 |         bmtt.Stop(i, 1);
36 |     }
37 |
38 |     std::string fn = bmtt.BuildCsvFilenameString("Ch02_06_CalcMinMaxU8_bm");
39 |     bmtt.SaveElapsedTimes(fn, BmThreadTimer::EtUnit::MicroSec, 2);
40 |     std::cout << "Benchmark times save to file " << fn << '\n';
41 | }
42 |
--------------------------------------------------------------------------------
/Chapter02/Ch02_06/Ch02_06_misc.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch02_06_misc.cpp
3 | //------------------------------------------------
4 |
5 | #include "Ch02_06.h"
6 | #include "MT.h"
7 |
8 | void InitArray(uint8_t* x, size_t n, unsigned int rng_seed)
9 | {   // Fills x through MT::FillArray (range arguments 5 and 250), then plants known extreme values.
10 |     const int rng_min_val = 5;
11 |     const int rng_max_val = 250;
12 |     MT::FillArray(x, n, rng_min_val, rng_max_val, rng_seed);
13 |     if (n < 27) return;   // the markers below write up to index n / 2 + 13, which needs n >= 27
14 |     // Use known values for min & max (for test purposes)
15 |     x[(n / 4) * 3 + 1] = 2;   // smallest value written here
16 |     x[n / 4 + 11] = 3;
17 |     x[n / 2] = 252;
18 |     x[n / 2 + 13] = 253;
19 |     x[n / 8 + 5] = 4;
20 |     x[n / 8 + 7] = 254;       // largest value written here
21 | }
22 |
--------------------------------------------------------------------------------
/Chapter02/Ch02_07/Ch02_07.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch02_07.cpp
3 | //------------------------------------------------
4 |
5 | #include <iostream>
6 | #include <iomanip>
7 | #include "Ch02_07.h"
8 | #include "AlignedMem.h"
9 |
10 | static void CalcMeanU8(void);
11 |
12 | int main() // Entry point of the Ch02_07 example.
13 | {
14 | CalcMeanU8(); // run both mean implementations once and print their results
15 | CalcMeanU8_bm(); // then run the benchmark declared in Ch02_07.h
16 | }
17 |
18 | static void CalcMeanU8(void)
19 | {
20 |     // Runs CalcMeanU8_Cpp and CalcMeanU8_Iavx on the same test array and prints both results.
21 |     const char nl = '\n';
22 |     const size_t n = c_NumElements;
23 |     AlignedArray<uint8_t> x_aa(n, c_Alignment);   // c_Alignment == 16, as in CalcMeanU8_bm
24 |     uint8_t* x = x_aa.Data();
25 |     InitArray(x, n, c_RngSeedVal);
26 |
27 |     // Zero-initialized so the values printed below are defined even if a routine returns false.
28 |     uint64_t sum_x0 = 0, sum_x1 = 0;
29 |     double mean_x0 = 0.0, mean_x1 = 0.0;
30 |
31 |     const bool rc0 = CalcMeanU8_Cpp(&mean_x0, &sum_x0, x, n);
32 |     const bool rc1 = CalcMeanU8_Iavx(&mean_x1, &sum_x1, x, n);
33 |
34 |     std::cout << std::fixed << std::setprecision(6);
35 |
36 |     std::cout << "\nResults for CalcMeanU8_Cpp\n";
37 |     std::cout << "rc0: " << rc0 << " ";
38 |     std::cout << "sum_x0: " << sum_x0 << " ";
39 |     std::cout << "mean_x0: " << mean_x0 << nl;
40 |
41 |     std::cout << "\nResults for CalcMeanU8_Iavx\n";
42 |     std::cout << "rc1: " << rc1 << " ";
43 |     std::cout << "sum_x1: " << sum_x1 << " ";
44 |     std::cout << "mean_x1: " << mean_x1 << nl;
45 | }
46 |
--------------------------------------------------------------------------------
/Chapter02/Ch02_07/Ch02_07.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch02_07.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include <cstddef>
7 | #include <cstdint>
8 |
9 | // Ch02_07_fcpp.cpp - both take output pointers mean_x / sum_x, the input array x and its length n
10 | extern bool CalcMeanU8_Cpp(double* mean_x, uint64_t* sum_x, const uint8_t* x, size_t n);
11 | extern bool CalcMeanU8_Iavx(double* mean_x, uint64_t* sum_x, const uint8_t* x, size_t n);
12 |
13 | // Ch02_07_misc.cpp
14 | extern void InitArray(uint8_t* x, size_t n, unsigned int seed);
15 | extern bool CheckArgs(const uint8_t* x, size_t n); // true when 0 < n <= g_NumElementsMax, n % 64 == 0 and x is c_Alignment-aligned
16 |
17 | // Ch02_07_bm.cpp
18 | extern void CalcMeanU8_bm(void);
19 |
20 | // Miscellaneous constants
21 | const size_t c_NumElements = 10000000; // a multiple of 64, as CheckArgs requires
22 | const size_t c_Alignment = 16; // alignment used for the test arrays and tested by CheckArgs
23 | const unsigned int c_RngSeedVal = 29;
24 | extern "C" size_t g_NumElementsMax; // defined in Ch02_07_misc.cpp
25 |
--------------------------------------------------------------------------------
/Chapter02/Ch02_07/Ch02_07.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter02/Ch02_07/Ch02_07_bm.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch02_07_bm.cpp
3 | //------------------------------------------------
4 |
5 | #include "Ch02_07.h"
6 | #include "AlignedMem.h"
7 | #include "BmThreadTimer.h"
8 |
9 | void CalcMeanU8_bm(void)
10 | {
11 |     std::cout << "\nRunning benchmark function CalcMeanU8_bm - please wait\n";
12 |     // Times CalcMeanU8_Cpp (algorithm 0) against CalcMeanU8_Iavx (algorithm 1) and saves the timings to a CSV file.
13 |     const size_t n = c_NumElements;
14 |     AlignedArray<uint8_t> x_aa(n, c_Alignment);   // element type follows from the uint8_t* returned by Data()
15 |     uint8_t* x = x_aa.Data();
16 |
17 |     InitArray(x, n, c_RngSeedVal);
18 |     // Outputs start at zero so they never hold indeterminate values.
19 |     uint64_t sum_x0 = 0, sum_x1 = 0;
20 |     double mean_x0 = 0.0, mean_x1 = 0.0;
21 |
22 |     const size_t num_it = 500;
23 |     const size_t num_alg = 2;
24 |     BmThreadTimer bmtt(num_it, num_alg);
25 |
26 |     for (size_t i = 0; i < num_it; i++)
27 |     {
28 |         bmtt.Start(i, 0);
29 |         CalcMeanU8_Cpp(&mean_x0, &sum_x0, x, n);
30 |         bmtt.Stop(i, 0);
31 |
32 |         bmtt.Start(i, 1);
33 |         CalcMeanU8_Iavx(&mean_x1, &sum_x1, x, n);
34 |         bmtt.Stop(i, 1);
35 |     }
36 |
37 |     std::string fn = bmtt.BuildCsvFilenameString("Ch02_07_CalcMeanU8_bm");
38 |     bmtt.SaveElapsedTimes(fn, BmThreadTimer::EtUnit::MicroSec, 2);
39 |     std::cout << "Benchmark times save to file " << fn << '\n';
40 | }
41 |
--------------------------------------------------------------------------------
/Chapter02/Ch02_07/Ch02_07_misc.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch02_07_misc.cpp
3 | //------------------------------------------------
4 |
5 | #include "Ch02_07.h"
6 | #include "MT.h"
7 | #include "AlignedMem.h"
8 |
9 | size_t g_NumElementsMax = 64 * 1024 * 1024; // upper bound on n accepted by CheckArgs; declared extern "C" in Ch02_07.h
10 |
11 | bool CheckArgs(const uint8_t* x, size_t n)
12 | {
13 |     // Valid when n is non-zero, no larger than g_NumElementsMax,
14 |     // a multiple of 64, and x satisfies c_Alignment.
15 |     const bool size_in_range = (n != 0) && (n <= g_NumElementsMax);
16 |     const bool size_is_block_multiple = (n % 64) == 0;
17 |
18 |     // The alignment test runs last, and only when both size checks pass.
19 |     if (!size_in_range || !size_is_block_multiple)
20 |         return false;
21 |
22 |     return AlignedMem::IsAligned(x, c_Alignment);
23 | }
24 |
25 | void InitArray(uint8_t* x, size_t n, unsigned int rng_seed)
26 | {   // Delegates to MT::FillArray with x, n, the range arguments 0 and 255, and rng_seed.
27 |     const int fill_lo = 0;
28 |     const int fill_hi = 255;
29 |     MT::FillArray(x, n, fill_lo, fill_hi, rng_seed);
30 | }
31 |
--------------------------------------------------------------------------------
/Chapter03/Ch03_01/Ch03_01.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch03_01.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include "YmmVal.h"
7 |
8 | // Functions defined in Ch03_01_fcpp.cpp; a and b are const inputs, c is declared as an eight-element array.
9 | extern void PackedMathF32_Iavx(YmmVal c[8], const YmmVal* a, const YmmVal* b);
10 | extern void PackedMathF64_Iavx(YmmVal c[8], const YmmVal* a, const YmmVal* b);
11 |
--------------------------------------------------------------------------------
/Chapter03/Ch03_01/Ch03_01.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter03/Ch03_02/Ch03_02.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch03_02.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include "YmmVal.h"
7 |
8 | // Functions defined in Ch03_02_fcpp.cpp; a and b are const inputs, c is declared as an eight-element array.
9 | extern void PackedCompareF32_Iavx(YmmVal c[8], const YmmVal* a, const YmmVal* b);
10 | extern void PackedCompareF64_Iavx(YmmVal c[8], const YmmVal* a, const YmmVal* b);
11 |
--------------------------------------------------------------------------------
/Chapter03/Ch03_02/Ch03_02.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter03/Ch03_03/Ch03_03.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch03_03.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include "XmmVal.h"
7 |
8 | enum class CvtOp : unsigned int // selector type of the cvt_op parameter of PackedConvertFP_Iavx
9 | {
10 | I32_TO_F32, F32_TO_I32, I32_TO_F64, F64_TO_I32, F32_TO_F64, F64_TO_F32, // names read as source_TO_destination element type - confirm against Ch03_03_fcpp.cpp
11 | };
12 |
13 | // Function defined in Ch03_03_fcpp.cpp; a is the const input, c the non-const (output) parameter, cvt_op a CvtOp selector.
14 | extern void PackedConvertFP_Iavx(XmmVal* c, const XmmVal* a, CvtOp cvt_op);
15 |
--------------------------------------------------------------------------------
/Chapter03/Ch03_03/Ch03_03.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter03/Ch03_04/Ch03_04.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch03_04.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include <cstddef>
7 |
8 | // Ch03_04_fcpp.cpp - results are returned through the first pointer parameter
9 | extern bool CalcMeanF32_Cpp(float* mean, const float* x, size_t n);
10 | extern bool CalcStDevF32_Cpp(float* st_dev, const float* x, size_t n, float mean);
11 | extern bool CalcMeanF32_Iavx(float* mean, const float* x, size_t n);
12 | extern bool CalcStDevF32_Iavx(float* st_dev, const float* x, size_t n, float mean);
13 |
14 | // Ch03_04_misc.cpp
15 | extern bool CheckArgs(const float* x, size_t n); // true when n >= 2 and x is c_Alignment-aligned
16 | extern void InitArray(float* x, size_t n); // fills x via MT::FillArrayFP using the constants below
17 |
18 | // Miscellaneous constants
19 | const size_t c_NumElements = 91;
20 | const unsigned int c_RngSeed = 13; // seed passed to MT::FillArrayFP by InitArray
21 | const float c_ArrayFillMin = 1.0f; // range arguments passed to MT::FillArrayFP by InitArray
22 | const float c_ArrayFillMax = 100.0f;
23 | const size_t c_Alignment = 32; // alignment tested by CheckArgs
24 |
--------------------------------------------------------------------------------
/Chapter03/Ch03_04/Ch03_04.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter03/Ch03_04/Ch03_04_misc.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch03_04_misc.cpp
3 | //------------------------------------------------
4 |
5 | #include "Ch03_04.h"
6 | #include "AlignedMem.h"
7 | #include "MT.h"
8 |
9 | bool CheckArgs(const float* x, size_t n)
10 | {   // Requires at least two elements and an x that satisfies c_Alignment.
11 |     return (n < 2) ? false : AlignedMem::IsAligned(x, c_Alignment);
12 | }
13 |
14 | void InitArray(float* x, size_t n) // Fills x by delegating to MT::FillArrayFP.
15 | {
16 | MT::FillArrayFP(x, n, c_ArrayFillMin, c_ArrayFillMax, c_RngSeed); // range and seed constants come from Ch03_04.h
17 | }
18 |
--------------------------------------------------------------------------------
/Chapter03/Ch03_05/Ch03_05.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch03_05.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include <cstddef>
7 |
8 | // Ch03_05_fcpp.cpp - results are returned through the first pointer parameter
9 | extern bool CalcMeanF64_Cpp(double* mean, const double* x, size_t n);
10 | extern bool CalcMeanF64_Iavx(double* mean, const double* x, size_t n);
11 | extern bool CalcStDevF64_Cpp(double* st_dev, const double* x, size_t n, double mean);
12 | extern bool CalcStDevF64_Iavx(double* st_dev, const double* x, size_t n, double mean);
13 |
14 | // Ch03_05_misc.cpp
15 | extern bool CheckArgs(const double* x, size_t n); // true when n >= 2 and x is c_Alignment-aligned
16 | extern void InitArray(double* x, size_t n); // fills x via MT::FillArrayFP using the constants below
17 |
18 | // Miscellaneous constants
19 | const size_t c_NumElements = 91;
20 | const unsigned int c_RngSeed = 13; // seed passed to MT::FillArrayFP by InitArray
21 | const double c_ArrayFillMin = 1.0; // range arguments passed to MT::FillArrayFP by InitArray
22 | const double c_ArrayFillMax = 100.0;
23 | const size_t c_Alignment = 32; // alignment tested by CheckArgs
24 |
--------------------------------------------------------------------------------
/Chapter03/Ch03_05/Ch03_05.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter03/Ch03_05/Ch03_05_misc.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch03_05_misc.cpp
3 | //------------------------------------------------
4 |
5 | #include "Ch03_05.h"
6 | #include "AlignedMem.h"
7 | #include "MT.h"
8 |
9 | bool CheckArgs(const double* x, size_t n)
10 | {   // Requires at least two elements and an x that satisfies c_Alignment.
11 |     return (n < 2) ? false : AlignedMem::IsAligned(x, c_Alignment);
12 | }
13 |
14 | void InitArray(double* x, size_t n) // Fills x by delegating to MT::FillArrayFP.
15 | {
16 | MT::FillArrayFP(x, n, c_ArrayFillMin, c_ArrayFillMax, c_RngSeed); // range and seed constants come from Ch03_05.h
17 | }
18 |
19 |
--------------------------------------------------------------------------------
/Chapter03/Ch03_06/Ch03_06.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch03_06.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include <cstddef>
7 |
8 | struct PointArrays // Bundle of float array pointers plus a point count, passed by reference to the Ch03_06 functions.
9 | {
10 | float* X1; // presumably coordinates of the first point set - confirm in Ch03_06_fcpp.cpp
11 | float* Y1;
12 | float* X2; // presumably coordinates of the second point set - confirm
13 | float* Y2;
14 | float* Dist1; // the 1 / 2 suffixes presumably pair with the _Cpp / _Iavx functions - confirm
15 | float* Dist2;
16 | float* DistCmp1;
17 | float* DistCmp2;
18 | size_t NumPoints; // NOTE(review): presumably the element count of every array above - confirm
19 | };
20 |
21 | // Ch03_06_fcpp.cpp - all take the PointArrays bundle by non-const reference
22 | extern bool CalcDistancesF32_Cpp(PointArrays& pa);
23 | extern bool CalcDistancesF32_Iavx(PointArrays& pa);
24 | extern void CompareDistancesF32_Cpp(PointArrays& pa, float cmp_val);
25 | extern void CompareDistancesF32_Iavx(PointArrays& pa, float cmp_val);
26 |
27 | // Ch03_06_misc.cpp
28 | extern bool CheckArgs(PointArrays& pa);
29 | extern void FillPointArraysF32(PointArrays& pa, float min_val, float max_val, unsigned int rng_seed);
30 |
31 | // Miscellaneous constants
32 | const size_t c_NumPoints = 21;
33 | const unsigned int c_RngSeed = 39;
34 | const float c_ArrayFillMin = 1.0f;
35 | const float c_ArrayFillMax = 75.0f;
36 | const float c_CmpVal = 50.0f; // presumably the cmp_val argument of the CompareDistancesF32 functions - confirm in Ch03_06.cpp
37 | const size_t c_Alignment = 32;
38 |
--------------------------------------------------------------------------------
/Chapter03/Ch03_06/Ch03_06.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter03/Ch03_07/Ch03_07.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch03_07.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include <cstddef>
7 |
8 | struct PointArrays // Bundle of double array pointers plus a point count, passed by reference to the Ch03_07 functions.
9 | {
10 | double* X1; // presumably coordinates of the first point set - confirm in Ch03_07_fcpp.cpp
11 | double* Y1;
12 | double* X2; // presumably coordinates of the second point set - confirm
13 | double* Y2;
14 | double* Dist1; // the 1 / 2 suffixes presumably pair with the _Cpp / _Iavx functions - confirm
15 | double* Dist2;
16 | double* DistCmp1;
17 | double* DistCmp2;
18 | size_t NumPoints; // NOTE(review): presumably the element count of every array above - confirm
19 | };
20 |
21 | // Ch03_07_fcpp.cpp - all take the PointArrays bundle by non-const reference
22 | extern bool CalcDistancesF64_Cpp(PointArrays& pa);
23 | extern bool CalcDistancesF64_Iavx(PointArrays& pa);
24 | extern void CompareDistancesF64_Cpp(PointArrays& pa, double cmp_val);
25 | extern void CompareDistancesF64_Iavx(PointArrays& pa, double cmp_val);
26 |
27 | // Ch03_07_misc.cpp
28 | extern bool CheckArgs(PointArrays& pa);
29 | extern void FillPointArraysF64(PointArrays& pa, double min_val, double max_val, unsigned int rng_seed);
30 |
31 | // Miscellaneous constants
32 | const size_t c_NumPoints = 21;
33 | const unsigned int c_RngSeed = 39;
34 | const double c_ArrayFillMin = 1.0;
35 | const double c_ArrayFillMax = 75.0;
36 | const double c_CmpVal = 50.0; // presumably the cmp_val argument of the CompareDistancesF64 functions - confirm in Ch03_07.cpp
37 | const size_t c_Alignment = 32;
38 |
--------------------------------------------------------------------------------
/Chapter03/Ch03_07/Ch03_07.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter03/Ch03_08/Ch03_08.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch03_08.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include <vector>
7 | #include "MatrixF32.h"
8 |
9 | // Ch03_08_fcpp.cpp - each takes the matrix by const reference and returns a vector by value
10 | extern std::vector<float> CalcColumnMeansF32_Cpp(const MatrixF32& x);
11 | extern std::vector<float> CalcColumnMeansF32_Iavx(const MatrixF32& x);
12 |
13 | // Miscellaneous constants (presumably the seed and fill range used when the test matrix is created - confirm in Ch03_08.cpp)
14 | const unsigned int c_RngSeed = 41;
15 | const float c_MatrixFillMin = 1.0f;
16 | const float c_MatrixFillMax = 80.0f;
17 |
18 |
--------------------------------------------------------------------------------
/Chapter03/Ch03_08/Ch03_08.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter03/Ch03_09/Ch03_09.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch03_09.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include <vector>
7 | #include "MatrixF64.h"
8 |
9 | // Ch03_09_fcpp.cpp - each takes the matrix by const reference and returns a vector by value
10 | extern std::vector<double> CalcColumnMeansF64_Cpp(const MatrixF64& x);
11 | extern std::vector<double> CalcColumnMeansF64_Iavx(const MatrixF64& x);
12 |
13 | // Miscellaneous constants (presumably the seed and fill range used when the test matrix is created - confirm in Ch03_09.cpp)
14 | const unsigned int c_RngSeed = 41;
15 | const double c_MatrixFillMin = 1.0;
16 | const double c_MatrixFillMax = 80.0;
17 |
--------------------------------------------------------------------------------
/Chapter03/Ch03_09/Ch03_09.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter04/Ch04_01/Ch04_01.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch04_01.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include "YmmVal.h"
7 |
8 | // Functions defined in Ch04_01_fcpp.cpp; a and b are const inputs, c is declared as a six-element array.
9 | extern void MathI16_Iavx2(YmmVal c[6], const YmmVal* a, const YmmVal* b);
10 | extern void MathI32_Iavx2(YmmVal c[6], const YmmVal* a, const YmmVal* b);
11 |
--------------------------------------------------------------------------------
/Chapter04/Ch04_01/Ch04_01.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter04/Ch04_02/Ch04_02.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch04_02.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include "YmmVal.h"
7 |
8 | // Functions defined in Ch04_02_fcpp.cpp; a and b are const inputs that are loaded, combined and stored to c.
9 | extern void UnpackU32_U64_Iavx2(YmmVal c[2], const YmmVal* a, const YmmVal* b); // c[0] <- unpacklo_epi32, c[1] <- unpackhi_epi32
10 | extern void PackI32_I16_Iavx2(YmmVal* c, const YmmVal* a, const YmmVal* b); // *c <- packs_epi32(a, b)
11 |
--------------------------------------------------------------------------------
/Chapter04/Ch04_02/Ch04_02.vcxproj.filters:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
6 | cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx
7 |
8 |
9 | {93995380-89BD-4b04-88EB-625FBE52EBFB}
10 | h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd
11 |
12 |
13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
15 |
16 |
17 |
18 |
19 | Source Files
20 |
21 |
22 | Source Files
23 |
24 |
25 |
26 |
27 | Header Files
28 |
29 |
30 |
--------------------------------------------------------------------------------
/Chapter04/Ch04_02/Ch04_02.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter04/Ch04_02/Ch04_02_fcpp.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch04_02_fcpp.cpp
3 | //------------------------------------------------
4 |
5 | #include <immintrin.h>
6 | #include "Ch04_02.h"
7 |
8 | void UnpackU32_U64_Iavx2(YmmVal c[2], const YmmVal* a, const YmmVal* b)
9 | {
10 |     // Interleaves the 32-bit elements of a and b: the unpack-low result
11 |     // goes to c[0] and the unpack-high result to c[1].
12 |     const __m256i va = _mm256_load_si256(reinterpret_cast<const __m256i*>(a));
13 |     const __m256i vb = _mm256_load_si256(reinterpret_cast<const __m256i*>(b));
14 |
15 |     // Both intrinsics operate within each 128-bit lane of the 256-bit operands.
16 |     _mm256_store_si256(reinterpret_cast<__m256i*>(&c[0]), _mm256_unpacklo_epi32(va, vb));
17 |     _mm256_store_si256(reinterpret_cast<__m256i*>(&c[1]), _mm256_unpackhi_epi32(va, vb));
18 | }
19 |
20 | void PackI32_I16_Iavx2(YmmVal* c, const YmmVal* a, const YmmVal* b)
21 | {
22 |     // Packs the 32-bit integers of a and b into 16-bit integers using
23 |     // signed saturation (_mm256_packs_epi32) and stores the result to c.
24 |     const __m256i va = _mm256_load_si256(reinterpret_cast<const __m256i*>(a));
25 |     const __m256i vb = _mm256_load_si256(reinterpret_cast<const __m256i*>(b));
26 |
27 |     _mm256_store_si256(reinterpret_cast<__m256i*>(c), _mm256_packs_epi32(va, vb));
28 | }
29 |
--------------------------------------------------------------------------------
/Chapter04/Ch04_03/Ch04_03.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch04_03.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include "YmmVal.h"
7 |
8 | // Functions defined in Ch04_03_fcpp.cpp; note that a is a non-const pointer here. The array bound on c differs per function.
9 | extern void ZeroExtU8_U16_Iavx2(YmmVal c[2], YmmVal* a);
10 | extern void ZeroExtU8_U32_Iavx2(YmmVal c[4], YmmVal* a);
11 | extern void SignExtI16_I32_Iavx2(YmmVal c[2], YmmVal* a);
12 | extern void SignExtI16_I64_Iavx2(YmmVal c[4], YmmVal* a);
13 |
14 |
--------------------------------------------------------------------------------
/Chapter04/Ch04_03/Ch04_03.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter04/Ch04_04/Ch04_04.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch04_04.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include <cstddef>
7 | #include <cstdint>
8 |
9 | // Data structure for pixel clipping algorithms (passed by pointer to ClipPixels_Cpp / ClipPixels_Iavx2 / CheckArgs)
10 | struct ClipData
11 | {
12 | uint8_t* m_PbSrc; // source buffer pointer (CheckArgs requires c_Alignment alignment)
13 | uint8_t* m_PbDes; // destination buffer pointer (CheckArgs requires c_Alignment alignment)
14 | size_t m_NumPixels; // number of pixels (CheckArgs rejects 0)
15 | size_t m_NumClippedPixels; // number of clipped pixels
16 | uint8_t m_ThreshLo; // low threshold
17 | uint8_t m_ThreshHi; // high threshold
18 | };
19 |
20 | // Ch04_04_fcpp.cpp - both take the ClipData block by non-const pointer
21 | extern void ClipPixels_Cpp(ClipData* clip_data);
22 | extern void ClipPixels_Iavx2(ClipData* clip_data);
23 |
24 | // Ch04_04_misc.cpp
25 | extern bool CheckArgs(const ClipData* clip_data); // true when m_NumPixels != 0 and both buffers are c_Alignment-aligned
26 |
27 | // Ch04_04_bm.cpp
28 | extern void ClipPixels_bm(void);
29 |
30 | // Miscellaneous constants
31 | const size_t c_Alignment = 32; // alignment tested by CheckArgs
32 | const int c_RngMinVal = 0;
33 | const int c_RngMaxVal = 255;
34 | const unsigned int c_RngSeed = 157;
35 | const uint8_t c_ThreshLo = 10;
36 | const uint8_t c_ThreshHi = 245;
37 | const size_t c_NumPixels = 8 * 1024 * 1024 + 31; // not a multiple of 32
38 | const size_t c_NumPixelsBM = 10000000; // presumably the pixel count used by ClipPixels_bm - confirm in Ch04_04_bm.cpp
39 |
--------------------------------------------------------------------------------
/Chapter04/Ch04_04/Ch04_04.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter04/Ch04_04/Ch04_04_misc.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch04_04_misc.cpp
3 | //------------------------------------------------
4 |
5 | #include "Ch04_04.h"
6 | #include "AlignedMem.h"
7 |
8 | bool CheckArgs(const ClipData* clip_data)
9 | {
10 |     // Accepts only a non-empty request whose source and destination
11 |     // buffers both satisfy c_Alignment; the checks run in that order.
12 |     const bool has_pixels = clip_data->m_NumPixels != 0;
13 |
14 |     return has_pixels
15 |         && AlignedMem::IsAligned(clip_data->m_PbSrc, c_Alignment)
16 |         && AlignedMem::IsAligned(clip_data->m_PbDes, c_Alignment);
17 | }
18 |
19 |
--------------------------------------------------------------------------------
/Chapter04/Ch04_05/Ch04_05.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch04_05.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include <cstddef>
7 | #include <cstdint>
8 | #include "ImageMisc.h"
9 |
10 | // Ch04_05.cpp
11 | extern const float c_Coef[4];
12 | extern const char* c_TestImageFileName;
13 |
14 | // Ch04_05_fcpp.cpp - pb_gs is the non-const (output) buffer, pb_rgb the const input buffer
15 | extern void ConvertRgbToGs_Cpp(uint8_t* pb_gs, const RGB32* pb_rgb, size_t num_pixels, const float coef[4]);
16 | extern void ConvertRgbToGs_Iavx2(uint8_t* pb_gs, const RGB32* pb_rgb, size_t num_pixels, const float coef[4]);
17 |
18 | // Ch04_05_misc.cpp
19 | extern bool CheckArgs(const uint8_t* pb_gs, const RGB32* pb_rgb, size_t num_pixels, const float coef[4]);
20 |
21 | // Ch04_05_bm.cpp
22 | extern void ConvertRgbToGs_bm(void);
23 | extern bool CompareGsPixelBuffers(const uint8_t* pb_gs1, const uint8_t* pb_gs2, size_t num_pixels); // defined in Ch04_05_misc.cpp
24 |
25 | // Miscellaneous constants
26 | const size_t c_Alignment = 32; // alignment CheckArgs requires of pb_gs and pb_rgb
27 | const size_t c_NumPixelsMax = 256 * 1024 * 1024; // largest num_pixels CheckArgs accepts
28 |
--------------------------------------------------------------------------------
/Chapter04/Ch04_05/Ch04_05.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter04/Ch04_05/Ch04_05_misc.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch04_05_misc.cpp
3 | //------------------------------------------------
4 |
5 | #include "Ch04_05.h"
6 | #include "AlignedMem.h"
7 |
8 | bool CheckArgs(const uint8_t* pb_gs, const RGB32* pb_rgb, size_t num_pixels, const float coef[4])
9 | {
10 | if (num_pixels > c_NumPixelsMax)
11 | return false;
12 | if (num_pixels % 8 != 0)
13 | return false;
14 | if (!AlignedMem::IsAligned(pb_gs, c_Alignment))
15 | return false;
16 | if (!AlignedMem::IsAligned(pb_rgb, c_Alignment))
17 | return false;
18 | if (coef[0] < 0.0f || coef[1] < 0.0f || coef[2] < 0.0f)
19 | return false;
20 | return true;
21 | }
22 |
// Compares two grayscale buffers element by element.
// Returns false as soon as a pair of pixels differs by more than 1,
// true if all num_pixels pairs are within that tolerance (including the
// case num_pixels == 0).
bool CompareGsPixelBuffers(const uint8_t* pb_gs1, const uint8_t* pb_gs2, size_t num_pixels)
{
    const uint8_t* p1 = pb_gs1;
    const uint8_t* p2 = pb_gs2;

    for (size_t remaining = num_pixels; remaining != 0; --remaining, ++p1, ++p2)
    {
        // Widen to int and order the operands so the difference is
        // the absolute distance between the two pixel values.
        const int v1 = *p1;
        const int v2 = *p2;
        const int distance = (v1 > v2) ? (v1 - v2) : (v2 - v1);

        if (distance > 1)
            return false;
    }

    return true;
}
32 |
--------------------------------------------------------------------------------
/Chapter04/Ch04_06/Ch04_06.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter04/Ch04_06/Ch04_06_misc.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch04_06_misc.cpp
3 | //------------------------------------------------
4 |
5 | #include "Ch04_06.h"
6 | #include "AlignedMem.h"
7 |
8 | bool CheckArgs(size_t num_pixels, const uint8_t* pb_src, const uint8_t* pb_mask)
9 | {
10 | if ((num_pixels == 0) || (num_pixels > c_NumPixelsMax))
11 | return false;
12 | if ((num_pixels % c_NumSimdElements) != 0)
13 | return false;
14 | if (!AlignedMem::IsAligned(pb_src, c_Alignment))
15 | return false;
16 | if (!AlignedMem::IsAligned(pb_mask, c_Alignment))
17 | return false;
18 | return true;
19 | }
20 |
--------------------------------------------------------------------------------
/Chapter04/Ch04_07/Ch04_07.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch04_07.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include
7 | #include
8 |
// Ch04_07_fcpp.cpp
// uint8_t -> float conversion routines; both take destination, source, count.
void ConvertU8ToF32_Cpp(float* pb_des, const uint8_t* pb_src, size_t num_pixels);
void ConvertU8ToF32_Iavx2(float* pb_des, const uint8_t* pb_src, size_t num_pixels);

// Ch04_07_misc.cpp
// Lookup-table construction, argument validation and result comparison.
void BuildLUT_U8ToF32();
bool CheckArgs(const void* pb1, const void* pb2, size_t num_pixels);
size_t CompareArraysF32(const float* pb_src1, const float* pb_src2, size_t num_pixels);

// Ch04_07_bm.cpp
void ConvertU8ToF32_bm();

// Miscellaneous constants
constexpr size_t c_Alignment = 32;
constexpr size_t c_NumPixels = 1024 * 1024 + 19;
constexpr size_t c_NumPixelsBM = 10000000;
constexpr size_t c_NumPixelsMax = 16 * 1024 * 1024;
constexpr int c_FillMinVal = 0;
constexpr int c_FillMaxVal = 255;
constexpr unsigned int c_RngSeed = 71;

// Lookup table storage; declared without a bound here, defined elsewhere.
extern float g_LUT_U8ToF32[];
30 |
--------------------------------------------------------------------------------
/Chapter04/Ch04_07/Ch04_07.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter04/Ch04_07/Ch04_07_misc.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch04_07_misc.cpp
3 | //------------------------------------------------
4 |
5 | #include
6 | #include "Ch04_07.h"
7 | #include "AlignedMem.h"
8 |
// Lookup table with one float entry per possible 8-bit value.
float g_LUT_U8ToF32[256];

// Fills g_LUT_U8ToF32 so that entry i holds i / 255.0f
// (entry 0 is 0.0f, entry 255 is 1.0f).
void BuildLUT_U8ToF32(void)
{
    size_t level = 0;

    for (float& entry : g_LUT_U8ToF32)
    {
        entry = static_cast<float>(level) / 255.0f;
        ++level;
    }
}
18 |
19 | bool CheckArgs(const void* pb1, const void* pb2, size_t num_pixels)
20 | {
21 | if (num_pixels == 0 || num_pixels > c_NumPixelsMax)
22 | return false;
23 | if (!AlignedMem::IsAligned(pb1, c_Alignment))
24 | return false;
25 | if (!AlignedMem::IsAligned(pb2, c_Alignment))
26 | return false;
27 | return true;
28 | }
29 |
// Counts the positions at which two float arrays differ.
// Elements are compared with operator!= (exact comparison, no tolerance).
// Each differing position is reported on std::cout as
// "index, value1, value2" followed by a newline.
// Returns the number of differing positions.
size_t CompareArraysF32(const float* pb_src1, const float* pb_src2, size_t num_pixels)
{
    size_t mismatches = 0;
    size_t idx = 0;

    while (idx < num_pixels)
    {
        const float lhs = pb_src1[idx];
        const float rhs = pb_src2[idx];

        if (lhs != rhs)
        {
            std::cout << idx << ", " << lhs << ", " << rhs << '\n';
            ++mismatches;
        }

        ++idx;
    }

    return mismatches;
}
45 |
--------------------------------------------------------------------------------
/Chapter05/Ch05_01/Ch05_01.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch05_01.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 |
// Ch05_01_fcpp.cpp
// Least-squares routines. m and b are the only writable arguments;
// x and y are read-only arrays of n elements each.
void CalcLeastSquares_Cpp(double* m, double* b,
    const double* x, const double* y, size_t n);
void CalcLeastSquares_Iavx2(double* m, double* b,
    const double* x, const double* y, size_t n);

// Ch05_01_misc.cpp
// Argument validation and test-data generation.
bool CheckArgs(const double* x, const double* y, size_t n);
void FillArrays(double* x, double* y, size_t n);

// Miscellaneous constants
constexpr size_t c_Alignment = 32;
constexpr double c_LsEpsilon = 1.0e-12;
20 |
--------------------------------------------------------------------------------
/Chapter05/Ch05_01/Ch05_01.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter05/Ch05_01/Ch05_01_misc.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch05_01_misc.cpp
3 | //------------------------------------------------
4 |
5 | #include
6 | #include "Ch05_01.h"
7 | #include "AlignedMem.h"
8 | #include "MT.h"
9 |
10 | bool CheckArgs(const double* x, const double* y, size_t n)
11 | {
12 | if (n < 2)
13 | return false;
14 | if (!AlignedMem::IsAligned(x, c_Alignment))
15 | return false;
16 | if (!AlignedMem::IsAligned(y, c_Alignment))
17 | return false;
18 |
19 | return true;
20 | }
21 |
22 | void FillArrays(double* x, double* y, size_t n)
23 | {
24 | const unsigned int rng_seed1 = 73;
25 | const unsigned int rng_seed2 = 83;
26 | const double fill_min_val = -25.0;
27 | const double fill_max_val = 25.0;
28 |
29 | MT::FillArrayFP(x, n, fill_min_val, fill_max_val, rng_seed1);
30 | MT::FillArrayFP(y, n, fill_min_val, fill_max_val, rng_seed2);
31 |
32 | for (size_t i = 0; i < n; i++)
33 | y[i] = y[i] * y[i];
34 | }
35 |
--------------------------------------------------------------------------------
/Chapter05/Ch05_02/Ch05_02.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch05_02.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include "MatrixF32.h"
7 |
8 | // Ch05_02_fcpp.cpp
9 | void MatrixMulF32_Cpp(MatrixF32& c, const MatrixF32& a, const MatrixF32& b);
10 | void MatrixMulF32_Iavx2(MatrixF32& c, const MatrixF32& a, const MatrixF32& b);
11 |
12 | // Ch05_02_misc.cpp
13 | bool CheckArgs(const MatrixF32& c, const MatrixF32& a, const MatrixF32& b);
14 | void InitMat(MatrixF32& c1, MatrixF32& c2, MatrixF32& a, MatrixF32& b);
15 | void SaveResults(const MatrixF32& c1, const MatrixF32& c2, const MatrixF32& a,
16 | const MatrixF32& b);
17 |
18 | // Ch05_02_bm.cpp
19 | void MatrixMulF32_bm(void);
20 |
--------------------------------------------------------------------------------
/Chapter05/Ch05_02/Ch05_02.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter05/Ch05_03/Ch05_03.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch05_03.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include "MatrixF64.h"
7 |
8 | // Ch05_03_fcpp.cpp
9 | void MatrixMulF64_Cpp(MatrixF64& c, const MatrixF64& a, const MatrixF64& b);
10 | void MatrixMulF64_Iavx2(MatrixF64& c, const MatrixF64& a, const MatrixF64& b);
11 |
12 | // Ch05_03_misc.cpp
13 | bool CheckArgs(const MatrixF64& c, const MatrixF64& a, const MatrixF64& b);
14 | void InitMat(MatrixF64& c1, MatrixF64& c2, MatrixF64& a, MatrixF64& b);
15 | void SaveResults(const MatrixF64& c1, const MatrixF64& c2, const MatrixF64& a,
16 | const MatrixF64& b);
17 |
18 | // Ch05_03_bm.cpp
19 | void MatrixMulF64_bm(void);
20 |
--------------------------------------------------------------------------------
/Chapter05/Ch05_03/Ch05_03.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter05/Ch05_04/Ch05_04.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch05_04.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include "MatrixF32.h"
7 |
8 | // Ch05_04_fcpp.cpp
9 | extern void MatrixMul4x4F32_Cpp(MatrixF32& c, const MatrixF32& a,
10 | const MatrixF32& b);
11 | extern void MatrixMul4x4F32_Iavx2(MatrixF32& c, const MatrixF32& a,
12 | const MatrixF32& b);
13 |
14 | // Ch05_04_misc.cpp
15 | extern void InitMat(MatrixF32& c1, MatrixF32& c2, MatrixF32& a, MatrixF32& b);
16 |
17 | // Ch05_04_bm.cpp
18 | extern void MatrixMul4x4F32_bm(void);
19 |
--------------------------------------------------------------------------------
/Chapter05/Ch05_04/Ch05_04.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter05/Ch05_04/Ch05_04_misc.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch05_04_misc.cpp
3 | //------------------------------------------------
4 |
5 | #include "Ch05_04.h"
6 |
7 | void InitMat(MatrixF32& c1, MatrixF32& c2, MatrixF32& a, MatrixF32& b)
8 | {
9 | const float a_row0[] = { 10, 11, 12, 13 };
10 | const float a_row1[] = { 20, 21, 22, 23 };
11 | const float a_row2[] = { 30, 31, 32, 33 };
12 | const float a_row3[] = { 40, 41, 42, 43 };
13 |
14 | const float b_row0[] = { 100, 101, 102, 103 };
15 | const float b_row1[] = { 200, 201, 202, 203 };
16 | const float b_row2[] = { 300, 301, 302, 303 };
17 | const float b_row3[] = { 400, 401, 402, 403 };
18 |
19 | a.SetRow(0, a_row0);
20 | a.SetRow(1, a_row1);
21 | a.SetRow(2, a_row2);
22 | a.SetRow(3, a_row3);
23 |
24 | b.SetRow(0, b_row0);
25 | b.SetRow(1, b_row1);
26 | b.SetRow(2, b_row2);
27 | b.SetRow(3, b_row3);
28 |
29 | const int w = 12;
30 | const char* delim = " ";
31 | c1.SetOstream(w, delim);
32 | c2.SetOstream(w, delim);
33 | a.SetOstream(w, delim);
34 | b.SetOstream(w, delim);
35 | }
36 |
--------------------------------------------------------------------------------
/Chapter05/Ch05_05/Ch05_05.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch05_05.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include "MatrixF64.h"
7 |
8 | // Ch05_05_fcpp.cpp
9 | extern void MatrixMul4x4F64_Cpp(MatrixF64& c, const MatrixF64& a,
10 | const MatrixF64& b);
11 | extern void MatrixMul4x4F64_Iavx2(MatrixF64& c, const MatrixF64& a,
12 | const MatrixF64& b);
13 |
14 | // Ch05_05_misc.cpp
15 | extern void InitMat(MatrixF64& c1, MatrixF64& c2, MatrixF64& a, MatrixF64& b);
16 |
17 | // Ch05_05_bm.cpp
18 | extern void MatrixMul4x4F64_bm(void);
19 |
--------------------------------------------------------------------------------
/Chapter05/Ch05_05/Ch05_05.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter05/Ch05_05/Ch05_05_misc.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch05_05_misc.cpp
3 | //------------------------------------------------
4 |
5 | #include "Ch05_05.h"
6 |
7 | void InitMat(MatrixF64& c1, MatrixF64& c2, MatrixF64& a, MatrixF64& b)
8 | {
9 | const double a_row0[] = { 10, 11, 12, 13 };
10 | const double a_row1[] = { 20, 21, 22, 23 };
11 | const double a_row2[] = { 30, 31, 32, 33 };
12 | const double a_row3[] = { 40, 41, 42, 43 };
13 |
14 | const double b_row0[] = { 100, 101, 102, 103 };
15 | const double b_row1[] = { 200, 201, 202, 203 };
16 | const double b_row2[] = { 300, 301, 302, 303 };
17 | const double b_row3[] = { 400, 401, 402, 403 };
18 |
19 | a.SetRow(0, a_row0);
20 | a.SetRow(1, a_row1);
21 | a.SetRow(2, a_row2);
22 | a.SetRow(3, a_row3);
23 |
24 | b.SetRow(0, b_row0);
25 | b.SetRow(1, b_row1);
26 | b.SetRow(2, b_row2);
27 | b.SetRow(3, b_row3);
28 |
29 | const int w = 12;
30 | const char* delim = " ";
31 | c1.SetOstream(w, delim);
32 | c2.SetOstream(w, delim);
33 | a.SetOstream(w, delim);
34 | b.SetOstream(w, delim);
35 | }
36 |
--------------------------------------------------------------------------------
/Chapter05/Ch05_06/Ch05_06.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch05_06.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include
7 | #include "MatrixF32.h"
8 |
// Four-component single-precision vector.
// Members are declared (and therefore laid out) in the order W, X, Y, Z.
struct Vec4x1_F32
{
    float W;
    float X;
    float Y;
    float Z;
};
13 |
14 | // Ch05_06_fcpp.cpp
15 | extern void MatVecMulF32_Cpp(Vec4x1_F32* vec_b, const MatrixF32& m,
16 | const Vec4x1_F32* vec_a, size_t num_vec);
17 | extern void MatVecMulF32_Iavx2(Vec4x1_F32* vec_b, const MatrixF32& m,
18 | const Vec4x1_F32* vec_a, size_t num_vec);
19 |
20 | // Ch05_06_misc.cpp
21 | extern bool CheckArgs(const Vec4x1_F32* vec_b, const MatrixF32& m,
22 | const Vec4x1_F32* vec_a, size_t num_vec);
23 | extern void Init(MatrixF32& m, Vec4x1_F32* va, size_t num_vec);
24 | extern bool VecCompare(const Vec4x1_F32* v1, const Vec4x1_F32* v2);
25 |
26 | // Ch05_06_bm.cpp
27 | extern void MatrixVecMulF32_bm(void);
28 |
29 | // Miscellaenous constants
30 | const size_t c_Alignment = 32;
31 | const int c_RngMinVal = 1;
32 | const int c_RngMaxVal = 500;
33 | const unsigned int c_RngSeedVal = 187;
34 |
--------------------------------------------------------------------------------
/Chapter05/Ch05_06/Ch05_06.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter05/Ch05_07/Ch05_07.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch05_07.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include
7 | #include "MatrixF64.h"
8 |
// Four-component double-precision vector.
// Members are declared (and therefore laid out) in the order W, X, Y, Z.
struct Vec4x1_F64
{
    double W;
    double X;
    double Y;
    double Z;
};
13 |
14 | // Ch05_07_fcpp.cpp
15 | extern void MatVecMulF64_Cpp(Vec4x1_F64* vec_b, MatrixF64& m,
16 | Vec4x1_F64* vec_a, size_t num_vec);
17 | extern void MatVecMulF64_Iavx2(Vec4x1_F64* vec_b, MatrixF64& m,
18 | Vec4x1_F64* vec_a, size_t num_vec);
19 |
20 | // Ch05_07_misc.cpp
21 | extern bool CheckArgs(const Vec4x1_F64* vec_b, const MatrixF64& m,
22 | const Vec4x1_F64* vec_a, size_t num_vec);
23 | extern void Init(MatrixF64& m, Vec4x1_F64* va, size_t num_vec);
24 | extern bool VecCompare(const Vec4x1_F64* v1, const Vec4x1_F64* v2);
25 |
26 | // Ch05_07_bm.cpp
27 | extern void MatrixVecMulF64_bm(void);
28 |
29 | // Miscellaenous constants
30 | const size_t c_Alignment = 32;
31 | const int c_RngMinVal = 1;
32 | const int c_RngMaxVal = 500;
33 | const unsigned int c_RngSeedVal = 187;
34 |
--------------------------------------------------------------------------------
/Chapter05/Ch05_07/Ch05_07.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter05/Ch05_08/Ch05_08.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch05_08.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include
7 | #include "MatrixF32.h"
8 |
9 | // Ch05_08_fcpp.cpp
10 | bool MatrixInvF32_Cpp(MatrixF32& a_inv, const MatrixF32& a, float epsilon);
11 | bool MatrixInvF32_Iavx2(MatrixF32& a_inv, const MatrixF32& a, float epsilon);
12 |
13 | // Ch05_08_fcpp2.cpp
14 | MatrixF32 MatrixAddF32_Iavx2(const MatrixF32& a, const MatrixF32& b);
15 | MatrixF32 MatrixMulF32_Iavx2(const MatrixF32& a, const MatrixF32& b);
16 | void MatrixMulScalarF32_Iavx2(MatrixF32& c, const MatrixF32& a, float s_val);
17 |
18 | // Ch05_08_misc.cpp
19 | MatrixF32 GetTestMatrix(size_t id, float* epsilon);
20 | size_t GetNumTestMatrices(void);
21 |
22 | // Ch05_08_bm.cpp
23 | void CalcMatrixInvF32_bm(void);
24 |
25 | // Miscellaenous constants
26 | const float c_DefaultEpsilon = 1.0e-5f;
27 |
--------------------------------------------------------------------------------
/Chapter05/Ch05_08/Ch05_08.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter05/Ch05_09/Ch05_09.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch05_09.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include
7 | #include "MatrixF64.h"
8 |
9 | // Ch05_09_fcpp.cpp
10 | bool MatrixInvF64_Cpp(MatrixF64& a_inv, const MatrixF64& a, double epsilon);
11 | bool MatrixInvF64_Iavx2(MatrixF64& a_inv, const MatrixF64& a, double epsilon);
12 |
13 | // Ch05_09_fcpp2.cpp
14 | MatrixF64 MatrixAddF64_Iavx2(const MatrixF64& a, const MatrixF64& b);
15 | MatrixF64 MatrixMulF64_Iavx2(const MatrixF64& a, const MatrixF64& b);
16 | void MatrixMulScalarF64_Iavx2(MatrixF64& c, const MatrixF64& a, double s_val);
17 |
18 | // Ch05_09_misc.cpp
19 | MatrixF64 GetTestMatrix(size_t id, double* epsilon);
20 | size_t GetNumTestMatrices(void);
21 |
22 | // Ch05_09_bm.cpp
23 | void CalcMatrixInvF64_bm(void);
24 |
25 | // Miscellaenous constants
26 | const double c_DefaultEpsilon = 1.0e-5;
27 |
--------------------------------------------------------------------------------
/Chapter05/Ch05_09/Ch05_09.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter06/Ch06_01/Ch06_01.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch06_01.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include
7 |
// Ch06_01_fcpp.cpp
// 1D single-precision convolution routines. y is the only writable
// argument; x and kernel are read-only.
extern void Convolve1D_F32_Cpp(std::vector<float>& y,
    const std::vector<float>& x, const std::vector<float>& kernel);
extern void Convolve1D_F32_Iavx2(std::vector<float>& y,
    const std::vector<float>& x, const std::vector<float>& kernel);
extern void Convolve1DKs5_F32_Iavx2(std::vector<float>& y,
    const std::vector<float>& x, const std::vector<float>& kernel);

// Ch06_01_misc.cpp
// Size validation for the convolution arguments.
extern bool CheckArgs(std::vector<float>& y,
    const std::vector<float>& x, const std::vector<float>& kernel);

// Ch06_01_bm.cpp
extern void Convolve1D_F32_bm(void);

// Miscellaneous constants
const unsigned int c_RngSeed = 97;
25 |
--------------------------------------------------------------------------------
/Chapter06/Ch06_01/Ch06_01.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter06/Ch06_01/Ch06_01_misc.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch06_01_misc.cpp
3 | //------------------------------------------------
4 |
5 | #include "Ch06_01.h"
6 |
// Validates the sizes of the 1D convolution arguments.
//   y      - output signal; must have the same size as x and at least as
//            many elements as kernel
//   x      - input signal
//   kernel - convolution kernel; its size must be odd (an empty kernel has
//            even size 0 and is rejected)
// Returns true when every check passes, false otherwise.
bool CheckArgs(std::vector<float>& y, const std::vector<float>& x,
    const std::vector<float>& kernel)
{
    if ((kernel.size() & 1) == 0)
        return false;
    if (y.size() != x.size())
        return false;
    if (y.size() < kernel.size())
        return false;
    return true;
}
18 |
19 |
--------------------------------------------------------------------------------
/Chapter06/Ch06_02/Ch06_02.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch06_02.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include
7 |
// Ch06_02_fcpp.cpp
// 1D double-precision convolution routines. y is the only writable
// argument; x and kernel are read-only.
extern void Convolve1D_F64_Cpp(std::vector<double>& y,
    const std::vector<double>& x, const std::vector<double>& kernel);
extern void Convolve1D_F64_Iavx2(std::vector<double>& y,
    const std::vector<double>& x, const std::vector<double>& kernel);
extern void Convolve1DKs5_F64_Iavx2(std::vector<double>& y,
    const std::vector<double>& x, const std::vector<double>& kernel);

// Ch06_02_misc.cpp
// Size validation for the convolution arguments.
extern bool CheckArgs(std::vector<double>& y,
    const std::vector<double>& x, const std::vector<double>& kernel);

// Ch06_02_bm.cpp
extern void Convolve1D_F64_bm(void);

// Miscellaneous constants
const unsigned int c_RngSeed = 97;
25 |
--------------------------------------------------------------------------------
/Chapter06/Ch06_02/Ch06_02.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter06/Ch06_02/Ch06_02_misc.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch06_02_misc.cpp
3 | //------------------------------------------------
4 |
5 | #include "Ch06_02.h"
6 |
// Validates the sizes of the 1D convolution arguments.
//   y      - output signal; must have the same size as x and at least as
//            many elements as kernel
//   x      - input signal
//   kernel - convolution kernel; its size must be odd (an empty kernel has
//            even size 0 and is rejected)
// Returns true when every check passes, false otherwise.
bool CheckArgs(std::vector<double>& y, const std::vector<double>& x,
    const std::vector<double>& kernel)
{
    if ((kernel.size() & 1) == 0)
        return false;
    if (y.size() != x.size())
        return false;
    if (y.size() < kernel.size())
        return false;
    return true;
}
17 |
18 |
--------------------------------------------------------------------------------
/Chapter06/Ch06_03/Ch06_03.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch06_03.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include
7 | #include
8 |
// Data bundle handed to the 2D convolution functions.
// The three size fields default to 0 and the vectors start empty.
// Presumably m_ImH/m_ImW are the image height/width and m_ImSrc/m_ImDes the
// source/destination pixel data - confirm against Ch06_03_misc.cpp.
struct CD_2D
{
    size_t m_ImH = 0;
    size_t m_ImW = 0;
    size_t m_KernelSize = 0;
    std::vector<float> m_ImSrc;
    std::vector<float> m_ImDes;
    std::vector<float> m_Kernel2D;
};

// Identifiers of the available 2D kernels (underlying values 0..4).
enum class KERNEL_ID : unsigned int
{
    LowPass2D_3x3, LowPass2D_5x5, LowPass2D_7x7, LowPass2D_9x9, LowPass2D_15x15
};

// Ch06_03_fcpp.cpp
extern void Convolve2D_F32_Cpp(CD_2D& cd);
extern void Convolve2D_F32_Iavx2(CD_2D& cd);

// Ch06_03_misc.cpp
// Init2D takes a pair of CD_2D objects (the benchmark passes one element to
// each convolution variant), an image file name and a kernel id.
extern bool CheckArgs2D(const CD_2D& cd);
extern void Init2D(std::array<CD_2D, 2>& cd, const char* fn, KERNEL_ID id);

// Ch06_03_misc2.cpp
extern void DisplayKernel2D(float sigma, size_t ks);
extern void GetKernel2D(CD_2D& cd, KERNEL_ID id);

// Ch06_03_bm.cpp
extern void Convolve2D_F32_bm(void);

// Miscellaneous constants
const KERNEL_ID c_KernelID = KERNEL_ID::LowPass2D_15x15;
const KERNEL_ID c_KernelID_BM = KERNEL_ID::LowPass2D_9x9;
42 |
--------------------------------------------------------------------------------
/Chapter06/Ch06_03/Ch06_03.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter06/Ch06_03/Ch06_03_bm.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch06_03_bm.cpp
3 | //------------------------------------------------
4 |
5 | #include
6 | #include "Ch06_03.h"
7 | #include "BmThreadTimer.h"
8 |
9 | void Convolve2D_F32_bm(void)
10 | {
11 | std::cout << "\nRunning benchmark function Convolve2D_F32_bm - please wait\n";
12 |
13 | const char* fn_src = "../../Data/ImageE.png";
14 |
15 | std::array cd;
16 | Init2D(cd, fn_src, c_KernelID_BM);
17 |
18 | const size_t num_it = 500;
19 | const size_t num_alg = 2;
20 | BmThreadTimer bmtt(num_it, num_alg);
21 |
22 | for (size_t i = 0; i < num_it; i++)
23 | {
24 | bmtt.Start(i, 0);
25 | Convolve2D_F32_Cpp(cd[0]);
26 | bmtt.Stop(i, 0);
27 |
28 | bmtt.Start(i, 1);
29 | Convolve2D_F32_Iavx2(cd[1]);
30 | bmtt.Stop(i, 1);
31 |
32 | if ((i % 10) == 0)
33 | std::cout << '.' << std::flush;
34 | }
35 |
36 | std::cout << '\n';
37 | std::string fn = bmtt.BuildCsvFilenameString("Ch06_03_Convolve2D_F32_bm");
38 | bmtt.SaveElapsedTimes(fn, BmThreadTimer::EtUnit::MicroSec, 2);
39 | std::cout << "Benchmark times saved to file " << fn << '\n';
40 | }
41 |
--------------------------------------------------------------------------------
/Chapter06/Ch06_04/Ch06_04.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter06/Ch06_04/Ch06_04_bm.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch06_04_bm.cpp
3 | //------------------------------------------------
4 |
5 | #include
6 | #include
7 | #include "Ch06_04.h"
8 | #include "BmThreadTimer.h"
9 |
10 | void Convolve1Dx2_F32_bm(void)
11 | {
12 | std::cout << "\nRunning benchmark function Convolve1Dx2_F32_bm - please wait\n";
13 |
14 | const char* fn_src = "../../Data/ImageE.png";
15 |
16 | std::array cd;
17 | Init1Dx2(cd, fn_src, c_KernelID_BM);
18 |
19 | const size_t num_it = 500;
20 | const size_t num_alg = 2;
21 | BmThreadTimer bmtt(num_it, num_alg);
22 |
23 | for (size_t i = 0; i < num_it; i++)
24 | {
25 | bmtt.Start(i, 0);
26 | Convolve1Dx2_F32_Cpp(cd[0]);
27 | bmtt.Stop(i, 0);
28 |
29 | bmtt.Start(i, 1);
30 | Convolve1Dx2_F32_Iavx2(cd[1]);
31 | bmtt.Stop(i, 1);
32 |
33 | if ((i % 10) == 0)
34 | std::cout << '.' << std::flush;
35 | }
36 |
37 | std::cout << '\n';
38 | std::string fn = bmtt.BuildCsvFilenameString("Ch06_04_Convolve1Dx2_F32_bm");
39 | bmtt.SaveElapsedTimes(fn, BmThreadTimer::EtUnit::MicroSec, 2);
40 | std::cout << "Benchmark times saved to file " << fn << '\n';
41 | }
42 |
--------------------------------------------------------------------------------
/Chapter06/Ch06_05/Ch06_05.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter07/Ch07_01/Ch07_01.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch07_01.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include
7 | #include "ZmmVal.h"
8 |
9 | // Ch07_01_fcpp.cpp
10 | extern void MathI16_Iavx512(ZmmVal c[6], const ZmmVal* a, const ZmmVal* b);
11 | extern void MathI64_Iavx512(ZmmVal c[6], const ZmmVal* a, const ZmmVal* b);
12 |
--------------------------------------------------------------------------------
/Chapter07/Ch07_01/Ch07_01.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter07/Ch07_02/Ch07_02.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch07_02.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include
7 | #include "ZmmVal.h"
8 |
9 | // Ch07_02_fcpp.cpp
10 | extern void MaskOpI64a_Iavx512(ZmmVal c[5], uint8_t mask, const ZmmVal* a,
11 | const ZmmVal* b);
12 | extern void MaskOpI64b_Iavx512(ZmmVal c[5], uint8_t mask, const ZmmVal* a,
13 | const ZmmVal* b1, const ZmmVal* b2);
14 | extern void MaskOpI64c_Iavx512(ZmmVal* c, const ZmmVal* a, int64_t x1, int64_t x2);
15 |
--------------------------------------------------------------------------------
/Chapter07/Ch07_02/Ch07_02.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter07/Ch07_03/Ch07_03.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch07_03.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include
7 | #include
8 | #include "ImageMisc.h"
9 |
10 | // Ch07_03.cpp
11 | extern const float c_Coef[4];
12 | extern const char* c_TestImageFileName;
13 |
14 | // Ch07_03_fcpp.cpp
15 | extern void ConvertRgbToGs_Cpp(uint8_t* pb_gs, const RGB32* pb_rgb, size_t num_pixels, const float coef[4]);
16 | extern void ConvertRgbToGs_Iavx512(uint8_t* pb_gs, const RGB32* pb_rgb, size_t num_pixels, const float coef[4]);
17 |
18 | // Ch07_03_misc.cpp
19 | extern bool CheckArgs(const uint8_t* pb_gs, const RGB32* pb_rgb, size_t num_pixels, const float coef[4]);
20 |
21 | // Ch07_03_bm.cpp
22 | extern void ConvertRgbToGs_bm(void);
23 | extern bool CompareGsPixelBuffers(const uint8_t* pb_gs1, const uint8_t* pb_gs2, size_t num_pixels);
24 |
25 | // Miscellaneous constants
26 | const size_t c_Alignment = 64;
27 | const size_t c_NumPixelsMax = 256 * 1024 * 1024;
28 |
--------------------------------------------------------------------------------
/Chapter07/Ch07_03/Ch07_03.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter07/Ch07_03/Ch07_03_misc.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch07_03_misc.cpp
3 | //------------------------------------------------
4 |
5 | #include "Ch07_03.h"
6 | #include "AlignedMem.h"
7 |
8 | bool CheckArgs(const uint8_t* pb_gs, const RGB32* pb_rgb, size_t num_pixels, const float coef[4])
9 | {
10 | if (num_pixels > c_NumPixelsMax)
11 | return false;
12 | if (num_pixels % 16 != 0)
13 | return false;
14 | if (!AlignedMem::IsAligned(pb_gs, c_Alignment))
15 | return false;
16 | if (!AlignedMem::IsAligned(pb_rgb, c_Alignment))
17 | return false;
18 | if (coef[0] < 0.0f || coef[1] < 0.0f || coef[2] < 0.0f)
19 | return false;
20 | return true;
21 | }
22 |
// Compares two grayscale buffers element by element.
// Returns false as soon as a pair of pixels differs by more than 1,
// true if all num_pixels pairs are within that tolerance (including the
// case num_pixels == 0).
bool CompareGsPixelBuffers(const uint8_t* pb_gs1, const uint8_t* pb_gs2, size_t num_pixels)
{
    const uint8_t* p1 = pb_gs1;
    const uint8_t* p2 = pb_gs2;

    for (size_t remaining = num_pixels; remaining != 0; --remaining, ++p1, ++p2)
    {
        // Widen to int and order the operands so the difference is
        // the absolute distance between the two pixel values.
        const int v1 = *p1;
        const int v2 = *p2;
        const int distance = (v1 > v2) ? (v1 - v2) : (v2 - v1);

        if (distance > 1)
            return false;
    }

    return true;
}
32 |
--------------------------------------------------------------------------------
/Chapter07/Ch07_04/Ch07_04.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch07_04.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include
7 | #include
8 |
9 | // Compare operators
enum class CmpOp
{
    EQ,     // value 0
    NE,
    LT,
    LE,
    GT,
    GE      // value 5
};
11 |
12 | // Ch07_04_fcpp.cpp
13 | extern void ComparePixels_Cpp(uint8_t* pb_des, const uint8_t* pb_src,
14 | size_t num_pixels, CmpOp cmp_op, uint8_t cmp_val);
15 | extern void ComparePixels_Iavx512(uint8_t* pb_des, const uint8_t* pb_src,
16 | size_t num_pixels, CmpOp cmp_op, uint8_t cmp_val);
17 |
18 | // Ch07_04_misc.cpp
19 | extern bool CheckArgs(const uint8_t* pb_des, const uint8_t* pb_src,
20 | size_t num_pixels);
21 | extern void DisplayResults(const uint8_t* pb_des1, const uint8_t* pb_des2,
22 | size_t num_pixels, CmpOp cmp_op, uint8_t cmp_val, size_t test_id);
23 | extern void InitArray(uint8_t* x, size_t n, unsigned int seed);
24 |
25 | // Miscellaneous constants
26 | const size_t c_Alignment = 64;
27 | const size_t c_NumPixelsMax = 16 * 1024 * 1024;
28 |
--------------------------------------------------------------------------------
/Chapter07/Ch07_04/Ch07_04.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter07/Ch07_05/Ch07_05.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch07_05.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include
7 | #include
8 |
// Plain aggregate that carries a pixel buffer together with statistics fields.
// It is passed by reference to CalcImageStats_Cpp / CalcImageStats_Iavx512 and
// by const reference to CheckArgs.
// NOTE(review): which members are inputs and which are results is inferred
// from the member names only — confirm against the CalcImageStats functions.
struct ImageStats
{
    // Buffer description and pixel value limits
    uint8_t* m_PixelBuffer;
    uint32_t m_PixelMinVal;
    uint32_t m_PixelMaxVal;
    size_t m_NumPixels;

    // Counters and sums
    size_t m_NumPixelsInRange;
    uint64_t m_PixelSum;
    uint64_t m_PixelSumSquares;

    // Floating-point statistics
    double m_PixelMean;
    double m_PixelStDev;
};
21 |
22 | // Ch07_05.cpp
23 | extern const char* c_ImageFileName;
24 |
25 | // Ch07_05_fcpp.cpp
26 | extern void CalcImageStats_Cpp(ImageStats& im_stats);
27 | extern void CalcImageStats_Iavx512(ImageStats& im_stats);
28 |
29 | // Ch07_05_misc.cpp
30 | extern bool CheckArgs(const ImageStats& im_stats);
31 |
32 | // Ch07_05_bm.cpp
33 | extern void CalcImageStats_bm(void);
34 |
35 | // Miscellaneous constants
36 | const size_t c_Alignment = 64;
37 | const size_t c_NumPixelsMax = 64 * 1024 * 1024;
38 | const uint32_t c_PixelMinVal = 40;
39 | const uint32_t c_PixelMaxVal = 230;
40 |
--------------------------------------------------------------------------------
/Chapter07/Ch07_05/Ch07_05.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter07/Ch07_05/Ch07_05_misc.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch07_05_misc.cpp
3 | //------------------------------------------------
4 |
5 | #include "Ch07_05.h"
6 | #include "AlignedMem.h"
7 |
8 | bool CheckArgs(const ImageStats& im_stats)
9 | {
10 | if (im_stats.m_NumPixels == 0)
11 | return false;
12 | if (im_stats.m_NumPixels % 64 != 0)
13 | return false;
14 | if (im_stats.m_NumPixels > c_NumPixelsMax)
15 | return false;
16 | if (!AlignedMem::IsAligned(im_stats.m_PixelBuffer, c_Alignment))
17 | return false;
18 | return true;
19 | }
20 |
--------------------------------------------------------------------------------
/Chapter08/Ch08_01/Ch08_01.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch08_01.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include "ZmmVal.h"
7 |
8 | // Ch08_01_fcpp.cpp
9 | extern void PackedMathF32_Iavx512(ZmmVal c[8], const ZmmVal* a, const ZmmVal* b);
10 | extern void PackedMathF64_Iavx512(ZmmVal c[8], const ZmmVal* a, const ZmmVal* b);
11 |
--------------------------------------------------------------------------------
/Chapter08/Ch08_01/Ch08_01.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter08/Ch08_02/Ch08_02.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch08_02.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include
7 | #include "ZmmVal.h"
8 |
9 | // Ch08_02_fcpp.cpp
10 | extern void PackedCompareF32_Iavx512(uint16_t c[8], const ZmmVal* a, const ZmmVal* b);
11 | extern void PackedCompareF64_Iavx512(ZmmVal* c, const ZmmVal* a, const ZmmVal* b,
12 | double x1, double x2, double x3);
13 |
--------------------------------------------------------------------------------
/Chapter08/Ch08_02/Ch08_02.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter08/Ch08_03/Ch08_03.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch08_03.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include
7 |
8 | // Ch08_03_fcpp.cpp
9 | extern void CalcMeanF32_Cpp(float* mean, const float* x, size_t n);
10 | extern void CalcMeanF32_Iavx512(float* mean, const float* x, size_t n);
11 | extern void CalcStDevF32_Cpp(float* st_dev, const float* x, size_t n, float mean);
12 | extern void CalcStDevF32_Iavx512(float* st_dev, const float* x, size_t n,
13 | float mean);
14 |
15 | // Ch08_03_misc.cpp
16 | extern bool CheckArgs(const float* x, size_t n);
17 | extern void InitArray(float* x, size_t n);
18 |
19 | // Miscellaneous constants
20 | const size_t c_NumElements = 91;
21 | const unsigned int c_RngSeed = 13;
22 | const float c_ArrayFillMin = 1.0f;
23 | const float c_ArrayFillMax = 100.0f;
24 | const size_t c_Alignment = 64;
25 |
--------------------------------------------------------------------------------
/Chapter08/Ch08_03/Ch08_03.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter08/Ch08_03/Ch08_03_misc.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch08_03_misc.cpp
3 | //------------------------------------------------
4 |
5 | #include "Ch08_03.h"
6 | #include "AlignedMem.h"
7 | #include "MT.h"
8 |
9 | bool CheckArgs(const float* x, size_t n)
10 | {
11 | return ((n >= 2) && AlignedMem::IsAligned(x, c_Alignment));
12 | }
13 |
14 | void InitArray(float* x, size_t n)
15 | {
16 | MT::FillArrayFP(x, n, c_ArrayFillMin, c_ArrayFillMax, c_RngSeed);
17 | }
18 |
--------------------------------------------------------------------------------
/Chapter08/Ch08_04/Ch08_04.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch08_04.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include
7 | #include "MatrixF64.h"
8 |
9 | // Note: In this example, CMD stands for covariance matrix data
10 | struct CMD
11 | {
12 | MatrixF64 m_X; // Data matrix
13 | MatrixF64 m_CovMat; // Covariance matrix
14 | std::vector m_VarMeans; // Variable (row) means
15 |
16 | CMD(size_t n_vars, size_t n_obvs) :
17 | m_X(n_vars, n_obvs), m_CovMat(n_vars, n_vars), m_VarMeans(n_vars) { }
18 | };
19 |
20 | // Ch08_04_fcpp.cpp
21 | extern void CalcCovMatF64_Cpp(CMD& cmd);
22 | extern void CalcCovMatF64_Iavx512(CMD& cmd);
23 |
24 | // Ch08_04_misc.cpp
25 | extern bool CheckArgs(const CMD& cmd);
26 | extern bool CompareResults(CMD& cmd1, CMD& cmd2);
27 | extern void InitCMD(CMD& cmd1, CMD& cmd2);
28 |
29 | // Ch08_04_misc2.cpp
30 | extern void DisplayData(const CMD& cmd);
31 |
--------------------------------------------------------------------------------
/Chapter08/Ch08_04/Ch08_04.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter08/Ch08_04/Ch08_04_misc2.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch08_04_misc2.cpp
3 | //------------------------------------------------
4 |
5 | #include
6 | #include
7 | #include "Ch08_04.h"
8 |
9 | void DisplayData(const CMD& cmd)
10 | {
11 | std::cout << "----- Data matrix ----\n";
12 | std::cout << cmd.m_X << std::endl;
13 | }
14 |
--------------------------------------------------------------------------------
/Chapter08/Ch08_05/Ch08_05.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch08_05.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include "MatrixF32.h"
7 |
8 | // Ch08_05_fcpp.cpp
9 | void MatrixMulF32_Cpp(MatrixF32& c, const MatrixF32& a, const MatrixF32& b);
10 | void MatrixMulF32_Iavx512(MatrixF32& c, const MatrixF32& a, const MatrixF32& b);
11 |
12 | // Ch08_05_misc.cpp
13 | bool CheckArgs(const MatrixF32& c, const MatrixF32& a, const MatrixF32& b);
14 | void InitMat(MatrixF32& c1, MatrixF32& c2, MatrixF32& a, MatrixF32& b);
15 | void SaveResults(const MatrixF32& c1, const MatrixF32& c2, const MatrixF32& a,
16 | const MatrixF32& b);
17 |
18 | // Ch08_05_bm.cpp
19 | void MatrixMulF32_bm(void);
20 |
--------------------------------------------------------------------------------
/Chapter08/Ch08_05/Ch08_05.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter08/Ch08_06/Ch08_06.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch08_06.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include "MatrixF64.h"
7 |
8 | // Ch08_06_fcpp.cpp
9 | void MatrixMulF64_Cpp(MatrixF64& c, const MatrixF64& a, const MatrixF64& b);
10 | void MatrixMulF64_Iavx512(MatrixF64& c, const MatrixF64& a, const MatrixF64& b);
11 |
12 | // Ch08_06_misc.cpp
13 | bool CheckArgs(const MatrixF64& c, const MatrixF64& a, const MatrixF64& b);
14 | void InitMat(MatrixF64& c1, MatrixF64& c2, MatrixF64& a, MatrixF64& b);
15 | void SaveResults(const MatrixF64& c1, const MatrixF64& c2, const MatrixF64& a, const MatrixF64& b);
16 |
17 | // Ch08_06_bm.cpp
18 | void MatrixMulF64_bm(void);
19 |
--------------------------------------------------------------------------------
/Chapter08/Ch08_06/Ch08_06.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter08/Ch08_07/Ch08_07.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch08_07.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include
7 | #include "MatrixF32.h"
8 |
// Four single-precision components stored in the order W, X, Y, Z.
struct Vec4x1_F32
{
    float W;
    float X;
    float Y;
    float Z;
};
13 |
14 | // Ch08_07_fcpp.cpp
15 | extern void MatVecMulF32_Cpp(Vec4x1_F32* vec_b, MatrixF32& m,
16 | Vec4x1_F32* vec_a, size_t num_vec);
17 | extern void MatVecMulF32a_Iavx512(Vec4x1_F32* vec_b, MatrixF32& m,
18 | Vec4x1_F32* vec_a, size_t num_vec);
19 | extern void MatVecMulF32b_Iavx512(Vec4x1_F32* vec_b, MatrixF32& m,
20 | Vec4x1_F32* vec_a, size_t num_vec);
21 |
22 | // Ch08_07_misc.cpp
23 | extern bool CheckArgs(const Vec4x1_F32* vec_b, const MatrixF32& m,
24 | const Vec4x1_F32* vec_a, size_t num_vec);
25 | extern void Init(MatrixF32& m, Vec4x1_F32* va, size_t num_vec);
26 | extern bool VecCompare(const Vec4x1_F32* v1, const Vec4x1_F32* v2);
27 |
28 | // Ch08_07_bm.cpp
29 | extern void MatrixVecMulF32_bm(void);
30 |
31 | // Miscellaneous constants
32 | const size_t c_Alignment = 64;
33 | const int c_RngMinVal = 1;
34 | const int c_RngMaxVal = 500;
35 | const unsigned int c_RngSeedVal = 187;
36 |
--------------------------------------------------------------------------------
/Chapter08/Ch08_07/Ch08_07.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter08/Ch08_08/Ch08_08.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch08_08.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include
7 |
8 | // Ch08_08_fcpp.cpp
// 1D convolution functions: y receives the result of convolving x with kernel.
// NOTE(review): element type float assumed from the _F32 suffix — confirm
extern void Convolve1D_F32_Cpp(std::vector<float>& y,
    const std::vector<float>& x, const std::vector<float>& kernel);
extern void Convolve1D_F32_Iavx512(std::vector<float>& y,
    const std::vector<float>& x, const std::vector<float>& kernel);
extern void Convolve1DKs5_F32_Iavx512(std::vector<float>& y,
    const std::vector<float>& x, const std::vector<float>& kernel);

// Ch08_08_misc.cpp
extern bool CheckArgs(std::vector<float>& y,
    const std::vector<float>& x, const std::vector<float>& kernel);
19 |
20 | // Ch08_08_bm.cpp
21 | extern void Convolve1D_F32_bm(void);
22 |
23 | // Miscellaneous constants
24 | const unsigned int c_RngSeed = 97;
25 |
--------------------------------------------------------------------------------
/Chapter08/Ch08_08/Ch08_08.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter08/Ch08_08/Ch08_08_misc.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch08_08_misc.cpp
3 | //------------------------------------------------
4 |
5 | #include "Ch08_08.h"
6 |
// Validates the arguments of the 1D convolution functions.
// Returns true only when:
//   - kernel has an odd number of elements (an empty kernel is rejected)
//   - y and x have the same number of elements
//   - y has at least as many elements as kernel
// NOTE(review): element type float assumed from the Convolve1D_F32 function
// names — confirm.
bool CheckArgs(std::vector<float>& y, const std::vector<float>& x, const std::vector<float>& kernel)
{
    // Low bit clear means an even size, which includes size 0.
    if ((kernel.size() & 1) == 0)
        return false;

    if (y.size() != x.size())
        return false;

    if (y.size() < kernel.size())
        return false;

    return true;
}
17 |
18 |
--------------------------------------------------------------------------------
/Chapter08/Ch08_09/Ch08_09.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch08_09.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include
7 | #include
8 |
// Data passed to Convolve1Dx2_F32_Cpp / Convolve1Dx2_F32_Iavx512: image
// dimensions, kernel size, image buffers and two 1D kernels.
// NOTE(review): element type float assumed from the _F32 function names;
// buffer roles are inferred from the member names — confirm.
struct CD_1Dx2
{
    size_t m_ImH;                       // image height
    size_t m_ImW;                       // image width
    size_t m_KernelSize;                // number of elements per 1D kernel
    std::vector<float> m_ImSrc;         // source image
    std::vector<float> m_ImDes;         // destination image
    std::vector<float> m_ImTmp;         // temporary image
    std::vector<float> m_Kernel1Dy;     // 1D kernel (y)
    std::vector<float> m_Kernel1Dx;     // 1D kernel (x)
};
20 |
// Identifiers accepted by GetKernel1Dx2; the underlying type is unsigned int
// and the enumerators take the values 0 through 3 in the order listed.
enum class KERNEL_ID : unsigned int
{
    LowPass1Dx2_3x3,
    LowPass1Dx2_5x5,
    LowPass1Dx2_7x7,
    LowPass1Dx2_9x9
};
25 |
26 | // Ch08_09_fcpp.cpp
27 | extern void Convolve1Dx2_F32_Cpp(CD_1Dx2& cd);
28 | extern void Convolve1Dx2_F32_Iavx512(CD_1Dx2& cd);
29 |
30 | // Ch08_09_misc.cpp
31 | extern bool CheckArgs1Dx2(const CD_1Dx2& cd);
32 | extern void GetKernel1Dx2(CD_1Dx2& cd, KERNEL_ID id);
33 | extern void InitConvData1Dx2(std::array& cd, const char* fn);
34 |
35 | // Ch08_09_bm.cpp
36 | extern void Convolve1Dx2_F32_bm(void);
37 |
38 | // Ch08_09_test.cpp
39 | extern void DisplayKernel1Dx2(float sigma, size_t ks);
40 |
--------------------------------------------------------------------------------
/Chapter08/Ch08_09/Ch08_09.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter08/Ch08_09/Ch08_09_bm.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch08_09_bm.cpp
3 | //------------------------------------------------
4 |
5 | #include
6 | #include
7 | #include "Ch08_09.h"
8 | #include "BmThreadTimer.h"
9 |
10 | void Convolve1Dx2_F32_bm(void)
11 | {
12 | std::cout << "\nRunning benchmark function Convolve1Dx2_F32_bm - please wait\n";
13 |
14 | const char* fn_src = "../../Data/ImageE.png";
15 |
16 | std::array cd;
17 | InitConvData1Dx2(cd, fn_src);
18 |
19 | const size_t num_it = 500;
20 | const size_t num_alg = 2;
21 | BmThreadTimer bmtt(num_it, num_alg);
22 |
23 | for (size_t i = 0; i < num_it; i++)
24 | {
25 | bmtt.Start(i, 0);
26 | Convolve1Dx2_F32_Cpp(cd[0]);
27 | bmtt.Stop(i, 0);
28 |
29 | bmtt.Start(i, 1);
30 | Convolve1Dx2_F32_Iavx512(cd[1]);
31 | bmtt.Stop(i, 1);
32 |
33 | if ((i % 10) == 0)
34 | std::cout << '.' << std::flush;
35 | }
36 |
37 | std::cout << '\n';
38 | std::string fn = bmtt.BuildCsvFilenameString("Ch08_09_Convolve1Dx2_F32_bm");
39 | bmtt.SaveElapsedTimes(fn, BmThreadTimer::EtUnit::MicroSec, 2);
40 | std::cout << "Benchmark times saved to file " << fn << '\n';
41 | }
42 |
--------------------------------------------------------------------------------
/Chapter08/Ch08_09/Ch08_09_test.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch08_09_test.cpp
3 | //------------------------------------------------
4 |
5 | #include
6 | #include
7 | #include
8 | #include "Ch08_09.h"
9 | #include "MT_Convolve.h"
10 |
11 | void DisplayKernel1Dx2(float sigma, size_t ks)
12 | {
13 | std::vector gk = GenGaussianKernel1D(sigma, ks);
14 |
15 | std::cout << std::fixed << std::setprecision(6);
16 |
17 | float sum = 0.0f;
18 | for (size_t i = 0; i < ks; i++)
19 | {
20 | sum += gk[i];
21 | std::cout << std::setw(10) << gk[i] << ' ';
22 | }
23 |
24 | std::cout << '\n';
25 | std::cout << " sum = " << sum << "\n\n";
26 | }
27 |
--------------------------------------------------------------------------------
/Chapter09/Ch09_01/Ch09_01.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter09/Ch09_01/Cpuid__.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Cpuid__.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include
7 |
// Four 32-bit register values, stored in the order EAX, EBX, ECX, EDX.
// Cpuid__ takes a pointer to this structure as its r_out argument.
struct CpuidRegs
{
    uint32_t EAX;
    uint32_t EBX;
    uint32_t ECX;
    uint32_t EDX;
};
15 |
16 | // Cpuid__.cpp
17 | extern uint32_t Cpuid__(uint32_t r_eax, uint32_t r_ecx, CpuidRegs* r_out);
18 | extern void Xgetbv__(uint32_t r_ecx, uint32_t* r_eax, uint32_t* r_edx);
19 |
--------------------------------------------------------------------------------
/Chapter09/Ch09_02/Ch09_02.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch09_02.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include
7 |
8 | // Ch09_02_fcpp.cpp
// Rectangular <-> polar coordinate conversions on vectors of values.
// NOTE(review): element type float assumed from the F32 suffix — confirm
extern void ConvertRectToPolarF32_Cpp(std::vector<float>& r, std::vector<float>& a,
    const std::vector<float>& x, const std::vector<float>& y);
extern void ConvertRectToPolarF32_Iavx(std::vector<float>& r, std::vector<float>& a,
    const std::vector<float>& x, const std::vector<float>& y);
extern void ConvertPolarToRectF32_Cpp(std::vector<float>& x, std::vector<float>& y,
    const std::vector<float>& r, const std::vector<float>& a);
extern void ConvertPolarToRectF32_Iavx(std::vector<float>& x, std::vector<float>& y,
    const std::vector<float>& r, const std::vector<float>& a);

// Ch09_02_misc.cpp
extern bool CheckArgs(const std::vector<float>& v1, const std::vector<float>& v2,
    const std::vector<float>& v3, const std::vector<float>& v4);
extern bool CompareResults(const std::vector<float>& v1,
    const std::vector<float>& v2);
extern void FillVectorsRect(std::vector<float>& x, std::vector<float>& y);
extern void FillVectorsPolar(std::vector<float>& r, std::vector<float>& a);
25 |
--------------------------------------------------------------------------------
/Chapter09/Ch09_02/Ch09_02.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter09/Ch09_03/Ch09_03.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch09_03.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include
7 |
8 | // Ch09_03_fcpp.cpp
// BSA calculation functions: bsa receives the results computed from the
// ht and wt vectors.
// NOTE(review): element type double assumed from the F64 suffix — confirm
extern void CalcBSA_F64_Cpp(std::vector<double>& bsa, const std::vector<double>& ht,
    const std::vector<double>& wt);
extern void CalcBSA_F64_Iavx(std::vector<double>& bsa, const std::vector<double>& ht,
    const std::vector<double>& wt);

// Ch09_03_misc.cpp
extern bool CheckArgs(const std::vector<double>& bsa,
    const std::vector<double>& ht, const std::vector<double>& wt);
extern bool CompareResults(const std::vector<double>& bsa1,
    const std::vector<double>& bsa2);
extern void FillHeightWeightVectors(std::vector<double>& ht,
    std::vector<double>& wt);
21 |
22 | // Ch09_03_bm.cpp
23 | void CalcBSA_bm(void);
24 |
--------------------------------------------------------------------------------
/Chapter09/Ch09_03/Ch09_03.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter09/Ch09_03/Ch09_03_bm.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch09_03_bm.cpp
3 | //------------------------------------------------
4 |
5 | #include
6 | #include "Ch09_03.h"
7 | #include "BmThreadTimer.h"
8 |
9 | void CalcBSA_bm(void)
10 | {
11 | std::cout << "\nRunning benchmark function CalcBSA_bm - please wait\n";
12 |
13 | const size_t n = 200000;
14 | std::vector heights(n);
15 | std::vector weights(n);
16 | std::vector bsa1(n * 3);
17 | std::vector bsa2(n * 3);
18 |
19 | FillHeightWeightVectors(heights, weights);
20 |
21 | const size_t num_it = 500;
22 | const size_t num_alg = 2;
23 | BmThreadTimer bmtt(num_it, num_alg);
24 |
25 | for (size_t i = 0; i < num_it; i++)
26 | {
27 | bmtt.Start(i, 0);
28 | CalcBSA_F64_Cpp(bsa1, heights, weights);
29 | bmtt.Stop(i, 0);
30 |
31 | bmtt.Start(i, 1);
32 | CalcBSA_F64_Iavx(bsa2, heights, weights);
33 | bmtt.Stop(i, 1);
34 | }
35 |
36 | std::cout << '\n';
37 | std::string fn = bmtt.BuildCsvFilenameString("Ch09_03_CalcBSA_bm");
38 | bmtt.SaveElapsedTimes(fn, BmThreadTimer::EtUnit::MicroSec, 2);
39 | std::cout << "Benchmark times save to file " << fn << '\n';
40 | }
41 |
--------------------------------------------------------------------------------
/Chapter11/Ch11_01/Ch11_01.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch11_01.cpp
3 | //------------------------------------------------
4 |
5 | #include
6 | #include "Ch11_01.h"
7 |
8 | static void AddI32(void);
9 | static void SubI64(void);
10 |
11 | int main()
12 | {
13 | AddI32();
14 | SubI64();
15 | return 0;
16 | }
17 |
18 | static void AddI32(void)
19 | {
20 | int a = 10;
21 | int b = 20;
22 | int c = 30;
23 | int d = AddI32_A(a, b, c);
24 |
25 | DisplayResultsAddI32(a, b, c, d);
26 | }
27 |
28 | static void SubI64(void)
29 | {
30 | long long a = 10;
31 | long long b = 20;
32 | long long c = 30;
33 | long long d = SubI64_A(a, b, c);
34 |
35 | DisplayResultsSubI64(a, b, c, d);
36 | }
37 |
--------------------------------------------------------------------------------
/Chapter11/Ch11_01/Ch11_01.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch11_01.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 |
7 | // Ch11_01_misc.cpp
8 | extern void DisplayResultsAddI32(int a, int b, int c, int d);
9 | extern void DisplayResultsSubI64(long long a, long long b, long long c, long long d);
10 |
11 | // Ch11_01_fasm.asm
12 | extern "C" int AddI32_A(int a, int b, int c);
13 | extern "C" long long SubI64_A(long long a, long long b, long long c);
14 |
--------------------------------------------------------------------------------
/Chapter11/Ch11_01/Ch11_01.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter11/Ch11_01/Ch11_01_misc.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch11_01_misc.cpp
3 | //------------------------------------------------
4 |
5 | #include
6 | #include "Ch11_01.h"
7 |
// Prints a heading for AddI32_A followed by the values a, b, c and d, one
// per line, and a trailing blank line.
void DisplayResultsAddI32(int a, int b, int c, int d)
{
    std::cout << "Results for AddI32_A()\n"
              << "a = " << a << '\n'
              << "b = " << b << '\n'
              << "c = " << c << '\n'
              << "d = " << d << '\n'
              << '\n';
}
18 |
// Prints a heading for SubI64_A followed by the values a, b, c and d, one
// per line, and a trailing blank line.
void DisplayResultsSubI64(long long a, long long b, long long c, long long d)
{
    std::cout << "Results for SubI64_A()\n"
              << "a = " << a << '\n'
              << "b = " << b << '\n'
              << "c = " << c << '\n'
              << "d = " << d << '\n'
              << '\n';
}
29 |
--------------------------------------------------------------------------------
/Chapter11/Ch11_02/Ch11_02.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch11_02.cpp
3 | //------------------------------------------------
4 |
5 | #include
6 | #include
7 | #include "Ch11_02.h"
8 |
9 | static void MulI32(void);
10 | static void MulU64(void);
11 |
12 | int main()
13 | {
14 | MulI32();
15 | MulU64();
16 | return 0;
17 | }
18 |
19 | static void MulI32(void)
20 | {
21 | int32_t a = 10;
22 | int32_t b = -20;
23 | int32_t c = 30;
24 | int32_t d = MulI32_A(a, b, c);
25 |
26 | DisplayResultsMulI32(a, b, c, d);
27 | }
28 |
29 | static void MulU64(void)
30 | {
31 | uint64_t a = 10;
32 | uint64_t b = 20;
33 | uint64_t c = 1000000000;
34 | uint64_t d = MulU64_A(a, b, c);
35 |
36 | DisplayResultsMulU64(a, b, c, d);
37 | }
38 |
--------------------------------------------------------------------------------
/Chapter11/Ch11_02/Ch11_02.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch11_02.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include
7 |
8 | // Ch11_02_misc.cpp
9 | extern void DisplayResultsMulI32(int32_t a, int32_t b, int32_t c, int32_t d);
10 | extern void DisplayResultsMulU64(uint64_t a, uint64_t b, uint64_t c, uint64_t d);
11 |
12 | // Ch11_02_fasm.asm
13 | extern "C" int32_t MulI32_A(int32_t a, int32_t b, int32_t c);
14 | extern "C" uint64_t MulU64_A(uint64_t a, uint64_t b, uint64_t c);
15 |
--------------------------------------------------------------------------------
/Chapter11/Ch11_02/Ch11_02.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter11/Ch11_02/Ch11_02_misc.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch11_02_misc.cpp
3 | //------------------------------------------------
4 |
5 | #include
6 | #include "Ch11_02.h"
7 |
// Prints a heading for MulI32_A followed by the values a, b, c and d, one
// per line, and a trailing blank line.
void DisplayResultsMulI32(int32_t a, int32_t b, int32_t c, int32_t d)
{
    std::cout << "Results for MulI32_A()\n"
              << "a = " << a << '\n'
              << "b = " << b << '\n'
              << "c = " << c << '\n'
              << "d = " << d << '\n'
              << '\n';
}
18 |
// Prints a heading for MulU64_A followed by the values a, b, c and d, one
// per line, and a trailing blank line.
void DisplayResultsMulU64(uint64_t a, uint64_t b, uint64_t c, uint64_t d)
{
    std::cout << "Results for MulU64_A()\n"
              << "a = " << a << '\n'
              << "b = " << b << '\n'
              << "c = " << c << '\n'
              << "d = " << d << '\n'
              << '\n';
}
29 |
--------------------------------------------------------------------------------
/Chapter11/Ch11_03/Ch11_03.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch11_03.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include
7 |
8 | // Ch11_03_misc.cpp
9 | extern void DisplayResultsDivI32(size_t test_id, int32_t rc, int32_t a,
10 | int32_t b, int32_t quo, int32_t rem);
11 | extern void DisplayResultsDivU64(size_t test_id, int32_t rc, uint64_t a,
12 | uint64_t b, uint64_t quo, uint64_t rem);
13 |
14 | // Ch11_03_fasm.asm
15 | extern "C" int32_t DivI32_A(int32_t a, int32_t b, int32_t* quo, int32_t* rem);
16 | extern "C" int32_t DivU64_A(uint64_t a, uint64_t b, uint64_t* quo, uint64_t* rem);
17 |
--------------------------------------------------------------------------------
/Chapter11/Ch11_03/Ch11_03.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter11/Ch11_03/Ch11_03_misc.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch11_03_misc.cpp
3 | //------------------------------------------------
4 |
5 | #include
6 | #include "Ch11_03.h"
7 |
// Displays the result of one signed 32-bit division test case.
//   test_id  - test number shown in the heading
//   rc       - return code of the division function; zero means quo and rem
//              are not valid and "undefined" is printed instead
//   a, b     - dividend and divisor
//   quo, rem - quotient and remainder (printed only when rc is non-zero)
// rc is declared as int32_t so that the definition matches the prototype in
// Ch11_03.h on every platform, not only where int32_t is an alias of int.
void DisplayResultsDivI32(size_t test_id, int32_t rc, int32_t a,
    int32_t b, int32_t quo, int32_t rem)
{
    const char nl = '\n';
    std::cout << "Test #" << test_id << " | ";
    std::cout << "a: " << a << " b: " << b << nl;

    if (rc != 0)
        std::cout << "quo: " << quo << " rem: " << rem << nl;
    else
        std::cout << "quo: undefined rem: undefined" << nl;

    std::cout << nl;
}
22 |
// Displays the result of one unsigned 64-bit division test case.
//   test_id  - test number shown in the heading
//   rc       - return code of the division function; zero means quo and rem
//              are not valid and "undefined" is printed instead
//   a, b     - dividend and divisor
//   quo, rem - quotient and remainder (printed only when rc is non-zero)
// rc is declared as int32_t so that the definition matches the prototype in
// Ch11_03.h on every platform, not only where int32_t is an alias of int.
void DisplayResultsDivU64(size_t test_id, int32_t rc, uint64_t a,
    uint64_t b, uint64_t quo, uint64_t rem)
{
    const char nl = '\n';
    std::cout << "Test #" << test_id << " | ";
    std::cout << "a: " << a << " b: " << b << nl;

    if (rc != 0)
        std::cout << "quo: " << quo << " rem: " << rem << nl;
    else
        std::cout << "quo: undefined rem: undefined" << nl;

    std::cout << nl;
}
37 |
--------------------------------------------------------------------------------
/Chapter11/Ch11_04/Ch11_04.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch11_04.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include
7 |
8 | // Ch11_04_fasm.asm
9 | extern "C" int64_t CalcResultI64_A(int8_t a, int16_t b, int32_t c, int64_t d,
10 | int8_t e, int16_t f, int32_t g, int64_t h);
11 |
12 | extern "C" int32_t CalcResultU64_A(uint8_t a, uint16_t b, uint32_t c, uint64_t d,
13 | uint8_t e, uint16_t f, uint32_t g, uint64_t h, uint64_t* quo, uint64_t* rem);
14 |
--------------------------------------------------------------------------------
/Chapter11/Ch11_04/Ch11_04.vcxproj.filters:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
6 | cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx
7 |
8 |
9 | {93995380-89BD-4b04-88EB-625FBE52EBFB}
10 | h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd
11 |
12 |
13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
15 |
16 |
17 |
18 |
19 | Source Files
20 |
21 |
22 |
23 |
24 | Source Files
25 |
26 |
27 |
28 |
29 | Header Files
30 |
31 |
32 |
--------------------------------------------------------------------------------
/Chapter11/Ch11_04/Ch11_04.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter11/Ch11_05/Ch11_05.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch11_05.cpp
3 | //------------------------------------------------
4 |
5 | #include
6 | #include
7 | #include
8 | #include "Ch11_05.h"
9 |
10 | static void MemAddressing(void);
11 |
12 | int main()
13 | {
14 | MemAddressing();
15 | return 0;
16 | }
17 |
18 | static void MemAddressing()
19 | {
20 | const int w = 5;
21 | const char nl = '\n';
22 | const char* delim = ", ";
23 |
24 | int n = g_NumPrimes_A;
25 |
26 | g_SumPrimes_A = 0;
27 |
28 | for (int i = -1; i < n + 1; i++)
29 | {
30 | int v1 = -1, v2 = -1, v3 = -1, v4 = -1;
31 | int rc = MemAddressing_A(i, &v1, &v2, &v3, &v4);
32 |
33 | std::cout << "i = " << std::setw(w - 1) << i << delim;
34 | std::cout << "rc = " << std::setw(w - 1) << rc << delim;
35 | std::cout << "v1 = " << std::setw(w) << v1 << delim;
36 | std::cout << "v2 = " << std::setw(w) << v2 << delim;
37 | std::cout << "v3 = " << std::setw(w) << v3 << delim;
38 | std::cout << "v4 = " << std::setw(w) << v4 << delim;
39 | std::cout << nl;
40 | }
41 |
42 | std::cout << "\ng_SumPrimes_A = " << g_SumPrimes_A << nl;
43 | }
44 |
--------------------------------------------------------------------------------
/Chapter11/Ch11_05/Ch11_05.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch11_05.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 |
7 | // Ch11_05_fasm.asm
8 | extern "C" int MemAddressing_A(int i, int* v1, int* v2, int* v3, int* v4);
9 |
10 | extern "C" int g_NumPrimes_A;
11 | extern "C" int g_SumPrimes_A;
12 |
--------------------------------------------------------------------------------
/Chapter11/Ch11_05/Ch11_05.vcxproj.filters:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
6 | cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx
7 |
8 |
9 | {93995380-89BD-4b04-88EB-625FBE52EBFB}
10 | h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd
11 |
12 |
13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
15 |
16 |
17 |
18 |
19 | Source Files
20 |
21 |
22 |
23 |
24 | Source Files
25 |
26 |
27 |
28 |
29 | Header Files
30 |
31 |
32 |
--------------------------------------------------------------------------------
/Chapter11/Ch11_05/Ch11_05.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter11/Ch11_06/Ch11_06.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch11_06.cpp
3 | //------------------------------------------------
4 |
5 | #include
6 | #include
7 | #include "Ch11_06.h"
8 |
9 | static void SumElementsI32(void);
10 |
11 | int main()
12 | {
13 | SumElementsI32();
14 | return 0;
15 | }
16 |
17 | static void SumElementsI32(void)
18 | {
19 | const size_t n = 20;
20 | int x[n];
21 |
22 | FillArray(x, n);
23 |
24 | int sum1 = SumElementsI32_Cpp(x, n);
25 | int sum2 = SumElementsI32_A(x, n);
26 |
27 | DisplayResults(x, n, sum1, sum2);
28 | }
--------------------------------------------------------------------------------
/Chapter11/Ch11_06/Ch11_06.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch11_06.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 |
7 | // Ch11_06_fcpp.cpp
8 | extern int SumElementsI32_Cpp(const int* x, size_t n);
9 |
10 | // Ch11_06_fasm.asm
11 | extern "C" int SumElementsI32_A(const int* x, size_t n);
12 |
13 | // Ch11_06_misc.cpp
14 | extern void FillArray(int* x, size_t n);
15 | extern void DisplayResults(const int* x, size_t n, int sum1, int sum2);
16 |
--------------------------------------------------------------------------------
/Chapter11/Ch11_06/Ch11_06.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter11/Ch11_06/Ch11_06_fasm.asm:
--------------------------------------------------------------------------------
1 | ;-------------------------------------------------
2 | ; Ch11_06_fasm.asm
3 | ;-------------------------------------------------
4 |
5 | ;------------------------------------------------------------------------------
6 | ; extern "C" int SumElementsI32_A(const int* x, size_t n);
7 | ;------------------------------------------------------------------------------
8 | ; In: rcx = x, rdx = n. Out: eax = sum of x[0..n-1] (0 when n is 0).
9 | .code
10 | SumElementsI32_A proc
11 |
12 | ; Initialize sum to zero; i starts at -1 because Loop1 increments before testing
13 | xor eax,eax ;sum = 0
14 | mov r10,-1 ;i = -1
15 |
16 | ; Sum the elements of the array (i and n are compared as unsigned values)
17 | Loop1: inc r10 ;i += 1
18 | cmp r10,rdx ;is i >= n?
19 | jae Done ;jump if i >= n (unsigned compare)
20 |
21 | add eax,[rcx+r10*4] ;sum += x[i] (index scaled by 4 bytes per element)
22 | jmp Loop1 ;perform next iteration
23 |
24 | Done: ret ;return to caller, sum is in eax
25 |
26 | SumElementsI32_A endp
27 | end
28 |
--------------------------------------------------------------------------------
/Chapter11/Ch11_06/Ch11_06_fcpp.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch11_06_fcpp.cpp
3 | //------------------------------------------------
4 |
5 | #include "Ch11_06.h"
6 |
// Returns the sum of the n int values starting at x (0 when n is 0).
int SumElementsI32_Cpp(const int* x, size_t n)
{
    int total = 0;
    const int* const stop = x + n;

    for (const int* cur = x; cur != stop; ++cur)
        total += *cur;

    return total;
}
16 |
--------------------------------------------------------------------------------
/Chapter11/Ch11_06/Ch11_06_misc.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch11_06_misc.cpp
3 | //------------------------------------------------
4 |
5 | #include
6 | #include
7 | #include "Ch11_06.h"
8 | #include "MT.h"
9 |
10 | void FillArray(int* x, size_t n)
11 | {
12 | const int min_val = -2000;
13 | const int max_val = 2000;
14 | const unsigned int rng_seed = 1337;
15 |
16 | MT::FillArray(x, n, min_val, max_val, rng_seed, true);
17 | }
18 |
// Prints a banner, every element of x (value right-aligned in a 4-column
// field), a blank line, and then the two sums on separate lines.
void DisplayResults(const int* x, size_t n, int sum1, int sum2)
{
    std::cout << "----- Results for SumElementsI32() -----\n";

    size_t idx = 0;
    while (idx < n)
    {
        std::cout << "x[" << idx << "] = " << std::setw(4) << x[idx] << '\n';
        ++idx;
    }

    std::cout << '\n'
              << "sum1 = " << sum1 << '\n'
              << "sum2 = " << sum2 << '\n';
}
31 |
--------------------------------------------------------------------------------
/Chapter11/Ch11_07/Ch11_07.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch11_07.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 |
7 | // Ch11_07_fasm.asm
8 | extern "C" int SignedMin1_A(int a, int b, int c);
9 | extern "C" int SignedMin2_A(int a, int b, int c);
10 | extern "C" int SignedMax1_A(int a, int b, int c);
11 | extern "C" int SignedMax2_A(int a, int b, int c);
12 |
13 | // Ch11_07_misc.cpp
14 | void DisplayResult(const char* s1, int a, int b, int c, int result);
15 |
--------------------------------------------------------------------------------
/Chapter11/Ch11_07/Ch11_07.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter11/Ch11_07/Ch11_07_misc.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch11_07_misc.cpp
3 | //------------------------------------------------
4 |
5 | #include
6 | #include
7 | #include "Ch11_07.h"
8 |
// Prints one line of the form "<s1>(a, b, c) = result", with each of the
// four numbers right-aligned in a 4-column field.
void DisplayResult(const char* s1, int a, int b, int c, int result)
{
    const int field_w = 4;

    std::cout << s1 << "("
              << std::setw(field_w) << a << ", "
              << std::setw(field_w) << b << ", "
              << std::setw(field_w) << c << ") = "
              << std::setw(field_w) << result << '\n';
}
19 |
--------------------------------------------------------------------------------
/Chapter11/Ch11_08/Ch11_08.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch11_08.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 |
7 | // Ch11_08_fasm.asm
8 | extern "C" void CopyArrayI32_A(int32_t* b, const int32_t* a, size_t n); // b is the non-const pointer, a the const one
9 | extern "C" void FillArrayI32_A(const int32_t* a, int32_t val, size_t n); // NOTE(review): a is declared const although the name suggests the routine writes through it - confirm against Ch11_08_fasm.asm
10 |
--------------------------------------------------------------------------------
/Chapter11/Ch11_08/Ch11_08.vcxproj.filters:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
6 | cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx
7 |
8 |
9 | {93995380-89BD-4b04-88EB-625FBE52EBFB}
10 | h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd
11 |
12 |
13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
15 |
16 |
17 |
18 |
19 | Source Files
20 |
21 |
22 |
23 |
24 | Source Files
25 |
26 |
27 |
28 |
29 | Header Files
30 |
31 |
32 |
--------------------------------------------------------------------------------
/Chapter11/Ch11_08/Ch11_08.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter12/Ch12_01/Ch12_01.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch12_01.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 |
7 | // Ch12_01_fasm.asm
8 | extern "C" float ConvertFtoC_Aavx(float deg_f);
9 | extern "C" float ConvertCtoF_Aavx(float deg_c);
10 |
--------------------------------------------------------------------------------
/Chapter12/Ch12_01/Ch12_01.vcxproj.filters:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
6 | cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx
7 |
8 |
9 | {93995380-89BD-4b04-88EB-625FBE52EBFB}
10 | h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd
11 |
12 |
13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
15 |
16 |
17 |
18 |
19 | Source Files
20 |
21 |
22 |
23 |
24 | Source Files
25 |
26 |
27 |
28 |
29 | Header Files
30 |
31 |
32 |
--------------------------------------------------------------------------------
/Chapter12/Ch12_01/Ch12_01.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter12/Ch12_02/Ch12_02.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch12_02.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 |
7 | // Ch12_02_fcpp.cpp
8 | extern double CalcDistance_Cpp(double x1, double y1, double z1, double x2,
9 | double y2, double z2);
10 |
11 | // Ch12_02_fasm.asm
12 | extern "C" double CalcDistance_Aavx(double x1, double y1, double z1, double x2,
13 | double y2, double z2);
14 |
15 | // Ch12_02_misc.cpp
16 | extern void InitArrays(double* x, double* y, double* z, size_t n,
17 | unsigned int rng_seed);
18 |
--------------------------------------------------------------------------------
/Chapter12/Ch12_02/Ch12_02.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter12/Ch12_02/Ch12_02_fcpp.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch12_02_fcpp.cpp
3 | //------------------------------------------------
4 |
5 | #include "Ch12_02.h"
6 | #include
7 |
// Returns the Euclidean distance between (x1, y1, z1) and (x2, y2, z2).
double CalcDistance_Cpp(double x1, double y1, double z1, double x2, double y2, double z2)
{
    const double dx = x2 - x1;
    const double dy = y2 - y1;
    const double dz = z2 - z1;

    // Squares are summed in x, y, z order.
    const double sum_of_squares = dx * dx + dy * dy + dz * dz;
    return sqrt(sum_of_squares);
}
16 |
--------------------------------------------------------------------------------
/Chapter12/Ch12_02/Ch12_02_misc.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch12_02_misc.cpp
3 | //------------------------------------------------
4 |
5 | #include "Ch12_02.h"
6 | #include "MT.h"
7 |
8 | void InitArrays(double* x, double* y, double* z, size_t n, unsigned int rng_seed)
9 | {
10 | const int rng_min = 1;
11 | const int rng_max = 99;
12 |
13 | MT::FillArray(x, n, rng_min, rng_max, rng_seed);
14 | MT::FillArray(y, n, rng_min, rng_max, rng_seed + 1);
15 | MT::FillArray(z, n, rng_min, rng_max, rng_seed + 2);
16 | }
17 |
--------------------------------------------------------------------------------
/Chapter12/Ch12_03/Ch12_03.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch12_03.cpp
3 | //------------------------------------------------
4 |
5 | #include
6 | #include
7 | #include
8 | #include
9 | #include "Ch12_03.h"
10 |
11 | static void CompareF32(void);
12 |
13 | int main()
14 | {
15 | CompareF32();
16 | return 0;
17 | }
18 |
19 | static void CompareF32(void)
20 | {
21 | const size_t n = 6;
22 | float a[n] {120.0, 250.0, 300.0, -18.0, -81.0, 42.0};
23 | float b[n] {130.0, 240.0, 300.0, 32.0, -100.0, 0.0};
24 |
25 | // Set NAN test value
26 | b[n - 1] = std::numeric_limits::quiet_NaN();
27 |
28 | std::cout << "\n----- Results for CompareF32 -----\n";
29 |
30 | for (size_t i = 0; i < n; i++)
31 | {
32 | uint8_t cmp_results[c_NumCmpOps];
33 |
34 | CompareF32_Aavx(a[i], b[i], cmp_results);
35 | DisplayResults(a[i], b[i], cmp_results);
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/Chapter12/Ch12_03/Ch12_03.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch12_03.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include
7 |
8 | // Ch12_03_fasm.asm
9 | extern "C" void CompareF32_Aavx(float a, float b, uint8_t* results);
10 |
11 | // Ch12_03_misc.cpp
12 | extern void DisplayResults(float a, float b, const uint8_t* cmp_results);
13 |
14 | // Miscellaneous constants
15 | const size_t c_NumCmpOps = 7;
16 |
--------------------------------------------------------------------------------
/Chapter12/Ch12_03/Ch12_03.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter12/Ch12_03/Ch12_03_misc.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch12_03_misc.cpp
3 | //------------------------------------------------
4 |
5 | #include
6 | #include
7 | #include "Ch12_03.h"
8 |
9 | static const char* c_OpStrings[c_NumCmpOps] =
10 | { "UO", "LT", "LE", "EQ", "NE", "GT", "GE" };
11 |
12 | void DisplayResults(float a, float b, const uint8_t* cmp_results)
13 | {
14 | std::cout << "a = " << a << ", ";
15 | std::cout << "b = " << b << '\n';
16 |
17 | for (size_t i = 0; i < c_NumCmpOps; i++)
18 | {
19 | std::cout << c_OpStrings[i] << '=';
20 | std::cout << std::boolalpha << std::left;
21 | std::cout << std::setw(6) << (int)cmp_results[i] << ' ';
22 | }
23 |
24 | std::cout << "\n\n";
25 | }
26 |
--------------------------------------------------------------------------------
/Chapter12/Ch12_04/Ch12_04.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch12_04.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 |
7 | // Simple union for data exchange: a and b of ConvertScalar_Aavx are passed as pointers to this type
8 | union Uval
9 | {
10 | int32_t m_I32; // 32-bit signed integer member
11 | int64_t m_I64; // 64-bit signed integer member
12 | float m_F32; // single-precision floating-point member
13 | double m_F64; // double-precision floating-point member
14 | };
15 |
// The numeric value of each CvtOp enumerator must match the position of
// the corresponding entry in the jump table that's defined in the .asm
// file, so the values are spelled out explicitly.
enum class CvtOp : unsigned int
{
    I32_F32 = 0,    // int32_t to float
    F32_I32 = 1,    // float to int32_t
    I32_F64 = 2,    // int32_t to double
    F64_I32 = 3,    // double to int32_t
    I64_F32 = 4,    // int64_t to float
    F32_I64 = 5,    // float to int64_t
    I64_F64 = 6,    // int64_t to double
    F64_I64 = 7,    // double to int64_t
    F32_F64 = 8,    // float to double
    F64_F32 = 9     // double to float
};
31 |
// Enumerated type for rounding control, passed as the rc argument of
// ConvertScalar_Aavx. Do not change the order; the numeric values are
// written out so an accidental reordering is visible.
enum class RC : unsigned int
{
    Nearest = 0,
    Down = 1,
    Up = 2,
    Zero = 3
};
37 |
38 | // Ch12_04_fasm.asm
39 | extern "C" bool ConvertScalar_Aavx(Uval* a, Uval* b, CvtOp cvt_op, RC rc);
40 |
--------------------------------------------------------------------------------
/Chapter12/Ch12_04/Ch12_04.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter12/Ch12_05/Ch12_05.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch12_05.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 |
7 | // Ch12_05_fcpp.cpp
8 | extern bool CalcMeanF32_Cpp(float* mean, const float* x, size_t n);
9 | extern bool CalcStDevF32_Cpp(float* st_dev, const float* x, size_t n, float mean);
10 |
11 | // Ch12_05_fasm.asm
12 | extern "C" bool CalcMeanF32_Aavx(float* mean, const float* x, size_t n);
13 | extern "C" bool CalcStDevF32_Aavx(float* st_dev, const float* x, size_t n, float mean);
14 |
15 | // Miscellaneous constants
16 | const size_t c_NumElements = 91;
17 | const unsigned int c_RngSeed = 13;
18 | const float c_ArrayFillMin = 1.0f;
19 | const float c_ArrayFillMax = 100.0f;
20 |
--------------------------------------------------------------------------------
/Chapter12/Ch12_05/Ch12_05.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter12/Ch12_05/Ch12_05_fcpp.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch12_05_fcpp.cpp
3 | //------------------------------------------------
4 |
5 | #include
6 | #include "Ch12_05.h"
7 |
// Stores the arithmetic mean of x[0..n-1] in *mean and returns true.
// Returns false without writing *mean when n is less than 2.
bool CalcMeanF32_Cpp(float* mean, const float* x, size_t n)
{
    if (n >= 2)
    {
        float total = 0.0f;
        const float* const stop = x + n;

        // Accumulate in array order.
        for (const float* cur = x; cur != stop; ++cur)
            total += *cur;

        *mean = total / n;
        return true;
    }

    return false;
}
21 |
// Stores the standard deviation of x[0..n-1] about the supplied mean in
// *st_dev, using n - 1 as the divisor, and returns true.
// Returns false without writing *st_dev when n is less than 2.
bool CalcStDevF32_Cpp(float* st_dev, const float* x, size_t n, float mean)
{
    if (n >= 2)
    {
        float sum_sq = 0.0f;
        const float* const stop = x + n;

        // Accumulate the squared deviations in array order.
        for (const float* cur = x; cur != stop; ++cur)
        {
            const float delta = *cur - mean;
            sum_sq += delta * delta;
        }

        *st_dev = sqrt(sum_sq / (n - 1));
        return true;
    }

    return false;
}
38 |
--------------------------------------------------------------------------------
/Chapter12/Ch12_06/Ch12_06.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch12_06.cpp
3 | //------------------------------------------------
4 |
5 | #include
6 | #include
7 | #include
8 | #include "Ch12_06.h"
9 |
10 | static void SumIntegers(void);
11 |
12 | int main()
13 | {
14 | SumIntegers();
15 | return 0;
16 | }
17 |
18 | static void SumIntegers(void)
19 | {
20 | int8_t a = 10, e = -20;
21 | int16_t b = -200, f = 400;
22 | int32_t c = -300, g = -600;
23 | int64_t d = 4000, h = -8000;
24 |
25 | int64_t sum = SumIntegers_A(a, b, c, d, e, f, g, h);
26 |
27 | const char nl = '\n';
28 | const size_t w = 7;
29 | std::cout << "----- Results for SumIntegers_A ----- \n";
30 | std::cout << "a: " << std::setw(w) << (int)a << nl;
31 | std::cout << "b: " << std::setw(w) << b << nl;
32 | std::cout << "c: " << std::setw(w) << c << nl;
33 | std::cout << "d: " << std::setw(w) << d << nl;
34 | std::cout << "e: " << std::setw(w) << (int)e << nl;
35 | std::cout << "f: " << std::setw(w) << f << nl;
36 | std::cout << "g: " << std::setw(w) << g << nl;
37 | std::cout << "h: " << std::setw(w) << h << nl;
38 | std::cout << "sum: " << std::setw(w) << sum << nl;
39 | }
40 |
--------------------------------------------------------------------------------
/Chapter12/Ch12_06/Ch12_06.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch12_06.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include
7 |
8 | // Ch12_06_fasm.asm
9 | extern "C" int64_t SumIntegers_A(int8_t a, int16_t b, int32_t c, int64_t d,
10 | int8_t e, int16_t f, int32_t g, int64_t h);
11 |
--------------------------------------------------------------------------------
/Chapter12/Ch12_06/Ch12_06.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter12/Ch12_07/Ch12_07.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch12_07.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include
7 |
8 | // Ch12_07_fasm.asm
9 | extern "C" void CalcSumProd_A(const int64_t* a, const int64_t* b, int32_t n,
10 | int64_t* sum_a, int64_t* sum_b, int64_t* prod_a, int64_t* prod_b);
11 |
--------------------------------------------------------------------------------
/Chapter12/Ch12_07/Ch12_07.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter12/Ch12_08/Ch12_08.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch12_08.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 |
7 | // Ch12_08_fcpp.cpp
8 | extern bool CalcConeAreaVol_Cpp(const double* r, const double* h, int n,
9 | double* sa_cone, double* vol_cone);
10 |
11 | // Ch12_08_fasm.asm
12 | extern "C" bool CalcConeAreaVol_A(const double* r, const double* h, int n,
13 | double* sa_cone, double* vol_cone);
14 |
--------------------------------------------------------------------------------
/Chapter12/Ch12_08/Ch12_08.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter12/Ch12_08/Ch12_08_fcpp.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch12_08_fcpp.cpp
3 | //------------------------------------------------
4 |
5 | #define _USE_MATH_DEFINES
6 | #include
7 | #include "Ch12_08.h"
8 |
// For each i in [0, n), stores in sa_cone[i] and vol_cone[i] the surface
// area and the volume of a cone with radius r[i] and height h[i].
// Returns false without touching the outputs when n <= 0, true otherwise.
bool CalcConeAreaVol_Cpp(const double* r, const double* h, int n, double* sa_cone, double* vol_cone)
{
    const bool have_work = n > 0;

    if (!have_work)
        return false;

    for (int idx = 0; idx != n; ++idx)
    {
        // Slant height: sqrt(r^2 + h^2).
        const double slant = sqrt(r[idx] * r[idx] + h[idx] * h[idx]);

        sa_cone[idx] = M_PI * r[idx] * (r[idx] + slant);
        vol_cone[idx] = M_PI * r[idx] * r[idx] * h[idx] / 3.0;
    }

    return true;
}
22 |
23 |
--------------------------------------------------------------------------------
/Chapter12/Ch12_09/Ch12_09.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch12_09.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 |
7 | // Ch12_09_fcpp.cpp
8 | extern bool CalcBSA_Cpp(const double* ht, const double* wt, int n,
9 | double* bsa1, double* bsa2, double* bsa3);
10 |
11 | // Ch12_09_fasm.asm
12 | extern "C" bool CalcBSA_Aavx(const double* ht, const double* wt, int n,
13 | double* bsa1, double* bsa2, double* bsa3);
14 |
--------------------------------------------------------------------------------
/Chapter12/Ch12_09/Ch12_09.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter12/Ch12_09/Ch12_09_fcpp.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch12_09_fcpp.cpp
3 | //------------------------------------------------
4 |
5 | #include
6 | #include "Ch12_09.h"
7 |
// For each i in [0, n), evaluates three formulas of ht[i] and wt[i] and
// stores the results in bsa1[i], bsa2[i] and bsa3[i].
// Returns false without touching the outputs when n <= 0, true otherwise.
bool CalcBSA_Cpp(const double* ht, const double* wt, int n, double* bsa1, double* bsa2, double* bsa3)
{
    const bool have_work = n > 0;

    if (!have_work)
        return false;

    for (int idx = 0; idx != n; ++idx)
    {
        // bsa1 = 0.007184 * ht^0.725 * wt^0.425
        const double ht_pow1 = pow(ht[idx], 0.725);
        const double wt_pow1 = pow(wt[idx], 0.425);
        bsa1[idx] = 0.007184 * ht_pow1 * wt_pow1;

        // bsa2 = 0.0235 * ht^0.42246 * wt^0.51456
        const double ht_pow2 = pow(ht[idx], 0.42246);
        const double wt_pow2 = pow(wt[idx], 0.51456);
        bsa2[idx] = 0.0235 * ht_pow2 * wt_pow2;

        // bsa3 = sqrt(ht * wt / 3600)
        bsa3[idx] = sqrt(ht[idx] * wt[idx] / 3600.0);
    }

    return true;
}
22 |
--------------------------------------------------------------------------------
/Chapter13/Ch13_01/Ch13_01.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch13_01.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include "XmmVal.h"
7 |
8 | // Ch13_01_fasm.asm
9 | extern "C" void AddI16_Aavx(XmmVal* c1, XmmVal* c2, const XmmVal* a, const XmmVal* b);
10 | extern "C" void SubI16_Aavx(XmmVal* c1, XmmVal* c2, const XmmVal* a, const XmmVal* b);
11 |
--------------------------------------------------------------------------------
/Chapter13/Ch13_01/Ch13_01.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter13/Ch13_02/Ch13_02.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch13_02.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include "XmmVal.h"
7 |
8 | // Ch13_02_fasm.asm
9 | extern "C" void MulI16_Aavx(XmmVal c[2], const XmmVal* a, const XmmVal* b);
10 | extern "C" void MulI32a_Aavx(XmmVal* c, const XmmVal* a, const XmmVal* b);
11 | extern "C" void MulI32b_Aavx(XmmVal c[2], const XmmVal* a, const XmmVal* b);
12 |
--------------------------------------------------------------------------------
/Chapter13/Ch13_02/Ch13_02.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter13/Ch13_03/Ch13_03.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch13_03.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include "XmmVal.h"
7 |
8 | // Ch13_03_fasm.asm
9 | extern "C" void AndU16_Aavx(XmmVal* c, const XmmVal* a, const XmmVal* b);
10 | extern "C" void OrU16_Aavx(XmmVal* c, const XmmVal* a, const XmmVal* b);
11 | extern "C" void XorU16_Aavx(XmmVal* c, const XmmVal* a, const XmmVal* b);
12 |
--------------------------------------------------------------------------------
/Chapter13/Ch13_03/Ch13_03.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter13/Ch13_04/Ch13_04.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch13_04.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include "XmmVal.h"
7 |
8 | // Ch13_04_fasm.asm
9 | extern "C" void SllU16_Aavx(XmmVal* c, const XmmVal* a, int count);
10 | extern "C" void SrlU16_Aavx(XmmVal* c, const XmmVal* a, int count);
11 | extern "C" void SraU16_Aavx(XmmVal* c, const XmmVal* a, int count);
12 |
--------------------------------------------------------------------------------
/Chapter13/Ch13_04/Ch13_04.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter13/Ch13_05/Ch13_05.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch13_05.cpp
3 | //------------------------------------------------
4 |
5 | #include
6 | #include "Ch13_05.h"
7 | #include "AlignedMem.h"
8 |
9 | static void CalcMinMaxU8();
10 |
11 | int main()
12 | {
13 | CalcMinMaxU8();
14 | CalcMinMaxU8_bm();
15 | }
16 |
17 | static void CalcMinMaxU8()
18 | {
19 | const char nl = '\n';
20 | size_t n = c_NumElements;
21 | AlignedArray x_aa(n, 16);
22 | uint8_t* x = x_aa.Data();
23 |
24 | InitArray(x, n, c_RngSeedVal);
25 |
26 | uint8_t x_min0 = 0, x_max0 = 0;
27 | uint8_t x_min1 = 0, x_max1 = 0;
28 |
29 | bool rc0 = CalcMinMaxU8_Cpp(&x_min0, &x_max0, x, n);
30 | bool rc1 = CalcMinMaxU8_Aavx(&x_min1, &x_max1, x, n);
31 |
32 | std::cout << "\nResults for CalcMinMaxU8_Cpp\n";
33 | std::cout << "rc0: " << rc0 << " x_min0: " << (int)x_min0;
34 | std::cout << " x_max0: " << (int)x_max0 << nl;
35 |
36 | std::cout << "\nResults for CalcMinMaxU8_Aavx\n";
37 | std::cout << "rc1: " << rc1 << " x_min1: " << (int)x_min1;
38 | std::cout << " x_max1: " << (int)x_max1 << nl;
39 | }
40 |
--------------------------------------------------------------------------------
/Chapter13/Ch13_05/Ch13_05.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch13_05.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include <cstddef>
7 | #include <cstdint>
8 |
9 | // Ch13_05_fcpp.cpp
10 | extern bool CalcMinMaxU8_Cpp(uint8_t* x_min, uint8_t* x_max,
11 | const uint8_t* x, size_t n);
12 |
13 | // Ch13_05_fasm.asm
14 | extern "C" bool CalcMinMaxU8_Aavx(uint8_t* x_min, uint8_t* x_max,
15 | const uint8_t* x, size_t n);
16 |
17 | // Ch13_05_misc.cpp
18 | extern void InitArray(uint8_t* x, size_t n, unsigned int rng_seed);
19 |
20 | // Ch13_05_bm.cpp
21 | extern void CalcMinMaxU8_bm();
22 |
23 | // c_NumElements must be > 0 and an integral multiple of 16
24 | const size_t c_NumElements = 10000000;
25 | const unsigned int c_RngSeedVal = 23;
26 |
--------------------------------------------------------------------------------
/Chapter13/Ch13_05/Ch13_05.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter13/Ch13_05/Ch13_05_bm.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch13_05_bm.cpp
3 | //------------------------------------------------
4 |
5 | #include <iostream>
6 | #include "Ch13_05.h"
7 | #include "AlignedMem.h"
8 | #include "BmThreadTimer.h"
9 |
10 | void CalcMinMaxU8_bm(void)
11 | {
12 | std::cout << "\nRunning benchmark function CalcMinMaxU8_bm - please wait\n";
13 |
14 | size_t n = c_NumElements;
15 | AlignedArray x_aa(n, 16);
16 | uint8_t* x = x_aa.Data();
17 |
18 | InitArray(x, n, c_RngSeedVal);
19 |
20 | uint8_t x_min0 = 0, x_max0 = 0;
21 | uint8_t x_min1 = 0, x_max1 = 0;
22 |
23 | const size_t num_it = 500;
24 | const size_t num_alg = 2;
25 | BmThreadTimer bmtt(num_it, num_alg);
26 |
27 | for (size_t i = 0; i < num_it; i++)
28 | {
29 | bmtt.Start(i, 0);
30 | CalcMinMaxU8_Cpp(&x_min0, &x_max0, x, n);
31 | bmtt.Stop(i, 0);
32 |
33 | bmtt.Start(i, 1);
34 | CalcMinMaxU8_Aavx(&x_min1, &x_max1, x, n);
35 | bmtt.Stop(i, 1);
36 | }
37 |
38 | std::string fn = bmtt.BuildCsvFilenameString("Ch13_05_CalcMinMaxU8_bm");
39 | bmtt.SaveElapsedTimes(fn, BmThreadTimer::EtUnit::MicroSec, 2);
40 | std::cout << "Benchmark times save to file " << fn << '\n';
41 | }
42 |
--------------------------------------------------------------------------------
/Chapter13/Ch13_05/Ch13_05_fcpp.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch13_05_fcpp.cpp
3 | //------------------------------------------------
4 |
5 | #include "Ch13_05.h"
6 | #include "AlignedMem.h"
7 |
8 | bool CalcMinMaxU8_Cpp(uint8_t* x_min, uint8_t* x_max, const uint8_t* x, size_t n)
9 | {
10 | if (n == 0 || (n & 0xf) != 0)
11 | return false;
12 |
13 | if (!AlignedMem::IsAligned(x, 16))
14 | return false;
15 |
16 | uint8_t min_val = 0xff;
17 | uint8_t max_val = 0;
18 |
19 | for (size_t i = 0; i < n; i++)
20 | {
21 | uint8_t val = *x++;
22 |
23 | if (val < min_val)
24 | min_val = val;
25 | else if (val > max_val)
26 | max_val = val;
27 | }
28 |
29 | *x_min = min_val;
30 | *x_max = max_val;
31 | return true;
32 | }
33 |
--------------------------------------------------------------------------------
/Chapter13/Ch13_05/Ch13_05_misc.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch13_05_misc.cpp
3 | //------------------------------------------------
4 |
5 | #include "Ch13_05.h"
6 | #include "MT.h"
7 |
8 | void InitArray(uint8_t* x, size_t n, unsigned int rng_seed)
9 | {
10 | int rng_min_val = 5;
11 | int rng_max_val = 250;
12 | MT::FillArray(x, n, rng_min_val, rng_max_val, rng_seed);
13 |
14 | // Use known values for min & max (for validation)
15 | x[(n / 4) * 3 + 1] = 2;
16 | x[n / 4 + 11] = 3;
17 | x[n / 2] = 252;
18 | x[n / 2 + 13] = 253;
19 | x[n / 8 + 5] = 4;
20 | x[n / 8 + 7] = 254;
21 | }
22 |
--------------------------------------------------------------------------------
/Chapter13/Ch13_06/Ch13_06.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch13_06.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include <cstddef>
7 | #include <cstdint>
8 |
9 | // Ch13_06_fcpp.cpp
10 | extern bool CalcMeanU8_Cpp(double* mean_x, uint64_t* sum_x, const uint8_t* x, size_t n);
11 |
12 | // Ch13_06_fasm.asm
13 | extern "C" bool CalcMeanU8_Aavx(double* mean_x, uint64_t* sum_x, const uint8_t* x, size_t n);
14 |
15 | // Ch13_06_misc.cpp
16 | extern void InitArray(uint8_t* x, size_t n, unsigned int seed);
17 | extern bool CheckArgs(const uint8_t* x, size_t n);
18 |
19 | // Ch13_06_bm.cpp
20 | extern void CalcMeanU8_bm(void);
21 |
22 | // Miscellaneous constants
23 | const size_t c_NumElements = 10000000;
24 | const size_t c_Alignment = 16;
25 | const unsigned int c_RngSeedVal = 29;
26 | extern "C" size_t g_NumElementsMax;
27 |
--------------------------------------------------------------------------------
/Chapter13/Ch13_06/Ch13_06.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter13/Ch13_06/Ch13_06_bm.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch13_06_bm.cpp
3 | //------------------------------------------------
4 |
5 | #include "Ch13_06.h"
6 | #include "AlignedMem.h"
7 | #include "BmThreadTimer.h"
8 |
9 | void CalcMeanU8_bm(void)
10 | {
11 | std::cout << "\nRunning benchmark function CalcMeanU8_bm - please wait\n";
12 |
13 | size_t n = c_NumElements;
14 | AlignedArray x_aa(n, c_Alignment);
15 | uint8_t* x = x_aa.Data();
16 |
17 | InitArray(x, n, c_RngSeedVal);
18 |
19 | uint64_t sum_x0, sum_x1;
20 | double mean_x0, mean_x1;
21 |
22 | const size_t num_it = 500;
23 | const size_t num_alg = 2;
24 | BmThreadTimer bmtt(num_it, num_alg);
25 |
26 | for (size_t i = 0; i < num_it; i++)
27 | {
28 | bmtt.Start(i, 0);
29 | CalcMeanU8_Cpp(&mean_x0, &sum_x0, x, n);
30 | bmtt.Stop(i, 0);
31 |
32 | bmtt.Start(i, 1);
33 | CalcMeanU8_Aavx(&mean_x1, &sum_x1, x, n);
34 | bmtt.Stop(i, 1);
35 | }
36 |
37 | std::string fn = bmtt.BuildCsvFilenameString("Ch13_06_CalcMeanU8_bm");
38 | bmtt.SaveElapsedTimes(fn, BmThreadTimer::EtUnit::MicroSec, 2);
39 | std::cout << "Benchmark times save to file " << fn << '\n';
40 | }
41 |
--------------------------------------------------------------------------------
/Chapter13/Ch13_06/Ch13_06_fcpp.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch13_06_fcpp.cpp
3 | //------------------------------------------------
4 |
5 | #include
6 | #include "Ch13_06.h"
7 |
8 | bool CalcMeanU8_Cpp(double* mean_x, uint64_t* sum_x, const uint8_t* x, size_t n)
9 | {
10 | if (!CheckArgs(x, n))
11 | return false;
12 |
13 | uint64_t sum_x_temp = 0;
14 |
15 | for (size_t i = 0; i < n; i++)
16 | sum_x_temp += x[i];
17 |
18 | *sum_x = sum_x_temp;
19 | *mean_x = (double)sum_x_temp / n;
20 | return true;
21 | }
22 |
--------------------------------------------------------------------------------
/Chapter13/Ch13_06/Ch13_06_misc.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch13_06_misc.cpp
3 | //------------------------------------------------
4 |
5 | #include "Ch13_06.h"
6 | #include "MT.h"
7 | #include "AlignedMem.h"
8 |
// Largest element count that CheckArgs() accepts. Ch13_06.h declares this
// variable with extern "C" linkage, which this definition inherits; the
// definition itself takes no 'extern' keyword because an initialized
// 'extern' declaration draws compiler warnings.
size_t g_NumElementsMax = 64 * 1024 * 1024;
10 |
11 | bool CheckArgs(const uint8_t* x, size_t n)
12 | {
13 | if (n == 0 || n > g_NumElementsMax)
14 | return false;
15 |
16 | if ((n % 64) != 0)
17 | return false;
18 |
19 | if (!AlignedMem::IsAligned(x, c_Alignment))
20 | return false;
21 |
22 | return true;
23 | }
24 |
25 | void InitArray(uint8_t* x, size_t n, unsigned int rng_seed)
26 | {
27 | int rng_min_val = 0;
28 | int rng_max_val = 255;
29 | MT::FillArray(x, n, rng_min_val, rng_max_val, rng_seed);
30 | }
31 |
--------------------------------------------------------------------------------
/Chapter14/Ch14_01/Ch14_01.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch14_01.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include "YmmVal.h"
7 |
8 | // Ch14_01_fasm.asm
9 | extern "C" void PackedMathF32_Aavx(YmmVal c[8], const YmmVal* a, const YmmVal* b);
10 | extern "C" void PackedMathF64_Aavx(YmmVal c[8], const YmmVal* a, const YmmVal* b);
11 |
12 |
--------------------------------------------------------------------------------
/Chapter14/Ch14_01/Ch14_01.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter14/Ch14_02/Ch14_02.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch14_02.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include "YmmVal.h"
7 |
8 | // Ch14_02_fasm.asm
9 | extern "C" void PackedCompareF32_Aavx(YmmVal c[8], const YmmVal* a, const YmmVal* b);
10 | extern "C" void PackedCompareF64_Aavx(YmmVal c[8], const YmmVal* a, const YmmVal* b);
11 |
--------------------------------------------------------------------------------
/Chapter14/Ch14_02/Ch14_02.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter14/Ch14_03/Ch14_03.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch14_03.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 |
7 | // Ch14_03_fcpp.cpp
8 | extern bool CalcMeanF32_Cpp(float* mean, const float* x, size_t n);
9 | extern bool CalcStDevF32_Cpp(float* st_dev, const float* x, size_t n, float mean);
10 |
11 | // Ch14_03_fasm.asm
12 | extern "C" bool CalcMeanF32_Aavx(float* mean, const float* x, size_t n);
13 | extern "C" bool CalcStDevF32_Aavx(float* st_dev, const float* x, size_t n,
14 | float mean);
15 |
16 | // Ch14_03_misc.cpp
17 | extern bool CheckArgs(const float* x, size_t n);
18 | extern void InitArray(float* x, size_t n);
19 |
20 | // Miscellaneous constants
21 | const size_t c_NumElements = 91;
22 | const unsigned int c_RngSeed = 13;
23 | const float c_ArrayFillMin = 1.0f;
24 | const float c_ArrayFillMax = 100.0f;
25 | const size_t c_Alignment = 32;
26 |
--------------------------------------------------------------------------------
/Chapter14/Ch14_03/Ch14_03.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter14/Ch14_03/Ch14_03_fcpp.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch14_03_fcpp.cpp
3 | //------------------------------------------------
4 |
5 | #include <cmath>
6 | #include "Ch14_03.h"
7 |
8 | bool CalcMeanF32_Cpp(float* mean, const float* x, size_t n)
9 | {
10 | if (!CheckArgs(x, n))
11 | return false;
12 |
13 | float sum = 0.0f;
14 |
15 | for (size_t i = 0; i < n; i++)
16 | sum += x[i];
17 |
18 | *mean = sum / n;
19 | return true;
20 | }
21 |
22 | bool CalcStDevF32_Cpp(float* st_dev, const float* x, size_t n, float mean)
23 | {
24 | if (!CheckArgs(x, n))
25 | return false;
26 |
27 | float sum_squares = 0.0f;
28 |
29 | for (size_t i = 0; i < n; i++)
30 | {
31 | float temp = x[i] - mean;
32 | sum_squares += temp * temp;
33 | }
34 |
35 | *st_dev = sqrt(sum_squares / (n - 1));
36 | return true;
37 | }
38 |
--------------------------------------------------------------------------------
/Chapter14/Ch14_03/Ch14_03_misc.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch14_03_misc.cpp
3 | //------------------------------------------------
4 |
5 | #include "Ch14_03.h"
6 | #include "AlignedMem.h"
7 | #include "MT.h"
8 |
9 | bool CheckArgs(const float* x, size_t n)
10 | {
11 | return ((n >= 2) && AlignedMem::IsAligned(x, c_Alignment));
12 | }
13 |
14 | void InitArray(float* x, size_t n)
15 | {
16 | MT::FillArrayFP(x, n, c_ArrayFillMin, c_ArrayFillMax, c_RngSeed);
17 | }
18 |
--------------------------------------------------------------------------------
/Chapter14/Ch14_04/Ch14_04.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch14_04.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 |
7 | // The members of PA below must match the PA structure
8 | // that's declared in Ch14_04_fasm.asm
9 |
10 | struct PA // Bundle of array pointers plus the point count shared by the distance functions
11 | {
12 | double* X1; // x-coordinates of the first point set (read by CalcDistancesF64)
13 | double* Y1; // y-coordinates of the first point set (read by CalcDistancesF64)
14 | double* X2; // x-coordinates of the second point set (read by CalcDistancesF64)
15 | double* Y2; // y-coordinates of the second point set (read by CalcDistancesF64)
16 | double* Dist1; // distances written by CalcDistancesF64, read by CompareDistancesF64
17 | double* Dist2; // NOTE(review): presumably the output of CalcDistancesF64_Aavx - confirm
18 | double* DistCmp1; // results written by CompareDistancesF64
19 | double* DistCmp2; // NOTE(review): presumably the output of CompareDistancesF64_Aavx - confirm
20 | size_t NumPoints; // number of elements processed in each array
21 | };
22 |
23 | // Ch14_04_fcpp.cpp
24 | extern bool CalcDistancesF64(PA& pa);
25 | extern void CompareDistancesF64(PA& pa, double cmp_val);
26 |
27 | // Ch14_04_fasm.asm
28 | extern "C" bool CalcDistancesF64_Aavx(PA& pa);
29 | extern "C" void CompareDistancesF64_Aavx(PA& pa, const double* cmp_val);
30 |
31 | // Ch14_04_misc.cpp
32 | extern "C" bool CheckArgs(PA& pa);
33 | extern void FillPAF64(PA& pa, double min_val, double max_val, unsigned int rng_seed);
34 |
35 | // Miscellaneous constants
36 | const size_t c_NumPoints = 21;
37 | const unsigned int c_RngSeed = 39;
38 | const double c_ArrayFillMin = 1.0;
39 | const double c_ArrayFillMax = 75.0;
40 | const double c_CmpVal = 50.0;
41 | const size_t c_Alignment = 32;
42 |
43 |
--------------------------------------------------------------------------------
/Chapter14/Ch14_04/Ch14_04.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter14/Ch14_04/Ch14_04_fcpp.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch14_04_fcpp.cpp
3 | //------------------------------------------------
4 |
5 | #include <cmath>
6 | #include "Ch14_04.h"
7 |
8 | bool CalcDistancesF64(PA& pa)
9 | {
10 | if (!CheckArgs(pa))
11 | return false;
12 |
13 | size_t num_points = pa.NumPoints;
14 |
15 | for (size_t i = 0; i < num_points; i++)
16 | {
17 | double temp1 = pa.X1[i] - pa.X2[i];
18 | double temp2 = pa.Y1[i] - pa.Y2[i];
19 |
20 | pa.Dist1[i] = sqrt(temp1 * temp1 + temp2 * temp2);
21 | }
22 |
23 | return true;
24 | }
25 |
26 | void CompareDistancesF64(PA& pa, double cmp_val)
27 | {
28 | size_t num_points = pa.NumPoints;
29 |
30 | for (size_t i = 0; i < num_points; i++)
31 | {
32 | double temp1 = pa.Dist1[i];
33 | double temp2 = (temp1 >= cmp_val) ? temp1 * -2.0 : temp1;
34 |
35 | pa.DistCmp1[i] = temp2;
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/Chapter14/Ch14_05/Ch14_05.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch14_05.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 |
7 | // Ch14_05_fcpp.cpp
8 | extern void CalcColumnMeansF64_Cpp(double* col_means, const double* x, size_t nrows,
9 | size_t ncols);
10 |
11 | // Ch14_05_fasm.asm
12 | extern "C" void CalcColumnMeansF64_Aavx(double* col_means, const double* x,
13 | size_t nrows, size_t ncols);
14 |
15 | // Miscellaneous constants
16 | const unsigned int c_RngSeed = 41;
17 | const double c_MatrixFillMin = 1.0;
18 | const double c_MatrixFillMax = 80.0;
19 |
--------------------------------------------------------------------------------
/Chapter14/Ch14_05/Ch14_05.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter14/Ch14_05/Ch14_05_fcpp.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch14_05_fcpp.cpp
3 | //------------------------------------------------
4 |
5 | #include "Ch14_05.h"
6 |
// Computes the mean of every column of a row-major nrows x ncols matrix.
//
//   col_means  output array of ncols elements
//   x          matrix data; element (i, j) is stored at x[i * ncols + j]
//   nrows      number of rows (also the divisor of each column sum)
//   ncols      number of columns
void CalcColumnMeansF64_Cpp(double* col_means, const double* x, size_t nrows,
    size_t ncols)
{
    // Start every column sum at zero.
    for (size_t col = 0; col != ncols; ++col)
        col_means[col] = 0.0;

    // Walk the matrix one row at a time, adding each row to the sums.
    const double* row = x;

    for (size_t r = 0; r != nrows; ++r, row += ncols)
    {
        for (size_t col = 0; col != ncols; ++col)
            col_means[col] += row[col];
    }

    // Turn the sums into means.
    const double divisor = static_cast<double>(nrows);

    for (size_t col = 0; col != ncols; ++col)
        col_means[col] /= divisor;
}
22 |
--------------------------------------------------------------------------------
/Chapter15/Ch15_01/Ch15_01.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch15_01.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include "YmmVal.h"
7 |
8 | // Ch15_01_fasm.asm
9 | extern "C" void MathI16_Aavx2(YmmVal c[6], const YmmVal* a, const YmmVal* b);
10 | extern "C" void MathI32_Aavx2(YmmVal c[6], const YmmVal* a, const YmmVal* b);
11 |
--------------------------------------------------------------------------------
/Chapter15/Ch15_01/Ch15_01.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter15/Ch15_02/Ch15_02.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch15_02.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include "YmmVal.h"
7 |
8 | // Ch15_02_fasm.asm
9 | extern "C" void ZeroExtU8_U16_Aavx2(YmmVal c[2], YmmVal* a);
10 | extern "C" void ZeroExtU8_U32_Aavx2(YmmVal c[4], YmmVal* a);
11 | extern "C" void SignExtI16_I32_Aavx2(YmmVal c[2], YmmVal* a);
12 | extern "C" void SignExtI16_I64_Aavx2(YmmVal c[4], YmmVal* a);
13 |
14 |
--------------------------------------------------------------------------------
/Chapter15/Ch15_02/Ch15_02.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter15/Ch15_03/Ch15_03.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter15/Ch15_03/Ch15_03_fcpp.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch15_03_fcpp.cpp
3 | //------------------------------------------------
4 |
5 | #include "Ch15_03.h"
6 | #include "AlignedMem.h"
7 |
8 | void ClipPixels_Cpp(ClipData* clip_data)
9 | {
10 | if (!CheckArgs(clip_data))
11 | throw std::runtime_error("ClipPixels_Cpp() - CheckArgs failed");
12 |
13 | uint8_t* pb_src = clip_data->m_PbSrc;
14 | uint8_t* pb_des = clip_data->m_PbDes;
15 | size_t num_pixels = clip_data->m_NumPixels;
16 | size_t num_clipped_pixels = 0;
17 | uint8_t thresh_lo = clip_data->m_ThreshLo;
18 | uint8_t thresh_hi = clip_data->m_ThreshHi;
19 |
20 | for (size_t i = 0; i < num_pixels; i++)
21 | {
22 | uint8_t pixel = pb_src[i];
23 |
24 | if (pixel < thresh_lo)
25 | {
26 | pb_des[i] = thresh_lo;
27 | num_clipped_pixels++;
28 | }
29 | else if (pixel > thresh_hi)
30 | {
31 | pb_des[i] = thresh_hi;
32 | num_clipped_pixels++;
33 | }
34 | else
35 | pb_des[i] = pb_src[i];
36 | }
37 |
38 | clip_data->m_NumClippedPixels = num_clipped_pixels;
39 | }
40 |
--------------------------------------------------------------------------------
/Chapter15/Ch15_03/Ch15_03_misc.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch15_03_misc.cpp
3 | //------------------------------------------------
4 |
5 | #include "Ch15_03.h"
6 | #include "AlignedMem.h"
7 |
8 | bool CheckArgs(const ClipData* clip_data)
9 | {
10 | if (clip_data->m_NumPixels == 0)
11 | return false;
12 |
13 | if (!AlignedMem::IsAligned(clip_data->m_PbSrc, c_Alignment))
14 | return false;
15 |
16 | if (!AlignedMem::IsAligned(clip_data->m_PbDes, c_Alignment))
17 | return false;
18 |
19 | return true;
20 | }
21 |
22 |
--------------------------------------------------------------------------------
/Chapter15/Ch15_04/Ch15_04.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch15_04.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include <cstddef>
7 | #include <cstdint>
8 | #include "ImageMisc.h"
9 |
10 | // Ch15_04.cpp
11 | extern const float c_Coef[4];
12 | extern const char* c_TestImageFileName;
13 |
14 | // Ch15_04_fcpp.cpp
15 | extern void ConvertRgbToGs_Cpp(uint8_t* pb_gs, const RGB32* pb_rgb, size_t num_pixels, const float coef[4]);
16 |
17 | // Ch15_04_fasm.asm
18 | extern "C" void ConvertRgbToGs_Aavx2(uint8_t* pb_gs, const RGB32* pb_rgb, size_t num_pixels, const float coef[4]);
19 |
20 | // Ch15_04_misc.cpp
21 | extern bool CheckArgs(const uint8_t* pb_gs, const RGB32* pb_rgb, size_t num_pixels, const float coef[4]);
22 |
23 | // Ch15_04_bm.cpp
24 | extern void ConvertRgbToGs_bm(void);
25 | extern bool CompareGsPixelBuffers(const uint8_t* pb_gs1, const uint8_t* pb_gs2, size_t num_pixels);
26 |
27 | // Miscellaneous constants
28 | const size_t c_Alignment = 32;
29 | const size_t c_NumPixelsMax = 256 * 1024 * 1024;
30 | extern "C" size_t g_NumPixelsMax;
31 |
32 |
--------------------------------------------------------------------------------
/Chapter15/Ch15_04/Ch15_04.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter15/Ch15_04/Ch15_04_fcpp.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch15_04_fcpp.cpp
3 | //------------------------------------------------
4 |
5 | #include
6 | #include
7 | #include "Ch15_04.h"
8 | #include "ImageMisc.h"
9 |
10 | void ConvertRgbToGs_Cpp(uint8_t* pb_gs, const RGB32* pb_rgb, size_t num_pixels, const float coef[4])
11 | {
12 | if (!CheckArgs(pb_gs, pb_rgb, num_pixels, coef))
13 | throw std::runtime_error("ConvertRgbToGs_Cpp() - CheckArgs failed");
14 |
15 | for (size_t i = 0; i < num_pixels; i++)
16 | {
17 | uint8_t r = pb_rgb[i].m_R;
18 | uint8_t g = pb_rgb[i].m_G;
19 | uint8_t b = pb_rgb[i].m_B;
20 |
21 | float gs_temp = r * coef[0] + g * coef[1] + b * coef[2] + 0.5f;
22 |
23 | if (gs_temp > 255.0f)
24 | gs_temp = 255.0f;
25 |
26 | pb_gs[i] = (uint8_t)gs_temp;
27 | }
28 | }
29 |
--------------------------------------------------------------------------------
/Chapter15/Ch15_04/Ch15_04_misc.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch15_04_misc.cpp
3 | //------------------------------------------------
4 |
5 | #include "Ch15_04.h"
6 | #include "AlignedMem.h"
7 |
8 | size_t g_NumPixelsMax = c_NumPixelsMax; // For use by assembly language function
9 |
10 | bool CheckArgs(const uint8_t* pb_gs, const RGB32* pb_rgb, size_t num_pixels, const float coef[4])
11 | {
12 | if (num_pixels > c_NumPixelsMax)
13 | return false;
14 |
15 | if (num_pixels % 8 != 0)
16 | return false;
17 |
18 | if (!AlignedMem::IsAligned(pb_gs, c_Alignment))
19 | return false;
20 |
21 | if (!AlignedMem::IsAligned(pb_rgb, c_Alignment))
22 | return false;
23 |
24 | if (coef[0] < 0.0f || coef[1] < 0.0f || coef[2] < 0.0f)
25 | return false;
26 |
27 | return true;
28 | }
29 |
// Compares two grayscale buffers pixel by pixel and returns true when no
// pair of corresponding pixels differs by more than 1 (an empty range
// therefore compares equal).
bool CompareGsPixelBuffers(const uint8_t* pb_gs1, const uint8_t* pb_gs2, size_t num_pixels)
{
    for (size_t k = 0; k != num_pixels; ++k)
    {
        const int delta = static_cast<int>(pb_gs1[k]) - static_cast<int>(pb_gs2[k]);

        if (delta < -1 || delta > 1)
            return false;
    }

    return true;
}
40 |
--------------------------------------------------------------------------------
/Chapter15/Ch15_05/Ch15_05.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch15_05.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include <cstddef>
7 | #include <cstdint>
8 |
9 | // Ch15_05_fcpp.cpp
10 | extern void ConvertU8ToF32_Cpp(float* pb_des, const uint8_t* pb_src, size_t num_pixels);
11 |
12 | // Ch15_05_fasm.asm
13 | extern "C" void ConvertU8ToF32_Aavx2(float* pb_des, const uint8_t* pb_src,
14 | size_t num_pixels);
15 |
16 | // Ch15_05_misc.cpp
17 | extern void BuildLUT_U8ToF32(void);
18 | extern bool CheckArgs(const void* pb1, const void* pb2, size_t num_pixels);
19 | extern size_t CompareArraysF32(const float* pb_src1, const float* pb_src2,
20 | size_t num_pixels);
21 |
22 | // Ch15_05_bm.cpp
23 | extern void ConvertU8ToF32_bm(void);
24 |
25 | // Miscellaneous constants
26 | const size_t c_Alignment = 32;
27 | const size_t c_NumPixels = 1024 * 1024 + 19;
28 | const size_t c_NumPixelsBM = 10000000;
29 | const size_t c_NumPixelsMax = 16 * 1024 * 1024;
30 | const int c_FillMinVal = 0;
31 | const int c_FillMaxVal = 255;
32 | const unsigned int c_RngSeed = 71;
33 |
34 | extern "C" float g_LUT_U8ToF32[];
35 | extern "C" size_t g_NumPixelsMax;
36 |
--------------------------------------------------------------------------------
/Chapter15/Ch15_05/Ch15_05.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter15/Ch15_05/Ch15_05_fcpp.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch15_05_fcpp.cpp
3 | //------------------------------------------------
4 |
5 | #include <stdexcept>
6 | #include "Ch15_05.h"
7 |
8 | void ConvertU8ToF32_Cpp(float* pb_des, const uint8_t* pb_src, size_t num_pixels)
9 | {
10 | if (!CheckArgs(pb_des, pb_src, num_pixels))
11 | throw std::runtime_error("ConvertU8ToF32_Cpp() CheckArgs failed");
12 |
13 | const float* lut = g_LUT_U8ToF32;
14 |
15 | for (size_t i = 0; i < num_pixels; i++)
16 | pb_des[i] = lut[pb_src[i]];
17 | }
18 |
--------------------------------------------------------------------------------
/Chapter16/Ch16_01/Ch16_01.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch16_01.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 |
7 | // Ch16_01_fcpp.cpp
8 | extern void CalcLeastSquares_Cpp(double* m, double* b, const double* x, const double* y, size_t n);
9 |
10 | // Ch16_01_fasm.asm
11 | extern "C" void CalcLeastSquares_Aavx2(double* m, double* b, const double* x, const double* y, size_t n);
12 |
13 | // Ch16_01_misc.cpp
14 | extern bool CheckArgs(const double* x, const double* y, size_t n);
15 | extern void FillArrays(double* x, double* y, size_t n);
16 |
17 | // Miscellaneous constants
18 | const size_t c_Alignment = 32;
19 | const double c_LsEpsilon = 1.0e-12;
20 | extern "C" double g_LsEpsilon;
21 |
--------------------------------------------------------------------------------
/Chapter16/Ch16_01/Ch16_01.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter16/Ch16_01/Ch16_01_fcpp.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch16_01_fcpp.cpp
3 | //------------------------------------------------
4 |
5 | #include <cmath>
6 | #include <stdexcept>
7 | #include "Ch16_01.h"
8 |
9 | void CalcLeastSquares_Cpp(double* m, double* b, const double* x, const double* y, size_t n)
10 | {
11 | *m = 0.0;
12 | *b = 0.0;
13 |
14 | if (!CheckArgs(x, y, n))
15 | throw std::runtime_error("CalcLeastSquares_cpp() CheckArgs failed");
16 |
17 | double sum_x = 0.0, sum_y = 0.0, sum_xx = 0.0, sum_xy = 0.0;
18 |
19 | for (size_t i = 0; i < n; i++)
20 | {
21 | sum_x += x[i];
22 | sum_y += y[i];
23 | sum_xx += x[i] * x[i];
24 | sum_xy += x[i] * y[i];
25 | }
26 |
27 | double denom = n * sum_xx - sum_x * sum_x;
28 |
29 | if (fabs(denom) >= c_LsEpsilon)
30 | {
31 | *m = (n * sum_xy - sum_x * sum_y) / denom;
32 | *b = (sum_xx * sum_y - sum_x * sum_xy) / denom;
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/Chapter16/Ch16_01/Ch16_01_misc.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch16_01_misc.cpp
3 | //------------------------------------------------
4 |
5 | #include
6 | #include "Ch16_01.h"
7 | #include "AlignedMem.h"
8 | #include "MT.h"
9 |
10 | extern "C" double g_LsEpsilon = c_LsEpsilon; // Copy of c_LsEpsilon with C linkage (declared extern "C" in Ch16_01.h); NOTE(review): presumably read by the assembly routine - confirm
11 |
12 | bool CheckArgs(const double* x, const double* y, size_t n)
13 | {
14 | if (n < 2)
15 | return false;
16 |
17 | if (!AlignedMem::IsAligned(x, c_Alignment))
18 | return false;
19 |
20 | if (!AlignedMem::IsAligned(y, c_Alignment))
21 | return false;
22 |
23 | return true;
24 | }
25 |
26 | void FillArrays(double* x, double* y, size_t n)
27 | {
28 | const unsigned int rng_seed1 = 73;
29 | const unsigned int rng_seed2 = 83;
30 | const double fill_min_val = -25.0;
31 | const double fill_max_val = 25.0;
32 |
33 | MT::FillArrayFP(x, n, fill_min_val, fill_max_val, rng_seed1);
34 | MT::FillArrayFP(y, n, fill_min_val, fill_max_val, rng_seed2);
35 |
36 | for (size_t i = 0; i < n; i++)
37 | y[i] = y[i] * y[i];
38 | }
39 |
--------------------------------------------------------------------------------
/Chapter16/Ch16_02/Ch16_02.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch16_02.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include "MatrixF32.h"
7 |
8 | // Ch16_02_fcpp.cpp
9 | extern void MatrixMulF32_Cpp(MatrixF32& c, const MatrixF32& a, const MatrixF32& b);
10 |
11 | // Ch16_02_fasm2.asm
12 | extern "C" void MatrixMulF32_Aavx2(float* c, const float* a, const float* b,
13 | const size_t* sizes);
14 |
15 | // Ch16_02_misc.cpp
16 | extern bool CheckArgs(const MatrixF32& c, const MatrixF32& a, const MatrixF32& b);
17 | extern void InitMat(MatrixF32& c1, MatrixF32& c2, MatrixF32& a, MatrixF32& b);
18 | extern void SaveResults(const MatrixF32& c1, const MatrixF32& c2, const MatrixF32& a,
19 | const MatrixF32& b);
20 |
21 | // Ch16_02_bm.cpp
22 | extern void MatrixMulF32_bm(void);
23 |
24 | // Ch16_02_test.cpp
25 | extern void MatrixMulF32p_Iavx2(MatrixF32& c, const MatrixF32& a, const MatrixF32& b);
26 |
--------------------------------------------------------------------------------
/Chapter16/Ch16_02/Ch16_02.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter16/Ch16_02/Ch16_02_fcpp.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch16_02_fcpp.cpp
3 | //------------------------------------------------
4 |
5 | #include
6 | #include "Ch16_02.h"
7 |
8 | void MatrixMulF32_Cpp(MatrixF32& c, const MatrixF32& a, const MatrixF32& b)
9 | {
10 | MatrixF32::Mul(c, a, b);
11 | }
12 |
--------------------------------------------------------------------------------
/Chapter16/Ch16_03/Ch16_03.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch16_03.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include "MatrixF32.h"
7 |
8 | // Ch16_03_fcpp.cpp
9 | extern void MatrixMul4x4F32_Cpp(MatrixF32& c, const MatrixF32& a, const MatrixF32& b);
10 |
11 | // Ch16_03_fasm.asm
12 | extern "C" void MatrixMul4x4F32a_Aavx2(float* c, const float* a, const float* b);
13 | extern "C" void MatrixMul4x4F32b_Aavx2(float* c, const float* a, const float* b);
14 |
15 | // Ch16_03_misc.cpp
16 | extern void InitMat(MatrixF32& c1, MatrixF32& c2, MatrixF32& c3, MatrixF32& a, MatrixF32& b);
17 |
18 | // Ch16_03_bm.cpp
19 | extern void MatrixMul4x4F32_bm(void);
20 |
--------------------------------------------------------------------------------
/Chapter16/Ch16_03/Ch16_03.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter16/Ch16_03/Ch16_03_fcpp.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch16_03_fcpp.cpp
3 | //------------------------------------------------
4 |
5 | #include "Ch16_03.h"
6 |
7 | void MatrixMul4x4F32_Cpp(MatrixF32& c, const MatrixF32& a, const MatrixF32& b)
8 | {
9 | MatrixF32::Mul4x4(c, a, b);
10 | }
11 |
--------------------------------------------------------------------------------
/Chapter16/Ch16_03/Ch16_03_misc.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch16_03_misc.cpp
3 | //------------------------------------------------
4 |
5 | #include "Ch16_03.h"
6 |
7 | void InitMat(MatrixF32& c1, MatrixF32& c2, MatrixF32& c3, MatrixF32& a, MatrixF32& b)
8 | {
9 | const float a_row0[] = { 10, 11, 12, 13 };
10 | const float a_row1[] = { 20, 21, 22, 23 };
11 | const float a_row2[] = { 30, 31, 32, 33 };
12 | const float a_row3[] = { 40, 41, 42, 43 };
13 |
14 | const float b_row0[] = { 100, 101, 102, 103 };
15 | const float b_row1[] = { 200, 201, 202, 203 };
16 | const float b_row2[] = { 300, 301, 302, 303 };
17 | const float b_row3[] = { 400, 401, 402, 403 };
18 |
19 | a.SetRow(0, a_row0);
20 | a.SetRow(1, a_row1);
21 | a.SetRow(2, a_row2);
22 | a.SetRow(3, a_row3);
23 |
24 | b.SetRow(0, b_row0);
25 | b.SetRow(1, b_row1);
26 | b.SetRow(2, b_row2);
27 | b.SetRow(3, b_row3);
28 |
29 | const int w = 12;
30 | const char* delim = " ";
31 | c1.SetOstream(w, delim);
32 | c2.SetOstream(w, delim);
33 | c3.SetOstream(w, delim);
34 | a.SetOstream(w, delim);
35 | b.SetOstream(w, delim);
36 | }
37 |
--------------------------------------------------------------------------------
/Chapter16/Ch16_04/Ch16_04.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch16_04.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include
7 | #include "MatrixF32.h"
8 |
// Plain 4x1 vector: four single-precision components stored in the
// order W, X, Y, Z.
struct Vec4x1_F32
{
    float W;
    float X;
    float Y;
    float Z;
};
14 |
15 | // Ch16_04_fcpp.cpp
16 | extern void MatVecMulF32_Cpp(Vec4x1_F32* vec_b, const MatrixF32& m, const Vec4x1_F32* vec_a, size_t num_vec);
17 |
18 | // Ch16_04_fasm.asm
19 | extern "C" void MatVecMulF32_Aavx2(Vec4x1_F32* vec_b, const float* m, const Vec4x1_F32* vec_a, size_t num_vec);
20 |
21 | // Ch16_04_misc.cpp
22 | extern "C" bool CheckArgs(const Vec4x1_F32* vec_b, const MatrixF32& m, const Vec4x1_F32* vec_a, size_t num_vec);
23 | extern void Init(MatrixF32& m, Vec4x1_F32* va, size_t num_vec);
24 | extern bool VecCompare(const Vec4x1_F32* v1, const Vec4x1_F32* v2);
25 |
26 | // Ch16_04_bm.cpp
27 | extern void MatrixVecMulF32_bm(void);
28 |
29 | // Miscellaenous constants
30 | const size_t c_Alignment = 32;
31 | const int c_RngMinVal = 1;
32 | const int c_RngMaxVal = 500;
33 | const unsigned int c_RngSeedVal = 187;
34 |
35 |
--------------------------------------------------------------------------------
/Chapter16/Ch16_04/Ch16_04.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter16/Ch16_04/Ch16_04_fcpp.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch16_04_fcpp.cpp
3 | //------------------------------------------------
4 |
5 | #include
6 | #include
7 | #include
8 | #include "Ch16_04.h"
9 | #include "MatrixF32.h"
10 | #include "AlignedMem.h"
11 |
12 | void MatVecMulF32_Cpp(Vec4x1_F32* vec_b, const MatrixF32& m, const Vec4x1_F32* vec_a, size_t num_vec)
13 | {
14 | if (!CheckArgs(vec_b, m, vec_a, num_vec))
15 | throw std::runtime_error("MatVecMulF32_Cpp() - CheckArgs failed");
16 |
17 | const float* mm = m.Data();
18 |
19 | // Calculate matrix-vector products
20 | for (size_t i = 0; i < num_vec; i++)
21 | {
22 | vec_b[i].W = mm[0] * vec_a[i].W + mm[1] * vec_a[i].X;
23 | vec_b[i].W += mm[2] * vec_a[i].Y + mm[3] * vec_a[i].Z;
24 |
25 | vec_b[i].X = mm[4] * vec_a[i].W + mm[5] * vec_a[i].X;
26 | vec_b[i].X += mm[6] * vec_a[i].Y + mm[7] * vec_a[i].Z;
27 |
28 | vec_b[i].Y = mm[8] * vec_a[i].W + mm[9] * vec_a[i].X;
29 | vec_b[i].Y += mm[10] * vec_a[i].Y + mm[11] * vec_a[i].Z;
30 |
31 | vec_b[i].Z = mm[12] * vec_a[i].W + mm[13] * vec_a[i].X;
32 | vec_b[i].Z += mm[14] * vec_a[i].Y + mm[15] * vec_a[i].Z;
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/Chapter16/Ch16_05/Ch16_05.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch16_05.h
3 | //------------------------------------------------
4 |
5 | #pragma once
#include <vector>

// Ch16_05_fcpp.cpp
// 1D convolution of x with kernel, written to y (all single-precision).
extern void Convolve1D_F32_Cpp(std::vector<float>& y, const std::vector<float>& x, const std::vector<float>& kernel);

// Ch16_05_fasm.asm.cpp
// Assembly counterparts with C linkage; they take raw float buffers plus
// explicit element counts instead of std::vector objects.
extern "C" void Convolve1D_F32_Aavx2(float* y, const float* x, const float* kernel, size_t num_pts, size_t kernel_size);
extern "C" void Convolve1DKs5_F32_Aavx2(float* y, const float* x, const float* kernel, size_t num_pts);

// Ch16_05_misc.cpp
// Argument validation shared by the convolution functions.
extern bool CheckArgs(std::vector<float>& y, const std::vector<float>& x, const std::vector<float>& kernel);

// Ch16_05_bm.cpp
extern void Convolve1D_F32_bm(void);

// Miscellaneous constants
const unsigned int c_RngSeed = 97;
23 |
--------------------------------------------------------------------------------
/Chapter16/Ch16_05/Ch16_05.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Chapter16/Ch16_05/Ch16_05_fcpp.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch16_05_fcpp.cpp
3 | //------------------------------------------------
4 |
5 | #include
6 | #include "Ch16_05.h"
7 | #include "MiscTypes.h"
8 |
9 | void Convolve1D_F32_Cpp(std::vector& y, const std::vector& x, const std::vector& kernel)
10 | {
11 | if (!CheckArgs(y, x, kernel))
12 | throw std::runtime_error("Convolve1D_F32_Cpp() - CheckArgs failed");
13 |
14 | indx_t num_pts = (indx_t)y.size();
15 | indx_t ks2 = kernel.size() / 2;
16 |
17 | for (indx_t i = ks2; i < num_pts - ks2; i++)
18 | {
19 | float y_val = 0;
20 |
21 | for (indx_t k = -ks2; k <= ks2; k++)
22 | y_val += x[i - k] * kernel[k + ks2];
23 |
24 | y[i] = y_val;
25 | }
26 | }
27 |
28 |
--------------------------------------------------------------------------------
/Chapter16/Ch16_05/Ch16_05_misc.cpp:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch16_05_misc.cpp
3 | //------------------------------------------------
4 |
5 | #include "Ch16_05.h"
6 |
// Validates the arguments of the 1D convolution functions.
//
// Returns true only when:
//   - kernel has an odd number of elements (an empty kernel is rejected),
//   - y and x have the same number of elements, and
//   - y has at least as many elements as kernel.
// Returns false otherwise. None of the vectors is modified.
bool CheckArgs(std::vector<float>& y, const std::vector<float>& x, const std::vector<float>& kernel)
{
    // Kernel size must be odd
    if ((kernel.size() & 1) == 0)
        return false;

    // Input and output signals must have matching lengths
    if (y.size() != x.size())
        return false;

    // The signal must not be shorter than the kernel
    if (y.size() < kernel.size())
        return false;

    return true;
}
20 |
--------------------------------------------------------------------------------
/Chapter17/Ch17_01/Ch17_01.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------
2 | // Ch17_01.h
3 | //------------------------------------------------
4 |
5 | #pragma once
6 | #include
7 | #include "ZmmVal.h"
8 |
9 | // Ch17_01_fasm.asm.cpp
10 | extern "C" void MathI16_Aavx512(ZmmVal c[6], const ZmmVal* a, const ZmmVal* b);
11 | extern "C" void MathI64_Aavx512(ZmmVal c[6], const ZmmVal* a, const ZmmVal* b);
12 |
13 |
--------------------------------------------------------------------------------
/Chapter17/Ch17_01/Ch17_01.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |