├── .gitattributes ├── 9781484262665.jpg ├── Chapter02 ├── Ch02_01 │ ├── Ch02_01.cpp │ ├── Ch02_01_.s │ └── makefile ├── Ch02_02 │ ├── Ch02_02.cpp │ ├── Ch02_02_.s │ └── makefile ├── Ch02_03 │ ├── Ch02_03.cpp │ ├── Ch02_03_.s │ └── makefile ├── Ch02_04 │ ├── Ch02_04.cpp │ ├── Ch02_04_.s │ └── makefile ├── Ch02_05 │ ├── Ch02_05.cpp │ ├── Ch02_05_.s │ └── makefile ├── Ch02_06 │ ├── Ch02_06.cpp │ ├── Ch02_06_.s │ ├── Ch02_06_Misc.cpp │ └── makefile └── Ch02_07 │ ├── Ch02_07.cpp │ ├── Ch02_07_.s │ ├── Ch02_07_Misc.cpp │ └── makefile ├── Chapter03 ├── Ch03_01 │ ├── Ch03_01.cpp │ ├── Ch03_01_.s │ └── makefile ├── Ch03_02 │ ├── Ch03_02.cpp │ ├── Ch03_02_.s │ └── makefile ├── Ch03_03 │ ├── Ch03_03.cpp │ ├── Ch03_03_.s │ └── makefile ├── Ch03_04 │ ├── Ch03_04.cpp │ ├── Ch03_04_.s │ └── makefile ├── Ch03_05 │ ├── Ch03_05.cpp │ ├── Ch03_05_.s │ └── makefile └── Ch03_06 │ ├── Ch03_06.cpp │ ├── Ch03_06_.s │ ├── Ch03_06_Misc.cpp │ └── makefile ├── Chapter04 ├── Ch04_01 │ ├── Ch04_01.cpp │ ├── Ch04_01_.s │ └── makefile ├── Ch04_02 │ ├── Ch04_02.cpp │ ├── Ch04_02_.s │ └── makefile ├── Ch04_03 │ ├── Ch04_03.cpp │ ├── Ch04_03_.s │ └── makefile ├── Ch04_04 │ ├── Ch04_04.cpp │ ├── Ch04_04_.s │ ├── Ch04_04_Misc.cpp │ └── makefile ├── Ch04_05 │ ├── Ch04_05.cpp │ ├── Ch04_05_.s │ └── makefile └── Ch04_06 │ ├── Ch04_06.cpp │ ├── Ch04_06_.s │ └── makefile ├── Chapter05 └── Ch05_01 │ ├── Ch05_01.cpp │ └── makefile ├── Chapter06 ├── Ch06_01 │ ├── Ch06_01.cpp │ ├── Ch06_01_.s │ └── makefile ├── Ch06_02 │ ├── Ch06_02.cpp │ ├── Ch06_02_.s │ └── makefile ├── Ch06_03 │ ├── Ch06_03.cpp │ ├── Ch06_03_.s │ └── makefile ├── Ch06_04 │ ├── Ch06_04.cpp │ ├── Ch06_04_.s │ └── makefile ├── Ch06_05 │ ├── Ch06_05.cpp │ ├── Ch06_05_.s │ └── makefile ├── Ch06_06 │ ├── Ch06_06.cpp │ ├── Ch06_06_.s │ └── makefile ├── Ch06_07 │ ├── Ch06_07.cpp │ ├── Ch06_07_.s │ └── makefile └── Ch06_08 │ ├── Ch06_08.cpp │ ├── Ch06_08_.s │ └── makefile ├── Chapter08 ├── Ch08_01 │ ├── Ch08_01.cpp │ ├── Ch08_01_.s │ └── makefile 
├── Ch08_02 │ ├── Ch08_02.cpp │ ├── Ch08_02_.s │ └── makefile ├── Ch08_03 │ ├── Ch08_03.cpp │ ├── Ch08_03_.s │ └── makefile ├── Ch08_04 │ ├── Ch08_04.cpp │ ├── Ch08_04.h │ ├── Ch08_04_.s │ ├── Ch08_04_BM.cpp │ └── makefile ├── Ch08_05 │ ├── Ch08_05.cpp │ ├── Ch08_05.h │ ├── Ch08_05_.s │ ├── Ch08_05_BM.cpp │ └── makefile └── Ch08_06 │ ├── Ch08_06.cpp │ ├── Ch08_06.h │ ├── Ch08_06_.s │ ├── Ch08_06_BM.cpp │ ├── Ch08_06_Misc.cpp │ └── makefile ├── Chapter09 ├── Ch09_01 │ ├── Ch09_01.cpp │ ├── Ch09_01_.s │ └── makefile ├── Ch09_02 │ ├── Ch09_02.cpp │ ├── Ch09_02_.s │ └── makefile ├── Ch09_03 │ ├── Ch09_03.cpp │ ├── Ch09_03_.s │ └── makefile ├── Ch09_04 │ ├── Ch09_04.cpp │ ├── Ch09_04_.s │ └── makefile ├── Ch09_05 │ ├── Ch09_05.cpp │ ├── Ch09_05_.s │ └── makefile ├── Ch09_06 │ ├── Ch09_06.cpp │ ├── Ch09_06_.s │ └── makefile └── Ch09_07 │ ├── Ch09_07.cpp │ ├── Ch09_07.h │ ├── Ch09_07_.s │ ├── Ch09_07_BM.cpp │ └── makefile ├── Chapter11 ├── Ch11_01 │ ├── Ch11_01.cpp │ ├── Ch11_01_.s │ └── makefile ├── Ch11_02 │ ├── Ch11_02.cpp │ ├── Ch11_02_.s │ └── makefile ├── Ch11_03 │ ├── Ch11_03.cpp │ ├── Ch11_03_.s │ └── makefile ├── Ch11_04 │ ├── Ch11_04.cpp │ ├── Ch11_04_.s │ └── makefile ├── Ch11_05 │ ├── Ch11_05.cpp │ ├── Ch11_05_.s │ └── makefile ├── Ch11_06 │ ├── Ch11_06.cpp │ ├── Ch11_06_.s │ ├── Ch11_06_Misc.cpp │ └── makefile └── Ch11_07 │ ├── Ch11_07.cpp │ ├── Ch11_07_.s │ ├── Ch11_07_Misc.cpp │ └── makefile ├── Chapter12 ├── Ch12_01 │ ├── Ch12_01.cpp │ ├── Ch12_01_.s │ └── makefile ├── Ch12_02 │ ├── Ch12_02.cpp │ ├── Ch12_02_.s │ └── makefile ├── Ch12_03 │ ├── Ch12_03.cpp │ ├── Ch12_03_.s │ └── makefile ├── Ch12_04 │ ├── Ch12_04.cpp │ ├── Ch12_04_.s │ └── makefile ├── Ch12_05 │ ├── Ch12_05.cpp │ ├── Ch12_05_.s │ └── makefile └── Ch12_06 │ ├── Ch12_06.cpp │ ├── Ch12_06_.s │ └── makefile ├── Chapter13 ├── Ch13_01 │ ├── Ch13_01.cpp │ ├── Ch13_01_.s │ └── makefile ├── Ch13_02 │ ├── Ch13_02.cpp │ ├── Ch13_02_.s │ └── makefile ├── Ch13_03 │ ├── Ch13_03.cpp │ ├── Ch13_03_.s │ └── 
makefile ├── Ch13_04 │ ├── Ch13_04.cpp │ ├── Ch13_04_.s │ └── makefile ├── Ch13_05 │ ├── Ch13_05.cpp │ ├── Ch13_05_.s │ ├── GetSetRm_.s │ └── makefile ├── Ch13_06 │ ├── Ch13_06.cpp │ ├── Ch13_06_.s │ └── makefile ├── Ch13_07 │ ├── Ch13_07.cpp │ ├── Ch13_07_.s │ └── makefile └── Ch13_08 │ ├── Ch13_08.cpp │ ├── Ch13_08_.s │ └── makefile ├── Chapter14 ├── Ch14_01 │ ├── Ch14_01.cpp │ ├── Ch14_01_.s │ └── makefile ├── Ch14_02 │ ├── Ch14_02.cpp │ ├── Ch14_02_.s │ └── makefile ├── Ch14_03 │ ├── Ch14_03.cpp │ ├── Ch14_03_.s │ └── makefile ├── Ch14_04 │ ├── Ch14_04.cpp │ ├── Ch14_04.h │ ├── Ch14_04_.s │ ├── Ch14_04_BM.cpp │ └── makefile ├── Ch14_05 │ ├── Ch14_05.cpp │ ├── Ch14_05.h │ ├── Ch14_05_BM.cpp │ ├── Ch14_05_Macros_.inc │ ├── Ch14_05_Misc.cpp │ ├── Ch14_05a_.s │ ├── Ch14_05b_.s │ └── makefile └── Ch14_06 │ ├── Ch14_06.cpp │ ├── Ch14_06.h │ ├── Ch14_06_.s │ ├── Ch14_06_BM.cpp │ ├── Ch14_06_Macros_.inc │ └── makefile ├── Chapter15 ├── Ch15_01 │ ├── Ch15_01.cpp │ ├── Ch15_01_.s │ └── makefile ├── Ch15_02 │ ├── Ch15_02.cpp │ ├── Ch15_02_.s │ └── makefile ├── Ch15_03 │ ├── Ch15_03.cpp │ ├── Ch15_03_.s │ └── makefile ├── Ch15_04 │ ├── Ch15_04.cpp │ ├── Ch15_04_.s │ └── makefile ├── Ch15_05 │ ├── Ch15_05.cpp │ ├── Ch15_05.h │ ├── Ch15_05_.s │ ├── Ch15_05_BM.cpp │ ├── Ch15_05_Macros_.inc │ └── makefile ├── Ch15_06 │ ├── Ch15_06.cpp │ ├── Ch15_06.h │ ├── Ch15_06_.s │ ├── Ch15_06_BM.cpp │ └── makefile └── Ch15_07 │ ├── Ch15_07.cpp │ ├── Ch15_07_.s │ └── makefile ├── Chapter16 ├── Ch16_01 │ ├── Ch16_01.cpp │ ├── Ch16_01.h │ ├── Ch16_01_.s │ ├── Ch16_01_BM.cpp │ ├── Ch16_01_Misc.cpp │ └── makefile ├── Ch16_02 │ ├── Ch16_02.cpp │ ├── Ch16_02.h │ ├── Ch16_02_.s │ ├── Ch16_02_BM.cpp │ ├── Ch16_02_Misc.cpp │ └── makefile ├── Ch16_03 │ ├── Ch16_03.cpp │ ├── Ch16_03.h │ ├── Ch16_03_.s │ ├── Ch16_03_BM.cpp │ └── makefile └── Ch16_04 │ ├── Ch16_04.cpp │ ├── Ch16_04.h │ ├── Ch16_04_.s │ ├── Ch16_04_BM.cpp │ ├── Ch16_04_Macros_.inc │ └── makefile ├── Contributing.md ├── Data ├── 
ImageA.png ├── ImageB.png └── ImageC.png ├── ImportantNotes.txt ├── Include ├── AlignedMem.h ├── BmThreadTimer.h ├── ImageMatrix.h ├── ImageMisc.h ├── ImagePng.h ├── MatrixF32.h ├── MatrixF64.h ├── Misc.h ├── OS.h └── Vec128.h ├── LICENSE.txt ├── README.md ├── ReleaseHistory.txt ├── Sh ├── clean32.sh ├── clean64.sh ├── dirs32.txt ├── dirs64.txt ├── make32.sh ├── make64.sh ├── readme.txt ├── run32.sh └── run64.sh └── errata.md /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /9781484262665.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/modern-arm-assembly-language-programming/af88d2766656447e0c3308408a17828bab4f41a1/9781484262665.jpg -------------------------------------------------------------------------------- /Chapter02/Ch02_01/Ch02_01.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch02_01.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | 7 | using namespace std; 8 | 9 | extern "C" int IntegerAddSub_(int a, int b, int c, int d); 10 | 11 | void PrintResult(const char* msg, int a, int b, int c, int d, int result) 12 | { 13 | const char nl = '\n'; 14 | 15 | cout << msg << nl; 16 | cout << "a = " << a << nl; 17 | cout << "b = " << b << nl; 18 | cout << "c = " << c << nl; 19 | cout << "d = " << d << nl; 20 | cout << "result = " << result << nl; 21 | cout << nl; 22 | } 23 | 24 | int main(int argc, char** argv) 25 | { 26 | int a, b, c, d, result; 27 | 28 | a = 10; b = 20; c = 30; d = 18; 29 | result = IntegerAddSub_(a, b, c, d); 30 | PrintResult("Test case #1", a, b, c, d, result); 31 | 32 | a = 101; b = 34; c = -190; d = 25; 33 | result = IntegerAddSub_(a, 
b, c, d); 34 | PrintResult("Test case #2", a, b, c, d, result); 35 | } -------------------------------------------------------------------------------- /Chapter02/Ch02_01/Ch02_01_.s: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch02_01_.s 3 | //------------------------------------------------ 4 | 5 | // extern "C" int IntegerAddSub_(int a, int b, int c, int d); 6 | 7 | .text 8 | .global IntegerAddSub_ 9 | IntegerAddSub_: 10 | 11 | // Calculate a + b + c - d 12 | add r0,r0,r1 // r0 = a + b 13 | add r0,r0,r2 // r0 = a + b + c 14 | sub r0,r0,r3 // r0 = a + b + c - d 15 | 16 | bx lr // return to caller 17 | -------------------------------------------------------------------------------- /Chapter02/Ch02_01/makefile: -------------------------------------------------------------------------------- 1 | # Target, include, and object directories 2 | TARGET = Ch02_01 3 | INCDIR1 = . 4 | INCDIR2 = ../../Include 5 | OBJDIR = obj 6 | 7 | # include files 8 | CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h) 9 | CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h) 10 | ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc) 11 | ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc) 12 | 13 | # .cpp files in current directory 14 | CPPFILES = $(wildcard *.cpp) 15 | CPPOBJFILES_ = $(CPPFILES:.cpp=.o) 16 | CPPOBJFILES = $(patsubst %, $(OBJDIR)/%, $(CPPOBJFILES_)) 17 | 18 | # .s files in current directory 19 | ASMFILES = $(wildcard *.s) 20 | ASMOBJFILES_ = $(ASMFILES:.s=.o) 21 | ASMOBJFILES = $(patsubst %, $(OBJDIR)/%, $(ASMOBJFILES_)) 22 | 23 | # Target object files 24 | OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES) 25 | 26 | # g++ and assembler options - required 27 | GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall 28 | ASMOPT = -march=armv8-a+simd 29 | 30 | # g++ and assembler options - optional (uncomment to enable) 31 | DEBUG = -g 32 | LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=$(OBJDIR) 33 | LISTFILE_ASM 
= -aghlms=$(OBJDIR)/$(basename $<).lst 34 | 35 | # Create directory for object and temp files 36 | MKOBJDIR := $(shell mkdir -p $(OBJDIR)) 37 | 38 | # Build rules 39 | $(TARGET): $(OBJFILES) 40 | g++ $(OBJFILES) -o $(TARGET) 41 | 42 | # Note: full recompiles/assembles on any include file changes 43 | $(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2) 44 | g++ $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@ 45 | 46 | $(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2) 47 | as $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@ 48 | 49 | .PHONY: clean 50 | 51 | clean: 52 | rm -f $(TARGET) 53 | rm -rf $(OBJDIR) 54 | -------------------------------------------------------------------------------- /Chapter02/Ch02_02/Ch02_02.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch02_02.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | 7 | using namespace std; 8 | 9 | extern "C" int IntegerMulA_(int a, int b); 10 | extern "C" long long IntegerMulB_(int a, int b); 11 | extern "C" unsigned long long IntegerMulC_(unsigned int a, unsigned int b); 12 | 13 | template 14 | void PrintResult(const char* msg, T1 a, T1 b, T2 result) 15 | { 16 | const char nl = '\n'; 17 | 18 | cout << msg << nl; 19 | cout << "a = " << a << ", b = " << b; 20 | cout << " result = " << result << nl << nl; 21 | } 22 | 23 | int main(int argc, char** argv) 24 | { 25 | int a1 = 50; 26 | int b1 = 25; 27 | int result1 = IntegerMulA_(a1, b1); 28 | PrintResult("Test case #1", a1, b1, result1); 29 | 30 | int a2 = -300; 31 | int b2 = 7; 32 | int result2 = IntegerMulA_(a2, b2); 33 | PrintResult("Test case #2", a2, b2, result2); 34 | 35 | int a3 = 4000; 36 | int b3 = 1000000;; 37 | long long result3 = IntegerMulB_(a3, b3); 38 | PrintResult("Test case #3", a3, b3, result3); 39 | 40 | int a4 = 100000; 41 | int b4 = -20000000; 42 | long long result4 = 
IntegerMulB_(a4, b4); 43 | PrintResult("Test case #4", a4, b4, result4); 44 | 45 | unsigned int a5 = 0x80000000; 46 | unsigned int b5 = 0x80000000; 47 | unsigned long long result5 = IntegerMulC_(a5, b5); 48 | PrintResult("Test case #5", a5, b5, result5); 49 | 50 | return 0; 51 | } 52 | -------------------------------------------------------------------------------- /Chapter02/Ch02_02/Ch02_02_.s: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch02_02_.s 3 | //------------------------------------------------ 4 | 5 | // extern "C" int IntegerMulA_(int a, int b); 6 | 7 | .text 8 | .global IntegerMulA_ 9 | IntegerMulA_: 10 | 11 | // Calculate a * b and save result 12 | mul r0,r0,r1 // calc a * b (32-bit) 13 | bx lr 14 | 15 | // extern "C" long long IntegerMulB_(int a, int b); 16 | 17 | .global IntegerMulB_ 18 | IntegerMulB_: 19 | 20 | // Calculate a * b and save result 21 | smull r0,r1,r0,r1 // calc a * b (signed 64-bit) 22 | bx lr 23 | 24 | // extern "C" unsigned long long IntegerMulC_(unsigned int a, unsigned int b); 25 | 26 | .global IntegerMulC_ 27 | IntegerMulC_: 28 | 29 | // Calculate a * b and save result 30 | umull r0,r1,r0,r1 // calc a * b (unsigned 64-bit) 31 | bx lr 32 | -------------------------------------------------------------------------------- /Chapter02/Ch02_02/makefile: -------------------------------------------------------------------------------- 1 | # Target, include, and object directories 2 | TARGET = Ch02_02 3 | INCDIR1 = . 
4 | INCDIR2 = ../../Include 5 | OBJDIR = obj 6 | 7 | # include files 8 | CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h) 9 | CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h) 10 | ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc) 11 | ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc) 12 | 13 | # .cpp files in current directory 14 | CPPFILES = $(wildcard *.cpp) 15 | CPPOBJFILES_ = $(CPPFILES:.cpp=.o) 16 | CPPOBJFILES = $(patsubst %, $(OBJDIR)/%, $(CPPOBJFILES_)) 17 | 18 | # .s files in current directory 19 | ASMFILES = $(wildcard *.s) 20 | ASMOBJFILES_ = $(ASMFILES:.s=.o) 21 | ASMOBJFILES = $(patsubst %, $(OBJDIR)/%, $(ASMOBJFILES_)) 22 | 23 | # Target object files 24 | OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES) 25 | 26 | # g++ and assembler options - required 27 | GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall 28 | ASMOPT = -march=armv8-a+simd 29 | 30 | # g++ and assembler options - optional (uncomment to enable) 31 | DEBUG = -g 32 | LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=$(OBJDIR) 33 | LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst 34 | 35 | # Create directory for object and temp files 36 | MKOBJDIR := $(shell mkdir -p $(OBJDIR)) 37 | 38 | # Build rules 39 | $(TARGET): $(OBJFILES) 40 | g++ $(OBJFILES) -o $(TARGET) 41 | 42 | # Note: full recompiles/assembles on any include file changes 43 | $(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2) 44 | g++ $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@ 45 | 46 | $(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2) 47 | as $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@ 48 | 49 | .PHONY: clean 50 | 51 | clean: 52 | rm -f $(TARGET) 53 | rm -rf $(OBJDIR) 54 | -------------------------------------------------------------------------------- /Chapter02/Ch02_03/Ch02_03.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch02_03.cpp 3 | 
//------------------------------------------------ 4 | 5 | #include 6 | 7 | using namespace std; 8 | 9 | extern "C" void CalcQuoRem_(const int* a, const int* b, int* quo, int* rem); 10 | 11 | void PrintResult(const char* msg, int a, int b, int quo, int rem) 12 | { 13 | const char nl = '\n'; 14 | 15 | cout << msg << nl; 16 | cout << "a = " << a << nl; 17 | cout << "b = " << b << nl; 18 | cout << "quotient = " << quo << nl; 19 | cout << "remainder = " << rem << nl; 20 | cout << nl; 21 | } 22 | 23 | int main(int argc, char** argv) 24 | { 25 | int a, b, quo, rem; 26 | 27 | a = 100; b = 7; 28 | CalcQuoRem_(&a, &b, &quo, &rem); 29 | PrintResult("Test case #1", a, b, quo, rem); 30 | 31 | a = 200; b = 10; 32 | CalcQuoRem_(&a, &b, &quo, &rem); 33 | PrintResult("Test case #2", a, b, quo, rem); 34 | 35 | a = 300; b = -17; 36 | CalcQuoRem_(&a, &b, &quo, &rem); 37 | PrintResult("Test case #3", a, b, quo, rem); 38 | } 39 | -------------------------------------------------------------------------------- /Chapter02/Ch02_03/Ch02_03_.s: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch02_03_.s 3 | //------------------------------------------------ 4 | 5 | // extern "C" void CalcQuoRem_(const int* a, const int* b, int* quo, int* rem); 6 | 7 | .text 8 | .global CalcQuoRem_ 9 | CalcQuoRem_: 10 | 11 | // Save non-volatile registers 12 | push {r4,r5} // save r4 and r5 on stack 13 | 14 | // Load a and b 15 | ldr r4,[r0] // r4 = a (dividend) 16 | ldr r5,[r1] // r5 = b (divisor) 17 | 18 | // Calculate quotient and remainder 19 | sdiv r0,r4,r5 // r0 = quotient 20 | str r0,[r2] // save quotient 21 | 22 | mul r1,r0,r5 // r1 = quotient * b 23 | sub r2,r4,r1 // r2 = a - quotient * b 24 | str r2,[r3] // save remainder 25 | 26 | // Restore non-volatile registers and return 27 | pop {r4,r5} // restore r4 & r5 28 | bx lr // return to caller 29 | 
-------------------------------------------------------------------------------- /Chapter02/Ch02_03/makefile: -------------------------------------------------------------------------------- 1 | # Target, include, and object directories 2 | TARGET = Ch02_03 3 | INCDIR1 = . 4 | INCDIR2 = ../../Include 5 | OBJDIR = obj 6 | 7 | # include files 8 | CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h) 9 | CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h) 10 | ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc) 11 | ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc) 12 | 13 | # .cpp files in current directory 14 | CPPFILES = $(wildcard *.cpp) 15 | CPPOBJFILES_ = $(CPPFILES:.cpp=.o) 16 | CPPOBJFILES = $(patsubst %, $(OBJDIR)/%, $(CPPOBJFILES_)) 17 | 18 | # .s files in current directory 19 | ASMFILES = $(wildcard *.s) 20 | ASMOBJFILES_ = $(ASMFILES:.s=.o) 21 | ASMOBJFILES = $(patsubst %, $(OBJDIR)/%, $(ASMOBJFILES_)) 22 | 23 | # Target object files 24 | OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES) 25 | 26 | # g++ and assembler options - required 27 | GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall 28 | ASMOPT = -march=armv8-a+simd 29 | 30 | # g++ and assembler options - optional (uncomment to enable) 31 | DEBUG = -g 32 | LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=$(OBJDIR) 33 | LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst 34 | 35 | # Create directory for object and temp files 36 | MKOBJDIR := $(shell mkdir -p $(OBJDIR)) 37 | 38 | # Build rules 39 | $(TARGET): $(OBJFILES) 40 | g++ $(OBJFILES) -o $(TARGET) 41 | 42 | # Note: full recompiles/assembles on any include file changes 43 | $(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2) 44 | g++ $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@ 45 | 46 | $(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2) 47 | as $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@ 48 | 49 | .PHONY: clean 50 | 51 | clean: 52 | rm -f $(TARGET) 53 | rm -rf $(OBJDIR) 54 | 
-------------------------------------------------------------------------------- /Chapter02/Ch02_04/Ch02_04.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch02_04.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | 7 | using namespace std; 8 | 9 | extern "C" int TestLDR_(void); 10 | 11 | int main(int argc, char** argv) 12 | { 13 | int result = TestLDR_(); 14 | cout << "result = " << result << '\n'; 15 | return 0; 16 | } 17 | -------------------------------------------------------------------------------- /Chapter02/Ch02_04/Ch02_04_.s: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch02_04_.s 3 | //------------------------------------------------ 4 | 5 | .data 6 | Foo: .word 100, 200, 300, 400 7 | 8 | // extern "C" int TestLDR_(void); 9 | 10 | .text 11 | .global TestLDR_ 12 | 13 | TestLDR_: ldr r1,=Foo // r1 = address of Foo 14 | 15 | ldr r2,[r1] // r2 = Foo[0] 16 | ldr r3,[r1,#4] // r3 = Foo[1] 17 | 18 | add r0,r2,r3 // r0 = Foo[0] + Foo[1] 19 | 20 | ldr r2,[r1,#8] // r2 = Foo[2] 21 | add r0,r0,r2 // r0 += Foo[2] 22 | 23 | ldr r2,[r1,#12] // r2 = Foo[3] 24 | add r0,r0,r2 // r0 += Foo[3] 25 | 26 | bx lr // return to caller 27 | -------------------------------------------------------------------------------- /Chapter02/Ch02_04/makefile: -------------------------------------------------------------------------------- 1 | # Target, include, and object directories 2 | TARGET = Ch02_04 3 | INCDIR1 = . 
4 | INCDIR2 = ../../Include 5 | OBJDIR = obj 6 | 7 | # include files 8 | CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h) 9 | CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h) 10 | ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc) 11 | ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc) 12 | 13 | # .cpp files in current directory 14 | CPPFILES = $(wildcard *.cpp) 15 | CPPOBJFILES_ = $(CPPFILES:.cpp=.o) 16 | CPPOBJFILES = $(patsubst %, $(OBJDIR)/%, $(CPPOBJFILES_)) 17 | 18 | # .s files in current directory 19 | ASMFILES = $(wildcard *.s) 20 | ASMOBJFILES_ = $(ASMFILES:.s=.o) 21 | ASMOBJFILES = $(patsubst %, $(OBJDIR)/%, $(ASMOBJFILES_)) 22 | 23 | # Target object files 24 | OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES) 25 | 26 | # g++ and assembler options - required 27 | GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall 28 | ASMOPT = -march=armv8-a+simd 29 | 30 | # g++ and assembler options - optional (uncomment to enable) 31 | DEBUG = -g 32 | LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=$(OBJDIR) 33 | LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst 34 | 35 | # Create directory for object and temp files 36 | MKOBJDIR := $(shell mkdir -p $(OBJDIR)) 37 | 38 | # Build rules 39 | $(TARGET): $(OBJFILES) 40 | g++ $(OBJFILES) -o $(TARGET) 41 | 42 | # Note: full recompiles/assembles on any include file changes 43 | $(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2) 44 | g++ $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@ 45 | 46 | $(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2) 47 | as $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@ 48 | 49 | .PHONY: clean 50 | 51 | clean: 52 | rm -f $(TARGET) 53 | rm -rf $(OBJDIR) 54 | -------------------------------------------------------------------------------- /Chapter02/Ch02_05/Ch02_05.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch02_05.cpp 3 | 
//------------------------------------------------ 4 | 5 | #include 6 | 7 | using namespace std; 8 | 9 | extern "C" int MoveImmA_(void); 10 | extern "C" int MoveImmB_(void); 11 | extern "C" int MoveImmC_(void); 12 | 13 | int main(int argc, char** argv) 14 | { 15 | int a = MoveImmA_(); 16 | int b = MoveImmB_(); 17 | int c = MoveImmC_(); 18 | 19 | cout << "a = " << a << '\n'; 20 | cout << "b = " << b << '\n'; 21 | cout << "c = " << c << '\n'; 22 | 23 | return 0; 24 | } 25 | -------------------------------------------------------------------------------- /Chapter02/Ch02_05/makefile: -------------------------------------------------------------------------------- 1 | # Target, include, and object directories 2 | TARGET = Ch02_05 3 | INCDIR1 = . 4 | INCDIR2 = ../../Include 5 | OBJDIR = obj 6 | 7 | # include files 8 | CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h) 9 | CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h) 10 | ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc) 11 | ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc) 12 | 13 | # .cpp files in current directory 14 | CPPFILES = $(wildcard *.cpp) 15 | CPPOBJFILES_ = $(CPPFILES:.cpp=.o) 16 | CPPOBJFILES = $(patsubst %, $(OBJDIR)/%, $(CPPOBJFILES_)) 17 | 18 | # .s files in current directory 19 | ASMFILES = $(wildcard *.s) 20 | ASMOBJFILES_ = $(ASMFILES:.s=.o) 21 | ASMOBJFILES = $(patsubst %, $(OBJDIR)/%, $(ASMOBJFILES_)) 22 | 23 | # Target object files 24 | OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES) 25 | 26 | # g++ and assembler options - required 27 | GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall 28 | ASMOPT = -march=armv8-a+simd 29 | 30 | # g++ and assembler options - optional (uncomment to enable) 31 | DEBUG = -g 32 | LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=$(OBJDIR) 33 | LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst 34 | 35 | # Create directory for object and temp files 36 | MKOBJDIR := $(shell mkdir -p $(OBJDIR)) 37 | 38 | # Build rules 39 | $(TARGET): $(OBJFILES) 40 | g++ $(OBJFILES) -o $(TARGET) 41 | 
42 | # Note: full recompiles/assembles on any include file changes 43 | $(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2) 44 | g++ $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@ 45 | 46 | $(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2) 47 | as $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@ 48 | 49 | .PHONY: clean 50 | 51 | clean: 52 | rm -f $(TARGET) 53 | rm -rf $(OBJDIR) 54 | -------------------------------------------------------------------------------- /Chapter02/Ch02_06/Ch02_06.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch02_06.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | 7 | using namespace std; 8 | 9 | // Ch02_06_.s 10 | extern "C" void MoveRegA_(unsigned int a, unsigned int* b); 11 | extern "C" void MoveRegB_(unsigned int a, unsigned int* b, unsigned int count); 12 | 13 | // Ch02_06_Misc.cpp 14 | extern void PrintResultA(const char* msg, unsigned int a, const unsigned int* b, size_t n); 15 | extern void PrintResultB(const char* msg, unsigned int a, const unsigned int* b, size_t n, size_t count); 16 | 17 | int main(int argc, char** argv) 18 | { 19 | // Exercise function MoveRegA_ 20 | const size_t n = 5; 21 | 22 | unsigned int a1 = 0x12345678; 23 | unsigned int b1[5]; 24 | MoveRegA_(a1, b1); 25 | PrintResultA("MoveRegA_ Test Case #1", a1, b1, n); 26 | 27 | unsigned int a2 = 0xfedcba91; 28 | unsigned int b2[n]; 29 | MoveRegA_(a2, b2); 30 | PrintResultA("MoveRegA_ Test Case #2", a2, b2, n); 31 | cout << "\n"; 32 | 33 | // Exercise function MoveRegB_ 34 | const size_t nn = 4; 35 | const size_t count = 8; 36 | 37 | unsigned int a3 = 0x12345678; 38 | unsigned int b3[nn]; 39 | MoveRegB_(a3, b3, count); 40 | PrintResultB("MoveRegB_ Test Case #1", a3, b3, nn, count); 41 | 42 | unsigned int a4 = 0xfedcba91; 43 | unsigned int b4[nn]; 44 | MoveRegB_(a4, b4, count); 45 | 
PrintResultB("MoveRegB_ Test Case #2", a4, b4, nn, count); 46 | 47 | return 0; 48 | } 49 | -------------------------------------------------------------------------------- /Chapter02/Ch02_06/makefile: -------------------------------------------------------------------------------- 1 | # Target, include, and object directories 2 | TARGET = Ch02_06 3 | INCDIR1 = . 4 | INCDIR2 = ../../Include 5 | OBJDIR = obj 6 | 7 | # include files 8 | CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h) 9 | CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h) 10 | ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc) 11 | ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc) 12 | 13 | # .cpp files in current directory 14 | CPPFILES = $(wildcard *.cpp) 15 | CPPOBJFILES_ = $(CPPFILES:.cpp=.o) 16 | CPPOBJFILES = $(patsubst %, $(OBJDIR)/%, $(CPPOBJFILES_)) 17 | 18 | # .s files in current directory 19 | ASMFILES = $(wildcard *.s) 20 | ASMOBJFILES_ = $(ASMFILES:.s=.o) 21 | ASMOBJFILES = $(patsubst %, $(OBJDIR)/%, $(ASMOBJFILES_)) 22 | 23 | # Target object files 24 | OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES) 25 | 26 | # g++ and assembler options - required 27 | GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall 28 | ASMOPT = -march=armv8-a+simd 29 | 30 | # g++ and assembler options - optional (uncomment to enable) 31 | DEBUG = -g 32 | LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=$(OBJDIR) 33 | LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst 34 | 35 | # Create directory for object and temp files 36 | MKOBJDIR := $(shell mkdir -p $(OBJDIR)) 37 | 38 | # Build rules 39 | $(TARGET): $(OBJFILES) 40 | g++ $(OBJFILES) -o $(TARGET) 41 | 42 | # Note: full recompiles/assembles on any include file changes 43 | $(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2) 44 | g++ $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@ 45 | 46 | $(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2) 47 | as $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@ 48 | 49 | .PHONY: clean 50 | 51 | clean: 
52 | rm -f $(TARGET) 53 | rm -rf $(OBJDIR) 54 | -------------------------------------------------------------------------------- /Chapter02/Ch02_07/Ch02_07.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch02_07.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | 7 | using namespace std; 8 | 9 | // Ch02_07_.s 10 | extern "C" void TestBitOpsA_(unsigned int a, unsigned int b, unsigned int* c); 11 | extern "C" void TestBitOpsB_(unsigned int a, unsigned int* b); 12 | 13 | // Ch02_07_Misc.cpp 14 | extern void PrintResultA(const char* msg, unsigned int a, unsigned int b, const unsigned int* c, size_t n); 15 | extern void PrintResultB(const char* msg, unsigned int a, const unsigned int* b, size_t n); 16 | 17 | int main(int argc, char** argv) 18 | { 19 | // Exercise function TestBitOpsA_ 20 | const size_t n = 3; 21 | unsigned int a1 = 0x12345678; 22 | unsigned int b1 = 0xaa55aa55; 23 | unsigned int c1[n]; 24 | TestBitOpsA_(a1, b1, c1); 25 | PrintResultA("TestBitOpsA_ Test Case #1", a1, b1, c1, n); 26 | 27 | unsigned int a2 = 0x12345678; 28 | unsigned int b2 = 0x00ffc384; 29 | unsigned int c2[n]; 30 | TestBitOpsA_(a2, b2, c2); 31 | PrintResultA("TestBitOpsA_ Test Case #2", a2, b2, c2, n); 32 | 33 | cout << "\n"; 34 | 35 | // Exercise function TestBitOpsB_ 36 | const size_t nn = 4; 37 | unsigned int a3 = 0x12345678; 38 | unsigned int b3[nn]; 39 | TestBitOpsB_(a3, b3); 40 | PrintResultB("TestBitOpsB_ Test Case #1", a3, b3, nn); 41 | 42 | unsigned int a4 = 0xaa55aa55; 43 | unsigned int b4[nn]; 44 | TestBitOpsB_(a4, b4); 45 | PrintResultB("TestBitOpsB_ Test Case #2", a4, b4, nn); 46 | 47 | return 0; 48 | } 49 | -------------------------------------------------------------------------------- /Chapter02/Ch02_07/Ch02_07_.s: -------------------------------------------------------------------------------- 1 | 
//------------------------------------------------ 2 | // Ch02_07_.s 3 | //------------------------------------------------ 4 | 5 | // extern "C" void TestBitOpsA_(unsigned int a, unsigned int b, unsigned int* c); 6 | 7 | .text 8 | .global TestBitOpsA_ 9 | TestBitOpsA_: 10 | push {r4,r5} 11 | 12 | and r3,r0,r1 // a AND b 13 | str r3,[r2] 14 | 15 | orr r4,r0,r1 // a OR b 16 | str r4,[r2,#4] 17 | 18 | eor r5,r0,r1 // a EOR b 19 | str r5,[r2,#8] 20 | 21 | pop {r4,r5} 22 | bx lr 23 | 24 | 25 | .int 100 26 | .long 200 27 | .word - 100 28 | 29 | .quad -1000 30 | .octa -2000 31 | 32 | 33 | // extern "C" void TestBitOpsB_(unsigned int a, unsigned int* b); 34 | 35 | .global TestBitOpsB_ 36 | TestBitOpsB_: 37 | 38 | push {r4-r7} 39 | 40 | and r2,r0,#0x0000ff00 // a AND 0x0000ff00 41 | str r2,[r1] 42 | 43 | orr r3,r0,#0x00ff0000 // a OR 0x00ff0000 44 | str r3,[r1,#4] 45 | 46 | eor r4,r0,#0xff000000 // a EOR 0xff000000 47 | str r4,[r1,#8] 48 | 49 | // Uncommenting the AND instruction below causes the assembler error "invalid constant after fixup" (0x00ffff00 is not a valid immediate operand) 50 | 51 | // and r5,r0,#0x00ffff00 // invalid constant 52 | 53 | movw r5,#0xff00 54 | movt r5,#0x00ff // r5 = 0x00ffff00 55 | and r6,r0,r5 // a AND 0x00ffff00 56 | str r6,[r1,#12] 57 | 58 | pop {r4-r7} 59 | bx lr 60 | -------------------------------------------------------------------------------- /Chapter02/Ch02_07/makefile: -------------------------------------------------------------------------------- 1 | # Target, include, and object directories 2 | TARGET = Ch02_07 3 | INCDIR1 = .
4 | INCDIR2 = ../../Include 5 | OBJDIR = obj 6 | 7 | # include files 8 | CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h) 9 | CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h) 10 | ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc) 11 | ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc) 12 | 13 | # .cpp files in current directory 14 | CPPFILES = $(wildcard *.cpp) 15 | CPPOBJFILES_ = $(CPPFILES:.cpp=.o) 16 | CPPOBJFILES = $(patsubst %, $(OBJDIR)/%, $(CPPOBJFILES_)) 17 | 18 | # .s files in current directory 19 | ASMFILES = $(wildcard *.s) 20 | ASMOBJFILES_ = $(ASMFILES:.s=.o) 21 | ASMOBJFILES = $(patsubst %, $(OBJDIR)/%, $(ASMOBJFILES_)) 22 | 23 | # Target object files 24 | OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES) 25 | 26 | # g++ and assembler options - required 27 | GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall 28 | ASMOPT = -march=armv8-a+simd 29 | 30 | # g++ and assembler options - optional (uncomment to enable) 31 | DEBUG = -g 32 | LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=$(OBJDIR) 33 | LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst 34 | 35 | # Create directory for object and temp files 36 | MKOBJDIR := $(shell mkdir -p $(OBJDIR)) 37 | 38 | # Build rules 39 | $(TARGET): $(OBJFILES) 40 | g++ $(OBJFILES) -o $(TARGET) 41 | 42 | # Note: full recompiles/assembles on any include file changes 43 | $(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2) 44 | g++ $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@ 45 | 46 | $(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2) 47 | as $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@ 48 | 49 | .PHONY: clean 50 | 51 | clean: 52 | rm -f $(TARGET) 53 | rm -rf $(OBJDIR) 54 | -------------------------------------------------------------------------------- /Chapter03/Ch03_01/Ch03_01.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch03_01.cpp 3 | 
//------------------------------------------------ 4 | 5 | #include <iostream> 6 | 7 | using namespace std; 8 | 9 | extern "C" int SumSquares_(int a, int b, int c, int d, int e, int f, int g); 10 | 11 | void PrintResult(const char* msg, int a, int b, int c, int d, int e, int f, int g, int sum) 12 | { 13 | const char nl = '\n'; 14 | const char* sep = " | "; 15 | 16 | cout << msg << nl; 17 | cout << "a = " << a << sep; 18 | cout << "b = " << b << sep; 19 | cout << "c = " << c << sep; 20 | cout << "d = " << d << nl; 21 | cout << "e = " << e << sep; 22 | cout << "f = " << f << sep; 23 | cout << "g = " << g << sep; 24 | cout << "sum= " << sum << nl; 25 | cout << nl; 26 | } 27 | 28 | int main(int argc, char** argv) 29 | { 30 | int a, b, c, d, e, f, g, sum; 31 | 32 | a = 10; b = 20; c = 30; d = 40; 33 | e = 50; f = 60; g = 70; 34 | sum = SumSquares_(a, b, c, d, e, f, g); 35 | PrintResult("SumSquares - Test Case #1", a, b, c, d, e, f, g, sum); 36 | 37 | a = 10; b = -200; c = 30; d = 40; 38 | e = -500; f = 60; g = -700; 39 | sum = SumSquares_(a, b, c, d, e, f, g); 40 | PrintResult("SumSquares - Test Case #2", a, b, c, d, e, f, g, sum); 41 | 42 | } 43 | -------------------------------------------------------------------------------- /Chapter03/Ch03_01/Ch03_01_.s: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch03_01_.s 3 | //------------------------------------------------ 4 | 5 | // extern "C" int SumSquares_(int a, int b, int c, int d, int e, 6 | // int f, int g); 7 | 8 | .equ ARG_E,0 // stack offset for e 9 | .equ ARG_F,4 // stack offset for f 10 | .equ ARG_G,8 // stack offset for g 11 | 12 | .text 13 | .global SumSquares_ 14 | SumSquares_: 15 | 16 | mul r0,r0,r0 // r0 = a * a 17 | mul r1,r1,r1 // r1 = b * b 18 | add r0,r0,r1 // r0 = a * a + b * b 19 | 20 | mul r2,r2,r2 // r2 = c * c 21 | mul r3,r3,r3 // r3 = d * d 22 | add r2,r2,r3 // r2 = c * c + d * d 23 | 24 | add r0,r0,r2 // r0 =
intermediate sum 25 | 26 | ldr r1,[sp,#ARG_E] // r1 = e 27 | mul r1,r1,r1 // r1 = e * e 28 | add r0,r0,r1 // r0 = intermediate sum 29 | 30 | ldr r1,[sp,#ARG_F] // r1 = f 31 | mul r1,r1,r1 // r1 = f * f 32 | add r0,r0,r1 // r0 = intermediate sum 33 | 34 | ldr r1,[sp,#ARG_G] // r1 = g 35 | mul r1,r1,r1 // r1 = g * g 36 | add r0,r0,r1 // r0 = final sum 37 | 38 | bx lr 39 | -------------------------------------------------------------------------------- /Chapter03/Ch03_01/makefile: -------------------------------------------------------------------------------- 1 | # Target, include, and object directories 2 | TARGET = Ch03_01 3 | INCDIR1 = . 4 | INCDIR2 = ../../Include 5 | OBJDIR = obj 6 | 7 | # include files 8 | CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h) 9 | CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h) 10 | ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc) 11 | ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc) 12 | 13 | # .cpp files in current directory 14 | CPPFILES = $(wildcard *.cpp) 15 | CPPOBJFILES_ = $(CPPFILES:.cpp=.o) 16 | CPPOBJFILES = $(patsubst %, $(OBJDIR)/%, $(CPPOBJFILES_)) 17 | 18 | # .s files in current directory 19 | ASMFILES = $(wildcard *.s) 20 | ASMOBJFILES_ = $(ASMFILES:.s=.o) 21 | ASMOBJFILES = $(patsubst %, $(OBJDIR)/%, $(ASMOBJFILES_)) 22 | 23 | # Target object files 24 | OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES) 25 | 26 | # g++ and assembler options - required 27 | GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall 28 | ASMOPT = -march=armv8-a+simd 29 | 30 | # g++ and assembler options - optional (uncomment to enable) 31 | DEBUG = -g 32 | LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=$(OBJDIR) 33 | LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst 34 | 35 | # Create directory for object and temp files 36 | MKOBJDIR := $(shell mkdir -p $(OBJDIR)) 37 | 38 | # Build rules 39 | $(TARGET): $(OBJFILES) 40 | g++ $(OBJFILES) -o $(TARGET) 41 | 42 | # Note: full recompiles/assembles on any include file changes 43 | $(OBJDIR)/%.o: %.cpp 
$(CPPINCFILES1) $(CPPINCFILES2) 44 | g++ $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@ 45 | 46 | $(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2) 47 | as $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@ 48 | 49 | .PHONY: clean 50 | 51 | clean: 52 | rm -f $(TARGET) 53 | rm -rf $(OBJDIR) 54 | -------------------------------------------------------------------------------- /Chapter03/Ch03_02/makefile: -------------------------------------------------------------------------------- 1 | # Target, include, and object directories 2 | TARGET = Ch03_02 3 | INCDIR1 = . 4 | INCDIR2 = ../../Include 5 | OBJDIR = obj 6 | 7 | # include files 8 | CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h) 9 | CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h) 10 | ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc) 11 | ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc) 12 | 13 | # .cpp files in current directory 14 | CPPFILES = $(wildcard *.cpp) 15 | CPPOBJFILES_ = $(CPPFILES:.cpp=.o) 16 | CPPOBJFILES = $(patsubst %, $(OBJDIR)/%, $(CPPOBJFILES_)) 17 | 18 | # .s files in current directory 19 | ASMFILES = $(wildcard *.s) 20 | ASMOBJFILES_ = $(ASMFILES:.s=.o) 21 | ASMOBJFILES = $(patsubst %, $(OBJDIR)/%, $(ASMOBJFILES_)) 22 | 23 | # Target object files 24 | OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES) 25 | 26 | # g++ and assembler options - required 27 | GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall 28 | ASMOPT = -march=armv8-a+simd 29 | 30 | # g++ and assembler options - optional (uncomment to enable) 31 | DEBUG = -g 32 | LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=$(OBJDIR) 33 | LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst 34 | 35 | # Create directory for object and temp files 36 | MKOBJDIR := $(shell mkdir -p $(OBJDIR)) 37 | 38 | # Build rules 39 | $(TARGET): $(OBJFILES) 40 | g++ $(OBJFILES) -o $(TARGET) 41 | 42 | # Note: full recompiles/assembles on any include file changes 43 | $(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2) 44 | g++ 
$(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@ 45 | 46 | $(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2) 47 | as $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@ 48 | 49 | .PHONY: clean 50 | 51 | clean: 52 | rm -f $(TARGET) 53 | rm -rf $(OBJDIR) 54 | -------------------------------------------------------------------------------- /Chapter03/Ch03_03/makefile: -------------------------------------------------------------------------------- 1 | # Target, include, and object directories 2 | TARGET = Ch03_03 3 | INCDIR1 = . 4 | INCDIR2 = ../../Include 5 | OBJDIR = obj 6 | 7 | # include files 8 | CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h) 9 | CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h) 10 | ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc) 11 | ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc) 12 | 13 | # .cpp files in current directory 14 | CPPFILES = $(wildcard *.cpp) 15 | CPPOBJFILES_ = $(CPPFILES:.cpp=.o) 16 | CPPOBJFILES = $(patsubst %, $(OBJDIR)/%, $(CPPOBJFILES_)) 17 | 18 | # .s files in current directory 19 | ASMFILES = $(wildcard *.s) 20 | ASMOBJFILES_ = $(ASMFILES:.s=.o) 21 | ASMOBJFILES = $(patsubst %, $(OBJDIR)/%, $(ASMOBJFILES_)) 22 | 23 | # Target object files 24 | OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES) 25 | 26 | # g++ and assembler options - required 27 | GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall 28 | ASMOPT = -march=armv8-a+simd 29 | 30 | # g++ and assembler options - optional (uncomment to enable) 31 | DEBUG = -g 32 | LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=$(OBJDIR) 33 | LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst 34 | 35 | # Create directory for object and temp files 36 | MKOBJDIR := $(shell mkdir -p $(OBJDIR)) 37 | 38 | # Build rules 39 | $(TARGET): $(OBJFILES) 40 | g++ $(OBJFILES) -o $(TARGET) 41 | 42 | # Note: full recompiles/assembles on any include file changes 43 | $(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2) 44 | g++ $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) 
-I$(INCDIR1) -I$(INCDIR2) -c $< -o $@ 45 | 46 | $(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2) 47 | as $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@ 48 | 49 | .PHONY: clean 50 | 51 | clean: 52 | rm -f $(TARGET) 53 | rm -rf $(OBJDIR) 54 | -------------------------------------------------------------------------------- /Chapter03/Ch03_04/makefile: -------------------------------------------------------------------------------- 1 | # Target, include, and object directories 2 | TARGET = Ch03_04 3 | INCDIR1 = . 4 | INCDIR2 = ../../Include 5 | OBJDIR = obj 6 | 7 | # include files 8 | CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h) 9 | CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h) 10 | ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc) 11 | ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc) 12 | 13 | # .cpp files in current directory 14 | CPPFILES = $(wildcard *.cpp) 15 | CPPOBJFILES_ = $(CPPFILES:.cpp=.o) 16 | CPPOBJFILES = $(patsubst %, $(OBJDIR)/%, $(CPPOBJFILES_)) 17 | 18 | # .s files in current directory 19 | ASMFILES = $(wildcard *.s) 20 | ASMOBJFILES_ = $(ASMFILES:.s=.o) 21 | ASMOBJFILES = $(patsubst %, $(OBJDIR)/%, $(ASMOBJFILES_)) 22 | 23 | # Target object files 24 | OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES) 25 | 26 | # g++ and assembler options - required 27 | GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall 28 | ASMOPT = -march=armv8-a+simd 29 | 30 | # g++ and assembler options - optional (uncomment to enable) 31 | DEBUG = -g 32 | LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=$(OBJDIR) 33 | LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst 34 | 35 | # Create directory for object and temp files 36 | MKOBJDIR := $(shell mkdir -p $(OBJDIR)) 37 | 38 | # Build rules 39 | $(TARGET): $(OBJFILES) 40 | g++ $(OBJFILES) -o $(TARGET) 41 | 42 | # Note: full recompiles/assembles on any include file changes 43 | $(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2) 44 | g++ $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@ 45 | 
46 | $(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2) 47 | as $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@ 48 | 49 | .PHONY: clean 50 | 51 | clean: 52 | rm -f $(TARGET) 53 | rm -rf $(OBJDIR) 54 | -------------------------------------------------------------------------------- /Chapter03/Ch03_05/makefile: -------------------------------------------------------------------------------- 1 | # Target, include, and object directories 2 | TARGET = Ch03_05 3 | INCDIR1 = . 4 | INCDIR2 = ../../Include 5 | OBJDIR = obj 6 | 7 | # include files 8 | CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h) 9 | CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h) 10 | ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc) 11 | ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc) 12 | 13 | # .cpp files in current directory 14 | CPPFILES = $(wildcard *.cpp) 15 | CPPOBJFILES_ = $(CPPFILES:.cpp=.o) 16 | CPPOBJFILES = $(patsubst %, $(OBJDIR)/%, $(CPPOBJFILES_)) 17 | 18 | # .s files in current directory 19 | ASMFILES = $(wildcard *.s) 20 | ASMOBJFILES_ = $(ASMFILES:.s=.o) 21 | ASMOBJFILES = $(patsubst %, $(OBJDIR)/%, $(ASMOBJFILES_)) 22 | 23 | # Target object files 24 | OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES) 25 | 26 | # g++ and assembler options - required 27 | GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall 28 | ASMOPT = -march=armv8-a+simd 29 | 30 | # g++ and assembler options - optional (uncomment to enable) 31 | DEBUG = -g 32 | LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=$(OBJDIR) 33 | LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst 34 | 35 | # Create directory for object and temp files 36 | MKOBJDIR := $(shell mkdir -p $(OBJDIR)) 37 | 38 | # Build rules 39 | $(TARGET): $(OBJFILES) 40 | g++ $(OBJFILES) -o $(TARGET) 41 | 42 | # Note: full recompiles/assembles on any include file changes 43 | $(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2) 44 | g++ $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@ 45 | 46 | $(OBJDIR)/%.o: %.s $(ASMINCFILES1) 
$(ASMINCFILES2) 47 | as $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@ 48 | 49 | .PHONY: clean 50 | 51 | clean: 52 | rm -f $(TARGET) 53 | rm -rf $(OBJDIR) 54 | -------------------------------------------------------------------------------- /Chapter03/Ch03_06/Ch03_06_Misc.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch03_06_Misc.cpp 3 | //------------------------------------------------ 4 | 5 | #include <iostream> 6 | #include <iomanip> 7 | 8 | using namespace std; 9 | 10 | void PrintResult(const char* msg, int n, int sum1_cpp, int sum1_asm, 11 | int sum2_cpp, int sum2_asm) 12 | { 13 | const char nl = '\n'; 14 | const char* sep = " | "; 15 | const size_t w = 6; 16 | 17 | cout << nl << msg << nl; 18 | cout << "n = " << setw(w) << n << nl; 19 | 20 | cout << "sum1_cpp = " << setw(w) << sum1_cpp << sep << "sum1_asm = "; 21 | cout << setw(w) << sum1_asm << nl; 22 | 23 | cout << "sum2_cpp = " << setw(w) << sum2_cpp << sep << "sum2_asm = "; 24 | cout << setw(w) << sum2_asm << nl; 25 | } 26 | -------------------------------------------------------------------------------- /Chapter03/Ch03_06/makefile: -------------------------------------------------------------------------------- 1 | # Target, include, and object directories 2 | TARGET = Ch03_06 3 | INCDIR1 = .
4 | INCDIR2 = ../../Include 5 | OBJDIR = obj 6 | 7 | # include files 8 | CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h) 9 | CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h) 10 | ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc) 11 | ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc) 12 | 13 | # .cpp files in current directory 14 | CPPFILES = $(wildcard *.cpp) 15 | CPPOBJFILES_ = $(CPPFILES:.cpp=.o) 16 | CPPOBJFILES = $(patsubst %, $(OBJDIR)/%, $(CPPOBJFILES_)) 17 | 18 | # .s files in current directory 19 | ASMFILES = $(wildcard *.s) 20 | ASMOBJFILES_ = $(ASMFILES:.s=.o) 21 | ASMOBJFILES = $(patsubst %, $(OBJDIR)/%, $(ASMOBJFILES_)) 22 | 23 | # Target object files 24 | OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES) 25 | 26 | # g++ and assembler options - required 27 | GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall 28 | ASMOPT = -march=armv8-a+simd 29 | 30 | # g++ and assembler options - optional (uncomment to enable) 31 | DEBUG = -g 32 | LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=$(OBJDIR) 33 | LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst 34 | 35 | # Create directory for object and temp files 36 | MKOBJDIR := $(shell mkdir -p $(OBJDIR)) 37 | 38 | # Build rules 39 | $(TARGET): $(OBJFILES) 40 | g++ $(OBJFILES) -o $(TARGET) 41 | 42 | # Note: full recompiles/assembles on any include file changes 43 | $(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2) 44 | g++ $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@ 45 | 46 | $(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2) 47 | as $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@ 48 | 49 | .PHONY: clean 50 | 51 | clean: 52 | rm -f $(TARGET) 53 | rm -rf $(OBJDIR) 54 | -------------------------------------------------------------------------------- /Chapter04/Ch04_01/makefile: -------------------------------------------------------------------------------- 1 | # Target, include, and object directories 2 | TARGET = Ch04_01 3 | INCDIR1 = . 
4 | INCDIR2 = ../../Include 5 | OBJDIR = obj 6 | 7 | # include files 8 | CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h) 9 | CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h) 10 | ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc) 11 | ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc) 12 | 13 | # .cpp files in current directory 14 | CPPFILES = $(wildcard *.cpp) 15 | CPPOBJFILES_ = $(CPPFILES:.cpp=.o) 16 | CPPOBJFILES = $(patsubst %, $(OBJDIR)/%, $(CPPOBJFILES_)) 17 | 18 | # .s files in current directory 19 | ASMFILES = $(wildcard *.s) 20 | ASMOBJFILES_ = $(ASMFILES:.s=.o) 21 | ASMOBJFILES = $(patsubst %, $(OBJDIR)/%, $(ASMOBJFILES_)) 22 | 23 | # Target object files 24 | OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES) 25 | 26 | # g++ and assembler options - required 27 | GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall 28 | ASMOPT = -march=armv8-a+simd 29 | 30 | # g++ and assembler options - optional (uncomment to enable) 31 | DEBUG = -g 32 | LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=$(OBJDIR) 33 | LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst 34 | 35 | # Create directory for object and temp files 36 | MKOBJDIR := $(shell mkdir -p $(OBJDIR)) 37 | 38 | # Build rules 39 | $(TARGET): $(OBJFILES) 40 | g++ $(OBJFILES) -o $(TARGET) 41 | 42 | # Note: full recompiles/assembles on any include file changes 43 | $(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2) 44 | g++ $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@ 45 | 46 | $(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2) 47 | as $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@ 48 | 49 | .PHONY: clean 50 | 51 | clean: 52 | rm -f $(TARGET) 53 | rm -rf $(OBJDIR) 54 | -------------------------------------------------------------------------------- /Chapter04/Ch04_02/Ch04_02_.s: -------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // Ch04_02_.s 3 | 
//------------------------------------------------- 4 | 5 | // extern "C" int32_t CalcZ_(int32_t* z, const int8_t* x, const int16_t* y, int32_t n); 6 | 7 | .text 8 | .global CalcZ_ 9 | CalcZ_: push {r4-r9} 10 | 11 | mov r4,#0 // sum = 0 12 | cmp r3,#0 13 | ble Done // jump if n <= 0 14 | 15 | ldr r5,=g_Val1 16 | ldr r5,[r5] // r5 = g_Val1 17 | 18 | ldr r6,=g_Val2 19 | ldr r6,[r6] // r6 = g_Val2 20 | 21 | // Main processing loop 22 | Loop1: ldrsb r7,[r1],#1 // r7 = x[i] 23 | ldrsh r8,[r2],#2 // r8 = y[i] 24 | 25 | cmp r7,#0 // is x[i] < 0? 26 | 27 | mullt r9,r8,r5 // temp = y[i] * g_Val1 28 | // (if x[i] < 0) 29 | 30 | mulge r9,r8,r6 // temp = y[i] * g_Val2 31 | // (if x[i] >= 0) 32 | 33 | add r4,r4,r9 // sum += temp 34 | str r9,[r0],#4 // save result z[i] 35 | 36 | subs r3,#1 // n -= 1 37 | bne Loop1 // repeat until done 38 | 39 | Done: mov r0,r4 // r0 = final sum 40 | pop {r4-r9} 41 | bx lr 42 | -------------------------------------------------------------------------------- /Chapter04/Ch04_02/makefile: -------------------------------------------------------------------------------- 1 | # Target, include, and object directories 2 | TARGET = Ch04_02 3 | INCDIR1 = .
4 | INCDIR2 = ../../Include 5 | OBJDIR = obj 6 | 7 | # include files 8 | CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h) 9 | CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h) 10 | ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc) 11 | ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc) 12 | 13 | # .cpp files in current directory 14 | CPPFILES = $(wildcard *.cpp) 15 | CPPOBJFILES_ = $(CPPFILES:.cpp=.o) 16 | CPPOBJFILES = $(patsubst %, $(OBJDIR)/%, $(CPPOBJFILES_)) 17 | 18 | # .s files in current directory 19 | ASMFILES = $(wildcard *.s) 20 | ASMOBJFILES_ = $(ASMFILES:.s=.o) 21 | ASMOBJFILES = $(patsubst %, $(OBJDIR)/%, $(ASMOBJFILES_)) 22 | 23 | # Target object files 24 | OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES) 25 | 26 | # g++ and assembler options - required 27 | GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall 28 | ASMOPT = -march=armv8-a+simd 29 | 30 | # g++ and assembler options - optional (uncomment to enable) 31 | DEBUG = -g 32 | LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=$(OBJDIR) 33 | LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst 34 | 35 | # Create directory for object and temp files 36 | MKOBJDIR := $(shell mkdir -p $(OBJDIR)) 37 | 38 | # Build rules 39 | $(TARGET): $(OBJFILES) 40 | g++ $(OBJFILES) -o $(TARGET) 41 | 42 | # Note: full recompiles/assembles on any include file changes 43 | $(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2) 44 | g++ $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@ 45 | 46 | $(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2) 47 | as $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@ 48 | 49 | .PHONY: clean 50 | 51 | clean: 52 | rm -f $(TARGET) 53 | rm -rf $(OBJDIR) 54 | -------------------------------------------------------------------------------- /Chapter04/Ch04_03/Ch04_03.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // Ch04_03.cpp 3 | 
//------------------------------------------------- 4 | 5 | #include <iostream> 6 | #include <iomanip> 7 | 8 | using namespace std; 9 | 10 | extern "C" void CalcMatrixSquares_(int* y, const int* x, int m, int n); 11 | 12 | void CalcMatrixSquares(int* y, const int* x, int m, int n) 13 | { 14 | for (int i = 0; i < m; i++) 15 | { 16 | for (int j = 0; j < n; j++) 17 | { 18 | int kx = j * m + i; 19 | int ky = i * n + j; 20 | y[ky] = x[kx] * x[kx]; 21 | } 22 | } 23 | } 24 | 25 | int main() 26 | { 27 | const int m = 6; 28 | const int n = 3; 29 | int y1[m][n], y2[m][n]; 30 | 31 | int x[n][m] {{ 1, 2, 3, 4, 5, 6 }, 32 | { 7, 8, 9, 10, 11, 12 }, 33 | { 13, 14, 15, 16, 17, 18 }}; 34 | 35 | CalcMatrixSquares(&y1[0][0], &x[0][0], m, n); 36 | CalcMatrixSquares_(&y2[0][0], &x[0][0], m, n); 37 | 38 | for (int i = 0; i < m; i++) 39 | { 40 | for (int j = 0; j < n; j++) 41 | { 42 | cout << "y1[" << setw(2) << i << "][" << setw(2) << j << "] = "; 43 | cout << setw(6) << y1[i][j] << ' ' ; 44 | 45 | cout << "y2[" << setw(2) << i << "][" << setw(2) << j << "] = "; 46 | cout << setw(6) << y2[i][j] << ' '; 47 | 48 | cout << "x[" << setw(2) << j << "][" << setw(2) << i << "] = "; 49 | cout << setw(6) << x[j][i] << '\n'; 50 | 51 | if (y1[i][j] != y2[i][j]) 52 | cout << "Compare failed\n"; 53 | } 54 | } 55 | 56 | return 0; 57 | } 58 | -------------------------------------------------------------------------------- /Chapter04/Ch04_03/Ch04_03_.s: -------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // Ch04_03_.s 3 | //------------------------------------------------- 4 | 5 | // extern "C" void CalcMatrixSquares_(int* y, const int* x, int m, int n); 6 | 7 | .text 8 | .global CalcMatrixSquares_ 9 | CalcMatrixSquares_: 10 | push {r4-r8} 11 | 12 | cmp r2,#0 13 | ble Done // jump if m <= 0 14 | cmp r3,#0 15 | ble Done // jump if n <= 0 16 | 17 | mov r4,#0 // i = 0 18 | 19 | Loop1: mov r5,#0 // j = 0 20 | 21 | Loop2: mov r6,r5 // r6 =
j 22 | mul r6,r6,r2 // r6 = j * m 23 | add r6,r6,r4 // kx = j * m + i 24 | ldr r7,[r1,r6,lsl #2] // r7 = x[kx] (x[j][i]) 25 | 26 | mul r7,r7,r7 // r7 = x[j][i] * x[j][i] 27 | 28 | mov r8,r4 // r8 = i 29 | mul r8,r8,r3 // r8 = i * n 30 | add r8,r8,r5 // ky = i * n + j 31 | str r7,[r0,r8,lsl #2] // save y[ky] (y[i][j]) 32 | 33 | add r5,#1 // j += 1 34 | cmp r5,r3 35 | blt Loop2 // jump if j < n 36 | 37 | add r4,#1 // i += 1 38 | cmp r4,r2 39 | blt Loop1 // jump if i < m 40 | 41 | Done: pop {r4-r8} 42 | bx lr 43 | 44 | -------------------------------------------------------------------------------- /Chapter04/Ch04_03/makefile: -------------------------------------------------------------------------------- 1 | # Target, include, and object directories 2 | TARGET = Ch04_03 3 | INCDIR1 = . 4 | INCDIR2 = ../../Include 5 | OBJDIR = obj 6 | 7 | # include files 8 | CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h) 9 | CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h) 10 | ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc) 11 | ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc) 12 | 13 | # .cpp files in current directory 14 | CPPFILES = $(wildcard *.cpp) 15 | CPPOBJFILES_ = $(CPPFILES:.cpp=.o) 16 | CPPOBJFILES = $(patsubst %, $(OBJDIR)/%, $(CPPOBJFILES_)) 17 | 18 | # .s files in current directory 19 | ASMFILES = $(wildcard *.s) 20 | ASMOBJFILES_ = $(ASMFILES:.s=.o) 21 | ASMOBJFILES = $(patsubst %, $(OBJDIR)/%, $(ASMOBJFILES_)) 22 | 23 | # Target object files 24 | OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES) 25 | 26 | # g++ and assembler options - required 27 | GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall 28 | ASMOPT = -march=armv8-a+simd 29 | 30 | # g++ and assembler options - optional (uncomment to enable) 31 | DEBUG = -g 32 | LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=$(OBJDIR) 33 | LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst 34 | 35 | # Create directory for object and temp files 36 | MKOBJDIR := $(shell mkdir -p $(OBJDIR)) 37 | 38 | # Build rules 39 | $(TARGET): 
$(OBJFILES) 40 | g++ $(OBJFILES) -o $(TARGET) 41 | 42 | # Note: full recompiles/assembles on any include file changes 43 | $(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2) 44 | g++ $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@ 45 | 46 | $(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2) 47 | as $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@ 48 | 49 | .PHONY: clean 50 | 51 | clean: 52 | rm -f $(TARGET) 53 | rm -rf $(OBJDIR) 54 | -------------------------------------------------------------------------------- /Chapter04/Ch04_04/Ch04_04_Misc.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // Ch04_04_Misc.cpp 3 | //------------------------------------------------- 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | using namespace std; 11 | 12 | void PrintResult(const char* msg, const int* row_sums, const int* col_sums, const int* x, int nrows, int ncols) 13 | { 14 | const int w = 6; 15 | const char nl = '\n'; 16 | const string s(48, '-'); 17 | 18 | cout << nl << nl << msg << nl; 19 | cout << s << nl; 20 | 21 | for (int i = 0; i < nrows; i++) 22 | { 23 | for (int j = 0; j < ncols; j++) 24 | cout << setw(w) << x[i* ncols + j]; 25 | cout << " " << setw(w) << row_sums[i] << nl; 26 | } 27 | 28 | cout << nl; 29 | 30 | for (int i = 0; i < ncols; i++) 31 | cout << setw(w) << col_sums[i]; 32 | cout << nl; 33 | } 34 | -------------------------------------------------------------------------------- /Chapter04/Ch04_04/makefile: -------------------------------------------------------------------------------- 1 | # Target, include, and object directories 2 | TARGET = Ch04_04 3 | INCDIR1 = . 
4 | INCDIR2 = ../../Include 5 | OBJDIR = obj 6 | 7 | # include files 8 | CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h) 9 | CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h) 10 | ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc) 11 | ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc) 12 | 13 | # .cpp files in current directory 14 | CPPFILES = $(wildcard *.cpp) 15 | CPPOBJFILES_ = $(CPPFILES:.cpp=.o) 16 | CPPOBJFILES = $(patsubst %, $(OBJDIR)/%, $(CPPOBJFILES_)) 17 | 18 | # .s files in current directory 19 | ASMFILES = $(wildcard *.s) 20 | ASMOBJFILES_ = $(ASMFILES:.s=.o) 21 | ASMOBJFILES = $(patsubst %, $(OBJDIR)/%, $(ASMOBJFILES_)) 22 | 23 | # Target object files 24 | OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES) 25 | 26 | # g++ and assembler options - required 27 | GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall 28 | ASMOPT = -march=armv8-a+simd 29 | 30 | # g++ and assembler options - optional (uncomment to enable) 31 | DEBUG = -g 32 | LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=$(OBJDIR) 33 | LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst 34 | 35 | # Create directory for object and temp files 36 | MKOBJDIR := $(shell mkdir -p $(OBJDIR)) 37 | 38 | # Build rules 39 | $(TARGET): $(OBJFILES) 40 | g++ $(OBJFILES) -o $(TARGET) 41 | 42 | # Note: full recompiles/assembles on any include file changes 43 | $(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2) 44 | g++ $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@ 45 | 46 | $(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2) 47 | as $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@ 48 | 49 | .PHONY: clean 50 | 51 | clean: 52 | rm -f $(TARGET) 53 | rm -rf $(OBJDIR) 54 | -------------------------------------------------------------------------------- /Chapter04/Ch04_05/Ch04_05.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // Ch04_05.cpp 3 | 
//------------------------------------------------- 4 | 5 | #include <iostream> 6 | #include <iomanip> 7 | #include <random> 8 | 9 | using namespace std; 10 | 11 | extern "C" void ReverseArrayA_(int* y, const int* x, int n); 12 | extern "C" void ReverseArrayB_(int* x, int n); 13 | 14 | void Init(int* x, int n, unsigned int seed) 15 | { 16 | uniform_int_distribution<> d {1, 1000}; 17 | mt19937 rng {seed}; 18 | 19 | for (int i = 0; i < n; i++) 20 | x[i] = d(rng); 21 | } 22 | 23 | void PrintArray(const char* msg, const int* x, int n) 24 | { 25 | const char nl = '\n'; 26 | 27 | cout << nl << msg << nl; 28 | 29 | for (int i = 0; i < n; i++) 30 | { 31 | cout << setw(5) << x[i]; 32 | 33 | if ((i + 1) % 10 == 0) 34 | cout << nl; 35 | } 36 | cout << nl; 37 | } 38 | 39 | void ReverseArrayA(void) 40 | { 41 | const int n = 25; 42 | int x[n], y[n]; 43 | 44 | Init(x, n, 32); 45 | PrintArray("ReverseArrayA - original array x", x, n); 46 | ReverseArrayA_(y, x, n); 47 | PrintArray("ReverseArrayA - reversed array y", y, n); 48 | } 49 | 50 | void ReverseArrayB(void) 51 | { 52 | const int n = 25; 53 | int x[n]; 54 | 55 | Init(x, n, 32); 56 | PrintArray("ReverseArrayB - array x before reversal", x, n); 57 | ReverseArrayB_(x, n); 58 | PrintArray("ReverseArrayB - array x after reversal", x, n); 59 | } 60 | 61 | int main() 62 | { 63 | ReverseArrayA(); 64 | ReverseArrayB(); 65 | return 0; 66 | } 67 | -------------------------------------------------------------------------------- /Chapter04/Ch04_05/makefile: -------------------------------------------------------------------------------- 1 | # Target, include, and object directories 2 | TARGET = Ch04_05 3 | INCDIR1 = . 
4 | INCDIR2 = ../../Include 5 | OBJDIR = obj 6 | 7 | # include files 8 | CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h) 9 | CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h) 10 | ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc) 11 | ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc) 12 | 13 | # .cpp files in current directory 14 | CPPFILES = $(wildcard *.cpp) 15 | CPPOBJFILES_ = $(CPPFILES:.cpp=.o) 16 | CPPOBJFILES = $(patsubst %, $(OBJDIR)/%, $(CPPOBJFILES_)) 17 | 18 | # .s files in current directory 19 | ASMFILES = $(wildcard *.s) 20 | ASMOBJFILES_ = $(ASMFILES:.s=.o) 21 | ASMOBJFILES = $(patsubst %, $(OBJDIR)/%, $(ASMOBJFILES_)) 22 | 23 | # Target object files 24 | OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES) 25 | 26 | # g++ and assembler options - required 27 | GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall 28 | ASMOPT = -march=armv8-a+simd 29 | 30 | # g++ and assembler options - optional (uncomment to enable) 31 | DEBUG = -g 32 | LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=$(OBJDIR) 33 | LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst 34 | 35 | # Create directory for object and temp files 36 | MKOBJDIR := $(shell mkdir -p $(OBJDIR)) 37 | 38 | # Build rules 39 | $(TARGET): $(OBJFILES) 40 | g++ $(OBJFILES) -o $(TARGET) 41 | 42 | # Note: full recompiles/assembles on any include file changes 43 | $(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2) 44 | g++ $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@ 45 | 46 | $(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2) 47 | as $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@ 48 | 49 | .PHONY: clean 50 | 51 | clean: 52 | rm -f $(TARGET) 53 | rm -rf $(OBJDIR) 54 | -------------------------------------------------------------------------------- /Chapter04/Ch04_06/Ch04_06_.s: -------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // Ch04_06_.s 3 | 
//------------------------------------------------- 4 | 5 | // extern "C" int32_t CalcTestStructSum_(const TestStruct* ts); 6 | 7 | // Offsets for TestStruct 8 | .equ S_ValA,0 // int8_t 9 | .equ S_ValB,1 // int8_t 10 | .equ S_ValC,4 // int32_t 11 | .equ S_ValD,8 // int16_t 12 | .equ S_ValE,12 // int32_t 13 | .equ S_ValF,16 // uint8_t 14 | .equ S_ValG,18 // uint16_t 15 | 16 | .text 17 | .global CalcTestStructSum_ 18 | CalcTestStructSum_: 19 | 20 | // Sum the elements of TestStruct 21 | ldrsb r1,[r0,#S_ValA] // r1 = ValA (sign-extended) 22 | ldrsb r2,[r0,#S_ValB] // r2 = ValB (sign-extended) 23 | add r1,r1,r2 24 | 25 | ldr r2,[r0,#S_ValC] // r2 = ValC 26 | add r1,r1,r2 27 | 28 | ldrsh r2,[r0,#S_ValD] // r2 = ValD (sign-extended) 29 | add r1,r1,r2 30 | 31 | ldr r2,[r0,#S_ValE] // r2 = ValE 32 | add r1,r1,r2 33 | 34 | ldrb r2,[r0,#S_ValF] // r2 = ValF (zero-extended) 35 | add r1,r1,r2 36 | 37 | ldrh r2,[r0,#S_ValG] // r2 = ValG (zero-extended) 38 | add r1,r1,r2 39 | 40 | mov r0,r1 41 | bx lr 42 | -------------------------------------------------------------------------------- /Chapter04/Ch04_06/makefile: -------------------------------------------------------------------------------- 1 | # Target, include, and object directories 2 | TARGET = Ch04_06 3 | INCDIR1 = . 
4 | INCDIR2 = ../../Include 5 | OBJDIR = obj 6 | 7 | # include files 8 | CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h) 9 | CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h) 10 | ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc) 11 | ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc) 12 | 13 | # .cpp files in current directory 14 | CPPFILES = $(wildcard *.cpp) 15 | CPPOBJFILES_ = $(CPPFILES:.cpp=.o) 16 | CPPOBJFILES = $(patsubst %, $(OBJDIR)/%, $(CPPOBJFILES_)) 17 | 18 | # .s files in current directory 19 | ASMFILES = $(wildcard *.s) 20 | ASMOBJFILES_ = $(ASMFILES:.s=.o) 21 | ASMOBJFILES = $(patsubst %, $(OBJDIR)/%, $(ASMOBJFILES_)) 22 | 23 | # Target object files 24 | OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES) 25 | 26 | # g++ and assembler options - required 27 | GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall 28 | ASMOPT = -march=armv8-a+simd 29 | 30 | # g++ and assembler options - optional (uncomment to enable) 31 | DEBUG = -g 32 | LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=$(OBJDIR) 33 | LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst 34 | 35 | # Create directory for object and temp files 36 | MKOBJDIR := $(shell mkdir -p $(OBJDIR)) 37 | 38 | # Build rules 39 | $(TARGET): $(OBJFILES) 40 | g++ $(OBJFILES) -o $(TARGET) 41 | 42 | # Note: full recompiles/assembles on any include file changes 43 | $(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2) 44 | g++ $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@ 45 | 46 | $(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2) 47 | as $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@ 48 | 49 | .PHONY: clean 50 | 51 | clean: 52 | rm -f $(TARGET) 53 | rm -rf $(OBJDIR) 54 | -------------------------------------------------------------------------------- /Chapter05/Ch05_01/Ch05_01.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch05_01.cpp 3 | 
//------------------------------------------------ 4 | 5 | #include <iostream> 6 | #include <iomanip> 7 | 8 | using namespace std; 9 | 10 | union Val 11 | { 12 | float f; 13 | unsigned int u; 14 | }; 15 | 16 | void PrintResult(const char* msg, float a, float b, float c, float d) 17 | { 18 | const char nl = '\n'; 19 | const size_t w = 8; 20 | 21 | Val v; 22 | v.f = d; 23 | 24 | cout << fixed << setprecision(8); 25 | cout << nl << msg << nl; 26 | cout << "a: " << setw(w) << a << nl; 27 | cout << "b: " << setw(w) << b << nl; 28 | cout << "c: " << setw(w) << c << nl; 29 | cout << scientific << setprecision(8); 30 | cout << "d (float): " << setw(w) << d << nl; 31 | cout << "d (binary): 0x" << hex << setw(8) << v.u << nl; 32 | } 33 | 34 | int main(int argc, char** argv) 35 | { 36 | float a = 0.01; 37 | float b = 0.001; 38 | float c = 0.0001; 39 | float d1 = (a * b) * c; 40 | float d2 = a * (b * c); 41 | 42 | PrintResult("Results for (a * b) * c", a, b, c, d1); 43 | PrintResult("Results for a * (b * c)", a, b, c, d2); 44 | } 45 | -------------------------------------------------------------------------------- /Chapter05/Ch05_01/makefile: -------------------------------------------------------------------------------- 1 | # Target, include, and object directories 2 | TARGET = Ch05_01 3 | INCDIR1 = . 
4 | INCDIR2 = ../../Include 5 | OBJDIR = obj 6 | 7 | # include files 8 | CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h) 9 | CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h) 10 | ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc) 11 | ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc) 12 | 13 | # .cpp files in current directory 14 | CPPFILES = $(wildcard *.cpp) 15 | CPPOBJFILES_ = $(CPPFILES:.cpp=.o) 16 | CPPOBJFILES = $(patsubst %, $(OBJDIR)/%, $(CPPOBJFILES_)) 17 | 18 | # .s files in current directory 19 | ASMFILES = $(wildcard *.s) 20 | ASMOBJFILES_ = $(ASMFILES:.s=.o) 21 | ASMOBJFILES = $(patsubst %, $(OBJDIR)/%, $(ASMOBJFILES_)) 22 | 23 | # Target object files 24 | OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES) 25 | 26 | # g++ and assembler options - required 27 | GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall 28 | ASMOPT = -march=armv8-a+simd 29 | 30 | # g++ and assembler options - optional (uncomment to enable) 31 | DEBUG = -g 32 | LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=$(OBJDIR) 33 | LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst 34 | 35 | # Create directory for object and temp files 36 | MKOBJDIR := $(shell mkdir -p $(OBJDIR)) 37 | 38 | # Build rules 39 | $(TARGET): $(OBJFILES) 40 | g++ $(OBJFILES) -o $(TARGET) 41 | 42 | # Note: full recompiles/assembles on any include file changes 43 | $(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2) 44 | g++ $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@ 45 | 46 | $(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2) 47 | as $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@ 48 | 49 | .PHONY: clean 50 | 51 | clean: 52 | rm -f $(TARGET) 53 | rm -rf $(OBJDIR) 54 | -------------------------------------------------------------------------------- /Chapter06/Ch06_01/Ch06_01.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // Ch06_01.cpp 3 | 
//------------------------------------------------- 4 | 5 | #include <iostream> 6 | #include <iomanip> 7 | 8 | using namespace std; 9 | 10 | extern "C" float ConvertFtoC_(float deg_f); 11 | extern "C" float ConvertCtoF_(float deg_c); 12 | 13 | int main(int argc, char** argv) 14 | { 15 | const char nl = '\n'; 16 | const int w = 10; 17 | float deg_fvals[] = {-459.67f, -40.0f, 0.0f, 32.0f, 18 | 72.0f, 98.6f, 212.0f}; 19 | size_t nf = sizeof(deg_fvals) / sizeof(float); 20 | 21 | cout << setprecision(6); 22 | 23 | cout << "\n-------- ConvertFtoC Results --------\n"; 24 | 25 | for (size_t i = 0; i < nf; i++) 26 | { 27 | float deg_c = ConvertFtoC_(deg_fvals[i]); 28 | 29 | cout << " i: " << i << " "; 30 | cout << "f: " << setw(w) << deg_fvals[i] << " "; 31 | cout << "c: " << setw(w) << deg_c << nl; 32 | } 33 | 34 | cout << "\n-------- ConvertCtoF Results --------\n"; 35 | 36 | float deg_cvals[] = {-273.15f, -40.0f, -17.777778f, 0.0f, 37 | 25.0f, 37.0f, 100.0f}; 38 | size_t nc = sizeof(deg_cvals) / sizeof(float); 39 | 40 | for (size_t i = 0; i < nc; i++) 41 | { 42 | float deg_f = ConvertCtoF_(deg_cvals[i]); 43 | 44 | cout << " i: " << i << " "; 45 | cout << "c: " << setw(w) << deg_cvals[i] << " "; 46 | cout << "f: " << setw(w) << deg_f << nl; 47 | } 48 | 49 | return 0; 50 | } 51 | -------------------------------------------------------------------------------- /Chapter06/Ch06_01/Ch06_01_.s: -------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // Ch06_01_.s 3 | //------------------------------------------------- 4 | 5 | // Constants for temperature conversion functions 6 | .text 7 | r4_ScaleFtoC: .single 0.55555556 // 5 / 9 8 | r4_ScaleCtoF: .single 1.8 // 9 / 5 9 | r4_32p0: .single 32.0 10 | 11 | // extern "C" float ConvertFtoC_(float deg_f); 12 | 13 | .global ConvertFtoC_ 14 | ConvertFtoC_: 15 | vldr.f32 s1,r4_32p0 // s1 = 32 16 | vldr.f32 s2,r4_ScaleFtoC // s2 = 5 / 9 17 | vsub.f32 s3,s0,s1 // s3 = deg_f - 
32 18 | vmul.f32 s0,s3,s2 // s0 = (deg_f - 32) * 5 / 9 19 | bx lr 20 | 21 | // extern "C" float ConvertCtoF_(float deg_c); 22 | 23 | .global ConvertCtoF_ 24 | ConvertCtoF_: 25 | vldr.f32 s1,r4_32p0 // s1 = 32 26 | vldr.f32 s2,r4_ScaleCtoF // s2 = 9 / 5 27 | vmul.f32 s3,s0,s2 // s3 = deg_c * 9 / 5 28 | vadd.f32 s0,s3,s1 // s0 = deg_c * 9 / 5 + 32 29 | bx lr 30 | 31 | -------------------------------------------------------------------------------- /Chapter06/Ch06_01/makefile: -------------------------------------------------------------------------------- 1 | # Target, include, and object directories 2 | TARGET = Ch06_01 3 | INCDIR1 = . 4 | INCDIR2 = ../../Include 5 | OBJDIR = obj 6 | 7 | # include files 8 | CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h) 9 | CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h) 10 | ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc) 11 | ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc) 12 | 13 | # .cpp files in current directory 14 | CPPFILES = $(wildcard *.cpp) 15 | CPPOBJFILES_ = $(CPPFILES:.cpp=.o) 16 | CPPOBJFILES = $(patsubst %, $(OBJDIR)/%, $(CPPOBJFILES_)) 17 | 18 | # .s files in current directory 19 | ASMFILES = $(wildcard *.s) 20 | ASMOBJFILES_ = $(ASMFILES:.s=.o) 21 | ASMOBJFILES = $(patsubst %, $(OBJDIR)/%, $(ASMOBJFILES_)) 22 | 23 | # Target object files 24 | OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES) 25 | 26 | # g++ and assembler options - required 27 | GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall 28 | ASMOPT = -march=armv8-a+simd 29 | 30 | # g++ and assembler options - optional (uncomment to enable) 31 | DEBUG = -g 32 | LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=$(OBJDIR) 33 | LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst 34 | 35 | # Create directory for object and temp files 36 | MKOBJDIR := $(shell mkdir -p $(OBJDIR)) 37 | 38 | # Build rules 39 | $(TARGET): $(OBJFILES) 40 | g++ $(OBJFILES) -o $(TARGET) 41 | 42 | # Note: full recompiles/assembles on any include file changes 43 | $(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) 
$(CPPINCFILES2) 44 | g++ $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@ 45 | 46 | $(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2) 47 | as $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@ 48 | 49 | .PHONY: clean 50 | 51 | clean: 52 | rm -f $(TARGET) 53 | rm -rf $(OBJDIR) 54 | -------------------------------------------------------------------------------- /Chapter06/Ch06_02/Ch06_02.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // Ch06_02.cpp 3 | //------------------------------------------------- 4 | 5 | #include <iostream> 6 | #include <iomanip> 7 | 8 | using namespace std; 9 | 10 | extern "C" void CalcSphereAreaVolume_(double r, double* sa, double* vol); 11 | 12 | int main(int argc, char** argv) 13 | { 14 | double r[] = { 0.0, 1.0, 2.0, 3.0, 5.0, 10.0, 20.0, 32.0 }; 15 | size_t num_r = sizeof(r) / sizeof(double); 16 | 17 | cout << setprecision(8); 18 | cout << "\n------ Results for CalcSphereAreaVolume ------\n"; 19 | 20 | for (size_t i = 0; i < num_r; i++) 21 | { 22 | double sa = -1, vol = -1; 23 | 24 | CalcSphereAreaVolume_(r[i], &sa, &vol); 25 | 26 | cout << "r: " << setw(6) << r[i] << " "; 27 | cout << "sa: " << setw(11) << sa << " "; 28 | cout << "vol: " << setw(11) << vol << '\n'; 29 | } 30 | 31 | return 0; 32 | } 33 | -------------------------------------------------------------------------------- /Chapter06/Ch06_02/Ch06_02_.s: -------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // Ch06_02_.s 3 | //------------------------------------------------- 4 | 5 | .text 6 | r8_PI: .double 3.14159265358979323846 7 | 8 | // extern "C" void CalcSphereAreaVolume_(double r, double* sa, double* vol); 9 | 10 | .global CalcSphereAreaVolume_ 11 | CalcSphereAreaVolume_: 12 | 13 | // Calculate sphere surface area and volume 14 | vldr.f64 d5,r8_PI // d5 = PI 15 | 
vmov.f64 d6,#4.0 // d6 = 4.0 16 | vmov.f64 d7,#3.0 // d7 = 3.0 17 | 18 | vmul.f64 d1,d0,d0 // d1 = r * r 19 | vmul.f64 d1,d1,d5 // d1 = r * r * PI 20 | vmul.f64 d1,d1,d6 // d1 = r * r * PI * 4 21 | vstr.f64 d1,[r0] // save surface area 22 | 23 | vmul.f64 d2,d1,d0 // d2 = sa * r 24 | vdiv.f64 d3,d2,d7 // d3 = sa * r / 3 25 | vstr.f64 d3,[r1] // save volume 26 | bx lr 27 | 28 | -------------------------------------------------------------------------------- /Chapter06/Ch06_02/makefile: -------------------------------------------------------------------------------- 1 | # Target, include, and object directories 2 | TARGET = Ch06_02 3 | INCDIR1 = . 4 | INCDIR2 = ../../Include 5 | OBJDIR = obj 6 | 7 | # include files 8 | CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h) 9 | CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h) 10 | ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc) 11 | ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc) 12 | 13 | # .cpp files in current directory 14 | CPPFILES = $(wildcard *.cpp) 15 | CPPOBJFILES_ = $(CPPFILES:.cpp=.o) 16 | CPPOBJFILES = $(patsubst %, $(OBJDIR)/%, $(CPPOBJFILES_)) 17 | 18 | # .s files in current directory 19 | ASMFILES = $(wildcard *.s) 20 | ASMOBJFILES_ = $(ASMFILES:.s=.o) 21 | ASMOBJFILES = $(patsubst %, $(OBJDIR)/%, $(ASMOBJFILES_)) 22 | 23 | # Target object files 24 | OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES) 25 | 26 | # g++ and assembler options - required 27 | GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall 28 | ASMOPT = -march=armv8-a+simd 29 | 30 | # g++ and assembler options - optional (uncomment to enable) 31 | DEBUG = -g 32 | LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=$(OBJDIR) 33 | LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst 34 | 35 | # Create directory for object and temp files 36 | MKOBJDIR := $(shell mkdir -p $(OBJDIR)) 37 | 38 | # Build rules 39 | $(TARGET): $(OBJFILES) 40 | g++ $(OBJFILES) -o $(TARGET) 41 | 42 | # Note: full recompiles/assembles on any include file changes 43 | $(OBJDIR)/%.o: %.cpp 
$(CPPINCFILES1) $(CPPINCFILES2) 44 | g++ $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@ 45 | 46 | $(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2) 47 | as $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@ 48 | 49 | .PHONY: clean 50 | 51 | clean: 52 | rm -f $(TARGET) 53 | rm -rf $(OBJDIR) 54 | -------------------------------------------------------------------------------- /Chapter06/Ch06_03/Ch06_03_.s: -------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // Ch06_03_.s 3 | //------------------------------------------------- 4 | 5 | // extern "C" double CalcDist_(double x1, double y1, double z1, double x2, double y2, 6 | // double z2); 7 | 8 | .text 9 | .global CalcDist_ 10 | 11 | CalcDist_: vsub.f64 d0,d3,d0 // d0 = x2 - x1 12 | vmul.f64 d0,d0,d0 // d0 = (x2 - x1) ** 2 13 | 14 | vsub.f64 d1,d4,d1 // d1 = y2 - y1 15 | vfma.f64 d0,d1,d1 // d0 += (y2 - y1) ** 2 16 | 17 | vsub.f64 d2,d5,d2 // d2 = z2 - z1 18 | vfma.f64 d0,d2,d2 // d0 += (z2 - z1) ** 2 19 | 20 | vsqrt.f64 d0,d0 // d0 = final distance 21 | bx lr 22 | -------------------------------------------------------------------------------- /Chapter06/Ch06_03/makefile: -------------------------------------------------------------------------------- 1 | # Target, include, and object directories 2 | TARGET = Ch06_03 3 | INCDIR1 = . 
4 | INCDIR2 = ../../Include 5 | OBJDIR = obj 6 | 7 | # include files 8 | CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h) 9 | CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h) 10 | ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc) 11 | ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc) 12 | 13 | # .cpp files in current directory 14 | CPPFILES = $(wildcard *.cpp) 15 | CPPOBJFILES_ = $(CPPFILES:.cpp=.o) 16 | CPPOBJFILES = $(patsubst %, $(OBJDIR)/%, $(CPPOBJFILES_)) 17 | 18 | # .s files in current directory 19 | ASMFILES = $(wildcard *.s) 20 | ASMOBJFILES_ = $(ASMFILES:.s=.o) 21 | ASMOBJFILES = $(patsubst %, $(OBJDIR)/%, $(ASMOBJFILES_)) 22 | 23 | # Target object files 24 | OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES) 25 | 26 | # g++ and assembler options - required 27 | GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall 28 | ASMOPT = -march=armv8-a+simd 29 | 30 | # g++ and assembler options - optional (uncomment to enable) 31 | DEBUG = -g 32 | LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=$(OBJDIR) 33 | LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst 34 | 35 | # Create directory for object and temp files 36 | MKOBJDIR := $(shell mkdir -p $(OBJDIR)) 37 | 38 | # Build rules 39 | $(TARGET): $(OBJFILES) 40 | g++ $(OBJFILES) -o $(TARGET) 41 | 42 | # Note: full recompiles/assembles on any include file changes 43 | $(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2) 44 | g++ $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@ 45 | 46 | $(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2) 47 | as $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@ 48 | 49 | .PHONY: clean 50 | 51 | clean: 52 | rm -f $(TARGET) 53 | rm -rf $(OBJDIR) 54 | -------------------------------------------------------------------------------- /Chapter06/Ch06_04/Ch06_04.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // Ch06_04.cpp 3 | 
//------------------------------------------------- 4 | 5 | #include <iostream> 6 | #include <iomanip> 7 | #include <string> 8 | #include <limits> 9 | 10 | using namespace std; 11 | 12 | extern "C" void CompareF32_(bool* results, float a, float b); 13 | 14 | const char* c_OpStrings[] = {"UO", "LT", "LE", "EQ", "NE", "GT", "GE"}; 15 | const size_t c_NumOpStrings = sizeof(c_OpStrings) / sizeof(char*); 16 | 17 | template <typename T> void PrintResults(const bool* cmp_results, T a, T b) 18 | { 19 | cout << "a = " << a << ", "; 20 | cout << "b = " << b << '\n'; 21 | 22 | for (size_t i = 0; i < c_NumOpStrings; i++) 23 | { 24 | cout << c_OpStrings[i] << '='; 25 | cout << boolalpha << left << setw(6) << cmp_results[i] << ' '; 26 | } 27 | 28 | cout << "\n\n"; 29 | } 30 | 31 | void CompareF32(void) 32 | { 33 | const size_t n = 7; 34 | float a[n] {120.0, 250.0, 300.0, -18.0, -81.0, -250.0, 42.0}; 35 | float b[n] {130.0, 240.0, 300.0, 32.0, -100.0, -75.0, 0.0}; 36 | 37 | // Set NAN test value 38 | b[n - 1] = numeric_limits<float>::quiet_NaN(); 39 | 40 | const string dashes(72, '-'); 41 | cout << "\nResults for CompareF32\n"; 42 | cout << dashes << '\n'; 43 | 44 | for (size_t i = 0; i < n; i++) 45 | { 46 | bool cmp_results[c_NumOpStrings]; 47 | 48 | CompareF32_(cmp_results, a[i], b[i]); 49 | PrintResults(cmp_results, a[i], b[i]); 50 | } 51 | } 52 | 53 | int main() 54 | { 55 | CompareF32(); 56 | return 0; 57 | } 58 | -------------------------------------------------------------------------------- /Chapter06/Ch06_04/Ch06_04_.s: -------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // Ch06_04_.s 3 | //------------------------------------------------- 4 | 5 | // extern "C" void CompareF32_(bool* results, float a, float b); 6 | .text 7 | .global CompareF32_ 8 | 9 | CompareF32_: 10 | vcmpe.f32 s0,s1 // compare F32 values a and b 11 | vmrs APSR_nzcv,fpscr // move compare results 12 | 13 | mov r1,#0 14 | movvs r1,#1 // r1 = 1 if unordered 15 | strb 
r1,[r0,#0] // save result 16 | 17 | mov r1,#0 18 | movlo r1,#1 // r1 = 1 if a < b 19 | strb r1,[r0,#1] // save result 20 | 21 | mov r1,#0 22 | movls r1,#1 // r1 = 1 if a <= b 23 | strb r1,[r0,#2] // save result 24 | 25 | mov r1,#0 26 | moveq r1,#1 // r1 = 1 if a == b 27 | strb r1,[r0,#3] // save result 28 | 29 | mov r1,#0 30 | movne r1,#1 // r1 = 1 if a != b 31 | strb r1,[r0,#4] // save result 32 | 33 | mov r1,#0 34 | movgt r1,#1 // r1 = 1 if a > b 35 | strb r1,[r0,#5] // save result 36 | 37 | mov r1,#0 38 | movge r1,#1 // r1 = 1 if a >= b 39 | strb r1,[r0,#6] // save result 40 | 41 | bx lr 42 | -------------------------------------------------------------------------------- /Chapter06/Ch06_04/makefile: -------------------------------------------------------------------------------- 1 | # Target, include, and object directories 2 | TARGET = Ch06_04 3 | INCDIR1 = . 4 | INCDIR2 = ../../Include 5 | OBJDIR = obj 6 | 7 | # include files 8 | CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h) 9 | CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h) 10 | ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc) 11 | ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc) 12 | 13 | # .cpp files in current directory 14 | CPPFILES = $(wildcard *.cpp) 15 | CPPOBJFILES_ = $(CPPFILES:.cpp=.o) 16 | CPPOBJFILES = $(patsubst %, $(OBJDIR)/%, $(CPPOBJFILES_)) 17 | 18 | # .s files in current directory 19 | ASMFILES = $(wildcard *.s) 20 | ASMOBJFILES_ = $(ASMFILES:.s=.o) 21 | ASMOBJFILES = $(patsubst %, $(OBJDIR)/%, $(ASMOBJFILES_)) 22 | 23 | # Target object files 24 | OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES) 25 | 26 | # g++ and assembler options - required 27 | GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall 28 | ASMOPT = -march=armv8-a+simd 29 | 30 | # g++ and assembler options - optional (uncomment to enable) 31 | DEBUG = -g 32 | LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=$(OBJDIR) 33 | LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst 34 | 35 | # Create directory for object and temp files 36 | 
MKOBJDIR := $(shell mkdir -p $(OBJDIR)) 37 | 38 | # Build rules 39 | $(TARGET): $(OBJFILES) 40 | g++ $(OBJFILES) -o $(TARGET) 41 | 42 | # Note: full recompiles/assembles on any include file changes 43 | $(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2) 44 | g++ $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@ 45 | 46 | $(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2) 47 | as $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@ 48 | 49 | .PHONY: clean 50 | 51 | clean: 52 | rm -f $(TARGET) 53 | rm -rf $(OBJDIR) 54 | -------------------------------------------------------------------------------- /Chapter06/Ch06_05/makefile: -------------------------------------------------------------------------------- 1 | # Target, include, and object directories 2 | TARGET = Ch06_05 3 | INCDIR1 = . 4 | INCDIR2 = ../../Include 5 | OBJDIR = obj 6 | 7 | # include files 8 | CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h) 9 | CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h) 10 | ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc) 11 | ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc) 12 | 13 | # .cpp files in current directory 14 | CPPFILES = $(wildcard *.cpp) 15 | CPPOBJFILES_ = $(CPPFILES:.cpp=.o) 16 | CPPOBJFILES = $(patsubst %, $(OBJDIR)/%, $(CPPOBJFILES_)) 17 | 18 | # .s files in current directory 19 | ASMFILES = $(wildcard *.s) 20 | ASMOBJFILES_ = $(ASMFILES:.s=.o) 21 | ASMOBJFILES = $(patsubst %, $(OBJDIR)/%, $(ASMOBJFILES_)) 22 | 23 | # Target object files 24 | OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES) 25 | 26 | # g++ and assembler options - required 27 | GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall 28 | ASMOPT = -march=armv8-a+simd 29 | 30 | # g++ and assembler options - optional (uncomment to enable) 31 | DEBUG = -g 32 | LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=$(OBJDIR) 33 | LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst 34 | 35 | # Create directory for object and temp files 36 | MKOBJDIR := $(shell mkdir -p $(OBJDIR)) 37 
| 38 | # Build rules 39 | $(TARGET): $(OBJFILES) 40 | g++ $(OBJFILES) -o $(TARGET) 41 | 42 | # Note: full recompiles/assembles on any include file changes 43 | $(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2) 44 | g++ $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@ 45 | 46 | $(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2) 47 | as $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@ 48 | 49 | .PHONY: clean 50 | 51 | clean: 52 | rm -f $(TARGET) 53 | rm -rf $(OBJDIR) 54 | -------------------------------------------------------------------------------- /Chapter06/Ch06_06/makefile: -------------------------------------------------------------------------------- 1 | # Target, include, and object directories 2 | TARGET = Ch06_06 3 | INCDIR1 = . 4 | INCDIR2 = ../../Include 5 | OBJDIR = obj 6 | 7 | # include files 8 | CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h) 9 | CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h) 10 | ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc) 11 | ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc) 12 | 13 | # .cpp files in current directory 14 | CPPFILES = $(wildcard *.cpp) 15 | CPPOBJFILES_ = $(CPPFILES:.cpp=.o) 16 | CPPOBJFILES = $(patsubst %, $(OBJDIR)/%, $(CPPOBJFILES_)) 17 | 18 | # .s files in current directory 19 | ASMFILES = $(wildcard *.s) 20 | ASMOBJFILES_ = $(ASMFILES:.s=.o) 21 | ASMOBJFILES = $(patsubst %, $(OBJDIR)/%, $(ASMOBJFILES_)) 22 | 23 | # Target object files 24 | OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES) 25 | 26 | # g++ and assembler options - required 27 | GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall 28 | ASMOPT = -march=armv8-a+simd 29 | 30 | # g++ and assembler options - optional (uncomment to enable) 31 | DEBUG = -g 32 | LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=$(OBJDIR) 33 | LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst 34 | 35 | # Create directory for object and temp files 36 | MKOBJDIR := $(shell mkdir -p $(OBJDIR)) 37 | 38 | # Build rules 39 | $(TARGET): 
$(OBJFILES) 40 | g++ $(OBJFILES) -o $(TARGET) 41 | 42 | # Note: full recompiles/assembles on any include file changes 43 | $(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2) 44 | g++ $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@ 45 | 46 | $(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2) 47 | as $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@ 48 | 49 | .PHONY: clean 50 | 51 | clean: 52 | rm -f $(TARGET) 53 | rm -rf $(OBJDIR) 54 | -------------------------------------------------------------------------------- /Chapter06/Ch06_07/Ch06_07_.s: -------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // Ch06_07_.s 3 | //------------------------------------------------- 4 | 5 | .text 6 | r8_zero: .double 0.0 7 | 8 | // extern "C" bool CalcTrace_(double* trace, const double* x, int nrows, int ncols); 9 | 10 | .global CalcTrace_ 11 | CalcTrace_: push {r4,r5} 12 | cmp r2,r3 13 | bne InvalidArg // jump if nrows != ncols 14 | cmp r2,#0 15 | ble InvalidArg // jump if nrows <= 0 16 | 17 | // Calculate trace 18 | vldr.f64 d0,r8_zero // sum = 0.0 19 | mov r4,#0 // i = 0 20 | 21 | Loop1: mul r5,r4,r3 // r5 = i * ncols 22 | add r5,r5,r4 // r5 = i * ncols + i 23 | add r5,r1,r5,lsl #3 // r5 = ptr to x[i][i] 24 | 25 | vldr.f64 d1,[r5] // d1 = x[i][i] 26 | vadd.f64 d0,d0,d1 // sum += x[i][i] 27 | 28 | add r4,#1 // i += 1 29 | cmp r4,r2 30 | blt Loop1 // jump if not done 31 | 32 | vstr.f64 d0,[r0] // save trace value 33 | 34 | mov r0,#1 // set success return code 35 | Done: pop {r4,r5} 36 | bx lr 37 | 38 | InvalidArg: mov r0,#0 // set error return code 39 | b Done 40 | -------------------------------------------------------------------------------- /Chapter06/Ch06_07/makefile: -------------------------------------------------------------------------------- 1 | # Target, include, and object directories 2 | TARGET = Ch06_07 3 | INCDIR1 = . 
4 | INCDIR2 = ../../Include 5 | OBJDIR = obj 6 | 7 | # include files 8 | CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h) 9 | CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h) 10 | ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc) 11 | ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc) 12 | 13 | # .cpp files in current directory 14 | CPPFILES = $(wildcard *.cpp) 15 | CPPOBJFILES_ = $(CPPFILES:.cpp=.o) 16 | CPPOBJFILES = $(patsubst %, $(OBJDIR)/%, $(CPPOBJFILES_)) 17 | 18 | # .s files in current directory 19 | ASMFILES = $(wildcard *.s) 20 | ASMOBJFILES_ = $(ASMFILES:.s=.o) 21 | ASMOBJFILES = $(patsubst %, $(OBJDIR)/%, $(ASMOBJFILES_)) 22 | 23 | # Target object files 24 | OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES) 25 | 26 | # g++ and assembler options - required 27 | GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall 28 | ASMOPT = -march=armv8-a+simd 29 | 30 | # g++ and assembler options - optional (uncomment to enable) 31 | DEBUG = -g 32 | LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=$(OBJDIR) 33 | LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst 34 | 35 | # Create directory for object and temp files 36 | MKOBJDIR := $(shell mkdir -p $(OBJDIR)) 37 | 38 | # Build rules 39 | $(TARGET): $(OBJFILES) 40 | g++ $(OBJFILES) -o $(TARGET) 41 | 42 | # Note: full recompiles/assembles on any include file changes 43 | $(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2) 44 | g++ $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@ 45 | 46 | $(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2) 47 | as $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@ 48 | 49 | .PHONY: clean 50 | 51 | clean: 52 | rm -f $(TARGET) 53 | rm -rf $(OBJDIR) 54 | -------------------------------------------------------------------------------- /Chapter06/Ch06_08/Ch06_08.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // Ch06_08.cpp 3 | 
//------------------------------------------------- 4 | 5 | #include 6 | #include 7 | 8 | using namespace std; 9 | 10 | extern "C" void RectToPolar_(double* r, double* theta, double x, double y); 11 | extern "C" void PolarToRect_(double* x, double* y, double r, double theta); 12 | 13 | int main() 14 | { 15 | //?? Add a few more test cases? 16 | const int n = 7; 17 | const double x1[n] = { 3.0, -4.0, 1.0, 1.0, 1.5, -5.0, -8.0 }; 18 | const double y1[n] = { 4.0, 3.0, 1.0, -1.0, 6.0, -4.0, 9.0 }; 19 | 20 | cout << fixed << setprecision(4); 21 | 22 | for (int i = 0; i < n; i++) 23 | { 24 | const int w = 9; 25 | const char nl = '\n'; 26 | 27 | double r, theta, x2, y2; 28 | 29 | RectToPolar_(&r, &theta, x1[i], y1[i]); 30 | PolarToRect_(&x2, &y2, r, theta); 31 | 32 | cout << setw(w) << x1[i] << ", " << setw(w) << y1[i] << " | "; 33 | cout << setw(w) << r << ", " << setw(w) << theta << " | "; 34 | cout << setw(w) << x2 << ", " << setw(w) << y2 << nl; 35 | } 36 | 37 | return 0; 38 | } 39 | -------------------------------------------------------------------------------- /Chapter06/Ch06_08/makefile: -------------------------------------------------------------------------------- 1 | # Target, include, and object directories 2 | TARGET = Ch06_08 3 | INCDIR1 = . 
4 | INCDIR2 = ../../Include 5 | OBJDIR = obj 6 | 7 | # include files 8 | CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h) 9 | CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h) 10 | ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc) 11 | ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc) 12 | 13 | # .cpp files in current directory 14 | CPPFILES = $(wildcard *.cpp) 15 | CPPOBJFILES_ = $(CPPFILES:.cpp=.o) 16 | CPPOBJFILES = $(patsubst %, $(OBJDIR)/%, $(CPPOBJFILES_)) 17 | 18 | # .s files in current directory 19 | ASMFILES = $(wildcard *.s) 20 | ASMOBJFILES_ = $(ASMFILES:.s=.o) 21 | ASMOBJFILES = $(patsubst %, $(OBJDIR)/%, $(ASMOBJFILES_)) 22 | 23 | # Target object files 24 | OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES) 25 | 26 | # g++ and assembler options - required 27 | GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall 28 | ASMOPT = -march=armv8-a+simd 29 | 30 | # g++ and assembler options - optional (uncomment to enable) 31 | DEBUG = -g 32 | LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=$(OBJDIR) 33 | LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst 34 | 35 | # Create directory for object and temp files 36 | MKOBJDIR := $(shell mkdir -p $(OBJDIR)) 37 | 38 | # Build rules 39 | $(TARGET): $(OBJFILES) 40 | g++ $(OBJFILES) -o $(TARGET) 41 | 42 | # Note: full recompiles/assembles on any include file changes 43 | $(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2) 44 | g++ $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@ 45 | 46 | $(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2) 47 | as $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@ 48 | 49 | .PHONY: clean 50 | 51 | clean: 52 | rm -f $(TARGET) 53 | rm -rf $(OBJDIR) 54 | -------------------------------------------------------------------------------- /Chapter08/Ch08_01/makefile: -------------------------------------------------------------------------------- 1 | # Target, include, and object directories 2 | TARGET = Ch08_01 3 | INCDIR1 = . 
4 | INCDIR2 = ../../Include 5 | OBJDIR = obj 6 | 7 | # include files 8 | CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h) 9 | CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h) 10 | ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc) 11 | ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc) 12 | 13 | # .cpp files in current directory 14 | CPPFILES = $(wildcard *.cpp) 15 | CPPOBJFILES_ = $(CPPFILES:.cpp=.o) 16 | CPPOBJFILES = $(patsubst %, $(OBJDIR)/%, $(CPPOBJFILES_)) 17 | 18 | # .s files in current directory 19 | ASMFILES = $(wildcard *.s) 20 | ASMOBJFILES_ = $(ASMFILES:.s=.o) 21 | ASMOBJFILES = $(patsubst %, $(OBJDIR)/%, $(ASMOBJFILES_)) 22 | 23 | # Target object files 24 | OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES) 25 | 26 | # g++ and assembler options - required 27 | GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall 28 | ASMOPT = -march=armv8-a+simd 29 | 30 | # g++ and assembler options - optional (uncomment to enable) 31 | DEBUG = -g 32 | LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=$(OBJDIR) 33 | LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst 34 | 35 | # Create directory for object and temp files 36 | MKOBJDIR := $(shell mkdir -p $(OBJDIR)) 37 | 38 | # Build rules 39 | $(TARGET): $(OBJFILES) 40 | g++ $(OBJFILES) -o $(TARGET) 41 | 42 | # Note: full recompiles/assembles on any include file changes 43 | $(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2) 44 | g++ $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@ 45 | 46 | $(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2) 47 | as $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@ 48 | 49 | .PHONY: clean 50 | 51 | clean: 52 | rm -f $(TARGET) 53 | rm -rf $(OBJDIR) 54 | -------------------------------------------------------------------------------- /Chapter08/Ch08_02/makefile: -------------------------------------------------------------------------------- 1 | # Target, include, and object directories 2 | TARGET = Ch08_02 3 | INCDIR1 = . 
4 | INCDIR2 = ../../Include 5 | OBJDIR = obj 6 | 7 | # include files 8 | CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h) 9 | CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h) 10 | ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc) 11 | ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc) 12 | 13 | # .cpp files in current directory 14 | CPPFILES = $(wildcard *.cpp) 15 | CPPOBJFILES_ = $(CPPFILES:.cpp=.o) 16 | CPPOBJFILES = $(patsubst %, $(OBJDIR)/%, $(CPPOBJFILES_)) 17 | 18 | # .s files in current directory 19 | ASMFILES = $(wildcard *.s) 20 | ASMOBJFILES_ = $(ASMFILES:.s=.o) 21 | ASMOBJFILES = $(patsubst %, $(OBJDIR)/%, $(ASMOBJFILES_)) 22 | 23 | # Target object files 24 | OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES) 25 | 26 | # g++ and assembler options - required 27 | GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall 28 | ASMOPT = -march=armv8-a+simd 29 | 30 | # g++ and assembler options - optional (uncomment to enable) 31 | DEBUG = -g 32 | LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=$(OBJDIR) 33 | LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst 34 | 35 | # Create directory for object and temp files 36 | MKOBJDIR := $(shell mkdir -p $(OBJDIR)) 37 | 38 | # Build rules 39 | $(TARGET): $(OBJFILES) 40 | g++ $(OBJFILES) -o $(TARGET) 41 | 42 | # Note: full recompiles/assembles on any include file changes 43 | $(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2) 44 | g++ $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@ 45 | 46 | $(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2) 47 | as $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@ 48 | 49 | .PHONY: clean 50 | 51 | clean: 52 | rm -f $(TARGET) 53 | rm -rf $(OBJDIR) 54 | -------------------------------------------------------------------------------- /Chapter08/Ch08_03/Ch08_03.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // Ch08_03.cpp 3 | 
//------------------------------------------------- 4 | 5 | #include 6 | #include 7 | #include 8 | #include "Vec128.h" 9 | 10 | using namespace std; 11 | 12 | extern "C" bool PackedMulA_(Vec128* x, const Vec128& a, const Vec128& b, int16_t c); 13 | 14 | void PackedMulA(void) 15 | { 16 | Vec128 x[4], a, b; 17 | const int c = 5; 18 | const char nl = '\n'; 19 | string sep(75, '-'); 20 | 21 | a.m_I16[0] = 10; b.m_I16[0] = 6; 22 | a.m_I16[1] = 7; b.m_I16[1] = 13; 23 | a.m_I16[2] = -23; b.m_I16[2] = -75; 24 | a.m_I16[3] = 41; b.m_I16[3] = 9; 25 | a.m_I16[4] = 6; b.m_I16[4] = 37; 26 | a.m_I16[5] = -33; b.m_I16[5] = 28; 27 | a.m_I16[6] = 19; b.m_I16[6] = 56; 28 | a.m_I16[7] = 16; b.m_I16[7] = -18; 29 | 30 | PackedMulA_(x, a, b, c); 31 | 32 | cout << "\nResults for PackedMulA_\n"; 33 | cout << sep << nl; 34 | 35 | cout << "a: " << a.ToStringI16() << nl; 36 | cout << "b: " << b.ToStringI16() << nl; 37 | cout << "c: " << setw(8) << c << nl << nl; 38 | cout << "x[0]: " << x[0].ToStringI16() << nl << nl; 39 | cout << "x[1]: " << x[1].ToStringI16() << nl << nl; 40 | cout << "x[2]: " << x[2].ToStringI32() << nl; 41 | cout << "x[3]: " << x[3].ToStringI32() << nl; 42 | } 43 | 44 | int main(void) 45 | { 46 | PackedMulA(); 47 | return 0; 48 | } 49 | -------------------------------------------------------------------------------- /Chapter08/Ch08_03/Ch08_03_.s: -------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // Ch08_03_.s 3 | //------------------------------------------------- 4 | 5 | // extern "C" bool PackedMulA_(Vec128 x[4], const Vec128& a, const Vec128& b, int16_t c); 6 | 7 | .text 8 | .global PackedMulA_ 9 | PackedMulA_: 10 | vldm r1,{q0} // q0 = a 11 | vldm r2,{q1} // q1 = b 12 | 13 | vmov s14,r3 // s14 = c 14 | vmul.i16 q2,q0,d7[0] // q2 = a * c (scalar mul) 15 | vstm r0!,{q2} // save result to x[0] 16 | 17 | vmul.i16 q2,q0,q1 // q2 = a * b (vector 16-bit) 18 | vstm r0!,{q2} // save 
result to x[1] 19 | 20 | vmull.s16 q2,d0,d2 // q2 = a[0:3] * b[0:3] (32-bit) 21 | vstm r0!,{q2} // save result to x[2] 22 | vmull.s16 q2,d1,d3 // q2 = a[4:7] * b[4:7] (32-bit) 23 | vstm r0,{q2} // save result to x[3] 24 | 25 | bx lr 26 | -------------------------------------------------------------------------------- /Chapter08/Ch08_03/makefile: -------------------------------------------------------------------------------- 1 | # Target, include, and object directories 2 | TARGET = Ch08_03 3 | INCDIR1 = . 4 | INCDIR2 = ../../Include 5 | OBJDIR = obj 6 | 7 | # include files 8 | CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h) 9 | CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h) 10 | ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc) 11 | ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc) 12 | 13 | # .cpp files in current directory 14 | CPPFILES = $(wildcard *.cpp) 15 | CPPOBJFILES_ = $(CPPFILES:.cpp=.o) 16 | CPPOBJFILES = $(patsubst %, $(OBJDIR)/%, $(CPPOBJFILES_)) 17 | 18 | # .s files in current directory 19 | ASMFILES = $(wildcard *.s) 20 | ASMOBJFILES_ = $(ASMFILES:.s=.o) 21 | ASMOBJFILES = $(patsubst %, $(OBJDIR)/%, $(ASMOBJFILES_)) 22 | 23 | # Target object files 24 | OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES) 25 | 26 | # g++ and assembler options - required 27 | GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall 28 | ASMOPT = -march=armv8-a+simd 29 | 30 | # g++ and assembler options - optional (uncomment to enable) 31 | DEBUG = -g 32 | LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=$(OBJDIR) 33 | LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst 34 | 35 | # Create directory for object and temp files 36 | MKOBJDIR := $(shell mkdir -p $(OBJDIR)) 37 | 38 | # Build rules 39 | $(TARGET): $(OBJFILES) 40 | g++ $(OBJFILES) -o $(TARGET) 41 | 42 | # Note: full recompiles/assembles on any include file changes 43 | $(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2) 44 | g++ $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@ 45 | 46 | $(OBJDIR)/%.o: %.s 
$(ASMINCFILES1) $(ASMINCFILES2) 47 | as $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@ 48 | 49 | .PHONY: clean 50 | 51 | clean: 52 | rm -f $(TARGET) 53 | rm -rf $(OBJDIR) 54 | -------------------------------------------------------------------------------- /Chapter08/Ch08_04/Ch08_04.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch08_04.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | 8 | // Ch08_04.cpp 9 | extern void Init(uint8_t* x, uint32_t n, unsigned int seed); 10 | extern bool CalcMinMaxU8(uint8_t* x_min, uint8_t* x_max, const uint8_t* x, uint32_t n); 11 | 12 | // Ch08_04_BM.cpp 13 | extern void MinMaxU8_BM(void); 14 | 15 | // Ch08_04_.s 16 | extern "C" bool CalcMinMaxU8_(uint8_t* x_min, uint8_t* x_max, const uint8_t* x, uint32_t n); 17 | 18 | // Common constants 19 | const uint32_t c_NumElements = 16 * 1024 * 1024 + 7; 20 | const unsigned int c_RngSeedVal = 23; 21 | -------------------------------------------------------------------------------- /Chapter08/Ch08_04/Ch08_04_BM.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch08_04_BM.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | #include "Ch08_04.h" 7 | #include "AlignedMem.h" 8 | #include "BmThreadTimer.h" 9 | #include "OS.h" 10 | 11 | using namespace std; 12 | 13 | void MinMaxU8_BM(void) 14 | { 15 | cout << "\nRunning benchmark function MinMaxU8_BM - please wait\n"; 16 | 17 | uint32_t n = c_NumElements; 18 | AlignedArray x_aa(n, 16); 19 | uint8_t* x = x_aa.Data(); 20 | 21 | Init(x, n, c_RngSeedVal); 22 | 23 | const size_t num_it = 500; 24 | const size_t num_alg = 2; 25 | BmThreadTimer bmtt(num_it, num_alg); 26 | 27 | for (size_t i = 0; i < num_it; i++) 28 | { 29 | uint8_t x_min0 = 0, x_max0 = 0; 30 | uint8_t 
x_min1 = 0, x_max1 = 0; 31 | 32 | bmtt.Start(i, 0); 33 | CalcMinMaxU8(&x_min0, &x_max0, x, n); 34 | bmtt.Stop(i, 0); 35 | 36 | bmtt.Start(i, 1); 37 | CalcMinMaxU8_(&x_min1, &x_max1, x, n); 38 | bmtt.Stop(i, 1); 39 | } 40 | 41 | string fn = bmtt.BuildCsvFilenameString("Ch08_04_MinMaxU8_BM"); 42 | bmtt.SaveElapsedTimes(fn, BmThreadTimer::EtUnit::MicroSec, 2); 43 | cout << "Benchmark times save to file " << fn << '\n'; 44 | } 45 | -------------------------------------------------------------------------------- /Chapter08/Ch08_05/Ch08_05.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch08_05.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | 8 | // Ch08_05.cpp 9 | extern void Init(uint8_t* x, size_t n, unsigned int seed); 10 | extern bool CalcMeanU8(uint32_t* sum_x, double* mean, const uint8_t* x, uint32_t n); 11 | 12 | // Ch08_05_BM.cpp 13 | extern void MeanU8_BM(void); 14 | 15 | // Ch08_05_.s 16 | extern "C" bool CalcMeanU8_(uint32_t* sum_x, double* mean, const uint8_t* x, uint32_t n); 17 | 18 | // Common constants 19 | const uint32_t c_NumElements = 8 * 1024 * 1024 + 19; 20 | const uint32_t c_NumElementsMax = 16 * 1024 * 1024; 21 | const unsigned int c_RngSeedVal = 29; 22 | -------------------------------------------------------------------------------- /Chapter08/Ch08_05/Ch08_05_BM.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch08_05_BM.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | #include "Ch08_05.h" 7 | #include "AlignedMem.h" 8 | #include "BmThreadTimer.h" 9 | #include "OS.h" 10 | 11 | using namespace std; 12 | 13 | void MeanU8_BM(void) 14 | { 15 | cout << "\nRunning benchmark function MeanU8_BM - please wait\n"; 16 | 17 | uint32_t n = c_NumElements; 18 | AlignedArray x_aa(n, 16); 19 | 
uint8_t* x = x_aa.Data(); 20 | 21 | Init(x, n, c_RngSeedVal); 22 | 23 | const size_t num_it = 500; 24 | const size_t num_alg = 2; 25 | BmThreadTimer bmtt(num_it, num_alg); 26 | 27 | for (size_t i = 0; i < num_it; i++) 28 | { 29 | uint32_t sum0, sum1; 30 | double mean0, mean1; 31 | 32 | bmtt.Start(i, 0); 33 | CalcMeanU8(&sum0, &mean0, x, n); 34 | bmtt.Stop(i, 0); 35 | 36 | bmtt.Start(i, 1); 37 | CalcMeanU8_(&sum1, &mean1, x, n); 38 | bmtt.Stop(i, 1); 39 | } 40 | 41 | string fn = bmtt.BuildCsvFilenameString("Ch08_05_MeanU8_BM"); 42 | bmtt.SaveElapsedTimes(fn, BmThreadTimer::EtUnit::MicroSec, 2); 43 | cout << "Benchmark times save to file " << fn << '\n'; 44 | } 45 | -------------------------------------------------------------------------------- /Chapter08/Ch08_05/makefile: -------------------------------------------------------------------------------- 1 | # Target, include, and object directories 2 | TARGET = Ch08_05 3 | INCDIR1 = . 4 | INCDIR2 = ../../Include 5 | OBJDIR = obj 6 | 7 | # include files 8 | CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h) 9 | CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h) 10 | ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc) 11 | ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc) 12 | 13 | # .cpp files in current directory 14 | CPPFILES = $(wildcard *.cpp) 15 | CPPOBJFILES_ = $(CPPFILES:.cpp=.o) 16 | CPPOBJFILES = $(patsubst %, $(OBJDIR)/%, $(CPPOBJFILES_)) 17 | 18 | # .s files in current directory 19 | ASMFILES = $(wildcard *.s) 20 | ASMOBJFILES_ = $(ASMFILES:.s=.o) 21 | ASMOBJFILES = $(patsubst %, $(OBJDIR)/%, $(ASMOBJFILES_)) 22 | 23 | # Target object files 24 | OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES) 25 | 26 | # g++ and assembler options - required 27 | GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall 28 | ASMOPT = -march=armv8-a+simd 29 | 30 | # g++ and assembler options - optional (uncomment to enable) 31 | DEBUG = -g 32 | LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=$(OBJDIR) 33 | LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename 
$<).lst 34 | 35 | # Create directory for object and temp files 36 | MKOBJDIR := $(shell mkdir -p $(OBJDIR)) 37 | 38 | # Build rules 39 | $(TARGET): $(OBJFILES) 40 | g++ $(OBJFILES) -o $(TARGET) 41 | 42 | # Note: full recompiles/assembles on any include file changes 43 | $(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2) 44 | g++ $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@ 45 | 46 | $(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2) 47 | as $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@ 48 | 49 | .PHONY: clean 50 | 51 | clean: 52 | rm -f $(TARGET) 53 | rm -rf $(OBJDIR) 54 | -------------------------------------------------------------------------------- /Chapter08/Ch08_06/Ch08_06.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch08_06.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | 8 | struct ITD 9 | { 10 | uint8_t* PbSrc; // Source image pixel buffer 11 | uint8_t* PbMask; // Mask image pixel buffer 12 | uint32_t NumPixels; // Number of source image pixels 13 | uint32_t NumMaskedPixels; // Number of masked pixels 14 | uint32_t SumMaskedPixels; // Sum of masked pixels 15 | uint8_t Threshold; // Image threshold value 16 | double MeanMaskedPixels; // Mean of masked pixels 17 | }; 18 | 19 | // Ch08_06.cpp 20 | extern bool ThresholdImage(ITD* itd); 21 | extern void CalcMeanMaskedPixels(ITD* itd); 22 | 23 | // Ch08_06_BM.cpp 24 | extern void Threshold_BM(void); 25 | 26 | // Ch08_06_.s 27 | extern "C" bool CheckArgs_(const uint8_t* pb_src, const uint8_t* pb_mask, uint32_t num_pixels); 28 | extern "C" bool ThresholdImage_(ITD* itd); 29 | extern "C" void CalcMeanMaskedPixels_(ITD* itd); 30 | 31 | // Ch08_06_Misc.cpp 32 | extern void SaveItdEquates(void); 33 | 34 | // Miscellaneous constants 35 | const uint8_t c_TestThreshold = 226; 36 | 37 | 
-------------------------------------------------------------------------------- /Chapter08/Ch08_06/Ch08_06_Misc.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // Ch08_06_Misc.cpp 3 | //------------------------------------------------- 4 | 5 | #include 6 | #include 7 | #include "Ch08_06.h" 8 | 9 | using namespace std; 10 | 11 | void SaveItdEquates(void) 12 | { 13 | const char nl = '\n'; 14 | const char* fn = "Ch08_06_ITD_Equates.txt"; 15 | 16 | ofstream ofs(fn); 17 | 18 | if (ofs.bad()) 19 | ofs << "File create error - " << fn << nl; 20 | else 21 | { 22 | string s(12, ' '); 23 | 24 | ofs << s << ".equ S_PbSrc," << offsetof(ITD, PbSrc) << nl; 25 | ofs << s << ".equ S_PbMask," << offsetof(ITD, PbMask) << nl; 26 | ofs << s << ".equ S_NumPixels," << offsetof(ITD, NumPixels) << nl; 27 | ofs << s << ".equ S_NumMaskedPixels," << offsetof(ITD, NumMaskedPixels) << nl; 28 | ofs << s << ".equ S_SumMaskedPixels," << offsetof(ITD, SumMaskedPixels) << nl; 29 | ofs << s << ".equ S_Threshold," << offsetof(ITD, Threshold) << nl; 30 | ofs << s << ".equ S_MeanMaskedPixels," << offsetof(ITD, MeanMaskedPixels) << nl; 31 | 32 | ofs.close(); 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /Chapter09/Ch09_01/Ch09_01.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // Ch09_01.cpp 3 | //------------------------------------------------- 4 | 5 | #include 6 | #include "Vec128.h" 7 | 8 | using namespace std; 9 | 10 | extern "C" void PackedMathF32_(Vec128 x[7], const Vec128& a, const Vec128& b); 11 | 12 | void PackedMathF32(void) 13 | { 14 | Vec128 x[7], a, b; 15 | const char nl = '\n'; 16 | 17 | a.m_F32[0] = 36.0; b.m_F32[0] = -0.125; 18 | a.m_F32[1] = 0.03125; b.m_F32[1] = 64.0; 19 | a.m_F32[2] = 2.0; b.m_F32[2] = -0.0625; 20 | a.m_F32[3] = -42.0; 
b.m_F32[3] = 13.75; 21 | 22 | PackedMathF32_(x, a, b); 23 | 24 | cout << ("\nResults for PackedMathF32_\n"); 25 | cout << "a: " << a.ToStringF32() << nl; 26 | cout << "b: " << b.ToStringF32() << nl; 27 | cout << nl; 28 | cout << "vadd: " << x[0].ToStringF32() << nl; 29 | cout << "vsub: " << x[1].ToStringF32() << nl; 30 | cout << "vmul: " << x[2].ToStringF32() << nl; 31 | cout << "vabs(a): " << x[3].ToStringF32() << nl; 32 | cout << "vneg(b): " << x[4].ToStringF32() << nl; 33 | cout << "vminnm: " << x[5].ToStringF32() << nl; 34 | cout << "vmaxnm: " << x[6].ToStringF32() << nl; 35 | } 36 | 37 | int main() 38 | { 39 | PackedMathF32(); 40 | return 0; 41 | } 42 | -------------------------------------------------------------------------------- /Chapter09/Ch09_01/Ch09_01_.s: -------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // Ch09_01_.s 3 | //------------------------------------------------- 4 | 5 | // extern "C" void PackedMathF32_(Vec128 x[7], const Vec128& a, const Vec128& b); 6 | 7 | .text 8 | .global PackedMathF32_ 9 | PackedMathF32_: 10 | 11 | // Simple packed floating-point (F32) arithmetic 12 | vldm.f32 r1,{q0} // q0 = a 13 | vldm.f32 r2,{q1} // q1 = b 14 | 15 | vadd.f32 q2,q0,q1 // q2 = a + b 16 | vstm r0!,{q2} 17 | 18 | vsub.f32 q2,q0,q1 // q2 = a - b 19 | vstm r0!,{q2} 20 | 21 | vmul.f32 q2,q0,q1 // q2 = a * b 22 | vstm r0!,{q2} 23 | 24 | vabs.f32 q2,q0 // q2 = abs(a) 25 | vstm r0!,{q2} 26 | 27 | vneg.f32 q2,q1 // q2 = -b 28 | vstm r0!,{q2} 29 | 30 | vminnm.f32 q2,q0,q1 // q2 = min(a, b) 31 | vstm r0!,{q2} 32 | 33 | vmaxnm.f32 q2,q0,q1 // q2 = max(a, b) 34 | vstm r0,{q2} 35 | bx lr 36 | -------------------------------------------------------------------------------- /Chapter09/Ch09_01/makefile: -------------------------------------------------------------------------------- 1 | # Target, include, and object directories 2 | TARGET = Ch09_01 3 | INCDIR1 = . 
4 | INCDIR2 = ../../Include 5 | OBJDIR = obj 6 | 7 | # include files 8 | CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h) 9 | CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h) 10 | ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc) 11 | ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc) 12 | 13 | # .cpp files in current directory 14 | CPPFILES = $(wildcard *.cpp) 15 | CPPOBJFILES_ = $(CPPFILES:.cpp=.o) 16 | CPPOBJFILES = $(patsubst %, $(OBJDIR)/%, $(CPPOBJFILES_)) 17 | 18 | # .s files in current directory 19 | ASMFILES = $(wildcard *.s) 20 | ASMOBJFILES_ = $(ASMFILES:.s=.o) 21 | ASMOBJFILES = $(patsubst %, $(OBJDIR)/%, $(ASMOBJFILES_)) 22 | 23 | # Target object files 24 | OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES) 25 | 26 | # g++ and assembler options - required 27 | GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall 28 | ASMOPT = -march=armv8-a+simd 29 | 30 | # g++ and assembler options - optional (uncomment to enable) 31 | DEBUG = -g 32 | LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=$(OBJDIR) 33 | LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst 34 | 35 | # Create directory for object and temp files 36 | MKOBJDIR := $(shell mkdir -p $(OBJDIR)) 37 | 38 | # Build rules 39 | $(TARGET): $(OBJFILES) 40 | g++ $(OBJFILES) -o $(TARGET) 41 | 42 | # Note: full recompiles/assembles on any include file changes 43 | $(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2) 44 | g++ $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@ 45 | 46 | $(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2) 47 | as $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@ 48 | 49 | .PHONY: clean 50 | 51 | clean: 52 | rm -f $(TARGET) 53 | rm -rf $(OBJDIR) 54 | -------------------------------------------------------------------------------- /Chapter09/Ch09_02/Ch09_02.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // Ch09_02.cpp 3 | 
//------------------------------------------------- 4 | 5 | #include 6 | #include 7 | #include 8 | #include "Vec128.h" 9 | 10 | using namespace std; 11 | 12 | extern "C" void PackedCompareF32_(Vec128 x[8], const Vec128& a, const Vec128& b); 13 | 14 | const char* c_CmpStr[8] = 15 | { 16 | "EQ", "NE", "LT", "LE", "GT", "GE", "LT0", "GT0" 17 | }; 18 | 19 | void PackedCompareF32(void) 20 | { 21 | const char nl = '\n'; 22 | Vec128 x[8], a, b; 23 | 24 | a.m_F32[0] = 2.0; b.m_F32[0] = -4.0; 25 | a.m_F32[1] = 17.0; b.m_F32[1] = 12.0; 26 | a.m_F32[2] = -6.0; b.m_F32[2] = -6.0; 27 | a.m_F32[3] = 3.0; b.m_F32[3] = 8.0; 28 | 29 | PackedCompareF32_(x, a, b); 30 | 31 | cout << "\nResults for PackedCompareF32\n"; 32 | cout << "a: " << a.ToStringF32() << nl; 33 | cout << "b: " << b.ToStringF32() << nl; 34 | cout << nl; 35 | 36 | for (int j = 0; j < 8; j++) 37 | { 38 | string s = string(c_CmpStr[j]) + ":"; 39 | cout << left << setw(4) << s << x[j].ToStringX32() << nl; 40 | } 41 | } 42 | 43 | int main() 44 | { 45 | PackedCompareF32(); 46 | return 0; 47 | } 48 | -------------------------------------------------------------------------------- /Chapter09/Ch09_02/Ch09_02_.s: -------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // Ch09_02_.s 3 | //------------------------------------------------- 4 | 5 | // extern "C" void PackedCompareF32_(Vec128 x[8], const Vec128& a, const Vec128& b); 6 | 7 | .text 8 | .global PackedCompareF32_ 9 | PackedCompareF32_: 10 | 11 | // Simple packed floating-point (F32) compare operations 12 | vldm.f32 r1,{q0} // q0 = a 13 | vldm.f32 r2,{q1} // q1 = b 14 | 15 | vceq.f32 q2,q0,q1 // packed a == b 16 | vstm r0!,{q2} 17 | 18 | vmvn.u32 q2,q2 // packed a != b 19 | vstm r0!,{q2} 20 | 21 | vclt.f32 q2,q0,q1 // packed a < b 22 | vstm r0!,{q2} 23 | 24 | vcle.f32 q2,q0,q1 // packed a <= b 25 | vstm r0!,{q2} 26 | 27 | vcgt.f32 q2,q0,q1 // packed a > b 28 | vstm r0!,{q2} 29 | 30 | 
vcge.f32 q2,q0,q1 // packed a >= b 31 | vstm r0!,{q2} 32 | 33 | vclt.f32 q2,q0,#0 // packed a < 0 34 | vstm r0!,{q2} 35 | 36 | vcgt.f32 q2,q0,#0 // packed a > 0 37 | vstm r0,{q2} 38 | bx lr 39 | -------------------------------------------------------------------------------- /Chapter09/Ch09_02/makefile: -------------------------------------------------------------------------------- 1 | # Target, include, and object directories 2 | TARGET = Ch09_02 3 | INCDIR1 = . 4 | INCDIR2 = ../../Include 5 | OBJDIR = obj 6 | 7 | # include files 8 | CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h) 9 | CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h) 10 | ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc) 11 | ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc) 12 | 13 | # .cpp files in current directory 14 | CPPFILES = $(wildcard *.cpp) 15 | CPPOBJFILES_ = $(CPPFILES:.cpp=.o) 16 | CPPOBJFILES = $(patsubst %, $(OBJDIR)/%, $(CPPOBJFILES_)) 17 | 18 | # .s files in current directory 19 | ASMFILES = $(wildcard *.s) 20 | ASMOBJFILES_ = $(ASMFILES:.s=.o) 21 | ASMOBJFILES = $(patsubst %, $(OBJDIR)/%, $(ASMOBJFILES_)) 22 | 23 | # Target object files 24 | OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES) 25 | 26 | # g++ and assembler options - required 27 | GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall 28 | ASMOPT = -march=armv8-a+simd 29 | 30 | # g++ and assembler options - optional (uncomment to enable) 31 | DEBUG = -g 32 | LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=$(OBJDIR) 33 | LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst 34 | 35 | # Create directory for object and temp files 36 | MKOBJDIR := $(shell mkdir -p $(OBJDIR)) 37 | 38 | # Build rules 39 | $(TARGET): $(OBJFILES) 40 | g++ $(OBJFILES) -o $(TARGET) 41 | 42 | # Note: full recompiles/assembles on any include file changes 43 | $(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2) 44 | g++ $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@ 45 | 46 | $(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2) 47 | as 
$(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@ 48 | 49 | .PHONY: clean 50 | 51 | clean: 52 | rm -f $(TARGET) 53 | rm -rf $(OBJDIR) 54 | -------------------------------------------------------------------------------- /Chapter09/Ch09_03/makefile: -------------------------------------------------------------------------------- 1 | # Target, include, and object directories 2 | TARGET = Ch09_03 3 | INCDIR1 = . 4 | INCDIR2 = ../../Include 5 | OBJDIR = obj 6 | 7 | # include files 8 | CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h) 9 | CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h) 10 | ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc) 11 | ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc) 12 | 13 | # .cpp files in current directory 14 | CPPFILES = $(wildcard *.cpp) 15 | CPPOBJFILES_ = $(CPPFILES:.cpp=.o) 16 | CPPOBJFILES = $(patsubst %, $(OBJDIR)/%, $(CPPOBJFILES_)) 17 | 18 | # .s files in current directory 19 | ASMFILES = $(wildcard *.s) 20 | ASMOBJFILES_ = $(ASMFILES:.s=.o) 21 | ASMOBJFILES = $(patsubst %, $(OBJDIR)/%, $(ASMOBJFILES_)) 22 | 23 | # Target object files 24 | OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES) 25 | 26 | # g++ and assembler options - required 27 | GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall 28 | ASMOPT = -march=armv8-a+simd 29 | 30 | # g++ and assembler options - optional (uncomment to enable) 31 | DEBUG = -g 32 | LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=$(OBJDIR) 33 | LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst 34 | 35 | # Create directory for object and temp files 36 | MKOBJDIR := $(shell mkdir -p $(OBJDIR)) 37 | 38 | # Build rules 39 | $(TARGET): $(OBJFILES) 40 | g++ $(OBJFILES) -o $(TARGET) 41 | 42 | # Note: full recompiles/assembles on any include file changes 43 | $(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2) 44 | g++ $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@ 45 | 46 | $(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2) 47 | as $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) 
# Target, include, and object directories
TARGET = Ch09_04
INCDIR1 = .
INCDIR2 = ../../Include
OBJDIR = obj

# Toolchain - defaults match the tools this makefile always used; override on
# the command line (e.g. make CXX=arm-linux-gnueabihf-g++) to cross-compile
CXX ?= g++
AS ?= as

# include files
CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h)
CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h)
ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc)
ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc)

# .cpp files in current directory and their objects in $(OBJDIR)
CPPFILES = $(wildcard *.cpp)
CPPOBJFILES = $(patsubst %.cpp,$(OBJDIR)/%.o,$(CPPFILES))

# .s files in current directory and their objects in $(OBJDIR)
ASMFILES = $(wildcard *.s)
ASMOBJFILES = $(patsubst %.s,$(OBJDIR)/%.o,$(ASMFILES))

# Target object files
OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES)

# g++ and assembler options - required
GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall
ASMOPT = -march=armv8-a+simd

# g++ and assembler options - optional (comment out a line to disable it)
DEBUG = -g
# The documented values of -save-temps= are the keywords cwd and obj, so the
# keyword is spelled literally rather than expanded from $(OBJDIR); obj places
# the temp files beside the -o output, i.e. in $(OBJDIR)
LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=obj
LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst

# Create directory for object and temp files (runs each time make reads this file)
MKOBJDIR := $(shell mkdir -p $(OBJDIR))

# Build rules
$(TARGET): $(OBJFILES)
	$(CXX) $(OBJFILES) -o $(TARGET)

# Note: full recompiles/assembles on any include file changes
$(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2)
	$(CXX) $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@

$(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2)
	$(AS) $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@

.PHONY: clean

clean:
	rm -f $(TARGET)
	rm -rf $(OBJDIR)
# Target, include, and object directories
TARGET = Ch09_06
INCDIR1 = .
INCDIR2 = ../../Include
OBJDIR = obj

# Toolchain - defaults match the tools this makefile always used; override on
# the command line (e.g. make CXX=arm-linux-gnueabihf-g++) to cross-compile
CXX ?= g++
AS ?= as

# include files
CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h)
CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h)
ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc)
ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc)

# .cpp files in current directory and their objects in $(OBJDIR)
CPPFILES = $(wildcard *.cpp)
CPPOBJFILES = $(patsubst %.cpp,$(OBJDIR)/%.o,$(CPPFILES))

# .s files in current directory and their objects in $(OBJDIR)
ASMFILES = $(wildcard *.s)
ASMOBJFILES = $(patsubst %.s,$(OBJDIR)/%.o,$(ASMFILES))

# Target object files
OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES)

# g++ and assembler options - required
GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall
ASMOPT = -march=armv8-a+simd

# g++ and assembler options - optional (comment out a line to disable it)
DEBUG = -g
# The documented values of -save-temps= are the keywords cwd and obj, so the
# keyword is spelled literally rather than expanded from $(OBJDIR); obj places
# the temp files beside the -o output, i.e. in $(OBJDIR)
LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=obj
LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst

# Create directory for object and temp files (runs each time make reads this file)
MKOBJDIR := $(shell mkdir -p $(OBJDIR))

# Build rules
$(TARGET): $(OBJFILES)
	$(CXX) $(OBJFILES) -o $(TARGET)

# Note: full recompiles/assembles on any include file changes
$(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2)
	$(CXX) $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@

$(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2)
	$(AS) $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@

.PHONY: clean

clean:
	rm -f $(TARGET)
	rm -rf $(OBJDIR)
//-------------------------------------------------
//               Ch09_07_.s
//-------------------------------------------------

// Macro Mat4x4MulF32
//
// Multiplies two 4x4 single-precision matrices held in registers.
// Each result register is built as a sum of the four m_src2 registers,
// each scaled by one scalar lane of the matching m_src1 register.
//
// Input:   q0:q3     matrix m_src1
//          q8:q11    matrix m_src2
//
// Output:  q12:q15   m_src1 * m_src2
//
// Note:    registers q0:q3 and q8:q11 are not modified.

            .macro Mat4x4MulF32

// Calc row 0 (scalars come from q0 = d0:d1)
            vmul.f32 q12,q8,d0[0]
            vmla.f32 q12,q9,d0[1]
            vmla.f32 q12,q10,d1[0]
            vmla.f32 q12,q11,d1[1]

// Calc row 1 (scalars come from q1 = d2:d3)
            vmul.f32 q13,q8,d2[0]
            vmla.f32 q13,q9,d2[1]
            vmla.f32 q13,q10,d3[0]
            vmla.f32 q13,q11,d3[1]

// Calc row 2 (scalars come from q2 = d4:d5)
            vmul.f32 q14,q8,d4[0]
            vmla.f32 q14,q9,d4[1]
            vmla.f32 q14,q10,d5[0]
            vmla.f32 q14,q11,d5[1]

// Calc row 3 (scalars come from q3 = d6:d7)
            vmul.f32 q15,q8,d6[0]
            vmla.f32 q15,q9,d6[1]
            vmla.f32 q15,q10,d7[0]
            vmla.f32 q15,q11,d7[1]
            .endm

// extern "C" void Mat4x4MulF32_(float* m_des, const float* m_src1, const float* m_src2);
//
// r0 = m_des, r1 = m_src1, r2 = m_src2; each points to 16 consecutive floats.
// Only q0:q3 and q8:q15 are written, so q4:q7 are left untouched.

            .text                               // state the section explicitly
            .global Mat4x4MulF32_
            .type Mat4x4MulF32_,%function       // typed as a function so the linker can handle ARM/Thumb interworking calls
Mat4x4MulF32_:
            vldm r1,{q0-q3}                     // q0:q3 = m_src1
            vldm r2,{q8-q11}                    // q8:q11 = m_src2

            Mat4x4MulF32                        // calc m_src1 * m_src2

            vstm r0,{q12-q15}                   // save result to m_des
            bx lr
# Target, include, and object directories
TARGET = Ch09_07
INCDIR1 = .
INCDIR2 = ../../Include
OBJDIR = obj

# Toolchain - defaults match the tools this makefile always used; override on
# the command line (e.g. make CXX=arm-linux-gnueabihf-g++) to cross-compile
CXX ?= g++
AS ?= as

# include files
CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h)
CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h)
ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc)
ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc)

# .cpp files in current directory and their objects in $(OBJDIR)
CPPFILES = $(wildcard *.cpp)
CPPOBJFILES = $(patsubst %.cpp,$(OBJDIR)/%.o,$(CPPFILES))

# .s files in current directory and their objects in $(OBJDIR)
ASMFILES = $(wildcard *.s)
ASMOBJFILES = $(patsubst %.s,$(OBJDIR)/%.o,$(ASMFILES))

# Target object files
OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES)

# g++ and assembler options - required
GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall
ASMOPT = -march=armv8-a+simd

# g++ and assembler options - optional (comment out a line to disable it)
DEBUG = -g
# The documented values of -save-temps= are the keywords cwd and obj, so the
# keyword is spelled literally rather than expanded from $(OBJDIR); obj places
# the temp files beside the -o output, i.e. in $(OBJDIR)
LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=obj
LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst

# Create directory for object and temp files (runs each time make reads this file)
MKOBJDIR := $(shell mkdir -p $(OBJDIR))

# Build rules
$(TARGET): $(OBJFILES)
	$(CXX) $(OBJFILES) -o $(TARGET)

# Note: full recompiles/assembles on any include file changes
$(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2)
	$(CXX) $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@

$(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2)
	$(AS) $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@

.PHONY: clean

clean:
	rm -f $(TARGET)
	rm -rf $(OBJDIR)
//-------------------------------------------------
//               Ch11_01_.s
//-------------------------------------------------

            .text

// extern "C" int IntegerAddSubA_(int a, int b, int c);
//
// 32-bit version: a, b, c arrive in w0, w1, w2; a + b - c is returned in w0.

            .global IntegerAddSubA_
IntegerAddSubA_:
            add w0,w0,w1                        // w0 = a + b
            sub w0,w0,w2                        // w0 = (a + b) - c
            ret                                 // result is already in w0

// extern "C" long IntegerAddSubB_(long a, long b, long c);
//
// 64-bit version: a, b, c arrive in x0, x1, x2; a + b - c is returned in x0.

            .global IntegerAddSubB_
IntegerAddSubB_:
            add x0,x0,x1                        // x0 = a + b
            sub x0,x0,x2                        // x0 = (a + b) - c
            ret                                 // result is already in x0
# Target, include, and object directories
TARGET = Ch11_01
INCDIR1 = .
INCDIR2 = ../../Include
OBJDIR = obj

# Toolchain - defaults match the tools this makefile always used; override on
# the command line (e.g. make CXX=aarch64-linux-gnu-g++) to cross-compile
CXX ?= g++
AS ?= as

# include files
CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h)
CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h)
ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc)
ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc)

# .cpp files in current directory and their objects in $(OBJDIR)
CPPFILES = $(wildcard *.cpp)
CPPOBJFILES = $(patsubst %.cpp,$(OBJDIR)/%.o,$(CPPFILES))

# .s files in current directory and their objects in $(OBJDIR)
ASMFILES = $(wildcard *.s)
ASMOBJFILES = $(patsubst %.s,$(OBJDIR)/%.o,$(ASMFILES))

# Target object files
OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES)

# g++ and assembler options - required
GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall
ASMOPT = -march=armv8-a+simd

# g++ and assembler options - optional (comment out a line to disable it)
DEBUG = -g
# The documented values of -save-temps= are the keywords cwd and obj, so the
# keyword is spelled literally rather than expanded from $(OBJDIR); obj places
# the temp files beside the -o output, i.e. in $(OBJDIR)
LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=obj
LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst

# Create directory for object and temp files (runs each time make reads this file)
MKOBJDIR := $(shell mkdir -p $(OBJDIR))

# Build rules
$(TARGET): $(OBJFILES)
	$(CXX) $(OBJFILES) -o $(TARGET)

# Note: full recompiles/assembles on any include file changes
$(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2)
	$(CXX) $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@

$(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2)
	$(AS) $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@

.PHONY: clean

clean:
	rm -f $(TARGET)
	rm -rf $(OBJDIR)
//-------------------------------------------------
//               Ch11_02.cpp
//-------------------------------------------------

#include <iostream>

using namespace std;

// Assembly language multiply routines, one per operand/result width
extern "C" int IntegerMulA_(int a, int b);
extern "C" long IntegerMulB_(long a, long b);
extern "C" long IntegerMulC_(int a, int b);
extern "C" unsigned long IntegerMulD_(unsigned int a, unsigned int b);

// Writes msg on its own line, then both operands and the result, to cout.
// T1 is the operand type and T2 the (possibly wider) result type.
template <typename T1, typename T2>
void PrintResult(const char* msg, T1 a, T1 b, T2 result)
{
    const char nl = '\n';

    cout << msg << nl;
    cout << "a = " << a << ", b = " << b;
    cout << " result = " << result << nl << nl;
}

// Calls each multiply routine once and prints its operands and result.
int main(int argc, char** argv)
{
    int a1 = 50;
    int b1 = 25;
    int result1 = IntegerMulA_(a1, b1);
    PrintResult("IntegerMulA_", a1, b1, result1);

    long a2 = -3000000000;
    long b2 = 7;
    long result2 = IntegerMulB_(a2, b2);
    PrintResult("IntegerMulB_", a2, b2, result2);

    int a3 = 4000;
    // 0x80000000 does not fit in int; the conversion is implementation-defined
    // before C++20 (GCC wraps it to the most negative int)
    int b3 = 0x80000000;
    long result3 = IntegerMulC_(a3, b3);
    PrintResult("IntegerMulC_", a3, b3, result3);

    unsigned int a4 = 4000;
    unsigned int b4 = 0x80000000;
    unsigned long result4 = IntegerMulD_(a4, b4);
    PrintResult("IntegerMulD_", a4, b4, result4);

    return 0;
}
// extern "C" long IntegerMulC_(int a, int b);
//
// Signed widening multiply: 32-bit operands in w0 and w1, 64-bit product in x0.

            .global IntegerMulC_
IntegerMulC_:   smull x0,w0,w1                  // x0 = a * b (signed, 64-bit result)
                ret

// extern "C" unsigned long IntegerMulD_(unsigned int a, unsigned int b);
//
// Unsigned widening multiply: 32-bit operands in w0 and w1, 64-bit product in x0.

            .global IntegerMulD_
IntegerMulD_:   umull x0,w0,w1                  // x0 = a * b (unsigned, 64-bit result)
                ret
//-------------------------------------------------
//               Ch11_03_.s
//-------------------------------------------------

            .text

// extern "C" void CalcQuoRemA_(int a, int b, int* quo, int* rem);
//
// w0 = a, w1 = b, x2 = quo, x3 = rem (32-bit signed division).

            .global CalcQuoRemA_
CalcQuoRemA_:
            sdiv w4,w0,w1                       // w4 = a / b
            msub w5,w4,w1,w0                    // w5 = a - (a / b) * b
            str w4,[x2]                         // *quo = quotient
            str w5,[x3]                         // *rem = remainder
            ret

// extern "C" void CalcQuoRemB_(long a, long b, long* quo, long* rem);
//
// x0 = a, x1 = b, x2 = quo, x3 = rem (64-bit signed division).

            .global CalcQuoRemB_
CalcQuoRemB_:
            sdiv x4,x0,x1                       // x4 = a / b
            msub x5,x4,x1,x0                    // x5 = a - (a / b) * b
            str x4,[x2]                         // *quo = quotient
            str x5,[x3]                         // *rem = remainder
            ret
object and temp files 36 | MKOBJDIR := $(shell mkdir -p $(OBJDIR)) 37 | 38 | # Build rules 39 | $(TARGET): $(OBJFILES) 40 | g++ $(OBJFILES) -o $(TARGET) 41 | 42 | # Note: full recompiles/assembles on any include file changes 43 | $(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2) 44 | g++ $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@ 45 | 46 | $(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2) 47 | as $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@ 48 | 49 | .PHONY: clean 50 | 51 | clean: 52 | rm -f $(TARGET) 53 | rm -rf $(OBJDIR) 54 | -------------------------------------------------------------------------------- /Chapter11/Ch11_04/Ch11_04.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // Ch11_04.cpp 3 | //------------------------------------------------- 4 | 5 | #include 6 | 7 | using namespace std; 8 | 9 | extern "C" int TestLDR1_(unsigned int i, unsigned long j); 10 | extern "C" long TestLDR2_(unsigned int i, unsigned long j); 11 | extern "C" short TestLDR3_(unsigned int i, unsigned long j); 12 | 13 | void TestLDR1(void) 14 | { 15 | const char nl = '\n'; 16 | unsigned int i = 3; 17 | unsigned long j = 6; 18 | 19 | int test_ldr1 = TestLDR1_(i, j); 20 | 21 | cout << "TestLDR1_(" << i << ", " << j << ") = " << test_ldr1 << nl; 22 | } 23 | 24 | void TestLDR2(void) 25 | { 26 | const char nl = '\n'; 27 | unsigned int i = 2; 28 | unsigned long j = 7; 29 | 30 | long test_ldr2 = TestLDR2_(i, j); 31 | 32 | cout << "TestLDR2_(" << i << ", " << j << ") = " << test_ldr2 << nl; 33 | } 34 | 35 | void TestLDR3(void) 36 | { 37 | const char nl = '\n'; 38 | unsigned int i = 5; 39 | unsigned long j = 1; 40 | 41 | short test_ldr3 = TestLDR3_(i, j); 42 | 43 | cout << "TestLDR3_(" << i << ", " << j << ") = " << test_ldr3 << nl; 44 | } 45 | 46 | int main(int argc, char** argv) 47 | { 48 | TestLDR1(); 49 | TestLDR2(); 50 | TestLDR3(); 51 
//-------------------------------------------------
//               Ch11_04_.s
//-------------------------------------------------

// Test arrays: A1 (32-bit words) and A2 (64-bit quads) live in .data
                .data
A1:             .word 1, 2, 3, 4, 5, 6, 7, 8
A2:             .quad 10, -20, 30, -40, 50, -60, 70, -80

// A3 (16-bit halfwords) lives in .text so TestLDR3_ can reach it with adr
                .text
A3:             .short 100, 200, -300, 400, 500, -600, 700, 800
                .balign 4                       // keep the code below 4-byte aligned even if A3's length changes

// extern "C" int TestLDR1_(unsigned int i, unsigned long j);
//
// Returns A1[i] + A1[j]; i arrives in w0, j in x1.

                .global TestLDR1_
TestLDR1_:      ldr x2,=A1                      // x2 = ptr to A1

                ldr w3,[x2,w0,uxtw 2]           // w3 = A1[i] (i zero-extended, scaled by 4)
                ldr w4,[x2,x1,lsl 2]            // w4 = A1[j] (j scaled by 4)

                add w0,w3,w4                    // w0 = A1[i] + A1[j]
                ret

// extern "C" long TestLDR2_(unsigned int i, unsigned long j);
//
// Returns A2[i] + A2[j]; i arrives in w0, j in x1.

                .global TestLDR2_
TestLDR2_:      ldr x2,=A2                      // x2 = ptr to A2

                ldr x3,[x2,w0,uxtw 3]           // x3 = A2[i] (i zero-extended, scaled by 8)
                ldr x4,[x2,x1,lsl 3]            // x4 = A2[j] (j scaled by 8)

                add x0,x3,x4                    // x0 = A2[i] + A2[j]
                ret

// extern "C" short TestLDR3_(unsigned int i, unsigned long j);
//
// Returns A3[i] + A3[j]; i arrives in w0, j in x1.

                .global TestLDR3_
TestLDR3_:      adr x2,A3                       // x2 = ptr to A3 (PC-relative)

                ldrsh w3,[x2,w0,uxtw 1]         // w3 = A3[i] (sign-extended halfword)
                ldrsh w4,[x2,x1,lsl 1]          // w4 = A3[j] (sign-extended halfword)

                add w0,w3,w4                    // w0 = A3[i] + A3[j]
                ret
# Target, include, and object directories
TARGET = Ch11_04
INCDIR1 = .
INCDIR2 = ../../Include
OBJDIR = obj

# Toolchain - defaults match the tools this makefile always used; override on
# the command line (e.g. make CXX=aarch64-linux-gnu-g++) to cross-compile
CXX ?= g++
AS ?= as

# include files
CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h)
CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h)
ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc)
ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc)

# .cpp files in current directory and their objects in $(OBJDIR)
CPPFILES = $(wildcard *.cpp)
CPPOBJFILES = $(patsubst %.cpp,$(OBJDIR)/%.o,$(CPPFILES))

# .s files in current directory and their objects in $(OBJDIR)
ASMFILES = $(wildcard *.s)
ASMOBJFILES = $(patsubst %.s,$(OBJDIR)/%.o,$(ASMFILES))

# Target object files
OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES)

# g++ and assembler options - required
GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall
ASMOPT = -march=armv8-a+simd

# g++ and assembler options - optional (comment out a line to disable it)
DEBUG = -g
# The documented values of -save-temps= are the keywords cwd and obj, so the
# keyword is spelled literally rather than expanded from $(OBJDIR); obj places
# the temp files beside the -o output, i.e. in $(OBJDIR)
LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=obj
LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst

# Create directory for object and temp files (runs each time make reads this file)
MKOBJDIR := $(shell mkdir -p $(OBJDIR))

# Build rules
$(TARGET): $(OBJFILES)
	$(CXX) $(OBJFILES) -o $(TARGET)

# Note: full recompiles/assembles on any include file changes
$(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2)
	$(CXX) $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@

$(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2)
	$(AS) $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@

.PHONY: clean

clean:
	rm -f $(TARGET)
	rm -rf $(OBJDIR)
//-------------------------------------------------
//               Ch11_05.cpp
//-------------------------------------------------

#include <iostream>
#include <cstdint>

using namespace std;

// Assembly language functions; each writes its results through the references
extern "C" void MoveA_(int32_t& a0, int32_t& a1, int32_t& a2, int32_t& a3);
extern "C" void MoveB_(int64_t& b0, int64_t& b1, int64_t& b2, int64_t& b3);
extern "C" void MoveC_(int32_t& c0, int32_t& c1);
extern "C" void MoveD_(int64_t& d0, int64_t& d1, int64_t& d2);

// Calls each Move routine and prints the values it stored.
// The locals are passed uninitialized and are only read after the call returns.
int main(int argc, char** argv)
{
    const char nl = '\n';

    int32_t a0, a1, a2, a3;
    MoveA_(a0, a1, a2, a3);
    cout << "\nResults for MoveA_" << nl;
    cout << "a0 = " << a0 << nl;
    cout << "a1 = " << a1 << nl;
    cout << "a2 = " << a2 << nl;
    cout << "a3 = " << a3 << nl;

    int64_t b0, b1, b2, b3;
    MoveB_(b0, b1, b2, b3);
    cout << "\nResults for MoveB_" << nl;
    cout << "b0 = " << b0 << nl;
    cout << "b1 = " << b1 << nl;
    cout << "b2 = " << b2 << nl;
    cout << "b3 = " << b3 << nl;

    int32_t c0, c1;
    MoveC_(c0, c1);
    cout << "\nResults for MoveC_" << nl;
    cout << "c0 = " << c0 << nl;
    cout << "c1 = " << c1 << nl;

    int64_t d0, d1, d2;
    MoveD_(d0, d1, d2);
    cout << "\nResults for MoveD_" << nl;
    cout << "d0 = " << d0 << nl;
    cout << "d1 = " << d1 << nl;
    cout << "d2 = " << d2 << nl;

    return 0;
}
# Target, include, and object directories
TARGET = Ch11_05
INCDIR1 = .
INCDIR2 = ../../Include
OBJDIR = obj

# Toolchain - defaults match the tools this makefile always used; override on
# the command line (e.g. make CXX=aarch64-linux-gnu-g++) to cross-compile
CXX ?= g++
AS ?= as

# include files
CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h)
CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h)
ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc)
ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc)

# .cpp files in current directory and their objects in $(OBJDIR)
CPPFILES = $(wildcard *.cpp)
CPPOBJFILES = $(patsubst %.cpp,$(OBJDIR)/%.o,$(CPPFILES))

# .s files in current directory and their objects in $(OBJDIR)
ASMFILES = $(wildcard *.s)
ASMOBJFILES = $(patsubst %.s,$(OBJDIR)/%.o,$(ASMFILES))

# Target object files
OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES)

# g++ and assembler options - required
GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall
ASMOPT = -march=armv8-a+simd

# g++ and assembler options - optional (comment out a line to disable it)
DEBUG = -g
# The documented values of -save-temps= are the keywords cwd and obj, so the
# keyword is spelled literally rather than expanded from $(OBJDIR); obj places
# the temp files beside the -o output, i.e. in $(OBJDIR)
LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=obj
LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst

# Create directory for object and temp files (runs each time make reads this file)
MKOBJDIR := $(shell mkdir -p $(OBJDIR))

# Build rules
$(TARGET): $(OBJFILES)
	$(CXX) $(OBJFILES) -o $(TARGET)

# Note: full recompiles/assembles on any include file changes
$(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2)
	$(CXX) $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@

$(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2)
	$(AS) $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@

.PHONY: clean

clean:
	rm -f $(TARGET)
	rm -rf $(OBJDIR)
//------------------------------------------------- 4 | 5 | #include 6 | #include 7 | 8 | using namespace std; 9 | 10 | // Ch11_06_Misc.cpp 11 | extern void PrintResult(const char* msg, const uint32_t* x, uint32_t a, 12 | size_t n, int count = -1); 13 | 14 | // Ch11_06_.s 15 | extern "C" void ShiftA_(uint32_t* x, uint32_t a); 16 | extern "C" void ShiftB_(uint32_t* x, uint32_t a, uint32_t count); 17 | 18 | void ShiftA(void) 19 | { 20 | const size_t n = 4; 21 | 22 | uint32_t a = 0xC1234561; 23 | uint32_t x[4]; 24 | ShiftA_(x, a); 25 | PrintResult("ShiftA_", x, a, n); 26 | } 27 | 28 | void ShiftB(void) 29 | { 30 | const size_t n = 4; 31 | 32 | uint32_t a = 0xC1234561; 33 | uint32_t x[4]; 34 | uint32_t count = 8; 35 | ShiftB_(x, a, count); 36 | PrintResult("ShiftB_", x, a, n, (int)count); 37 | } 38 | 39 | int main(int argc, char** argv) 40 | { 41 | ShiftA(); 42 | ShiftB(); 43 | return 0; 44 | } 45 | -------------------------------------------------------------------------------- /Chapter11/Ch11_06/Ch11_06_.s: -------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // Ch11_06_.s 3 | //------------------------------------------------- 4 | 5 | // extern "C" void ShiftA_(uint32_t* x, uint32_t a); 6 | 7 | .text 8 | .global ShiftA_ 9 | ShiftA_: asr w2,w1,2 // arithmetic shift right - 2 bits 10 | lsl w3,w1,4 // logical shift left - 4 bits 11 | lsr w4,w1,5 // logical shift right - 5 bits 12 | ror w5,w1,3 // rotate right - 3 bits 13 | 14 | str w2,[x0] // save asr result to x[0] 15 | str w3,[x0,4] // save lsl result to x[1] 16 | str w4,[x0,8] // save lsr result to x[2] 17 | str w5,[x0,12] // save ror result to x[3] 18 | ret 19 | 20 | // extern "C" void ShiftB_(uint32_t* x, uint32_t a, uint32_t count); 21 | 22 | .global ShiftB_ 23 | ShiftB_: asr w3,w1,w2 // arithmetic shift right 24 | lsl w4,w1,w2 // logical shift left 25 | lsr w5,w1,w2 // logical shift right 26 | ror w6,w1,w2 // rotate right 27 
| 28 | str w3,[x0] // save asr result to x[0] 29 | str w4,[x0,4] // save lsl result to x[1] 30 | str w5,[x0,8] // save lsr result to x[2] 31 | str w6,[x0,12] // save ror result to x[3] 32 | ret 33 | -------------------------------------------------------------------------------- /Chapter11/Ch11_06/Ch11_06_Misc.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // Ch11_06_Misc.cpp 3 | //------------------------------------------------- 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | using namespace std; 12 | 13 | string ToHexString(unsigned int a) 14 | { 15 | ostringstream oss; 16 | 17 | oss << setfill('0'); 18 | oss << "0x" << hex << setw(8) << a; 19 | return oss.str(); 20 | } 21 | 22 | string ToBitString(unsigned int a) 23 | { 24 | bitset<32> bs {a}; 25 | size_t n = bs.size(); 26 | stringstream oss; 27 | 28 | for (size_t i = 0; i < n; i++) 29 | { 30 | oss << bs[n - 1 - i]; 31 | 32 | if ((i + 1) % 4 == 0) 33 | oss << ' '; 34 | } 35 | 36 | return oss.str(); 37 | } 38 | 39 | void PrintResult(const char* msg, const uint32_t* x, uint32_t a, size_t n, int count) 40 | { 41 | const char nl = '\n'; 42 | string s1[] { "asr #2", "lsl #4", "lsr #5", "ror #3" }; 43 | string s2[] { "asr", "lsl", "lsr", "ror" }; 44 | 45 | if (count < 0) 46 | cout << nl << msg << nl; 47 | else 48 | cout << nl << msg << " - count = " << count << nl; 49 | 50 | cout << "a: " << ToHexString(a); 51 | cout << " | " << ToBitString(a) << nl; 52 | 53 | for (size_t i = 0; i < n; i++) 54 | { 55 | cout << "x[" << i << "]: " << ToHexString(x[i]); 56 | cout << " | " << ToBitString(x[i]); 57 | cout << " | "; 58 | 59 | if (count < 0) 60 | cout << s1[i] << nl; 61 | else 62 | cout << s2[i] << nl; 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /Chapter11/Ch11_06/makefile: 
-------------------------------------------------------------------------------- 1 | # Target, include, and object directories 2 | TARGET = Ch11_06 3 | INCDIR1 = . 4 | INCDIR2 = ../../Include 5 | OBJDIR = obj 6 | 7 | # include files 8 | CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h) 9 | CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h) 10 | ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc) 11 | ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc) 12 | 13 | # .cpp files in current directory 14 | CPPFILES = $(wildcard *.cpp) 15 | CPPOBJFILES_ = $(CPPFILES:.cpp=.o) 16 | CPPOBJFILES = $(patsubst %, $(OBJDIR)/%, $(CPPOBJFILES_)) 17 | 18 | # .s files in current directory 19 | ASMFILES = $(wildcard *.s) 20 | ASMOBJFILES_ = $(ASMFILES:.s=.o) 21 | ASMOBJFILES = $(patsubst %, $(OBJDIR)/%, $(ASMOBJFILES_)) 22 | 23 | # Target object files 24 | OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES) 25 | 26 | # g++ and assembler options - required 27 | GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall 28 | ASMOPT = -march=armv8-a+simd 29 | 30 | # g++ and assembler options - optional (uncomment to enable) 31 | DEBUG = -g 32 | LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=$(OBJDIR) 33 | LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst 34 | 35 | # Create directory for object and temp files 36 | MKOBJDIR := $(shell mkdir -p $(OBJDIR)) 37 | 38 | # Build rules 39 | $(TARGET): $(OBJFILES) 40 | g++ $(OBJFILES) -o $(TARGET) 41 | 42 | # Note: full recompiles/assembles on any include file changes 43 | $(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2) 44 | g++ $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@ 45 | 46 | $(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2) 47 | as $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@ 48 | 49 | .PHONY: clean 50 | 51 | clean: 52 | rm -f $(TARGET) 53 | rm -rf $(OBJDIR) 54 | -------------------------------------------------------------------------------- /Chapter11/Ch11_07/Ch11_07.cpp: 
-------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // Ch11_07.cpp 3 | //------------------------------------------------- 4 | 5 | #include 6 | #include 7 | 8 | using namespace std; 9 | 10 | // Ch11_07_Misc.cpp 11 | extern void PrintResultA(const char* msg, const uint32_t* x, uint32_t a, uint32_t b, size_t n); 12 | extern void PrintResultB(const char* msg, const uint32_t* x, uint32_t a, size_t n); 13 | 14 | // Ch11_07_.s 15 | extern "C" void BitwiseOpsA_(uint32_t* x, uint32_t a, uint32_t b); 16 | extern "C" void BitwiseOpsB_(uint32_t* x, uint32_t a); 17 | 18 | void BitwiseOpsA(void) 19 | { 20 | const size_t n = 3; 21 | uint32_t a, b, x[n]; 22 | 23 | a = 0x12345678; 24 | b = 0xaa55aa55; 25 | BitwiseOpsA_(x, a, b); 26 | PrintResultA("BitwiseOpsA_ Test #1", x, a, b, n); 27 | 28 | a = 0x12345678; 29 | b = 0x00ffc384; 30 | BitwiseOpsA_(x, a, b); 31 | PrintResultA("BitwiseOpsA_ Test #2", x, a, b, n); 32 | } 33 | 34 | void BitwiseOpsB(void) 35 | { 36 | const size_t n = 4; 37 | uint32_t a, x[n]; 38 | 39 | a = 0x12345678; 40 | BitwiseOpsB_(x, a); 41 | PrintResultB("BitwiseOpsB_ Test #1", x, a, n); 42 | 43 | a = 0xaa55aa55; 44 | BitwiseOpsB_(x, a); 45 | PrintResultB("BitwiseOpsB_ Test #2", x, a, n); 46 | } 47 | 48 | int main(int argc, char** argv) 49 | { 50 | BitwiseOpsA(); 51 | cout << "\n"; 52 | BitwiseOpsB(); 53 | return 0; 54 | } 55 | 56 | -------------------------------------------------------------------------------- /Chapter11/Ch11_07/Ch11_07_.s: -------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // Ch11_07_.s 3 | //------------------------------------------------- 4 | 5 | // extern "C" void BitwiseOpsA_(uint32_t* x, uint32_t a, uint32_t b); 6 | 7 | .text 8 | .global BitwiseOpsA_ 9 | BitwiseOpsA_: 10 | 11 | // Perform various bitwise logical operations 12 | and w3,w1,w2 // a AND b 13 | str 
w3,[x0] 14 | 15 | orr w3,w1,w2 // a OR b 16 | str w3,[x0,4] 17 | 18 | eor w3,w1,w2 // a EOR b 19 | str w3,[x0,8] 20 | ret 21 | 22 | // extern "C" void BitwiseOpsB_(uint32_t* x, uint32_t a); 23 | 24 | .global BitwiseOpsB_ 25 | BitwiseOpsB_: 26 | 27 | and w2,w1,0x0000ff00 // a AND 0x0000ff00 28 | str w2,[x0] 29 | 30 | orr w2,w1,0x00ff0000 // a OR 0x00ff0000 31 | str w2,[x0,4] 32 | 33 | eor w2,w1,0xff000000 // a EOR 0xff000000 34 | str w2,[x0,8] 35 | 36 | // and w2,w1,0xcc00ff00 // invalid imm. operand 37 | mov w2,0xff00 38 | movk w2,0xcc00,lsl 16 // w2 = 0xcc00ff00 39 | and w2,w1,w2 // a AND 0xcc00ff00 40 | str w2,[x0,12] 41 | ret 42 | -------------------------------------------------------------------------------- /Chapter11/Ch11_07/makefile: -------------------------------------------------------------------------------- 1 | # Target, include, and object directories 2 | TARGET = Ch11_07 3 | INCDIR1 = . 4 | INCDIR2 = ../../Include 5 | OBJDIR = obj 6 | 7 | # include files 8 | CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h) 9 | CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h) 10 | ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc) 11 | ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc) 12 | 13 | # .cpp files in current directory 14 | CPPFILES = $(wildcard *.cpp) 15 | CPPOBJFILES_ = $(CPPFILES:.cpp=.o) 16 | CPPOBJFILES = $(patsubst %, $(OBJDIR)/%, $(CPPOBJFILES_)) 17 | 18 | # .s files in current directory 19 | ASMFILES = $(wildcard *.s) 20 | ASMOBJFILES_ = $(ASMFILES:.s=.o) 21 | ASMOBJFILES = $(patsubst %, $(OBJDIR)/%, $(ASMOBJFILES_)) 22 | 23 | # Target object files 24 | OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES) 25 | 26 | # g++ and assembler options - required 27 | GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall 28 | ASMOPT = -march=armv8-a+simd 29 | 30 | # g++ and assembler options - optional (uncomment to enable) 31 | DEBUG = -g 32 | LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=$(OBJDIR) 33 | LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst 34 | 35 | # Create 
directory for object and temp files 36 | MKOBJDIR := $(shell mkdir -p $(OBJDIR)) 37 | 38 | # Build rules 39 | $(TARGET): $(OBJFILES) 40 | g++ $(OBJFILES) -o $(TARGET) 41 | 42 | # Note: full recompiles/assembles on any include file changes 43 | $(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2) 44 | g++ $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@ 45 | 46 | $(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2) 47 | as $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@ 48 | 49 | .PHONY: clean 50 | 51 | clean: 52 | rm -f $(TARGET) 53 | rm -rf $(OBJDIR) 54 | -------------------------------------------------------------------------------- /Chapter12/Ch12_01/makefile: -------------------------------------------------------------------------------- 1 | # Target, include, and object directories 2 | TARGET = Ch12_01 3 | INCDIR1 = . 4 | INCDIR2 = ../../Include 5 | OBJDIR = obj 6 | 7 | # include files 8 | CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h) 9 | CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h) 10 | ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc) 11 | ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc) 12 | 13 | # .cpp files in current directory 14 | CPPFILES = $(wildcard *.cpp) 15 | CPPOBJFILES_ = $(CPPFILES:.cpp=.o) 16 | CPPOBJFILES = $(patsubst %, $(OBJDIR)/%, $(CPPOBJFILES_)) 17 | 18 | # .s files in current directory 19 | ASMFILES = $(wildcard *.s) 20 | ASMOBJFILES_ = $(ASMFILES:.s=.o) 21 | ASMOBJFILES = $(patsubst %, $(OBJDIR)/%, $(ASMOBJFILES_)) 22 | 23 | # Target object files 24 | OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES) 25 | 26 | # g++ and assembler options - required 27 | GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall 28 | ASMOPT = -march=armv8-a+simd 29 | 30 | # g++ and assembler options - optional (uncomment to enable) 31 | DEBUG = -g 32 | LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=$(OBJDIR) 33 | LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst 34 | 35 | # Create directory for object and temp files 36 | 
MKOBJDIR := $(shell mkdir -p $(OBJDIR)) 37 | 38 | # Build rules 39 | $(TARGET): $(OBJFILES) 40 | g++ $(OBJFILES) -o $(TARGET) 41 | 42 | # Note: full recompiles/assembles on any include file changes 43 | $(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2) 44 | g++ $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@ 45 | 46 | $(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2) 47 | as $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@ 48 | 49 | .PHONY: clean 50 | 51 | clean: 52 | rm -f $(TARGET) 53 | rm -rf $(OBJDIR) 54 | -------------------------------------------------------------------------------- /Chapter12/Ch12_02/makefile: -------------------------------------------------------------------------------- 1 | # Target, include, and object directories 2 | TARGET = Ch12_02 3 | INCDIR1 = . 4 | INCDIR2 = ../../Include 5 | OBJDIR = obj 6 | 7 | # include files 8 | CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h) 9 | CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h) 10 | ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc) 11 | ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc) 12 | 13 | # .cpp files in current directory 14 | CPPFILES = $(wildcard *.cpp) 15 | CPPOBJFILES_ = $(CPPFILES:.cpp=.o) 16 | CPPOBJFILES = $(patsubst %, $(OBJDIR)/%, $(CPPOBJFILES_)) 17 | 18 | # .s files in current directory 19 | ASMFILES = $(wildcard *.s) 20 | ASMOBJFILES_ = $(ASMFILES:.s=.o) 21 | ASMOBJFILES = $(patsubst %, $(OBJDIR)/%, $(ASMOBJFILES_)) 22 | 23 | # Target object files 24 | OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES) 25 | 26 | # g++ and assembler options - required 27 | GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall 28 | ASMOPT = -march=armv8-a+simd 29 | 30 | # g++ and assembler options - optional (uncomment to enable) 31 | DEBUG = -g 32 | LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=$(OBJDIR) 33 | LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst 34 | 35 | # Create directory for object and temp files 36 | MKOBJDIR := $(shell mkdir -p $(OBJDIR)) 37 
| 38 | # Build rules 39 | $(TARGET): $(OBJFILES) 40 | g++ $(OBJFILES) -o $(TARGET) 41 | 42 | # Note: full recompiles/assembles on any include file changes 43 | $(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2) 44 | g++ $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@ 45 | 46 | $(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2) 47 | as $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@ 48 | 49 | .PHONY: clean 50 | 51 | clean: 52 | rm -f $(TARGET) 53 | rm -rf $(OBJDIR) 54 | -------------------------------------------------------------------------------- /Chapter12/Ch12_03/makefile: -------------------------------------------------------------------------------- 1 | # Target, include, and object directories 2 | TARGET = Ch12_03 3 | INCDIR1 = . 4 | INCDIR2 = ../../Include 5 | OBJDIR = obj 6 | 7 | # include files 8 | CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h) 9 | CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h) 10 | ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc) 11 | ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc) 12 | 13 | # .cpp files in current directory 14 | CPPFILES = $(wildcard *.cpp) 15 | CPPOBJFILES_ = $(CPPFILES:.cpp=.o) 16 | CPPOBJFILES = $(patsubst %, $(OBJDIR)/%, $(CPPOBJFILES_)) 17 | 18 | # .s files in current directory 19 | ASMFILES = $(wildcard *.s) 20 | ASMOBJFILES_ = $(ASMFILES:.s=.o) 21 | ASMOBJFILES = $(patsubst %, $(OBJDIR)/%, $(ASMOBJFILES_)) 22 | 23 | # Target object files 24 | OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES) 25 | 26 | # g++ and assembler options - required 27 | GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall 28 | ASMOPT = -march=armv8-a+simd 29 | 30 | # g++ and assembler options - optional (uncomment to enable) 31 | DEBUG = -g 32 | LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=$(OBJDIR) 33 | LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst 34 | 35 | # Create directory for object and temp files 36 | MKOBJDIR := $(shell mkdir -p $(OBJDIR)) 37 | 38 | # Build rules 39 | $(TARGET): 
$(OBJFILES) 40 | g++ $(OBJFILES) -o $(TARGET) 41 | 42 | # Note: full recompiles/assembles on any include file changes 43 | $(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2) 44 | g++ $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@ 45 | 46 | $(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2) 47 | as $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@ 48 | 49 | .PHONY: clean 50 | 51 | clean: 52 | rm -f $(TARGET) 53 | rm -rf $(OBJDIR) 54 | -------------------------------------------------------------------------------- /Chapter12/Ch12_04/makefile: -------------------------------------------------------------------------------- 1 | # Target, include, and object directories 2 | TARGET = Ch12_04 3 | INCDIR1 = . 4 | INCDIR2 = ../../Include 5 | OBJDIR = obj 6 | 7 | # include files 8 | CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h) 9 | CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h) 10 | ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc) 11 | ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc) 12 | 13 | # .cpp files in current directory 14 | CPPFILES = $(wildcard *.cpp) 15 | CPPOBJFILES_ = $(CPPFILES:.cpp=.o) 16 | CPPOBJFILES = $(patsubst %, $(OBJDIR)/%, $(CPPOBJFILES_)) 17 | 18 | # .s files in current directory 19 | ASMFILES = $(wildcard *.s) 20 | ASMOBJFILES_ = $(ASMFILES:.s=.o) 21 | ASMOBJFILES = $(patsubst %, $(OBJDIR)/%, $(ASMOBJFILES_)) 22 | 23 | # Target object files 24 | OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES) 25 | 26 | # g++ and assembler options - required 27 | GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall 28 | ASMOPT = -march=armv8-a+simd 29 | 30 | # g++ and assembler options - optional (uncomment to enable) 31 | DEBUG = -g 32 | LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=$(OBJDIR) 33 | LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst 34 | 35 | # Create directory for object and temp files 36 | MKOBJDIR := $(shell mkdir -p $(OBJDIR)) 37 | 38 | # Build rules 39 | $(TARGET): $(OBJFILES) 40 | g++ $(OBJFILES) -o 
$(TARGET) 41 | 42 | # Note: full recompiles/assembles on any include file changes 43 | $(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2) 44 | g++ $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@ 45 | 46 | $(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2) 47 | as $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@ 48 | 49 | .PHONY: clean 50 | 51 | clean: 52 | rm -f $(TARGET) 53 | rm -rf $(OBJDIR) 54 | -------------------------------------------------------------------------------- /Chapter12/Ch12_05/Ch12_05_.s: -------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // Ch12_05_.s 3 | //------------------------------------------------- 4 | 5 | // extern "C" int CalcArraySumA_(const int* x, int n); 6 | 7 | .text 8 | .global CalcArraySumA_ 9 | CalcArraySumA_: 10 | 11 | mov w2,0 // sum = 0 12 | cmp w1,0 // is n <= 0? 13 | b.le DoneA // jump if n <= 0 14 | 15 | LoopA: ldr w3,[x0],4 // w3 = x[i] 16 | add w2,w2,w3 // sum += x[i] 17 | 18 | subs w1,w1,1 // n -= 1 19 | b.ne LoopA // jump if more data 20 | 21 | DoneA: mov w0,w2 // w0 = final sum 22 | ret 23 | 24 | // extern "C" uint64_t CalcArraySumB_(const uint64_t* x, uint32_t n); 25 | 26 | .global CalcArraySumB_ 27 | CalcArraySumB_: 28 | 29 | mov x2,0 // sum = 0 30 | cmp w1,0 // is n == 0? 31 | b.eq DoneB // jump if n == 0 32 | 33 | mov w3,0 // i = 0 34 | 35 | LoopB: ldr x4,[x0,w3,uxtw 3] // x4 = x[i] 36 | add x2,x2,x4 // sum += x[i] 37 | 38 | add w3,w3,1 // i += 1 39 | cmp w3,w1 // is i == n? 40 | b.ne LoopB // jump if more data 41 | 42 | DoneB: mov x0,x2 // x0 = final sum 43 | ret 44 | -------------------------------------------------------------------------------- /Chapter12/Ch12_05/makefile: -------------------------------------------------------------------------------- 1 | # Target, include, and object directories 2 | TARGET = Ch12_05 3 | INCDIR1 = . 
4 | INCDIR2 = ../../Include 5 | OBJDIR = obj 6 | 7 | # include files 8 | CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h) 9 | CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h) 10 | ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc) 11 | ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc) 12 | 13 | # .cpp files in current directory 14 | CPPFILES = $(wildcard *.cpp) 15 | CPPOBJFILES_ = $(CPPFILES:.cpp=.o) 16 | CPPOBJFILES = $(patsubst %, $(OBJDIR)/%, $(CPPOBJFILES_)) 17 | 18 | # .s files in current directory 19 | ASMFILES = $(wildcard *.s) 20 | ASMOBJFILES_ = $(ASMFILES:.s=.o) 21 | ASMOBJFILES = $(patsubst %, $(OBJDIR)/%, $(ASMOBJFILES_)) 22 | 23 | # Target object files 24 | OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES) 25 | 26 | # g++ and assembler options - required 27 | GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall 28 | ASMOPT = -march=armv8-a+simd 29 | 30 | # g++ and assembler options - optional (uncomment to enable) 31 | DEBUG = -g 32 | LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=$(OBJDIR) 33 | LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst 34 | 35 | # Create directory for object and temp files 36 | MKOBJDIR := $(shell mkdir -p $(OBJDIR)) 37 | 38 | # Build rules 39 | $(TARGET): $(OBJFILES) 40 | g++ $(OBJFILES) -o $(TARGET) 41 | 42 | # Note: full recompiles/assembles on any include file changes 43 | $(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2) 44 | g++ $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@ 45 | 46 | $(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2) 47 | as $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@ 48 | 49 | .PHONY: clean 50 | 51 | clean: 52 | rm -f $(TARGET) 53 | rm -rf $(OBJDIR) 54 | -------------------------------------------------------------------------------- /Chapter12/Ch12_06/makefile: -------------------------------------------------------------------------------- 1 | # Target, include, and object directories 2 | TARGET = Ch12_06 3 | INCDIR1 = . 
4 | INCDIR2 = ../../Include 5 | OBJDIR = obj 6 | 7 | # include files 8 | CPPINCFILES1 = $(wildcard $(INCDIR1)/*.h) 9 | CPPINCFILES2 = $(wildcard $(INCDIR2)/*.h) 10 | ASMINCFILES1 = $(wildcard $(INCDIR1)/*.inc) 11 | ASMINCFILES2 = $(wildcard $(INCDIR2)/*.inc) 12 | 13 | # .cpp files in current directory 14 | CPPFILES = $(wildcard *.cpp) 15 | CPPOBJFILES_ = $(CPPFILES:.cpp=.o) 16 | CPPOBJFILES = $(patsubst %, $(OBJDIR)/%, $(CPPOBJFILES_)) 17 | 18 | # .s files in current directory 19 | ASMFILES = $(wildcard *.s) 20 | ASMOBJFILES_ = $(ASMFILES:.s=.o) 21 | ASMOBJFILES = $(patsubst %, $(OBJDIR)/%, $(ASMOBJFILES_)) 22 | 23 | # Target object files 24 | OBJFILES = $(CPPOBJFILES) $(ASMOBJFILES) 25 | 26 | # g++ and assembler options - required 27 | GPPOPT = -march=armv8-a+simd -O3 -std=c++14 -Wall 28 | ASMOPT = -march=armv8-a+simd 29 | 30 | # g++ and assembler options - optional (uncomment to enable) 31 | DEBUG = -g 32 | LISTFILE_CPP = -Wa,-aghl=$(OBJDIR)/$(basename $<).lst -save-temps=$(OBJDIR) 33 | LISTFILE_ASM = -aghlms=$(OBJDIR)/$(basename $<).lst 34 | 35 | # Create directory for object and temp files 36 | MKOBJDIR := $(shell mkdir -p $(OBJDIR)) 37 | 38 | # Build rules 39 | $(TARGET): $(OBJFILES) 40 | g++ $(OBJFILES) -o $(TARGET) 41 | 42 | # Note: full recompiles/assembles on any include file changes 43 | $(OBJDIR)/%.o: %.cpp $(CPPINCFILES1) $(CPPINCFILES2) 44 | g++ $(DEBUG) $(LISTFILE_CPP) $(GPPOPT) -I$(INCDIR1) -I$(INCDIR2) -c $< -o $@ 45 | 46 | $(OBJDIR)/%.o: %.s $(ASMINCFILES1) $(ASMINCFILES2) 47 | as $(DEBUG) $(LISTFILE_ASM) $(ASMOPT) -I$(INCDIR1) -I$(INCDIR2) $< -o $@ 48 | 49 | .PHONY: clean 50 | 51 | clean: 52 | rm -f $(TARGET) 53 | rm -rf $(OBJDIR) 54 | -------------------------------------------------------------------------------- /Chapter13/Ch13_01/Ch13_01.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // Ch13_01.cpp 3 | 
//------------------------------------------------- 4 | 5 | #include 6 | #include 7 | 8 | using namespace std; 9 | 10 | extern "C" float ConvertFtoC_(float deg_f); 11 | extern "C" float ConvertCtoF_(float deg_c); 12 | 13 | int main() 14 | { 15 | const int w = 10; 16 | float deg_fvals[] = {-459.67f, -40.0f, 0.0f, 32.0f, 72.0f, 98.6f, 212.0f}; 17 | size_t nf = sizeof(deg_fvals) / sizeof(float); 18 | 19 | cout << setprecision(6); 20 | 21 | cout << "\n-------- ConvertFtoC Results --------\n"; 22 | 23 | for (size_t i = 0; i < nf; i++) 24 | { 25 | float deg_c = ConvertFtoC_(deg_fvals[i]); 26 | 27 | cout << " i: " << i << " "; 28 | cout << "f: " << setw(w) << deg_fvals[i] << " "; 29 | cout << "c: " << setw(w) << deg_c << '\n'; 30 | } 31 | 32 | cout << "\n-------- ConvertCtoF Results --------\n"; 33 | 34 | float deg_cvals[] = {-273.15f, -40.0f, -17.777778f, 0.0f, 25.0f, 37.0f, 100.0f}; 35 | size_t nc = sizeof(deg_cvals) / sizeof(float); 36 | 37 | for (size_t i = 0; i < nc; i++) 38 | { 39 | float deg_f = ConvertCtoF_(deg_cvals[i]); 40 | 41 | cout << " i: " << i << " "; 42 | cout << "c: " << setw(w) << deg_cvals[i] << " "; 43 | cout << "f: " << setw(w) << deg_f << '\n'; 44 | } 45 | 46 | return 0; 47 | } 48 | 49 | -------------------------------------------------------------------------------- /Chapter13/Ch13_01/Ch13_01_.s: -------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // Ch13_01_.s 3 | //------------------------------------------------- 4 | 5 | // Constants for temperature conversion functions 6 | .text 7 | r4_ScaleFtoC: .single 0.55555556 // 5 / 9 8 | r4_ScaleCtoF: .single 1.8 // 9 / 5 9 | r4_32p0: .single 32.0 10 | 11 | // extern "C" float ConvertFtoC_(float deg_f); 12 | 13 | .global ConvertFtoC_ 14 | ConvertFtoC_: 15 | 16 | // Convert deg_f to Celsius 17 | ldr s1,r4_32p0 // s1 = 32 18 | ldr s2,r4_ScaleFtoC // s2 = 5 / 9 19 | fsub s3,s0,s1 // s3 = deg_f - 32 20 | fmul s0,s3,s2 // 
s0 = (deg_f - 32) * 5 / 9 21 | ret 22 | 23 | // extern "C" float ConvertCtoF_(float deg_c); 24 | 25 | .global ConvertCtoF_ 26 | ConvertCtoF_: 27 | 28 | // Convert deg_c to Fahrenheit 29 | ldr s1,r4_32p0 // s1 = 32 30 | ldr s2,r4_ScaleCtoF // s2 = 9 / 5 31 | fmul s3,s0,s2 // s3 = deg_c * 9 / 5 32 | fadd s0,s3,s1 // s0 = deg_c * 9 / 5 + 32 33 | ret 34 | -------------------------------------------------------------------------------- /Chapter13/Ch13_02/Ch13_02.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // Ch13_02.cpp 3 | //------------------------------------------------- 4 | 5 | #include 6 | #include 7 | 8 | using namespace std; 9 | 10 | extern "C" void CalcSphereAreaVolume_(double r, double* sa, double* vol); 11 | 12 | int main(int argc, char** argv) 13 | { 14 | double r[] = { 0.0, 1.0, 2.0, 3.0, 5.0, 10.0, 20.0, 32.0 }; 15 | size_t num_r = sizeof(r) / sizeof(double); 16 | 17 | cout << setprecision(8); 18 | cout << "\n--------- Results for CalcSphereAreaVolume -----------\n"; 19 | 20 | for (size_t i = 0; i < num_r; i++) 21 | { 22 | double sa = -1, vol = -1; 23 | 24 | CalcSphereAreaVolume_(r[i], &sa, &vol); 25 | 26 | cout << "i: " << i << " "; 27 | cout << "r: " << setw(6) << r[i] << " "; 28 | cout << "sa: " << setw(11) << sa << " "; 29 | cout << "vol: " << setw(11) << vol << '\n'; 30 | } 31 | 32 | return 0; 33 | } 34 | -------------------------------------------------------------------------------- /Chapter13/Ch13_02/Ch13_02_.s: -------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // Ch13_02_.s 3 | //------------------------------------------------- 4 | 5 | // extern "C" void CalcSphereAreaVolume_(double r, double* sa, double* vol); 6 | 7 | .text 8 | .global CalcSphereAreaVolume_ 9 | CalcSphereAreaVolume_: 10 | 11 | // Calculate surface area and volume 12 | ldr d5,r8_PI // d5 = 
PI 13 | fmov d6,4.0 // d6 = 4.0 14 | fmov d7,3.0 // d7 = 3.0 15 | 16 | fmul d1,d0,d0 // d1 = r * r 17 | fmul d1,d1,d5 // d1 = r * r * PI 18 | fmul d1,d1,d6 // d1 = r * r * PI * 4 19 | str d1,[x0] // save surface area 20 | 21 | fmul d2,d1,d0 // d2 = sa * r 22 | fdiv d3,d2,d7 // d3 = sa * r / 3 23 | str d3,[x1] // save volume 24 | ret 25 | 26 | r8_PI: .double 3.14159265358979323846 27 | -------------------------------------------------------------------------------- /Chapter13/Ch13_03/Ch13_03_.s: -------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // Ch13_03_.s 3 | //------------------------------------------------- 4 | 5 | // extern "C" double CalcDist_(double x1, double y1, double z1, double x2, double y2, double z2); 6 | 7 | .text 8 | .global CalcDist_ 9 | CalcDist_: 10 | 11 | // Calculate distance 12 | fsub d0,d3,d0 // d0 = x2 - x1 13 | fmul d0,d0,d0 // d0 = (x2 - x1) ** 2 14 | 15 | fsub d1,d4,d1 // d1 = y2 - y1 16 | fmadd d0,d1,d1,d0 // d0 += (y2 - y1) ** 2 17 | 18 | fsub d2,d5,d2 // d2 = z2 - z1 19 | fmadd d0,d2,d2,d0 // d0 += (z2 - z1) ** 2 20 | 21 | fsqrt d0,d0 // d0 = final distance 22 | ret 23 | -------------------------------------------------------------------------------- /Chapter13/Ch13_04/Ch13_04.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // Ch13_04.cpp 3 | //------------------------------------------------- 4 | 5 | #include <iostream> 6 | #include <iomanip> 7 | #include <string> 8 | #include <limits> 9 | 10 | using namespace std; 11 | 12 | extern "C" void CompareF32_(bool* results, float a, float b); 13 | 14 | const char* c_OpStrings[] = {"UO", "LT", "LE", "EQ", "NE", "GT", "GE"}; 15 | const size_t c_NumOpStrings = sizeof(c_OpStrings) / sizeof(char*); 16 | 17 | const string c_Dashes(72, '-'); 18 | 19 | template <typename T> void PrintResults(const bool* cmp_results, T a, T b) 20 | { 21 | cout << "a = " << a << ", 
"; 22 | cout << "b = " << b << '\n'; 23 | 24 | for (size_t i = 0; i < c_NumOpStrings; i++) 25 | { 26 | cout << c_OpStrings[i] << '='; 27 | cout << boolalpha << left << setw(6) << cmp_results[i] << ' '; 28 | } 29 | 30 | cout << "\n\n"; 31 | } 32 | 33 | void CompareF32(void) 34 | { 35 | const size_t n = 6; 36 | float a[n] {120.0, 250.0, 300.0, -18.0, -81.0, 42.0}; 37 | float b[n] {130.0, 240.0, 300.0, 32.0, -100.0, 0.0}; 38 | 39 | // Set NAN test value 40 | b[n - 1] = numeric_limits<float>::quiet_NaN(); 41 | 42 | cout << "\nResults for CompareF32\n"; 43 | cout << c_Dashes << '\n'; 44 | 45 | for (size_t i = 0; i < n; i++) 46 | { 47 | bool cmp_results[c_NumOpStrings]; 48 | 49 | CompareF32_(cmp_results, a[i], b[i]); 50 | PrintResults(cmp_results, a[i], b[i]); 51 | } 52 | } 53 | 54 | int main() 55 | { 56 | CompareF32(); 57 | return 0; 58 | } 59 | -------------------------------------------------------------------------------- /Chapter13/Ch13_04/Ch13_04_.s: -------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // Ch13_04_.s 3 | //------------------------------------------------- 4 | 5 | 6 | // extern "C" void CompareF32_(bool* results, float a, float b); 7 | .text 8 | .global CompareF32_ 9 | CompareF32_: 10 | 11 | fcmpe s0,s1 // compare a and b, update NZCV 12 | 13 | cset w1,vs // w1 = 1 if unordered 14 | strb w1,[x0,0] // save result 15 | 16 | cset w1,mi // w1 = 1 if a < b 17 | strb w1,[x0,1] // save result 18 | 19 | cset w1,ls // w1 = 1 if a <= b 20 | strb w1,[x0,2] // save result 21 | 22 | cset w1,eq // w1 = 1 if a == b 23 | strb w1,[x0,3] // save result 24 | 25 | cset w1,ne // w1 = 1 if a != b 26 | strb w1,[x0,4] // save result 27 | 28 | cset w1,gt // w1 = 1 if a > b 29 | strb w1,[x0,5] // save result 30 | 31 | cset w1,ge // w1 = 1 if a >= b 32 | strb w1,[x0,6] // save result 33 | ret 34 | -------------------------------------------------------------------------------- 
/Chapter13/Ch13_05/GetSetRm_.s: -------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // GetSetRm_.s 3 | //------------------------------------------------- 4 | 5 | // Example functions to get/set AArch64 FP rounding mode 6 | // 7 | // rm = 0 Round to nearest 8 | // rm = 1 Round to plus infinity 9 | // rm = 2 Round to minus infinity 10 | // rm = 3 Round to zero 11 | 12 | // extern "C" Rm GetRm_(void); 13 | 14 | .text 15 | .global GetRm_ 16 | GetRm_: mrs x0,fpcr // x0 = fpcr 17 | lsr x1,x0,22 18 | and x0,x1,3 // rounding mode in bits 1:0 19 | ret 20 | 21 | // extern "C" void SetRm_(uint64_t rm); 22 | 23 | .global SetRm_ 24 | SetRm_: mrs x1,fpcr // x1 = fpcr 25 | bfi x1,x0,22,2 // insert new rounding mode 26 | msr fpcr,x1 // save updated fpcr 27 | ret 28 | -------------------------------------------------------------------------------- /Chapter13/Ch13_07/Ch13_07_.s: -------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // Ch13_07_.s 3 | //------------------------------------------------- 4 | 5 | // extern "C" bool CalcTrace_(double* trace, const double* x, int nrows, int ncols); 6 | 7 | .text 8 | .global CalcTrace_ 9 | CalcTrace_: 10 | cmp w2,w3 11 | b.ne InvalidArg // jump if nrows != ncols 12 | cmp w2,0 13 | b.le InvalidArg // jump if nrows <= 0 14 | 15 | // Calculate matrix trace 16 | movi d0,0 // sum = 0.0 17 | mov w4,0 // i = 0 18 | 19 | Loop1: mul w5,w4,w3 // w5 = i * ncols 20 | add w5,w5,w4 // w5 = i * ncols + i 21 | 22 | ldr d1,[x1,w5,sxtw 3] // d1 = x[i][i] 23 | fadd d0,d0,d1 // sum += x[i][i] 24 | 25 | add w4,w4,1 // i += 1 26 | cmp w4,w2 27 | b.lt Loop1 // jump if not done 28 | 29 | str d0,[x0] // save trace value 30 | mov w0,1 // set success return code 31 | ret 32 | 33 | InvalidArg: mov w0,0 // set error return code 34 | ret 35 | 
-------------------------------------------------------------------------------- /Chapter13/Ch13_08/Ch13_08.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch13_08.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | using namespace std; 10 | 11 | extern "C" void CalcBSA_(double bsa[3], double ht, double wt); 12 | 13 | void CalcBSA(double bsa[3], double ht, double wt) 14 | { 15 | bsa[0] = 0.007184 * pow(ht, 0.725) * pow(wt, 0.425); 16 | bsa[1] = 0.0235 * pow(ht, 0.42246) * pow(wt, 0.51456); 17 | bsa[2] = sqrt(ht * wt / 3600.0); 18 | } 19 | 20 | int main() 21 | { 22 | const int n = 6; 23 | const double ht[n] = { 150, 160, 170, 180, 190, 200 }; 24 | const double wt[n] = { 50.0, 60.0, 70.0, 80.0, 90.0, 100.0 }; 25 | 26 | cout << "---------- Body Surface Area Results ----------\n"; 27 | cout << fixed; 28 | 29 | for (int i = 0; i < n; i++) 30 | { 31 | cout << setprecision(1); 32 | cout << "height: " << setw(6) << ht[i] << " cm\n"; 33 | cout << "weight: " << setw(6) << wt[i] << " kg\n"; 34 | 35 | double bsa1[3], bsa2[3]; 36 | 37 | CalcBSA(bsa1, ht[i], wt[i]); 38 | CalcBSA_(bsa2, ht[i], wt[i]); 39 | 40 | for (int j = 0; j < 3; j++) 41 | { 42 | cout << setprecision(6); 43 | cout << "bsa1[" << j << "]: " << setw(10) << bsa1[j]; 44 | cout << " | "; 45 | cout << "bsa2[" << j << "]: " << setw(10) << bsa2[j]; 46 | cout << " (sq. 
m)\n"; 47 | } 48 | 49 | cout << '\n'; 50 | } 51 | 52 | return 0; 53 | } 54 | -------------------------------------------------------------------------------- /Chapter14/Ch14_03/Ch14_03.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // Ch14_03.cpp 3 | //------------------------------------------------- 4 | 5 | #include 6 | #include 7 | #include "Vec128.h" 8 | 9 | using namespace std; 10 | 11 | extern "C" bool PackedMulA_(Vec128* x, const Vec128& a, const Vec128& b, int16_t c); 12 | 13 | void PackedMulA(void) 14 | { 15 | Vec128 x[4], a, b; 16 | const int16_t c = 5; 17 | const char nl = '\n'; 18 | string sep(75, '-'); 19 | 20 | a.m_I16[0] = 10; b.m_I16[0] = 6; 21 | a.m_I16[1] = 7; b.m_I16[1] = 13; 22 | a.m_I16[2] = -23; b.m_I16[2] = -75; 23 | a.m_I16[3] = 41; b.m_I16[3] = 9; 24 | a.m_I16[4] = 6; b.m_I16[4] = 37; 25 | a.m_I16[5] = -33; b.m_I16[5] = 28; 26 | a.m_I16[6] = 19; b.m_I16[6] = 56; 27 | a.m_I16[7] = 16; b.m_I16[7] = -18; 28 | 29 | PackedMulA_(x, a, b, c); 30 | 31 | cout << "\nPackedMulA_\n"; 32 | cout << sep << nl; 33 | 34 | cout << "a: " << a.ToStringI16() << nl; 35 | cout << "b: " << b.ToStringI16() << nl << nl; 36 | cout << "x[0]: " << x[0].ToStringI16() << nl << nl; 37 | cout << "x[1]: " << x[1].ToStringI16() << nl << nl; 38 | cout << "x[2]: " << x[2].ToStringI32() << nl; 39 | cout << "x[3]: " << x[3].ToStringI32() << nl; 40 | } 41 | 42 | int main(void) 43 | { 44 | PackedMulA(); 45 | return 0; 46 | } 47 | -------------------------------------------------------------------------------- /Chapter14/Ch14_03/Ch14_03_.s: -------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // Ch14_03_.s 3 | //------------------------------------------------- 4 | 5 | // extern "C" bool PackedMulA_(Vec128 x[4], const Vec128& a, const Vec128& b, int16_t c); 6 | 7 | .text 8 | .global PackedMulA_ 9 | 
PackedMulA_: 10 | ld1 {v0.8h},[x1] // v0 = a 11 | ld1 {v1.8h},[x2] // v1 = b 12 | 13 | mul v2.8h,v0.8h,v1.8h // a * b (vector 16-bit) 14 | st1 {v2.8h},[x0],16 // save result x[0] 15 | 16 | mov v3.8h[0],w3 // load c into low 16 bits 17 | mul v2.8h,v0.8h,v3.8h[0] // mul elements in a by c 18 | st1 {v2.8h},[x0],16 // save result to x[1] 19 | 20 | smull v2.4s,v0.4h,v1.4h // signed mul long: low 4 halfwords of a * b -> 4 words 21 | smull2 v3.4s,v0.8h,v1.8h // signed mul long: high 4 halfwords of a * b -> 4 words 22 | st1 {v2.4s,v3.4s},[x0] // results to x[2], x[3] 23 | ret // NOTE(review): prototype returns bool but w0 is never written here - confirm callers ignore the result 24 | 25 | -------------------------------------------------------------------------------- /Chapter14/Ch14_04/Ch14_04.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch14_04.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | 8 | // Ch14_04.cpp 9 | extern void Init(uint8_t* x, uint64_t n, unsigned int seed); 10 | extern bool CalcMinMaxU8(uint8_t* x_min, uint8_t* x_max, const uint8_t* x, uint64_t n); 11 | 12 | // Ch14_04_BM.cpp 13 | extern void MinMaxU8_BM(void); 14 | 15 | // Ch14_04_.s 16 | extern "C" bool CalcMinMaxU8_(uint8_t* x_min, uint8_t* x_max, const uint8_t* x, uint64_t n); 17 | 18 | // Common constants 19 | const uint64_t c_NumElements = 16 * 1024 * 1024 + 7; 20 | const unsigned int c_RngSeedVal = 23; 21 | -------------------------------------------------------------------------------- /Chapter14/Ch14_04/Ch14_04_BM.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch14_04_BM.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | #include "Ch14_04.h" 7 | #include "AlignedMem.h" 8 | #include "BmThreadTimer.h" 9 | #include "OS.h" 10 | 11 | using namespace std; 12 | 13 | void MinMaxU8_BM(void) 14 | { 15 | cout << "\nRunning benchmark function MinMaxU8_BM - please wait\n"; 16 | 17 | size_t n = c_NumElements; 18 | AlignedArray 
x_aa(n, 16); 19 | uint8_t* x = x_aa.Data(); 20 | 21 | Init(x, n, c_RngSeedVal); 22 | 23 | const size_t num_it = 500; 24 | const size_t num_alg = 2; 25 | BmThreadTimer bmtt(num_it, num_alg); 26 | 27 | for (size_t i = 0; i < num_it; i++) 28 | { 29 | uint8_t x_min0 = 0, x_max0 = 0; 30 | uint8_t x_min1 = 0, x_max1 = 0; 31 | 32 | bmtt.Start(i, 0); 33 | CalcMinMaxU8(&x_min0, &x_max0, x, n); 34 | bmtt.Stop(i, 0); 35 | 36 | bmtt.Start(i, 1); 37 | CalcMinMaxU8_(&x_min1, &x_max1, x, n); 38 | bmtt.Stop(i, 1); 39 | } 40 | 41 | string fn = bmtt.BuildCsvFilenameString("Ch14_04_MinMaxU8_BM"); 42 | bmtt.SaveElapsedTimes(fn, BmThreadTimer::EtUnit::MicroSec, 2); 43 | cout << "Benchmark times save to file " << fn << '\n'; 44 | } 45 | -------------------------------------------------------------------------------- /Chapter14/Ch14_05/Ch14_05.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch14_05.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | 8 | struct ClipData 9 | { 10 | uint8_t* m_Des; // destination buffer pointer 11 | uint8_t* m_Src; // source buffer pointer 12 | uint64_t m_NumPixels; // number of pixels 13 | uint64_t m_NumClippedPixels; // number of clipped pixels 14 | uint8_t m_ThreshLo; // low threshold 15 | uint8_t m_ThreshHi; // high threshold 16 | }; 17 | 18 | // Ch14_05.cpp 19 | extern void Init(uint8_t* x, uint64_t n, unsigned int seed); 20 | extern bool ClipPixelsCpp(ClipData* cd); 21 | 22 | // Ch14_05a_.s 23 | extern "C" bool ClipPixelsA_(ClipData* cd); 24 | 25 | // Ch14_05b_.s 26 | extern "C" bool ClipPixelsB_(ClipData* cd); 27 | 28 | // Ch14_05_BM.cpp 29 | extern void ClipPixels_BM(void); 30 | 31 | // Ch14_05_Misc.cpp 32 | extern void PrintClipDataStructOffsets(void); 33 | 34 | // Algorithm constants 35 | const uint8_t c_ThreshLo = 10; 36 | const uint8_t c_ThreshHi = 245; 37 | const uint64_t c_NumPixels = 8 * 1024 * 1024; 38 
| const unsigned int c_Seed = 157; 39 | -------------------------------------------------------------------------------- /Chapter14/Ch14_05/Ch14_05_Macros_.inc: -------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // Ch14_05_Macros_.inc 3 | //------------------------------------------------- 4 | 5 | // Macro ClipPix 6 | // 7 | // Input: Vreg - original source pixels 8 | // 9 | // Output: Vreg - clipped pixels 10 | // 11 | // Data registers: v16 - packed m_ThreshLo 12 | // v17 - packed m_ThreshHi 13 | // v18 - packed 0x01 14 | // x8 - num_clipped_pixels 15 | // 16 | // Temp registers: v0, v1, v2, v3, x9 17 | 18 | .macro ClipPix Vreg 19 | umax v0.16b,\Vreg\().16b,v16.16b // clip to thresh_lo 20 | umin v1.16b,v0.16b,v17.16b // clip to thresh_hi 21 | 22 | cmeq v2.16b,v1.16b,\Vreg\().16b // compare clipped to original 23 | not v2.16b,v2.16b 24 | and v3.16b,v2.16b,v18.16b // clipped if lane = 0x01 25 | addv b3,v3.16b // b3 = num clipped pixels 26 | 27 | umov w9,v3.b[0] 28 | add x8,x8,x9 // num_clipped_pixels += x9 29 | mov \Vreg\().16b,v1.16b // save clipped pixels 30 | .endm 31 | -------------------------------------------------------------------------------- /Chapter14/Ch14_05/Ch14_05_Misc.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // Ch14_05_Misc.cpp 3 | //------------------------------------------------- 4 | 5 | #include 6 | #include "Ch14_05.h" 7 | 8 | using namespace std; 9 | 10 | void PrintClipDataStructOffsets(void) 11 | { 12 | const char nl = '\n'; 13 | 14 | cout << "offsetof(ClipData.m_Src) = " << offsetof(ClipData, m_Src) << nl; 15 | cout << "offsetof(ClipData.m_Des) = " << offsetof(ClipData, m_Des) << nl; 16 | cout << "offsetof(ClipData.m_NumPixels) = " << offsetof(ClipData, m_NumPixels) << nl; 17 | cout << "offsetof(ClipData.m_NumClippedPixels) = " << offsetof(ClipData, 
m_NumClippedPixels) << nl; 18 | cout << "offsetof(ClipData.m_ThreshLo) = " << offsetof(ClipData, m_ThreshLo) << nl; 19 | cout << "offsetof(ClipData.m_ThreshHi) = " << offsetof(ClipData, m_ThreshHi) << nl; 20 | } 21 | -------------------------------------------------------------------------------- /Chapter14/Ch14_06/Ch14_06.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch14_06.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | 8 | struct ImageStats 9 | { 10 | uint8_t* m_PixelBuffer; 11 | uint64_t m_NumPixels; 12 | uint64_t m_PixelSum; 13 | uint64_t m_PixelSumSquares; 14 | double m_PixelMean; 15 | double m_PixelSd; 16 | }; 17 | 18 | // Ch14_06.cpp 19 | extern bool CalcImageStatsCpp(ImageStats& im_stats); 20 | 21 | // Ch14_06_.s 22 | extern "C" bool CalcImageStats_(ImageStats& im_stats); 23 | 24 | // Ch14_06_BM.cpp 25 | extern void CalcImageStats_BM(void); 26 | -------------------------------------------------------------------------------- /Chapter14/Ch14_06/Ch14_06_BM.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch14_06_BM.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | #include "Ch14_06.h" 7 | #include "AlignedMem.h" 8 | #include "BmThreadTimer.h" 9 | #include "ImageMatrix.h" 10 | #include "OS.h" 11 | 12 | using namespace std; 13 | 14 | void CalcImageStats_BM(void) 15 | { 16 | cout << "\nRunning benchmark function CalcImageStats_BM - please wait\n"; 17 | 18 | const char* image_fn = "../../Data/ImageB.png"; 19 | 20 | ImageMatrix im(image_fn, PixelType::Gray8, Channel::G); 21 | uint64_t num_pixels = im.GetNumPixels(); 22 | uint8_t* pb = im.GetPixelBuffer(); 23 | 24 | ImageStats is1; 25 | is1.m_PixelBuffer = pb; 26 | is1.m_NumPixels = num_pixels; 27 | 28 | ImageStats is2; 29 | 
is2.m_PixelBuffer = pb; 30 | is2.m_NumPixels = num_pixels; 31 | 32 | const size_t num_it = 500; 33 | const size_t num_alg = 2; 34 | BmThreadTimer bmtt(num_it, num_alg); 35 | 36 | for (size_t i = 0; i < num_it; i++) 37 | { 38 | bmtt.Start(i, 0); 39 | CalcImageStatsCpp(is1); 40 | bmtt.Stop(i, 0); 41 | 42 | bmtt.Start(i, 1); 43 | CalcImageStats_(is2); 44 | bmtt.Stop(i, 1); 45 | } 46 | 47 | string fn = bmtt.BuildCsvFilenameString("Ch14_06_CalcImageStats_BM"); 48 | bmtt.SaveElapsedTimes(fn, BmThreadTimer::EtUnit::MicroSec, 2); 49 | cout << "Benchmark times save to file " << fn << '\n'; 50 | } 51 | -------------------------------------------------------------------------------- /Chapter15/Ch15_05/Ch15_05.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch15_05.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | #include 7 | #include "ImageMatrix.h" 8 | 9 | // Ch15_05.cpp 10 | extern float g_Coef[]; 11 | extern bool ConvertRgbToGsCpp(uint8_t* pb_gs, const RGB32* pb_rgb, size_t num_pixels, const float* coef); 12 | 13 | // Ch15_05_.s 14 | extern "C" bool ConvertRgbToGs_(uint8_t* pb_gs, const RGB32* pb_rgb, size_t num_pixels, const float* coef); 15 | 16 | // Ch15_05_BM.cpp 17 | extern void ConvertRgbToGs_BM(void); 18 | -------------------------------------------------------------------------------- /Chapter15/Ch15_05/Ch15_05_BM.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch15_05_BM.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | #include "Ch15_05.h" 7 | #include "BmThreadTimer.h" 8 | #include "OS.h" 9 | 10 | using namespace std; 11 | 12 | void ConvertRgbToGs_BM(void) 13 | { 14 | cout << "\nRunning benchmark function ConvertRgbToGs_BM - please wait\n"; 15 | 16 | const char* fn_rgb = "../../Data/ImageC.png"; 17 | 18 | 
ImageMatrix im_rgb(fn_rgb, PixelType::Rgb32); 19 | int im_h = im_rgb.GetHeight(); 20 | int im_w = im_rgb.GetWidth(); 21 | size_t num_pixels = im_h * im_w; 22 | ImageMatrix im_gs1(im_h, im_w, PixelType::Gray8); 23 | ImageMatrix im_gs2(im_h, im_w, PixelType::Gray8); 24 | RGB32* pb_rgb = im_rgb.GetPixelBuffer(); 25 | uint8_t* pb_gs1 = im_gs1.GetPixelBuffer(); 26 | uint8_t* pb_gs2 = im_gs2.GetPixelBuffer(); 27 | 28 | const size_t num_it = 500; 29 | const size_t num_alg = 2; 30 | BmThreadTimer bmtt(num_it, num_alg); 31 | 32 | for (size_t i = 0; i < num_it; i++) 33 | { 34 | bmtt.Start(i, 0); 35 | ConvertRgbToGsCpp(pb_gs1, pb_rgb, num_pixels, g_Coef); 36 | bmtt.Stop(i, 0); 37 | 38 | bmtt.Start(i, 1); 39 | ConvertRgbToGs_(pb_gs2, pb_rgb, num_pixels, g_Coef); 40 | bmtt.Stop(i, 1); 41 | } 42 | 43 | string fn = bmtt.BuildCsvFilenameString("Ch15_05_ConvertRgbToGs_BM"); 44 | bmtt.SaveElapsedTimes(fn, BmThreadTimer::EtUnit::MicroSec, 2); 45 | cout << "Benchmark times save to file " << fn << '\n'; 46 | } 47 | -------------------------------------------------------------------------------- /Chapter15/Ch15_06/Ch15_06.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch15_06.h 3 | //------------------------------------------------ 4 | 5 | // Ch15_06.cpp 6 | extern void Mat4x4MulF32(float* m_des, const float* m_src1, const float* m_src2); 7 | extern void Mat4x4MulF64(double* m_des, const double* m_src1, const double* m_src2); 8 | 9 | // Ch15_06_.s 10 | extern "C" void Mat4x4MulF32_(float* m_des, const float* m_src1, const float* m_src2); 11 | extern "C" void Mat4x4MulF64_(double* m_des, const double* m_src1, const double* m_src2); 12 | 13 | // Ch15_06_BM.cpp 14 | extern void Mat4x4MulF32_BM(void); 15 | extern void Mat4x4MulF64_BM(void); 16 | 17 | -------------------------------------------------------------------------------- /Chapter16/Ch16_01/Ch16_01.h: 
-------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch16_01.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | 7 | // Ch16_01_Misc.cpp 8 | extern void CreateSignal(float* x, int n, int kernel_size, unsigned int seed); 9 | extern void PadSignal(float* x2, int n2, const float* x1, int n1, int ks2); 10 | 11 | // Ch16_01.cpp 12 | extern unsigned int g_RngSeedVal; 13 | extern bool ConvolveKsN(float* y, const float* x, int num_pts, const float* kernel, int kernel_size); 14 | extern bool ConvolveKs5(float* y, const float* x, int num_pts, const float* kernel, int kernel_size); 15 | 16 | // Ch16_01_.s 17 | extern "C" bool ConvolveKsN_(float* y, const float* x, int num_pts, const float* kernel, int kernel_size); 18 | extern "C" bool ConvolveKs5_(float* y, const float* x, int num_pts, const float* kernel, int kernel_size); 19 | 20 | // Ch16_01_BM.cpp 21 | extern void Convolve_BM(void); 22 | -------------------------------------------------------------------------------- /Chapter16/Ch16_02/Ch16_02.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch16_02.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | 7 | // Simple vector structure 8 | typedef struct 9 | { 10 | float X; // X component 11 | float Y; // Y component 12 | float Z; // Z component 13 | } Vector; 14 | 15 | // Vector structure of arrays 16 | typedef struct 17 | { 18 | float* X; // X components 19 | float* Y; // Y components 20 | float* Z; // Z components 21 | } VectorSoA; 22 | 23 | const size_t c_Align = 16; 24 | 25 | // Ch16_02.cpp 26 | void CrossProdAOS(Vector* c, const Vector* a, const Vector* b, size_t n); 27 | void CrossProdSOA(VectorSoA& c, const VectorSoA& a, const VectorSoA& b, size_t n); 28 | 29 | // Ch16_02_.s 30 | extern "C" void CrossProdAOS_(Vector* c, 
const Vector* a, const Vector* b, size_t n); 31 | extern "C" void CrossProdSOA_(VectorSoA& c, const VectorSoA& a, const VectorSoA& b, 32 | size_t n); 33 | 34 | // Ch16_02_BM.cpp 35 | extern void CrossProd_BM(void); 36 | 37 | // Ch16_02_Misc.cpp 38 | void InitVec(Vector* a_aos, Vector* b_aos, VectorSoA& a_soa, VectorSoA& b_soa, size_t n); 39 | bool CompareCP(Vector* c1_aos, Vector* c2_aos, VectorSoA& c1_soa, VectorSoA& c2_soa, 40 | size_t n); 41 | -------------------------------------------------------------------------------- /Chapter16/Ch16_03/Ch16_03.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch16_03.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | 7 | // Simple 4x1 vector structure 8 | struct Vec4x1F32 9 | { 10 | float W, X, Y, Z; 11 | }; 12 | 13 | // Ch16_03.cpp 14 | extern void InitVecArray(Vec4x1F32* a, size_t n); 15 | extern bool MatVecMulF32Cpp(Vec4x1F32* b, float m[4][4], Vec4x1F32* a, size_t n); 16 | 17 | // Ch16_03_.s 18 | extern "C" bool MatVecMulF32_(Vec4x1F32* b, float m[4][4], Vec4x1F32* a, size_t n); 19 | 20 | // Ch16_03_BM.cpp 21 | extern void MatVecMulF32_BM(void); 22 | 23 | // Constants 24 | const size_t c_Align = 16; 25 | -------------------------------------------------------------------------------- /Chapter16/Ch16_03/Ch16_03_BM.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch16_03_BM.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | #include 7 | #include "Ch16_03.h" 8 | #include "AlignedMem.h" 9 | #include "BmThreadTimer.h" 10 | #include "OS.h" 11 | 12 | using namespace std; 13 | 14 | void MatVecMulF32_BM(void) 15 | { 16 | cout << "\nRunning benchmark function MatVecMulF32_BM - please wait\n"; 17 | 18 | const size_t num_vec = 1000000; 19 | 20 | alignas(c_Align) float m[4][4] 21 
| { 22 | 10.0, 11.0, 12.0, 13.0, 23 | 20.0, 21.0, 22.0, 23.0, 24 | 30.0, 31.0, 32.0, 33.0, 25 | 40.0, 41.0, 42.0, 43.0 26 | }; 27 | 28 | AlignedArray a_aa(num_vec, c_Align); 29 | AlignedArray b1_aa(num_vec, c_Align); 30 | AlignedArray b2_aa(num_vec, c_Align); 31 | 32 | Vec4x1F32* a = a_aa.Data(); 33 | Vec4x1F32* b1 = b1_aa.Data(); 34 | Vec4x1F32* b2 = b2_aa.Data(); 35 | 36 | InitVecArray(a, num_vec); 37 | 38 | const size_t num_it = 500; 39 | const size_t num_alg = 2; 40 | BmThreadTimer bmtt(num_it, num_alg); 41 | 42 | for (size_t i = 0; i < num_it; i++) 43 | { 44 | bmtt.Start(i, 0); 45 | MatVecMulF32Cpp(b1, m, a, num_vec); 46 | bmtt.Stop(i, 0); 47 | 48 | bmtt.Start(i, 1); 49 | MatVecMulF32_(b2, m, a, num_vec); 50 | bmtt.Stop(i, 1); 51 | } 52 | 53 | string fn = bmtt.BuildCsvFilenameString("Ch16_03_MatVecMulF32_BM"); 54 | bmtt.SaveElapsedTimes(fn, BmThreadTimer::EtUnit::MicroSec, 2); 55 | cout << "Benchmark times save to file " << fn << '\n'; 56 | } 57 | -------------------------------------------------------------------------------- /Chapter16/Ch16_04/Ch16_04.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch16_04.h 3 | //------------------------------------------------ 4 | 5 | #pragma once 6 | 7 | #include "MatrixF32.h" 8 | 9 | const float c_Epsilon = 1.0e-5; 10 | 11 | // Ch16_04.cpp 12 | extern bool Mat4x4InvF32(MatrixF32& m_inv, const MatrixF32& m, float epsilon); 13 | 14 | // Ch16_04_Test.cpp 15 | extern bool TestMat4x4F32(const MatrixF32& m1, const MatrixF32& m2); 16 | 17 | // Ch16_04_.s 18 | extern "C" bool Mat4x4InvF32_(float* m_inv, const float* m, float epsilon); 19 | 20 | // Ch16_04_Test.s 21 | extern "C" void Mat4x4AddF32_(float* m_des, const float* m_src1, const float* m_src2); 22 | extern "C" void Mat4x4MulF32_(float* m_des, const float* m_src1, const float* m_src2); 23 | extern "C" void Mat4x4MulScalarF32_(float* m_des, const float* m_src1, float sv); 24 | 
extern "C" float Mat4x4TraceF32_(const float* m_src1); 25 | 26 | // Ch16_04_BM.cpp 27 | extern void Mat4x4InvF32_BM(const MatrixF32& m); 28 | -------------------------------------------------------------------------------- /Chapter16/Ch16_04/Ch16_04_BM.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------ 2 | // Ch16_04_BM.cpp 3 | //------------------------------------------------ 4 | 5 | #include 6 | #include "Ch16_04.h" 7 | #include "MatrixF32.h" 8 | #include "BmThreadTimer.h" 9 | #include "OS.h" 10 | 11 | using namespace std; 12 | 13 | void Mat4x4InvF32_BM(const MatrixF32& m) 14 | { 15 | cout << "\nRunning benchmark function Mat4x4InvF32_BM - please wait\n"; 16 | 17 | const float epsilon = c_Epsilon; 18 | MatrixF32 m_inv0(m.GetNumRows(), m.GetNumCols()); 19 | MatrixF32 m_inv1(m.GetNumRows(), m.GetNumCols()); 20 | 21 | const size_t num_it = 500; 22 | const size_t num_alg = 2; 23 | const size_t num_ops = 100000; 24 | 25 | BmThreadTimer bmtt(num_it, num_alg); 26 | 27 | for (size_t i = 0; i < num_it; i++) 28 | { 29 | bmtt.Start(i, 0); 30 | for (size_t j = 0; j < num_ops; j++) 31 | Mat4x4InvF32(m_inv0, m, epsilon); 32 | bmtt.Stop(i, 0); 33 | 34 | bmtt.Start(i, 1); 35 | for (size_t j = 0; j < num_ops; j++) 36 | Mat4x4InvF32_(m_inv1.Data(), m.Data(), epsilon); 37 | bmtt.Stop(i, 1); 38 | } 39 | 40 | string fn = bmtt.BuildCsvFilenameString("Ch16_04_Mat4x4InvF32_BM"); 41 | bmtt.SaveElapsedTimes(fn, BmThreadTimer::EtUnit::MicroSec, 2); 42 | cout << "Benchmark times save to file " << fn << '\n'; 43 | } 44 | -------------------------------------------------------------------------------- /Contributing.md: -------------------------------------------------------------------------------- 1 | # Contributing to Apress Source Code 2 | 3 | Copyright for Apress source code belongs to the author(s). 
However, under fair use you are encouraged to fork and contribute minor corrections and updates for the benefit of the author(s) and other readers. 4 | 5 | ## How to Contribute 6 | 7 | 1. Make sure you have a GitHub account. 8 | 2. Fork the repository for the relevant book. 9 | 3. Create a new branch on which to make your change, e.g. 10 | `git checkout -b my_code_contribution` 11 | 4. Commit your change. Include a commit message describing the correction. Please note that if your commit message is not clear, the correction will not be accepted. 12 | 5. Submit a pull request. 13 | 14 | Thank you for your contribution! -------------------------------------------------------------------------------- /Data/ImageA.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/modern-arm-assembly-language-programming/af88d2766656447e0c3308408a17828bab4f41a1/Data/ImageA.png -------------------------------------------------------------------------------- /Data/ImageB.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/modern-arm-assembly-language-programming/af88d2766656447e0c3308408a17828bab4f41a1/Data/ImageB.png -------------------------------------------------------------------------------- /Data/ImageC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/modern-arm-assembly-language-programming/af88d2766656447e0c3308408a17828bab4f41a1/Data/ImageC.png -------------------------------------------------------------------------------- /ImportantNotes.txt: -------------------------------------------------------------------------------- 1 | The sole purpose of the source code is to elucidate programming examples 2 | that are directly related to the topics discussed in this book. 
3 | Minimal attention is given to essential software engineering concerns 4 | such as robust error handling, security risks, numerical stability, 5 | rounding errors, or ill-conditioned functions. You are responsible 6 | for addressing these concerns should you decide to use any of the 7 | source code in your own programs. 8 | 9 | The Include folder contains shared C++ header files. These files 10 | incorporate classes, templates, methods, and macros that are intended 11 | for use with this book's source code. This code should not be used for 12 | other purposes without proper modifications. 13 | -------------------------------------------------------------------------------- /Include/ImageMisc.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // ImageMisc.h 3 | //------------------------------------------------- 4 | 5 | #pragma once 6 | 7 | struct RGB32 8 | { 9 | // Do not change order of elements below 10 | uint8_t m_R; 11 | uint8_t m_G; 12 | uint8_t m_B; 13 | uint8_t m_A; 14 | }; 15 | 16 | enum class PixelType : unsigned int 17 | { 18 | Undefined, 19 | Gray8, 20 | Rgb32 21 | }; 22 | 23 | enum class Channel : unsigned int 24 | { 25 | // Do not change order of R, G, B, A 26 | R, G, B, A, 27 | None 28 | }; 29 | -------------------------------------------------------------------------------- /Include/Misc.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------- 2 | // Misc.h 3 | //------------------------------------------------- 4 | 5 | #pragma once 6 | #include 7 | #include 8 | 9 | class Misc 10 | { 11 | public: 12 | static bool IsExt(const std::string& fn, const std::string& fn_ext_test) 13 | { 14 | std::string fn_ext = fn.substr(fn.find_last_of(".") + 1); 15 | 16 | return fn_ext.compare(fn_ext_test) == 0; 17 | } 18 | }; 19 | 20 | 
-------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Freeware License, some rights reserved 2 | 3 | Copyright (c) 2020 Daniel Kusswurm 4 | 5 | Permission is hereby granted, free of charge, to anyone obtaining a copy 6 | of this software and associated documentation files (the "Software"), 7 | to work with the Software within the limits of freeware distribution and fair use. 8 | This includes the rights to use, copy, and modify the Software for personal use. 9 | Users are also allowed and encouraged to submit corrections and modifications 10 | to the Software for the benefit of other users. 11 | 12 | It is not allowed to reuse, modify, or redistribute the Software for 13 | commercial use in any way, or for a user’s educational materials such as books 14 | or blog articles without prior permission from the copyright holder. 15 | 16 | The above copyright notice and this permission notice need to be included 17 | in all copies or substantial portions of the software. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | AUTHORS OR COPYRIGHT HOLDERS OR APRESS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | SOFTWARE. 26 | 27 | 28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Apress Source Code 2 | 3 | This repository accompanies [*Modern Arm Assembly Language Programming*](https://www.apress.com/9781484262665) by Daniel Kusswurm (Apress, 2020). 
4 | 5 | [comment]: #cover 6 | ![Cover image](9781484262665.jpg) 7 | 8 | Download the files as a zip using the green button, or clone the repository to your machine using Git. 9 | 10 | ## Releases 11 | 12 | Release v1.0 corresponds to the code in the published book, without corrections or updates. 13 | 14 | ## Contributions 15 | 16 | See the file Contributing.md for more information on how you can contribute to this repository. -------------------------------------------------------------------------------- /ReleaseHistory.txt: -------------------------------------------------------------------------------- 1 | 2020-09-23 2 | Initial release for publication. 3 | -------------------------------------------------------------------------------- /Sh/clean32.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | num_dirs=0 4 | dirnames="dirs32.txt" 5 | 6 | for dirname1 in $(cat $dirnames) 7 | do 8 | dirname2=$HOME/$dirname1 9 | cd $dirname2 10 | 11 | echo "running make clean in $dirname2" 12 | make clean 13 | echo "" 14 | 15 | num_dirs=$[$num_dirs + 1] 16 | done 17 | 18 | echo "num_dirs = $num_dirs" 19 | -------------------------------------------------------------------------------- /Sh/clean64.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | num_dirs=0 4 | dirnames="dirs64.txt" 5 | 6 | for dirname1 in $(cat $dirnames) 7 | do 8 | dirname2=$HOME/$dirname1 9 | cd $dirname2 10 | 11 | echo "running make clean in $dirname2" 12 | make clean 13 | echo "" 14 | 15 | num_dirs=$[$num_dirs + 1] 16 | done 17 | 18 | echo "num_dirs = $num_dirs" 19 | -------------------------------------------------------------------------------- /Sh/dirs32.txt: -------------------------------------------------------------------------------- 1 | ModArmAsm/Chapter02/Ch02_01 2 | ModArmAsm/Chapter02/Ch02_02 3 | ModArmAsm/Chapter02/Ch02_03 4 | ModArmAsm/Chapter02/Ch02_04 5 | 
ModArmAsm/Chapter02/Ch02_05 6 | ModArmAsm/Chapter02/Ch02_06 7 | ModArmAsm/Chapter02/Ch02_07 8 | ModArmAsm/Chapter03/Ch03_01 9 | ModArmAsm/Chapter03/Ch03_02 10 | ModArmAsm/Chapter03/Ch03_03 11 | ModArmAsm/Chapter03/Ch03_04 12 | ModArmAsm/Chapter03/Ch03_05 13 | ModArmAsm/Chapter03/Ch03_06 14 | ModArmAsm/Chapter04/Ch04_01 15 | ModArmAsm/Chapter04/Ch04_02 16 | ModArmAsm/Chapter04/Ch04_03 17 | ModArmAsm/Chapter04/Ch04_04 18 | ModArmAsm/Chapter04/Ch04_05 19 | ModArmAsm/Chapter04/Ch04_06 20 | ModArmAsm/Chapter05/Ch05_01 21 | ModArmAsm/Chapter06/Ch06_01 22 | ModArmAsm/Chapter06/Ch06_02 23 | ModArmAsm/Chapter06/Ch06_03 24 | ModArmAsm/Chapter06/Ch06_04 25 | ModArmAsm/Chapter06/Ch06_05 26 | ModArmAsm/Chapter06/Ch06_06 27 | ModArmAsm/Chapter06/Ch06_07 28 | ModArmAsm/Chapter06/Ch06_08 29 | ModArmAsm/Chapter08/Ch08_01 30 | ModArmAsm/Chapter08/Ch08_02 31 | ModArmAsm/Chapter08/Ch08_03 32 | ModArmAsm/Chapter08/Ch08_04 33 | ModArmAsm/Chapter08/Ch08_05 34 | ModArmAsm/Chapter08/Ch08_06 35 | ModArmAsm/Chapter09/Ch09_01 36 | ModArmAsm/Chapter09/Ch09_02 37 | ModArmAsm/Chapter09/Ch09_03 38 | ModArmAsm/Chapter09/Ch09_04 39 | ModArmAsm/Chapter09/Ch09_05 40 | ModArmAsm/Chapter09/Ch09_06 41 | ModArmAsm/Chapter09/Ch09_07 42 | -------------------------------------------------------------------------------- /Sh/dirs64.txt: -------------------------------------------------------------------------------- 1 | ModArmAsm/Chapter11/Ch11_01 2 | ModArmAsm/Chapter11/Ch11_02 3 | ModArmAsm/Chapter11/Ch11_03 4 | ModArmAsm/Chapter11/Ch11_04 5 | ModArmAsm/Chapter11/Ch11_05 6 | ModArmAsm/Chapter11/Ch11_06 7 | ModArmAsm/Chapter11/Ch11_07 8 | ModArmAsm/Chapter12/Ch12_01 9 | ModArmAsm/Chapter12/Ch12_02 10 | ModArmAsm/Chapter12/Ch12_03 11 | ModArmAsm/Chapter12/Ch12_04 12 | ModArmAsm/Chapter12/Ch12_05 13 | ModArmAsm/Chapter12/Ch12_06 14 | ModArmAsm/Chapter13/Ch13_01 15 | ModArmAsm/Chapter13/Ch13_02 16 | ModArmAsm/Chapter13/Ch13_03 17 | ModArmAsm/Chapter13/Ch13_04 18 | ModArmAsm/Chapter13/Ch13_05 19 | 
ModArmAsm/Chapter13/Ch13_06 20 | ModArmAsm/Chapter13/Ch13_07 21 | ModArmAsm/Chapter13/Ch13_08 22 | ModArmAsm/Chapter14/Ch14_01 23 | ModArmAsm/Chapter14/Ch14_02 24 | ModArmAsm/Chapter14/Ch14_03 25 | ModArmAsm/Chapter14/Ch14_04 26 | ModArmAsm/Chapter14/Ch14_05 27 | ModArmAsm/Chapter14/Ch14_06 28 | ModArmAsm/Chapter15/Ch15_01 29 | ModArmAsm/Chapter15/Ch15_02 30 | ModArmAsm/Chapter15/Ch15_03 31 | ModArmAsm/Chapter15/Ch15_04 32 | ModArmAsm/Chapter15/Ch15_05 33 | ModArmAsm/Chapter15/Ch15_06 34 | ModArmAsm/Chapter15/Ch15_07 35 | ModArmAsm/Chapter16/Ch16_01 36 | ModArmAsm/Chapter16/Ch16_02 37 | ModArmAsm/Chapter16/Ch16_03 38 | ModArmAsm/Chapter16/Ch16_04 39 | -------------------------------------------------------------------------------- /Sh/make32.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Runs "make" in every example directory listed in dirs32.txt (paths are relative to $HOME). 3 | num_dirs=0 4 | dirnames="dirs32.txt" 5 | # A directory that cannot be entered is skipped and not counted, so make never runs in the wrong place. 6 | for dirname1 in $(cat "$dirnames") 7 | do 8 | dirname2=$HOME/$dirname1 9 | cd "$dirname2" || { echo "skipping $dirname2: cannot change directory" >&2; continue; } 10 | 11 | echo "running make in $dirname2" 12 | make 13 | echo "" 14 | 15 | num_dirs=$((num_dirs + 1)) 16 | done 17 | 18 | echo "num_dirs = $num_dirs" 19 | -------------------------------------------------------------------------------- /Sh/make64.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Runs "make" in every example directory listed in dirs64.txt (paths are relative to $HOME). 3 | num_dirs=0 4 | dirnames="dirs64.txt" 5 | # A directory that cannot be entered is skipped and not counted, so make never runs in the wrong place. 6 | for dirname1 in $(cat "$dirnames") 7 | do 8 | dirname2=$HOME/$dirname1 9 | cd "$dirname2" || { echo "skipping $dirname2: cannot change directory" >&2; continue; } 10 | 11 | echo "running make in $dirname2" 12 | make 13 | echo "" 14 | 15 | num_dirs=$((num_dirs + 1)) 16 | done 17 | 18 | echo "num_dirs = $num_dirs" 19 | -------------------------------------------------------------------------------- /Sh/readme.txt: -------------------------------------------------------------------------------- 1 | To use the shell scripts in this directory, the source code must be 2 | installed in the following directory: 3 | ~/ModArmAsm 4 | See Appendix A for more information. 
5 | 6 | To run the shell scripts, you must change the file permissions as follows: 7 | 8 | 1. Open a terminal window in your home directory. 9 | 10 | 2. Type the following commands: 11 | cd ModArmAsm/Sh 12 | chmod u+x *.sh 13 | 14 | ---------------------------------------------------------- 15 | 16 | The script clean32.sh runs "make clean" for all 32-bit 17 | source code examples. 18 | 19 | The script make32.sh runs "make" for all 32-bit 20 | source code examples. 21 | 22 | The script run32.sh executes all 32-bit source code examples. 23 | 24 | The scripts clean64.sh, make64.sh, and run64.sh are the counterpart 25 | scripts for the 64-bit source code examples. 26 | 27 | The files dirs32.txt and dirs64.txt contain the source code example 28 | subdirectory names. These files are used by the scripts. 29 | -------------------------------------------------------------------------------- /Sh/run32.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Runs ./<last path component> inside every example directory listed in dirs32.txt (paths are relative to $HOME). 3 | num_dirs=0 4 | dirnames="dirs32.txt" 5 | # A directory that cannot be entered is skipped and not counted, so nothing is executed from the wrong place. 6 | for dirname1 in $(cat "$dirnames") 7 | do 8 | dirname2=$HOME/$dirname1 9 | ex_name=${dirname2##*/} 10 | ex_cmd=./$ex_name 11 | 12 | echo "" 13 | echo "---------- Running $ex_cmd ----------" 14 | 15 | cd "$dirname2" || { echo "skipping $dirname2: cannot change directory" >&2; continue; } 16 | "$ex_cmd" 17 | echo "" 18 | 19 | num_dirs=$((num_dirs + 1)) 20 | done 21 | 22 | echo "num_dirs = $num_dirs" 23 | -------------------------------------------------------------------------------- /Sh/run64.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Runs ./<last path component> inside every example directory listed in dirs64.txt (paths are relative to $HOME). 3 | num_dirs=0 4 | dirnames="dirs64.txt" 5 | # A directory that cannot be entered is skipped and not counted, so nothing is executed from the wrong place. 6 | for dirname1 in $(cat "$dirnames") 7 | do 8 | dirname2=$HOME/$dirname1 9 | ex_name=${dirname2##*/} 10 | ex_cmd=./$ex_name 11 | 12 | echo "" 13 | echo "---------- Running $ex_cmd ----------" 14 | 15 | cd "$dirname2" || { echo "skipping $dirname2: cannot change directory" >&2; continue; } 16 | "$ex_cmd" 17 | echo "" 18 | 19 | num_dirs=$((num_dirs + 1)) 20 | done 21 | 22 | echo "num_dirs = $num_dirs" 23 | 
-------------------------------------------------------------------------------- /errata.md: -------------------------------------------------------------------------------- 1 | # Errata for *Modern Arm Assembly Language Programming* 2 | 3 | On **page xx** [Summary of error]: 4 | 5 | Details of error here. Highlight key pieces in **bold**. 6 | 7 | *** 8 | 9 | On **page xx** [Summary of error]: 10 | 11 | Details of error here. Highlight key pieces in **bold**. 12 | 13 | *** --------------------------------------------------------------------------------