├── Assignments.zip ├── Assignments ├── .DS_Store ├── Week4 │ ├── .DS_Store │ └── C │ │ ├── HelloWorld │ │ ├── data │ │ ├── Plot_MT_Launch.mlx │ │ ├── Plot_MT_performance_8x6.mlx │ │ ├── Plot_MT_Loop2_MT_Pack_8x6.mlx │ │ ├── Plot_MT_Loop3_MT_Pack_8x6.mlx │ │ ├── Plot_MT_performance_12x4.mlx │ │ ├── Plot_MT_Aggregate_GFLOPS_8x6.mlx │ │ ├── my_plot_colors.m │ │ ├── Plot_MT_Loop3_MT_Pack_8x6_m.m │ │ ├── Plot_MT_Aggregate_GFLOPS_8x6_m.m │ │ ├── Plot_MT_performance_12x4_m.m │ │ ├── Plot_MT_performance_8x6_m.m │ │ ├── Plot_MT_Loop2_MT_Pack_8x6_m.m │ │ └── Plot_MT_Launch_m.m │ │ ├── HelloWorld.c │ │ ├── RandomMatrix.c │ │ ├── Gemm_IJP.c │ │ ├── Gemm_JPI.c │ │ ├── MaxAbsDiff.c │ │ ├── MT_PackA.c │ │ ├── PackA.c │ │ ├── PackB.c │ │ ├── FLA_Clock.c │ │ ├── Gemm_Five_Loops_Packed_MRxNRKernel.c │ │ ├── Gemm_12x4Kernel_Packed.c │ │ ├── Gemm_8x6Kernel_Packed.c │ │ └── driver.c ├── Week1 │ └── C │ │ ├── data │ │ ├── Plot_IJP.mlx │ │ ├── Plot_Outer_J.mlx │ │ ├── Plot_Outer_P.mlx │ │ ├── Plot_All_Outer.mlx │ │ ├── Plot_All_Orderings.mlx │ │ ├── Plot_All_Outer_m.m │ │ ├── Plot_Outer_P_m.m │ │ ├── Plot_Outer_J_m.m │ │ ├── Plot_All_Orderings_m.m │ │ └── Plot_IJP_m.m │ │ ├── Axpy.c │ │ ├── Dots.c │ │ ├── RandomMatrix.c │ │ ├── Ger_I_Axpy.c │ │ ├── Ger_J_Axpy.c │ │ ├── Gemv_J_Axpy.c │ │ ├── Gemv_I_Dots.c │ │ ├── Gemm_IJP.c │ │ ├── Gemm_J_Gemv.c │ │ ├── Gemm_P_Ger.c │ │ ├── Gemm_I_bli_dgemv.c │ │ ├── Gemm_I_dgemv.c │ │ ├── MaxAbsDiff.c │ │ ├── FLA_Clock.c │ │ ├── driver_ger.c │ │ ├── driver_gemv.c │ │ └── driver.c ├── Week2 │ └── C │ │ ├── data │ │ ├── Plot_Opener.mlx │ │ ├── Plot_Blocked_MMM.mlx │ │ ├── Plot_optimize_MRxNR.mlx │ │ ├── Plot_register_blocking.mlx │ │ ├── my_plot_colors.m │ │ ├── Plot_Blocked_MMM_m.m │ │ ├── Plot_register_blocking_m.m │ │ ├── Plot_Opener_m.m │ │ └── Plot_optimize_MRxNR_m.m │ │ ├── Axpy.c │ │ ├── RandomMatrix.c │ │ ├── Ger_J_Axpy.c │ │ ├── Gemm_JPI.c │ │ ├── MaxAbsDiff.c │ │ ├── Gemm_JI_MRxNRKernel.c │ │ ├── FLA_Clock.c │ │ ├── Gemm_JIP_PJI.c │ │ ├── Gemm_JIP_P_Ger.c │ │ ├── Gemm_4x4Kernel.c │ │ ├── driver.c │ │ └── Makefile ├── Week3 │ └── C │ │ ├── data │ │ ├── Plot_Five_Loops.mlx │ │ ├── Plot_MC_KC_Performance.mlx │ │ ├── Plot_XYZ_JI_MRxNRKernel.mlx │ │ ├── Plot_XY_JI_MRxNRKernel.mlx │ │ ├── Plot_XYZ_JI_MRxNRKernel (1).mlx │ │ ├── my_plot_colors.m │ │ ├── Plot_MC_KC_Performance_m.m │ │ ├── Plot_XY_JI_MRxNRKernel_m.m │ │ ├── Plot_Five_Loops_m.m │ │ └── Plot_XYZ_JI_MRxNRKernel_m.m │ │ ├── RandomMatrix.c │ │ ├── MaxAbsDiff.c │ │ ├── PackA.c │ │ ├── PackB.c │ │ ├── FLA_Clock.c │ │ ├── Gemm_PI_JI_MCxKC.c │ │ ├── Gemm_PI_JI_MRxNRKernel.c │ │ ├── Gemm_IJP_JI_MRxNRKernel.c │ │ ├── Gemm_4x4Kernel_Packed.c │ │ ├── Gemm_Five_Loops_MRxNRKernel.c │ │ ├── Gemm_Five_Loops_Packed_MRxNRKernel.c │ │ ├── Gemm_Five_Loops_Packed_MRxNRKernel_MCxKC.c │ │ ├── driver.c │ │ └── driver_MCxKC.c └── Week0 │ └── C │ ├── driver.c │ ├── RandomMatrix.c │ ├── MaxAbsDiff.c │ ├── Makefile │ └── FLA_Clock.c ├── .gitignore ├── README.md ├── Makefile~ ├── texput.log ├── Makefile └── LICENSE /Assignments.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ULAFF/LAFF-On-PfHP/HEAD/Assignments.zip -------------------------------------------------------------------------------- /Assignments/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ULAFF/LAFF-On-PfHP/HEAD/Assignments/.DS_Store -------------------------------------------------------------------------------- /Assignments/Week4/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ULAFF/LAFF-On-PfHP/HEAD/Assignments/Week4/.DS_Store -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # ignores all object files and executables that may be generated when compiling 2 | *.o 3 | *.x 4 | -------------------------------------------------------------------------------- /Assignments/Week4/C/HelloWorld: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ULAFF/LAFF-On-PfHP/HEAD/Assignments/Week4/C/HelloWorld -------------------------------------------------------------------------------- /Assignments/Week1/C/data/Plot_IJP.mlx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ULAFF/LAFF-On-PfHP/HEAD/Assignments/Week1/C/data/Plot_IJP.mlx -------------------------------------------------------------------------------- /Assignments/Week1/C/data/Plot_Outer_J.mlx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ULAFF/LAFF-On-PfHP/HEAD/Assignments/Week1/C/data/Plot_Outer_J.mlx -------------------------------------------------------------------------------- /Assignments/Week1/C/data/Plot_Outer_P.mlx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ULAFF/LAFF-On-PfHP/HEAD/Assignments/Week1/C/data/Plot_Outer_P.mlx -------------------------------------------------------------------------------- /Assignments/Week2/C/data/Plot_Opener.mlx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ULAFF/LAFF-On-PfHP/HEAD/Assignments/Week2/C/data/Plot_Opener.mlx -------------------------------------------------------------------------------- /Assignments/Week1/C/data/Plot_All_Outer.mlx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ULAFF/LAFF-On-PfHP/HEAD/Assignments/Week1/C/data/Plot_All_Outer.mlx -------------------------------------------------------------------------------- /Assignments/Week3/C/data/Plot_Five_Loops.mlx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ULAFF/LAFF-On-PfHP/HEAD/Assignments/Week3/C/data/Plot_Five_Loops.mlx -------------------------------------------------------------------------------- /Assignments/Week4/C/data/Plot_MT_Launch.mlx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ULAFF/LAFF-On-PfHP/HEAD/Assignments/Week4/C/data/Plot_MT_Launch.mlx -------------------------------------------------------------------------------- /Assignments/Week1/C/data/Plot_All_Orderings.mlx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ULAFF/LAFF-On-PfHP/HEAD/Assignments/Week1/C/data/Plot_All_Orderings.mlx -------------------------------------------------------------------------------- /Assignments/Week2/C/data/Plot_Blocked_MMM.mlx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ULAFF/LAFF-On-PfHP/HEAD/Assignments/Week2/C/data/Plot_Blocked_MMM.mlx -------------------------------------------------------------------------------- /Assignments/Week2/C/data/Plot_optimize_MRxNR.mlx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ULAFF/LAFF-On-PfHP/HEAD/Assignments/Week2/C/data/Plot_optimize_MRxNR.mlx -------------------------------------------------------------------------------- /Assignments/Week2/C/data/Plot_register_blocking.mlx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ULAFF/LAFF-On-PfHP/HEAD/Assignments/Week2/C/data/Plot_register_blocking.mlx -------------------------------------------------------------------------------- /Assignments/Week3/C/data/Plot_MC_KC_Performance.mlx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ULAFF/LAFF-On-PfHP/HEAD/Assignments/Week3/C/data/Plot_MC_KC_Performance.mlx -------------------------------------------------------------------------------- /Assignments/Week3/C/data/Plot_XYZ_JI_MRxNRKernel.mlx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ULAFF/LAFF-On-PfHP/HEAD/Assignments/Week3/C/data/Plot_XYZ_JI_MRxNRKernel.mlx -------------------------------------------------------------------------------- /Assignments/Week3/C/data/Plot_XY_JI_MRxNRKernel.mlx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ULAFF/LAFF-On-PfHP/HEAD/Assignments/Week3/C/data/Plot_XY_JI_MRxNRKernel.mlx -------------------------------------------------------------------------------- /Assignments/Week4/C/data/Plot_MT_performance_8x6.mlx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ULAFF/LAFF-On-PfHP/HEAD/Assignments/Week4/C/data/Plot_MT_performance_8x6.mlx -------------------------------------------------------------------------------- /Assignments/Week4/C/data/Plot_MT_Loop2_MT_Pack_8x6.mlx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ULAFF/LAFF-On-PfHP/HEAD/Assignments/Week4/C/data/Plot_MT_Loop2_MT_Pack_8x6.mlx -------------------------------------------------------------------------------- /Assignments/Week4/C/data/Plot_MT_Loop3_MT_Pack_8x6.mlx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ULAFF/LAFF-On-PfHP/HEAD/Assignments/Week4/C/data/Plot_MT_Loop3_MT_Pack_8x6.mlx -------------------------------------------------------------------------------- /Assignments/Week4/C/data/Plot_MT_performance_12x4.mlx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ULAFF/LAFF-On-PfHP/HEAD/Assignments/Week4/C/data/Plot_MT_performance_12x4.mlx -------------------------------------------------------------------------------- /Assignments/Week3/C/data/Plot_XYZ_JI_MRxNRKernel (1).mlx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ULAFF/LAFF-On-PfHP/HEAD/Assignments/Week3/C/data/Plot_XYZ_JI_MRxNRKernel (1).mlx -------------------------------------------------------------------------------- /Assignments/Week4/C/HelloWorld.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char *argv[]) 5 | { 6 | printf( "Hello World!\n" ); 7 | } 8 | -------------------------------------------------------------------------------- /Assignments/Week4/C/data/Plot_MT_Aggregate_GFLOPS_8x6.mlx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ULAFF/LAFF-On-PfHP/HEAD/Assignments/Week4/C/data/Plot_MT_Aggregate_GFLOPS_8x6.mlx -------------------------------------------------------------------------------- /Assignments/Week2/C/data/my_plot_colors.m: -------------------------------------------------------------------------------- 1 | plot_colors = [ 2 | 0, 0, 0; 0, 0.4470, 0.7410; 0.8500, 0.3250, 0.0980; 3 | 0.9290, 0.6940, 0.1250; 0.4940, 0.1840, 0.5560; 0.4660, 0.6740, 0.1880; 4 | 0.3010, 0.7450, 0.9330; 0.6350, 0.0780, 0.1840 ]; -------------------------------------------------------------------------------- /Assignments/Week3/C/data/my_plot_colors.m: -------------------------------------------------------------------------------- 1 | plot_colors = [ 2 | 0, 0, 0; 0, 0.4470, 0.7410; 0.8500, 0.3250, 0.0980; 3 | 0.9290, 0.6940, 0.1250; 0.4940, 0.1840, 0.5560; 0.4660, 0.6740, 0.1880; 4 | 0.3010, 0.7450, 0.9330; 0.6350, 0.0780, 0.1840 ]; -------------------------------------------------------------------------------- /Assignments/Week4/C/data/my_plot_colors.m: -------------------------------------------------------------------------------- 1 | plot_colors = [ 2 | 0, 0, 0; 0, 0.4470, 0.7410; 0.8500, 0.3250, 0.0980; 3 | 0.9290, 0.6940, 0.1250; 0.4940, 0.1840, 0.5560; 0.4660, 0.6740, 0.1880; 4 | 0.3010, 0.7450, 0.9330; 0.6350, 0.0780, 0.1840 ]; -------------------------------------------------------------------------------- /Assignments/Week1/C/Axpy.c: -------------------------------------------------------------------------------- 1 | #define chi( i ) x[ (i)*incx ] // map chi( i ) to array x 2 | #define psi( i ) y[ (i)*incy ] // map psi( i ) to array y 3 | 4 | void Axpy( int n, double alpha, double *x, int incx, double *y, int incy ) 5 | { 6 | for ( int i=0; i 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "omp.h" 7 | 8 | int main(int argc, char *argv[]) 9 | { 10 | #pragma omp parallel 11 | printf("Hello World\n"); 12 | 13 | exit(0); 14 | } 15 | -------------------------------------------------------------------------------- /Assignments/Week1/C/Dots.c: -------------------------------------------------------------------------------- 1 | #define chi( i ) x[ (i)*incx ] // map chi( i ) to array x 2 | #define psi( i ) y[ (i)*incy ] // map psi( i ) to array y 3 | 4 | void Dots( int n, double *x, int incx, double *y, int incy, double *gamma ) 5 | { 6 | for ( int i=0; i 3 | 4 | #define A( i,j ) *( ap + (j)*lda + (i) ) // map A( i,j ) to array ap in column-major order 5 | 6 | void RandomMatrix( int m, int n, double *ap, int lda ) 7 | /* 8 | RandomMatrix overwrite A with random values. 9 | */ 10 | { 11 | int i, j; 12 | 13 | for ( i=0; i 3 | 4 | #define A( i,j ) *( ap + (j)*lda + (i) ) // map A( i,j ) to array ap in column-major order 5 | 6 | void RandomMatrix( int m, int n, double *ap, int lda ) 7 | /* 8 | RandomMatrix overwrite A with random values. 9 | */ 10 | { 11 | int i, j; 12 | 13 | for ( i=0; i 3 | 4 | #define A( i,j ) *( ap + (j)*lda + (i) ) // map A( i,j ) to array ap in column-major order 5 | 6 | void RandomMatrix( int m, int n, double *ap, int lda ) 7 | /* 8 | RandomMatrix overwrite A with random values. 9 | */ 10 | { 11 | int i, j; 12 | 13 | for ( i=0; i 3 | 4 | #define A( i,j ) *( ap + (j)*lda + (i) ) // map A( i,j ) to array ap in column-major order 5 | 6 | void RandomMatrix( int m, int n, double *ap, int lda ) 7 | /* 8 | RandomMatrix overwrite A with random values. 9 | */ 10 | { 11 | int i, j; 12 | 13 | for ( i=0; i 3 | 4 | #define A( i,j ) *( ap + (j)*lda + (i) ) // map A( i,j ) to array ap in column-major order 5 | 6 | void RandomMatrix( int m, int n, double *ap, int lda ) 7 | /* 8 | RandomMatrix overwrite A with random values. 9 | */ 10 | { 11 | int i, j; 12 | 13 | for ( i=0; i diff ) 18 | diff = dabs( A( i,j ) - B( i,j ) ); 19 | 20 | return diff; 21 | } 22 | -------------------------------------------------------------------------------- /Assignments/Week1/C/MaxAbsDiff.c: -------------------------------------------------------------------------------- 1 | #define A( i,j ) *( ap + (j)*lda + (i) ) // map A( i,j ) to array ap in column-major order 2 | #define B( i,j ) *( bp + (j)*ldb + (i) ) // map B( i,j ) to array bp in column-major order 3 | 4 | #define dabs( x ) ( (x) < 0 ? -(x) : x ) 5 | 6 | double MaxAbsDiff( int m, int n, double *ap, int lda, double *bp, int ldb ) 7 | /* 8 | MaxAbsDiff returns the maximum absolute difference over 9 | corresponding elements of matrices A and B. 10 | */ 11 | { 12 | double diff=0.0; 13 | int i, j; 14 | 15 | for ( i=0; i diff ) 18 | diff = dabs( A( i,j ) - B( i,j ) ); 19 | 20 | return diff; 21 | } 22 | -------------------------------------------------------------------------------- /Assignments/Week2/C/MaxAbsDiff.c: -------------------------------------------------------------------------------- 1 | #define A( i,j ) *( ap + (j)*lda + (i) ) // map A( i,j ) to array ap in column-major order 2 | #define B( i,j ) *( bp + (j)*ldb + (i) ) // map B( i,j ) to array bp in column-major order 3 | 4 | #define dabs( x ) ( (x) < 0 ? -(x) : x ) 5 | 6 | double MaxAbsDiff( int m, int n, double *ap, int lda, double *bp, int ldb ) 7 | /* 8 | MaxAbsDiff returns the maximum absolute difference over 9 | corresponding elements of matrices A and B. 10 | */ 11 | { 12 | double diff=0.0; 13 | int i, j; 14 | 15 | for ( i=0; i diff ) 18 | diff = dabs( A( i,j ) - B( i,j ) ); 19 | 20 | return diff; 21 | } 22 | -------------------------------------------------------------------------------- /Assignments/Week3/C/MaxAbsDiff.c: -------------------------------------------------------------------------------- 1 | #define A( i,j ) *( ap + (j)*lda + (i) ) // map A( i,j ) to array ap in column-major order 2 | #define B( i,j ) *( bp + (j)*ldb + (i) ) // map B( i,j ) to array bp in column-major order 3 | 4 | #define dabs( x ) ( (x) < 0 ? -(x) : x ) 5 | 6 | double MaxAbsDiff( int m, int n, double *ap, int lda, double *bp, int ldb ) 7 | /* 8 | MaxAbsDiff returns the maximum absolute difference over 9 | corresponding elements of matrices A and B. 10 | */ 11 | { 12 | double diff=0.0; 13 | int i, j; 14 | 15 | for ( i=0; i diff ) 18 | diff = dabs( A( i,j ) - B( i,j ) ); 19 | 20 | return diff; 21 | } 22 | -------------------------------------------------------------------------------- /Assignments/Week4/C/MaxAbsDiff.c: -------------------------------------------------------------------------------- 1 | #define A( i,j ) *( ap + (j)*lda + (i) ) // map A( i,j ) to array ap in column-major order 2 | #define B( i,j ) *( bp + (j)*ldb + (i) ) // map B( i,j ) to array bp in column-major order 3 | 4 | #define dabs( x ) ( (x) < 0 ? -(x) : x ) 5 | 6 | double MaxAbsDiff( int m, int n, double *ap, int lda, double *bp, int ldb ) 7 | /* 8 | MaxAbsDiff returns the maximum absolute difference over 9 | corresponding elements of matrices A and B. 10 | */ 11 | { 12 | double diff=0.0; 13 | int i, j; 14 | 15 | for ( i=0; i diff ) 18 | diff = dabs( A( i,j ) - B( i,j ) ); 19 | 20 | return diff; 21 | } 22 | -------------------------------------------------------------------------------- /Makefile~: -------------------------------------------------------------------------------- 1 | clean: 2 | rm Assignments/Week0/C/*.c 3 | rm Assignments/Week0/C/*.o 4 | rm Assignments/Week0/C/*.x 5 | rm Assignments/Week1/C/*.c 6 | rm Assignments/Week1/C/*.o 7 | rm Assignments/Week1/C/*.x 8 | rm Assignments/Week1/C/data/* 9 | rm Assignments/Week1/C/*.c 10 | rm Assignments/Week1/C/*.o 11 | rm Assignments/Week1/C/*.x 12 | rm Assignments/Week1/C/data/* 13 | rm Assignments/Week2/C/*.c 14 | rm Assignments/Week2/C/*.o 15 | rm Assignments/Week2/C/*.x 16 | rm Assignments/Week2/C/data/* 17 | rm Assignments/Week3/C/*.c 18 | rm Assignments/Week3/C/*.o 19 | rm Assignments/Week3/C/*.x 20 | rm Assignments/Week3/C/data/* 21 | rm Assignments/Week4/C/*.c 22 | rm Assignments/Week4/C/*.o 23 | rm Assignments/Week4/C/*.x 24 | rm Assignments/Week4/C/data/* 25 | -------------------------------------------------------------------------------- /texput.log: -------------------------------------------------------------------------------- 1 | This is pdfTeX, Version 3.14159265-2.6-1.40.19 (TeX Live 2018) (preloaded format=pdflatex 2018.4.16) 15 APR 2019 20:02 2 | entering extended mode 3 | restricted \write18 enabled. 4 | %&-line parsing enabled. 5 | **All.tex 6 | 7 | ! Emergency stop. 8 | <*> All.tex 9 | 10 | End of file on the terminal! 11 | 12 | 13 | Here is how much of TeX's memory you used: 14 | 3 strings out of 492649 15 | 98 string characters out of 6129622 16 | 56311 words of memory out of 5000000 17 | 3988 multiletter control sequences out of 15000+600000 18 | 3640 words of font info for 14 fonts, out of 8000000 for 9000 19 | 1141 hyphenation exceptions out of 8191 20 | 0i,0n,0p,1b,6s stack positions out of 5000i,500n,10000p,200000b,80000s 21 | ! ==> Fatal error occurred, no output PDF file produced! 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | clean: 2 | rm -f Assignments/Week0/C/*.c 3 | rm -f Assignments/Week0/C/*.o 4 | rm -f Assignments/Week0/C/*.x 5 | rm -f Assignments/Week1/C/*.c 6 | rm -f Assignments/Week1/C/*.o 7 | rm -f Assignments/Week1/C/*.x 8 | rm -f Assignments/Week1/C/data/* 9 | rm -f Assignments/Week1/C/*.c 10 | rm -f Assignments/Week1/C/*.o 11 | rm -f Assignments/Week1/C/*.x 12 | rm -f Assignments/Week1/C/data/* 13 | rm -f Assignments/Week2/C/*.c 14 | rm -f Assignments/Week2/C/*.o 15 | rm -f Assignments/Week2/C/*.x 16 | rm -f Assignments/Week2/C/data/* 17 | rm -f Assignments/Week3/C/*.c 18 | rm -f Assignments/Week3/C/*.o 19 | rm -f Assignments/Week3/C/*.x 20 | rm -f Assignments/Week3/C/data/* 21 | rm -f Assignments/Week4/C/*.c 22 | rm -f Assignments/Week4/C/*.o 23 | rm -f Assignments/Week4/C/*.x 24 | rm -f Assignments/Week4/C/data/* 25 | -------------------------------------------------------------------------------- /Assignments/Week2/C/Gemm_JI_MRxNRKernel.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define alpha( i,j ) A[ (j)*ldA + (i) ] // map alpha( i,j ) to array A 5 | #define beta( i,j ) B[ (j)*ldB + (i) ] // map beta( i,j ) to array B 6 | #define gamma( i,j ) C[ (j)*ldC + (i) ] // map gamma( i,j ) to array C 7 | 8 | // The following definitions are not needed here because 9 | // MR and NR will be specified at compile time using Makefile. 10 | //#define MR 4 11 | //#define NR 4 12 | 13 | void Gemm_MRxNRKernel( int, double *, int, double *, int, double *, int ); 14 | 15 | void MyGemm( int m, int n, int k, double *A, int ldA, 16 | double *B, int ldB, double *C, int ldC ) 17 | { 18 | if ( m % MR != 0 || n % NR != 0 ){ 19 | printf( "m and n must be multiples of MR and NR, respectively \n" ); 20 | exit( 0 ); 21 | } 22 | 23 | for ( int j=0; j 3 | #include 4 | #else 5 | #include 6 | #endif 7 | 8 | double FLA_Clock_helper( void ); 9 | 10 | // A global variable used when FLA_Clock_helper() is defined in terms of 11 | // clock_gettime()/gettimeofday(). 12 | double gtod_ref_time_sec = 0.0; 13 | 14 | double FLA_Clock( void ) 15 | { 16 | return FLA_Clock_helper(); 17 | } 18 | 19 | #if defined(__APPLE__) || defined(__MACH__) 20 | 21 | double FLA_Clock_helper() 22 | { 23 | mach_timebase_info_data_t timebase; 24 | mach_timebase_info( &timebase ); 25 | 26 | uint64_t nsec = mach_absolute_time(); 27 | 28 | double the_time = (double) nsec * 1.0e-9 * timebase.numer / timebase.denom; 29 | 30 | if ( gtod_ref_time_sec == 0.0 ) 31 | gtod_ref_time_sec = the_time; 32 | 33 | return the_time - gtod_ref_time_sec; 34 | } 35 | 36 | #else 37 | 38 | double FLA_Clock_helper() 39 | { 40 | double the_time, norm_sec; 41 | struct timespec ts; 42 | 43 | clock_gettime( CLOCK_MONOTONIC, &ts ); 44 | 45 | if ( gtod_ref_time_sec == 0.0 ) 46 | gtod_ref_time_sec = ( double ) ts.tv_sec; 47 | 48 | norm_sec = ( double ) ts.tv_sec - gtod_ref_time_sec; 49 | 50 | the_time = norm_sec + ts.tv_nsec * 1.0e-9; 51 | 52 | return the_time; 53 | } 54 | 55 | #endif 56 | -------------------------------------------------------------------------------- /Assignments/Week1/C/FLA_Clock.c: -------------------------------------------------------------------------------- 1 | #if defined(__APPLE__) || defined(__MACH__) 2 | #include 3 | #include 4 | #else 5 | #include 6 | #endif 7 | 8 | double FLA_Clock_helper( void ); 9 | 10 | // A global variable used when FLA_Clock_helper() is defined in terms of 11 | // clock_gettime()/gettimeofday(). 12 | double gtod_ref_time_sec = 0.0; 13 | 14 | double FLA_Clock( void ) 15 | { 16 | return FLA_Clock_helper(); 17 | } 18 | 19 | #if defined(__APPLE__) || defined(__MACH__) 20 | 21 | double FLA_Clock_helper() 22 | { 23 | mach_timebase_info_data_t timebase; 24 | mach_timebase_info( &timebase ); 25 | 26 | uint64_t nsec = mach_absolute_time(); 27 | 28 | double the_time = (double) nsec * 1.0e-9 * timebase.numer / timebase.denom; 29 | 30 | if ( gtod_ref_time_sec == 0.0 ) 31 | gtod_ref_time_sec = the_time; 32 | 33 | return the_time - gtod_ref_time_sec; 34 | } 35 | 36 | #else 37 | 38 | double FLA_Clock_helper() 39 | { 40 | double the_time, norm_sec; 41 | struct timespec ts; 42 | 43 | clock_gettime( CLOCK_MONOTONIC, &ts ); 44 | 45 | if ( gtod_ref_time_sec == 0.0 ) 46 | gtod_ref_time_sec = ( double ) ts.tv_sec; 47 | 48 | norm_sec = ( double ) ts.tv_sec - gtod_ref_time_sec; 49 | 50 | the_time = norm_sec + ts.tv_nsec * 1.0e-9; 51 | 52 | return the_time; 53 | } 54 | 55 | #endif 56 | -------------------------------------------------------------------------------- /Assignments/Week2/C/FLA_Clock.c: -------------------------------------------------------------------------------- 1 | #if defined(__APPLE__) || defined(__MACH__) 2 | #include 3 | #include 4 | #else 5 | #include 6 | #endif 7 | 8 | double FLA_Clock_helper( void ); 9 | 10 | // A global variable used when FLA_Clock_helper() is defined in terms of 11 | // clock_gettime()/gettimeofday(). 12 | double gtod_ref_time_sec = 0.0; 13 | 14 | double FLA_Clock( void ) 15 | { 16 | return FLA_Clock_helper(); 17 | } 18 | 19 | #if defined(__APPLE__) || defined(__MACH__) 20 | 21 | double FLA_Clock_helper() 22 | { 23 | mach_timebase_info_data_t timebase; 24 | mach_timebase_info( &timebase ); 25 | 26 | uint64_t nsec = mach_absolute_time(); 27 | 28 | double the_time = (double) nsec * 1.0e-9 * timebase.numer / timebase.denom; 29 | 30 | if ( gtod_ref_time_sec == 0.0 ) 31 | gtod_ref_time_sec = the_time; 32 | 33 | return the_time - gtod_ref_time_sec; 34 | } 35 | 36 | #else 37 | 38 | double FLA_Clock_helper() 39 | { 40 | double the_time, norm_sec; 41 | struct timespec ts; 42 | 43 | clock_gettime( CLOCK_MONOTONIC, &ts ); 44 | 45 | if ( gtod_ref_time_sec == 0.0 ) 46 | gtod_ref_time_sec = ( double ) ts.tv_sec; 47 | 48 | norm_sec = ( double ) ts.tv_sec - gtod_ref_time_sec; 49 | 50 | the_time = norm_sec + ts.tv_nsec * 1.0e-9; 51 | 52 | return the_time; 53 | } 54 | 55 | #endif 56 | -------------------------------------------------------------------------------- /Assignments/Week2/C/Gemm_JIP_PJI.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define alpha( i,j ) A[ (j)*ldA + (i) ] // map alpha( i,j ) to array A 5 | #define beta( i,j ) B[ (j)*ldB + (i) ] // map beta( i,j ) to array B 6 | #define gamma( i,j ) C[ (j)*ldC + (i) ] // map gamma( i,j ) to array C 7 | 8 | #define min( x, y ) ( (x) < (y) ? (x) : (y) ) 9 | 10 | #define MB 4 11 | #define NB 4 12 | #define KB 4 13 | 14 | void Gemm_PJI( int, int, int, double *, int, double *, int, double *, int ); 15 | 16 | void MyGemm( int m, int n, int k, double *A, int ldA, 17 | double *B, int ldB, double *C, int ldC ) 18 | { 19 | for ( int j=0; j 3 | #include 4 | #else 5 | #include 6 | #endif 7 | 8 | double FLA_Clock_helper( void ); 9 | 10 | // A global variable used when FLA_Clock_helper() is defined in terms of 11 | // clock_gettime()/gettimeofday(). 12 | double gtod_ref_time_sec = 0.0; 13 | 14 | double FLA_Clock( void ) 15 | { 16 | return FLA_Clock_helper(); 17 | } 18 | 19 | #if defined(__APPLE__) || defined(__MACH__) 20 | 21 | double FLA_Clock_helper() 22 | { 23 | mach_timebase_info_data_t timebase; 24 | mach_timebase_info( &timebase ); 25 | 26 | uint64_t nsec = mach_absolute_time(); 27 | 28 | double the_time = (double) nsec * 1.0e-9 * timebase.numer / timebase.denom; 29 | 30 | if ( gtod_ref_time_sec == 0.0 ) 31 | gtod_ref_time_sec = the_time; 32 | 33 | return the_time - gtod_ref_time_sec; 34 | } 35 | 36 | #else 37 | 38 | double FLA_Clock_helper() 39 | { 40 | double the_time, norm_sec; 41 | struct timespec ts; 42 | 43 | clock_gettime( CLOCK_MONOTONIC, &ts ); 44 | 45 | if ( gtod_ref_time_sec == 0.0 ) 46 | gtod_ref_time_sec = ( double ) ts.tv_sec; 47 | 48 | norm_sec = ( double ) ts.tv_sec - gtod_ref_time_sec; 49 | 50 | the_time = norm_sec + ts.tv_nsec * 1.0e-9; 51 | 52 | return the_time; 53 | } 54 | 55 | #endif 56 | -------------------------------------------------------------------------------- /Assignments/Week4/C/FLA_Clock.c: -------------------------------------------------------------------------------- 1 | #if defined(__APPLE__) || defined(__MACH__) 2 | #include 3 | #include 4 | #else 5 | #include 6 | #endif 7 | 8 | double FLA_Clock_helper( void ); 9 | 10 | // A global variable used when FLA_Clock_helper() is defined in terms of 11 | // clock_gettime()/gettimeofday(). 12 | double gtod_ref_time_sec = 0.0; 13 | 14 | double FLA_Clock( void ) 15 | { 16 | return FLA_Clock_helper(); 17 | } 18 | 19 | #if defined(__APPLE__) || defined(__MACH__) 20 | 21 | double FLA_Clock_helper() 22 | { 23 | mach_timebase_info_data_t timebase; 24 | mach_timebase_info( &timebase ); 25 | 26 | uint64_t nsec = mach_absolute_time(); 27 | 28 | double the_time = (double) nsec * 1.0e-9 * timebase.numer / timebase.denom; 29 | 30 | if ( gtod_ref_time_sec == 0.0 ) 31 | gtod_ref_time_sec = the_time; 32 | 33 | return the_time - gtod_ref_time_sec; 34 | } 35 | 36 | #else 37 | 38 | double FLA_Clock_helper() 39 | { 40 | double the_time, norm_sec; 41 | struct timespec ts; 42 | 43 | clock_gettime( CLOCK_MONOTONIC, &ts ); 44 | 45 | if ( gtod_ref_time_sec == 0.0 ) 46 | gtod_ref_time_sec = ( double ) ts.tv_sec; 47 | 48 | norm_sec = ( double ) ts.tv_sec - gtod_ref_time_sec; 49 | 50 | the_time = norm_sec + ts.tv_nsec * 1.0e-9; 51 | 52 | return the_time; 53 | } 54 | 55 | #endif 56 | -------------------------------------------------------------------------------- /Assignments/Week2/C/Gemm_JIP_P_Ger.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define alpha( i,j ) A[ (j)*ldA + (i) ] // map alpha( i,j ) to array A 5 | #define beta( i,j ) B[ (j)*ldB + (i) ] // map beta( i,j ) to array B 6 | #define gamma( i,j ) C[ (j)*ldC + (i) ] // map gamma( i,j ) to array C 7 | 8 | #define min( x, y ) ( (x) < (y) ? (x) : (y) ) 9 | 10 | #define MB 4 11 | #define NB 4 12 | #define KB 4 13 | 14 | void Gemm_P_Ger( int, int, int, double *, int, double *, int, double *, int ); 15 | void Ger( int, int, double *, int, double *, int, double *, int ); 16 | 17 | void MyGemm( int m, int n, int k, double *A, int ldA, 18 | double *B, int ldB, double *C, int ldC ) 19 | { 20 | for ( int j=0; j 2 | #include 3 | 4 | #define alpha( i,j ) A[ (j)*ldA + (i) ] // map alpha( i,j ) to array A 5 | #define beta( i,j ) B[ (j)*ldB + (i) ] // map beta( i,j ) to array B 6 | #define gamma( i,j ) C[ (j)*ldC + (i) ] // map gamma( i,j ) to array C 7 | 8 | #define min( x, y ) ( (x) < (y) ? x : y ) 9 | 10 | void Gemm_MRxNRKernel( int, double *, int, double *, int, 11 | double *, int ); 12 | 13 | void Gemm_JI_MRxNRKernel( int, int, int, double *, int, double *, int, 14 | double *, int ); 15 | 16 | int MC, KC; 17 | 18 | void MyGemm( int m, int n, int k, double *A, int ldA, 19 | double *B, int ldB, double *C, int ldC ) 20 | { 21 | if ( m % MR != 0 || MC % MR != 0 ){ 22 | printf( "m and MC must be multiples of MR\n" ); 23 | exit( 0 ); 24 | } 25 | if ( n % NR != 0 ){ 26 | printf( "n and NC must be multiples of NR\n" ); 27 | exit( 0 ); 28 | } 29 | 30 | for ( int p=0; p 6 | 7 | void Gemm_MRxNRKernel( int k, double *A, int ldA, double *B, int ldB, 8 | double *C, int ldC ) 9 | { 10 | /* Declare vector registers to hold 4x4 C and load them */ 11 | __m256d gamma_0123_0 = _mm256_loadu_pd( &gamma( 0,0 ) ); 12 | __m256d gamma_0123_1 = _mm256_loadu_pd( &gamma( 0,1 ) ); 13 | __m256d gamma_0123_2 = _mm256_loadu_pd( &gamma( 0,2 ) ); 14 | __m256d gamma_0123_3 = _mm256_loadu_pd( &gamma( 0,3 ) ); 15 | 16 | for ( int p=0; p 2 | #include 3 | 4 | #define alpha( i,j ) A[ (j)*ldA + (i) ] // map alpha( i,j ) to array A 5 | #define beta( i,j ) B[ (j)*ldB + (i) ] // map beta( i,j ) to array B 6 | #define gamma( i,j ) C[ (j)*ldC + (i) ] // map gamma( i,j ) to array C 7 | 8 | #define min( x, y ) ( (x) < (y) ? x : y ) 9 | 10 | void Gemm_MRxNRKernel( int, double *, int, double *, int, 11 | double *, int ); 12 | 13 | void Gemm_JI_MRxNRKernel( int, int, int, double *, int, double *, int, 14 | double *, int ); 15 | 16 | #define MC 96 17 | #define NC 96 18 | #define KC 96 19 | 20 | void MyGemm( int m, int n, int k, double *A, int ldA, 21 | double *B, int ldB, double *C, int ldC ) 22 | { 23 | if ( m % MR != 0 || MC % MR != 0 ){ 24 | printf( "m and MC must be multiples of MR\n" ); 25 | exit( 0 ); 26 | } 27 | if ( n % NR != 0 || NC % NR != 0 ){ 28 | printf( "n and NC must be multiples of NR\n" ); 29 | exit( 0 ); 30 | } 31 | 32 | for ( int p=0; p 2 | #include 3 | 4 | #define alpha( i,j ) A[ (j)*ldA + (i) ] // map alpha( i,j ) to array A 5 | #define beta( i,j ) B[ (j)*ldB + (i) ] // map beta( i,j ) to array B 6 | #define gamma( i,j ) C[ (j)*ldC + (i) ] // map gamma( i,j ) to array C 7 | 8 | #define min( x, y ) ( (x) < (y) ? x : y ) 9 | 10 | void Gemm_MRxNRKernel( int, double *, int, double *, int, 11 | double *, int ); 12 | 13 | void Gemm_JI_MRxNRKernel( int, int, int, double *, int, double *, int, 14 | double *, int ); 15 | 16 | #define MC 96 17 | #define NC 96 18 | #define KC 96 19 | 20 | void MyGemm( int m, int n, int k, double *A, int ldA, 21 | double *B, int ldB, double *C, int ldC ) 22 | { 23 | if ( m % MR != 0 || MC % MR != 0 ){ 24 | printf( "m and MC must be multiples of MR\n" ); 25 | exit( 0 ); 26 | } 27 | if ( n % NR != 0 || NC % NR != 0 ){ 28 | printf( "n and NC must be multiples of NR\n" ); 29 | exit( 0 ); 30 | } 31 | 32 | for ( int i=0; i 6 | 7 | void Gemm_MRxNRKernel_Packed( int k, 8 | double *MP_A, double *MP_B, double *C, int ldC ) 9 | { 10 | __m256d gamma_0123_0 = _mm256_loadu_pd( &gamma( 0,0 ) ); 11 | __m256d gamma_0123_1 = _mm256_loadu_pd( &gamma( 0,1 ) ); 12 | __m256d gamma_0123_2 = _mm256_loadu_pd( &gamma( 0,2 ) ); 13 | __m256d gamma_0123_3 = _mm256_loadu_pd( &gamma( 0,3 ) ); 14 | 15 | __m256d beta_p_j; 16 | 17 | for ( int p=0; p 2 | #include 3 | 4 | #define alpha( i,j ) A[ (j)*ldA + (i) ] // map alpha( i,j ) to array A 5 | #define beta( i,j ) B[ (j)*ldB + (i) ] // map beta( i,j ) to array B 6 | #define gamma( i,j ) C[ (j)*ldC + (i) ] // map gamma( i,j ) to array C 7 | 8 | #define min( x, y ) ( ( x ) < ( y ) ? x : y ) 9 | 10 | void LoopFive( int, int, int, double *, int, double *, int, double *, int ); 11 | void LoopFour( int, int, int, double *, int, double *, int, double *, int ); 12 | void LoopThree( int, int, int, double *, int, double *, int, double *, int ); 13 | void LoopTwo( int, int, int, double *, int, double *, int, double *, int ); 14 | void LoopOne( int, int, int, double *, int, double *, int, double *, int ); 15 | void Gemm_MRxNRKernel( int, double *, int, double *, int, double *, int ); 16 | 17 | void MyGemm( int m, int n, int k, double *A, int ldA, 18 | double *B, int ldB, double *C, int ldC ) 19 | { 20 | if ( m % MR != 0 || MC % MR != 0 ){ 21 | printf( "m and MC must be multiples of MR\n" ); 22 | exit( 0 ); 23 | } 24 | if ( n % NR != 0 || NC % NR != 0 ){ 25 | printf( "n and NC must be multiples of NR\n" ); 26 | exit( 0 ); 27 | } 28 | 29 | LoopFive( m, n, k, A, ldA, B, ldB, C, ldC ); 30 | } 31 | 32 | void LoopFive( int m, int n, int k, double *A, int ldA, 33 | double *B, int ldB, double *C, int ldC ) 34 | { 35 | for ( int j=0; j 2 | #include 3 | 4 | #define alpha( i,j ) A[ (j)*ldA + (i) ] // map alpha( i,j ) to array A 5 | #define beta( i,j ) B[ (j)*ldB + (i) ] // map beta( i,j ) to array B 6 | #define gamma( i,j ) C[ (j)*ldC + (i) ] // map gamma( i,j ) to array C 7 | 8 | #define min( x, y ) ( ( x ) < ( y ) ? x : y ) 9 | 10 | void LoopFive( int, int, int, double *, int, double *, int, double *, int ); 11 | void LoopFour( int, int, int, double *, int, double *, int, double *, int ); 12 | void LoopThree( int, int, int, double *, int, double *, double *, int ); 13 | void LoopTwo( int, int, int, double *, double *, double *, int ); 14 | void LoopOne( int, int, int, double *, double *, double *, int ); 15 | void Gemm_MRxNRKernel_Packed( int, double *, double *, double *, int ); 16 | void PackBlockA_MCxKC( int, int, double *, int, double * ); 17 | void PackPanelB_KCxNC( int, int, double *, int, double * ); 18 | 19 | void MyGemm( int m, int n, int k, double *A, int ldA, 20 | double *B, int ldB, double *C, int ldC ) 21 | { 22 | if ( m % MR != 0 || MC % MR != 0 ){ 23 | printf( "m and MC must be multiples of MR\n" ); 24 | exit( 0 ); 25 | } 26 | if ( n % NR != 0 || NC % NR != 0 ){ 27 | printf( "n and NC must be multiples of NR\n" ); 28 | exit( 0 ); 29 | } 30 | 31 | LoopFive( m, n, k, A, ldA, B, ldB, C, ldC ); 32 | } 33 | 34 | void LoopFive( int m, int n, int k, double *A, int ldA, 35 | double *B, int ldB, double *C, int ldC ) 36 | { 37 | for ( int j=0; j