Added ANSI colours for error messages. Major update for the makefile, including parametrized start/stop/step and looping over optimization flags (O0/O1/O2/O3/Ofast)

author: Connor Moore <connor@hhmoore.ca> 2026-01-31 00:32:06 -0500
committer: Connor Moore <connor@hhmoore.ca> 2026-01-31 00:32:06 -0500
commit: c896807fd9fc5b5f2cdbec9eef717f815af3779a (patch)
tree: 3267fd9331ea6337d327f407063f53a798a79bce
parent: e1babc4f71ba2e3fa3139dddb6d77f1c7b5a9683 (diff)
2 files changed, 85 insertions, 53 deletions
diff --git a/Makefile b/Makefile
index 32449de..629cda6 100644
--- a/Makefile
+++ b/Makefile
@@ -1,35 +1,55 @@
-GCC=gfortran
-oneAPI=ifx
+# Compilers under comparison. The names double as labels for the binaries
+# in bin/ and for the result files in results/.
+GCC     = gfortran
+oneAPI  = ifx
+
+# These flags will be looped over to generate different datasets.
+# They are written without the leading dash; the recipes prepend it.
+FLAGS  := O0 O1 O2 O3 Ofast
+# Also specify a default (note the conditional assignment operation, ?=)
+OPTFLAGS ?= O3
+
+# ANSI escape sequences used to colour the status messages
+GREEN  := \033[0;32m
+RESET  := \033[0m
+
+# Short runs: start/stop/step arguments passed to the benchmark binaries
+S_START:= 100
+S_END  := 3500
+S_STEP := 100
+
+# Long runs: start at S_END and continue to L_END in steps of L_STEP
+L_END  := 20000
+L_STEP := 500
+
+# These targets are commands, not files: always run them, even if a file or
+# directory with the same name happens to exist.
+.PHONY: all tests plots clean
 
 all:
-	mkdir -p bin/
-	$(GCC) matrixproduct.f90 -o bin/$(GCC).serial.out -O3 -fexternal-blas -lopenblas -march=native
-	$(oneAPI) matrixproduct.f90 -o bin/$(oneAPI).serial.out -qmkl=sequential -O3 -heap-arrays -xHost
-	$(GCC) matrixproduct.f90 -o bin/$(GCC).parallel.out -O3 -fexternal-blas -lopenblas -march=native -fopenmp
-	$(oneAPI) matrixproduct.f90 -o bin/$(oneAPI).parallel.out -qmkl=parallel -O3 -heap-arrays -xHost -fopenmp
-
-tests: clean all
-	mkdir -p results/
-	
-	# Serial runs
-	export OMP_NUM_THREADS=1
-	./bin/$(GCC).serial.out 100 3500 100 yes > results/$(GCC)_short_serial
-	./bin/$(oneAPI).serial.out 100 3500 100 yes > results/$(oneAPI)_short_serial
-	
-	./bin/$(GCC).serial.out 3500 20000 500 no > results/$(GCC)_long_serial
-	./bin/$(oneAPI).serial.out 3500 20000 500 no > results/$(oneAPI)_long_serial
-	
-	# Parallel runs
-	export OMP_NUM_THREADS=8
-	./bin/$(GCC).parallel.out 100 3500 100 yes > results/$(GCC)_short_parallel
-	./bin/$(oneAPI).parallel.out 100 3500 100 yes > results/$(oneAPI)_short_parallel
-	
-	./bin/$(GCC).parallel.out 3500 20000 500 no > results/$(GCC)_long_parallel
-	./bin/$(oneAPI).parallel.out 3500 20000 500 no > results/$(oneAPI)_long_parallel
+# Build four binaries at optimisation level -$(OPTFLAGS): serial and OpenMP
+# variants for each compiler (gfortran links OpenBLAS, ifx links MKL).
+# printf is used instead of echo because echo only expands the \033 colour
+# escapes under some /bin/sh implementations.
+	@mkdir -p bin/
+	@printf "$(GREEN)Compiling serial and parallel binaries with %s$(RESET)\n" "$(OPTFLAGS)"
+	$(GCC) matrixproduct.f90 -o bin/$(GCC).serial.out -$(OPTFLAGS) -fexternal-blas -lopenblas -march=native
+	$(oneAPI) matrixproduct.f90 -o bin/$(oneAPI).serial.out -qmkl=sequential -$(OPTFLAGS) -heap-arrays -xHost
+	$(GCC) matrixproduct.f90 -o bin/$(GCC).parallel.out -$(OPTFLAGS) -fexternal-blas -lopenblas -march=native -fopenmp
+	$(oneAPI) matrixproduct.f90 -o bin/$(oneAPI).parallel.out -qmkl=parallel -$(OPTFLAGS) -heap-arrays -xHost -fopenmp
+
+# Benchmark every optimisation level listed in FLAGS. For each level the
+# binaries are rebuilt through a sub-make, then the short and long sweeps are
+# run serially (1 thread) and in parallel (8 threads). Every run writes to
+#   results/<compiler>_<short|long>_<serial|parallel>_<opt>.out
+# so that one level never overwrites the data of another.
+# The loop aborts if the rebuild fails, rather than timing missing or stale binaries.
+# NOTE(review): plots.gnu is not visible here; confirm it reads these file names.
+tests: clean
+	@mkdir -p results/
+	@for opt in $(FLAGS); do \
+                $(MAKE) all OPTFLAGS=$$opt || exit 1; \
+                printf "$(GREEN)Running serial short runs with %s$(RESET)\n" "$$opt"; \
+                export OMP_NUM_THREADS=1; \
+                export MKL_NUM_THREADS=1; \
+                ./bin/$(GCC).serial.out $(S_START) $(S_END) $(S_STEP) yes > results/$(GCC)_short_serial_$$opt.out; \
+                ./bin/$(oneAPI).serial.out $(S_START) $(S_END) $(S_STEP) yes > results/$(oneAPI)_short_serial_$$opt.out; \
+                printf "$(GREEN)Running serial long runs with %s$(RESET)\n" "$$opt"; \
+                ./bin/$(GCC).serial.out $(S_END) $(L_END) $(L_STEP) no > results/$(GCC)_long_serial_$$opt.out; \
+                ./bin/$(oneAPI).serial.out $(S_END) $(L_END) $(L_STEP) no > results/$(oneAPI)_long_serial_$$opt.out; \
+                printf "$(GREEN)Running parallel short runs with %s$(RESET)\n" "$$opt"; \
+                export OMP_NUM_THREADS=8; \
+                export MKL_NUM_THREADS=8; \
+                ./bin/$(GCC).parallel.out $(S_START) $(S_END) $(S_STEP) yes > results/$(GCC)_short_parallel_$$opt.out; \
+                ./bin/$(oneAPI).parallel.out $(S_START) $(S_END) $(S_STEP) yes > results/$(oneAPI)_short_parallel_$$opt.out; \
+                printf "$(GREEN)Running parallel long runs with %s$(RESET)\n" "$$opt"; \
+                ./bin/$(GCC).parallel.out $(S_END) $(L_END) $(L_STEP) no > results/$(GCC)_long_parallel_$$opt.out; \
+                ./bin/$(oneAPI).parallel.out $(S_END) $(L_END) $(L_STEP) no > results/$(oneAPI)_long_parallel_$$opt.out; \
+	done
 
 plots:
 	gnuplot -p plots.gnu
 
 clean:
-	rm -rf bin/
-	rm -rf results/
+# Remove everything the build and the benchmarks generate.
+# printf is used instead of echo because echo only expands the \033 colour
+# escapes under some /bin/sh implementations.
+	@printf "$(GREEN)Cleaning bin/ and results/$(RESET)\n"
+	@rm -rf bin/
+	@rm -rf results/
diff --git a/matrixproduct.f90 b/matrixproduct.f90
index d7b3d13..97417e4 100644
--- a/matrixproduct.f90
+++ b/matrixproduct.f90
@@ -1,31 +1,40 @@
 program matrixproduct
 
+    !> MCSC-6030G Project 1. Connor Moore, 2026 <connor@hhmoore.ca>
     !> Performs a matrix-matrix multiplication using various methods
     !> and compares the performance of each. The following are used:
     !>  1. Basic triple-loop (iterative algorithm)
     !>  2. Fortran native matmul routine
     !>  3. LAPACK/BLAS library call
+    !> The wall times for these are compared with gfortran from gnu
+    !> and ifx from Intel. OpenMP is used for parallel comparisons
         
-    use omp_lib
-    use, intrinsic :: iso_fortran_env
-    implicit none
-    external :: dgemm   !> double-precision general matrix-matrix multiplication
+    use omp_lib                         !> For OpenMP parallel do loops
+    use, intrinsic :: iso_fortran_env   !> For named datatypes, e.g. real64
+    implicit none                       !> Don't infer any types from names
+    external :: dgemm                   !> double-precision general matrix-matrix multiplication
     
+    !> A number of variables will be declared. This includes three square matrices (allocatable),
+    !> a start/end time holder, variables to hold time for each "technique" (loop/loop/matmul/blas),
+    !> and a character buffer and a logical for command line arguments. The ANSI escape char is also defined.
     real(real64), allocatable, dimension(:,:) :: A, B, C
-    real(real64) :: start, end, loop_time, loop_alt_time, matmul_time, blas_time
+    real(real64) :: loop_time, loop_alt_time, matmul_time, blas_time
+    integer(int64) :: start, end, clockrate
     integer(int32) :: n, start_num, step_num, stop_num
-    character(10) :: temp_in
+    character(len=32) :: temp_in
+    character(len=*), parameter :: ESC_CHAR=achar(27) !> Pretty printing with ANSI escape sequences
     logical :: run_loops
     
     !> Start by taking the command-line arguments. This is useful because
     !> it lets us call the program from Bash with a variable matrix size
 
     call get_command_argument(1, temp_in)
-    read(temp_in,'(i10)') start_num
+    read(temp_in,*) start_num
     call get_command_argument(2, temp_in)
-    read(temp_in,'(i10)') stop_num
+    read(temp_in,*) stop_num
     call get_command_argument(3, temp_in)
-    read(temp_in,'(i10)') step_num
+    read(temp_in,*) step_num
+
     !> The last argument is a string [yes/no] that instructs the program
     !> to either run with the triple-loops or ignore them completely.
     call get_command_argument(4, temp_in)
@@ -35,40 +44,43 @@ program matrixproduct
         case ('no')
             run_loops=.FALSE.
         case default
-            write(*,'("WARNING:",A," is not a supported argument [yes/no], defaulting to YES")') temp_in
-            run_loops=.TRUE.
+            write(error_unit,'(A)') ESC_CHAR // "[31mERROR: Unsupported input (" // trim(temp_in) // ") for loop specification [yes/no]" // ESC_CHAR // "[0m"
+            stop
+
     end select 
 
-    write(*,'("Compiled with ",A,A," on ",A)') COMPILER_VERSION(), COMPILER_OPTIONS()
-    write(*,'("Running with start=",I0,", stop=",I0,", step=",I0)') start_num, stop_num, step_num
+    write(*,'(A)') ESC_CHAR // "[32m" // COMPILER_VERSION() // achar(10) // COMPILER_OPTIONS() // ESC_CHAR // "[0m"
+    write(*,'(A,I0,A,I0,A,I0)') "Running with start=", start_num, ", stop=", stop_num, ", step=", step_num
+
+    call system_clock(count_rate=clockrate)
     
     do n = start_num, stop_num, step_num
         call prep_mats(A,B,C,n)
 
         if(run_loops) then
-                call cpu_time(start)
+                call system_clock(count=start)
                 call triple_loop_mul(A,B,C,n)
-                call cpu_time(end)
-                loop_time = end-start
+                call system_clock(count=end)
+                loop_time = real(end-start, real64)/real(clockrate, real64)
                 C = 0
 
-                call cpu_time(start)
+                call system_clock(count=start)
                 call triple_loop_mul_alt(A,B,C,n)
-                call cpu_time(end)
-                loop_alt_time = end-start
+                call system_clock(count=end)
+                loop_alt_time = real(end-start, real64)/real(clockrate, real64)
                 C = 0
         endif
 
-        call cpu_time(start)
+        call system_clock(count=start)
         C = matmul(A,B)
-        call cpu_time(end)
-        matmul_time = end-start
+        call system_clock(count=end)
+        matmul_time = real(end-start, real64)/real(clockrate, real64)
         C = 0
 
-        call cpu_time(start)
+        call system_clock(count=start)
         call dgemm('N', 'N', n, n, n, 1.0_real64, A, n, B, n, 0.0_real64, C, n)
-        call cpu_time(end)
-        blas_time = end-start
+        call system_clock(count=end)
+        blas_time = real(end-start, real64)/real(clockrate, real64)
 
         deallocate(A,B,C)
author	Connor Moore <connor@hhmoore.ca>	2026-01-31 00:32:06 -0500
committer	Connor Moore <connor@hhmoore.ca>	2026-01-31 00:32:06 -0500
commit	c896807fd9fc5b5f2cdbec9eef717f815af3779a (patch)
tree	3267fd9331ea6337d327f407063f53a798a79bce
parent	e1babc4f71ba2e3fa3139dddb6d77f1c7b5a9683 (diff)