From c896807fd9fc5b5f2cdbec9eef717f815af3779a Mon Sep 17 00:00:00 2001 From: Connor Moore Date: Sat, 31 Jan 2026 00:32:06 -0500 Subject: Added ANSI colours for error messages. Major update for the makefile, including parametrized start/stop/step and looping over optimization flags (O0/O1/O2/O3/Ofast) --- matrixproduct.f90 | 62 +++++++++++++++++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 25 deletions(-) (limited to 'matrixproduct.f90') diff --git a/matrixproduct.f90 b/matrixproduct.f90 index d7b3d13..97417e4 100644 --- a/matrixproduct.f90 +++ b/matrixproduct.f90 @@ -1,31 +1,40 @@ program matrixproduct + !> MCSC-6030G Project 1. Connor Moore, 2026 !> Performs a matrix-matrix multiplication using various methods !> and compares the performance of each. The following are used: !> 1. Basic triple-loop (iterative algorithm) !> 2. Fortran native matmul routine !> 3. LAPACK/BLAS library call + !> The wall times for these are compared with gfortran from GNU + !> and ifx from Intel. OpenMP is used for parallel comparisons - use omp_lib - use, intrinsic :: iso_fortran_env - implicit none - external :: dgemm !> double-precision general matrix-matrix multiplication + use omp_lib !> For OpenMP parallel do loops + use, intrinsic :: iso_fortran_env !> For named datatypes, e.g. real64 + implicit none !> Don't infer any types from names + external :: dgemm !> double-precision general matrix-matrix multiplication + !> A number of variables will be declared. This includes three square matrices (allocatable), + !> a start/end time holder, variables to hold time for each "technique" (loop/loop/matmul/blas), + !> and a (char and logical) for command line arguments. The ANSI escape char is also defined. 
real(real64), allocatable, dimension(:,:) :: A, B, C - real(real64) :: start, end, loop_time, loop_alt_time, matmul_time, blas_time + real(real64) :: loop_time, loop_alt_time, matmul_time, blas_time + integer(int64) :: start, end, clockrate integer(int32) :: n, start_num, step_num, stop_num - character(10) :: temp_in + character(len=32) :: temp_in + character(len=*), parameter :: ESC_CHAR=achar(27) !> Pretty printing with ANSI escape sequences logical :: run_loops !> Start by taking the command-line arguments. This is useful because !> it lets us call the program from Bash with a variable matrix size call get_command_argument(1, temp_in) - read(temp_in,'(i10)') start_num + read(temp_in,*) start_num call get_command_argument(2, temp_in) - read(temp_in,'(i10)') stop_num + read(temp_in,*) stop_num call get_command_argument(3, temp_in) - read(temp_in,'(i10)') step_num + read(temp_in,*) step_num + !> The last argument is a string [yes/no] that instructs the program !> to either run with the triple-loops or ignore them completely. call get_command_argument(4, temp_in) @@ -35,40 +44,43 @@ program matrixproduct case ('no') run_loops=.FALSE. case default - write(*,'("WARNING:",A," is not a supported argument [yes/no], defaulting to YES")') temp_in - run_loops=.TRUE. 
+ write(error_unit,'(A)') ESC_CHAR // "[31mERROR: Unsupported input (" // trim(temp_in) // ") for loop specification [yes/no]" // ESC_CHAR // "[0m" + stop + end select - write(*,'("Compiled with ",A,A," on ",A)') COMPILER_VERSION(), COMPILER_OPTIONS() - write(*,'("Running with start=",I0,", stop=",I0,", step=",I0)') start_num, stop_num, step_num + write(*,'(A)') ESC_CHAR // "[32m" // COMPILER_VERSION() // achar(10) // COMPILER_OPTIONS() // ESC_CHAR // "[0m" + write(*,'(A,I0,A,I0,A,I0)') "Running with start=", start_num, ", stop=", stop_num, ", step=", step_num + + call system_clock(count_rate=clockrate) do n = start_num, stop_num, step_num call prep_mats(A,B,C,n) if(run_loops) then - call cpu_time(start) + call system_clock(count=start) call triple_loop_mul(A,B,C,n) - call cpu_time(end) - loop_time = end-start + call system_clock(count=end) + loop_time = real(end-start, real64)/real(clockrate, real64) C = 0 - call cpu_time(start) + call system_clock(count=start) call triple_loop_mul_alt(A,B,C,n) - call cpu_time(end) - loop_alt_time = end-start + call system_clock(count=end) + loop_alt_time = real(end-start, real64)/real(clockrate, real64) C = 0 endif - call cpu_time(start) + call system_clock(count=start) C = matmul(A,B) - call cpu_time(end) - matmul_time = end-start + call system_clock(count=end) + matmul_time = real(end-start, real64)/real(clockrate, real64) C = 0 - call cpu_time(start) + call system_clock(count=start) call dgemm('N', 'N', n, n, n, 1.0_real64, A, n, B, n, 0.0_real64, C, n) - call cpu_time(end) - blas_time = end-start + call system_clock(count=end) + blas_time = real(end-start, real64)/real(clockrate, real64) deallocate(A,B,C) -- cgit v1.2.3