program matrixproduct !> MCSC-6030G Project 1. Connor Moore, 2026 !> Performs a matrix-matrix multiplication using various methods !> and compares the performance of each. The following are used: !> 1. Basic triple-loop (iterative algorithm) !> 2. Fortran native matmul routine !> 3. LAPACK/BLAS library call !> The wall times for these are compared with gfortran from gnu !> and ifx from Intel. OpenMP is used for parallel comparisons use omp_lib !> For OpenMP parallel do loops use, intrinsic :: iso_fortran_env !> For named datatypes, e.g. real64 implicit none !> Don't infer any types from names external :: dgemm !> double-precision general matrix-matrix multiplication !> A number of variables will be delcared. This includes three square matrices (allocatable), !> a start/end time holder, variables to hold time for each "technique" (loop/loop/matmul/blas), !> and a (char and logical) for command line arugments. The ANSI escape char is also defined. real(real64), allocatable, dimension(:,:) :: A, B, C real(real64) :: loop_time, loop_alt_time, matmul_time, blas_time integer(int64) :: start, end, clockrate integer(int32) :: n, start_num, step_num, stop_num character(len=32) :: temp_in character(len=*), parameter :: ESC_CHAR=achar(27) !> Pretty printing with ANSI escape sequences logical :: run_loops !> Start by taking the command-line arguments. This is useful because !> it lets us call the program from Bash with a variable matrix size call get_command_argument(1, temp_in) read(temp_in,*) start_num call get_command_argument(2, temp_in) read(temp_in,*) stop_num call get_command_argument(3, temp_in) read(temp_in,*) step_num !> The last argument is a string [yes/no] that instructs the program !> to either run with the triple-loops or ignore them completely. call get_command_argument(4, temp_in) select case (trim(temp_in)) case ('yes') run_loops=.TRUE. case ('no') run_loops=.FALSE. case default write(error_unit,'(A)') ESC_CHAR // & "[31mERROR: Unsupported input (" // trim(temp_in) // & ") for loop specification [yes/no]" // ESC_CHAR // "[0m" stop end select write(*,'(A)') ESC_CHAR // "[32m" // COMPILER_VERSION() // achar(10) // COMPILER_OPTIONS() // ESC_CHAR // "[0m" write(*,'(A,I0,A,I0,A,I0)') "Running with start=", start_num, ", stop=", stop_num, ", step=", step_num call system_clock(count_rate=clockrate) do n = start_num, stop_num, step_num call prep_mats(A,B,C,n) if(run_loops) then call system_clock(count=start) call triple_loop_mul(A,B,C,n) call system_clock(count=end) loop_time = real(end-start, real64)/real(clockrate, real64) C = 0 call system_clock(count=start) call triple_loop_mul_alt(A,B,C,n) call system_clock(count=end) loop_alt_time = real(end-start, real64)/real(clockrate, real64) C = 0 endif call system_clock(count=start) C = matmul(A,B) call system_clock(count=end) matmul_time = real(end-start, real64)/real(clockrate, real64) C = 0 call system_clock(count=start) call dgemm('N', 'N', n, n, n, 1.0_real64, A, n, B, n, 0.0_real64, C, n) call system_clock(count=end) blas_time = real(end-start, real64)/real(clockrate, real64) deallocate(A,B,C) if(run_loops) then write(*,'((i5),4(e16.8))') n, loop_time, loop_alt_time, matmul_time, blas_time else write(*,'((i5),2(e16.8))') n, matmul_time, blas_time endif enddo contains subroutine prep_mats(A,B,C,n) real(real64), dimension(:,:), allocatable, intent(out) :: A,B,C integer(int32), intent(in) :: n allocate(A(n,n)) allocate(B(n,n)) allocate(C(n,n)) call random_number(A) call random_number(B) C = 0 end subroutine prep_mats subroutine triple_loop_mul(A,B,C,n) real(real64), dimension(:,:), intent(in) :: A,B real(real64), dimension(:,:), intent(out) :: C integer(int32), intent(in) :: n integer(int32) :: i, j, k !$omp parallel do private(j,k) row: do i = 1,n col: do j = 1,n sum: do k = 1,n C(i,j) = C(i,j) + A(i,k)*B(k,j) end do sum end do col end do row !$omp end parallel do end subroutine triple_loop_mul subroutine triple_loop_mul_alt(A,B,C,n) real(real64), dimension(:,:), intent(in) :: A,B real(real64), dimension(:,:), intent(out) :: C integer(int32), intent(in) :: n integer(int32) :: i, j, k !$omp parallel do private(i,k) col: do j = 1,n row: do i = 1,n sum: do k = 1,n C(i,j) = C(i,j) + A(i,k)*B(k,j) end do sum end do row end do col !$omp end parallel do end subroutine triple_loop_mul_alt end program matrixproduct