program matrixproduct !> Performs a matrix-matrix multiplication using various methods !> and compares the performance of each. The following are used: !> 1. Basic triple-loop (iterative algorithm) !> 2. Fortran native matmul routine !> 3. LAPACK/BLAS library call use omp_lib use, intrinsic :: iso_fortran_env implicit none external :: dgemm !> double-precision general matrix-matrix multiplication real(real64), allocatable, dimension(:,:) :: A, B, C real(real64) :: start, end, loop_time, loop_alt_time, matmul_time, blas_time integer(int32) :: n, start_num, step_num, stop_num character(10) :: temp_in logical :: run_loops !> Start by taking the command-line arguments. This is useful because !> it lets us call the program from Bash with a variable matrix size call get_command_argument(1, temp_in) read(temp_in,'(i10)') start_num call get_command_argument(2, temp_in) read(temp_in,'(i10)') stop_num call get_command_argument(3, temp_in) read(temp_in,'(i10)') step_num !> The last argument is a string [yes/no] that instructs the program !> to either run with the triple-loops or ignore them completely. call get_command_argument(4, temp_in) select case (trim(temp_in)) case ('yes') run_loops=.TRUE. case ('no') run_loops=.FALSE. case default write(*,'("WARNING:",A," is not a supported argument [yes/no], defaulting to YES")') temp_in run_loops=.TRUE. end select write(*,'("Compiled with ",A,A," on ",A)') COMPILER_VERSION(), COMPILER_OPTIONS() write(*,'("Running with start=",I0,", stop=",I0,", step=",I0)') start_num, stop_num, step_num do n = start_num, stop_num, step_num call prep_mats(A,B,C,n) if(run_loops) then call cpu_time(start) call triple_loop_mul(A,B,C,n) call cpu_time(end) loop_time = end-start C = 0 call cpu_time(start) call triple_loop_mul_alt(A,B,C,n) call cpu_time(end) loop_alt_time = end-start C = 0 endif call cpu_time(start) C = matmul(A,B) call cpu_time(end) matmul_time = end-start C = 0 call cpu_time(start) call dgemm('N', 'N', n, n, n, 1.0_real64, A, n, B, n, 0.0_real64, C, n) call cpu_time(end) blas_time = end-start deallocate(A,B,C) if(run_loops) then write(*,'((i5),4(e16.8))') n, loop_time, loop_alt_time, matmul_time, blas_time else write(*,'((i5),2(e16.8))') n, matmul_time, blas_time endif enddo contains subroutine prep_mats(A,B,C,n) real(real64), dimension(:,:), allocatable, intent(out) :: A,B,C integer(int32), intent(in) :: n allocate(A(n,n)) allocate(B(n,n)) allocate(C(n,n)) call random_number(A) call random_number(B) C = 0 end subroutine prep_mats subroutine triple_loop_mul(A,B,C,n) real(real64), dimension(:,:), intent(in) :: A,B real(real64), dimension(:,:), intent(out) :: C integer(int32), intent(in) :: n integer(int32) :: i, j, k !$omp parallel do private(j,k) row: do i = 1,n col: do j = 1,n sum: do k = 1,n C(i,j) = C(i,j) + A(i,k)*B(k,j) end do sum end do col end do row !$omp end parallel do end subroutine triple_loop_mul subroutine triple_loop_mul_alt(A,B,C,n) real(real64), dimension(:,:), intent(in) :: A,B real(real64), dimension(:,:), intent(out) :: C integer(int32), intent(in) :: n integer(int32) :: i, j, k !$omp parallel do private(j,k) col: do j = 1,n row: do i = 1,n sum: do k = 1,n C(i,j) = C(i,j) + A(i,k)*B(k,j) end do sum end do row end do col !$omp end parallel do end subroutine triple_loop_mul_alt end program matrixproduct