program matrixproduct !> Performs a matrix-matrix multiplication using various methods !> and compares the performance of each. The following are used: !> 1. Basic triple-loop (iterative algorithm) !> 2. Fortran native matmul routine !> 3. LAPACK/BLAS library call use :: omp_lib use, intrinsic :: iso_fortran_env implicit none external :: dgemm !> double-precision general matrix-matrix multiplication real(real64), allocatable, dimension(:,:) :: A, B, C real(real64) :: start, end, loop_time, loop_alt_time, matmul_time, blas_time integer(int32) :: n, start_num, step_num, stop_num character(10) :: temp_in logical :: run_loops call get_command_argument(1, temp_in) read(temp_in,'(i10)') start_num call get_command_argument(2, temp_in) read(temp_in,'(i10)') step_num call get_command_argument(3, temp_in) read(temp_in,'(i10)') stop_num call get_command_argument(4, temp_in) select case (temp_in) case ('yes ') run_loops=.TRUE. case ('no ') run_loops=.FALSE. case default write(*,'(A,A,A)') "WARNING: ",temp_in," not supported argument for run_loops [yes/no]" end select write(*,'(A,i10,i10,i10)') "Running with start, step, stop ",start_num,step_num,stop_num do n = start_num, stop_num, step_num call prep_mats(A,B,C,n) if(run_loops) then call cpu_time(start) call triple_loop_mul(A,B,C,n) call cpu_time(end) loop_time = end-start C = 0 call cpu_time(start) call triple_loop_mul_alt(A,B,C,n) call cpu_time(end) loop_alt_time = end-start C = 0 endif call cpu_time(start) C = matmul(A,B) call cpu_time(end) matmul_time = end-start C = 0 call cpu_time(start) call dgemm('N', 'N', n, n, n, 1.0_real64, A, n, B, n, 0.0_real64, C, n) call cpu_time(end) blas_time = end-start deallocate(A,B,C) if(run_loops) then write(*,'((i5),4(e16.8))') n, loop_time, loop_alt_time, matmul_time, blas_time else write(*,'((i5),2(e16.8))') n, matmul_time, blas_time endif enddo contains subroutine prep_mats(A,B,C,n) real(real64), dimension(:,:), allocatable, intent(out) :: A,B,C integer(int32), intent(in) :: n allocate(A(n,n)) allocate(B(n,n)) allocate(C(n,n)) call random_number(A) call random_number(B) C = 0 end subroutine prep_mats subroutine triple_loop_mul(A,B,C,n) real(real64), dimension(:,:), intent(in) :: A,B real(real64), dimension(:,:), intent(out) :: C integer(int32), intent(in) :: n integer(int32) :: i, j, k !$omp parallel do private(i,j,k) row: do i = 1,n col: do j = 1,n sum: do k = 1,n C(i,j) = C(i,j) + A(i,k)*B(k,j) end do sum end do col end do row !$omp end parallel do end subroutine triple_loop_mul subroutine triple_loop_mul_alt(A,B,C,n) real(real64), dimension(:,:), intent(in) :: A,B real(real64), dimension(:,:), intent(out) :: C integer(int32), intent(in) :: n integer(int32) :: i, j, k col: do j = 1,n row: do i = 1,n sum: do k = 1,n C(i,j) = C(i,j) + A(i,k)*B(k,j) end do sum end do row end do col end subroutine triple_loop_mul_alt end program matrixproduct