1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
|
program matrixproduct
!> Performs a matrix-matrix multiplication using various methods
!> and compares the performance of each. The following are used:
!> 1. Basic triple-loop (iterative algorithm)
!> 2. Fortran native matmul routine
!> 3. LAPACK/BLAS library call
use :: omp_lib
use, intrinsic :: iso_fortran_env
implicit none
external :: dgemm !> double-precision general matrix-matrix multiplication
real(real64), allocatable, dimension(:,:) :: A, B, C
real(real64) :: start, end, loop_time, loop_alt_time, matmul_time, blas_time
integer(int32) :: n, start_num, step_num, stop_num
character(10) :: temp_in
logical :: run_loops
call get_command_argument(1, temp_in)
read(temp_in,'(i10)') start_num
call get_command_argument(2, temp_in)
read(temp_in,'(i10)') step_num
call get_command_argument(3, temp_in)
read(temp_in,'(i10)') stop_num
call get_command_argument(4, temp_in)
select case (temp_in)
case ('yes ')
run_loops=.TRUE.
case ('no ')
run_loops=.FALSE.
case default
write(*,'(A,A,A)') "WARNING: ",temp_in," not supported argument for run_loops [yes/no]"
end select
write(*,'(A,i10,i10,i10)') "Running with start, step, stop ",start_num,step_num,stop_num
do n = start_num, stop_num, step_num
call prep_mats(A,B,C,n)
if(run_loops) then
call cpu_time(start)
call triple_loop_mul(A,B,C,n)
call cpu_time(end)
loop_time = end-start
C = 0
call cpu_time(start)
call triple_loop_mul_alt(A,B,C,n)
call cpu_time(end)
loop_alt_time = end-start
C = 0
endif
call cpu_time(start)
C = matmul(A,B)
call cpu_time(end)
matmul_time = end-start
C = 0
call cpu_time(start)
call dgemm('N', 'N', n, n, n, 1.0_real64, A, n, B, n, 0.0_real64, C, n)
call cpu_time(end)
blas_time = end-start
deallocate(A,B,C)
if(run_loops) then
write(*,'((i5),4(e16.8))') n, loop_time, loop_alt_time, matmul_time, blas_time
else
write(*,'((i5),2(e16.8))') n, matmul_time, blas_time
endif
enddo
contains
subroutine prep_mats(A,B,C,n)
real(real64), dimension(:,:), allocatable, intent(out) :: A,B,C
integer(int32), intent(in) :: n
allocate(A(n,n))
allocate(B(n,n))
allocate(C(n,n))
call random_number(A)
call random_number(B)
C = 0
end subroutine prep_mats
subroutine triple_loop_mul(A,B,C,n)
real(real64), dimension(:,:), intent(in) :: A,B
real(real64), dimension(:,:), intent(out) :: C
integer(int32), intent(in) :: n
integer(int32) :: i, j, k
!$omp parallel do private(i,j,k)
row: do i = 1,n
col: do j = 1,n
sum: do k = 1,n
C(i,j) = C(i,j) + A(i,k)*B(k,j)
end do sum
end do col
end do row
!$omp end parallel do
end subroutine triple_loop_mul
subroutine triple_loop_mul_alt(A,B,C,n)
real(real64), dimension(:,:), intent(in) :: A,B
real(real64), dimension(:,:), intent(out) :: C
integer(int32), intent(in) :: n
integer(int32) :: i, j, k
col: do j = 1,n
row: do i = 1,n
sum: do k = 1,n
C(i,j) = C(i,j) + A(i,k)*B(k,j)
end do sum
end do row
end do col
end subroutine triple_loop_mul_alt
end program matrixproduct
|