matrixproduct.f90


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134

program matrixproduct

    !> Performs a matrix-matrix multiplication using various methods
    !> and compares the performance of each. The following are used:
    !>  1. Basic triple-loop (iterative algorithm)
    !>  2. Fortran native matmul routine
    !>  3. LAPACK/BLAS library call
        
    use omp_lib
    use, intrinsic :: iso_fortran_env
    implicit none
    external :: dgemm   !> double-precision general matrix-matrix multiplication
    
    real(real64), allocatable, dimension(:,:) :: A, B, C
    real(real64) :: start, end, loop_time, loop_alt_time, matmul_time, blas_time
    integer(int32) :: n, start_num, step_num, stop_num
    character(10) :: temp_in
    logical :: run_loops
    
    !> Start by taking the command-line arguments. This is useful because
    !> it lets us call the program from Bash with a variable matrix size

    call get_command_argument(1, temp_in)
    read(temp_in,'(i10)') start_num
    call get_command_argument(2, temp_in)
    read(temp_in,'(i10)') stop_num
    call get_command_argument(3, temp_in)
    read(temp_in,'(i10)') step_num
    !> The last argument is a string [yes/no] that instructs the program
    !> to either run with the triple-loops or ignore them completely.
    call get_command_argument(4, temp_in)
    select case (trim(temp_in))
        case ('yes')
            run_loops=.TRUE.
        case ('no')
            run_loops=.FALSE.
        case default
            write(*,'("WARNING:",A," is not a supported argument [yes/no], defaulting to YES")') temp_in
            run_loops=.TRUE.
    end select 

    write(*,'("Compiled with ",A,A," on ",A)') COMPILER_VERSION(), COMPILER_OPTIONS()
    write(*,'("Running with start=",I0,", stop=",I0,", step=",I0)') start_num, stop_num, step_num
    
    do n = start_num, stop_num, step_num
        call prep_mats(A,B,C,n)

        if(run_loops) then
                call cpu_time(start)
                call triple_loop_mul(A,B,C,n)
                call cpu_time(end)
                loop_time = end-start
                C = 0

                call cpu_time(start)
                call triple_loop_mul_alt(A,B,C,n)
                call cpu_time(end)
                loop_alt_time = end-start
                C = 0
        endif

        call cpu_time(start)
        C = matmul(A,B)
        call cpu_time(end)
        matmul_time = end-start
        C = 0

        call cpu_time(start)
        call dgemm('N', 'N', n, n, n, 1.0_real64, A, n, B, n, 0.0_real64, C, n)
        call cpu_time(end)
        blas_time = end-start

        deallocate(A,B,C)

        if(run_loops) then
                write(*,'((i5),4(e16.8))') n, loop_time, loop_alt_time, matmul_time, blas_time
        else
                write(*,'((i5),2(e16.8))') n, matmul_time, blas_time
        endif
    enddo

contains

    subroutine prep_mats(A,B,C,n)
        real(real64), dimension(:,:), allocatable, intent(out) :: A,B,C
        integer(int32), intent(in) :: n

        allocate(A(n,n))
        allocate(B(n,n))
        allocate(C(n,n))

        call random_number(A)
        call random_number(B)
        C = 0

    end subroutine prep_mats
    
    subroutine triple_loop_mul(A,B,C,n)
        real(real64), dimension(:,:), intent(in) :: A,B
        real(real64), dimension(:,:), intent(out) :: C
        integer(int32), intent(in) :: n
        integer(int32) :: i, j, k

        !$omp parallel do private(j,k)
        row: do i = 1,n
            col: do j = 1,n
                sum: do k = 1,n
                    C(i,j) = C(i,j) + A(i,k)*B(k,j)
                end do sum
            end do col
        end do row
        !$omp end parallel do

    end subroutine triple_loop_mul

    subroutine triple_loop_mul_alt(A,B,C,n)
        real(real64), dimension(:,:), intent(in) :: A,B
        real(real64), dimension(:,:), intent(out) :: C
        integer(int32), intent(in) :: n
        integer(int32) :: i, j, k

        !$omp parallel do private(j,k)
        col: do j = 1,n
            row: do i = 1,n
                sum: do k = 1,n
                    C(i,j) = C(i,j) + A(i,k)*B(k,j)
                end do sum
            end do row
        end do col 
        !$omp end parallel do

    end subroutine triple_loop_mul_alt

end program matrixproduct