summary | refs | log | tree | commit | diff
path: root/matrixproduct.f90
diff options
context:
space:
mode:
Diffstat (limited to 'matrixproduct.f90')
-rw-r--r-- matrixproduct.f90 62
1 files changed, 37 insertions, 25 deletions
diff --git a/matrixproduct.f90 b/matrixproduct.f90
index d7b3d13..97417e4 100644
--- a/matrixproduct.f90
+++ b/matrixproduct.f90
@@ -1,31 +1,40 @@
program matrixproduct
+ !> MCSC-6030G Project 1. Connor Moore, 2026 <connor@hhmoore.ca>
!> Performs a matrix-matrix multiplication using various methods
!> and compares the performance of each. The following are used:
!> 1. Basic triple-loop (iterative algorithm)
!> 2. Fortran native matmul routine
!> 3. LAPACK/BLAS library call
+ !> The wall times for these are compared between the GNU compiler (gfortran)
+ !> and the Intel compiler (ifx). OpenMP is used for the parallel comparisons.
- use omp_lib
- use, intrinsic :: iso_fortran_env
- implicit none
- external :: dgemm !> double-precision general matrix-matrix multiplication
+ use omp_lib !> For OpenMP parallel do loops
+ use, intrinsic :: iso_fortran_env !> For named datatypes, e.g. real64
+ implicit none !> Don't infer any types from names
+ external :: dgemm !> double-precision general matrix-matrix multiplication
+ !> A number of variables will be declared. This includes three square matrices (allocatable),
+ !> a start/end time holder, variables to hold time for each "technique" (loop/loop_alt/matmul/blas),
+ !> and a character and a logical for command-line arguments. The ANSI escape char is also defined.
real(real64), allocatable, dimension(:,:) :: A, B, C
- real(real64) :: start, end, loop_time, loop_alt_time, matmul_time, blas_time
+ real(real64) :: loop_time, loop_alt_time, matmul_time, blas_time
+ integer(int64) :: start, end, clockrate
integer(int32) :: n, start_num, step_num, stop_num
- character(10) :: temp_in
+ character(len=32) :: temp_in
+ character(len=*), parameter :: ESC_CHAR=achar(27) !> Pretty printing with ANSI escape sequences
logical :: run_loops
!> Start by taking the command-line arguments. This is useful because
!> it lets us call the program from Bash with a variable matrix size
call get_command_argument(1, temp_in)
- read(temp_in,'(i10)') start_num
+ read(temp_in,*) start_num
call get_command_argument(2, temp_in)
- read(temp_in,'(i10)') stop_num
+ read(temp_in,*) stop_num
call get_command_argument(3, temp_in)
- read(temp_in,'(i10)') step_num
+ read(temp_in,*) step_num
+
!> The last argument is a string [yes/no] that instructs the program
!> to either run with the triple-loops or ignore them completely.
call get_command_argument(4, temp_in)
@@ -35,40 +44,43 @@ program matrixproduct
case ('no')
run_loops=.FALSE.
case default
- write(*,'("WARNING:",A," is not a supported argument [yes/no], defaulting to YES")') temp_in
- run_loops=.TRUE.
+ write(error_unit,'(A)') ESC_CHAR // "[31mERROR: Unsupported input (" // trim(temp_in) // ") for loop specification [yes/no]" // ESC_CHAR // "[0m"
+ stop
+
end select
- write(*,'("Compiled with ",A,A," on ",A)') COMPILER_VERSION(), COMPILER_OPTIONS()
- write(*,'("Running with start=",I0,", stop=",I0,", step=",I0)') start_num, stop_num, step_num
+ write(*,'(A)') ESC_CHAR // "[32m" // COMPILER_VERSION() // achar(10) // COMPILER_OPTIONS() // ESC_CHAR // "[0m"
+ write(*,'(A,I0,A,I0,A,I0)') "Running with start=", start_num, ", stop=", stop_num, ", step=", step_num
+
+ call system_clock(count_rate=clockrate)
do n = start_num, stop_num, step_num
call prep_mats(A,B,C,n)
if(run_loops) then
- call cpu_time(start)
+ call system_clock(count=start)
call triple_loop_mul(A,B,C,n)
- call cpu_time(end)
- loop_time = end-start
+ call system_clock(count=end)
+ loop_time = real(end-start, real64)/real(clockrate, real64)
C = 0
- call cpu_time(start)
+ call system_clock(count=start)
call triple_loop_mul_alt(A,B,C,n)
- call cpu_time(end)
- loop_alt_time = end-start
+ call system_clock(count=end)
+ loop_alt_time = real(end-start, real64)/real(clockrate, real64)
C = 0
endif
- call cpu_time(start)
+ call system_clock(count=start)
C = matmul(A,B)
- call cpu_time(end)
- matmul_time = end-start
+ call system_clock(count=end)
+ matmul_time = real(end-start, real64)/real(clockrate, real64)
C = 0
- call cpu_time(start)
+ call system_clock(count=start)
call dgemm('N', 'N', n, n, n, 1.0_real64, A, n, B, n, 0.0_real64, C, n)
- call cpu_time(end)
- blas_time = end-start
+ call system_clock(count=end)
+ blas_time = real(end-start, real64)/real(clockrate, real64)
deallocate(A,B,C)