summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Connor Moore <connor@hhmoore.ca> 2026-01-30 23:13:40 -0500
committer Connor Moore <connor@hhmoore.ca> 2026-01-30 23:13:40 -0500
commit e1babc4f71ba2e3fa3139dddb6d77f1c7b5a9683 (patch)
tree 463089ac45f16f5650e9ae8c1b69c0853b2e493f
parent d81191c40989de0c3809c4eade2c58ab1c44146d (diff)
Added support for OpenMP. Cleaned up makefile. Added gnuplot file for `make plots`.
-rw-r--r-- Makefile 36
-rw-r--r-- matrixproduct.f90 27
-rw-r--r-- plots.gnu 43
3 files changed, 86 insertions, 20 deletions
diff --git a/Makefile b/Makefile
index 39b5bdf..32449de 100644
--- a/Makefile
+++ b/Makefile
@@ -1,17 +1,31 @@
-gnu:
- gfortran matrixproduct.f90 -o bin/gcc.out -lblas -O3 -fopenmp
-intel:
- ifx matrixproduct.f90 -o bin/intel.out -lblas -O3 -fopenmp -heap-arrays
+GCC=gfortran
+oneAPI=ifx
-dir:
- mkdir results bin
+all:
+ mkdir -p bin/
+ $(GCC) matrixproduct.f90 -o bin/$(GCC).serial.out -O3 -fexternal-blas -lopenblas -march=native
+ $(oneAPI) matrixproduct.f90 -o bin/$(oneAPI).serial.out -qmkl=sequential -O3 -heap-arrays -xHost
+ $(GCC) matrixproduct.f90 -o bin/$(GCC).parallel.out -O3 -fexternal-blas -lopenblas -march=native -fopenmp
+ $(oneAPI) matrixproduct.f90 -o bin/$(oneAPI).parallel.out -qmkl=parallel -O3 -heap-arrays -xHost -fopenmp
-tests: clean dir gnu intel
- ./bin/gcc.out 100 100 3500 yes > results/gcc_short
- ./bin/intel.out 100 100 3500 yes > results/intel_short
+tests: clean all
+ mkdir -p results/
- ./bin/gcc.out 3500 1000 10500 no > results/gcc_long
- ./bin/intel.out 3500 1000 10500 no > results/gcc_long
+ # Serial runs
+ export OMP_NUM_THREADS=1
+ ./bin/$(GCC).serial.out 100 3500 100 yes > results/$(GCC)_short_serial
+ ./bin/$(oneAPI).serial.out 100 3500 100 yes > results/$(oneAPI)_short_serial
+
+ ./bin/$(GCC).serial.out 3500 20000 500 no > results/$(GCC)_long_serial
+ ./bin/$(oneAPI).serial.out 3500 20000 500 no > results/$(oneAPI)_long_serial
+
+ # Parallel runs
+ export OMP_NUM_THREADS=8
+ ./bin/$(GCC).parallel.out 100 3500 100 yes > results/$(GCC)_short_parallel
+ ./bin/$(oneAPI).parallel.out 100 3500 100 yes > results/$(oneAPI)_short_parallel
+
+ ./bin/$(GCC).parallel.out 3500 20000 500 no > results/$(GCC)_long_parallel
+ ./bin/$(oneAPI).parallel.out 3500 20000 500 no > results/$(oneAPI)_long_parallel
plots:
gnuplot -p plots.gnu
diff --git a/matrixproduct.f90 b/matrixproduct.f90
index e622ac2..d7b3d13 100644
--- a/matrixproduct.f90
+++ b/matrixproduct.f90
@@ -6,7 +6,7 @@ program matrixproduct
!> 2. Fortran native matmul routine
!> 3. LAPACK/BLAS library call
- use :: omp_lib
+ use omp_lib
use, intrinsic :: iso_fortran_env
implicit none
external :: dgemm !> double-precision general matrix-matrix multiplication
@@ -16,24 +16,31 @@ program matrixproduct
integer(int32) :: n, start_num, step_num, stop_num
character(10) :: temp_in
logical :: run_loops
+
+ !> Start by taking the command-line arguments. This is useful because
+ !> it lets us call the program from Bash with a variable matrix size
call get_command_argument(1, temp_in)
read(temp_in,'(i10)') start_num
call get_command_argument(2, temp_in)
- read(temp_in,'(i10)') step_num
- call get_command_argument(3, temp_in)
read(temp_in,'(i10)') stop_num
+ call get_command_argument(3, temp_in)
+ read(temp_in,'(i10)') step_num
+ !> The last argument is a string [yes/no] that instructs the program
+ !> to either run with the triple-loops or ignore them completely.
call get_command_argument(4, temp_in)
- select case (temp_in)
- case ('yes ')
+ select case (trim(temp_in))
+ case ('yes')
run_loops=.TRUE.
- case ('no ')
+ case ('no')
run_loops=.FALSE.
case default
- write(*,'(A,A,A)') "WARNING: ",temp_in," not supported argument for run_loops [yes/no]"
+ write(*,'("WARNING:",A," is not a supported argument [yes/no], defaulting to YES")') temp_in
+ run_loops=.TRUE.
end select
- write(*,'(A,i10,i10,i10)') "Running with start, step, stop ",start_num,step_num,stop_num
+ write(*,'("Compiled with ",A,A," on ",A)') COMPILER_VERSION(), COMPILER_OPTIONS()
+ write(*,'("Running with start=",I0,", stop=",I0,", step=",I0)') start_num, stop_num, step_num
do n = start_num, stop_num, step_num
call prep_mats(A,B,C,n)
@@ -94,7 +101,7 @@ contains
integer(int32), intent(in) :: n
integer(int32) :: i, j, k
- !$omp parallel do private(i,j,k)
+ !$omp parallel do private(j,k)
row: do i = 1,n
col: do j = 1,n
sum: do k = 1,n
@@ -112,6 +119,7 @@ contains
integer(int32), intent(in) :: n
integer(int32) :: i, j, k
+ !$omp parallel do private(j,k)
col: do j = 1,n
row: do i = 1,n
sum: do k = 1,n
@@ -119,6 +127,7 @@ contains
end do sum
end do row
end do col
+ !$omp end parallel do
end subroutine triple_loop_mul_alt
diff --git a/plots.gnu b/plots.gnu
new file mode 100644
index 0000000..a277220
--- /dev/null
+++ b/plots.gnu
@@ -0,0 +1,43 @@
+set key top left
+set format y "%.1f x 10^{%L}"
+
+short_keys = "Triple-loop-row Triple-loop-col Matmul BLAS-Dgemm"
+
+set terminal x11 0 title "GCC Runs"
+set multiplot layout 2, 1
+ set title "Short runs"
+ set logscale x
+ set logscale y
+ set grid
+ set xlabel("N")
+ set ylabel("Time [s]")
+ plot for [i=2:5] 'results/gfortran_short' every ::1 using 1:i with linespoints title word(short_keys,i-1)
+
+ set title "Long runs"
+ set logscale x
+ set logscale y
+ set grid
+ set xlabel("N")
+ set ylabel("Time [s]")
+ plot for [i=2:3] 'results/gfortran_long' every ::1 using 1:i with linespoints title word(short_keys,i+1)
+unset multiplot
+
+
+set terminal x11 1 title "OneAPI Runs"
+set multiplot layout 2, 1
+ set title "Short runs"
+ set logscale x
+ set logscale y
+ set grid
+ set xlabel("N")
+ set ylabel("Time [s]")
+ plot for [i=2:5] 'results/ifx_short' every ::1 using 1:i with linespoints title word(short_keys,i-1)
+
+ set title "Long runs"
+ set logscale x
+ set logscale y
+ set grid
+ set xlabel("N")
+ set ylabel("Time [s]")
+ plot for [i=2:3] 'results/ifx_long' every ::1 using 1:i with linespoints title word(short_keys,i+1)
+unset multiplot