module globals ! Global variables use omp_lib ! help the compiler find the OMP libraries implicit none integer :: n = 1000 double precision, parameter :: L=1.0d0 double precision, parameter :: pi=2q0*asin(1q0) ! numerical constant end module globals module Langevin ! Initialization and update rule for Langevin particles use globals implicit none logical, allocatable, dimension(:) :: is_tracked double precision :: dt,kT,g,m,sigma,eps,rc ! time step size and physical parameters double precision :: pref1,pref2 ! auxiliary parameters double precision, allocatable, dimension(:) :: x,y,vx,vy,ax,ay,vhx,vhy,x0,y0 ! particle positions, accellerations, velocities, half-step velocities, initial positions contains subroutine set_parameters ! Set time step and physical parameters dt=0.00001d0 ! time step size kT=0.1d0 ! energy g=0.1d0 ! drag coefficient m=1d0 ! mass of the particles, can be normalized to 1. sigma=1.0d-3 ! Potential parameters eps=1d0 rc=sigma*(2.0d0**(1.0d0/6.0d0)) ! Effective particle size ! Set auxiliary parameters pref1=g pref2=sqrt(24d0*kT*g/dt) end subroutine set_parameters subroutine initialize_particles integer :: i, j, k, nx, ix, iy double precision :: ran1(n),ran2(n),gr1(n),gr2(n),dist, lx, ly logical, allocatable, dimension(:,:) :: is_filled ! Give particles initial position and velocity call random_seed() call random_number(ran1) ! uses the built-in PRNG, easy but not very accurate call random_number(ran2) !x=L*(ran1-0.5d0) !x0=x !y=L*(ran2-0.5d0) !y0=y !> Basic box stacking with no RNG !nx = ceiling(sqrt(real(n))) !dist = (0.9*L)/nx !k = 1 !outer: do i = 1,nx ! do j = 1,nx ! x(k) = i*dist-L/2 ! y(k) = j*dist-L/2 ! k = k + 1 ! if (k.eq.n) exit outer ! end do !end do outer !> Box RNG on a finer mesh nx = ceiling(L/rc) dist = 0.9*L/nx allocate(is_filled(nx,nx)) k = 1 !> Assuming (nx)*(nx) grid, first scale random numbers to a max of nx do while (k .lt. n) !> Random numbers call random_number(lx) call random_number(ly) !> Scale them to be grid points ix = ceiling(lx*nx) iy = ceiling(ly*nx) !> Check if grid is occupied if (is_filled(ix,iy)) cycle !> If not, set the particle position and call it a day x(k) = ix*rc - L/2 y(k) = iy*rc - L/2 !> And mark the spot as filled is_filled(ix,iy) = .true. !> Increment!! k = k + 1 end do ax=0d0 ay=0d0 call random_number(ran1) call random_number(ran2) gr1=sqrt(kT/(m))*sqrt(-2*log(ran1))*cos(2*pi*ran2) ! Box-Mueller transform gr2=sqrt(kT/(m))*sqrt(-2*log(ran1))*sin(2*pi*ran2) vx=gr1 vy=gr2 end subroutine initialize_particles end module Langevin module domainDecomposition use globals use Langevin implicit none integer :: b=4 integer, allocatable :: nbl(:,:) contains subroutine set_sector_grid(side_count) integer, intent(in) :: side_count if (side_count .le. 0) then print *, 'Error: sector side count must be > 0.' stop 1 end if b = side_count if (allocated(nbl)) deallocate(nbl) allocate(nbl(0:b*b-1,9)) nbl = -1 end subroutine set_sector_grid include "neighbourList.f90" include "putParticleInBox.f90" include "sortParticles.f90" end module domainDecomposition module BC ! Subroutines related to the boundary conditions use globals use langevin implicit none contains subroutine impose_BC(i) integer :: i !> Recall we are inside an LxL box centered at the origin !> Top boundary if(y(i) .GT. L/2) then y(i) = L - y(i) vhy(i) = -vhy(i) end if !> Left boundary if(x(i) .LT. -L/2) then x(i) = -L - x(i) vhx(i) = -vhx(i) end if !> Bottom boundary if(y(i) .LT. -L/2) then y(i) = -L - y(i) vhy(i) = -vhy(i) end if !> Right boundary if(x(i) .GT. L/2) then x(i) = L - x(i) vhx(i) = -vhx(i) end if if(abs(x(i)).gt.L/2 .or. abs(y(i)).gt.L/2) is_tracked(i)=.false. end subroutine impose_BC end module BC program main use globals use domainDecomposition use Langevin use BC implicit none integer :: i,j,s,ns,p1,p2,nthreads,sector_side integer, allocatable :: lim(:,:) double precision :: t,t_max,m1,m2,rx,ry,dij,F,msd double precision :: wtime,begin,end,Tint,TsinceWrite integer :: NumParticles, NumThreads, NumSectors, tsteps double precision :: moveTime,sortTime,interTime,reactTime,writeTime,rngTime,initTime,overTime,totalTime,wallTime double precision :: t0_move,t1_move,t0_sort,t1_sort,t0_inter,t1_inter,t0_react,t1_react,t0_write,t1_write,t0_rng,t1_rng,t0_init,t1_init,t0_over,t1_over double precision, allocatable, dimension(:) :: ran1,ran2,xscrap,yscrap,vxscrap,vyscrap double precision :: cellcount begin = omp_get_wtime() ! Set all times to 0 moveTime=0d0 ! Time spent moving particles (position and halfstep velocity updates) sortTime=0d0 ! Time spent sorting particles into sectors interTime=0d0 ! Time spent computing interactions (force updates) reactTime=0d0 ! Time spent handling reactions (accel and fullstep velocity updates) writeTime=0d0 ! Time spent writing output rngTime=0d0 ! Time spent generating random numbers initTime=0d0 ! Time spent initializing particles overTime=0d0 ! Time spent on overhead tasks totalTime=0d0 ! Total simulation time accounted for (sum of all above) wallTime=0d0 ! Total wall clock time for the simulation measured t0_over = omp_get_wtime() ! Open files open(11,file='trajectories.xyz') open(12,file='means.out') open(13,file='kinetic_energies.out') open(14,file='data/timings.csv', position='append') t1_over = omp_get_wtime() overTime = overTime + (t1_over-t0_over) t0_init = omp_get_wtime() call parse_command_line(nthreads,sector_side) call set_sector_grid(sector_side) call omp_set_num_threads(nthreads) ! Allocate arrays allocate(x(n),y(n),vx(n),vy(n),ax(n),ay(n),vhx(n),vhy(n),x0(n),y0(n),is_tracked(n),ran1(n),ran2(n),xscrap(n),yscrap(n),vxscrap(n),vyscrap(n)) allocate(lim(0:b*b,2)) call buildNBL() is_tracked = .True. t=0d0 tsteps=0 t_max= 4.0d0 ! integration time Tint = 0.001d0 tsinceWrite=0d0 call set_parameters call initialize_particles t1_init = omp_get_wtime() initTime = initTime + (t1_init-t0_init) print *, 'Starting simulation with rc=',rc,', sigma=',sigma,' and L/b=',L/real(b),'.' ! Check if side length is smaller than cutoff radius cellcount = real(sector_side) if (L/cellcount .lt. rc) then print *, 'Error: sector size (L/b) must be >= cutoff radius (rc).' print *, 'Maximum sectors per dimension for L = ',L,' and rc = ',rc,' is ',ceiling(L/rc) stop 1 end if print *, 'Running simulation with n=',n,' particles, b=',b,' sectors per dimension, and using ',omp_get_max_threads(),' threads.' print *, "rc=",rc," and L/b=",L/real(sector_side) x0 = x y0 = y !$omp parallel do while(t.lt.t_max) ! one thread: fetch pseudo-random numbers ! one thread: update velocity, position, impose BC xscrap=x yscrap=y vxscrap=vx vyscrap=vy !$omp sections !$omp section ! Write to disk t0_write = omp_get_wtime() if(tSinceWrite.gt.Tint) then msd = 0d0 ! reset msd write(11,'(i0)') count(is_tracked) write(11,'(f10.5)') t do i=1,n ! Write traj line like this: "P0 x y z" where P is a literal character, 0 is the particle index, and x,y,z are the coordinates. Use fixed format with 5 decimal places for the coordinates and a field width of 10 characters for each coordinate, and separate them by a single space. write(11,'("P",i0,1x,f10.5,1x,f10.5,1x,f10.5)') i, xscrap(i),yscrap(i),0d0 msd = msd + ((xscrap(i)-x0(i))**2 + (yscrap(i)-y0(i))**2) enddo write(12,*) t,sqrt(msd/real(count(is_tracked),8)),count(is_tracked) write(13,*) t,sum(0.5*vx**2 + 0.5*vy**2, mask=is_tracked) tSinceWrite=0d0 end if t1_write = omp_get_wtime() writeTime = writeTime + (t1_write-t0_write) !$omp section t0_rng = omp_get_wtime() call random_number(ran1) ran1=ran1-0.5d0 call random_number(ran2) ran2=ran2-0.5d0 t1_rng = omp_get_wtime() rngTime = rngTime + (t1_rng-t0_rng) !$omp section t0_move = omp_get_wtime() vhx=vx+0.5d0*ax*dt vhy=vy+0.5d0*ay*dt x=x+vhx*dt y=y+vhy*dt do j=1,n call impose_BC(j) end do t1_move = omp_get_wtime() moveTime = moveTime + (t1_move-t0_move) t0_sort = omp_get_wtime() call order(x,y,vx,vy,x0,y0,vhx,vhy,lim) ! BUGFIX 11/03: half-step velocities must also be re-ordered. t1_sort = omp_get_wtime() sortTime = sortTime + (t1_sort-t0_sort) ax=0d0 ! Add forces here if any ay=0d0 ! Add forces here if any !$omp end sections !$omp flush ! Our first attempt at parallelization of the code: run the computation of the distances and interaction forces on multiple threads: !$omp single t0_inter = omp_get_wtime() !$omp end single !$omp do private(s,i,ns,p1,p2,rx,ry,dij,F) do s=0,b*b-1 do i=1,9 if(nbl(s,i).eq.-1) exit ns=nbl(s,i) ! BUGFIX 11/03: the loop counter was used as sector index instead of the entries of nbl. do p1=lim(s,1),lim(s,2) do p2=lim(ns,1),lim(ns,2) if(p1.eq.p2) cycle rx=x(p2)-x(p1) ry=y(p2)-y(p1) dij=sqrt(rx**2 + ry**2) if(dij.lt.rc) then F=4d0*eps*( -12d0*sigma**12/dij**13 + 6D0* sigma**6/dij**7 ) ax(p1)=ax(p1)+F*rx/(dij*m) ay(p1)=ay(p1)+F*ry/(dij*m) end if end do end do end do end do !$omp end do !$omp single t1_inter = omp_get_wtime() interTime = interTime + (t1_inter - t0_inter) !$omp end single !$omp single t0_react = omp_get_wtime() vx=vhx+0.5d0*ax*dt vy=vhy+0.5d0*ay*dt t=t+dt tSinceWrite=tSinceWrite+dt t1_react = omp_get_wtime() reactTime = reactTime + (t1_react - t0_react) !$omp end single tsteps = tsteps + 1 end do !$omp end parallel print *, 'Time-stepping loop finished after ', tsteps, ' steps.' end = omp_get_wtime() wallTime = end - begin totalTime = moveTime + sortTime + interTime + reactTime + writeTime + rngTime + initTime + overTime !call cpu_time(end) print *,'Parameters:' print *,'NumParticles=',n print *,'NumThreads=',nthreads print *,'NumSectors per dim=',b print *,'Timing:' print *,'InitTime=',initTime print *,'WriteTime=',writeTime print *,'RNGTime=',rngTime print *,'MoveTime=',moveTime print *,'SortTime=',sortTime print *,'InterTime=',interTime print *,'ReactTime=',reactTime print *,'OverheadTime=',overTime print *,'TotalTime=',totalTime print *,'WallTime=',wallTime ! Append times to csvfile with each line as: NumParticles,NumThreads,NumSectors,wallTime,totalTime,moveTime,sortTime,interTime,reactTime,writeTime,rngTime,initTime,overTime ! with commas as separators NumParticles = n NumThreads = nthreads NumSectors = b*b write(14,'("FullPara,",i0,",",i0,",",i0,",",f10.5,",",f10.5,",",f10.5,",",f10.5,",",f10.5,",",f10.5,",",f10.5,",",f10.5,",",f10.5,",",f10.5)') & NumParticles, NumThreads, NumSectors, wallTime, totalTime, moveTime, sortTime, interTime, reactTime, writeTime, rngTime, initTime, overTime if(lim(b*b,2).gt.lim(b*b,1)) print '(a5,i7,a8,i8,a11)','Lost ',lim(b*b,2)-lim(b*b,1),' out of ',n,' particles.' ! De-allocate arrays deallocate(x,y,vx,vy,ax,ay,x0,y0,is_tracked,ran1,ran2,xscrap,yscrap,vxscrap,vyscrap) if (allocated(lim)) deallocate(lim) if (allocated(nbl)) deallocate(nbl) ! Close files close(11) close(12) close(13) close(14) contains subroutine parse_command_line(nthreads_out,sector_side_out) integer, intent(out) :: nthreads_out,sector_side_out integer :: argc,idx,parsed_value character(len=64) :: arg,value nthreads_out = omp_get_max_threads() sector_side_out = b argc = command_argument_count() idx = 1 do while (idx .le. argc) call get_command_argument(idx,arg) select case (trim(arg)) case ('-N') if (idx .ge. argc) call usage_and_stop('Missing value for -N') idx = idx + 1 call get_command_argument(idx,value) call parse_positive_integer(value,'-N',parsed_value) n = parsed_value case ('-P') if (idx .ge. argc) call usage_and_stop('Missing value for -P') idx = idx + 1 call get_command_argument(idx,value) call parse_positive_integer(value,'-P',parsed_value) nthreads_out = parsed_value case ('-S') if (idx .ge. argc) call usage_and_stop('Missing value for -S') idx = idx + 1 call get_command_argument(idx,value) call parse_positive_integer(value,'-S',parsed_value) sector_side_out = parsed_value case default call usage_and_stop('Unknown argument: '//trim(arg)) end select idx = idx + 1 end do end subroutine parse_command_line subroutine parse_positive_integer(text,flag_name,parsed_value) character(len=*), intent(in) :: text,flag_name integer, intent(out) :: parsed_value integer :: io read(text,*,iostat=io) parsed_value if (io .ne. 0 .or. parsed_value .le. 0) then call usage_and_stop('Invalid value for '//trim(flag_name)//': '//trim(text)) end if end subroutine parse_positive_integer subroutine usage_and_stop(message) character(len=*), intent(in) :: message print *, trim(message) print *, 'Usage: ./simulationLD [-N ] [-P ] [-S ]' print *, ' -N Number of particles' print *, ' -P Number of OpenMP threads' print *, ' -S Number of sectors per dimension (total = S*S)' stop 1 end subroutine usage_and_stop end program main