Skip to content

Commit

Permalink
Updated stdpar version to be more in sync with main version. Also add…
Browse files Browse the repository at this point in the history
…ed a stdpar_datadir version that is the same as stdpar but with data directives added back in. This makes it different from the main version only in array reductions, where the stdpar code uses the do concurrent reduce clause, while the main code still uses OpenMP target loops.
  • Loading branch information
sumseq committed Jan 7, 2025
1 parent ddbbc14 commit b281def
Show file tree
Hide file tree
Showing 6 changed files with 9,967 additions and 26 deletions.
7 changes: 4 additions & 3 deletions src/stdpar/README
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
This version of the code has ZERO OpenACC directives.
This version of the code has no directives other than device selection.

It requires the Fortran 2023 standard, as it uses the "reduce" clause on
do concurrent loops.

In order to run this on more than one GPU, you must launch it
in a special way.
To make a "pure Fortran" version of the code, one can remove the two
device selection directives. If they are removed, in order to run
on more than one GPU, it must be launched in a special way.

An example of a launch script to do this is below:

Expand Down
33 changes: 10 additions & 23 deletions src/stdpar/pot3d.F90
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ module ident
!-----------------------------------------------------------------------
!
character(*), parameter :: idcode='POT3D'
character(*), parameter :: vers ='4.3.1_nogpumpi'
character(*), parameter :: vers ='4.3.1_stdpar'
character(*), parameter :: update='12/05/2024'
!
end module
Expand Down Expand Up @@ -3820,7 +3820,7 @@ subroutine potfld
! ****** boundary conditions (i.e., the inhomogeneous part).
!
call set_boundary_points (x_ax,one)
call seam_gen (x_ax,nr,nt,np)
call seam_hhh (x_ax)
call delsq (x_ax,rhs_cg)
!
! ****** Original rhs is zero so just use negative of boundary
Expand All @@ -3847,7 +3847,7 @@ subroutine potfld
call unpack_scalar (phi,x_cg)
!
call set_boundary_points (phi,one)
call seam_gen (phi,nr,nt,np)
call seam_hhh (phi)
!
call dealloc_pot3d_matrix_coefs
deallocate(rhs_cg)
Expand Down Expand Up @@ -3986,7 +3986,7 @@ subroutine write_validation_solution
enddo
!
call set_boundary_points (phi,one)
call seam_gen (phi,nr,nt,np)
call seam_hhh (phi)
!
!
! ****** Set Br.
Expand Down Expand Up @@ -5020,7 +5020,7 @@ subroutine ax (x,y,N)
!
! ****** Seam along edges between processors.
!
call seam_gen (x_ax,nr,nt,np)
call seam_hhh (x_ax)
!
! ****** Get the matrix-vector product.
!
Expand Down Expand Up @@ -5737,8 +5737,6 @@ subroutine seam_gen (a,n1,n2,n3)
real(r_typ), dimension(n2,n3) :: sbuf12,rbuf12
real(r_typ), dimension(n1,n3) :: sbuf21,rbuf21
real(r_typ), dimension(n1,n3) :: sbuf22,rbuf22
real(r_typ), dimension(n1,n2) :: sbuf31,rbuf31
real(r_typ), dimension(n1,n2) :: sbuf32,rbuf32
!
!-----------------------------------------------------------------------
!
Expand All @@ -5761,34 +5759,23 @@ subroutine seam_gen (a,n1,n2,n3)
call timer_on
!
! ****** Seam the third (periodic) dimension.
!
! ****** Since halo data is stride-1, no need for buffers.
!
lbuf=n1*n2
!
do concurrent (j=1:n2, i=1:n1)
sbuf31(i,j)=a(i,j,n3-1)
sbuf32(i,j)=a(i,j, 2)
enddo
!
call MPI_Isend (sbuf31,lbuf,ntype_real,iproc_pp,tag, &
call MPI_Isend (a(:,:,n3-1),lbuf,ntype_real,iproc_pp,tag, &
comm_all,reqs(1),ierr)
!
call MPI_Isend (sbuf32,lbuf,ntype_real,iproc_pm,tag, &
call MPI_Isend (a(:,:, 2),lbuf,ntype_real,iproc_pm,tag, &
comm_all,reqs(2),ierr)
!
call MPI_Irecv (rbuf31,lbuf,ntype_real,iproc_pm,tag, &
call MPI_Irecv (a(:,:, 1),lbuf,ntype_real,iproc_pm,tag, &
comm_all,reqs(3),ierr)
!
call MPI_Irecv (rbuf32,lbuf,ntype_real,iproc_pp,tag, &
call MPI_Irecv (a(:,:,n3),lbuf,ntype_real,iproc_pp,tag, &
comm_all,reqs(4),ierr)
!
call MPI_Waitall (4,reqs,MPI_STATUSES_IGNORE,ierr)
!
do concurrent (j=1:n2, i=1:n1)
a(i,j, 1)=rbuf31(i,j)
a(i,j,n3)=rbuf32(i,j)
enddo
!
!
! ****** Seam the first dimension.
!
Expand Down
Loading

0 comments on commit b281def

Please sign in to comment.