Dear All,
I was parallelising the serial molecular dynamic simulation code as given below:
I have only two processors. My system is a duacore system.
c------------------------------------------------------------------
SERIAL CODE
c...................................................................
DO m=1,nmol !! nmol is total number of molecules
DO i=1,2
ax(i,m)=0.0d0 !acceleration
ay(i,m)=0.0d0 !acceleration
az(i,m)=0.0d0 !acceleration
ENDDO
DO j=1,nmol
Ngmol(j,m)=0
ENDDO
ENDDO
c---------------------------------------------
c force calculations
c---------------------------------------------
DO m=1,nmol-1
DO i=1,2
natom = natom +1
ibeg = inbl(natom)
iend = inbl(natom+1)-1
DO ilist=ibeg,iend !! no. of neighbors
j=inblst1(ilist) !! neighbor molecular label
k=inblst2(ilist) !! neighbor atomic label
c j,k are molecular and atomic label of neighbour list of each molecule
c on each processor.
C ____________________
C
C Interatomic distance
C ____________________
xij = x1(i,m) - x1(k,j)
yij = y1(i,m) - y1(k,j)
zij = z1(i,m) - z1(k,j)
C __________________________________
C
C Apply periodic boundary conditions
C __________________________________
dpbcx = - boxx*dnint(xij/boxx)
dpbcy = - boxy*dnint(yij/boxy)
dpbcz = - boxz*dnint(zij/boxz)
xij = xij + dpbcx
yij = yij + dpbcy
zij = zij + dpbcz
rij2 = xij*xij + yij*yij + zij*zij
C ________________
C
C Calculate forces
C ________________
IF (rij2.le.rcutsq) then
rij = dsqrt(rij2)
r_2 = sig1sq/rij2
r_6 = r_2*r_2*r_2
r_12 = r_6*r_6
pot_lj = pot_lj+((r_12-r_6) + rij*vfc-vc) !! need 4*eps1
fij = 24.0d0*eps1*((2*r_12-r_6)/rij2 - fc/rij)
fxlj = fij*xij
fylj = fij*yij
fzlj = fij*zij
ax(i,m) = ax(i,m) + fxlj
ay(i,m) = ay(i,m) + fylj
az(i,m) = az(i,m) + fzlj
ax(k,j) = ax(k,j) - fxlj
ay(k,j) = ay(k,j) - fylj
az(k,j) = az(k,j) - fzlj
pconf = pconf+(xij*fxlj + yij*fylj + zij*fzlj)
IF (ngmol(j,m).eq.0) then
xmolij = xmol(m) - xmol(j) + dpbcx
ymolij = ymol(m) - ymol(j) + dpbcy
zmolij = zmol(m) - zmol(j) + dpbcz
rmolij = dsqrt(xmolij*xmolij+ymolij*ymolij
& +zmolij*zmolij)
nr = dnint(rmolij/dgr)
ng12(nr) = ng12(nr)+2
ngmol(j,m) = 1
ENDIF
ENDIF
ENDDO
ENDDO
ENDDO
DO m=1,nmol
DO i=1,2
write(*,100)ax(i,m),ay(i,m),az(i,m)
ENDDO
ENDDO
and below is the parallelised part
c------------------------------------------------------------------------
c PARALLEL CODE:
c--------------------------------------------------------------------------
DO m=1,nmol
DO i=1,2
ax(i,m)=0.0d0
ay(i,m)=0.0d0
az(i,m)=0.0d0
ENDDO
DO j=1,nmol
ngmol(j,m)=0
ENDDO
ENDDO
CALL para_range(1, nmol, nprocs, myrank, nmolsta, nmolend)
DO m=nmolsta,nmolend-1 !!nmol is diveded into two parts
!!and nmolsta and nmolend are starting and ending
!!index for each processor
DO i=1,2
ibeg = inbl(natom)
iend = inbl(natom+1)-1
DO ilist=ibeg,iend !! no. of neighbors
j=inblst1(ilist) !! neighbor molecular label
k=inblst2(ilist) !! neighbor atomic label
c ____________________
C
C Interatomic distance
C ____________________
xij = x1(i,m) - x1(k,j)
yij = y1(i,m) - y1(k,j)
zij = z1(i,m) - z1(k,j)
dpbcx = - boxx*dnint(xij/boxx)
dpbcy = - boxy*dnint(yij/boxy)
dpbcz = - boxz*dnint(zij/boxz)
xij = xij + dpbcx
yij = yij + dpbcy
zij = zij + dpbcz
rij2 = xij*xij + yij*yij + zij*zij
IF (rij2.le.rcutsq) then
rij = dsqrt(rij2)
r_2 = sig1sq/rij2
r_6 = r_2*r_2*r_2
r_12 = r_6*r_6
pot_lj = pot_lj+((r_12-r_6) + rij*vfc-vc) !! need 4*eps1
fij = 24.0d0*eps1*((2*r_12-r_6)/rij2 - fc/rij)
fxlj = fij*xij
fylj = fij*yij
fzlj = fij*zij
ax(i,m) = ax(i,m) + fxlj
ay(i,m) = ay(i,m) + fylj
az(i,m) = az(i,m) + fzlj
ax(k,j) = ax(k,j) - fxlj
ay(k,j) = ay(k,j) - fylj
az(k,j) = az(k,j) - fzlj
pconf = pconf+(xij*fxlj + yij*fylj + zij*fzlj)
IF (ngmol(j,m).eq.0) then
xmolij = xmol(m) - xmol(j) + dpbcx
ymolij = ymol(m) - ymol(j) + dpbcy
zmolij = zmol(m) - zmol(j) + dpbcz
rmolij = dsqrt(xmolij*xmolij+ymolij*ymolij
& +zmolij*zmolij)
nr = dnint(rmolij/dgr)
ng12(nr) = ng12(nr)+2
ngmol(j,m) = 1
ENDIF
ENDIF
ENDDO
natom = natom +1
ENDDO
ENDDO
if(myrank.ne.(nprocs-1))then !! this is for nmolend molecule on each processor
DO i=1,2
m=nmolend
natom = (2*nmolend)-1
ibeg = inbl(natom)
iend = inbl(natom+1)-1
DO ilist=ibeg,iend !! no. of neighbors
j=inblst1(ilist) !! neighbor molecular label
k=inblst2(ilist) !! neighbor atomic label
xij = x1(i,m) - x1(k,j)
yij = y1(i,m) - y1(k,j)
zij = z1(i,m) - z1(k,j)
dpbcx = - boxx*dnint(xij/boxx)
dpbcy = - boxy*dnint(yij/boxy)
dpbcz = - boxz*dnint(zij/boxz)
xij = xij + dpbcx
yij = yij + dpbcy
zij = zij + dpbcz
rij2 = xij*xij + yij*yij + zij*zij
IF (rij2.le.rcutsq) then
rij = dsqrt(rij2)
r_2 = sig1sq/rij2
r_6 = r_2*r_2*r_2
r_12 = r_6*r_6
pot_lj = pot_lj+((r_12-r_6) + rij*vfc-vc) !! need 4*eps1
fij = 24.0d0*eps1*((2*r_12-r_6)/rij2 - fc/rij)
fxlj = fij*xij
fylj = fij*yij
fzlj = fij*zij
ax(i,m) = ax(i,m) + fxlj
ay(i,m) = ay(i,m) + fylj
az(i,m) = az(i,m) + fzlj
ax(k,j) = ax(k,j) - fxlj
ay(k,j) = ay(k,j) - fylj
az(k,j) = az(k,j) - fzlj
pconf = pconf+(xij*fxlj + yij*fylj + zij*fzlj)!!............doubt
IF (ngmol(j,m).eq.0) then
xmolij = xmol(m) - xmol(j) + dpbcx
ymolij = ymol(m) - ymol(j) + dpbcy
zmolij = zmol(m) - zmol(j) + dpbcz
rmolij = dsqrt(xmolij*xmolij+ymolij*ymolij
& +zmolij*zmolij)
nr = dnint(rmolij/dgr)
ng12(nr) = ng12(nr)+2
ngmol(j,m) = 1
ENDIF
ENDIF
ENDDO
natom = natom +1
ENDDO
endif
c--------------------------------------------------------------------------------------------
An all to all communication to gather all acceleration information on both the processors
c----------------------------------------------------------------------------------------------
DO irank = 0, nprocs - 1
CALL para_range(1, nmol, nprocs, irank, nmolsta, nmolend)
jjasta(irank) = nmolsta
jjalen(irank) = nua * (nmolend - nmolsta + 1)
ENDDO
CALL para_range(1, nmol, nprocs, myrank, nmolsta, nmolend)
DO irank = 0, nprocs - 1
CALL MPI_BCAST(ax(1,jjasta(irank)), jjalen(irank), MPI_REAL8,
& irank, MPI_COMM_WORLD, ierr)
CALL MPI_BCAST(ay(1,jjasta(irank)), jjalen(irank), MPI_REAL8,
& irank, MPI_COMM_WORLD, ierr)
CALL MPI_BCAST(az(1,jjasta(irank)), jjalen(irank), MPI_REAL8,
& irank, MPI_COMM_WORLD, ierr)
ENDDO
c----------------------------------------------------------------------------------
If(myrank.eq.0)then !! or if(myrank.eq.1)
DO m=1,nmol
DO i=1,2
write(*,100)ax(i,m),ay(i,m),az(i,m)
ENDDO
ENDDO
endif
SUBROUTINE para_range(n1, n2, nprocs, irank, ista, iend)
iwork1 = (n2 - n1 + 1) / nprocs
iwork2 = MOD(n2 - n1 + 1, nprocs)
ista = irank * iwork1 + n1 + MIN(irank, iwork2)
iend = ista + iwork1 - 1
IF (iwork2 > irank) iend = iend + 1
END
MY question is : when I printed the results,accelerations on processor 0( ie from 1 to nmol/2) are same as the results for serial code, where as they arent same for processor 1(nmol/2+1 to nmol). As I am learning MPI I couldnt find where it went wrong in doing an all to all operation for accleration part ax(i,m),ay(i,m),az(i,m).
Please give me some solution for this part.
thank you all,
regards,
Ramesh
|