Hello,

i am still trying to understand the parallelized version of the heat equation 2D solving that we saw at school. In order to explain my problem, i need to list the main code :

9       program heat
10 !**************************************************************************
11 !
12 !   This program solves the heat equation on the unit square [0,1]x[0,1]
13 !        | du/dt - Delta(u) = 0
14 !        |  u/gamma = cste
15 !   by implementing a explicit scheme.
16 !   The discretization is done using a 5 point finite difference scheme
17 !   and the domain is decomposed into sub-domains.
18 !   The PDE is discretized using a 5 point finite difference scheme
19 !   over a (x_dim+2)*(x_dim+2) grid including the end points
20 !   correspond to the boundary points that are stored.
21 !
22 !   The data on the whole domain are stored in
23 !   the following way :
24 !
25 !    y
26 !           ------------------------------------
27 !    d      |                                  |
28 !    i      |                                  |
29 !    r      |                                  |
30 !    e      |                                  |
31 !    c      |                                  |
32 !    t      |                                  |
33 !    i      | x20                              |
34 !    o /\   |                                  |
35 !    n  |   | x10                              |
36 !       |   |                                  |
37 !       |   | x00  x01 x02 ...                 |
38 !       |   ------------------------------------
39 !        -------> x direction  x(*,j)
40 !
41 !   The boundary conditions are stored in the following submatrices
42 !
43 !
44 !        x(1:x_dim, 0)          ---> left   temperature
45 !        x(1:x_dim, x_dim+1)    ---> right  temperature
46 !        x(0, 1:x_dim)          ---> top    temperature
47 !        x(x_dim+1, 1:x_dim)    ---> bottom temperature
48 !
49 !**************************************************************************
50       implicit none
51       include 'mpif.h'
52 ! size of the discretization
53       integer :: x_dim, nb_iter
54       double precision, allocatable :: x(:,:),b(:,:),x0(:,:)
55       double precision  :: dt, h, epsilon
56       double precision  :: resLoc, result, t, tstart, tend
57 !
58       integer :: i,j
59       integer :: step, maxStep
60       integer :: size_x, size_y, me, x_domains,y_domains
61       integer :: iconf(5), size_x_glo
62       double precision conf(2)
63 !
64 ! MPI variables
65       integer :: nproc, infompi, comm, comm2d, lda, ndims
66       INTEGER, DIMENSION(2)  :: dims
67       LOGICAL, DIMENSION(2)  :: periods
68       LOGICAL, PARAMETER     :: reorganisation = .false.
69       integer :: row_type
70       integer, parameter :: nbvi=4
71       integer, parameter :: S=1, E=2, N=3, W=4
72       integer, dimension(4) :: neighBor
73
74 !
75       intrinsic abs
76 !
77 !
78       call MPI_INIT(infompi)
79       comm = MPI_COMM_WORLD
80       call MPI_COMM_SIZE(comm,nproc,infompi)
81       call MPI_COMM_RANK(comm,me,infompi)
82 !
83 !
84       if (me.eq.0) then
86       endif
87       call MPI_BCAST(iconf,5,MPI_INTEGER,0,comm,infompi)
88       call MPI_BCAST(conf,2,MPI_DOUBLE_PRECISION,0,comm,infompi)
89 !
90       size_x    = iconf(1)
91       size_y    = iconf(1)
92       x_domains = iconf(3)
93       y_domains = iconf(4)
94       maxStep   = iconf(5)
95       dt        = conf(1)
96       epsilon   = conf(2)
97 !
98       size_x_glo = x_domains*size_x+2
99       h      = 1.0d0/dble(size_x_glo)
100       dt     = 0.25*h*h
101 !
102 !
103       lda = size_y+2
104       allocate(x(0:size_y+1,0:size_x+1))
105       allocate(x0(0:size_y+1,0:size_x+1))
106       allocate(b(0:size_y+1,0:size_x+1))
107 !
108 ! Create 2D cartesian grid
109       periods(:) = .false.
110
111       ndims = 2
112       dims(1)=x_domains
113       dims(2)=y_domains
114       CALL MPI_CART_CREATE(MPI_COMM_WORLD, ndims, dims, periods, &
115         reorganisation,comm2d,infompi)
116 !
117 ! Identify neighbors
118 !
119       NeighBor(:) = MPI_PROC_NULL
120 ! Left/West and right/Est neigbors
121       CALL MPI_CART_SHIFT(comm2d,0,1,NeighBor(W),NeighBor(E),infompi)
122
123       print *,'mpi_proc_null=', MPI_PROC_NULL
124       print *,'rang=', me
125       print *, 'ici premier mpi_cart_shift : neighbor(w)=',NeighBor(W)
126       print *, 'ici premier mpi_cart_shift : neighbor(e)=',NeighBor(E)
127
128 ! Bottom/South and Upper/North neigbors
129       CALL MPI_CART_SHIFT(comm2d,1,1,NeighBor(S),NeighBor(N),infompi)
130
131
132       print *, 'ici deuxieme mpi_cart_shift : neighbor(s)=',NeighBor(S)
133       print *, 'ici deuxieme mpi_cart_shift : neighbor(n)=',NeighBor(N)
134
135
136
137 !
138 ! Create row data type to coimmunicate with South and North neighbors
139 !
140       CALL MPI_TYPE_VECTOR(size_x, 1, size_y+2, MPI_DOUBLE_PRECISION, row_type,infompi)
141       CALL MPI_TYPE_COMMIT(row_type, infompi)
142 !
143 ! initialization
144 !
145       call initvalues(x0, b, size_x+1, size_x )
146 !
147 ! Update the boundaries
148 !
149       call updateBound(x0,size_x,size_x, NeighBor, comm2d, row_type)
150
151       step = 0
152       t    = 0.0
153 !
154       tstart = MPI_Wtime()
155 ! REPEAT
156  10   continue
157 !
158          step = step + 1
159          t    = t + dt
160 ! perform one step of the explicit scheme
161          call Explicit(x0,x,b, size_x+1, size_x, size_x, dt, h, resLoc)
162 ! update the partial solution along the interface
163          call updateBound(x0,size_x,size_x, NeighBor, comm2d, row_type)
164 ! Check the distance between two iterates
165          call MPI_ALLREDUCE(resLoc,result,1, MPI_DOUBLE_PRECISION, MPI_SUM,comm,infompi)
166          result= sqrt(result)
167 !
168          if (me.eq.0) write(*,1002) t,result
169 !
170        if ((result.gt.epsilon).and.(step.lt.maxStep)) goto 10
171 !
172 ! UNTIL "Convergence"
173 !
174        tend = MPI_Wtime()
175        if (me.eq.0) then
176          write(*,*)
177          write(*,*) ' Convergence after ', step,' steps '
178          write(*,*) '      Problem size              ', size_x*x_domains*size_y*y_domains
179          write(*,*) ' Wall Clock                     ', tend-tstart
180
181 !
182 ! Print the solution at each point of the grid
183 !
184          write(*,*)
185          write(*,*) ' Computed solution '
186          write(*,*)
187          do 30, j=size_x+1,0,-1
188             write(*,1000)(x0(j,i),i=0,size_x+1)
189  30      continue
190        endif
191 !
192       call MPI_FINALIZE(infompi)
193 !
194       deallocate(x)
195       deallocate(x0)
196       deallocate(b)
197 !
198 ! Formats available to display the computed values on the grid
199 !
200 1000  format(100(1x, f7.3))
201 1001  format(100(1x, e7.3))
202 1002   format(' At time ',E8.2,' Norm ', E8.2)
203
204 !
205       stop
206       end
207 !
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

The routine "Explicit" at line 161 allows to compute  the domain 2D ( the values of Temperature are stocked in vector "x" : x(1:size_x,1:size_y)) with the following scheme :

! The stencil of the explicit operator for the heat equation
! on a regular rectangular grid using a five point finite difference
! scheme in space is
!
!                     |                + 1                                    |
!                     |                                                          |
!dt/(h*h) *      | +1      -4 + h*h/dt                   + 1    |
!                     |                                                          |
!                     |               + 1                                     |
!
diag = - 4.0 + h*h/dt
weight =  dt/(h*h)
!
! perform an explicit update on the points within the domain
do 20, j=1,size_x
do 30, i=1,size_y
x(i,j) = weight *( x0(i-1,j) + x0(i+1,j)  &
+ x0(i,j-1) + x0(i,j+1) +x0(i,j)*diag)
30       continue
20     continue

------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

The routine "updateBound" updates the bound values like this :

1 !*******************************************************************
2 SUBROUTINE updateBound(x, size_x, size_y, NeighBor, comm2d, row_type)
3
4 !*****************************************************************
5    include 'mpif.h'
6 !
7   INTEGER size_x, size_y
8 !Iterate
9   double precision,  dimension(0:size_y+1,0:size_x+1) :: x
10 !Type row_type
11   INTEGER :: row_type
12   integer, parameter :: nbvi=4
13   integer, parameter :: S=1, E=2, N=3, W=4
14   integer, dimension(4) :: neighBor
15 !
16   INTEGER  :: infompi, comm2d
17   INTEGER :: flag
18   INTEGER, DIMENSION(MPI_STATUS_SIZE)    :: status
19
20 !********* North/South communication ************************************
21   flag = 1
22   !Send my boundary to North and receive from South
23   CALL MPI_SENDRECV(x(size_y, 1), 1, row_type ,neighBor(N),flag, &
24                      x(0,1), 1, row_type,neighbor(S),flag, &
25                      comm2d, status, infompi)
26
27   !Send my boundary to South and receive from North
28   CALL MPI_SENDRECV(x(1,1), 1, row_type ,neighbor(S),flag, &
29                      x(size_y+1,1), 1, row_type ,neighbor(N),flag, &
30                      comm2d, status, infompi)
31
32 !********* Est/West communication ************************************
33   flag = 2
34   !Send my boundary to West and receive from Est
35   CALL MPI_SENDRECV(x(1,1),  size_y, MPI_DOUBLE_PRECISION, neighbor(W), flag, &
36                      x(1, size_x+1),  size_y, MPI_DOUBLE_PRECISION,neighbor(E), flag, &
37                      comm2d, status, infompi)
38
39   !Send my boundary to Est and receive from West
40   CALL MPI_SENDRECV( x(1,size_x), size_y, MPI_DOUBLE_PRECISION, neighbor(E), flag, &
41                      x(1,0),size_y, MPI_DOUBLE_PRECISION, neighbor(W), flag, &
42                      comm2d, status, infompi)
43
44 END SUBROUTINE updateBound

--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

I am confused between the shift of the values near to the bounds done by the "updateBound" routine  and the main loop (at line 161 in main code)  which calls the routine "Explicit".
For a given process (say number 1) ( i use 4 here for execution), i send to the east process (3) the penultimate column left column, to the north process (0) the penultimate row top ,and to the others (mpi_proc_null=-2)
the penultimate right column and the bottom row. But how the 4  processes are synchronous ? I don't understand too why all the processes go through the solving piece of code calling the "Explicit" routine.

Here'i the sequential version of this simulation :

49       program heat
50       implicit none
51 ! size of the discretization
52       integer size_x, maxStep
53       integer lda
54       double precision, allocatable:: x(:,:),b(:,:), x0(:,:)
55       double precision dt, h, result, epsilon
56
57 ! index loop
58       integer i,j, step
59       double precision t
60 !
61       intrinsic abs
62 !
63 !
64       print *,' Size of the square '
66       h       = 1.0/(size_x+1)
67       dt      = 0.25*h*h
68       maxStep = 100
69       print *, 'Max. number of steps '
71       epsilon = 1.d-8
72 !
73       allocate(x(0:size_x+1,0:size_x+1))
74       allocate(x0(0:size_x+1,0:size_x+1))
75       allocate(b(0:size_x+1,0:size_x+1))
76 !
77 !
78 ! initialization
79 !
80       call initvalues(x0, b, size_x+1, size_x )
81 !
82       step = 0
83       t    = 0.0
84 ! REPEAT
85  10   continue
86 !
87          step = step + 1
88          t    = t + dt
89 !
90          call Explicit(x0, x, b, size_x+1, size_x, size_x, dt, &
91             h, result)
92          result = sqrt(result)
93        if ((result.gt.epsilon).and.(step.lt.maxStep)) goto 10
94 !
95         write(*,*)
96         write(*,*) ' Convergence after ', step,' steps '
97         write(*,*) '      Problem size              ',  size_x*size_x
98 !

------------------------------------------------------------------------------------------------------------------------------------------------------------------------

Sorry for this long post but i would like to clarify my problem as much as it was possible.

Any explanation would help me.