I have a problem with my MPI code: it hangs when run on multiple nodes, but completes successfully when run on a single node. I am not sure how to debug this. Can someone help me debug this issue?

Program Usage:

mpic++ -o string string.cpp
mpirun -np 4 -npernode 2 -hostfile hosts ./string 12 0.1 0.9 10 2

 

MPI_Reduce hangs in the 2nd iteration (cout output from my program):

 

1st Iteration (Timestep 1)

-----------------------------------------------------

0 Waiting for MPI_Reduce()

0 Done Waiting for MPI_Reduce()

 

1 Waiting for MPI_Reduce()

1 Done Waiting for MPI_Reduce()

 

2 Waiting for MPI_Reduce()

2 Done Waiting for MPI_Reduce()

 

3 Waiting for MPI_Reduce()

3 Done Waiting for MPI_Reduce()

 

0 Sending to right  task      = 1

0 Receiving from right task   = 1

 

1 Receiving from left task   = 0

1 Sending to left task       = 0

 

1 Sending to right  task      = 2

1 Receiving from right task   = 2

 

 

2 Receiving from left task   = 1

2 Sending to left task       = 1

 

2 Sending to right  task      = 3

2 Receiving from right task   = 3

 

3 Receiving from left task   = 2

3 Sending to left task       = 2

 

 

 

2nd Iteration (Timestep 2)

-----------------------------------------------------

0 Waiting for MPI_Reduce()

 

1 Waiting for MPI_Reduce()

1 Done Waiting for MPI_Reduce()

 

2 Waiting for MPI_Reduce()

 

3 Waiting for MPI_Reduce()

 

 

 

My Code:

 

#include <iostream>

#include <vector>

#include <stdio.h>

#include <stdlib.h>

#include "mpi.h"

 

#define MASTER 0

// Message tags for the halo exchange. Declared const: they are compile-time
// protocol constants and were mutable globals before, which invited
// accidental reassignment.
const int RtoL = 10;   // tag for data travelling right-to-left (recv from right / send to left)
const int LtoR = 20;   // tag for data travelling left-to-right (send to right / recv from left)

 

int main ( int argc, char **argv )

{

    int nprocs, taskid;

    FILE *f = NULL;

    int left, right, i_start, i_end;

    float sum = 0;

    MPI_Status status;

    float *y, *yold;

    float *v, *vold;

 

    //  const int NUM_MASSES = 1000;

    //  const float Ktension = 0.1;

    //  const float Kdamping = 0.9;

    //  const float duration = 10.0;

 

#if 0

    if ( argc != 5 ) {

        std::cout << "usage: " << argv[0] << " NUM_MASSES durationInSecs Ktension Kdamping\n";

        return 2;

    }

#endif

 

    int NUM_MASSES  = atoi ( argv[1] );

    float duration = atof ( argv[2] );

    float Ktension = atof ( argv[3] );

    float Kdamping = atof ( argv[4] );

    const int PICKUP_POS = NUM_MASSES / 7;

    const int OVERSAMPLING = 16;

 

    MPI_Init(&argc,&argv);

    MPI_Comm_size(MPI_COMM_WORLD,&nprocs);

    MPI_Comm_rank(MPI_COMM_WORLD,&taskid);

 

    if (taskid  == 0) {

        f = fopen ( "rstring.raw", "wb" );

        if (!f) {

            std::cout << "can't open output file\n";

            return 1;

        }

    }

 

    y = new float[NUM_MASSES];

    yold = new float[NUM_MASSES];

    v = new float[NUM_MASSES];

 

    for (int i = 0; i < NUM_MASSES; i++ ) {

        v[i]  = 0.0f;

        yold[i] = y[i] = 0.0f;

        if (i == NUM_MASSES/2 )

            yold[i] = 1.0;

    }

 

    if (taskid == 0) {

        left = -1;

        right = 1;

    } else if (taskid == nprocs - 1) {

        left = taskid - 1;

        right = -1;

    } else {

        left = taskid - 1;

        right = taskid + 1;

    }

 

    i_start = taskid * (NUM_MASSES/nprocs);

    i_end = i_start + (NUM_MASSES/nprocs);

 

    int numIters = duration * 44100 * OVERSAMPLING;;

    if (argc == 6) {

        numIters = atoi(argv[5]);

    }

 

    for ( int t = 0; t < numIters; t++ ) {

        float sum = 0;

        float gsum = 0;

 

        for ( int i = i_start; i < i_end; i++ ) {

            if ( i == 0 || i == NUM_MASSES-1 ) {

            } else {

                float accel = Ktension * (yold[i+1] + yold[i-1] - 2*yold[i]);

                v[i] += accel;

                v[i] *= Kdamping;

                y[i] = yold[i] + v[i];

                sum += y[i];

            }

        }

 

        std::cout << taskid << " Waiting for MPI_Reduce()" << std::endl;

        MPI_Reduce(&sum, &gsum, 1, MPI_FLOAT, MPI_SUM, MASTER, MPI_COMM_WORLD);

        std::cout << taskid << " Done Waiting for MPI_Reduce()" << std::endl;

 

        if (taskid != 0) {

            MPI_Recv(&y[i_start-1], 1, MPI_FLOAT, left, LtoR, MPI_COMM_WORLD, &status);

            std::cout << taskid << " Receiving from left task   = " << left << std::endl;

            MPI_Send(&y[i_start],   1, MPI_FLOAT, left, RtoL, MPI_COMM_WORLD);

            std::cout << taskid << " Sending to left task       = " << left << std::endl;

        }

        if (taskid != nprocs - 1) {

            MPI_Send(&y[i_end-1],1, MPI_FLOAT, right, LtoR, MPI_COMM_WORLD);

            std::cout << taskid <<" Sending to right  task      = " << right << std::endl;

            MPI_Recv(&y[i_end],  1, MPI_FLOAT, right, RtoL, MPI_COMM_WORLD, &status);

            std::cout << taskid <<" Receiving from right task   = " << right << std::endl;

        }

 

        //printf("After Reduce task = %d yold = %f %f %f %f\n", taskid,yold[0], yold[1], yold[2], yold[3]);

        //printf("After Reduce task = %d y = %f %f %f %f\n", taskid, y[0], y[1], y[2], y[3]);

        //printf("After Reduce task = %d v = %f %f %f %f\n", taskid, v[0], v[1], v[2], v[3]);

 

        float *tmp = y;

        y = yold;

        yold = tmp;

 

        if (taskid == 0) {

            //std::cout<< "sum = " << gsum << std::endl;

            if ( t % OVERSAMPLING == 0 ) {

                fwrite ( &gsum, sizeof(float), 1, f );

            }

        }

    }

    if (taskid  == 0) {

        fclose ( f );

    }

    MPI_Finalize();

}