It is a quad core machine (Intel Core2 Quad CPU Q9550 - 64bit).
OpenMPI is the one that comes with Fedora 13.
I tested it on a cluster of Intel Xeon (32bit) machines with Gigabit Ethernet
and OpenMPI 1.3.3; the results are:
// with no timeout
Total time: 1.00309 secs
Estimated calls: 1116537
MPI_Test calls: 395488
Average: 0.0000018129 secs
Max: 0.0002990606 secs [91608]
Min: 0.0000008807 secs [694]
Successful MPI_Test(): 0.0000421085 secs
// with timeout
Total time: 1.00342 secs
Estimated calls: 1140521
MPI_Test calls: 1004
Average: 0.0000018511 secs
Max: 0.0000100477 secs [1003]
Min: 0.0000008813 secs [729]
Successful MPI_Test(): 0.0000100477 secs
I have seen similar behavior using IBM MPI on Power5+/AIX.
On Fri, Nov 12, 2010 at 11:00 AM, Jeff Squyres <jsquyres_at_[hidden]> wrote:
> What kind of network did you do these tests on?
>
> Were both processes on the same machine or different machines?
>
>
> On Nov 10, 2010, at 12:29 AM, Yiannis Papadopoulos wrote:
>
>> Hello,
>>
>> I am using OpenMPI 1.4.1. I have a small test case that calls
>> MPI_Test() too many times. I see one or two random time spikes when
>> this happens. On the other hand, if I avoid calling MPI_Test() based
>> on a timeout, this problem disappears.
>>
>> For example, with no timeout, the results I'm getting are:
>> Total time: 1.00008 secs
>> MPI_Test calls: 5541425
>> Average: 0.0000001234 secs
>> Max:   0.0002560616 secs [4374247]
>> Min:   0.0000000000 secs [1]
>> Successful MPI_Test(): 0.0000090599 secs
>>
>> With a timeout of 0.0001 secs (which means I just delay calling MPI_Test() a bit):
>> Total time: 1.00009 secs
>> MPI_Test calls: 9946
>> Average: 0.0000000018 secs
>> Max:   0.0000011921 secs [98]
>> Min:   0.0000000000 secs [1]
>> Successful MPI_Test(): 0.0000011921 secs
>>
>> With a timeout of 0.01secs:
>> Total time: 1.0001 secs
>> MPI_Test calls: 101
>> Average: 0.0000001936 secs
>> Max:   0.0000028610 secs [100]
>> Min:   0.0000000000 secs [2]
>> Success: 0.0000028610 secs
>>
>>
>> Things to notice:
>> 1) the total time is similar
>> 2) the number of MPI_Test() calls varies drastically
>> 3) the average MPI_Test() call time is significantly less when a
>> timeout is introduced
>> 4) the call that takes the most time is not the last (successful) one,
>> as one would expect, except in the case where the timeout is fairly
>> large (0.01 secs).
>> 5) the time of the slowest MPI_Test() call is 2 orders of magnitude
>> higher than that of the successful MPI_Test() in the non-timeout version.
>>
>> Does this qualify as a bug?
>>
>> Thanks
>>
>>
>> PS The code I have used follows. I compiled it with
>>
>>   mpic++ -O3 mpi_test_delay.cc -o mpi_test_delay_no_timeout
>>
>> for no timeout and
>>
>>   mpic++ -O3 -DTIMEOUT=0.01 mpi_test_delay.cc -o mpi_test_delay
>>
>> for a timeout of 0.01secs.
>>
>> // mpi_test_delay.cc
>> #include <iostream>
>> #include <iomanip>
>> #include <mpi.h>
>> #include <unistd.h>
>> #include <vector>
>> #include <algorithm>
>> #include <numeric>
>>
>> int main(int argc, char* argv[])
>> {
>> Â MPI_Init(&argc, &argv);
>>
>> Â MPI_Comm comm = MPI_COMM_WORLD;
>> Â int rank = MPI_PROC_NULL;
>> Â int size = MPI_PROC_NULL;
>> Â MPI_Comm_rank(comm, &rank);
>> Â MPI_Comm_size(comm, &size);
>>
>> Â if (size<2) {
>> Â Â MPI_Abort(comm, -1);
>> Â }
>>
>> Â const std::size_t bufsize = 1024;
>> Â char buffer[bufsize];
>>
>> Â // warm-up
>> Â unsigned int N = 0;
>> Â if (rank==0) {
>> Â Â sleep(1);
>> Â Â MPI_Send(buffer, bufsize, MPI_BYTE, 1, 0, comm);
>> Â }
>> Â else {
>> Â Â MPI_Request req;
>> Â Â MPI_Irecv(buffer, bufsize, MPI_BYTE, MPI_ANY_TAG, 0, comm, &req);
>> Â Â for (int flag = 0; flag==0; ++N) {
>> Â Â Â MPI_Test(&req, &flag, MPI_STATUS_IGNORE);
>> Â Â }
>> Â }
>>
>> Â // measurement container
>> Â std::vector<double> time_res;
>> Â time_res.reserve(N * 1.1);
>>
>> Â MPI_Barrier(comm);
>>
>> Â // benchmark MPI_Test
>> Â double total_time = MPI_Wtime();
>>
>> Â if (rank==0) {
>> Â Â sleep(1);
>> Â Â MPI_Send(buffer, bufsize, MPI_BYTE, 1, 0, comm);
>> Â }
>> Â else if (rank==1) {
>> Â Â MPI_Request req;
>> Â Â MPI_Irecv(buffer, bufsize, MPI_BYTE, MPI_ANY_TAG, 0, comm, &req);
>>
>> #ifdef TIMEOUT
>> Â Â double timeout = 0.0;
>> #endif
>> Â Â for (int flag = 0; flag==0; ) {
>> #ifdef TIMEOUT
>> Â Â Â double ntimeout = MPI_Wtime();
>> Â Â Â if ( (ntimeout-timeout)<TIMEOUT) continue;
>> Â Â Â timeout = ntimeout;
>> #endif
>> Â Â Â double time = MPI_Wtime();
>> Â Â Â MPI_Test(&req, &flag, MPI_STATUS_IGNORE);
>> Â Â Â time = MPI_Wtime() - time;
>> Â Â Â time_res.push_back(time);
>> Â Â }
>> Â }
>> Â total_time = MPI_Wtime() - total_time;
>>
>> Â MPI_Barrier(comm);
>>
>> Â // print total time for execution, number of MPI_Test calls,
>> Â // average, max and min time for MPI_Test and how much was the final
>> Â // MPI_Test
>> Â if (rank==1) {
>> Â Â double average = std::accumulate(time_res.begin(), time_res.end(),
>> 0.0)/time_res.size();
>> Â Â std::vector<double>::iterator max =
>> std::max_element(time_res.begin(), time_res.end());
>> Â Â std::vector<double>::iterator min =
>> std::min_element(time_res.begin(), time_res.end());
>>
>> Â Â std::cout << "Total time: " << total_time << " secs\n"
>> Â Â Â Â Â Â Â << "MPI_Test calls: " << time_res.size() << '\n'
>> Â Â Â Â Â Â Â << std::fixed << std::setprecision(10)
>> Â Â Â Â Â Â Â << "Average: " << average << " secs\n"
>> Â Â Â Â Â Â Â << "Max: Â Â " << *max << " secs [" <<
>> std::distance(time_res.begin(), max) << "]\n"
>> Â Â Â Â Â Â Â << "Min: Â Â " << *min << " secs [" <<
>> std::distance(time_res.begin(), min) << "]\n"
>> Â Â Â Â Â Â Â << "Success: " << time_res[time_res.size()-1] << " secs"
>> << std::endl;
>> Â }
>>
>> Â MPI_Finalize();
>>
>> Â return 0;
>> }
>> _______________________________________________
>> users mailing list
>> users_at_[hidden]
>> http://www.open-mpi.org/mailman/listinfo.cgi/users
>
>
> --
> Jeff Squyres
> jsquyres_at_[hidden]
> For corporate legal information go to:
> http://www.cisco.com/web/about/doing_business/legal/cri/
>
>
> _______________________________________________
> users mailing list
> users_at_[hidden]
> http://www.open-mpi.org/mailman/listinfo.cgi/users
>
|