Upgraded VT version to 5.14
author jurenz
Wed Nov 14 13:29:18 2012 +0000 (6 months ago)
changeset 21003 d8337d642968
parent 21002 a892facfe86f
child 21004 7a45f29db6bf
Upgraded VT version to 5.14
ompi/contrib/vt/vt/ChangeLog
ompi/contrib/vt/vt/INSTALL
ompi/contrib/vt/vt/Makefile.am
ompi/contrib/vt/vt/VERSION
ompi/contrib/vt/vt/config/defaults/bgq
ompi/contrib/vt/vt/config/m4/acinclude.compwrap.m4
ompi/contrib/vt/vt/config/m4/acinclude.cuda.m4
ompi/contrib/vt/vt/config/m4/acinclude.cudawrap.m4
ompi/contrib/vt/vt/config/m4/acinclude.cupti.m4
ompi/contrib/vt/vt/config/m4/acinclude.dl.m4
ompi/contrib/vt/vt/config/m4/acinclude.getcpu.m4
ompi/contrib/vt/vt/config/m4/acinclude.java.m4
ompi/contrib/vt/vt/config/m4/acinclude.libwrap.m4
ompi/contrib/vt/vt/config/m4/acinclude.memhooks.m4
ompi/contrib/vt/vt/config/m4/acinclude.mpi.m4
ompi/contrib/vt/vt/config/m4/acinclude.pform.m4
ompi/contrib/vt/vt/config/m4/acinclude.plugincntr.m4
ompi/contrib/vt/vt/config/m4/acinclude.tauinst.m4
ompi/contrib/vt/vt/config/m4/acinclude.timer.m4
ompi/contrib/vt/vt/config/m4/acinclude.vtrun.m4
ompi/contrib/vt/vt/configure.in
ompi/contrib/vt/vt/doc/FILTER.SPEC
ompi/contrib/vt/vt/doc/UserManual.html
ompi/contrib/vt/vt/doc/UserManual.pdf
ompi/contrib/vt/vt/extlib/otf/AUTHORS
ompi/contrib/vt/vt/extlib/otf/ChangeLog
ompi/contrib/vt/vt/extlib/otf/VERSION
ompi/contrib/vt/vt/extlib/otf/docu/tools/otfprofile.pdf
ompi/contrib/vt/vt/extlib/otf/docu/tools/otftools.pdf
ompi/contrib/vt/vt/extlib/otf/otfauxlib/OTFAUX_Process.c
ompi/contrib/vt/vt/extlib/otf/otfauxlib/OTFAUX_State.c
ompi/contrib/vt/vt/extlib/otf/otfauxlib/OTFAUX_Thumbnail.c
ompi/contrib/vt/vt/extlib/otf/otflib/OTF_KeyValue.c
ompi/contrib/vt/vt/extlib/otf/otflib/OTF_KeyValue.h
ompi/contrib/vt/vt/extlib/otf/otflib/OTF_RBuffer.c
ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Version.h
ompi/contrib/vt/vt/extlib/otf/otflib/OTF_WBuffer.c
ompi/contrib/vt/vt/extlib/otf/tools/otfaux/Handler.cpp
ompi/contrib/vt/vt/extlib/otf/tools/otfaux/Handler.h
ompi/contrib/vt/vt/extlib/otf/tools/otfaux/otfaux.cpp
ompi/contrib/vt/vt/extlib/otf/tools/otfdump/Handler.cpp
ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/Makefile.common
ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/clustering.cpp
ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/collect_data.cpp
ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/collect_dispersion.cpp
ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/create_filter.cpp
ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/create_filter.h
ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/create_latex.cpp
ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/create_marker.cpp
ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/datastructs.h
ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/otfprofile.cpp
ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/otfprofile.h
ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/process_dispersion.cpp
ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/reduce_data.cpp
ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/summarize_data.cpp
ompi/contrib/vt/vt/include/vt_inttypes.h.in
ompi/contrib/vt/vt/rfg/Makefile.am
ompi/contrib/vt/vt/rfg/rfg_filter.c
ompi/contrib/vt/vt/rfg/rfg_filter.h
ompi/contrib/vt/vt/rfg/rfg_groups.c
ompi/contrib/vt/vt/rfg/rfg_groups.h
ompi/contrib/vt/vt/rfg/rfg_regions.c
ompi/contrib/vt/vt/rfg/rfg_regions.h
ompi/contrib/vt/vt/tools/opari/ChangeLog
ompi/contrib/vt/vt/tools/opari/doc/ChangeLog
ompi/contrib/vt/vt/tools/opari/tool/process_c.cc
ompi/contrib/vt/vt/tools/opari/tool/process_f.cc
ompi/contrib/vt/vt/tools/vtdyn/vt_dyn.cc
ompi/contrib/vt/vt/tools/vtfilter/old/vt_tracefilter.cc
ompi/contrib/vt/vt/tools/vtfilter/vt_filter_trc.cc
ompi/contrib/vt/vt/tools/vtsetup/vtsetup-data.xml.in
ompi/contrib/vt/vt/tools/vtunify/vt_unify_defs_recs.h
ompi/contrib/vt/vt/tools/vtwrapper/vt_wrapper.cc
ompi/contrib/vt/vt/tools/vtwrapper/vtc++-wrapper-data.txt.in
ompi/contrib/vt/vt/tools/vtwrapper/vtcc-wrapper-data.txt.in
ompi/contrib/vt/vt/tools/vtwrapper/vtfort-wrapper-data.txt.in
ompi/contrib/vt/vt/tools/vtwrapper/vtnvcc-wrapper-data.txt.in
ompi/contrib/vt/vt/util/Makefile.am
ompi/contrib/vt/vt/util/hash.c
ompi/contrib/vt/vt/util/hash.h
ompi/contrib/vt/vt/vtlib/Makefile.am
ompi/contrib/vt/vt/vtlib/vt_comp_gnu.c
ompi/contrib/vt/vt/vtlib/vt_cudart.c
ompi/contrib/vt/vt/vtlib/vt_cudartwrap.c
ompi/contrib/vt/vt/vtlib/vt_cudartwrap.h
ompi/contrib/vt/vt/vtlib/vt_cupti_activity.c
ompi/contrib/vt/vt/vtlib/vt_env.c
ompi/contrib/vt/vt/vtlib/vt_env.h
ompi/contrib/vt/vt/vtlib/vt_getcpu.c
ompi/contrib/vt/vt/vtlib/vt_gpu.c
ompi/contrib/vt/vt/vtlib/vt_metric_papi.c
ompi/contrib/vt/vt/vtlib/vt_mpicom.c
ompi/contrib/vt/vt/vtlib/vt_mpicom.h
ompi/contrib/vt/vt/vtlib/vt_mpifile.c
ompi/contrib/vt/vt/vtlib/vt_mpireq.c
ompi/contrib/vt/vt/vtlib/vt_mpireq.h
ompi/contrib/vt/vt/vtlib/vt_mpiwrap.c
ompi/contrib/vt/vt/vtlib/vt_otf_sum.c
ompi/contrib/vt/vt/vtlib/vt_pform_bgp.c
ompi/contrib/vt/vt/vtlib/vt_pform_bgq.c
ompi/contrib/vt/vt/vtlib/vt_pform_crayxe.c
ompi/contrib/vt/vt/vtlib/vt_pform_linux.c
ompi/contrib/vt/vt/vtlib/vt_pform_sun.c
ompi/contrib/vt/vt/vtlib/vt_plugin_cntr.c
ompi/contrib/vt/vt/vtlib/vt_plugin_cntr_int.h
ompi/contrib/vt/vt/vtlib/vt_thrd.c
ompi/contrib/vt/vt/vtlib/vt_thrd.h
ompi/contrib/vt/vt/vtlib/vt_trc.c
ompi/contrib/vt/vt/vtlib/vt_user_region.c
     1.1 --- a/ompi/contrib/vt/vt/ChangeLog	Wed Nov 14 04:52:39 2012 +0000
     1.2 +++ b/ompi/contrib/vt/vt/ChangeLog	Wed Nov 14 13:29:18 2012 +0000
     1.3 @@ -1,7 +1,53 @@
     1.4 -5.13.1openmpi
     1.5 -	- updated version of internal OTF to 1.11.2openmpi
     1.6 +5.14openmpi
     1.7 +	- updated version of internal OTF to 1.12.1openmpi
     1.8  	  (see extlib/otf/ChangeLog)
     1.9 +	- added support for filtering functions of specific call paths
    1.10 +	- introduced new environment variable VT_MPI_IGNORE_FILTER to
    1.11 +	  enable/disable recording of MPI communication events even if their
    1.12 +	  corresponding functions are filtered
    1.13 +	- fixed undefined reference error for vt_get_mpi_f_in_place___ when
    1.14 +	  linking the Fortran MPI wrapper library (libvt-fmpi) built for
    1.15 +	  SGI-MPT (--with-sgimpt)
    1.16  	- compiler wrappers:
    1.17 +		- added option '-vt:inst-exclude-file[-list]' to exclude source
    1.18 +		  files from the automatic instrumentation by the compiler or
    1.19 +		  PDT/TAU
    1.20 +		- added option '-vt:opari-exclude-file[-list]' to exclude source
    1.21 +		  files from the instrumentation of OpenMP constructs by OPARI
    1.22 +		- consider preprocessed files (-vt:preprocess) for reuse
    1.23 +		  (-vt:reusefiles)
    1.24 +		- do additionally scan for comments (pdbcomment) if performing
    1.25 +		  PDT/TAU instrumentation to prevent instrumenting functions
    1.26 +		  within comments
    1.27 +	- OPARI: (see tools/opari/ChangeLog:26,27)
    1.28 +
    1.29 +5.13.2
    1.30 +	- added support for IBM BlueGene/Q
    1.31 +	- added support for PAPI 5 (aka PAPI-V)
    1.32 +	- fixed CUDA runtime API wrapper for CUDA 5 
    1.33 +	- fixed "Cannot find communicator" error that occurred when completing
    1.34 +	  a non-blocking MPI communication using an already freed communicator
    1.35 +	  (e.g. MPI_Irecv(...,comm,...), MPI_Comm_free(comm), MPI_Wait())
    1.36 +	- fixed 'gnu' compiler instrumentation for functions defined within
    1.37 +	  shared objects
    1.38 +	- vtunify:
    1.39 +		- disable OpenMP parallelization if PGI compiler version < 9
    1.40 +		  is used (threadprivate not supported)
    1.41 +	- OPARI: (see tools/opari/ChangeLog:24,25)
    1.42 +
    1.43 +5.13.1
    1.44 +	- updated version of internal OTF to 1.11.2goldfish
    1.45 +	  (see extlib/otf/ChangeLog)
    1.46 +	- use high precision timer RTC on Cray XE/XK6 platforms using the
    1.47 +	  Cray compiler
    1.48 +	- fixed parsing symbol list file given by VT_GNU_NMFILE if it is
    1.49 +	  generated by nm with multiple input files
    1.50 +	- compiler wrappers:
    1.51 +		- added detection of Cray compiler's OpenMP flag (-h omp) to
    1.52 +		  enable OPARI instrumentation
    1.53 +		- add OPARI option '-nosrc' when using the Cray compiler
    1.54 +		  (at least the Fortran compiler does not understand
    1.55 +		   #line constructs)
    1.56  		- vtnvcc:
    1.57  			- add path to cuda.h to the PDT parser command
    1.58  			- exclude *.cu source files from instrumenting with
    1.59 @@ -13,7 +59,7 @@
    1.60  		- enhanced precision of timestamp conversion from local 
    1.61  		  to global 
    1.62  
    1.63 -5.13openmpi
    1.64 +5.13
    1.65  	- updated version of internal OTF to 1.11.1goldfish
    1.66  	  (see extlib/otf/ChangeLog)
    1.67  	- added support for highly parallel trace writing using the 
     2.1 --- a/ompi/contrib/vt/vt/INSTALL	Wed Nov 14 04:52:39 2012 +0000
     2.2 +++ b/ompi/contrib/vt/vt/INSTALL	Wed Nov 14 13:29:18 2012 +0000
     2.3 @@ -225,6 +225,10 @@
     2.4                                give the command for PDT Fortran source code parser,
     2.5                                default: f95parse, f90parse, or gfparse
     2.6  
     2.7 +      --with-pdt-comment=PDTCOMMENT
     2.8 +                              give the command for PDT comment parser,
     2.9 +                              default: pdbcomment
    2.10 +
    2.11        --with-papi-dir=PAPIDIR
    2.12                                give the path for PAPI, default: /usr
    2.13  
    2.14 @@ -345,7 +349,7 @@
    2.15          (e.g. RANLIB, AR, MPICC, CXXFLAGS).
    2.16  
    2.17      Examples:
    2.18 -      BlueGene/P:
    2.19 +      BlueGene/P and BlueGene/Q:
    2.20          % ./configure --host=powerpc64-ibm-linux-gnu
    2.21  
    2.22        Cray XK6:
     3.1 --- a/ompi/contrib/vt/vt/Makefile.am	Wed Nov 14 04:52:39 2012 +0000
     3.2 +++ b/ompi/contrib/vt/vt/Makefile.am	Wed Nov 14 13:29:18 2012 +0000
     3.3 @@ -22,6 +22,7 @@
     3.4  	VERSION \
     3.5  	config/defaults/bgl \
     3.6  	config/defaults/bgp \
     3.7 +	config/defaults/bgq \
     3.8  	config/defaults/crayxt \
     3.9  	config/defaults/crayxe \
    3.10  	config/defaults/ibm \
     4.1 --- a/ompi/contrib/vt/vt/VERSION	Wed Nov 14 04:52:39 2012 +0000
     4.2 +++ b/ompi/contrib/vt/vt/VERSION	Wed Nov 14 13:29:18 2012 +0000
     4.3 @@ -1,1 +1,1 @@
     4.4 -5.13.1openmpi
     4.5 +5.14openmpi
     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/ompi/contrib/vt/vt/config/defaults/bgq	Wed Nov 14 13:29:18 2012 +0000
     5.3 @@ -0,0 +1,20 @@
     5.4 +CC="bgxlc_r"
     5.5 +CXX="bgxlC_r"
     5.6 +FC="bgxlf95_r"
     5.7 +MPICC="mpixlc_r"
     5.8 +MPICXX="mpixlcxx_r"
     5.9 +CFLAGS="-O3 -qstrict"
    5.10 +CXXFLAGS="-O3 -qstrict -qminimaltoc"
    5.11 +CC_FOR_BUILD="xlc_r"
    5.12 +CXX_FOR_BUILD="xlC_r"
    5.13 +CFLAGS_FOR_BUILD="-O3 -qstrict"
    5.14 +CXXFLAGS_FOR_BUILD="-O3 -qstrict"
    5.15 +enable_shared="no"
    5.16 +with_cross_prefix="bg"
    5.17 +with_mpibgq="yes"
    5.18 +with_cxxrtlib="-L/opt/ibmcmp/vacpp/bg/12.1/bglib64 -libmc++ -lstdc++"
    5.19 +with_shlibc="/lib64/libc.so.6"
    5.20 +
    5.21 +# Disable compiler optimization for the OTF library to work around a
    5.22 +# not yet evaluated segmentation fault that occurs when flushing the trace buffer.
    5.23 +with_otf_flags="CFLAGS=-O0"
     6.1 --- a/ompi/contrib/vt/vt/config/m4/acinclude.compwrap.m4	Wed Nov 14 04:52:39 2012 +0000
     6.2 +++ b/ompi/contrib/vt/vt/config/m4/acinclude.compwrap.m4	Wed Nov 14 13:29:18 2012 +0000
     6.3 @@ -10,6 +10,8 @@
     6.4  	VT_WRAPPER_CC_TAUINST_OPTS=
     6.5  	VT_WRAPPER_CC_TAUINST_PARSE_BIN=
     6.6  	VT_WRAPPER_CC_TAUINST_PARSE_OPTS=
     6.7 +	VT_WRAPPER_CC_TAUINST_COMMENT_BIN=
     6.8 +	VT_WRAPPER_CC_TAUINST_COMMENT_OPTS=
     6.9  	VT_WRAPPER_CC_COMPINST_COMPILER_FLAGS=
    6.10  	VT_WRAPPER_CC_DEFAULT_PARTYPE="seq"
    6.11  
    6.12 @@ -23,6 +25,8 @@
    6.13  	VT_WRAPPER_CXX_TAUINST_OPTS=
    6.14  	VT_WRAPPER_CXX_TAUINST_PARSE_BIN=
    6.15  	VT_WRAPPER_CXX_TAUINST_PARSE_OPTS=
    6.16 +	VT_WRAPPER_CXX_TAUINST_COMMENT_BIN=
    6.17 +	VT_WRAPPER_CXX_TAUINST_COMMENT_OPTS=
    6.18  	VT_WRAPPER_CXX_COMPINST_COMPILER_FLAGS=
    6.19  	VT_WRAPPER_CXX_DEFAULT_PARTYPE="seq"
    6.20  
    6.21 @@ -36,6 +40,8 @@
    6.22  	VT_WRAPPER_FC_TAUINST_OPTS=
    6.23  	VT_WRAPPER_FC_TAUINST_PARSE_BIN=
    6.24  	VT_WRAPPER_FC_TAUINST_PARSE_OPTS=
    6.25 +	VT_WRAPPER_FC_TAUINST_COMMENT_BIN=
    6.26 +	VT_WRAPPER_FC_TAUINST_COMMENT_OPTS=
    6.27  	VT_WRAPPER_FC_COMPINST_COMPILER_FLAGS=
    6.28  	VT_WRAPPER_FC_DEFAULT_PARTYPE="seq"
    6.29  
    6.30 @@ -45,11 +51,12 @@
    6.31  	VT_WRAPPER_NVCC_EXTRA_LIBS=
    6.32  	VT_WRAPPER_NVCC_CPP=$CPP
    6.33  	VT_WRAPPER_NVCC_EXTRA_CPPFLAGS=
    6.34 -
    6.35  	VT_WRAPPER_NVCC_DYNINST_COMPILER_FLAGS=
    6.36  	VT_WRAPPER_NVCC_TAUINST_OPTS=
    6.37  	VT_WRAPPER_NVCC_TAUINST_PARSE_BIN=
    6.38  	VT_WRAPPER_NVCC_TAUINST_PARSE_OPTS=
    6.39 +	VT_WRAPPER_NVCC_TAUINST_COMMENT_BIN=
    6.40 +	VT_WRAPPER_NVCC_TAUINST_COMMENT_OPTS=
    6.41  	VT_WRAPPER_NVCC_COMPINST_COMPILER_FLAGS=
    6.42  	VT_WRAPPER_NVCC_AVAIL_INST="manual"
    6.43  	VT_WRAPPER_NVCC_DEFAULT_INST="manual"
    6.44 @@ -257,12 +264,16 @@
    6.45  		VT_WRAPPER_NVCC_EXTRA_LINKER_FLAGS="$VT_WRAPPER_NVCC_EXTRA_LINKER_FLAGS -Wl,-force_flat_namespace"
    6.46  	])
    6.47  
    6.48 -	AS_IF([test "$PLATFORM" = "bgp" -a x"$enable_shared" = "xyes"],
    6.49 +	AS_IF([test x"$enable_shared" = "xyes"],
    6.50  	[
    6.51 -		VT_WRAPPER_CC_EXTRA_LINKER_FLAGS="$VT_WRAPPER_CC_EXTRA_LINKER_FLAGS -Wl,-dy"
    6.52 -		VT_WRAPPER_CXX_EXTRA_LINKER_FLAGS="$VT_WRAPPER_CXX_EXTRA_LINKER_FLAGS -Wl,-dy"
    6.53 -		VT_WRAPPER_FC_EXTRA_LINKER_FLAGS="$VT_WRAPPER_FC_EXTRA_LINKER_FLAGS -Wl,-dy"
    6.54 -		VT_WRAPPER_NVCC_EXTRA_LINKER_FLAGS="$VT_WRAPPER_NVCC_EXTRA_LINKER_FLAGS -Wl,-dy"
    6.55 +		case $PLATFORM in
    6.56 +			bgp | bgq)
    6.57 +				VT_WRAPPER_CC_EXTRA_LINKER_FLAGS="$VT_WRAPPER_CC_EXTRA_LINKER_FLAGS -Wl,-dy"
    6.58 +				VT_WRAPPER_CXX_EXTRA_LINKER_FLAGS="$VT_WRAPPER_CXX_EXTRA_LINKER_FLAGS -Wl,-dy"
    6.59 +				VT_WRAPPER_FC_EXTRA_LINKER_FLAGS="$VT_WRAPPER_FC_EXTRA_LINKER_FLAGS -Wl,-dy"
    6.60 +				VT_WRAPPER_NVCC_EXTRA_LINKER_FLAGS="$VT_WRAPPER_NVCC_EXTRA_LINKER_FLAGS -Wl,-dy"
    6.61 +				;;
    6.62 +		esac
    6.63  	])
    6.64  
    6.65  	AS_IF([test x"$compinst_type" = "xpgi9" -o x"$compinst_type" = "xcraycce"],
    6.66 @@ -283,6 +294,10 @@
    6.67  					VT_WRAPPER_OPARI_OPTS="-nodecl"
    6.68  					break
    6.69  					;;
    6.70 +				*Cray\ C*)
    6.71 +					VT_WRAPPER_OPARI_OPTS="-nosrc"
    6.72 +					break
    6.73 +					;;
    6.74  			esac
    6.75  		done
    6.76  
    6.77 @@ -342,15 +357,19 @@
    6.78  		VT_WRAPPER_CC_TAUINST_OPTS="-c -spec \${datadir}/TAUINST.SPEC"
    6.79  		VT_WRAPPER_CC_TAUINST_PARSE_BIN="$tauinst_cparse_cmd"
    6.80  		VT_WRAPPER_CC_TAUINST_PARSE_OPTS="$mpiincdir"
    6.81 +		VT_WRAPPER_CC_TAUINST_COMMENT_BIN="$tauinst_comment_cmd"
    6.82  		VT_WRAPPER_CXX_TAUINST_OPTS="-c++ -spec \${datadir}/TAUINST.SPEC"
    6.83  		VT_WRAPPER_CXX_TAUINST_PARSE_BIN="$tauinst_cxxparse_cmd"
    6.84  		VT_WRAPPER_CXX_TAUINST_PARSE_OPTS="$VT_WRAPPER_CC_TAUINST_PARSE_OPTS"
    6.85 +		VT_WRAPPER_CXX_TAUINST_COMMENT_BIN="$tauinst_comment_cmd"
    6.86  		VT_WRAPPER_FC_TAUINST_OPTS="-fortran -spec \${datadir}/TAUINST.SPEC"
    6.87  		VT_WRAPPER_FC_TAUINST_PARSE_BIN="$tauinst_fparse_cmd"
    6.88  		VT_WRAPPER_FC_TAUINST_PARSE_OPTS="$fmpiincdir"
    6.89 +		VT_WRAPPER_FC_TAUINST_COMMENT_BIN="$tauinst_comment_cmd"
    6.90  		VT_WRAPPER_NVCC_TAUINST_OPTS="$VT_WRAPPER_CC_TAUINST_OPTS"
    6.91  		VT_WRAPPER_NVCC_TAUINST_PARSE_BIN="$VT_WRAPPER_CC_TAUINST_PARSE_BIN"
    6.92  		VT_WRAPPER_NVCC_TAUINST_PARSE_OPTS="$VT_WRAPPER_CC_TAUINST_PARSE_OPTS $CUDATKINCDIR"
    6.93 +		VT_WRAPPER_NVCC_TAUINST_COMMENT_BIN="$tauinst_comment_cmd"
    6.94  		VT_WRAPPER_NVCC_AVAIL_INST="$VT_WRAPPER_NVCC_AVAIL_INST tauinst"
    6.95  		VT_WRAPPER_AVAIL_INST="$VT_WRAPPER_AVAIL_INST tauinst"
    6.96  	])
    6.97 @@ -365,6 +384,8 @@
    6.98  	AC_SUBST(VT_WRAPPER_CC_TAUINST_OPTS)
    6.99  	AC_SUBST(VT_WRAPPER_CC_TAUINST_PARSE_BIN)
   6.100  	AC_SUBST(VT_WRAPPER_CC_TAUINST_PARSE_OPTS)
   6.101 +	AC_SUBST(VT_WRAPPER_CC_TAUINST_COMMENT_BIN)
   6.102 +	AC_SUBST(VT_WRAPPER_CC_TAUINST_COMMENT_OPTS)
   6.103  	AC_SUBST(VT_WRAPPER_CC_COMPINST_COMPILER_FLAGS)
   6.104  	AC_SUBST(VT_WRAPPER_CC_DEFAULT_PARTYPE)
   6.105  
   6.106 @@ -378,6 +399,8 @@
   6.107  	AC_SUBST(VT_WRAPPER_CXX_TAUINST_OPTS)
   6.108  	AC_SUBST(VT_WRAPPER_CXX_TAUINST_PARSE_BIN)
   6.109  	AC_SUBST(VT_WRAPPER_CXX_TAUINST_PARSE_OPTS)
   6.110 +	AC_SUBST(VT_WRAPPER_CXX_TAUINST_COMMENT_BIN)
   6.111 +	AC_SUBST(VT_WRAPPER_CXX_TAUINST_COMMENT_OPTS)
   6.112  	AC_SUBST(VT_WRAPPER_CXX_COMPINST_COMPILER_FLAGS)
   6.113  	AC_SUBST(VT_WRAPPER_CXX_DEFAULT_PARTYPE)
   6.114  
   6.115 @@ -391,6 +414,8 @@
   6.116  	AC_SUBST(VT_WRAPPER_FC_TAUINST_OPTS)
   6.117  	AC_SUBST(VT_WRAPPER_FC_TAUINST_PARSE_BIN)
   6.118  	AC_SUBST(VT_WRAPPER_FC_TAUINST_PARSE_OPTS)
   6.119 +	AC_SUBST(VT_WRAPPER_FC_TAUINST_COMMENT_BIN)
   6.120 +	AC_SUBST(VT_WRAPPER_FC_TAUINST_COMMENT_OPTS)
   6.121  	AC_SUBST(VT_WRAPPER_FC_COMPINST_COMPILER_FLAGS)
   6.122  	AC_SUBST(VT_WRAPPER_FC_DEFAULT_PARTYPE)
   6.123  
   6.124 @@ -404,6 +429,8 @@
   6.125  	AC_SUBST(VT_WRAPPER_NVCC_TAUINST_OPTS)
   6.126  	AC_SUBST(VT_WRAPPER_NVCC_TAUINST_PARSE_BIN)
   6.127  	AC_SUBST(VT_WRAPPER_NVCC_TAUINST_PARSE_OPTS)
   6.128 +	AC_SUBST(VT_WRAPPER_NVCC_TAUINST_COMMENT_BIN)
   6.129 +	AC_SUBST(VT_WRAPPER_NVCC_TAUINST_COMMENT_OPTS)
   6.130  	AC_SUBST(VT_WRAPPER_NVCC_COMPINST_COMPILER_FLAGS)
   6.131  	AC_SUBST(VT_WRAPPER_NVCC_AVAIL_INST)
   6.132  	AC_SUBST(VT_WRAPPER_NVCC_DEFAULT_INST)
     7.1 --- a/ompi/contrib/vt/vt/config/m4/acinclude.cuda.m4	Wed Nov 14 04:52:39 2012 +0000
     7.2 +++ b/ompi/contrib/vt/vt/config/m4/acinclude.cuda.m4	Wed Nov 14 13:29:18 2012 +0000
     7.3 @@ -127,9 +127,16 @@
     7.4  	AS_IF([test x"$cuda_error" = "xno"],
     7.5  	[
     7.6  		have_cuda="yes"
     7.7 +	],
     7.8 +	[
     7.9 +dnl		if no CUDA found, remove content of CUDATKLIBDIR to prevent adding them
    7.10 +dnl		to the linker flags when using the VT compiler wrappers
    7.11 +		CUDATKLIBDIR=
    7.12  	])
    7.13  
    7.14  	AC_SUBST(CUDATKDIR)
    7.15  	AC_SUBST(CUDATKINCDIR)
    7.16  	AC_SUBST(CUDATKLIBDIR)
    7.17 +	AC_SUBST(CUDALIB)
    7.18 +	AC_SUBST(CUDARTLIB)
    7.19  ])
     8.1 --- a/ompi/contrib/vt/vt/config/m4/acinclude.cudawrap.m4	Wed Nov 14 04:52:39 2012 +0000
     8.2 +++ b/ompi/contrib/vt/vt/config/m4/acinclude.cudawrap.m4	Wed Nov 14 13:29:18 2012 +0000
     8.3 @@ -67,4 +67,13 @@
     8.4  			have_cudartwrap="yes"
     8.5  		])
     8.6  	])
     8.7 +
     8.8 +dnl	if CUPTI found, CUPTILIB already contains CUDATKLIBDIR and CUDARTLIB;
     8.9 +dnl	remove content of CUDATKLIBDIR and CUDARTLIB to prevent double linking when
    8.10 +dnl	using the VT compiler wrappers
    8.11 +	AS_IF([test x"$have_cupti" = "xyes"],
    8.12 +	[
    8.13 +		CUDATKLIBDIR=
    8.14 +		CUDARTLIB=
    8.15 +	])
    8.16  ])
     9.1 --- a/ompi/contrib/vt/vt/config/m4/acinclude.cupti.m4	Wed Nov 14 04:52:39 2012 +0000
     9.2 +++ b/ompi/contrib/vt/vt/config/m4/acinclude.cupti.m4	Wed Nov 14 13:29:18 2012 +0000
     9.3 @@ -56,10 +56,10 @@
     9.4  		AS_IF([test x"$CUPTILIB" = x -a x"$cupti_error" = "xno"],
     9.5  		[
     9.6  			sav_LIBS=$LIBS
     9.7 -			LIBS="$LIBS $CUPTILIBDIR -lcupti $CUDATKLIBDIR $CUDALIB"
     9.8 +			LIBS="$LIBS $CUPTILIBDIR -lcupti $CUDATKLIBDIR $CUDALIB $CUDARTLIB"
     9.9  			AC_MSG_CHECKING([whether linking with -lcupti works])
    9.10  			AC_TRY_LINK([],[],
    9.11 -			[AC_MSG_RESULT([yes]); CUPTILIB="-lcupti $CUDATKLIBDIR $CUDALIB"],[AC_MSG_RESULT([no])])
    9.12 +			[AC_MSG_RESULT([yes]); CUPTILIB="-lcupti $CUDATKLIBDIR $CUDALIB $CUDARTLIB"],[AC_MSG_RESULT([no])])
    9.13  			LIBS=$sav_LIBS
    9.14  		])
    9.15  
    9.16 @@ -108,8 +108,8 @@
    9.17  		])
    9.18  	])
    9.19  
    9.20 -dnl	if no CUPTI found, remove content of CUPTILIBDIR to prevent adding the
    9.21 -dnl	'-LCUPTILIBDIR' linker flag by the VT compiler wrappers
    9.22 +dnl	if no CUPTI found, remove content of CUPTILIBDIR to prevent adding them
    9.23 +dnl	to the linker flags when using the VT compiler wrappers
    9.24  	AS_IF([test x"$have_cupti" = "xno"],
    9.25  	[CUPTILIBDIR=])
    9.26  
    10.1 --- a/ompi/contrib/vt/vt/config/m4/acinclude.dl.m4	Wed Nov 14 04:52:39 2012 +0000
    10.2 +++ b/ompi/contrib/vt/vt/config/m4/acinclude.dl.m4	Wed Nov 14 13:29:18 2012 +0000
    10.3 @@ -38,9 +38,9 @@
    10.4  		AC_MSG_NOTICE([error: dynamic linking library (libdl) isn't suitable on this platform])
    10.5  		dl_error="yes"
    10.6  	])
    10.7 -	AS_IF([test "$PLATFORM" = "bgp"],
    10.8 +	AS_IF([test "$PLATFORM" = "bgp" -o "$PLATFORM" = "bgq"],
    10.9  	[
   10.10 -dnl		RTLD_NEXT available but not working on BG/P platforms
   10.11 +dnl		RTLD_NEXT available but not working on BG/P (and BG/Q?) platforms
   10.12  		ac_cv_have_decl_RTLD_NEXT="no"
   10.13  	])
   10.14  	AS_IF([test "$PLATFORM" = "crayxt" -o "$PLATFORM" = "crayxe"],
    11.1 --- a/ompi/contrib/vt/vt/config/m4/acinclude.getcpu.m4	Wed Nov 14 04:52:39 2012 +0000
    11.2 +++ b/ompi/contrib/vt/vt/config/m4/acinclude.getcpu.m4	Wed Nov 14 13:29:18 2012 +0000
    11.3 @@ -19,7 +19,10 @@
    11.4  
    11.5  		AS_IF([test x"$getcpu_error" = "xno"],
    11.6  		[
    11.7 +			sav_CPPFLAGS=$CPPFLAGS
    11.8 +			CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE"
    11.9  			AC_CHECK_FUNC([sched_getcpu], [], [getcpu_error="yes"])
   11.10 +			CPPFLAGS=$sav_CPPFLAGS
   11.11  		])
   11.12  
   11.13  		AS_IF([test x"$getcpu_error" = "xno" -a x"$cross_compiling" = "xno"],
   11.14 @@ -27,6 +30,7 @@
   11.15  			AC_MSG_CHECKING([whether sched_getcpu works])
   11.16  			AC_TRY_RUN(
   11.17  [
   11.18 +#define _GNU_SOURCE
   11.19  #include <sched.h>
   11.20  int main() { return (sched_getcpu() != -1) ? 0 : 1; }
   11.21  ],
    12.1 --- a/ompi/contrib/vt/vt/config/m4/acinclude.java.m4	Wed Nov 14 04:52:39 2012 +0000
    12.2 +++ b/ompi/contrib/vt/vt/config/m4/acinclude.java.m4	Wed Nov 14 13:29:18 2012 +0000
    12.3 @@ -16,7 +16,7 @@
    12.4  
    12.5  	AS_IF([test x"$check_java" = "xyes"],
    12.6  	[
    12.7 -		AS_IF([test "$PLATFORM" = "bgp"],
    12.8 +		AS_IF([test "$PLATFORM" = "bgp" -o "$PLATFORM" = "bgq"],
    12.9  		[
   12.10  			AC_MSG_NOTICE([error: Java tracing not supported on this platform])
   12.11  			java_error="yes"
    13.1 --- a/ompi/contrib/vt/vt/config/m4/acinclude.libwrap.m4	Wed Nov 14 04:52:39 2012 +0000
    13.2 +++ b/ompi/contrib/vt/vt/config/m4/acinclude.libwrap.m4	Wed Nov 14 13:29:18 2012 +0000
    13.3 @@ -59,10 +59,14 @@
    13.4  
    13.5  	AS_IF([test x"$check_libwrap" != "xno"],
    13.6  	[
    13.7 -		AS_IF([test "$PLATFORM" = "bgp" -a x"$enable_shared" = "xno"],
    13.8 +		AS_IF([test x"$enable_shared" = "xno"],
    13.9  		[
   13.10 -			AC_MSG_NOTICE([error: library tracing requires building of shared libraries on this platform; re-configure with \`--enable-shared'])
   13.11 -			libwrap_error="yes"
   13.12 +			case $PLATFORM in
   13.13 +				bgp | bgq)
   13.14 +					AC_MSG_NOTICE([error: library tracing requires building of shared libraries on this platform; re-configure with \`--enable-shared'])
   13.15 +					libwrap_error="yes"
   13.16 +					;;
   13.17 +			esac
   13.18  		])
   13.19  
   13.20  		AS_IF([test x"$libwrap_error" = "xno"],
    14.1 --- a/ompi/contrib/vt/vt/config/m4/acinclude.memhooks.m4	Wed Nov 14 04:52:39 2012 +0000
    14.2 +++ b/ompi/contrib/vt/vt/config/m4/acinclude.memhooks.m4	Wed Nov 14 13:29:18 2012 +0000
    14.3 @@ -26,7 +26,10 @@
    14.4  			  [])])])
    14.5  		])
    14.6  
    14.7 -		AS_IF([test x"$memhooks_error" = "xno"], [have_memhooks="yes"])
    14.8 +		AS_IF([test x"$memhooks_error" = "xno"],
    14.9 +		[
   14.10 +			have_memhooks="yes"
   14.11 +		])
   14.12  	])
   14.13  ])
   14.14  
    15.1 --- a/ompi/contrib/vt/vt/config/m4/acinclude.mpi.m4	Wed Nov 14 04:52:39 2012 +0000
    15.2 +++ b/ompi/contrib/vt/vt/config/m4/acinclude.mpi.m4	Wed Nov 14 13:29:18 2012 +0000
    15.3 @@ -183,6 +183,23 @@
    15.4  		])
    15.5  	])
    15.6  
    15.7 +	AC_ARG_WITH(mpibgq,
    15.8 +		AC_HELP_STRING([--with-mpibgq], [set MPI-libs for IBM BG/Q]),
    15.9 +	[
   15.10 +		AS_IF([test x"$withval" = "xyes" -a x"$inside_openmpi" = "xno"],
   15.11 +		[
   15.12 +			MPILIB="-lmpich"
   15.13 +			PMPILIB="-lmpich"
   15.14 +			FMPILIB="-lfmpich"
   15.15 +			MPICFLAGS="$MPICFLAGS -DMPICH_IGNORE_CXX_SEEK"
   15.16 +			check_mpi2_thread="no"; have_mpi2_thread="yes"
   15.17 +			check_mpi2_1sided="no"; have_mpi2_1sided="yes"
   15.18 +			check_mpi2_extcoll="no"; have_mpi2_extcoll="yes"
   15.19 +			ac_cv_have_decl_MPI_IN_PLACE="yes"
   15.20 +			ac_cv_have_decl_MPI_ROOT="yes"
   15.21 +		])
   15.22 +	])
   15.23 +
   15.24  	AC_ARG_WITH(mpich,
   15.25  		AC_HELP_STRING([--with-mpich], [set MPI-libs for MPICH]),
   15.26  	[
   15.27 @@ -311,6 +328,7 @@
   15.28  		ac_cv_func_MPI_Type_create_f90_integer="yes"
   15.29  		ac_cv_func_MPI_Type_create_f90_real="yes"
   15.30  		ac_cv_func_MPI_Type_create_struct="yes"
   15.31 +		ac_cv_func_MPI_Type_dup="yes"
   15.32  		ac_cv_func_MPI_Type_match_size="yes"
   15.33  		ac_cv_func_PMPI_Win_test="yes"
   15.34  		ac_cv_func_PMPI_Win_lock="yes"
   15.35 @@ -754,6 +772,7 @@
   15.36                                          MPI_Type_create_f90_integer \
   15.37                                          MPI_Type_create_f90_real \
   15.38                                          MPI_Type_create_struct \
   15.39 +                                        MPI_Type_dup \
   15.40                                          MPI_Type_match_size])
   15.41  			
   15.42  dnl			check for MPI-2 Thread support
   15.43 @@ -965,76 +984,69 @@
   15.44  		fmpiwraplib_error="yes"
   15.45  	])
   15.46  
   15.47 -	AS_IF([test x"$check_fc_conv" = "xyes" -a x"$fmpiwraplib_error" = "xno"],
   15.48 +	AS_IF([test x"$fmpiwraplib_error" = "xno"],
   15.49  	[
   15.50  		sav_CC=$CC
   15.51  		sav_CPPFLAGS=$CPPFLAGS
   15.52 -		sav_LIBS=$LIBS
   15.53  		CC=$MPICC
   15.54  		CPPFLAGS="$CPPFLAGS $MPICFLAGS $MPIINCDIR"
   15.55 -		LIBS="$LIBS $MPILIBDIR $MPILIB"
   15.56  
   15.57 -dnl		check for handle conversion: MPI_Comm
   15.58 -		AC_CHECK_DECL([MPI_Comm_f2c],
   15.59 -		 [AC_CHECK_DECL([MPI_Comm_c2f],
   15.60 -		   [VT_MPIGEN_HAVE_FC_CONV_COMM=1], [], [#include "mpi.h"])],
   15.61 -		 [], [#include "mpi.h"])
   15.62 +		AS_IF([test x"$check_fc_conv" = "xyes"],
   15.63 +		[
   15.64 +dnl			check for MPI handle conversion functions
   15.65  
   15.66 -dnl		check for handle conversion: MPI_Errhandler
   15.67 -		AC_CHECK_DECL([MPI_Errhandler_f2c],
   15.68 -		 [AC_CHECK_DECL([MPI_Errhandler_c2f],
   15.69 -		   [VT_MPIGEN_HAVE_FC_CONV_ERRH=1], [], [#include "mpi.h"])],
   15.70 -		 [], [#include "mpi.h"])
   15.71 +			AC_CHECK_DECL([MPI_Comm_f2c],
   15.72 +			 [AC_CHECK_DECL([MPI_Comm_c2f],
   15.73 +			   [VT_MPIGEN_HAVE_FC_CONV_COMM=1], [], [#include "mpi.h"])],
   15.74 +			 [], [#include "mpi.h"])
   15.75  
   15.76 -dnl		check for handle conversion: MPI_File
   15.77 -		AC_CHECK_DECL([MPI_File_f2c],
   15.78 -		 [AC_CHECK_DECL([MPI_File_c2f],
   15.79 -		   [VT_MPIGEN_HAVE_FC_CONV_FILE=1], [], [#include "mpi.h"])],
   15.80 -		 [], [#include "mpi.h"])
   15.81 +			AC_CHECK_DECL([MPI_Errhandler_f2c],
   15.82 +			 [AC_CHECK_DECL([MPI_Errhandler_c2f],
   15.83 +			   [VT_MPIGEN_HAVE_FC_CONV_ERRH=1], [], [#include "mpi.h"])],
   15.84 +			 [], [#include "mpi.h"])
   15.85  
   15.86 -dnl		check for handle conversion: MPI_Group
   15.87 -		AC_CHECK_DECL([MPI_Group_f2c],
   15.88 -		 [AC_CHECK_DECL([MPI_Group_c2f],
   15.89 -		   [VT_MPIGEN_HAVE_FC_CONV_GROUP=1], [], [#include "mpi.h"])],
   15.90 -		 [], [#include "mpi.h"])
   15.91 +			AC_CHECK_DECL([MPI_File_f2c],
   15.92 +			 [AC_CHECK_DECL([MPI_File_c2f],
   15.93 +			   [VT_MPIGEN_HAVE_FC_CONV_FILE=1], [], [#include "mpi.h"])],
   15.94 +			 [], [#include "mpi.h"])
   15.95  
   15.96 -dnl		check for handle conversion: MPI_Info
   15.97 -		AC_CHECK_DECL([MPI_Info_f2c],
   15.98 -		 [AC_CHECK_DECL([MPI_Info_c2f],
   15.99 -		   [VT_MPIGEN_HAVE_FC_CONV_INFO=1], [], [#include "mpi.h"])],
  15.100 -		 [], [#include "mpi.h"])
  15.101 +			AC_CHECK_DECL([MPI_Group_f2c],
  15.102 +			 [AC_CHECK_DECL([MPI_Group_c2f],
  15.103 +			   [VT_MPIGEN_HAVE_FC_CONV_GROUP=1], [], [#include "mpi.h"])],
  15.104 +			 [], [#include "mpi.h"])
  15.105  
  15.106 -dnl		check for handle conversion: MPI_Op
  15.107 -		AC_CHECK_DECL([MPI_Op_f2c],
  15.108 -		 [AC_CHECK_DECL([MPI_Op_c2f],
  15.109 -		   [VT_MPIGEN_HAVE_FC_CONV_OP=1], [], [#include "mpi.h"])],
  15.110 -		 [], [#include "mpi.h"])
  15.111 +			AC_CHECK_DECL([MPI_Info_f2c],
  15.112 +			 [AC_CHECK_DECL([MPI_Info_c2f],
  15.113 +			   [VT_MPIGEN_HAVE_FC_CONV_INFO=1], [], [#include "mpi.h"])],
  15.114 +			 [], [#include "mpi.h"])
  15.115  
  15.116 -dnl		check for handle conversion: MPI_Request
  15.117 -		AC_CHECK_DECL([MPI_Request_f2c],
  15.118 -		 [AC_CHECK_DECL([MPI_Request_c2f],
  15.119 -		   [VT_MPIGEN_HAVE_FC_CONV_REQUEST=1], [], [#include "mpi.h"])],
  15.120 -		 [], [#include "mpi.h"])
  15.121 +			AC_CHECK_DECL([MPI_Op_f2c],
  15.122 +			 [AC_CHECK_DECL([MPI_Op_c2f],
  15.123 +			   [VT_MPIGEN_HAVE_FC_CONV_OP=1], [], [#include "mpi.h"])],
  15.124 +			 [], [#include "mpi.h"])
  15.125  
  15.126 -dnl		check for handle conversion: MPI_Status
  15.127 -		AC_CHECK_DECL([MPI_Status_f2c],
  15.128 -		 [AC_CHECK_DECL([MPI_Status_c2f],
  15.129 -		   [VT_MPIGEN_HAVE_FC_CONV_STATUS=1], [], [#include "mpi.h"])],
  15.130 -		 [], [#include "mpi.h"])
  15.131 +			AC_CHECK_DECL([MPI_Request_f2c],
  15.132 +			 [AC_CHECK_DECL([MPI_Request_c2f],
  15.133 +			   [VT_MPIGEN_HAVE_FC_CONV_REQUEST=1], [], [#include "mpi.h"])],
  15.134 +			 [], [#include "mpi.h"])
  15.135  
  15.136 -dnl		check for handle conversion: MPI_Datatype
  15.137 -		AC_CHECK_DECL([MPI_Type_f2c],
  15.138 -		 [AC_CHECK_DECL([MPI_Type_c2f],
  15.139 -		   [VT_MPIGEN_HAVE_FC_CONV_TYPE=1], [], [#include "mpi.h"])],
  15.140 -		 [], [#include "mpi.h"])
  15.141 +			AC_CHECK_DECL([MPI_Status_f2c],
  15.142 +			 [AC_CHECK_DECL([MPI_Status_c2f],
  15.143 +			   [VT_MPIGEN_HAVE_FC_CONV_STATUS=1], [], [#include "mpi.h"])],
  15.144 +			 [], [#include "mpi.h"])
  15.145  
  15.146 -dnl		check for handle conversion: MPI_Win
  15.147 -		AC_CHECK_DECL([MPI_Win_f2c],
  15.148 -		 [AC_CHECK_DECL([MPI_Win_c2f],
  15.149 -		   [VT_MPIGEN_HAVE_FC_CONV_WIN=1], [], [#include "mpi.h"])],
  15.150 -		 [], [#include "mpi.h"])
  15.151 +			AC_CHECK_DECL([MPI_Type_f2c],
  15.152 +			 [AC_CHECK_DECL([MPI_Type_c2f],
  15.153 +			   [VT_MPIGEN_HAVE_FC_CONV_TYPE=1], [], [#include "mpi.h"])],
  15.154 +			 [], [#include "mpi.h"])
  15.155  
  15.156 -dnl		check for MPI-2 constants
  15.157 +			AC_CHECK_DECL([MPI_Win_f2c],
  15.158 +			 [AC_CHECK_DECL([MPI_Win_c2f],
  15.159 +			   [VT_MPIGEN_HAVE_FC_CONV_WIN=1], [], [#include "mpi.h"])],
  15.160 +			 [], [#include "mpi.h"])
  15.161 +		])
  15.162 +
  15.163 +dnl		check for MPI-2 constants to convert
  15.164  
  15.165  		AC_CHECK_DECLS([MPI_IN_PLACE],
  15.166  		 [VT_MPIGEN_HAVE_FC_CONV_MPI2CONST=1; have_mpi2_const="yes"], [], [#include "mpi.h"])
  15.167 @@ -1044,7 +1056,6 @@
  15.168  
  15.169  		CC=$sav_CC
  15.170  		CPPFLAGS=$sav_CPPFLAGS
  15.171 -		LIBS=$sav_LIBS
  15.172  	])
  15.173  
  15.174  	AC_SUBST(VT_MPIGEN_HAVE_FC_CONV_COMM)
    16.1 --- a/ompi/contrib/vt/vt/config/m4/acinclude.pform.m4	Wed Nov 14 04:52:39 2012 +0000
    16.2 +++ b/ompi/contrib/vt/vt/config/m4/acinclude.pform.m4	Wed Nov 14 13:29:18 2012 +0000
    16.3 @@ -9,11 +9,11 @@
    16.4  
    16.5  	AC_ARG_WITH(platform,
    16.6  		AC_HELP_STRING([--with-platform=PLATFORM],
    16.7 -		[configure for given platform (altix,bgl,bgp,crayt3e,crayx1,crayxt,crayxe,ibm,linux,macos,necsx,origin,sicortex,sun,generic), default: automatically by configure]),
    16.8 +		[configure for given platform (altix,bgl,bgp,bgq,crayt3e,crayx1,crayxt,crayxe,ibm,linux,macos,necsx,origin,sicortex,sun,generic), default: automatically by configure]),
    16.9  	[
   16.10  		AC_MSG_RESULT([skipped (--with-platform=$withval)])
   16.11  
   16.12 -		pform_list="altix bgl bgp crayt3e crayx1 crayxt crayxe ibm linux macos necsx origin sicortex sun generic"
   16.13 +		pform_list="altix bgl bgp bgq crayt3e crayx1 crayxt crayxe ibm linux macos necsx origin sicortex sun generic"
   16.14  		pform_found="no"
   16.15  		for p in $pform_list
   16.16  		do
   16.17 @@ -32,15 +32,17 @@
   16.18  				[PLATFORM=altix],
   16.19  				[AS_IF([test "$host_cpu" = "powerpc64" -a "$host" != "$build" -a -d /bgl/BlueLight],
   16.20  				 [PLATFORM=bgl],
   16.21 -				 [AS_IF([test "$host_cpu" = "powerpc64" -a "$host" != "$build" -a -d /bgsys],
   16.22 -				  [PLATFORM=bgp],
   16.23 -				  [AS_IF([test "$host_cpu" = "x86_64" -a "x`uname -r | grep -q cray_gem && echo TRUE`" = "xTRUE"],
   16.24 -				   [PLATFORM=crayxe],
   16.25 -				   [AS_IF([test "$host_cpu" = "x86_64" -a -d /opt/xt-boot],
   16.26 -				    [PLATFORM=crayxt],
   16.27 -				    [AS_IF([test "$host_cpu" = "mips64" -a -d /opt/sicortex],
   16.28 -				     [PLATFORM=sicortex],
   16.29 -				     [PLATFORM=linux])])])])])])
   16.30 +				 [AS_IF([test "$host_cpu" = "powerpc64" -a "$host" != "$build" -a -d /bgsys/drivers/ppcfloor/hwi],
   16.31 +				  [PLATFORM=bgq],
   16.32 +				  [AS_IF([test "$host_cpu" = "powerpc64" -a "$host" != "$build" -a -d /bgsys],
   16.33 +				   [PLATFORM=bgp],
   16.34 +				   [AS_IF([test "$host_cpu" = "x86_64" -a "x`uname -r | grep -q cray_gem && echo TRUE`" = "xTRUE"],
   16.35 +				    [PLATFORM=crayxe],
   16.36 +				    [AS_IF([test "$host_cpu" = "x86_64" -a -d /opt/xt-boot],
   16.37 +				     [PLATFORM=crayxt],
   16.38 +				     [AS_IF([test "$host_cpu" = "mips64" -a -d /opt/sicortex],
   16.39 +				      [PLATFORM=sicortex],
   16.40 +				      [PLATFORM=linux])])])])])])])
   16.41  				;;
   16.42  			sunos* | solaris*)
   16.43  				PLATFORM=sun
   16.44 @@ -81,11 +83,14 @@
   16.45  		BITMODE=$withval
   16.46          ])
   16.47  
   16.48 -
   16.49 -	AS_IF([test "$PLATFORM" = "bgp"],
   16.50 -	[
   16.51 -		CPPFLAGS="$CPPFLAGS -I/bgsys/drivers/ppcfloor/arch/include"
   16.52 -	])
   16.53 +	case $PLATFORM in
   16.54 +		bgp)
   16.55 +			CPPFLAGS="$CPPFLAGS -I/bgsys/drivers/ppcfloor/arch/include"
   16.56 +			;;
   16.57 +		bgq)
   16.58 +			CPPFLAGS="$CPPFLAGS -I/bgsys/drivers/ppcfloor"
   16.59 +			;;
   16.60 +	esac
   16.61  
   16.62  	AC_SUBST(PLATFORM)
   16.63  	AC_SUBST(BITMODE)
    17.1 --- a/ompi/contrib/vt/vt/config/m4/acinclude.plugincntr.m4	Wed Nov 14 04:52:39 2012 +0000
    17.2 +++ b/ompi/contrib/vt/vt/config/m4/acinclude.plugincntr.m4	Wed Nov 14 13:29:18 2012 +0000
    17.3 @@ -13,10 +13,14 @@
    17.4  	
    17.5  	AS_IF([test x"$check_plugin_cntr" = "xyes"],
    17.6  	[
    17.7 -		AS_IF([test "$PLATFORM" = "bgp" -a x"$enable_shared" = "xno"],
    17.8 +		AS_IF([test x"$enable_shared" = "xno"],
    17.9  		[
   17.10 -			AC_MSG_NOTICE([error: the plugin counter support requires building of shared libraries on this platform; re-configure with \`--enable-shared'])
   17.11 -			plugin_cntr_error="yes"
   17.12 +			case $PLATFORM in
   17.13 +				bgp | bgq)
   17.14 +					AC_MSG_NOTICE([error: the plugin counter support requires building of shared libraries on this platform; re-configure with \`--enable-shared'])
   17.15 +					plugin_cntr_error="yes"
   17.16 +					;;
   17.17 +			esac
   17.18  		])
   17.19  
   17.20  		AS_IF([test x"$plugin_cntr_error" = "xno"],
    18.1 --- a/ompi/contrib/vt/vt/config/m4/acinclude.tauinst.m4	Wed Nov 14 04:52:39 2012 +0000
    18.2 +++ b/ompi/contrib/vt/vt/config/m4/acinclude.tauinst.m4	Wed Nov 14 13:29:18 2012 +0000
    18.3 @@ -9,6 +9,7 @@
    18.4  	tauinst_cparse_cmd=
    18.5  	tauinst_cxxparse_cmd=
    18.6  	tauinst_fparse_cmd=
    18.7 +	tauinst_comment_cmd=
    18.8  
    18.9  	AC_ARG_ENABLE(tauinst,
   18.10  		AC_HELP_STRING([--enable-tauinst],
   18.11 @@ -51,6 +52,15 @@
   18.12  		tauinst_fparse_cmd=$withval
   18.13  	])
   18.14  
   18.15 +	AC_ARG_WITH(pdt-comment,
   18.16 +		AC_HELP_STRING([--with-pdt-comment=PDTCOMMENT],
   18.17 +		[give the command for PDT comment parser, default: pdbcomment]),
   18.18 +	[
   18.19 +		AS_IF([test x"$withval" = "xyes" -o x"$withval" = "xno"],
   18.20 +		[AC_MSG_ERROR([value of '--with-pdt-comment' not properly set!])])
   18.21 +		tauinst_comment_cmd=$withval
   18.22 +	])
   18.23 +
   18.24  	AS_IF([test "$check_tauinst" = "yes"],
   18.25  	[
   18.26  		AC_CHECK_PROG(tauinst_cmd, tau_instrumentor, tau_instrumentor)
   18.27 @@ -77,6 +87,9 @@
   18.28  			[
   18.29  				tauinst_fparse_cmd=
   18.30  			])
   18.31 +			AC_CHECK_PROG(tauinst_comment_cmd, pdbcomment, pdbcomment)
   18.32 +			AS_IF([test x"$tauinst_comment_cmd" = x],
   18.33 +			[AC_MSG_WARN([no pdbcomment found; You might experience compile-time problems with comments if using TAU instrumentation])])
   18.34  
   18.35  			AS_IF([test x"$tauinst_cparse_cmd$tauinst_cxxparse_cmd$tauinst_fparse_cmd" = x],
   18.36  			[
    19.1 --- a/ompi/contrib/vt/vt/config/m4/acinclude.timer.m4	Wed Nov 14 04:52:39 2012 +0000
    19.2 +++ b/ompi/contrib/vt/vt/config/m4/acinclude.timer.m4	Wed Nov 14 13:29:18 2012 +0000
    19.3 @@ -58,9 +58,9 @@
    19.4  		AC_DEFINE([TIMER_RTS_GET_TIMEBASE], [1], [Use `rts_get_timebase' function])
    19.5  		timer=TIMER_RTS_GET_TIMEBASE
    19.6  		;;
    19.7 -	bgp)
    19.8 -		AC_DEFINE([TIMER_BGP_GET_TIMEBASE], [1], [Use `_bgp_GetTimeBase' function])
    19.9 -		timer=TIMER_BGP_GET_TIMEBASE
   19.10 +	bgp | bgq)
   19.11 +		AC_DEFINE([TIMER_GET_TIMEBASE], [1], [Use `GetTimeBase' function])
   19.12 +		timer=TIMER_GET_TIMEBASE
   19.13  		;;
   19.14  	ibm)
   19.15  		AC_DEFINE([TIMER_POWER_REALTIME], [1], [IBM Power family Real-Time-Clock])
   19.16 @@ -90,10 +90,6 @@
   19.17  		AC_DEFINE([TIMER_GETTIMEOFDAY], [3], [Use `gettimeofday' function])
   19.18  		timer=TIMER_CYCLE_COUNTER
   19.19  
   19.20 -		case `$CC -V 2>&1` in
   19.21 -			*Cray*)	timer=TIMER_GETTIMEOFDAY ;;
   19.22 -		esac
   19.23 -
   19.24  		AS_IF([test $PLATFORM = "crayxt"],
   19.25  		[
   19.26  			AC_TRY_COMPILE([],
   19.27 @@ -128,7 +124,7 @@
   19.28  	AC_MSG_NOTICE([selected timer: $timer])
   19.29  
   19.30  	case $timer in
   19.31 -		TIMER_RTS_GET_TIMEBASE | TIMER_BGP_GET_TIMEBASE | TIMER_SYSSX_HGTIME | TIMER_GETTIMEOFDAY)
   19.32 +		TIMER_RTS_GET_TIMEBASE | TIMER_GET_TIMEBASE | TIMER_SYSSX_HGTIME | TIMER_GETTIMEOFDAY)
   19.33  			timer_is_global=yes
   19.34  			timer_is_global_def=1
   19.35  			;;
    20.1 --- a/ompi/contrib/vt/vt/config/m4/acinclude.vtrun.m4	Wed Nov 14 04:52:39 2012 +0000
    20.2 +++ b/ompi/contrib/vt/vt/config/m4/acinclude.vtrun.m4	Wed Nov 14 13:29:18 2012 +0000
    20.3 @@ -30,7 +30,7 @@
    20.4  
    20.5  	AS_IF([test x"$check_vtrun" = "xyes"],
    20.6  	[
    20.7 -		AS_IF([test "$PLATFORM" = "bgl" -o "$PLATFORM" = "bgp"],
    20.8 +		AS_IF([test "$PLATFORM" = "bgl" -o "$PLATFORM" = "bgp" -o "$PLATFORM" = "bgq"],
    20.9  		[
   20.10  			AC_MSG_NOTICE([error: application execution wrapper not supported on this platform])
   20.11  			vtrun_error="yes"
    21.1 --- a/ompi/contrib/vt/vt/configure.in	Wed Nov 14 04:52:39 2012 +0000
    21.2 +++ b/ompi/contrib/vt/vt/configure.in	Wed Nov 14 13:29:18 2012 +0000
    21.3 @@ -99,6 +99,8 @@
    21.4  AC_PROG_CXXCPP
    21.5  AM_PROG_AS
    21.6  
    21.7 +AC_C_BIGENDIAN
    21.8 +
    21.9  # Do we want to support Fortran
   21.10  check_fortran="yes"
   21.11  force_fortran="no"
   21.12 @@ -203,11 +205,17 @@
   21.13  SHREXT=$shrext_cmds
   21.14  AC_SUBST(SHREXT)
   21.15  
   21.16 -# If building of shared libraries is desired on BlueGene/P, add '-Wl,-dy'
   21.17 -# to linker flags on BlueGene/P platforms to lead libtool to believe that
   21.18 -# dynamic linking is the default behaviour of the linker.
   21.19 -AS_IF([test "$PLATFORM" = "bgp" -a x"$enable_shared" = "xyes"],
   21.20 -[export LDFLAGS="$LDFLAGS $lt_prog_compiler_wl-dy"])
   21.21 +# If building of shared libraries is desired on BlueGene/P/Q, add '-Wl,-dy'
   21.22 +# to linker flags to lead libtool to believe that dynamic linking is the
   21.23 +# default behaviour of the linker.
   21.24 +AS_IF([test x"$enable_shared" = "xyes"],
   21.25 +[
   21.26 +	case $PLATFORM in
   21.27 +		bgp | bgq)
   21.28 +			export LDFLAGS="$LDFLAGS $lt_prog_compiler_wl-dy"
   21.29 +			;;
   21.30 +	esac
   21.31 +])
   21.32  
   21.33  # Check for BSD compatible symbol lister command
   21.34  #AC_PROG_NM # already been checked by AC_PROG_LIBTOOL
    22.1 --- a/ompi/contrib/vt/vt/doc/FILTER.SPEC	Wed Nov 14 04:52:39 2012 +0000
    22.2 +++ b/ompi/contrib/vt/vt/doc/FILTER.SPEC	Wed Nov 14 13:29:18 2012 +0000
    22.3 @@ -2,26 +2,33 @@
    22.4  #
    22.5  # Syntax: <functions> -- <limit> [S:<[min-]max-stack-level>] [R]
    22.6  #     or: <groups>    -- <limit> [S:<[min-]max-stack-level>] [R] G
    22.7 +#     or: <function-call-path> -- <limit> C
    22.8  #
    22.9  # functions, groups  Semicolon-separated list of functions/groups.
   22.10  #                    (can contain wildcards)
   22.11  #
   22.12 +# function-call-path Semicolon-separated list of functions in a call path.
   22.13 +#                    (MUST NOT contain wildcards)
   22.14 +#
   22.15  # limit              call limit
   22.16 -#                    Stop recording of function/group when the specified call
   22.17 +#                    Stop recording of functions/groups when the specified call
   22.18  #                    limit is reached.
   22.19 -#                    (0 = don't record function/group, -1 = record unlimited)
   22.20 +#                    (0 = don't record functions/groups, -1 = record unlimited)
   22.21  #
   22.22  # S:<[min-]max-stack-level>
   22.23  #                    minimum/maximum call stack level
   22.24 -#                    Don't record function/group called beyond the specified
   22.25 +#                    Don't record functions/groups called beyond the specified
   22.26  #                    stack level boundaries.
   22.27  #                    (values must be > 0, only valid if call limit is != 0)
   22.28  #
   22.29  # R                  Attribute for recursive filtering.
   22.30 -#                    Don't record callees of filtered function/group.
   22.31 +#                    Don't record callees of filtered functions/groups.
   22.32  #
   22.33  # G                  Attribute for filtering function groups.
   22.34  #
   22.35 +# C                  Attribute for filtering a call path.
   22.36 +#                    (implies recursive filtering 'R')
   22.37 +#
   22.38  # Example:
   22.39  #
   22.40  #   add;sub;mul;div -- 1000
   22.41 @@ -34,6 +41,25 @@
   22.42  # when they are called between call stack level 5 and 10 but at most 3000000
   22.43  # times.
   22.44  #
   22.45 +#
   22.46 +# Call Path Specific Filtering:
   22.47 +#
   22.48 +# The 'C' attribute indicates that the listed functions specify a call path
   22.49 +# - a specific sequence of function calls. Recording of the last function in the
   22.50 +# list will be stopped if the specified call limit is reached.
   22.51 +# The call path must begin with the root function, typically main, and MUST NOT
   22.52 +# contain wildcards.
   22.53 +#
   22.54 +# Example:
   22.55 +#
   22.56 +#   main;foo;bar -- 0 C
   22.57 +#
   22.58 +# This filter directive ensures that the function "bar", when called from "foo"
   22.59 +# which in turn was called from "main", will never be recorded. Since call path
   22.60 +# filtering implies recursiveness (see attribute 'R'), all callee functions of
   22.61 +# this call path will be excluded from recording as well.
   22.62 +#
   22.63 +#
   22.64  # Rank Specific Filtering:
   22.65  #
   22.66  # Use the '@' clauses to restrict all the following filter directives to the
    23.1 --- a/ompi/contrib/vt/vt/doc/UserManual.html	Wed Nov 14 04:52:39 2012 +0000
    23.2 +++ b/ompi/contrib/vt/vt/doc/UserManual.html	Wed Nov 14 13:29:18 2012 +0000
    23.3 @@ -67,7 +67,7 @@
    23.4  <P>
    23.5  
    23.6  <P>
    23.7 -<B><BIG CLASS="XHUGE">VampirTrace 5.13&nbsp;User Manual</BIG></B>
    23.8 +<B><BIG CLASS="XHUGE">VampirTrace 5.14&nbsp;User Manual</BIG></B>
    23.9  <BR>
   23.10  <BR>
   23.11  <BR>
   23.12 @@ -98,226 +98,226 @@
   23.13  <!--Table of Contents-->
   23.14  
   23.15  <UL CLASS="TofC">
   23.16 -<LI><A NAME="tex2html127"
   23.17 +<LI><A NAME="tex2html128"
   23.18    HREF="#SECTION00200000000000000000">Introduction</A>
   23.19 -<LI><A NAME="tex2html128"
   23.20 +<LI><A NAME="tex2html129"
   23.21    HREF="#SECTION00300000000000000000">Instrumentation</A>
   23.22  <UL>
   23.23 -<LI><A NAME="tex2html129"
   23.24 +<LI><A NAME="tex2html130"
   23.25    HREF="#SECTION00310000000000000000">Compiler Wrappers</A>
   23.26 -<LI><A NAME="tex2html130"
   23.27 +<LI><A NAME="tex2html131"
   23.28    HREF="#SECTION00320000000000000000">Instrumentation Types</A>
   23.29 -<LI><A NAME="tex2html131"
   23.30 +<LI><A NAME="tex2html132"
   23.31    HREF="#SECTION00330000000000000000">Automatic Instrumentation</A>
   23.32  <UL>
   23.33 -<LI><A NAME="tex2html132"
   23.34 +<LI><A NAME="tex2html133"
   23.35    HREF="#SECTION00331000000000000000">Supported Compilers</A>
   23.36 -<LI><A NAME="tex2html133"
   23.37 +<LI><A NAME="tex2html134"
   23.38    HREF="#SECTION00332000000000000000">Notes for Using the GNU, Intel, PathScale, or Open64 Compiler</A>
   23.39 -<LI><A NAME="tex2html134"
   23.40 +<LI><A NAME="tex2html135"
   23.41    HREF="#SECTION00333000000000000000">Notes on Instrumentation of Inline Functions</A>
   23.42 -<LI><A NAME="tex2html135"
   23.43 +<LI><A NAME="tex2html136"
   23.44    HREF="#SECTION00334000000000000000">Instrumentation of Loops with OpenUH Compiler</A>
   23.45  </UL>
   23.46 -<LI><A NAME="tex2html136"
   23.47 +<LI><A NAME="tex2html137"
   23.48    HREF="#SECTION00340000000000000000">Manual Instrumentation</A>
   23.49  <UL>
   23.50 -<LI><A NAME="tex2html137"
   23.51 +<LI><A NAME="tex2html138"
   23.52    HREF="#SECTION00341000000000000000">Using the VampirTrace API</A>
   23.53 -<LI><A NAME="tex2html138"
   23.54 +<LI><A NAME="tex2html139"
   23.55    HREF="#SECTION00342000000000000000">Measurement Controls</A>
   23.56  </UL>
   23.57 -<LI><A NAME="tex2html139"
   23.58 +<LI><A NAME="tex2html140"
   23.59    HREF="#SECTION00350000000000000000">Source Instrumentation Using PDT/TAU</A>
   23.60 -<LI><A NAME="tex2html140"
   23.61 +<LI><A NAME="tex2html141"
   23.62    HREF="#SECTION00360000000000000000">Binary Instrumentation Using Dyninst</A>
   23.63  <UL>
   23.64 -<LI><A NAME="tex2html141"
   23.65 +<LI><A NAME="tex2html142"
   23.66    HREF="#SECTION00361000000000000000">Static Binary Instrumentation</A>
   23.67  </UL>
   23.68 -<LI><A NAME="tex2html142"
   23.69 +<LI><A NAME="tex2html143"
   23.70    HREF="#SECTION00370000000000000000">Runtime Instrumentation Using VTRun</A>
   23.71 -<LI><A NAME="tex2html143"
   23.72 +<LI><A NAME="tex2html144"
   23.73    HREF="#SECTION00380000000000000000">Tracing Java Applications Using JVMTI</A>
   23.74 -<LI><A NAME="tex2html144"
   23.75 +<LI><A NAME="tex2html145"
   23.76    HREF="#SECTION00390000000000000000">Tracing Calls to 3rd-Party Libraries</A>
   23.77  </UL>
   23.78  <BR>
   23.79 -<LI><A NAME="tex2html145"
   23.80 +<LI><A NAME="tex2html146"
   23.81    HREF="#SECTION00400000000000000000">Runtime Measurement</A>
   23.82  <UL>
   23.83 -<LI><A NAME="tex2html146"
   23.84 +<LI><A NAME="tex2html147"
   23.85    HREF="#SECTION00410000000000000000">Trace File Name and Location</A>
   23.86 -<LI><A NAME="tex2html147"
   23.87 +<LI><A NAME="tex2html148"
   23.88    HREF="#SECTION00420000000000000000">Environment Variables</A>
   23.89 -<LI><A NAME="tex2html148"
   23.90 +<LI><A NAME="tex2html149"
   23.91    HREF="#SECTION00430000000000000000">Influencing Trace Buffer Size</A>
   23.92 -<LI><A NAME="tex2html149"
   23.93 +<LI><A NAME="tex2html150"
   23.94    HREF="#SECTION00440000000000000000">Profiling an Application</A>
   23.95 -<LI><A NAME="tex2html150"
   23.96 +<LI><A NAME="tex2html151"
   23.97    HREF="#SECTION00450000000000000000">Unification of Local Traces</A>
   23.98 -<LI><A NAME="tex2html151"
   23.99 +<LI><A NAME="tex2html152"
  23.100    HREF="#SECTION00460000000000000000">Synchronized Buffer Flush</A>
  23.101 -<LI><A NAME="tex2html152"
  23.102 +<LI><A NAME="tex2html153"
  23.103    HREF="#SECTION00470000000000000000">Enhanced Timer Synchronization</A>
  23.104 -<LI><A NAME="tex2html153"
  23.105 +<LI><A NAME="tex2html154"
  23.106    HREF="#SECTION00480000000000000000">Environment Configuration Using VTSetup</A>
  23.107  </UL>
  23.108  <BR>
  23.109 -<LI><A NAME="tex2html154"
  23.110 +<LI><A NAME="tex2html155"
  23.111    HREF="#SECTION00500000000000000000">Recording Additional Events and Counters</A>
  23.112  <UL>
  23.113 -<LI><A NAME="tex2html155"
  23.114 +<LI><A NAME="tex2html156"
  23.115    HREF="#SECTION00510000000000000000">Hardware Performance Counters</A>
  23.116 -<LI><A NAME="tex2html156"
  23.117 +<LI><A NAME="tex2html157"
  23.118    HREF="#SECTION00520000000000000000">Resource Usage Counters</A>
  23.119 -<LI><A NAME="tex2html157"
  23.120 +<LI><A NAME="tex2html158"
  23.121    HREF="#SECTION00530000000000000000">Memory Allocation Counter</A>
  23.122 -<LI><A NAME="tex2html158"
  23.123 +<LI><A NAME="tex2html159"
  23.124    HREF="#SECTION00540000000000000000">CPU ID Counter</A>
  23.125 -<LI><A NAME="tex2html159"
  23.126 +<LI><A NAME="tex2html160"
  23.127    HREF="#SECTION00550000000000000000">NVIDIA CUDA</A>
  23.128 -<LI><A NAME="tex2html160"
  23.129 +<LI><A NAME="tex2html161"
  23.130    HREF="#SECTION00560000000000000000">Pthread API Calls</A>
  23.131 -<LI><A NAME="tex2html161"
  23.132 +<LI><A NAME="tex2html162"
  23.133    HREF="#SECTION00570000000000000000">Plugin Counter Metrics</A>
  23.134 -<LI><A NAME="tex2html162"
  23.135 +<LI><A NAME="tex2html163"
  23.136    HREF="#SECTION00580000000000000000">I/O Calls</A>
  23.137 -<LI><A NAME="tex2html163"
  23.138 +<LI><A NAME="tex2html164"
  23.139    HREF="#SECTION00590000000000000000">fork/system/exec Calls</A>
  23.140 -<LI><A NAME="tex2html164"
  23.141 +<LI><A NAME="tex2html165"
  23.142    HREF="#SECTION005100000000000000000">MPI Correctness Checking Using UniMCI</A>
  23.143 -<LI><A NAME="tex2html165"
  23.144 +<LI><A NAME="tex2html166"
  23.145    HREF="#SECTION005110000000000000000">User-defined Counters</A>
  23.146 -<LI><A NAME="tex2html166"
  23.147 +<LI><A NAME="tex2html167"
  23.148    HREF="#SECTION005120000000000000000">User-defined Markers</A>
  23.149 -<LI><A NAME="tex2html167"
  23.150 +<LI><A NAME="tex2html168"
  23.151    HREF="#SECTION005130000000000000000">User-defined Communication</A>
  23.152  </UL>
  23.153  <BR>
  23.154 -<LI><A NAME="tex2html168"
  23.155 +<LI><A NAME="tex2html169"
  23.156    HREF="#SECTION00600000000000000000">Filtering &amp; Grouping</A>
  23.157  <UL>
  23.158 -<LI><A NAME="tex2html169"
  23.159 +<LI><A NAME="tex2html170"
  23.160    HREF="#SECTION00610000000000000000">Function Filtering</A>
  23.161 -<LI><A NAME="tex2html170"
  23.162 +<LI><A NAME="tex2html171"
  23.163    HREF="#SECTION00620000000000000000">Java Specific Filtering</A>
  23.164 -<LI><A NAME="tex2html171"
  23.165 +<LI><A NAME="tex2html172"
  23.166    HREF="#SECTION00630000000000000000">Function Grouping</A>
  23.167  </UL>
  23.168  <BR>
  23.169 -<LI><A NAME="tex2html172"
  23.170 +<LI><A NAME="tex2html173"
  23.171    HREF="#SECTION00700000000000000000">VampirTrace Installation</A>
  23.172  <UL>
  23.173 -<LI><A NAME="tex2html173"
  23.174 +<LI><A NAME="tex2html174"
  23.175    HREF="#SECTION00710000000000000000">Basics</A>
  23.176 -<LI><A NAME="tex2html174"
  23.177 +<LI><A NAME="tex2html175"
  23.178    HREF="#SECTION00720000000000000000">Configure Options</A>
  23.179 -<LI><A NAME="tex2html175"
  23.180 +<LI><A NAME="tex2html176"
  23.181    HREF="#SECTION00730000000000000000">Cross Compilation</A>
  23.182 -<LI><A NAME="tex2html176"
  23.183 +<LI><A NAME="tex2html177"
  23.184    HREF="#SECTION00740000000000000000">Environment Set-Up</A>
  23.185 -<LI><A NAME="tex2html177"
  23.186 +<LI><A NAME="tex2html178"
  23.187    HREF="#SECTION00750000000000000000">Notes for Developers</A>
  23.188  </UL>
  23.189  <BR>
  23.190 -<LI><A NAME="tex2html178"
  23.191 +<LI><A NAME="tex2html179"
  23.192    HREF="#SECTION00800000000000000000">Command Reference</A>
  23.193  <UL>
  23.194 -<LI><A NAME="tex2html179"
  23.195 +<LI><A NAME="tex2html180"
  23.196    HREF="#SECTION00810000000000000000">Compiler Wrappers (vtcc,vtcxx,vtfort)</A>
  23.197 -<LI><A NAME="tex2html180"
  23.198 +<LI><A NAME="tex2html181"
  23.199    HREF="#SECTION00820000000000000000">Local Trace Unifier (vtunify)</A>
  23.200 -<LI><A NAME="tex2html181"
  23.201 +<LI><A NAME="tex2html182"
  23.202    HREF="#SECTION00830000000000000000">Binary Instrumentor (vtdyn)</A>
  23.203 -<LI><A NAME="tex2html182"
  23.204 +<LI><A NAME="tex2html183"
  23.205    HREF="#SECTION00840000000000000000">Trace Filter Tool (vtfilter)</A>
  23.206 -<LI><A NAME="tex2html183"
  23.207 +<LI><A NAME="tex2html184"
  23.208    HREF="#SECTION00850000000000000000">Library Wrapper Generator (vtlibwrapgen)</A>
  23.209 -<LI><A NAME="tex2html184"
  23.210 +<LI><A NAME="tex2html185"
  23.211    HREF="#SECTION00860000000000000000">Application Execution Wrapper (vtrun)</A>
  23.212 -<LI><A NAME="tex2html185"
  23.213 +<LI><A NAME="tex2html186"
  23.214    HREF="#SECTION00870000000000000000">IOFSL server startup script (vtiofsl-start)</A>
  23.215 -<LI><A NAME="tex2html186"
  23.216 +<LI><A NAME="tex2html187"
  23.217    HREF="#SECTION00880000000000000000">IOFSL server shutdown script (vtiofsl-stop)</A>
  23.218  </UL>
  23.219  <BR>
  23.220 -<LI><A NAME="tex2html187"
  23.221 +<LI><A NAME="tex2html188"
  23.222    HREF="#SECTION00900000000000000000">Counter Specifications</A>
  23.223  <UL>
  23.224 -<LI><A NAME="tex2html188"
  23.225 +<LI><A NAME="tex2html189"
  23.226    HREF="#SECTION00910000000000000000">PAPI</A>
  23.227 -<LI><A NAME="tex2html189"
  23.228 +<LI><A NAME="tex2html190"
  23.229    HREF="#SECTION00920000000000000000">CPC</A>
  23.230 -<LI><A NAME="tex2html190"
  23.231 +<LI><A NAME="tex2html191"
  23.232    HREF="#SECTION00930000000000000000">NEC SX Hardware Performance Counter</A>
  23.233 -<LI><A NAME="tex2html191"
  23.234 +<LI><A NAME="tex2html192"
  23.235    HREF="#SECTION00940000000000000000">Resource Usage</A>
  23.236  </UL>
  23.237  <BR>
  23.238 -<LI><A NAME="tex2html192"
  23.239 +<LI><A NAME="tex2html193"
  23.240    HREF="#SECTION001000000000000000000">Using VampirTrace with IOFSL</A>
  23.241  <UL>
  23.242 -<LI><A NAME="tex2html193"
  23.243 +<LI><A NAME="tex2html194"
  23.244    HREF="#SECTION001010000000000000000">Introduction</A>
  23.245 -<LI><A NAME="tex2html194"
  23.246 +<LI><A NAME="tex2html195"
  23.247    HREF="#SECTION001020000000000000000">Overview</A>
  23.248  <UL>
  23.249 -<LI><A NAME="tex2html195"
  23.250 +<LI><A NAME="tex2html196"
  23.251    HREF="#SECTION001021000000000000000">File handling in OTF</A>
  23.252 -<LI><A NAME="tex2html196"
  23.253 +<LI><A NAME="tex2html197"
  23.254    HREF="#SECTION001022000000000000000">I/O Forwarding Scalability Layer</A>
  23.255 -<LI><A NAME="tex2html197"
  23.256 +<LI><A NAME="tex2html198"
  23.257    HREF="#SECTION001023000000000000000">Architecture</A>
  23.258  </UL>
  23.259 -<LI><A NAME="tex2html198"
  23.260 +<LI><A NAME="tex2html199"
  23.261    HREF="#SECTION001030000000000000000">Installation</A>
  23.262  <UL>
  23.263 -<LI><A NAME="tex2html199"
  23.264 +<LI><A NAME="tex2html200"
  23.265    HREF="#SECTION001031000000000000000">Support Libraries</A>
  23.266 -<LI><A NAME="tex2html200"
  23.267 +<LI><A NAME="tex2html201"
  23.268    HREF="#SECTION001032000000000000000">Building IOFSL</A>
  23.269 -<LI><A NAME="tex2html201"
  23.270 +<LI><A NAME="tex2html202"
  23.271    HREF="#SECTION001033000000000000000">Building VampirTrace &amp; OTF</A>
  23.272  </UL>
  23.273 -<LI><A NAME="tex2html202"
  23.274 +<LI><A NAME="tex2html203"
  23.275    HREF="#SECTION001040000000000000000">Usage Examples</A>
  23.276  <UL>
  23.277 -<LI><A NAME="tex2html203"
  23.278 +<LI><A NAME="tex2html204"
  23.279    HREF="#SECTION001041000000000000000">Using VampirTrace with IOFSL on Cray XK6 / with PBS</A>
  23.280 -<LI><A NAME="tex2html204"
  23.281 +<LI><A NAME="tex2html205"
  23.282    HREF="#SECTION001042000000000000000">Manual Usage</A>
  23.283  </UL>
  23.284  </UL>
  23.285  <BR>
  23.286 -<LI><A NAME="tex2html205"
  23.287 +<LI><A NAME="tex2html206"
  23.288    HREF="#SECTION001100000000000000000">FAQ</A>
  23.289  <UL>
  23.290 -<LI><A NAME="tex2html206"
  23.291 +<LI><A NAME="tex2html207"
  23.292    HREF="#SECTION001110000000000000000">Can I use different compilers for VampirTrace and my application?</A>
  23.293 -<LI><A NAME="tex2html207"
  23.294 +<LI><A NAME="tex2html208"
  23.295    HREF="#SECTION001120000000000000000">Why does my application need such a long time for starting?</A>
  23.296 -<LI><A NAME="tex2html208"
  23.297 +<LI><A NAME="tex2html209"
  23.298    HREF="#SECTION001130000000000000000">How can I limit compiler instrumentation?</A>
  23.299 -<LI><A NAME="tex2html209"
  23.300 +<LI><A NAME="tex2html210"
  23.301    HREF="#SECTION001140000000000000000">Why do I see multiple
  23.302  I/O operations for a single (un)formatted file read/write from my Fortran
  23.303  application?</A>
  23.304 -<LI><A NAME="tex2html210"
  23.305 +<LI><A NAME="tex2html211"
  23.306    HREF="#SECTION001150000000000000000">The application has run to completion, but there is no *.otf file. What can I do?</A>
  23.307 -<LI><A NAME="tex2html211"
  23.308 +<LI><A NAME="tex2html212"
  23.309    HREF="#SECTION001160000000000000000">What limitations are associated with "on/off" and buffer rewind?</A>
  23.310 -<LI><A NAME="tex2html212"
  23.311 +<LI><A NAME="tex2html213"
  23.312    HREF="#SECTION001170000000000000000">VampirTrace warns that it ``cannot lock file a.lock'', what's wrong?</A>
  23.313 -<LI><A NAME="tex2html213"
  23.314 +<LI><A NAME="tex2html214"
  23.315    HREF="#SECTION001180000000000000000">Can I relocate my VampirTrace installation without rebuilding from source?</A>
  23.316 -<LI><A NAME="tex2html214"
  23.317 +<LI><A NAME="tex2html215"
  23.318    HREF="#SECTION001190000000000000000">What are the byte counts in collective communication records?</A>
  23.319 -<LI><A NAME="tex2html215"
  23.320 +<LI><A NAME="tex2html216"
  23.321    HREF="#SECTION0011100000000000000000">I get ``error: unknown asm constraint letter''</A>
  23.322 -<LI><A NAME="tex2html216"
  23.323 +<LI><A NAME="tex2html217"
  23.324    HREF="#SECTION0011110000000000000000">I have a question that is not answered in this document!</A>
  23.325 -<LI><A NAME="tex2html217"
  23.326 +<LI><A NAME="tex2html218"
  23.327    HREF="#SECTION0011120000000000000000">I need support for additional features so I can trace application xyz.</A>
  23.328  </UL></UL>
  23.329  <!--End of Table of Contents-->
  23.330 @@ -355,20 +355,20 @@
  23.331  <P>
  23.332  After a successful tracing run, VampirTrace writes all collected data to a  
  23.333  trace file in the Open Trace Format (OTF)<A NAME="tex2html1"
  23.334 -  HREF="#foot1549"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.335 +  HREF="#foot1569"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.336   SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
  23.337  As a result, the information is available for post-mortem analysis and 
  23.338  visualization by various tools. 
  23.339  Most notably, VampirTrace provides the input data for the Vampir analysis  
  23.340  and visualization tool<A NAME="tex2html2"
  23.341 -  HREF="#foot1550"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.342 +  HREF="#foot1570"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.343   SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>. 
  23.344  
  23.345  <P>
  23.346  VampirTrace is included in OpenMPI&nbsp;1.3 and later versions.
  23.347  If not disabled explicitly, VampirTrace is built automatically when installing
  23.348  OpenMPI<A NAME="tex2html3"
  23.349 -  HREF="#foot1551"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.350 +  HREF="#foot1571"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.351   SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
  23.352  
  23.353  <P>
  23.354 @@ -1185,7 +1185,7 @@
  23.355    The names in between may contain wildcards as ``?'', ``*'', and ``#'', each entry gets a new line.
  23.356    The lists end with <TT>END[_FILE]_&lt;INCLUDE|EXCLUDE&gt;_LIST</TT>. For further information on selective 
  23.357    profiling have a look at the TAU documentation<A NAME="tex2html4"
  23.358 -  HREF="#foot1575"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.359 +  HREF="#foot1595"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.360   SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
  23.361    To announce the file through the compiler wrapper use the option <TT>-vt:tau</TT>:
  23.362  <PRE>
  23.363 @@ -1202,7 +1202,7 @@
  23.364  The option <TT>-vt:inst dyninst</TT> is used with the compiler wrapper to 
  23.365  instrument the application during runtime (binary instrumentation), by using 
  23.366  Dyninst<A NAME="tex2html5"
  23.367 -  HREF="#foot1576"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.368 +  HREF="#foot1596"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.369   SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
  23.370  Recompiling is not necessary for this kind of instrumentation,
  23.371  but relinking:
  23.372 @@ -1353,7 +1353,7 @@
  23.373    VampirTrace is also capable to trace calls to third party libraries, which come with
  23.374    at least one C header file even without the library's source code. If VampirTrace was
  23.375    built with support for library tracing (the CTool library<A NAME="tex2html6"
  23.376 -  HREF="#foot1577"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.377 +  HREF="#foot1597"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.378   SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>  is required), the tool <TT>vtlibwrapgen</TT> can be used to
  23.379    generate a wrapper library to intercept each call to the actual library functions.
  23.380    This wrapper library can be linked to the application or used in combination with the
  23.381 @@ -1670,6 +1670,10 @@
  23.382  <TD ALIGN="LEFT">Enable tracing of MPI events?</TD>
  23.383  <TD ALIGN="LEFT">yes</TD>
  23.384  </TR>
  23.385 +<TR><TD ALIGN="LEFT"><A NAME="VT_SETUP_VT_MPI_IGNORE_FILTER"></A><TT>VT_MPI_IGNORE_FILTER</TT></TD>
  23.386 +<TD ALIGN="LEFT">Enable tracing of MPI communication events although its corresponding functions are filtered?</TD>
  23.387 +<TD ALIGN="LEFT">no</TD>
  23.388 +</TR>
  23.389  <TR><TD ALIGN="LEFT"><A NAME="VT_SETUP_VT_OMPTRACE"></A><TT>VT_OMPTRACE</TT></TD>
  23.390  <TD ALIGN="LEFT">Enable tracing of OpenMP events instrumented by OPARI?</TD>
  23.391  <TD ALIGN="LEFT">yes</TD>
  23.392 @@ -2025,7 +2029,7 @@
  23.393  
  23.394  <UL>
  23.395  <LI>CLAPACK <A NAME="tex2html7"
  23.396 -  HREF="#foot1587"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.397 +  HREF="#foot1607"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.398   SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>
  23.399  </LI>
  23.400  <LI>AMD ACML
  23.401 @@ -3244,6 +3248,9 @@
  23.402  or
  23.403  <BR>  <TT>&lt;groups&gt; - &lt;limit&gt; [S:&lt;[min-]max-stack-level&gt;] [R] G</TT>
  23.404  <BR>
  23.405 +or
  23.406 +<BR>  <TT>&lt;function-call-path&gt; - &lt;limit&gt; C</TT>
  23.407 +<BR>
  23.408  <P>
  23.409  <BR>
  23.410  <TABLE CELLPADDING=3>
  23.411 @@ -3259,17 +3266,29 @@
  23.412  <TR><TD ALIGN="LEFT">&nbsp;</TD>
  23.413  <TD ALIGN="LEFT">&nbsp;</TD>
  23.414  </TR>
  23.415 +<TR><TD ALIGN="LEFT"><TT>function-call-path</TT></TD>
  23.416 +<TD ALIGN="LEFT">Semicolon-separated list of</TD>
  23.417 +</TR>
  23.418 +<TR><TD ALIGN="LEFT">&nbsp;</TD>
  23.419 +<TD ALIGN="LEFT">functions in a call path.</TD>
  23.420 +</TR>
  23.421 +<TR><TD ALIGN="LEFT">&nbsp;</TD>
  23.422 +<TD ALIGN="LEFT">(MUST NOT contain wildcards)</TD>
  23.423 +</TR>
  23.424 +<TR><TD ALIGN="LEFT">&nbsp;</TD>
  23.425 +<TD ALIGN="LEFT">&nbsp;</TD>
  23.426 +</TR>
  23.427  <TR><TD ALIGN="LEFT"><TT>limit</TT></TD>
  23.428  <TD ALIGN="LEFT">call limit</TD>
  23.429  </TR>
  23.430  <TR><TD ALIGN="LEFT">&nbsp;</TD>
  23.431 -<TD ALIGN="LEFT">Stop recording of function/group when</TD>
  23.432 +<TD ALIGN="LEFT">Stop recording of functions/groups when</TD>
  23.433  </TR>
  23.434  <TR><TD ALIGN="LEFT">&nbsp;</TD>
  23.435  <TD ALIGN="LEFT">the specified call limit is reached.</TD>
  23.436  </TR>
  23.437  <TR><TD ALIGN="LEFT">&nbsp;</TD>
  23.438 -<TD ALIGN="LEFT">(0 = don't record function/group,</TD>
  23.439 +<TD ALIGN="LEFT">(0 = don't record functions/groups,</TD>
  23.440  </TR>
  23.441  <TR><TD ALIGN="LEFT">&nbsp;</TD>
  23.442  <TD ALIGN="LEFT">-1 record unlimited)</TD>
  23.443 @@ -3277,6 +3296,12 @@
  23.444  <TR><TD ALIGN="LEFT">&nbsp;</TD>
  23.445  <TD ALIGN="LEFT">&nbsp;</TD>
  23.446  </TR>
  23.447 +</TABLE>
  23.448 +<BR>
  23.449 +
  23.450 +<P>
  23.451 +<BR>
  23.452 +<TABLE CELLPADDING=3>
  23.453  <TR><TD ALIGN="LEFT"><TT>S:&lt;[min-]max-stack-level&gt;</TT></TD>
  23.454  <TD ALIGN="LEFT">&nbsp;</TD>
  23.455  </TR>
  23.456 @@ -3284,7 +3309,7 @@
  23.457  <TD ALIGN="LEFT">minimum/maximum call stack level</TD>
  23.458  </TR>
  23.459  <TR><TD ALIGN="LEFT">&nbsp;</TD>
  23.460 -<TD ALIGN="LEFT">Don't record function/group called</TD>
  23.461 +<TD ALIGN="LEFT">Don't record functions/groups called</TD>
  23.462  </TR>
  23.463  <TR><TD ALIGN="LEFT">&nbsp;</TD>
  23.464  <TD ALIGN="LEFT">beyond the specified stack level</TD>
  23.465 @@ -3310,9 +3335,24 @@
  23.466  <TR><TD ALIGN="LEFT">&nbsp;</TD>
  23.467  <TD ALIGN="LEFT">function/group.</TD>
  23.468  </TR>
  23.469 +<TR><TD ALIGN="LEFT">&nbsp;</TD>
  23.470 +<TD ALIGN="LEFT">&nbsp;</TD>
  23.471 +</TR>
  23.472  <TR><TD ALIGN="LEFT"><TT>G</TT></TD>
  23.473  <TD ALIGN="LEFT">Attribute for filtering function groups.</TD>
  23.474  </TR>
  23.475 +<TR><TD ALIGN="LEFT">&nbsp;</TD>
  23.476 +<TD ALIGN="LEFT">&nbsp;</TD>
  23.477 +</TR>
  23.478 +<TR><TD ALIGN="LEFT"><TT>C</TT></TD>
  23.479 +<TD ALIGN="LEFT">Attribute for filtering a function call path.</TD>
  23.480 +</TR>
  23.481 +<TR><TD ALIGN="LEFT">&nbsp;</TD>
  23.482 +<TD ALIGN="LEFT">(implies recursive filtering <TT>R</TT>)</TD>
  23.483 +</TR>
  23.484 +<TR><TD ALIGN="LEFT">&nbsp;</TD>
  23.485 +<TD ALIGN="LEFT">&nbsp;</TD>
  23.486 +</TR>
  23.487  </TABLE>
  23.488  <BR>
  23.489  
  23.490 @@ -3338,6 +3378,33 @@
  23.491    tool to generate them automatically. This tool reads a provided trace
  23.492    and decides whether a function should be filtered or not, based on the evaluation of 
  23.493  <H2><A NAME="SECTION00611000000000000000">
  23.494 +Call Path Specific Filtering</A>
  23.495 +</H2>
  23.496 +
  23.497 +<P>
  23.498 +The 'C' attribute indicates that the listed functions specify a call path
  23.499 +  - a specific sequence of function calls. Recording of the last function in the
  23.500 +  list will be stopped if the specified call limit is reached.
  23.501 +  The call path must begin with the root function, typically main, and MUST NOT
  23.502 +  contain wildcards.
  23.503 +<BR>
  23.504 +<P>
  23.505 +Example:
  23.506 +
  23.507 +<P>
  23.508 +<PRE>
  23.509 +  main;foo;bar -- 0 C
  23.510 +</PRE>
  23.511 +
  23.512 +<P>
  23.513 +This filter directive ensures that the function <TT>bar</TT>, when called from <TT>foo</TT> which
  23.514 +  was in turn called from <TT>main</TT>, will never be recorded. Since call path filtering
  23.515 +  implies recursiveness (see attribute <TT>R</TT>), all callee functions of this call
  23.516 +  path will be excluded from recording as well.
  23.517 +
  23.518 +<P>
  23.519 +
  23.520 +<H2><A NAME="SECTION00612000000000000000">
  23.521  Rank Specific Filtering</A>
  23.522  </H2>
  23.523  
  23.524 @@ -3364,7 +3431,7 @@
  23.525  
  23.526  <P>
  23.527  
  23.528 -<H4><A NAME="SECTION00611010000000000000">
  23.529 +<H4><A NAME="SECTION00612010000000000000">
  23.530  Attention:</A>
  23.531  </H4>
  23.532    The rank specific rules are activated later than usual at MPI_Init, because
  23.533 @@ -3611,7 +3678,7 @@
  23.534  enable support for Dyninst instrumentation,
  23.535                                default: enable if found by configure.
  23.536                                <SPAN  CLASS="textbf">Note:</SPAN> Requires Dyninst<A NAME="tex2html8"
  23.537 -  HREF="#foot1609"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.538 +  HREF="#foot1629"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.539   SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A> version 6.1 or higher!
  23.540  
  23.541  <P>
  23.542 @@ -3633,9 +3700,9 @@
  23.543                                instrumentation by using TAU, default: enable if
  23.544                                found by configure.
  23.545                                <SPAN  CLASS="textbf">Note:</SPAN> Requires PDToolkit<A NAME="tex2html9"
  23.546 -  HREF="#foot1610"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.547 +  HREF="#foot1630"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.548   SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A> or TAU<A NAME="tex2html10"
  23.549 -  HREF="#foot1611"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.550 +  HREF="#foot1631"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.551   SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>!
  23.552  
  23.553  <P>
  23.554 @@ -3939,6 +4006,13 @@
  23.555  
  23.556  <P>
  23.557  </DD>
  23.558 +<DT><STRONG><TT>-with-pdt-comment=PDTCOMMENT</TT></STRONG></DT>
  23.559 +<DD>&nbsp;
  23.560 +<BR>
  23.561 +give the command for PDT comment parser, default: <TT>pdbcomment</TT>
  23.562 +
  23.563 +<P>
  23.564 +</DD>
  23.565  <DT><STRONG><TT>-with-papi-dir=PAPIDIR</TT></STRONG></DT>
  23.566  <DD>&nbsp;
  23.567  <BR>
  23.568 @@ -4204,7 +4278,7 @@
  23.569  
  23.570  <P>
  23.571  To enable support for generating wrappers for 3rd-party libraries, the C code parser CTool<A NAME="tex2html11"
  23.572 -  HREF="#foot1612"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.573 +  HREF="#foot1632"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.574   SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A> is needed:
  23.575  
  23.576  <P>
  23.577 @@ -4338,7 +4412,7 @@
  23.578  Examples:
  23.579  
  23.580  <P>
  23.581 -BlueGene/P:
  23.582 +BlueGene/P and BlueGene/Q:
  23.583  
  23.584  <P>
  23.585  <PRE>
  23.586 @@ -4446,6 +4520,25 @@
  23.587      tauinst           automatic source code instrumentation by
  23.588                        using PDT/TAU
  23.589  
  23.590 +  -vt:inst-exclude-file-list &lt;file&gt;[,file,...]
  23.591 +                      Set list of source files to be excluded
  23.592 +                      from the automatic instrumentation by the
  23.593 +                      compiler or PDT/TAU.
  23.594 +                      (file names can contain wildcards)
  23.595 +
  23.596 +  -vt:inst-exclude-file &lt;file&gt;
  23.597 +                      Set pathname of file containing a list of
  23.598 +                      source files to be excluded from the
  23.599 +                      automatic instrumentation by the compiler
  23.600 +                      or PDT/TAU.
  23.601 +                      (file names can contain wildcards, one file
  23.602 +                       name per line)
  23.603 +
  23.604 +   Note when using an exclusion list for automatic compiler
  23.605 +   instrumentation:
  23.606 +   If a source file from the exclusion list is involved in a
  23.607 +   compile step, the instrumentation is disabled for this step.
  23.608 +
  23.609    -vt:opari &lt;!args&gt;   Set options for OPARI command. (see
  23.610                        share/vampirtrace/doc/opari/Readme.html)
  23.611  
  23.612 @@ -4457,6 +4550,19 @@
  23.613                        Set pathname of the OPARI runtime table file.
  23.614                        (default: opari.tab.c)
  23.615  
  23.616 +  -vt:opari-exclude-file-list &lt;file&gt;[,file,...]
  23.617 +                      Set list of source files to be excluded from
  23.618 +                      the instrumentation of OpenMP constructs by
  23.619 +                      OPARI.
  23.620 +                      (file names can contain wildcards)
  23.621 +
  23.622 +  -vt:opari-exclude-file &lt;file&gt;
  23.623 +                      Set pathname of file containing a list of
  23.624 +                      source files to be excluded from the
  23.625 +                      instrumentation of OpenMP constructs by OPARI.
  23.626 +                      (file names can contain wildcards, one file name
  23.627 +                       per line)
  23.628 +
  23.629    -vt:noopari         Disable instrumentation of OpenMP constructs
  23.630                        by OPARI.
  23.631  
  23.632 @@ -5334,7 +5440,7 @@
  23.633  
  23.634  <P>
  23.635  When using the IOFSL integration, all write requests in OTF are issued using the zoidfs API<A NAME="tex2html12"
  23.636 -  HREF="#foot3199"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.637 +  HREF="#foot3235"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.638   SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>. Those writes are handled by the IOFSL forwarding servers and aggregated into a single file using the atomic append feature. The offset in the multifile is returned to OTF and stored in a second file, the so called index file, in order to maintain the mapping between written blocks and streams. For any block of a stream written into the multifile, the index file contains the ID of the stream, the start of the block, and its length. This allows for an efficient reading of blocks since only the index file has to be scanned for entries for a given stream ID. Additionally, a large number of logical files (streams) can be stored using only two physical files.
  23.639  
  23.640  <P>
  23.641 @@ -5346,7 +5452,7 @@
  23.642  <P>
  23.643  In order to use this setup, IOFSL and VampirTrace have to be compiled in order.
  23.644  In the following sections, the directory <TT>&lt;install_dir&gt;</TT> should be replaced with a - possibly user-local - directory used for installation, e.g. <TT>$HOME/local</TT><A NAME="tex2html13"
  23.645 -  HREF="#foot3203"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.646 +  HREF="#foot3239"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.647   SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
  23.648  The installation procedure for IOFSL is described at https://trac.mcs.anl.gov/projects/iofsl/wiki/Building.
  23.649  Currently the <TT>iofsl_vampir</TT> git branch is required.
  23.650 @@ -5525,7 +5631,7 @@
  23.651  PBS Options</A>
  23.652  </H4>
  23.653  It is important to reserve a sufficient number of processor cores. The number of cores requested must be large enough to contain the number of application cores plus the number of cores required for the IOFSL server instances. Each IOFSL server will run on a dedicated node<A NAME="tex2html14"
  23.654 -  HREF="#foot3247"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.655 +  HREF="#foot3283"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.656   SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>. Thus N_allocated &#8805; ((N_IOFSL * 16) + N_Application) must hold.
  23.657  <P>
  23.658  Example using 64 server instances:
  23.659 @@ -5622,10 +5728,10 @@
  23.660  The server is configured using a configuration file.
  23.661  At server start-up, this file is provided using the <TT>-config</TT> argument.
  23.662  The cray XK6 configuration file is provided in the package<A NAME="tex2html15"
  23.663 -  HREF="#foot3387"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.664 +  HREF="#foot3423"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.665   SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
  23.666  For more information about the options available please refer to the IOFSL documentation<A NAME="tex2html16"
  23.667 -  HREF="#foot3388"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.668 +  HREF="#foot3424"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.669   SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
  23.670  The most important option is the <TT>serverlist</TT> entry in the <TT>bmi</TT> section which takes a list of server addresses, e.g. :
  23.671  <PRE>
  23.672 @@ -6031,99 +6137,99 @@
  23.673  into the official VampirTrace package.
  23.674  <BR><HR><H4>Footnotes</H4>
  23.675  <DL>
  23.676 -<DT><A NAME="foot1549">... (OTF)</A><A
  23.677 +<DT><A NAME="foot1569">... (OTF)</A><A
  23.678   HREF="#tex2html1"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.679   SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
  23.680  <DD>http://www.tu-dresden.de/zih/otf
  23.681  
  23.682  </DD>
  23.683 -<DT><A NAME="foot1550">... tool </A><A
  23.684 +<DT><A NAME="foot1570">... tool </A><A
  23.685   HREF="#tex2html2"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.686   SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
  23.687  <DD>http://www.vampir.eu
  23.688  
  23.689  </DD>
  23.690 -<DT><A NAME="foot1551">...
  23.691 +<DT><A NAME="foot1571">...
  23.692  Open MPI </A><A
  23.693   HREF="#tex2html3"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.694   SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
  23.695  <DD>http://www.open-mpi.org/faq/?category=vampirtrace
  23.696  
  23.697  </DD>
  23.698 -<DT><A NAME="foot1575">... documentation </A><A
  23.699 +<DT><A NAME="foot1595">... documentation </A><A
  23.700   HREF="#tex2html4"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.701   SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
  23.702  <DD>http://www.cs.uoregon.edu/Research/tau/docs/newguide/bk05ch02.html#d0e3770
  23.703  
  23.704  </DD>
  23.705 -<DT><A NAME="foot1576">...
  23.706 +<DT><A NAME="foot1596">...
  23.707  Dyninst </A><A
  23.708   HREF="#tex2html5"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.709   SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
  23.710  <DD>http://www.dyninst.org
  23.711  
  23.712  </DD>
  23.713 -<DT><A NAME="foot1577">... library </A><A
  23.714 +<DT><A NAME="foot1597">... library </A><A
  23.715   HREF="#tex2html6"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.716   SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
  23.717  <DD>http://sourceforge.net/projects/ctool
  23.718  
  23.719  </DD>
  23.720 -<DT><A NAME="foot1587">... CLAPACK</A><A
  23.721 +<DT><A NAME="foot1607">... CLAPACK</A><A
  23.722   HREF="#tex2html7"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.723   SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
  23.724  <DD>www.netlib.org/clapack
  23.725  
  23.726  </DD>
  23.727 -<DT><A NAME="foot1609">... Dyninst </A><A
  23.728 +<DT><A NAME="foot1629">... Dyninst </A><A
  23.729   HREF="#tex2html8"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.730   SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
  23.731  <DD>http://www.dyninst.org
  23.732  
  23.733  </DD>
  23.734 -<DT><A NAME="foot1610">... PDToolkit </A><A
  23.735 +<DT><A NAME="foot1630">... PDToolkit </A><A
  23.736   HREF="#tex2html9"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.737   SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
  23.738  <DD>http://www.cs.uoregon.edu/research/pdt/home.php
  23.739  
  23.740  </DD>
  23.741 -<DT><A NAME="foot1611">... TAU </A><A
  23.742 +<DT><A NAME="foot1631">... TAU </A><A
  23.743   HREF="#tex2html10"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.744   SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
  23.745  <DD>http://tau.uoregon.edu
  23.746  
  23.747  </DD>
  23.748 -<DT><A NAME="foot1612">... CTool </A><A
  23.749 +<DT><A NAME="foot1632">... CTool </A><A
  23.750   HREF="#tex2html11"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.751   SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
  23.752  <DD>http://sourceforge.net/projects/ctool
  23.753  
  23.754  </DD>
  23.755 -<DT><A NAME="foot3199">... API</A><A
  23.756 +<DT><A NAME="foot3235">... API</A><A
  23.757   HREF="#tex2html12"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.758   SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
  23.759  <DD>The OTF master control file is written using POSIX I/O in any case.
  23.760  
  23.761  </DD>
  23.762 -<DT><A NAME="foot3203">...$HOME/local</A><A
  23.763 +<DT><A NAME="foot3239">...$HOME/local</A><A
  23.764   HREF="#tex2html13"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.765   SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
  23.766  <DD>The software packages can be installed in different directories.
  23.767  
  23.768  </DD>
  23.769 -<DT><A NAME="foot3247">... node</A><A
  23.770 +<DT><A NAME="foot3283">... node</A><A
  23.771   HREF="#tex2html14"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.772   SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
  23.773  <DD>The server makes use of all the node's resources by multithreading and allocating large I/O buffers
  23.774  
  23.775  </DD>
  23.776 -<DT><A NAME="foot3387">... package</A><A
  23.777 +<DT><A NAME="foot3423">... package</A><A
  23.778   HREF="#tex2html15"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.779   SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
  23.780  <DD><TT>tools/vtiofsl/platform/crayxk6-iofwd.cf</TT>
  23.781  
  23.782  </DD>
  23.783 -<DT><A NAME="foot3388">... documentation</A><A
  23.784 +<DT><A NAME="foot3424">... documentation</A><A
  23.785   HREF="#tex2html16"><SUP><IMG  ALIGN="BOTTOM" BORDER="1" ALT="[*]"
  23.786   SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
  23.787  <DD>https://trac.mcs.anl.gov/projects/iofsl/wiki/ConfigurationFile
    24.1 Binary file ompi/contrib/vt/vt/doc/UserManual.pdf has changed
    25.1 --- a/ompi/contrib/vt/vt/extlib/otf/AUTHORS	Wed Nov 14 04:52:39 2012 +0000
    25.2 +++ b/ompi/contrib/vt/vt/extlib/otf/AUTHORS	Wed Nov 14 13:29:18 2012 +0000
    25.3 @@ -6,6 +6,7 @@
    25.4  Robert Dietrich <robert.dietrich AT zih.tu-dresden.de>
    25.5  Jens Doleschal <jens.doleschal AT tu-dresden.de>
    25.6  Thomas Ilsche <thomas.ilsche AT tu-dresden.de>
    25.7 +Mathias Korepkat <mathias.korepkat AT tu-dresden.de>
    25.8  Andre Groetzsch <andre.groetzsch AT tu-dresden.de>
    25.9  Michael Heyde <michael.heyde AT tu-dresden.de>
   25.10  Michael Kluge <michael.kluge AT tu-dresden.de>
    26.1 --- a/ompi/contrib/vt/vt/extlib/otf/ChangeLog	Wed Nov 14 04:52:39 2012 +0000
    26.2 +++ b/ompi/contrib/vt/vt/extlib/otf/ChangeLog	Wed Nov 14 13:29:18 2012 +0000
    26.3 @@ -1,12 +1,27 @@
    26.4 -1.11.3openmpi
    26.5 -	- otfaux: fixed build errors on Solaris and NetBSD
    26.6 +1.12.1openmpi
    26.7 +	- implemented workaround to avoid setting otf_errno when a false
    26.8 +	  error happens during OTF_RBuffer_Jump to a bogus zlib sync point
    26.9  
   26.10 -1.11.2openmpi
   26.11 +1.12salmon
   26.12 +	- OTF library:
   26.13 +		- fixed potential segmentation fault when appending a new
   26.14 +		  key-value pair to a list after removing another one
   26.15 +	- OTFAUX library:
   26.16 +		- 'OTFAUX_ThumbnailReader_read()' now correctly returns success
   26.17 +		  after reading the thumbnail
   26.18 +	- otfprofile:
   26.19 +		- create VampirTrace filter file from irregularity analysis
   26.20 +	- otfaux:
   26.21 +		- fixed build errors on Solaris and NetBSD
   26.22 +		- fixed "time not increasing" error when generating inline
   26.23 +		  snapshots
   26.24 +
   26.25 +1.11.2goldfish
   26.26  	- lib OTFAUX:
   26.27  		- speed-up messages matching, if no snapshots should
   26.28  		  be generated
   26.29  
   26.30 -1.11.1openmpi
   26.31 +1.11.1goldfish
   26.32  	- new 'OTF_MasterControl_clone()' function to simplify making a copy
   26.33  	  of a master control object
   26.34  	- otfaux:
    27.1 --- a/ompi/contrib/vt/vt/extlib/otf/VERSION	Wed Nov 14 04:52:39 2012 +0000
    27.2 +++ b/ompi/contrib/vt/vt/extlib/otf/VERSION	Wed Nov 14 13:29:18 2012 +0000
    27.3 @@ -6,8 +6,8 @@
    27.4  # <major>.<minor>.<sub>. If sub is zero, then it is omitted.
    27.5  
    27.6  major=1
    27.7 -minor=11
    27.8 -sub=3
    27.9 +minor=12
   27.10 +sub=1
   27.11  
   27.12  # string is used for alpha, beta, or release tags. If it is non-empty, it will
   27.13  # be appended to the version number.
   27.14 @@ -24,7 +24,8 @@
   27.15  # 1.8.*        sturgeon
   27.16  # 1.9.*        sawfish
   27.17  # 1.10.*       coelacanth
   27.18 -# 1.11.*       goldfish 
   27.19 +# 1.11.*       goldfish
   27.20 +# 1.12.*       salmon
   27.21  #
   27.22  
   27.23  string=openmpi
   27.24 @@ -49,5 +50,5 @@
   27.25  #                      release, age must be incremented. Otherwise, reset age
   27.26  #                      to '0'.
   27.27  
   27.28 -library=6:2:5
   27.29 +library=6:3:5
   27.30  
    28.1 Binary file ompi/contrib/vt/vt/extlib/otf/docu/tools/otfprofile.pdf has changed
    29.1 Binary file ompi/contrib/vt/vt/extlib/otf/docu/tools/otftools.pdf has changed
    30.1 --- a/ompi/contrib/vt/vt/extlib/otf/otfauxlib/OTFAUX_Process.c	Wed Nov 14 04:52:39 2012 +0000
    30.2 +++ b/ompi/contrib/vt/vt/extlib/otf/otfauxlib/OTFAUX_Process.c	Wed Nov 14 13:29:18 2012 +0000
    30.3 @@ -390,12 +390,12 @@
    30.4  
    30.5  int
    30.6  OTFAUX_Process_enqueueRecv( OTFAUX_Process* process,
    30.7 -                                 uint64_t eventTime,
    30.8 -                                 uint32_t receiverProcessId,
    30.9 -                                 uint32_t comm,
   30.10 -                                 uint32_t tag,
   30.11 -                                 uint32_t length,
   30.12 -                                 uint32_t scl )
   30.13 +                            uint64_t eventTime,
   30.14 +                            uint32_t receiverProcessId,
   30.15 +                            uint32_t comm,
   30.16 +                            uint32_t tag,
   30.17 +                            uint32_t length,
   30.18 +                            uint32_t scl )
   30.19  {
   30.20      OTFAUX_ReciveQueue* queue;
   30.21      OTFAUX_Message* recv;
   30.22 @@ -403,8 +403,6 @@
   30.23      if ( !process )
   30.24          return 0;
   30.25  
   30.26 -    cleanup_pending_sends( process, eventTime );
   30.27 -
   30.28      queue = get_queue( process, receiverProcessId, comm, tag, 1 );
   30.29      if ( !queue )
   30.30          return 0;
   30.31 @@ -429,18 +427,16 @@
   30.32  
   30.33  int
   30.34  OTFAUX_Process_enterFunction( OTFAUX_Process* process,
   30.35 -                                   uint64_t eventTime,
   30.36 -                                   uint32_t function,
   30.37 -                                   uint32_t scl,
   30.38 -                                   void* eventData )
   30.39 +                              uint64_t eventTime,
   30.40 +                              uint32_t function,
   30.41 +                              uint32_t scl,
   30.42 +                              void* eventData )
   30.43  {
   30.44      OTFAUX_FunctionCall* call;
   30.45  
   30.46      if ( !process )
   30.47          return 0;
   30.48  
   30.49 -    cleanup_pending_sends( process, eventTime );
   30.50 -
   30.51      if ( !stack_empty( &process->sharedState->functionCalls ) )
   30.52      {
   30.53          /* take it out of the object pool */
   30.54 @@ -477,8 +473,6 @@
   30.55      if ( !process )
   30.56          return 0;
   30.57  
   30.58 -    cleanup_pending_sends( process, eventTime );
   30.59 -
   30.60      if ( stack_empty( &process->functionStack ) )
   30.61          return 0;
   30.62  
   30.63 @@ -493,16 +487,16 @@
   30.64  
   30.65  int
   30.66  OTFAUX_Process_sendMessage( OTFAUX_Process* process,
   30.67 -                                 uint64_t eventTime,
   30.68 -                                 uint32_t receiverProcessId,
   30.69 -                                 uint32_t comm,
   30.70 -                                 uint32_t tag,
   30.71 -                                 uint32_t length,
   30.72 -                                 uint32_t scl,
   30.73 -                                 uint64_t* recvTime,
   30.74 -                                 uint32_t* recvLength,
   30.75 -                                 uint32_t* recvScl,
   30.76 -                                 void* eventData )
   30.77 +                            uint64_t eventTime,
   30.78 +                            uint32_t receiverProcessId,
   30.79 +                            uint32_t comm,
   30.80 +                            uint32_t tag,
   30.81 +                            uint32_t length,
   30.82 +                            uint32_t scl,
   30.83 +                            uint64_t* recvTime,
   30.84 +                            uint32_t* recvLength,
   30.85 +                            uint32_t* recvScl,
   30.86 +                            void* eventData )
   30.87  {
   30.88      OTFAUX_ReciveQueue* queue;
   30.89      OTFAUX_Message* msg;
   30.90 @@ -510,8 +504,6 @@
   30.91      if ( !process )
   30.92          return 0;
   30.93  
   30.94 -    cleanup_pending_sends( process, eventTime );
   30.95 -
   30.96      /* MsgMatching */
   30.97      queue = get_queue( process, receiverProcessId, comm, tag, 0 );
   30.98      if ( !queue )
   30.99 @@ -529,7 +521,16 @@
  30.100      *recvScl = msg->recvScl;
  30.101      msg->eventData = eventData;
  30.102  
  30.103 -    stack_add( &process->pendingSends, &msg->e );
  30.104 +    /* only maintain the pending messages, if we want to write snapshots */
  30.105 +    if ( process->sharedState->writeSendSnapshot )
  30.106 +    {
  30.107 +        stack_add( &process->pendingSends, &msg->e );
  30.108 +    }
  30.109 +    else
  30.110 +    {
  30.111 +        release_event_data( process, msg->eventData );
  30.112 +        free( msg );
  30.113 +    }
  30.114  
  30.115      return 1;
  30.116  }
  30.117 @@ -592,8 +593,6 @@
  30.118      if ( !process )
  30.119          return 0;
  30.120  
  30.121 -    cleanup_pending_sends( process, eventTime );
  30.122 -
  30.123      entry = stack_next( &process->pendingCollOps );
  30.124      while ( entry != &process->pendingCollOps )
  30.125      {
  30.126 @@ -647,8 +646,6 @@
  30.127      if ( !process )
  30.128          return 0;
  30.129  
  30.130 -    cleanup_pending_sends( process, eventTime );
  30.131 -
  30.132      entry = stack_next( &process->pendingCollOps );
  30.133      while ( entry != &process->pendingCollOps )
  30.134      {
  30.135 @@ -671,11 +668,11 @@
  30.136  
  30.137  int
  30.138  OTFAUX_Process_openFile( OTFAUX_Process* process,
  30.139 -                              uint64_t eventTime,
  30.140 -                              uint32_t fileId,
  30.141 -                              uint64_t handleId,
  30.142 -                              uint32_t scl,
  30.143 -                              void* eventData )
  30.144 +                         uint64_t eventTime,
  30.145 +                         uint32_t fileId,
  30.146 +                         uint64_t handleId,
  30.147 +                         uint32_t scl,
  30.148 +                         void* eventData )
  30.149  {
  30.150      OTFAUX_File* file;
  30.151      Stack *entry;
  30.152 @@ -683,8 +680,6 @@
  30.153      if ( !process )
  30.154          return 0;
  30.155  
  30.156 -    cleanup_pending_sends( process, eventTime );
  30.157 -
  30.158      entry = stack_next( &process->openFiles );
  30.159      while ( entry != &process->openFiles )
  30.160      {
  30.161 @@ -735,8 +730,6 @@
  30.162      if ( !process )
  30.163          return 0;
  30.164  
  30.165 -    cleanup_pending_sends( process, eventTime );
  30.166 -
  30.167      entry = stack_next( &process->openFiles );
  30.168      while ( entry != &process->openFiles )
  30.169      {
  30.170 @@ -770,8 +763,6 @@
  30.171      if ( !process )
  30.172          return 0;
  30.173  
  30.174 -    cleanup_pending_sends( process, eventTime );
  30.175 -
  30.176      entry = stack_next( &process->pendingFileOps );
  30.177      while ( entry != &process->pendingFileOps )
  30.178      {
  30.179 @@ -821,8 +812,6 @@
  30.180      if ( !process )
  30.181          return 0;
  30.182  
  30.183 -    cleanup_pending_sends( process, eventTime );
  30.184 -
  30.185      entry = stack_next( &process->pendingFileOps );
  30.186      while ( entry != &process->pendingFileOps )
  30.187      {
  30.188 @@ -931,14 +920,14 @@
  30.189      int ret = 1;
  30.190      Stack* entry;
  30.191  
  30.192 -    cleanup_pending_sends( process, snapshotTime );
  30.193 -
  30.194      if ( !process )
  30.195          return 0;
  30.196  
  30.197      if ( !process->sharedState->writeSendSnapshot )
  30.198          return 1;
  30.199  
  30.200 +    cleanup_pending_sends( process, snapshotTime );
  30.201 +
  30.202      entry = stack_next( &process->pendingSends );
  30.203      while ( ret && entry != &process->pendingSends )
  30.204      {
    31.1 --- a/ompi/contrib/vt/vt/extlib/otf/otfauxlib/OTFAUX_State.c	Wed Nov 14 04:52:39 2012 +0000
    31.2 +++ b/ompi/contrib/vt/vt/extlib/otf/otfauxlib/OTFAUX_State.c	Wed Nov 14 13:29:18 2012 +0000
    31.3 @@ -673,14 +673,14 @@
    31.4                                                      snapshotTime,
    31.5                                                      userData );
    31.6              ret = ret && OTFAUX_Process_writeSends( process,
    31.7 -                                       snapshotTime,
    31.8 -                                       userData );
    31.9 +                                                    snapshotTime,
   31.10 +                                                    userData );
   31.11              ret = ret && OTFAUX_Process_writeOpenFiles( process,
   31.12 -                                           snapshotTime,
   31.13 -                                           userData );
   31.14 +                                                        snapshotTime,
   31.15 +                                                        userData );
   31.16              ret = ret && OTFAUX_Process_writeCollOps( process,
   31.17 -                                         snapshotTime,
   31.18 -                                         userData );
   31.19 +                                                      snapshotTime,
   31.20 +                                                      userData );
   31.21              ret = ret && OTFAUX_Process_writeFileOps( process,
   31.22                                                        snapshotTime,
   31.23                                                        userData );
    32.1 --- a/ompi/contrib/vt/vt/extlib/otf/otfauxlib/OTFAUX_Thumbnail.c	Wed Nov 14 04:52:39 2012 +0000
    32.2 +++ b/ompi/contrib/vt/vt/extlib/otf/otfauxlib/OTFAUX_Thumbnail.c	Wed Nov 14 13:29:18 2012 +0000
    32.3 @@ -235,13 +235,14 @@
    32.4      status = 1;
    32.5      for (i = 0; i < tn_reader->nprocs; i++)
    32.6      {
    32.7 +        char comma;
    32.8          status = fscanf( tn_reader->file, "%llx:", &process );
    32.9          if (1 != status)
   32.10              goto out;
   32.11          for (j = 0; j < tn_reader->width; ++j)
   32.12          {
   32.13 -            status = fscanf( tn_reader->file, "%x,", &functions[j] );
   32.14 -            if (1 != status)
   32.15 +            status = fscanf( tn_reader->file, "%x%c", &functions[j], &comma );
   32.16 +            if ( 2 != status || comma != ',' )
   32.17                  goto out;
   32.18          }
   32.19          if (handler)
   32.20 @@ -249,12 +250,17 @@
   32.21              handler( data, process, functions );
   32.22          }
   32.23  
   32.24 -        if ( fgetc( tn_reader->file ) != '\n' && !feof( tn_reader->file ) )
   32.25 +        if ( fgetc( tn_reader->file ) != '\n' )
   32.26          {
   32.27 -            break;
   32.28 +            goto out;
   32.29          }
   32.30      }
   32.31  
   32.32 +    if ( fgetc( tn_reader->file ) != EOF )
   32.33 +    {
   32.34 +        return 0;
   32.35 +    }
   32.36 +
   32.37  out:
   32.38      free( functions );
   32.39  
    33.1 --- a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_KeyValue.c	Wed Nov 14 04:52:39 2012 +0000
    33.2 +++ b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_KeyValue.c	Wed Nov 14 13:29:18 2012 +0000
    33.3 @@ -230,7 +230,7 @@
    33.4  
    33.5  	p->kvPair = pair;
    33.6  	list->kvCurrent = p->kvNext;
    33.7 -    
    33.8 +
    33.9  	list->count++;
   33.10  
   33.11  	return 0;
   33.12 @@ -819,7 +819,13 @@
   33.13  			if ( p->kvNext ) {
   33.14  				p->kvNext->kvPrev = p->kvPrev;
   33.15  			}
   33.16 -			free(p);
   33.17 +
   33.18 +			/* move the deleted element after the end of the list */
   33.19 +			p->kvPrev = list->kvEnd;
   33.20 +			p->kvNext = NULL;
   33.21 +			list->kvEnd->kvNext=p;
   33.22 +			list->kvEnd= p;
   33.23 +
   33.24  			list->count--;
   33.25  			return 0;
   33.26  		}
    34.1 --- a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_KeyValue.h	Wed Nov 14 04:52:39 2012 +0000
    34.2 +++ b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_KeyValue.h	Wed Nov 14 13:29:18 2012 +0000
    34.3 @@ -231,9 +231,9 @@
    34.4  	uint32_t key_count;             /* number of different keys in list --> user-relevant */
    34.5  	uint32_t count;                 /* total number of entries in list (treat byte arrays particular) --> internal use only */
    34.6  	uint32_t size;                  /* number of allocated entries --> internal */
    34.7 -	OTF_KeyValuePairList *kvBegin;
    34.8 -	OTF_KeyValuePairList *kvEnd;
    34.9 -	OTF_KeyValuePairList *kvCurrent;
   34.10 +	OTF_KeyValuePairList *kvBegin;   /* first element of the list */
   34.11 +	OTF_KeyValuePairList *kvEnd;     /* last allocated element of the list, may be used or not */
   34.12 +	OTF_KeyValuePairList *kvCurrent; /* first unused element in the list, insert new ones here */
   34.13  };
   34.14  
   34.15  /** @endcond */
    35.1 --- a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_RBuffer.c	Wed Nov 14 04:52:39 2012 +0000
    35.2 +++ b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_RBuffer.c	Wed Nov 14 13:29:18 2012 +0000
    35.3 @@ -660,6 +660,9 @@
    35.4  
    35.5  
    35.6  	int ret;
    35.7 +#ifdef HAVE_ZLIB
    35.8 +	int otf_errno_backup;
    35.9 +#endif
   35.10  	size_t read;
   35.11  	/* uint64_t currentPos; */
   35.12  	uint32_t i;
   35.13 @@ -676,7 +679,43 @@
   35.14  	}
   35.15  
   35.16  	rbuffer->pos= 0;
   35.17 +#ifdef HAVE_ZLIB
   35.18 +	/*
   35.19 +	 * ooooooooooooo   .oooooo.   oooooooooo.     .oooooo.
   35.20 +	 * 8'   888   `8  d8P'  `Y8b  `888'   `Y8b   d8P'  `Y8b
   35.21 +	 *      888      888      888  888      888 888      888
   35.22 +	 *      888      888      888  888      888 888      888
   35.23 +	 *      888      888      888  888      888 888      888
   35.24 +	 *      888      `88b    d88'  888     d88' `88b    d88'
   35.25 +	 *     o888o      `Y8bood8P'  o888bood8P'    `Y8bood8P'
   35.26 +	 *
   35.27 +	 * BIG TODO / FIXME --- this is a temporary workaround, waiting to be
   35.28 +	 * replaced by a better workaround.
   35.29 +	 * When seeking in a zlib compressed file it is possible to find a sync
   35.30 +	 * point marker that is not actually a sync point. Then the inflate will
   35.31 +	 * fail with an error. Usually this happens in
   35.32 +	 * OTF_RBuffer_getFileProperties, where it will just retry, so this is not
   35.33 +	 * too bad. I have no idea what happens if this happens in
   35.34 +	 * OTF_RBuffer_searchTime (a.k.a. partial loading)
   35.35 +	 * Well, in any case - if the error code is set, vtunify will notice that
   35.36 +	 * sooner or later and die thinking that something went wrong. We don't
   35.37 +	 * want that to happen while there was no real error, so we reset the error
   35.38 +	 * code.
   35.39 +	 *
   35.40 +	 * Also this is not threadsafe )-;
   35.41 +	 *
   35.42 +	 * [tilsche/jurenz, 12.11.2012]
   35.43 +	 */
   35.44 +	otf_errno_backup= otf_errno;
   35.45 +#endif
   35.46  	read= OTF_File_read( rbuffer->file, rbuffer->buffer, rbuffer->jumpsize );
   35.47 +#ifdef HAVE_ZLIB
   35.48 +	if ( otf_errno != otf_errno_backup ) {
   35.49 +
   35.50 +		otf_errno= otf_errno_backup;
   35.51 +		return 0;
   35.52 +	}
   35.53 +#endif
   35.54  
   35.55  	rbuffer->end= (uint32_t) read;
   35.56  
    36.1 --- a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Version.h	Wed Nov 14 04:52:39 2012 +0000
    36.2 +++ b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Version.h	Wed Nov 14 13:29:18 2012 +0000
    36.3 @@ -17,7 +17,7 @@
    36.4  
    36.5  
    36.6  #define OTF_VERSION_MAJOR	1
    36.7 -#define OTF_VERSION_MINOR	11
    36.8 +#define OTF_VERSION_MINOR	12
    36.9  #define OTF_VERSION_SUB 	1
   36.10  #define OTF_VERSION_STRING	"openmpi"
   36.11  
    37.1 --- a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_WBuffer.c	Wed Nov 14 04:52:39 2012 +0000
    37.2 +++ b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_WBuffer.c	Wed Nov 14 13:29:18 2012 +0000
    37.3 @@ -81,6 +81,11 @@
    37.4  
    37.5  	int ret;
    37.6  
    37.7 +	/*
    37.8 +	 * Write a timestamp at the very end of a trace to avoid traces with a huge tail
    37.9 +	 * of timestamp-less events (e.g. fake-KV-counters) that would require a
   37.10 +	 * very inefficient (O(n^2)) backward search to find the last timestamp.
   37.11 +	 */
   37.12  	if( (uint32_t) -1 != wbuffer->process ) {
   37.13  
   37.14  		OTF_WBuffer_writeUint64( wbuffer, wbuffer->time );
    38.1 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfaux/Handler.cpp	Wed Nov 14 04:52:39 2012 +0000
    38.2 +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfaux/Handler.cpp	Wed Nov 14 13:29:18 2012 +0000
    38.3 @@ -561,3 +561,146 @@
    38.4  
    38.5      return OTF_RETURN_OK;
    38.6  }
    38.7 +
    38.8 +
    38.9 +int handleNoOp( void *firsthandlerarg, uint64_t time, uint32_t process,
   38.10 +		OTF_KeyValueList* list ) {
   38.11 +
   38.12 +
   38.13 +    Control* control= (Control*) firsthandlerarg;
   38.14 +       
   38.15 +    while ( control->checkTime( time ) )
   38.16 +        ;
   38.17 +  
   38.18 +    if ( control->copyEvents )
   38.19 +	return ( 0 == OTF_Writer_writeNoOpKV( control->writer, time,
   38.20 +		process, list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK;
   38.21 +
   38.22 +    return OTF_RETURN_OK;
   38.23 +}
   38.24 +
   38.25 +
   38.26 +int handleEventComment( void *firsthandlerarg, uint64_t time, uint32_t process,
   38.27 +		const char* comment, OTF_KeyValueList* list ) {
   38.28 +
   38.29 +
   38.30 +    Control* control= (Control*) firsthandlerarg;
   38.31 +       
   38.32 +    while ( control->checkTime( time ) )
   38.33 +        ;
   38.34 +  
   38.35 +    if ( control->copyEvents )
   38.36 +	return ( 0 == OTF_Writer_writeEventCommentKV( control->writer, time,
   38.37 +		process, comment, list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK;
   38.38 +
   38.39 +    return OTF_RETURN_OK;
   38.40 +}
   38.41 +
   38.42 +
   38.43 +int handleBeginProcess( void *firsthandlerarg, uint64_t time,
   38.44 +		uint32_t process, OTF_KeyValueList* list ) {
   38.45 +
   38.46 +
   38.47 +    Control* control= (Control*) firsthandlerarg;
   38.48 +       
   38.49 +    while ( control->checkTime( time ) )
   38.50 +        ;
   38.51 +  
   38.52 +    if ( control->copyEvents )
   38.53 +	return ( 0 == OTF_Writer_writeBeginProcessKV( control->writer, time,
   38.54 +		process, list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK;
   38.55 +
   38.56 +    return OTF_RETURN_OK;
   38.57 +}
   38.58 +
   38.59 +
   38.60 +int handleEndProcess( void *firsthandlerarg, uint64_t time,
   38.61 +		uint32_t process, OTF_KeyValueList* list ) {
   38.62 +
   38.63 +
   38.64 +    Control* control= (Control*) firsthandlerarg;
   38.65 +       
   38.66 +    while ( control->checkTime( time ) )
   38.67 +        ;
   38.68 +  
   38.69 +    if ( control->copyEvents )
   38.70 +	return ( 0 == OTF_Writer_writeEndProcessKV( control->writer, time,
   38.71 +		process, list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK;
   38.72 +
   38.73 +    return OTF_RETURN_OK;
   38.74 +}
   38.75 +
   38.76 +
   38.77 +int handleRMAPut( void *firsthandlerarg, uint64_t time, uint32_t process,
   38.78 +        uint32_t origin, uint32_t target, uint32_t communicator, uint32_t tag,
   38.79 +        uint64_t bytes, uint32_t scltoken, OTF_KeyValueList* list ) {
   38.80 +
   38.81 +
   38.82 +    Control* control= (Control*) firsthandlerarg;
   38.83 +       
   38.84 +    while ( control->checkTime( time ) )
   38.85 +        ;
   38.86 +  
   38.87 +    if ( control->copyEvents )
   38.88 +        return ( 0 == OTF_Writer_writeRMAPutKV( control->writer, time,
   38.89 +                process, origin, target, communicator, tag, bytes, scltoken, list )
   38.90 +                 ) ? OTF_RETURN_ABORT : OTF_RETURN_OK;
   38.91 +
   38.92 +    return OTF_RETURN_OK;
   38.93 +}
   38.94 +
   38.95 +
   38.96 +int handleRMAPutRemoteEnd( void *firsthandlerarg, uint64_t time,
   38.97 +        uint32_t process, uint32_t origin, uint32_t target, uint32_t communicator,
   38.98 +        uint32_t tag, uint64_t bytes, uint32_t scltoken, OTF_KeyValueList* list ) {
   38.99 +
  38.100 +
  38.101 +    Control* control= (Control*) firsthandlerarg;
  38.102 +       
  38.103 +    while ( control->checkTime( time ) )
  38.104 +        ;
  38.105 +  
  38.106 +    if ( control->copyEvents )
  38.107 +        return ( 0 == OTF_Writer_writeRMAPutRemoteEndKV( control->writer,
  38.108 +                time, process, origin, target, communicator, tag, bytes, scltoken, list )
  38.109 +                 ) ? OTF_RETURN_ABORT : OTF_RETURN_OK;
  38.110 +
  38.111 +    return OTF_RETURN_OK;
  38.112 +}
  38.113 +
  38.114 +
  38.115 +int handleRMAGet( void *firsthandlerarg, uint64_t time, uint32_t process,
  38.116 +        uint32_t origin, uint32_t target, uint32_t communicator, uint32_t tag,
  38.117 +        uint64_t bytes, uint32_t scltoken, OTF_KeyValueList* list ) {
  38.118 +
  38.119 +
  38.120 +    Control* control= (Control*) firsthandlerarg;
  38.121 +       
  38.122 +    while ( control->checkTime( time ) )
  38.123 +        ;
  38.124 +  
  38.125 +    if ( control->copyEvents )
  38.126 +        return ( 0 == OTF_Writer_writeRMAGetKV( control->writer, time,
  38.127 +                process, origin, target, communicator, tag, bytes, scltoken, list )
  38.128 +                 ) ? OTF_RETURN_ABORT : OTF_RETURN_OK;
  38.129 +
  38.130 +    return OTF_RETURN_OK;
  38.131 +}
  38.132 +
  38.133 +
  38.134 +int handleRMAEnd( void *firsthandlerarg, uint64_t time, uint32_t process, uint32_t remote,
  38.135 +	uint32_t communicator, uint32_t tag, uint32_t scltoken, OTF_KeyValueList* list ) {
  38.136 +
  38.137 +
  38.138 +    Control* control= (Control*) firsthandlerarg;
  38.139 +       
  38.140 +    while ( control->checkTime( time ) )
  38.141 +        ;
  38.142 +  
  38.143 +    if ( control->copyEvents )
  38.144 +        return ( 0 == OTF_Writer_writeRMAEndKV( control->writer, time,
  38.145 +                process, remote, communicator, tag, scltoken, list )
  38.146 +                 ) ? OTF_RETURN_ABORT : OTF_RETURN_OK;
  38.147 +
  38.148 +    return OTF_RETURN_OK;
  38.149 +}
    39.1 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfaux/Handler.h	Wed Nov 14 04:52:39 2012 +0000
    39.2 +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfaux/Handler.h	Wed Nov 14 13:29:18 2012 +0000
    39.3 @@ -91,4 +91,39 @@
    39.4      uint32_t fileid, uint64_t matchingId, uint64_t handleId, uint32_t operation,
    39.5      uint64_t bytes, uint32_t scltoken, OTF_KeyValueList *kvlist );
    39.6  
    39.7 +int handleNoOp( void *firsthandlerarg, uint64_t time, uint32_t process,
    39.8 +		OTF_KeyValueList* list );
    39.9 +
   39.10 +
   39.11 +int handleEventComment( void *firsthandlerarg, uint64_t time, uint32_t process,
   39.12 +		const char* comment, OTF_KeyValueList* list );
   39.13 +
   39.14 +
   39.15 +int handleBeginProcess( void *firsthandlerarg, uint64_t time,
   39.16 +		uint32_t process, OTF_KeyValueList* list );
   39.17 +
   39.18 +
   39.19 +int handleEndProcess( void *firsthandlerarg, uint64_t time,
   39.20 +		uint32_t process, OTF_KeyValueList* list );
   39.21 +
   39.22 +
   39.23 +int handleRMAPut( void *firsthandlerarg, uint64_t time, uint32_t process,
   39.24 +        uint32_t origin, uint32_t target, uint32_t communicator, uint32_t tag,
   39.25 +        uint64_t bytes, uint32_t scltoken, OTF_KeyValueList* list );
   39.26 +
   39.27 +
   39.28 +int handleRMAPutRemoteEnd( void *firsthandlerarg, uint64_t time,
   39.29 +        uint32_t process, uint32_t origin, uint32_t target, uint32_t communicator,
   39.30 +        uint32_t tag, uint64_t bytes, uint32_t scltoken, OTF_KeyValueList* list );
   39.31 +
   39.32 +
   39.33 +int handleRMAGet( void *firsthandlerarg, uint64_t time, uint32_t process,
   39.34 +        uint32_t origin, uint32_t target, uint32_t communicator, uint32_t tag,
   39.35 +        uint64_t bytes, uint32_t scltoken, OTF_KeyValueList* list );
   39.36 +
   39.37 +
   39.38 +int handleRMAEnd( void *firsthandlerarg, uint64_t time, uint32_t process, uint32_t remote,
   39.39 +	uint32_t communicator, uint32_t tag, uint32_t scltoken, OTF_KeyValueList* list );
   39.40 +
   39.41 +
   39.42  #endif /* OTFTOVTF3_HANDLER_H */
    40.1 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfaux/otfaux.cpp	Wed Nov 14 04:52:39 2012 +0000
    40.2 +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfaux/otfaux.cpp	Wed Nov 14 13:29:18 2012 +0000
    40.3 @@ -582,6 +582,11 @@
    40.4          def_wstream= OTF_Writer_getStream( writer, 0 );
    40.5      }
    40.6  
    40.7 +    /* increase buffer size for writing definitions (and markers), if necessary */
    40.8 +    if ( 10240 > buffersize ) {
    40.9 +        OTF_WStream_setBufferSizes( def_wstream, 10240 );
   40.10 +    }
   40.11 +
   40.12      OTF_HandlerArray_getCopyHandler_stream( handlers, def_wstream );
   40.13  
   40.14      Control* control= new Control( writer, def_wstream, verbose,
   40.15 @@ -811,6 +816,64 @@
   40.16      OTF_HandlerArray_setFirstHandlerArg( handlers, (void*) control, 
   40.17          OTF_ENDFILEOP_RECORD );
   40.18  	
   40.19 +    OTF_HandlerArray_setHandler( handlers,
   40.20 +        (OTF_FunctionPointer*) handleNoOp,
   40.21 +        OTF_NOOP_RECORD );
   40.22 +    OTF_HandlerArray_setFirstHandlerArg( handlers, (void*) control, 
   40.23 +        OTF_NOOP_RECORD );
   40.24 +
   40.25 +
   40.26 +    OTF_HandlerArray_setHandler( handlers,
   40.27 +        (OTF_FunctionPointer*) handleEventComment,
   40.28 +        OTF_EVENTCOMMENT_RECORD );
   40.29 +    OTF_HandlerArray_setFirstHandlerArg( handlers, (void*) control, 
   40.30 +        OTF_EVENTCOMMENT_RECORD );
   40.31 +
   40.32 +
   40.33 +    OTF_HandlerArray_setHandler( handlers,
   40.34 +        (OTF_FunctionPointer*) handleBeginProcess,
   40.35 +        OTF_BEGINPROCESS_RECORD );
   40.36 +    OTF_HandlerArray_setFirstHandlerArg( handlers, (void*) control, 
   40.37 +        OTF_BEGINPROCESS_RECORD );
   40.38 +
   40.39 +
   40.40 +    OTF_HandlerArray_setHandler( handlers,
   40.41 +        (OTF_FunctionPointer*) handleEndProcess,
   40.42 +        OTF_ENDPROCESS_RECORD );
   40.43 +    OTF_HandlerArray_setFirstHandlerArg( handlers, (void*) control, 
   40.44 +        OTF_ENDPROCESS_RECORD );
   40.45 +
   40.46 +
   40.47 +    OTF_HandlerArray_setHandler( handlers,
   40.48 +        (OTF_FunctionPointer*) handleRMAPut,
   40.49 +        OTF_RMAPUT_RECORD );
   40.50 +    OTF_HandlerArray_setFirstHandlerArg( handlers, (void*) control, 
   40.51 +        OTF_RMAPUT_RECORD );
   40.52 +
   40.53 +
   40.54 +
   40.55 +    OTF_HandlerArray_setHandler( handlers,
   40.56 +        (OTF_FunctionPointer*) handleRMAPutRemoteEnd,
   40.57 +        OTF_RMAPUTRE_RECORD );
   40.58 +    OTF_HandlerArray_setFirstHandlerArg( handlers, (void*) control, 
   40.59 +        OTF_RMAPUTRE_RECORD );
   40.60 +
   40.61 +
   40.62 +
   40.63 +    OTF_HandlerArray_setHandler( handlers,
   40.64 +        (OTF_FunctionPointer*) handleRMAGet,
   40.65 +        OTF_RMAGET_RECORD );
   40.66 +    OTF_HandlerArray_setFirstHandlerArg( handlers, (void*) control, 
   40.67 +        OTF_RMAGET_RECORD );
   40.68 +
   40.69 +
   40.70 +
   40.71 +    OTF_HandlerArray_setHandler( handlers,
   40.72 +        (OTF_FunctionPointer*) handleRMAEnd,
   40.73 +        OTF_RMAEND_RECORD );
   40.74 +    OTF_HandlerArray_setFirstHandlerArg( handlers, (void*) control, 
   40.75 +        OTF_RMAEND_RECORD );
   40.76 +
   40.77  	
   40.78  	if ( doThumbnail ) {
   40.79  		uint32_t i;
    41.1 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfdump/Handler.cpp	Wed Nov 14 04:52:39 2012 +0000
    41.2 +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfdump/Handler.cpp	Wed Nov 14 13:29:18 2012 +0000
    41.3 @@ -227,11 +227,11 @@
    41.4  			fprintf( c->outfile, "(#%llu) \tDefProcessGroup: stream %u, group %u, name \"%s\", procs ",
    41.5  				(long long unsigned) c->num, stream, group, name );
    41.6  
    41.7 -			for( i= 0; i < (numberOfProcs - 1); ++i ) {
    41.8 -				fprintf( c->outfile, "%u, ", procs[i] );
    41.9 +			const char* sep= "";
   41.10 +			for( i= 0; i < numberOfProcs; ++i ) {
   41.11 +				fprintf( c->outfile, "%s%u", sep, procs[i] );
   41.12 +				sep= ", ";
   41.13  			}
   41.14 -			
   41.15 -			fprintf( c->outfile, "%u", procs[i] );
   41.16  		
   41.17  			printKeyValueList(c, kvlist);
   41.18  		}
    42.1 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/Makefile.common	Wed Nov 14 04:52:39 2012 +0000
    42.2 +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/Makefile.common	Wed Nov 14 13:29:18 2012 +0000
    42.3 @@ -14,6 +14,7 @@
    42.4  	$(OTFPROFILESRCDIR)/create_csv.h \
    42.5  	$(OTFPROFILESRCDIR)/create_marker.h \
    42.6  	$(OTFPROFILESRCDIR)/create_latex.h \
    42.7 +	$(OTFPROFILESRCDIR)/create_filter.h \
    42.8  	$(OTFPROFILESRCDIR)/datastructs.h \
    42.9  	$(OTFPROFILESRCDIR)/otfprofile.h \
   42.10  	$(OTFPROFILESRCDIR)/process_dispersion.h \
   42.11 @@ -26,6 +27,7 @@
   42.12  	$(OTFPROFILESRCDIR)/create_csv.cpp \
   42.13  	$(OTFPROFILESRCDIR)/create_marker.cpp \
   42.14  	$(OTFPROFILESRCDIR)/create_latex.cpp \
   42.15 +	$(OTFPROFILESRCDIR)/create_filter.cpp \
   42.16  	$(OTFPROFILESRCDIR)/otfprofile.cpp \
   42.17  	$(OTFPROFILESRCDIR)/process_dispersion.cpp \
   42.18  	$(OTFPROFILESRCDIR)/summarize_data.cpp \
    43.1 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/clustering.cpp	Wed Nov 14 04:52:39 2012 +0000
    43.2 +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/clustering.cpp	Wed Nov 14 13:29:18 2012 +0000
    43.3 @@ -123,7 +123,7 @@
    43.4                      char cmd[1024];
    43.5  
    43.6                      snprintf( cmd, sizeof( cmd ) - 1,
    43.7 -                        "otfshrink -i %s -o %s -f %s",
    43.8 +                        "otfshrink -i %s -o %s -f %s -k",
    43.9                          alldata.params.input_file_prefix.c_str(),
   43.10                          alldata.params.clustering.shrink_output_prefix.c_str(),
   43.11                          alldata.params.clustering.map_file_name.c_str() );
    44.1 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/collect_data.cpp	Wed Nov 14 04:52:39 2012 +0000
    44.2 +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/collect_data.cpp	Wed Nov 14 13:29:18 2012 +0000
    44.3 @@ -18,7 +18,8 @@
    44.4  
    44.5  
    44.6  using namespace std;
    44.7 -
    44.8 +/*store current callpath for each process  */
    44.9 +map<uint32_t,string> callpathMap;
   44.10  
   44.11  static void prepare_progress( AllData& alldata, uint64_t max_bytes ) {
   44.12  
   44.13 @@ -442,6 +443,17 @@
   44.14      list<StackType>& stack= alldata->stackPerProcess[ process ];
   44.15      stack.push_back( StackType( function, time ) );
   44.16  
   44.17 +    if (alldata->params.dispersion.mode == DISPERSION_MODE_PERCALLPATH)
   44.18 +    {
   44.19 +        /* store current callpath */
   44.20 +        std::ostringstream os;
   44.21 +        os << " "<<function;
   44.22 +        callpathMap[process] += os.str();
   44.23 +        /* save maximum length, for buffer allocation in reduce_data.cpp*/
   44.24 +        if(alldata->maxCallpathLength < callpathMap[process].length())
   44.25 +            alldata->maxCallpathLength = callpathMap[process].length();
   44.26 +    }
   44.27 +    
   44.28      return OTF_RETURN_OK;
   44.29  }
   44.30  
   44.31 @@ -500,12 +512,17 @@
   44.32  
   44.33      stack.pop_back();
   44.34  
   44.35 -    /*
   44.36 -    cerr << " func " << func << " @ process " << process << ": " << 
   44.37 -        "excl " << excl << " ticks, incl " << incl << " ticks" << endl;
   44.38 -    */
   44.39      alldata->functionMapPerRank[ Pair( process, func ) ].add( 1, excl, incl );
   44.40  
   44.41 +    if(alldata->params.dispersion.mode == DISPERSION_MODE_PERCALLPATH)
   44.42 +    {
   44.43 +        /* store function by process, callpath and functionId*/
   44.44 +        alldata->functionCallpathMapPerRank[ TripleCallpath( process, callpathMap[process],func ) ].add( 1, excl, incl );
   44.45 +        alldata->functionCallpathMapPerRank[ TripleCallpath( process, callpathMap[process],func ) ].callpath = callpathMap[process];
   44.46 +        /* reduce callpath step at leave */
   44.47 +        callpathMap[process] = callpathMap[process].substr (0,callpathMap[process].find_last_of(" "));
   44.48 +    }
   44.49 +
   44.50      return OTF_RETURN_OK;
   44.51  }
   44.52  
   44.53 @@ -1481,7 +1498,7 @@
   44.54  
   44.55  
   44.56  bool CollectData( AllData& alldata ) {
   44.57 -
   44.58 +	alldata.maxCallpathLength = 0;
   44.59      bool error= false;
   44.60  
   44.61      /* start runtime measurement for collecting data */
    45.1 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/collect_dispersion.cpp	Wed Nov 14 04:52:39 2012 +0000
    45.2 +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/collect_dispersion.cpp	Wed Nov 14 13:29:18 2012 +0000
    45.3 @@ -23,9 +23,10 @@
    45.4  /* fence between statistics parts within the buffer for consistency checking */
    45.5  enum { FENCE= 0xDEADBEEF };
    45.6  
    45.7 +/*store current callpath for each process  */
    45.8 +map<uint32_t,string> callpath;
    45.9  
   45.10  static void prepare_progress( AllData& alldata, uint64_t max_bytes ) {
   45.11 -
   45.12      Progress& progress= alldata.progress;
   45.13  
   45.14      progress.cur_bytes= 0;
   45.15 @@ -280,6 +281,15 @@
   45.16      list<StackType>& stack= alldata->stackPerProcess[ process ];
   45.17      stack.push_back( StackType( function, time ) );
   45.18  
   45.19 +    if(alldata->params.dispersion.mode == DISPERSION_MODE_PERCALLPATH)
   45.20 +    {
   45.21 +        /* create callpath */
   45.22 +        /* add callpath step on enter event */
   45.23 +        std::ostringstream os;
   45.24 +        os << " " << function;
   45.25 +        callpath[process] += os.str();
   45.26 +    }
   45.27 +
   45.28      return OTF_RETURN_OK;
   45.29  }
   45.30  
   45.31 @@ -297,43 +307,68 @@
   45.32      list<StackType>::reverse_iterator parent_it= ++stack.rbegin();
   45.33  
   45.34      uint64_t func= top.fid;
   45.35 -    uint64_t incl= time - top.timestamp;
   45.36 -    uint64_t excl= incl - top.childDuration;
   45.37 +    uint64_t incl_time= time - top.timestamp;
   45.38 +    uint64_t excl_time= incl_time - top.childDuration;
   45.39     
   45.40      map< uint64_t, FunctionData>::const_iterator it= alldata->functionMapGlobal.find( func );
   45.41      assert ( alldata->functionMapGlobal.end() != it );
   45.42      FunctionData functionData= it->second;
   45.43 -    
   45.44 -    double time_excl_min = functionData.excl_time.min;
   45.45 -    double time_excl_max = functionData.excl_time.max;
   45.46 -    double time_excl= excl;
   45.47 +
   45.48 +    double time_min = functionData.DISPERSION_OPTION.min;
   45.49 +    double time_max = functionData.DISPERSION_OPTION.max;
   45.50 +    double time_max_c = 0;
   45.51 +    double time_min_c = 0;
   45.52 +    double time_a= DISPERSION_OPTION;
   45.53 +
   45.54 +    if(alldata->params.dispersion.mode == DISPERSION_MODE_PERCALLPATH)
   45.55 +    {
   45.56 +        /* get currentfunction from functionCallpathMapGlobal */
   45.57 +        map< PairCallpath, FunctionData,ltPairCallpath>::const_iterator itc= alldata->functionCallpathMapGlobal.find( PairCallpath(func,callpath[process]) );
   45.58 +
   45.59 +        assert ( alldata->functionCallpathMapGlobal.end() != itc );
   45.60 +        FunctionData functionCallpathData= itc->second;
   45.61 +
   45.62 +        time_min_c = functionCallpathData.DISPERSION_OPTION.min;
   45.63 +        time_max_c = functionCallpathData.DISPERSION_OPTION.max;
   45.64 +    }
   45.65  
   45.66      if ( parent_it != stack.rend() ) {
   45.67  
   45.68 -        parent_it->childDuration += incl;
   45.69 +        parent_it->childDuration += incl_time;
   45.70  
   45.71      }
   45.72  
   45.73      stack.pop_back();
   45.74  
   45.75 -    if ( time_excl_max > time_excl_min) {
   45.76 +    if ( time_max > time_min) {
   45.77     
   45.78 -        uint64_t bin = (uint64_t) ( ( log(time_excl) - log(time_excl_min) ) /
   45.79 -                                   ( log(time_excl_max) - log(time_excl_min) )
   45.80 +        uint64_t bin = (uint64_t) ( ( log(time_a) - log(time_min) ) /
   45.81 +                                   ( log(time_max) - log(time_min) )
   45.82                                     * 100 );
   45.83 -/*        cerr << " func " << func << " @process " << process << " : " << " bin " << bin << " , excl " << excl << " ticks "<< endl; 
   45.84 -*/        
   45.85 -        alldata->functionDurationSectionMapPerRank[ Triple(process, func, bin ) ].add( 1, excl, incl );
   45.86 -        
   45.87 +        uint64_t bin_c = 0;
   45.88 +        if(alldata->params.dispersion.mode == DISPERSION_MODE_PERCALLPATH)
   45.89 +            bin_c = (uint64_t) ( ( log(time_a) - log(time_min_c) ) /
   45.90 +                                   ( log(time_max_c) - log(time_min_c) )
   45.91 +                                   * 100 );
   45.92 +        alldata->functionDurationSectionMapPerRank[ Triple(process, func, bin )]
   45.93 +                                                .add( 1, excl_time, incl_time );
   45.94 +        if(alldata->params.dispersion.mode == DISPERSION_MODE_PERCALLPATH)
   45.95 +            alldata->functionDurationSectionCallpathMapPerRank[ Quadruple(process, func,callpath[process], bin_c ) ].add( 1, excl_time,callpath[process], incl_time );
   45.96      }
   45.97      
   45.98 -    if ( time_excl_max == time_excl || time_excl_min == time_excl ) {
   45.99 -        alldata->functionMinMaxLocationMap [ func ].add( excl, process, (time-incl) );
  45.100 -/*
  45.101 -        cerr << " func " << func << " @process " << process << " : " << " time " << (time-incl) << " excl " << excl << endl;
  45.102 -*/
  45.103 +    if ( time_max == time_a || time_min == time_a ) {
  45.104 +        alldata->functionMinMaxLocationMap [ func ].add( excl_time, process, (time-incl_time) );
  45.105      }
  45.106 -       
  45.107 +
  45.108 +    if(alldata->params.dispersion.mode == DISPERSION_MODE_PERCALLPATH)
  45.109 +    {
  45.110 +        if ( time_max == time_a || time_min == time_a ) {
  45.111 +            alldata->functionMinMaxLocationCallpathMap [ callpath[process] ].add( excl_time, process, (time-incl_time) );
  45.112 +        }
  45.113 +        /* go one step back on callpath, because of this leave */
  45.114 +        callpath[process] = callpath[process].substr (0,callpath[process].find_last_of(" "));
  45.115 +    }
  45.116 +
  45.117      return OTF_RETURN_OK;
  45.118  }
  45.119  
  45.120 @@ -357,9 +392,16 @@
  45.121  
  45.122          int s1, s2;
  45.123  
  45.124 -        size= alldata.functionMapGlobal.size(); /* map< uint64_t, FunctionData > functionMapGlobal; */
  45.125          num_fences++;
  45.126 -          
  45.127 +        if(alldata.params.dispersion.mode == DISPERSION_MODE_PERCALLPATH)
  45.128 +        {
  45.129 +            size= alldata.functionCallpathMapGlobal.size(); /* map< PairCallpath, FunctionData, ltPairCallpath > functionCallpathMapGlobal; */
  45.130 +            num_fences++;
  45.131 +        }
  45.132 +        else
  45.133 +        {
  45.134 +            size= alldata.functionMapGlobal.size(); /* map< uint64_t, FunctionData > functionMapGlobal; */
  45.135 +        }
  45.136          /* get bytesize multiplying all pieces */
  45.137            
  45.138          MPI_Pack_size( num_fences, MPI_LONG_LONG_INT, MPI_COMM_WORLD, &s1 );
  45.139 @@ -368,9 +410,24 @@
  45.140          MPI_Pack_size( 1 + size * 7, MPI_LONG_LONG_INT, MPI_COMM_WORLD, &s1 );
  45.141          MPI_Pack_size( size * 6, MPI_DOUBLE, MPI_COMM_WORLD, &s2 );
  45.142          buffer_size += s1 + s2;
  45.143 - 
  45.144 +
  45.145 +        /* get bytesize multiplying all pieces */
  45.146 +        MPI_Pack_size( 1 + size * 8, MPI_LONG_LONG_INT, MPI_COMM_WORLD, &s1 );
  45.147 +        MPI_Pack_size( size * 6, MPI_DOUBLE, MPI_COMM_WORLD, &s2 );
  45.148 +        buffer_size += s1 + s2;
  45.149 +
  45.150 +        if(alldata.params.dispersion.mode == DISPERSION_MODE_PERCALLPATH)
  45.151 +        {
  45.152 +        	map< PairCallpath, FunctionData, ltPairCallpath >::const_iterator it=    alldata.functionCallpathMapGlobal.begin();
  45.153 +        	map< PairCallpath, FunctionData, ltPairCallpath >::const_iterator itend= alldata.functionCallpathMapGlobal.end();
  45.154 +        	for ( ; it != itend; ++it ) {
  45.155 +        		MPI_Pack_size( it->second.callpath.length(), MPI_CHAR, MPI_COMM_WORLD, &s1 );
  45.156 +        		buffer_size += s1;
  45.157 +        	}
  45.158 +        }
  45.159 +
  45.160      } 
  45.161 -       
  45.162 +
  45.163      /* broadcast buffer size */
  45.164      MPI_Bcast( &buffer_size, 1, MPI_INT, 0, MPI_COMM_WORLD );
  45.165  
  45.166 @@ -378,6 +435,10 @@
  45.167      buffer= new char[ buffer_size ];
  45.168      assert( buffer );
  45.169  
  45.170 +    uint64_t callpath_length=0;
  45.171 +    MPI_Allreduce(&(alldata.maxCallpathLength),&callpath_length,1,MPI_UNSIGNED_LONG_LONG,MPI_MAX,MPI_COMM_WORLD);
  45.172 +    char* callpath = new char[callpath_length];
  45.173 +
  45.174      if ( 0 == alldata.myRank ) {
  45.175         /* pack parts */
  45.176         
  45.177 @@ -410,17 +471,50 @@
  45.178            MPI_Pack( (void*) &it->second.incl_time.sum, 1, MPI_DOUBLE,        buffer, buffer_size, &buffer_pos, MPI_COMM_WORLD );
  45.179            MPI_Pack( (void*) &it->second.incl_time.cnt, 1, MPI_LONG_LONG_INT, buffer, buffer_size, &buffer_pos, MPI_COMM_WORLD );
  45.180         }
  45.181 -       
  45.182 +
  45.183         /* extra check that doesn't cost too much */
  45.184         MPI_Pack( (void*) &fence, 1, MPI_LONG_LONG_INT, buffer, buffer_size, &buffer_pos, MPI_COMM_WORLD );
  45.185 +
  45.186 +       if(alldata.params.dispersion.mode == DISPERSION_MODE_PERCALLPATH)
  45.187 +       {
  45.188 +           /* pack size of functionCallpathMapGlobal */
  45.189 +           func_map_global_size= alldata.functionCallpathMapGlobal.size();
  45.190 +           MPI_Pack( &func_map_global_size, 1, MPI_LONG_LONG_INT, buffer, buffer_size, &buffer_pos, MPI_COMM_WORLD );
  45.191 +
  45.192 +           /* pack functionCallpathMapGlobal */
  45.193 +           {
  45.194 +               map< PairCallpath, FunctionData, ltPairCallpath >::const_iterator it=    alldata.functionCallpathMapGlobal.begin();
  45.195 +               map< PairCallpath, FunctionData, ltPairCallpath >::const_iterator itend= alldata.functionCallpathMapGlobal.end();
  45.196 +               uint64_t len;
  45.197 +               for ( ; it != itend; ++it ) {
  45.198 +                   len = it->first.b.length();
  45.199 +                   MPI_Pack( (void*) &it->first.a,                1, MPI_LONG_LONG_INT, buffer, buffer_size, &buffer_pos, MPI_COMM_WORLD );
  45.200 +                   MPI_Pack( (void*) &len,                1, MPI_LONG_LONG_INT, buffer, buffer_size, &buffer_pos, MPI_COMM_WORLD );
  45.201 +                   MPI_Pack( (void*) &it->second.count.min,     1, MPI_LONG_LONG_INT, buffer, buffer_size, &buffer_pos, MPI_COMM_WORLD );
  45.202 +                   MPI_Pack( (void*) &it->second.count.max,     1, MPI_LONG_LONG_INT, buffer, buffer_size, &buffer_pos, MPI_COMM_WORLD );
  45.203 +                   MPI_Pack( (void*) &it->second.count.sum,     1, MPI_LONG_LONG_INT, buffer, buffer_size, &buffer_pos, MPI_COMM_WORLD );
  45.204 +                   MPI_Pack( (void*) &it->second.count.cnt,     1, MPI_LONG_LONG_INT, buffer, buffer_size, &buffer_pos, MPI_COMM_WORLD );
  45.205 +
  45.206 +                   MPI_Pack( (void*) &it->second.excl_time.min, 1, MPI_DOUBLE,        buffer, buffer_size, &buffer_pos, MPI_COMM_WORLD );
  45.207 +                   MPI_Pack( (void*) &it->second.excl_time.max, 1, MPI_DOUBLE,        buffer, buffer_size, &buffer_pos, MPI_COMM_WORLD );
  45.208 +                   MPI_Pack( (void*) &it->second.excl_time.sum, 1, MPI_DOUBLE,        buffer, buffer_size, &buffer_pos, MPI_COMM_WORLD );
  45.209 +                   MPI_Pack( (void*) &it->second.excl_time.cnt, 1, MPI_LONG_LONG_INT, buffer, buffer_size, &buffer_pos, MPI_COMM_WORLD );
  45.210 +
  45.211 +                   MPI_Pack( (void*) &it->second.incl_time.min, 1, MPI_DOUBLE,        buffer, buffer_size, &buffer_pos, MPI_COMM_WORLD );
  45.212 +                   MPI_Pack( (void*) &it->second.incl_time.max, 1, MPI_DOUBLE,        buffer, buffer_size, &buffer_pos, MPI_COMM_WORLD );
  45.213 +                   MPI_Pack( (void*) &it->second.incl_time.sum, 1, MPI_DOUBLE,        buffer, buffer_size, &buffer_pos, MPI_COMM_WORLD );
  45.214 +                   MPI_Pack( (void*) &it->second.incl_time.cnt, 1, MPI_LONG_LONG_INT, buffer, buffer_size, &buffer_pos, MPI_COMM_WORLD );
  45.215 +                   MPI_Pack( (void*) it->first.b.c_str(), len, MPI_CHAR, buffer, buffer_size, &buffer_pos, MPI_COMM_WORLD );
  45.216 +               }
  45.217 +           }
  45.218 +                  /* extra check that doesn't cost too much */
  45.219 +                  MPI_Pack( (void*) &fence, 1, MPI_LONG_LONG_INT, buffer, buffer_size, &buffer_pos, MPI_COMM_WORLD );
  45.220 +       }
  45.221      }
  45.222 -   
  45.223 -   
  45.224 +
  45.225      /* broadcast definitions buffer */
  45.226      MPI_Bcast( buffer, buffer_size, MPI_PACKED, 0, MPI_COMM_WORLD );
  45.227 -
  45.228      /* unpack definitions from buffer */
  45.229 -
  45.230      if ( 0 != alldata.myRank ) {
  45.231         
  45.232         /* unpack parts */
  45.233 @@ -433,7 +527,7 @@
  45.234         /* unpack size of functionMapGlobal */
  45.235         uint64_t func_map_global_size= 0;
  45.236         MPI_Unpack( buffer, buffer_size, &buffer_pos, &func_map_global_size, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
  45.237 -          
  45.238 +
  45.239            /* unpack functionMapGlobal */
  45.240            for ( uint64_t i= 0; i < func_map_global_size; i++ ) {
  45.241               
  45.242 @@ -459,14 +553,56 @@
  45.243               
  45.244               alldata.functionMapGlobal[ func ].add( tmp );
  45.245            }
  45.246 -          
  45.247            /* extra check that doesn't cost too much */
  45.248            fence= 0;
  45.249            MPI_Unpack( buffer, buffer_size, &buffer_pos, &fence, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
  45.250            assert( FENCE == fence );
  45.251 +
  45.252 +          /* unpack size of functionMapGlobal */
  45.253 +          func_map_global_size= 0;
  45.254 +          MPI_Unpack( buffer, buffer_size, &buffer_pos, &func_map_global_size, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
  45.255 +          if(alldata.params.dispersion.mode == DISPERSION_MODE_PERCALLPATH)
  45.256 +          {
  45.257 +              /* unpack functionMapCallpathGlobal */
  45.258 +              for ( uint64_t i= 0; i < func_map_global_size; i++ ) {
  45.259 +
  45.260 +                  uint64_t func;
  45.261 +                  FunctionData tmp;
  45.262 +                  uint64_t len;
  45.263 +
  45.264 +                  MPI_Unpack( buffer, buffer_size, &buffer_pos, &func,              1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
  45.265 +                  MPI_Unpack( buffer, buffer_size, &buffer_pos, &len,              1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
  45.266 +
  45.267 +                  MPI_Unpack( buffer, buffer_size, &buffer_pos, &tmp.count.min,     1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
  45.268 +                  MPI_Unpack( buffer, buffer_size, &buffer_pos, &tmp.count.max,     1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
  45.269 +                  MPI_Unpack( buffer, buffer_size, &buffer_pos, &tmp.count.sum,     1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
  45.270 +                  MPI_Unpack( buffer, buffer_size, &buffer_pos, &tmp.count.cnt,     1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
  45.271 +
  45.272 +                  MPI_Unpack( buffer, buffer_size, &buffer_pos, &tmp.excl_time.min, 1, MPI_DOUBLE,        MPI_COMM_WORLD );
  45.273 +                  MPI_Unpack( buffer, buffer_size, &buffer_pos, &tmp.excl_time.max, 1, MPI_DOUBLE,        MPI_COMM_WORLD );
  45.274 +                  MPI_Unpack( buffer, buffer_size, &buffer_pos, &tmp.excl_time.sum, 1, MPI_DOUBLE,        MPI_COMM_WORLD );
  45.275 +                  MPI_Unpack( buffer, buffer_size, &buffer_pos, &tmp.excl_time.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
  45.276 +
  45.277 +                  MPI_Unpack( buffer, buffer_size, &buffer_pos, &tmp.incl_time.min, 1, MPI_DOUBLE,        MPI_COMM_WORLD );
  45.278 +                  MPI_Unpack( buffer, buffer_size, &buffer_pos, &tmp.incl_time.max, 1, MPI_DOUBLE,        MPI_COMM_WORLD );
  45.279 +                  MPI_Unpack( buffer, buffer_size, &buffer_pos, &tmp.incl_time.sum, 1, MPI_DOUBLE,        MPI_COMM_WORLD );
  45.280 +                  MPI_Unpack( buffer, buffer_size, &buffer_pos, &tmp.incl_time.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
  45.281 +                  MPI_Unpack( buffer, buffer_size, &buffer_pos, callpath, len, MPI_CHAR, MPI_COMM_WORLD );
  45.282 +
  45.283 +                  tmp.callpath = callpath;
  45.284 +                  tmp.callpath = tmp.callpath.substr (0,len);
  45.285 +                  alldata.functionCallpathMapGlobal[ PairCallpath(func,tmp.callpath) ].add( tmp );
  45.286 +              }
  45.287 +
  45.288 +              /* extra check that doesn't cost too much */
  45.289 +              fence= 0;
  45.290 +              MPI_Unpack( buffer, buffer_size, &buffer_pos, &fence, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
  45.291 +              assert( FENCE == fence );
  45.292 +          }
  45.293      }
  45.294 -
  45.295      delete[] buffer;
  45.296 +    if(callpath_length > 0)
  45.297 +        delete[] callpath;
  45.298  }
  45.299  #endif /* OTFPROFILE_MPI */
  45.300  
  45.301 @@ -588,20 +724,16 @@
  45.302  bool CollectDispersion( AllData& alldata ) {
  45.303  
  45.304      bool error= false;
  45.305 -
  45.306      /* start runtime measurement for collecting dispersion information */
  45.307      StartMeasurement( alldata, 1, true, "collect dispersion information" );
  45.308  
  45.309      /* open OTF file manager and reader */
  45.310 -
  45.311      OTF_FileManager* manager=
  45.312          OTF_FileManager_open( alldata.params.max_file_handles );
  45.313      assert( manager );
  45.314 -
  45.315      OTF_Reader* reader=
  45.316          OTF_Reader_open( alldata.params.input_file_prefix.c_str(), manager );
  45.317      assert( reader );
  45.318 -
  45.319      do {
  45.320  
  45.321  #ifdef OTFPROFILE_MPI
  45.322 @@ -609,14 +741,12 @@
  45.323          /* share definitions needed for reading events to workers */
  45.324  
  45.325          if ( 1 < alldata.numRanks ) {
  45.326 -
  45.327              share_profiledata( alldata );
  45.328  
  45.329          }
  45.330  #endif /* OTFPROFILE_MPI */
  45.331  
  45.332          /* read data from events */
  45.333 -
  45.334          if ( !alldata.params.read_from_stats ) {
  45.335              
  45.336              VerbosePrint( alldata, 1, true, "reading events for dispersion\n" );
  45.337 @@ -638,18 +768,15 @@
  45.338  #endif /* OTFPROFILE_MPI */
  45.339  
  45.340      } while( false );
  45.341 -
  45.342      /* close OTF file manager and reader */
  45.343  
  45.344      OTF_Reader_close( reader );
  45.345      OTF_FileManager_close( manager );
  45.346 -
  45.347      if ( !error ) {
  45.348  
  45.349          /* stop runtime measurement for collecting data */
  45.350          StopMeasurement( alldata, true, "collect dispersion information" );
  45.351  
  45.352      }
  45.353 -
  45.354      return !error;
  45.355  }
    46.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    46.2 +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/create_filter.cpp	Wed Nov 14 13:29:18 2012 +0000
    46.3 @@ -0,0 +1,340 @@
    46.4 +/*
    46.5 + This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2012.
    46.6 + Authors: Andreas Knuepfer, Robert Dietrich, Matthias Jurenz
    46.7 +*/
    46.8 +
    46.9 +#include <iostream>
   46.10 +#include <sstream>
   46.11 +#include <fstream>
   46.12 +#include <string>
   46.13 +#include <functional>
   46.14 +#include <math.h>
   46.15 +#include <string.h>
   46.16 +#include <time.h>
   46.17 +
   46.18 +#include "otfprofile.h"
   46.19 +#include "create_filter.h"
   46.20 +#include "OTF_inttypes.h"
   46.21 +#include "OTF_Definitions.h"
   46.22 +#include "OTF_Platform.h"
   46.23 +
   46.24 +CTree<string> *cur_pos;
   46.25 +CTree<string> callpath_tree;
   46.26 +
   46.27 +map<uint32_t, CTree<string>*> filtered;
   46.28 +uint64_t maxCount, filterCount;
   46.29 +uint32_t idCount;
   46.30 +map<Pair_int, int, ltPair_int> edgesEdg;
   46.31 +map<Pair_int, uint64_t, ltPair_int> edgesEdg_sec;
   46.32 +map<string, CTree<string>*> CallpathNodes;
   46.33 +
   46.34 +#define STARTTEXT "# VampirTrace dispersion callpath filter specification"
   46.35 +#define STARTTEXT2 "# generated with otfprofile"
   46.36 +#define STARTTEXT3 "# previous filter content"
   46.37 +
   46.38 +bool CreateFilter(AllData& alldata) {
   46.39 +	maxCount = idCount = filterCount = 0;
   46.40 +	callpath_tree.parent = NULL;
   46.41 +	callpath_tree.id = idCount++;
   46.42 +	callpath_tree.item = " ";
   46.43 +	callpath_tree.str_hash = 0;
   46.44 +	callpath_tree.n = 0;
   46.45 +	callpath_tree.rule = FILTERNOT;
   46.46 +
   46.47 +	bool error = false;
   46.48 +	cur_pos = NULL;
   46.49 +
   46.50 +	// Use this function only if marker is set and dispersion data
   46.51 +	// is collected
   46.52 +	if ((alldata.params.dispersion.options & DISPERSION_OPT_FILTER) == 0 )
   46.53 +		return error;
   46.54 +
   46.55 +	/*create string streams to save callpath tree information and
   46.56 +     filter information*/
   46.57 +	std::ostringstream filter_os;
   46.58 +	addOldToTree(alldata, filter_os);
   46.59 +	// setup file writer
   46.60 +	string filter_file_name = alldata.params.output_file_prefix + ".filter";
   46.61 +	fstream filter_file;
   46.62 +	filter_file.open(filter_file_name.c_str(), ios::out | ios::trunc);
   46.63 +	if (!filter_file.good()) {
   46.64 +		cerr << "ERROR: Unable to open file '" << filter_file_name
   46.65 +				<< "' for writing." << endl;
   46.66 +		return error;
   46.67 +	}
   46.68 +
   46.69 +	time_t t = time(0);
   46.70 +	char* ts = ctime(&t);
   46.71 +	ts[strlen(ts)-1] = '\0';
   46.72 +
   46.73 +	filter_file << STARTTEXT << endl << STARTTEXT2 << " on "<< ts << endl;
   46.74 +
   46.75 +	map<TripleCallpath, FunctionDispersionData, gtTripleCallpathSortByCallpath>
   46.76 +	   ::const_iterator itc = alldata.functionDispersionCallpathMap.begin();
   46.77 +	map<TripleCallpath, FunctionDispersionData, gtTripleCallpathSortByCallpath>
   46.78 +        ::const_iterator itcend = alldata.functionDispersionCallpathMap.end();
   46.79 +
   46.80 +	list<string> callpathes;
   46.81 +	string tmp;
   46.82 +	string word;
   46.83 +
   46.84 +	while (itc != itcend) {
   46.85 +
   46.86 +		if (itc->first.b == "") {
   46.87 +			itc++;
   46.88 +			continue;
   46.89 +		}
   46.90 +
   46.91 +		tmp = "";
   46.92 +		maxCount += itc->second.count;
   46.93 +		parsePath(alldata, itc->first.b, itc->second.filterRule,
   46.94 +				itc->second.count, (itc->second.excl_time_95_percent
   46.95 +						/ alldata.timerResolution));
   46.96 +
   46.97 +		itc++;
   46.98 +	}
   46.99 +
  46.100 +	filter_file << endl << endl;
  46.101 +	postOrder(&callpath_tree, filter_file);
  46.102 +
  46.103 +	filter_file << endl << endl << STARTTEXT3 << endl << filter_os.str()
  46.104 +			<< endl;
  46.105 +	filter_file.close();
  46.106 +
  46.107 +	while (!callpath_tree.children.empty()) {
  46.108 +		CTree<string>* tmp = callpath_tree.children.back();
  46.109 +		callpath_tree.children.pop_back();
  46.110 +		if (tmp != NULL) {
  46.111 +			delete (tmp);
  46.112 +			tmp = NULL;
  46.113 +		}
  46.114 +	}
  46.115 +
  46.116 +	return !error;
  46.117 +}
  46.118 +
  46.119 +CTree<string>* addToTree(string parent, uint32_t pid, string child,
  46.120 +		uint32_t cid, int rule, uint64_t n, double timeFilt) {
  46.121 +	if (cur_pos == NULL) {
  46.122 +		for (uint32_t i = 0; i < callpath_tree.children.size(); i++)
  46.123 +			if (callpath_tree.children.at(i)->item == parent) {
  46.124 +				cur_pos = callpath_tree.children.at(i);
  46.125 +				break;
  46.126 +			}
  46.127 +	}
  46.128 +
  46.129 +	if (cur_pos == NULL) {
  46.130 +
  46.131 +		CTree<string> *children = new CTree<string> ();
  46.132 +		children = children;
  46.133 +		children->item = parent;
  46.134 +		children->str_hash = pid;
  46.135 +
  46.136 +		children->parent = &callpath_tree;
  46.137 +		if (child == "") {
  46.138 +			children->n = n;
  46.139 +			children->rule = rule;
  46.140 +		} else {
  46.141 +			children->n = 1;
  46.142 +			children->rule = FILTERNOT;
  46.143 +		}
  46.144 +
  46.145 +		children->id = idCount++;
  46.146 +		callpath_tree.children.push_back(children);
  46.147 +		cur_pos = children;
  46.148 +	}
  46.149 +
  46.150 +	CTree<string> *tmp;
  46.151 +	tmp = cur_pos;
  46.152 +	while (parent != cur_pos->item) {
  46.153 +		if (cur_pos->parent == NULL)
  46.154 +			break;
  46.155 +		cur_pos = cur_pos->parent;
  46.156 +	}
  46.157 +
  46.158 +	bool exists = false;
  46.159 +	for (uint32_t i = 0; i < cur_pos->children.size(); i++) {
  46.160 +		if ((cur_pos->children.at(i))->item == child) {
  46.161 +			exists = true;
  46.162 +			cur_pos = (cur_pos->children.at(i));
  46.163 +			cur_pos->n += n;
  46.164 +			if (timeFilt > 0)
  46.165 +				cur_pos->timeFilt = timeFilt;
  46.166 +			if (rule != -1 && cur_pos->rule != PREVFILTER) {
  46.167 +				if (cur_pos->rule == -1)
  46.168 +					cur_pos->rule = rule;
  46.169 +				else if (cur_pos->rule != FILTERNOT)
  46.170 +					cur_pos->rule = rule;
  46.171 +
  46.172 +				tmp = cur_pos;
  46.173 +				if (rule == FILTERNOT)
  46.174 +					while (tmp->parent != NULL && tmp->parent->rule
  46.175 +							== FILTERREC) {
  46.176 +						tmp->parent->rule = FILTEROUT;
  46.177 +						tmp = tmp->parent;
  46.178 +					}
  46.179 +			}
  46.180 +			return NULL;
  46.181 +		}
  46.182 +	}
  46.183 +	if (child == "")
  46.184 +		exists = true;
  46.185 +
  46.186 +	if (!exists) {
  46.187 +
  46.188 +		CTree<string> *children = new CTree<string> ();
  46.189 +		children->item = child;
  46.190 +		children->rule = rule;
  46.191 +		children->n = n;
  46.192 +		children->id = idCount++;
  46.193 +		children->str_hash = cid;
  46.194 +		children->parent = cur_pos;
  46.195 +		if (timeFilt > 0)
  46.196 +			children->timeFilt = timeFilt;
  46.197 +		cur_pos->children.push_back(children);
  46.198 +		cur_pos = children;
  46.199 +
  46.200 +		tmp = cur_pos;
  46.201 +		if (rule == FILTERNOT)
  46.202 +			while (tmp->parent != NULL && tmp->parent->rule == FILTERREC) {
  46.203 +				tmp->parent->rule = FILTEROUT;
  46.204 +				tmp = tmp->parent;
  46.205 +			}
  46.206 +
  46.207 +		return cur_pos;
  46.208 +	}
  46.209 +	return NULL;
  46.210 +}
  46.211 +
  46.212 +void trimString(string& str) {
  46.213 +	string::size_type pos1 = str.find_first_not_of(' ');
  46.214 +	string::size_type pos2 = str.find_last_not_of(' ');
  46.215 +	str = str.substr(pos1 == string::npos ? 0 : pos1,
  46.216 +			pos2 == string::npos ? str.length() - 1 : pos2 - pos1 + 1);
  46.217 +}
  46.218 +
  46.219 +void addOldToTree(AllData& alldata, std::ostringstream& old_filter) {
  46.220 +	vector<string> pathes;
  46.221 +
  46.222 +	if( !alldata.params.dispersion.filter_file_name.empty())
  46.223 +	{
  46.224 +        string filter_file_name = alldata.params.dispersion.filter_file_name;
  46.225 +        ifstream b_file(filter_file_name.c_str());
  46.226 +        if (b_file.good()) {
  46.227 +            string line;
  46.228 +            while (std::getline(b_file, line)) {
  46.229 +                trimString(line);
  46.230 +                if (line.substr(0, 1) != "#" && line.find_first_of("--")
  46.231 +                        != line.npos) {
  46.232 +                    pathes.push_back(line);
  46.233 +                } else {
  46.234 +                    if (line != STARTTEXT && line != STARTTEXT2 && line
  46.235 +                            != STARTTEXT3)
  46.236 +                        old_filter << line << endl;
  46.237 +                }
  46.238 +            }
  46.239 +            b_file.close();
  46.240 +        }
  46.241 +
  46.242 +        while (!pathes.empty()) {
  46.243 +            string path = pathes.back();
  46.244 +            string tmp_path = path;
  46.245 +            pathes.pop_back();
  46.246 +            int pos = path.find_last_of("--") - 1;
  46.247 +            string tail = path.substr(pos, path.length() - pos);
  46.248 +            path = path.substr(0, pos);
  46.249 +            trimString(path);
  46.250 +            trimString(tail);
  46.251 +            string func = path.substr(path.find_last_of(";") + 1, path.length()
  46.252 +                    - (path.find_last_of(";") + 1));
  46.253 +            if (tail.find("C") != tail.npos && tail.find(" 0 ") != tail.npos) {
  46.254 +                parsePath(alldata, path, FILTEROUT);
  46.255 +            } else {
  46.256 +                old_filter << tmp_path << endl;
  46.257 +            }
  46.258 +        }
  46.259 +	}
  46.260 +}
  46.261 +
  46.262 +void postOrder(CTree<string>* node, fstream& filter_file) {
  46.263 +
  46.264 +
  46.265 +	if(node->rule != FILTERREC)
  46.266 +	{
  46.267 +        for (uint32_t i = 0; i < node->children.size(); i++)
  46.268 +            postOrder(node->children.at(i), filter_file);
  46.269 +	}
  46.270 +	CTree<string>* tmp;
  46.271 +	if (node->rule != FILTERNOT && node->rule != -1) {
  46.272 +		string path = "";
  46.273 +		path = node->item + " -- 0 C";
  46.274 +		tmp = node->parent;
  46.275 +		while (tmp->parent != NULL) {
  46.276 +			if (tmp->rule == FILTERNOT || tmp->rule == -1)
  46.277 +				path = tmp->item + ";" + path;
  46.278 +			tmp = tmp->parent;
  46.279 +		}
  46.280 +		filter_file << path << endl;
  46.281 +		filterCount++;
  46.282 +
  46.283 +	}
  46.284 +}
  46.285 +
  46.286 +void parsePath(AllData& alldata, string path, int rule, uint64_t n,
  46.287 +		double timeB) {
  46.288 +	trimString(path);
  46.289 +	if (rule != 1) {
  46.290 +		timeB = -1;
  46.291 +	}
  46.292 +
  46.293 +	cur_pos = NULL;
  46.294 +	uint32_t cid;
  46.295 +	string child;
  46.296 +	while (path != "") {
  46.297 +		uint32_t pid = atoi(path.substr(0, path.find_first_of(" ")).c_str());
  46.298 +		string parent = alldata.functionIdNameMap[pid];
  46.299 +		path = path.substr(path.find_first_of(" ") + 1);
  46.300 +		if (path == "") {
  46.301 +			if (cur_pos != NULL)
  46.302 +				break;
  46.303 +			cid = 5;
  46.304 +			child = parent;
  46.305 +			child = "";
  46.306 +		} else {
  46.307 +			cid = atoi(path.substr(0, path.find_first_of(" ")).c_str());
  46.308 +			child = alldata.functionIdNameMap[cid];
  46.309 +		}
  46.310 +
  46.311 +		if (path.find_first_of(" ") == path.npos) {
  46.312 +			addToTree(parent, pid, child, cid, rule, n, timeB);
  46.313 +			break;
  46.314 +		} else
  46.315 +			addToTree(parent, pid, child, cid, -1, 0, -1);
  46.316 +	}
  46.317 +}
  46.318 +
  46.319 +void parsePath(AllData& alldata, string path, int rule) {
  46.320 +	cur_pos = NULL;
  46.321 +	uint32_t cid;
  46.322 +	string child;
  46.323 +	while (path != "") {
  46.324 +		uint32_t pid = 5;
  46.325 +		string parent = path.substr(0, path.find_first_of(";"));
  46.326 +		path = path.substr(path.find_first_of(";") + 1);
  46.327 +		if (path == "") {
  46.328 +			if (cur_pos != NULL)
  46.329 +				break;
  46.330 +			cid = 5;
  46.331 +			child = parent;
  46.332 +			child = "";
  46.333 +		} else {
  46.334 +			cid = 5;
  46.335 +			child = path.substr(0, path.find_first_of(";"));
  46.336 +		}
  46.337 +		if (path.find_first_of(";") == path.npos) {
  46.338 +			addToTree(parent, pid, child, cid, rule, 0, -1);
  46.339 +			break;
  46.340 +		} else
  46.341 +			addToTree(parent, pid, child, cid, -1, 0, -1);
  46.342 +	}
  46.343 +}
    47.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    47.2 +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/create_filter.h	Wed Nov 14 13:29:18 2012 +0000
    47.3 @@ -0,0 +1,113 @@
    47.4 +/*
    47.5 + This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2012.
    47.6 + Authors: Andreas Knuepfer, Robert Dietrich, Matthias Jurenz
    47.7 + */
    47.8 +
    47.9 +#ifndef CREATE_FILTER_H
   47.10 +#define CREATE_FILTER_H
   47.11 +
   47.12 +#include "datastructs.h"
   47.13 +#include <vector>
   47.14 +
   47.15 +/**
   47.16 + * Enumeration to set filter method:
   47.17 + *   - FILTERREC: recursive filter
   47.18 + *   - FILTERNOT: don't filter this path
   47.19 + *   - FILTEROUT: filter this path, but not recursively
   47.20 + *   - TIMEFILTER: filter this path to a specific point of time
   47.21 + *   - PREVFILTER: filter that was set by an other filterfile
   47.22 + */
   47.23 +enum filterRule {
   47.24 +    FILTERREC = 0, FILTERNOT = 1, FILTEROUT = 2, TIMEFILTER = 3, PREVFILTER = 4
   47.25 +};
   47.26 +
   47.27 +struct Pair_int {
   47.28 +
   47.29 +    uint32_t a;
   47.30 +    uint32_t b;
   47.31 +
   47.32 +    Pair_int() :
   47.33 +        a(0), b(0) {
   47.34 +    }
   47.35 +    Pair_int(long aa, long bb) :
   47.36 +        a(aa), b(bb) {
   47.37 +    }
   47.38 +    ~Pair_int() {
   47.39 +    }
   47.40 +};
   47.41 +
   47.42 +struct ltPair_int {
   47.43 +
   47.44 +    bool operator()(const Pair_int& p1, const Pair_int& p2) const {
   47.45 +
   47.46 +        /* a is the major number for comparison, this gives a better
   47.47 +         order when reducing the entries over the first argument */
   47.48 +
   47.49 +        if (p1.a == p2.a) {
   47.50 +
   47.51 +            return p1.b < p2.b;
   47.52 +
   47.53 +        } else {
   47.54 +
   47.55 +            return p1.a < p2.a;
   47.56 +        }
   47.57 +    }
   47.58 +};
   47.59 +
   47.60 +/**
   47.61 + * CTree is a Tree to save Path information. It Points to the Parent node and
   47.62 + *  contains child notes, as well as
   47.63 + * filter rule, name, id and hash_str.
   47.64 + */
   47.65 +template<class T> class CTree {
   47.66 +public:
   47.67 +    T item;
   47.68 +    int rule;
   47.69 +    uint64_t n;
   47.70 +    uint32_t id;
   47.71 +    size_t str_hash;
   47.72 +
   47.73 +    CTree<T> *parent;
   47.74 +    std::vector<CTree<std::string>*> children;
   47.75 +    double timeFilt;
   47.76 +    CTree() {
   47.77 +        timeFilt = 0;
   47.78 +        rule = 1;
   47.79 +        n = 0;
   47.80 +        id = 0;
   47.81 +        str_hash = 0;
   47.82 +    }
   47.83 +    ~CTree() {
   47.84 +
   47.85 +        while (!children.empty()) {
   47.86 +            CTree<T>* tmp = children.back();
   47.87 +            children.pop_back();
   47.88 +            if (tmp != NULL) {
   47.89 +                delete (tmp);
   47.90 +                tmp = NULL;
   47.91 +            }
   47.92 +        }
   47.93 +    }
   47.94 +};
   47.95 +
   47.96 +/* create Filtert */
   47.97 +bool CreateFilter(AllData& alldata);
   47.98 +
   47.99 +/* add path element to path tree */
  47.100 +CTree<string>* addToTree(string parent, uint32_t pid, string child,
  47.101 +        uint32_t cid, int rule, uint64_t n, string timeFilt);
  47.102 +
  47.103 +/* load filter information from an existing filter file (result.filter)*/
  47.104 +void addOldToTree(AllData& alldata, std::ostringstream& old_filter);
  47.105 +
  47.106 +/* iteration step through the callpath tree to write filter file */
  47.107 +void postOrder(CTree<string>* node, fstream& filter_file);
  47.108 +
  47.109 +/* parse callpath and add pairs of parent and childs to the tree  */
  47.110 +void parsePath(AllData& alldata, string path, int rule, uint64_t n,
  47.111 +        double timeB);
  47.112 +
  47.113 +/* parse callpath and add pairs of parent and childs to the tree  */
  47.114 +void parsePath(AllData& alldata, string path, int rule);
  47.115 +
  47.116 +#endif /* CREATE_FILTER_H */
    48.1 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/create_latex.cpp	Wed Nov 14 04:52:39 2012 +0000
    48.2 +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/create_latex.cpp	Wed Nov 14 13:29:18 2012 +0000
    48.3 @@ -1,7 +1,7 @@
    48.4  /*
    48.5   This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2012.
    48.6   Authors: Andreas Knuepfer, Robert Dietrich, Matthias Jurenz
    48.7 -*/
    48.8 + */
    48.9  
   48.10  #include <fstream>
   48.11  #include <iostream>
   48.12 @@ -27,20 +27,18 @@
   48.13  #include "OTF_Definitions.h"
   48.14  #include "OTF_Platform.h"
   48.15  
   48.16 -
   48.17  using namespace std;
   48.18  
   48.19 -
   48.20  /* maximum number of process groups */
   48.21 -const uint32_t Grouping::MAX_GROUPS= 16;
   48.22 +const uint32_t Grouping::MAX_GROUPS = 16;
   48.23  
   48.24  /* global constants to customize tables and chart plots */
   48.25 -static const int FUNC_TABLE_LEN= 50;
   48.26 -static const int CTR_TABLE_LEN= 50;
   48.27 +static const int FUNC_TABLE_LEN = 50;
   48.28 +static const int CTR_TABLE_LEN = 50;
   48.29  static const string COLOR_SEND = "red";
   48.30  static const string COLOR_RECV = "blue";
   48.31  static const string COLOR_YBAR = "green";
   48.32 -static const string COLOR_MINMAX= "black";
   48.33 +static const string COLOR_MINMAX = "black";
   48.34  static const size_t FUNC_NAME_MAX_LEN = 32;
   48.35  static const size_t CTR_NAME_MAX_LEN = 20;
   48.36  static const int YBAR_SIZE = 4;
   48.37 @@ -53,55 +51,61 @@
   48.38  static vector<string> xLabels;
   48.39  static int xLabelNum = 0;
   48.40  
   48.41 -class SpaceSeparator: public std::numpunct<char>
   48.42 -{
   48.43 +class SpaceSeparator: public std::numpunct<char> {
   48.44  public:
   48.45 -    SpaceSeparator(std::size_t refs): std::numpunct<char>(refs) {}
   48.46 +    SpaceSeparator(std::size_t refs) :
   48.47 +        std::numpunct<char>(refs) {
   48.48 +    }
   48.49  protected:
   48.50 -    char do_thousands_sep() const { return ' '; }
   48.51 -    std::string do_grouping() const { return "\03"; }
   48.52 +    char do_thousands_sep() const {
   48.53 +        return ' ';
   48.54 +    }
   48.55 +    std::string do_grouping() const {
   48.56 +        return "\03";
   48.57 +    }
   48.58  };
   48.59  
   48.60  /* The basic metric types for y bar charts */
   48.61  enum metricType {
   48.62 -  INVOCATIONS,
   48.63 -  MSGLENGTH,
   48.64 -  DURATION
   48.65 +    INVOCATIONS, MSGLENGTH, DURATION
   48.66  };
   48.67  typedef enum metricType metric_t;
   48.68  
   48.69  /* Pair of minimum and maximum value */
   48.70 -template <class type> class MinMaxPair {
   48.71 -  public:
   48.72 +template<class type> class MinMaxPair {
   48.73 +public:
   48.74      type min;
   48.75      type max;
   48.76  
   48.77 -    MinMaxPair( type a = (type) OTF_UINT64_MAX, type b= (type) 0) :
   48.78 -            min( a ), max( b ) {}
   48.79 -    ~MinMaxPair() {}
   48.80 +    MinMaxPair(type a = (type) OTF_UINT64_MAX, type b = (type) 0) :
   48.81 +        min(a), max(b) {
   48.82 +    }
   48.83 +    ~MinMaxPair() {
   48.84 +    }
   48.85  };
   48.86  
   48.87  /* The minimum and maximum message data (count, byte, duration) */
   48.88  struct MinMaxMsgData {
   48.89 -  //MinMaxPair<uint64_t> count;
   48.90 -  //MinMaxPair<uint64_t> bytes;
   48.91 -  MinMaxPair<double> count;
   48.92 -  MinMaxPair<double> bytes;
   48.93 -  MinMaxPair<double> duration;
   48.94 -
   48.95 -  MinMaxMsgData( ) {
   48.96 -    initialize();
   48.97 -  }
   48.98 -  ~MinMaxMsgData( ) {}
   48.99 -
  48.100 -  void initialize(){
  48.101 -    count.min = OTF_UINT64_MAX;
  48.102 -    count.max = 0;
  48.103 -    bytes.min = OTF_UINT64_MAX;
  48.104 -    bytes.max = 0;
  48.105 -    duration.min = numeric_limits<double>::max();
  48.106 -    duration.max = 0;
  48.107 -  }
  48.108 +    //MinMaxPair<uint64_t> count;
  48.109 +    //MinMaxPair<uint64_t> bytes;
  48.110 +    MinMaxPair<double> count;
  48.111 +    MinMaxPair<double> bytes;
  48.112 +    MinMaxPair<double> duration;
  48.113 +
  48.114 +    MinMaxMsgData() {
  48.115 +        initialize();
  48.116 +    }
  48.117 +    ~MinMaxMsgData() {
  48.118 +    }
  48.119 +
  48.120 +    void initialize() {
  48.121 +        count.min = OTF_UINT64_MAX;
  48.122 +        count.max = 0;
  48.123 +        bytes.min = OTF_UINT64_MAX;
  48.124 +        bytes.max = 0;
  48.125 +        duration.min = numeric_limits<double>::max();
  48.126 +        duration.max = 0;
  48.127 +    }
  48.128  };
  48.129  
  48.130  /* function declarations */
  48.131 @@ -118,29 +122,28 @@
  48.132   *
  48.133   * @return the result string
  48.134   */
  48.135 -static string convertBase2Exponent(uint64_t exp)
  48.136 -{
  48.137 -  stringstream result;
  48.138 -  if(exp < 10){
  48.139 -    result << (1 << exp);
  48.140 -  }else{
  48.141 -    if(exp < 20){ // K
  48.142 -      result << (1 << (exp - 10)) << "K";
  48.143 -    }else{
  48.144 -      if(exp < 30){ // M
  48.145 -        result << (1 << (exp - 20)) << "M";
  48.146 -      }else{
  48.147 -        if(exp < 40){ // G
  48.148 -          result << (1 << (exp - 30)) << "G";
  48.149 -        }else{
  48.150 -          // T
  48.151 -          result << (1 << (exp - 40)) << "T";
  48.152 +static string convertBase2Exponent(uint64_t exp) {
  48.153 +    stringstream result;
  48.154 +    if (exp < 10) {
  48.155 +        result << (1 << exp);
  48.156 +    } else {
  48.157 +        if (exp < 20) { // K
  48.158 +            result << (1 << (exp - 10)) << "K";
  48.159 +        } else {
  48.160 +            if (exp < 30) { // M
  48.161 +                result << (1 << (exp - 20)) << "M";
  48.162 +            } else {
  48.163 +                if (exp < 40) { // G
  48.164 +                    result << (1 << (exp - 30)) << "G";
  48.165 +                } else {
  48.166 +                    // T
  48.167 +                    result << (1 << (exp - 40)) << "T";
  48.168 +                }
  48.169 +            }
  48.170          }
  48.171 -      }
  48.172      }
  48.173 -  }
  48.174 -
  48.175 -  return result.str();
  48.176 +
  48.177 +    return result.str();
  48.178  }
  48.179  
  48.180  /*
  48.181 @@ -152,44 +155,43 @@
  48.182   *
  48.183   * @return the resulting divisor
  48.184   */
  48.185 -static uint64_t getScaleQuantifierLog2(double min, double max, char& unit)
  48.186 -{
  48.187 -  unit = ' ';
  48.188 -
  48.189 -  /* after half of the steps, the limit like K, M, G should be reached */
  48.190 -  double limit = (min+max)/2; //steps/2*interval;
  48.191 -
  48.192 -  /*if(limit > (1 << 60)){
  48.193 -    unit = 'E';
  48.194 -    return (1 << 60);
  48.195 -  }
  48.196 -  
  48.197 -  if(limit > (1 << 50)){
  48.198 -    unit = 'P';
  48.199 -    return (1 << 50);
  48.200 -  }
  48.201 -  
  48.202 -  if(limit > (1 << 40)){
  48.203 -    unit = 'T';
  48.204 -    return (1 << 40);
  48.205 -  }*/
  48.206 -  
  48.207 -  if(limit > (1 << 30)){
  48.208 -    unit = 'G';
  48.209 -    return (1 << 30);
  48.210 -  }
  48.211 -
  48.212 -  if(limit > (1 << 20)){
  48.213 -    unit = 'M';
  48.214 -    return (1 << 20);
  48.215 -  }
  48.216 -
  48.217 -  if(limit > 1024){
  48.218 -    unit = 'K';
  48.219 -    return 1024;
  48.220 -  }
  48.221 -
  48.222 -  return 1;
  48.223 +static uint64_t getScaleQuantifierLog2(double min, double max, char& unit) {
  48.224 +    unit = ' ';
  48.225 +
  48.226 +    /* after half of the steps, the limit like K, M, G should be reached */
  48.227 +    double limit = (min + max) / 2; //steps/2*interval;
  48.228 +
  48.229 +    /*if(limit > (1 << 60)){
  48.230 +     unit = 'E';
  48.231 +     return (1 << 60);
  48.232 +     }
  48.233 +
  48.234 +     if(limit > (1 << 50)){
  48.235 +     unit = 'P';
  48.236 +     return (1 << 50);
  48.237 +     }
  48.238 +
  48.239 +     if(limit > (1 << 40)){
  48.240 +     unit = 'T';
  48.241 +     return (1 << 40);
  48.242 +     }*/
  48.243 +
  48.244 +    if (limit > (1 << 30)) {
  48.245 +        unit = 'G';
  48.246 +        return (1 << 30);
  48.247 +    }
  48.248 +
  48.249 +    if (limit > (1 << 20)) {
  48.250 +        unit = 'M';
  48.251 +        return (1 << 20);
  48.252 +    }
  48.253 +
  48.254 +    if (limit > 1024) {
  48.255 +        unit = 'K';
  48.256 +        return 1024;
  48.257 +    }
  48.258 +
  48.259 +    return 1;
  48.260  }
  48.261  
  48.262  /*
  48.263 @@ -203,50 +205,49 @@
  48.264   * @return the resulting divisor
  48.265   */
  48.266  static uint64_t getScaleQuantifierLog10(double min, double max, char& unit,
  48.267 -                                        string& unitString)
  48.268 -{
  48.269 -  unit = ' ';
  48.270 -
  48.271 -  /* after half of the steps, the limit like K, M, G should be reached */
  48.272 -  double limit = (min+max)/2; //steps/2*interval;
  48.273 -
  48.274 -  if(limit > 1e18){
  48.275 -    unit = 'E';
  48.276 -    unitString = "Exa";
  48.277 -    return (uint64_t)1e18;
  48.278 -  }
  48.279 -  
  48.280 -  if(limit > 1e15){
  48.281 -    unit = 'P';
  48.282 -    unitString = "Peta";
  48.283 -    return (uint64_t)1e15;
  48.284 -  }
  48.285 -  
  48.286 -  if(limit > 1e12){
  48.287 -    unit = 'T';
  48.288 -    unitString = "Tera";
  48.289 -    return (uint64_t)1e12;
  48.290 -  }
  48.291 -
  48.292 -  if(limit > 1e9){
  48.293 -    unit = 'G';
  48.294 -    unitString = "Giga";
  48.295 -    return (uint64_t)1e9;
  48.296 -  }
  48.297 -
  48.298 -  if(limit > 1e6){
  48.299 -    unit = 'M';
  48.300 -    unitString = "Mega";
  48.301 -    return (uint64_t)1e6;
  48.302 -  }
  48.303 -
  48.304 -  if(limit > 1e3){
  48.305 -    unit = 'K';
  48.306 -    unitString = "Kilo";
  48.307 -    return (uint64_t)1e3;
  48.308 -  }
  48.309 -
  48.310 -  return 1;
  48.311 +        string& unitString) {
  48.312 +    unit = ' ';
  48.313 +
  48.314 +    /* after half of the steps, the limit like K, M, G should be reached */
  48.315 +    double limit = (min + max) / 2; //steps/2*interval;
  48.316 +
  48.317 +    if (limit > 1e18) {
  48.318 +        unit = 'E';
  48.319 +        unitString = "Exa";
  48.320 +        return (uint64_t) 1e18;
  48.321 +    }
  48.322 +
  48.323 +    if (limit > 1e15) {
  48.324 +        unit = 'P';
  48.325 +        unitString = "Peta";
  48.326 +        return (uint64_t) 1e15;
  48.327 +    }
  48.328 +
  48.329 +    if (limit > 1e12) {
  48.330 +        unit = 'T';
  48.331 +        unitString = "Tera";
  48.332 +        return (uint64_t) 1e12;
  48.333 +    }
  48.334 +
  48.335 +    if (limit > 1e9) {
  48.336 +        unit = 'G';
  48.337 +        unitString = "Giga";
  48.338 +        return (uint64_t) 1e9;
  48.339 +    }
  48.340 +
  48.341 +    if (limit > 1e6) {
  48.342 +        unit = 'M';
  48.343 +        unitString = "Mega";
  48.344 +        return (uint64_t) 1e6;
  48.345 +    }
  48.346 +
  48.347 +    if (limit > 1e3) {
  48.348 +        unit = 'K';
  48.349 +        unitString = "Kilo";
  48.350 +        return (uint64_t) 1e3;
  48.351 +    }
  48.352 +
  48.353 +    return 1;
  48.354  }
  48.355  
  48.356  /*
  48.357 @@ -256,21 +257,21 @@
  48.358   * @param max reference to the maximum value of the scale
  48.359   * @param steps the number of tick values to be created
  48.360   */
  48.361 -static void makeNiceScaleTicks(double& min, double& max, uint8_t& steps)
  48.362 -{
  48.363 -  double interval = (max - min) / steps;
  48.364 -
  48.365 -  /* get a nice maximum value */
  48.366 -  if(max > (uint64_t)max){
  48.367 -    max = (uint64_t)max + 1;
  48.368 -  }
  48.369 -
  48.370 -  if(min - interval < 0) min = 0;
  48.371 -  else{
  48.372 -    min = (uint64_t)min;
  48.373 -  }
  48.374 -
  48.375 -  //cout << "New min value: " << min << " New max value: " << max << endl;
  48.376 +static void makeNiceScaleTicks(double& min, double& max, uint8_t& steps) {
  48.377 +    double interval = (max - min) / steps;
  48.378 +
  48.379 +    /* get a nice maximum value */
  48.380 +    if (max > (uint64_t) max) {
  48.381 +        max = (uint64_t) max + 1;
  48.382 +    }
  48.383 +
  48.384 +    if (min - interval < 0)
  48.385 +        min = 0;
  48.386 +    else {
  48.387 +        min = (uint64_t) min;
  48.388 +    }
  48.389 +
  48.390 +    //cout << "New min value: " << min << " New max value: " << max << endl;
  48.391  }
  48.392  
  48.393  /*
  48.394 @@ -280,17 +281,17 @@
  48.395   * @param max reference to the maximum value of the scale
  48.396   * @param steps the number of tick values to be created
  48.397   
  48.398 -static void makeNiceScaleTicks(uint64_t& min, uint64_t& max, uint8_t& steps)
  48.399 -{
  48.400 -  double interval = (max - min) / steps;
  48.401 -
  48.402 -  if(min - interval < 0) min = 0;
  48.403 -  else{
  48.404 -    min = (uint64_t)min;
  48.405 -  }
  48.406 -
  48.407 -  //cout << "New min value: " << min << " New max value: " << max << endl;
  48.408 -}*/
  48.409 + static void makeNiceScaleTicks(uint64_t& min, uint64_t& max, uint8_t& steps)
  48.410 + {
  48.411 + double interval = (max - min) / steps;
  48.412 +
  48.413 + if(min - interval < 0) min = 0;
  48.414 + else{
  48.415 + min = (uint64_t)min;
  48.416 + }
  48.417 +
  48.418 + //cout << "New min value: " << min << " New max value: " << max << endl;
  48.419 + }*/
  48.420  
  48.421  /*
  48.422   * Converts the OTF collective definition to a string.
  48.423 @@ -298,16 +299,26 @@
  48.424   * @param id the OTF collective definition (input)
  48.425   * @param string the collective definition as string (output)
  48.426   */
  48.427 -static void collectiveId2String(uint64_t id, string& name)
  48.428 -{
  48.429 -  switch(id){
  48.430 -    case OTF_COLLECTIVE_TYPE_BARRIER: name = "BARRIER"; break;
  48.431 -    case OTF_COLLECTIVE_TYPE_ONE2ALL: name = "ONETOALL"; break;
  48.432 -    case OTF_COLLECTIVE_TYPE_ALL2ONE: name = "ALLTOONE"; break;
  48.433 -    case OTF_COLLECTIVE_TYPE_ALL2ALL: name = "ALLTOALL"; break;
  48.434 -    case OTF_COLLECTIVE_TYPE_UNKNOWN: name = "UNKNOWN"; break;
  48.435 -    default: name = "";
  48.436 -  }
  48.437 +static void collectiveId2String(uint64_t id, string& name) {
  48.438 +    switch (id) {
  48.439 +    case OTF_COLLECTIVE_TYPE_BARRIER:
  48.440 +        name = "BARRIER";
  48.441 +        break;
  48.442 +    case OTF_COLLECTIVE_TYPE_ONE2ALL:
  48.443 +        name = "ONETOALL";
  48.444 +        break;
  48.445 +    case OTF_COLLECTIVE_TYPE_ALL2ONE:
  48.446 +        name = "ALLTOONE";
  48.447 +        break;
  48.448 +    case OTF_COLLECTIVE_TYPE_ALL2ALL:
  48.449 +        name = "ALLTOALL";
  48.450 +        break;
  48.451 +    case OTF_COLLECTIVE_TYPE_UNKNOWN:
  48.452 +        name = "UNKNOWN";
  48.453 +        break;
  48.454 +    default:
  48.455 +        name = "";
  48.456 +    }
  48.457  }
  48.458  
  48.459  /*
  48.460 @@ -315,67 +326,70 @@
  48.461   * 
  48.462   * @param tex the latex file output stream
  48.463   */
  48.464 -static void write_header(fstream& tex)
  48.465 -{
  48.466 -	tex << "\\documentclass[a4paper,10pt]{article}" << endl;
  48.467 -	tex << "\\nonstopmode" << endl;
  48.468 -	tex << "\\usepackage{amssymb}" << endl;
  48.469 -	tex << "\\usepackage{longtable}" << endl;
  48.470 -  tex << "\\usepackage{ifthen}" << endl;
  48.471 -	tex << "\\usepackage{pgfplots}" << endl;
  48.472 -  tex << "\\pgfplotsset{compat=1.4}" << endl << endl;
  48.473 -  tex << "\\textwidth=16.1cm \\textheight=27.0cm \\topmargin=-1.8cm" << endl;
  48.474 -	tex << "\\oddsidemargin=0.1cm \\evensidemargin=0.1cm \\footskip=45pt" << endl;
  48.475 -	tex << endl;
  48.476 -	tex << "\\begin{document}" << endl;
  48.477 -	tex << endl;
  48.478 -
  48.479 -  tex << "\\pgfplotsset{" << endl;
  48.480 -  tex << "/pgfplots/log number format basis/.code 2 args={" << endl;
  48.481 -  tex << "  \\ifdim #1 pt=2pt" << endl;
  48.482 -  tex << "    \\ifdim #2 pt>0.5pt" << endl;
  48.483 -  tex << "      \\ifdim #2 pt<10pt" << endl;
  48.484 -  tex << "        \\pgfmathparse{#1^#2}" << endl;
  48.485 -  tex << "        \\pgfmathtruncatemacro\\r{\\pgfmathresult} \\r " << endl;
  48.486 -  tex << "      \\else" << endl;
  48.487 -  tex << "        \\ifdim #2 pt<20pt" << endl;
  48.488 -	tex << "          \\pgfmathparse{#1^(#2 - 10)}" << endl;
  48.489 -	tex << "          \\pgfmathprintnumber{\\pgfmathresult}K" << endl;
  48.490 -  tex << "        \\else" << endl;
  48.491 -	tex << "          \\ifdim #2 pt<30pt" << endl;
  48.492 -	tex << "            \\pgfmathparse{#1^(#2 - 20)}" << endl;
  48.493 -	tex << "            \\pgfmathprintnumber{\\pgfmathresult}M" << endl;
  48.494 -	tex << "          \\else" << endl;
  48.495 -	tex << "            \\ifdim #2 pt<40pt" << endl;
  48.496 -	tex << "              \\pgfmathparse{#1^(#2 - 30)}" << endl;
  48.497 -	tex << "              \\pgfmathprintnumber{\\pgfmathresult}G" << endl;
  48.498 -	tex << "            \\else" << endl;
  48.499 -	tex << "              \\ifdim #2 pt<50pt" << endl;
  48.500 -	tex << "                \\pgfmathparse{#1^(#2 - 40)}" << endl;
  48.501 -	tex << "                \\pgfmathprintnumber{\\pgfmathresult}T" << endl;
  48.502 -	tex << "              \\else" << endl;
  48.503 -  tex << "                \\ifdim #2 pt<60pt" << endl;
  48.504 -	tex << "                  \\pgfmathparse{#1^(#2 - 50)}" << endl;
  48.505 -	tex << "                  \\pgfmathprintnumber{\\pgfmathresult}P" << endl;
  48.506 -	tex << "                \\else" << endl;
  48.507 -	tex << "                  \\ifdim #2 pt<70pt" << endl;
  48.508 -	tex << "                    \\pgfmathparse{#1^(#2 - 60)}" << endl;
  48.509 -	tex << "                    \\pgfmathprintnumber{\\pgfmathresult}E" << endl;
  48.510 -	tex << "                  \\else" << endl;
  48.511 -	tex << "                    >1Z" << endl;
  48.512 -  tex << "                  \\fi" << endl;
  48.513 -  tex << "                \\fi" << endl;
  48.514 -  tex << "              \\fi" << endl;
  48.515 -	tex << "            \\fi" << endl;
  48.516 -	tex << "          \\fi" << endl;
  48.517 -  tex << "        \\fi" << endl;
  48.518 -  tex << "      \\fi" << endl;
  48.519 -  tex << "    \\fi" << endl;
  48.520 -  tex << "  \\fi" << endl;
  48.521 -  tex << "  \\ifdim #1 pt=10pt" << endl;
  48.522 -  tex << "    $#1^{\\pgfmathprintnumber{#2}}$" << endl;
  48.523 -  tex << "  \\fi" << endl;
  48.524 -  tex << "}}" << endl << endl;
  48.525 +static void write_header(fstream& tex) {
  48.526 +    tex << "\\documentclass[a4paper,10pt]{article}" << endl;
  48.527 +    tex << "\\nonstopmode" << endl;
  48.528 +    tex << "\\usepackage{amssymb}" << endl;
  48.529 +    tex << "\\usepackage{longtable}" << endl;
  48.530 +    tex << "\\usepackage{ifthen}" << endl;
  48.531 +    tex << "\\usepackage{pgfplots}" << endl;
  48.532 +    tex
  48.533 +            << "\\usepackage[linkcolor=red,pagecolor=red,pdfborder={1 1 1}]{hyperref}"
  48.534 +            << endl;
  48.535 +    tex << "\\pgfplotsset{compat=1.4}" << endl << endl;
  48.536 +    tex << "\\textwidth=16.1cm \\textheight=27.0cm \\topmargin=-1.8cm" << endl;
  48.537 +    tex << "\\oddsidemargin=0.1cm \\evensidemargin=0.1cm \\footskip=45pt"
  48.538 +            << endl;
  48.539 +    tex << endl;
  48.540 +    tex << "\\begin{document}" << endl;
  48.541 +    tex << endl;
  48.542 +
  48.543 +    tex << "\\pgfplotsset{" << endl;
  48.544 +    tex << "/pgfplots/log number format basis/.code 2 args={" << endl;
  48.545 +    tex << "  \\ifdim #1 pt=2pt" << endl;
  48.546 +    tex << "    \\ifdim #2 pt>0.5pt" << endl;
  48.547 +    tex << "      \\ifdim #2 pt<10pt" << endl;
  48.548 +    tex << "        \\pgfmathparse{#1^#2}" << endl;
  48.549 +    tex << "        \\pgfmathtruncatemacro\\r{\\pgfmathresult} \\r " << endl;
  48.550 +    tex << "      \\else" << endl;
  48.551 +    tex << "        \\ifdim #2 pt<20pt" << endl;
  48.552 +    tex << "          \\pgfmathparse{#1^(#2 - 10)}" << endl;
  48.553 +    tex << "          \\pgfmathprintnumber{\\pgfmathresult}K" << endl;
  48.554 +    tex << "        \\else" << endl;
  48.555 +    tex << "          \\ifdim #2 pt<30pt" << endl;
  48.556 +    tex << "            \\pgfmathparse{#1^(#2 - 20)}" << endl;
  48.557 +    tex << "            \\pgfmathprintnumber{\\pgfmathresult}M" << endl;
  48.558 +    tex << "          \\else" << endl;
  48.559 +    tex << "            \\ifdim #2 pt<40pt" << endl;
  48.560 +    tex << "              \\pgfmathparse{#1^(#2 - 30)}" << endl;
  48.561 +    tex << "              \\pgfmathprintnumber{\\pgfmathresult}G" << endl;
  48.562 +    tex << "            \\else" << endl;
  48.563 +    tex << "              \\ifdim #2 pt<50pt" << endl;
  48.564 +    tex << "                \\pgfmathparse{#1^(#2 - 40)}" << endl;
  48.565 +    tex << "                \\pgfmathprintnumber{\\pgfmathresult}T" << endl;
  48.566 +    tex << "              \\else" << endl;
  48.567 +    tex << "                \\ifdim #2 pt<60pt" << endl;
  48.568 +    tex << "                  \\pgfmathparse{#1^(#2 - 50)}" << endl;
  48.569 +    tex << "                  \\pgfmathprintnumber{\\pgfmathresult}P" << endl;
  48.570 +    tex << "                \\else" << endl;
  48.571 +    tex << "                  \\ifdim #2 pt<70pt" << endl;
  48.572 +    tex << "                    \\pgfmathparse{#1^(#2 - 60)}" << endl;
  48.573 +    tex << "                    \\pgfmathprintnumber{\\pgfmathresult}E" << endl;
  48.574 +    tex << "                  \\else" << endl;
  48.575 +    tex << "                    >1Z" << endl;
  48.576 +    tex << "                  \\fi" << endl;
  48.577 +    tex << "                \\fi" << endl;
  48.578 +    tex << "              \\fi" << endl;
  48.579 +    tex << "            \\fi" << endl;
  48.580 +    tex << "          \\fi" << endl;
  48.581 +    tex << "        \\fi" << endl;
  48.582 +    tex << "      \\fi" << endl;
  48.583 +    tex << "    \\fi" << endl;
  48.584 +    tex << "  \\fi" << endl;
  48.585 +    tex << "  \\ifdim #1 pt=10pt" << endl;
  48.586 +    tex << "    $#1^{\\pgfmathprintnumber{#2}}$" << endl;
  48.587 +    tex << "  \\fi" << endl;
  48.588 +    tex << "}}" << endl << endl;
  48.589  }
  48.590  
  48.591  /*
  48.592 @@ -383,10 +397,9 @@
  48.593   *
  48.594   * @param tex the latex file output stream
  48.595   */
  48.596 -static void write_footer(fstream& tex)
  48.597 -{
  48.598 -	tex << endl;
  48.599 -  tex << "\\end{document}" << endl;
  48.600 +static void write_footer(fstream& tex) {
  48.601 +    tex << endl;
  48.602 +    tex << "\\end{document}" << endl;
  48.603  }
  48.604  
  48.605  /*
  48.606 @@ -395,74 +408,74 @@
  48.607   * @param tex the latex file output stream
  48.608   * @param alldata structure containing all the needed information
  48.609   */
  48.610 -static void write_traceProperties(fstream& tex, struct AllData& alldata)
  48.611 -{
  48.612 -  tex << "\\begin{titlepage}\\thispagestyle{empty}" << endl;
  48.613 -  tex << "\\begin{huge}\\begin{flushleft}\\bf{OTF Profile}"
  48.614 -         "\\end{flushleft}\\end{huge}" << endl;
  48.615 -  tex << "\\hrule" << endl;
  48.616 -  tex << "\\begin{flushright}\\textbf{\\large Trace Properties}"
  48.617 -         "\\end{flushright}" << endl;
  48.618 -
  48.619 -  tex << "\\vspace{0.5\\baselineskip}" << endl;
  48.620 -
  48.621 -  tex << "\\begin{flushleft}" << endl;
  48.622 -  tex << "\\begin{tabular}{ll}" << endl;
  48.623 -  tex << "\\bf{OTF Version:} & \\verb|" << alldata.version.c_str() << "| \\\\" << endl;
  48.624 -  tex << "\\bf{Creator:} & \\verb|" << alldata.creator << "|\\\\" << endl;
  48.625 -
  48.626 -  /* parse the file path and write only the file's name */
  48.627 -  {
  48.628 -    string purFile = alldata.params.input_file_prefix;
  48.629 -    size_t found;
  48.630 -
  48.631 -    found = alldata.params.input_file_prefix.rfind('/');
  48.632 -    
  48.633 -    if (found != string::npos){
  48.634 -      purFile = alldata.params.input_file_prefix.substr(found+1, string::npos);
  48.635 +static void write_traceProperties(fstream& tex, struct AllData& alldata) {
  48.636 +    tex << "\\begin{titlepage}\\thispagestyle{empty}" << endl;
  48.637 +    tex << "\\begin{huge}\\begin{flushleft}\\bf{OTF Profile}"
  48.638 +        "\\end{flushleft}\\end{huge}" << endl;
  48.639 +    tex << "\\hrule" << endl;
  48.640 +    tex << "\\begin{flushright}\\textbf{\\large Trace Properties}"
  48.641 +        "\\end{flushright}" << endl;
  48.642 +
  48.643 +    tex << "\\vspace{0.5\\baselineskip}" << endl;
  48.644 +
  48.645 +    tex << "\\begin{flushleft}" << endl;
  48.646 +    tex << "\\begin{tabular}{ll}" << endl;
  48.647 +    tex << "\\bf{OTF Version:} & \\verb|" << alldata.version.c_str()
  48.648 +            << "| \\\\" << endl;
  48.649 +    tex << "\\bf{Creator:} & \\verb|" << alldata.creator << "|\\\\" << endl;
  48.650 +
  48.651 +    /* parse the file path and write only the file's name */
  48.652 +    {
  48.653 +        string purFile = alldata.params.input_file_prefix;
  48.654 +        size_t found;
  48.655 +
  48.656 +        found = alldata.params.input_file_prefix.rfind('/');
  48.657 +
  48.658 +        if (found != string::npos) {
  48.659 +            purFile = alldata.params.input_file_prefix.substr(found + 1,
  48.660 +                    string::npos);
  48.661 +        }
  48.662 +
  48.663 +        tex << "\\bf{File:} & \\verb|" << purFile << ".otf|";
  48.664      }
  48.665  
  48.666 -    tex << "\\bf{File:} & \\verb|" << purFile << ".otf|";
  48.667 -  }
  48.668 -
  48.669 -  tex << endl << "\\end{tabular}" << endl << endl;
  48.670 -
  48.671 -  tex << "\\vspace{1\\baselineskip}" << endl;
  48.672 -
  48.673 -  tex << "\\begin{tabular}{ll}" << endl;
  48.674 -  tex << "\\bf{Number of Processes:} & \\verb|" 
  48.675 -      << alldata.allProcesses.size() << "|\\\\" << endl;
  48.676 -
  48.677 -  {
  48.678 -    char unit; 
  48.679 -    string unitString; //unused
  48.680 -    uint64_t quant;
  48.681 -    
  48.682 -    quant = getScaleQuantifierLog10(alldata.timerResolution,
  48.683 -                                    alldata.timerResolution,
  48.684 -                                    unit, unitString);
  48.685 -
  48.686 -    tex << "\\bf{Timer Resolution:} & \\verb|"
  48.687 -        << alldata.timerResolution/(double)quant << " " << unit << "Hz|" << endl;
  48.688 -  }
  48.689 -
  48.690 -  tex << "\\end{tabular}" << endl << endl;
  48.691 -
  48.692 -  tex << "\\vspace{1\\baselineskip}" << endl;
  48.693 -
  48.694 -  if(!alldata.comments.empty()){
  48.695 -    tex << "\\begin{tabular}{l}\\bf{Comments:}\\end{tabular}" << endl;
  48.696 -    tex << "\\begin{quote}\\begin{verbatim}" << endl;
  48.697 -    tex << alldata.comments << endl;
  48.698 -    tex << "\\end{verbatim}\\end{quote}" << endl;
  48.699 -  }
  48.700 -
  48.701 -  tex << "\\end{flushleft}" << endl;
  48.702 -  tex << "\\vspace*{\\fill}" << endl;
  48.703 -  tex << "\\begin{flushright}\\today\\end{flushright}" << endl;
  48.704 -
  48.705 -  tex << "\\end{titlepage}" << endl <<endl;
  48.706 -	tex << "\\newpage" << endl << endl;
  48.707 +    tex << endl << "\\end{tabular}" << endl << endl;
  48.708 +
  48.709 +    tex << "\\vspace{1\\baselineskip}" << endl;
  48.710 +
  48.711 +    tex << "\\begin{tabular}{ll}" << endl;
  48.712 +    tex << "\\bf{Number of Processes:} & \\verb|"
  48.713 +            << alldata.allProcesses.size() << "|\\\\" << endl;
  48.714 +
  48.715 +    {
  48.716 +        char unit;
  48.717 +        string unitString; //unused
  48.718 +        uint64_t quant;
  48.719 +
  48.720 +        quant = getScaleQuantifierLog10(alldata.timerResolution,
  48.721 +                alldata.timerResolution, unit, unitString);
  48.722 +
  48.723 +        tex << "\\bf{Timer Resolution:} & \\verb|" << alldata.timerResolution
  48.724 +                / (double) quant << " " << unit << "Hz|" << endl;
  48.725 +    }
  48.726 +
  48.727 +    tex << "\\end{tabular}" << endl << endl;
  48.728 +
  48.729 +    tex << "\\vspace{1\\baselineskip}" << endl;
  48.730 +
  48.731 +    if (!alldata.comments.empty()) {
  48.732 +        tex << "\\begin{tabular}{l}\\bf{Comments:}\\end{tabular}" << endl;
  48.733 +        tex << "\\begin{quote}\\begin{verbatim}" << endl;
  48.734 +        tex << alldata.comments << endl;
  48.735 +        tex << "\\end{verbatim}\\end{quote}" << endl;
  48.736 +    }
  48.737 +
  48.738 +    tex << "\\end{flushleft}" << endl;
  48.739 +    tex << "\\vspace*{\\fill}" << endl;
  48.740 +    tex << "\\begin{flushright}\\today\\end{flushright}" << endl;
  48.741 +
  48.742 +    tex << "\\end{titlepage}" << endl << endl;
  48.743 +    tex << "\\newpage" << endl << endl;
  48.744  }
  48.745  
  48.746  /*
  48.747 @@ -477,67 +490,70 @@
  48.748   *
  48.749   * @return true if color encoding was successful.
  48.750   */
  48.751 -static bool get_color_gray(double min, double max, double value,
  48.752 -                   float& red, float& green, float& blue)
  48.753 -{
  48.754 -	if((value == min) || (min == max)){
  48.755 -		red = 0.9f; green = 0.9f; blue = 0.9f;
  48.756 -		return true;
  48.757 -	}
  48.758 -
  48.759 -	if(value == max){
  48.760 -		red = 1.0; green = 0.0; blue = 0.0;
  48.761 -		return true;
  48.762 -	}
  48.763 -
  48.764 -	double factor = (max - min) / 5.0;
  48.765 -	uint32_t part = (uint32_t) (((value - min) * 5.0) / (max - min));
  48.766 -	double min_temp = min + (factor * part);
  48.767 -	double max_temp = min + (factor * (part + 1.0));
  48.768 -	double part_temp;
  48.769 -
  48.770 -	if(value == min_temp)
  48.771 -		part_temp = 0.0;
  48.772 -	else if(value == max_temp)
  48.773 -		part_temp = 1.0;
  48.774 -	else
  48.775 -		part_temp = (value - min_temp) / (max_temp - min_temp);
  48.776 -  
  48.777 -	if (part == 0)
  48.778 -		part_temp = part_temp / 2;
  48.779 -
  48.780 -	switch(part){
  48.781 -		case 0 :
  48.782 -			red = (float) (0.9 - part_temp);
  48.783 -			green = (float) (0.9 - part_temp);
  48.784 -			blue = (float) (0.9 - part_temp);
  48.785 -			break;
  48.786 -		case 1 :
  48.787 -			red = (float) (0.0);
  48.788 -			green = (float) (part_temp);
  48.789 -			blue = (float) (1.0);
  48.790 -			break;
  48.791 -		case 2 :
  48.792 -			red = (float) (0.0);
  48.793 -			green = (float) (1.0);
  48.794 -			blue = (float) (1.0 - part_temp);
  48.795 -			break;
  48.796 -		case 3 :
  48.797 -			red = (float) (part_temp);
  48.798 -			green = (float) (1.0);
  48.799 -			blue = (float) (0.0);
  48.800 -			break;
  48.801 -		case 4 :
  48.802 -			red = (float) (1.0);
  48.803 -			green = (float) (1.0 - part_temp);
  48.804 -			blue = (float) (0.0);
  48.805 -			break;
  48.806 -		default :
  48.807 -      cerr << "Error in get_color(). Wrong part calculated." << endl;
  48.808 -      return false;
  48.809 -	}
  48.810 -
  48.811 -	return true;
  48.812 +static bool get_color_gray(double min, double max, double value, float& red,
  48.813 +        float& green, float& blue) {
  48.814 +    if ((value == min) || (min == max)) {
  48.815 +        red = 0.9f;
  48.816 +        green = 0.9f;
  48.817 +        blue = 0.9f;
  48.818 +        return true;
  48.819 +    }
  48.820 +
  48.821 +    if (value == max) {
  48.822 +        red = 1.0;
  48.823 +        green = 0.0;
  48.824 +        blue = 0.0;
  48.825 +        return true;
  48.826 +    }
  48.827 +
  48.828 +    double factor = (max - min) / 5.0;
  48.829 +    uint32_t part = (uint32_t) (((value - min) * 5.0) / (max - min));
  48.830 +    double min_temp = min + (factor * part);
  48.831 +    double max_temp = min + (factor * (part + 1.0));
  48.832 +    double part_temp;
  48.833 +
  48.834 +    if (value == min_temp)
  48.835 +        part_temp = 0.0;
  48.836 +    else if (value == max_temp)
  48.837 +        part_temp = 1.0;
  48.838 +    else
  48.839 +        part_temp = (value - min_temp) / (max_temp - min_temp);
  48.840 +
  48.841 +    if (part == 0)
  48.842 +        part_temp = part_temp / 2;
  48.843 +
  48.844 +    switch (part) {
  48.845 +    case 0:
  48.846 +        red = (float) (0.9 - part_temp);
  48.847 +        green = (float) (0.9 - part_temp);
  48.848 +        blue = (float) (0.9 - part_temp);
  48.849 +        break;
  48.850 +    case 1:
  48.851 +        red = (float) (0.0);
  48.852 +        green = (float) (part_temp);
  48.853 +        blue = (float) (1.0);
  48.854 +        break;
  48.855 +    case 2:
  48.856 +        red = (float) (0.0);
  48.857 +        green = (float) (1.0);
  48.858 +        blue = (float) (1.0 - part_temp);
  48.859 +        break;
  48.860 +    case 3:
  48.861 +        red = (float) (part_temp);
  48.862 +        green = (float) (1.0);
  48.863 +        blue = (float) (0.0);
  48.864 +        break;
  48.865 +    case 4:
  48.866 +        red = (float) (1.0);
  48.867 +        green = (float) (1.0 - part_temp);
  48.868 +        blue = (float) (0.0);
  48.869 +        break;
  48.870 +    default:
  48.871 +        cerr << "Error in get_color(). Wrong part calculated." << endl;
  48.872 +        return false;
  48.873 +    }
  48.874 +
  48.875 +    return true;
  48.876  }
  48.877  
  48.878  /*
  48.879 @@ -545,7 +561,7 @@
  48.880   * FunctionData (counterValue, exclTime, inclTime)
  48.881   * sorted by counter ID first, then function ID
  48.882   */
  48.883 -typedef std::map< uint64_t, FunctionData >::const_iterator itFunc_t;
  48.884 +typedef std::map<uint64_t, FunctionData>::const_iterator itFunc_t;
  48.885  
  48.886  /*
  48.887   * Write a latex function summary table to the given file stream.
  48.888 @@ -553,78 +569,83 @@
  48.889   * @param tex the given file stream (reference)
  48.890   * @param alldata the global data
  48.891   */
  48.892 -static void write_functionTable(fstream& tex, struct AllData& alldata)
  48.893 -{
  48.894 -  int mapSize = alldata.functionMapGlobal.size();
  48.895 -  int max = FUNC_TABLE_LEN;
  48.896 -  int count = 0;
  48.897 -
  48.898 -  /* sort by exclusive time (key) into multimap */
  48.899 -  std::multimap<double, itFunc_t> sortedMap;
  48.900 -
  48.901 -  {
  48.902 -    itFunc_t it = alldata.functionMapGlobal.begin();
  48.903 -    itFunc_t itend = alldata.functionMapGlobal.end();
  48.904 -
  48.905 -    while(itend != it){
  48.906 -      /* only functions, which are at least invoked once */
  48.907 -      if(it->second.count.cnt){
  48.908 -        sortedMap.insert(
  48.909 -          pair<double, itFunc_t>((double)(it->second.excl_time.sum), it));
  48.910 -      }
  48.911 -      it++;
  48.912 +static void write_functionTable(fstream& tex, struct AllData& alldata) {
  48.913 +    int mapSize = alldata.functionMapGlobal.size();
  48.914 +    int max = FUNC_TABLE_LEN;
  48.915 +    int count = 0;
  48.916 +
  48.917 +    /* sort by exclusive time (key) into multimap */
  48.918 +    std::multimap<double, itFunc_t> sortedMap;
  48.919 +
  48.920 +    {
  48.921 +        itFunc_t it = alldata.functionMapGlobal.begin();
  48.922 +        itFunc_t itend = alldata.functionMapGlobal.end();
  48.923 +
  48.924 +        while (itend != it) {
  48.925 +            /* only functions, which are at least invoked once */
  48.926 +            if (it->second.count.cnt) {
  48.927 +                sortedMap.insert(pair<double, itFunc_t> (
  48.928 +                        (double) (it->second.excl_time.sum), it));
  48.929 +            }
  48.930 +            it++;
  48.931 +        }
  48.932      }
  48.933 -  }
  48.934 -    
  48.935 -  /* write the table head */
  48.936 -  if(mapSize < max) max = mapSize;
  48.937 -
  48.938 -  tex << "\\begin{center}\\small" << endl;
  48.939 -  tex << "{\\Large \\bf Top " << max << " of " << mapSize << " Functions}";
  48.940 -	tex << endl << "\\bigskip" << endl;
  48.941 -	tex << "\\begin{longtable}{|l||r|r|r|}" << endl << endl;
  48.942 -	tex << "   \\hline" << endl;
  48.943 -	tex << "   \\bf Function & \\bf invocations[\\#] & " <<
  48.944 -         "\\bf excl. time[sec] $\\nabla$ & \\bf incl. time[sec] \\\\" << endl;
  48.945 -	tex << "   \\hline\\hline" << endl;
  48.946 -  
  48.947 -  /* write the sorted function table */
  48.948 -  {
  48.949 -    std::multimap<double, itFunc_t>::const_reverse_iterator it =
  48.950 -      sortedMap.rbegin();
  48.951 -    std::multimap<double, itFunc_t>::const_reverse_iterator itend =
  48.952 -      sortedMap.rend();
  48.953 -    SpaceSeparator facet(1); //1 - don't delete when done
  48.954 -    std::locale prev = tex.imbue(std::locale(std::locale(), &facet));
  48.955 -
  48.956 -    /* for a given max number of functions */
  48.957 -    while(itend != it){
  48.958 -      if(it->second->second.count.cnt) {
  48.959 -        string func_name = alldata.functionIdNameMap[it->second->first];
  48.960 -        if(func_name.size() > FUNC_NAME_MAX_LEN)
  48.961 -          func_name.resize(FUNC_NAME_MAX_LEN);
  48.962 -        tex << "  \\verb|" << func_name << "| & "
  48.963 -            << "  \\verb|" << it->second->second.count.cnt << "| & "
  48.964 -            << "  \\verb|" << it->second->second.excl_time.sum/alldata.timerResolution << "| & "
  48.965 -            << "  \\verb|" << it->second->second.incl_time.sum/alldata.timerResolution << "| \\\\" << endl;
  48.966 -      }
  48.967 -      it++;
  48.968 -      count++;
  48.969 -
  48.970 -      /* stop after the given maximum number of functions */
  48.971 -      if(max == count) break;
  48.972 -
  48.973 -      /* draw a horizontal line every 3 function entries */
  48.974 -      if((count % 3) == 0) tex << "      \\hline" << endl;
  48.975 +
  48.976 +    /* write the table head */
  48.977 +    if (mapSize < max)
  48.978 +        max = mapSize;
  48.979 +
  48.980 +    tex << "\\begin{center}\\small" << endl;
  48.981 +    tex << "{\\Large \\bf Top " << max << " of " << mapSize << " Functions}";
  48.982 +    tex << endl << "\\bigskip" << endl;
  48.983 +    tex << "\\begin{longtable}{|l||r|r|r|}" << endl << endl;
  48.984 +    tex << "   \\hline" << endl;
  48.985 +    tex << "   \\bf Function & \\bf invocations[\\#] & "
  48.986 +            << "\\bf excl. time[sec] $\\nabla$ & \\bf incl. time[sec] \\\\"
  48.987 +            << endl;
  48.988 +    tex << "   \\hline\\hline" << endl;
  48.989 +
  48.990 +    /* write the sorted function table */
  48.991 +    {
  48.992 +        std::multimap<double, itFunc_t>::const_reverse_iterator it =
  48.993 +                sortedMap.rbegin();
  48.994 +        std::multimap<double, itFunc_t>::const_reverse_iterator itend =
  48.995 +                sortedMap.rend();
  48.996 +        SpaceSeparator facet(1); //1 - don't delete when done
  48.997 +        std::locale prev = tex.imbue(std::locale(std::locale(), &facet));
  48.998 +
  48.999 +        /* for a given max number of functions */
 48.1000 +        while (itend != it) {
 48.1001 +            if (it->second->second.count.cnt) {
 48.1002 +                string func_name = alldata.functionIdNameMap[it->second->first];
 48.1003 +                if (func_name.size() > FUNC_NAME_MAX_LEN)
 48.1004 +                    func_name.resize(FUNC_NAME_MAX_LEN);
 48.1005 +                tex << "  \\verb|" << func_name << "| & " << "  \\verb|"
 48.1006 +                        << it->second->second.count.cnt << "| & "
 48.1007 +                        << "  \\verb|" << it->second->second.excl_time.sum
 48.1008 +                        / alldata.timerResolution << "| & " << "  \\verb|"
 48.1009 +                        << it->second->second.incl_time.sum
 48.1010 +                                / alldata.timerResolution << "| \\\\" << endl;
 48.1011 +            }
 48.1012 +            it++;
 48.1013 +            count++;
 48.1014 +
 48.1015 +            /* stop after the given maximum number of functions */
 48.1016 +            if (max == count)
 48.1017 +                break;
 48.1018 +
 48.1019 +            /* draw a horizontal line every 3 function entries */
 48.1020 +            if ((count % 3) == 0)
 48.1021 +                tex << "      \\hline" << endl;
 48.1022 +        }
 48.1023 +
 48.1024 +        tex.imbue(prev); //restore previous locale
 48.1025      }
 48.1026 -    
 48.1027 -    tex.imbue(prev);  //restore previous locale
 48.1028 -  }  
 48.1029 -
 48.1030 -  tex << "   \\hline" << endl;
 48.1031 -	tex << "\\end{longtable}" << endl << endl;
 48.1032 -  tex << "\\end{center}" << endl;
 48.1033 -	tex << "\\newpage" << endl << endl;
 48.1034 +
 48.1035 +    tex << "   \\hline" << endl;
 48.1036 +    tex << "\\end{longtable}" << endl << endl;
 48.1037 +    tex << "\\end{center}" << endl;
 48.1038 +    tex << "\\newpage" << endl << endl;
 48.1039  
 48.1040  }
 48.1041  
 48.1042 @@ -633,92 +654,100 @@
 48.1043   * FunctionData (counterValue, exclTime, inclTime)
 48.1044   * sorted by counter ID first, then function ID
 48.1045   */
 48.1046 -typedef std::map< Pair, FunctionData, ltPair >::const_iterator itCtr_t;
 48.1047 +typedef std::map<Pair, FunctionData, ltPair>::const_iterator itCtr_t;
 48.1048  /* sorted by exclusive time */
 48.1049  typedef std::multimap<double, itCtr_t> sortedCtrMap_t;
 48.1050  
 48.1051 -/* 
 48.1052 +/*
 48.1053   * Writes the sorted function table.
 48.1054 - * 
 48.1055 + *
 48.1056   * @param tex reference to the latex output stream
 48.1057   * @param sortedMap reference to the sorted counter map
 48.1058   * @param alldata the global data structure
 48.1059   */
 48.1060  static void write_counterTable(fstream& tex, sortedCtrMap_t& sortedMap,
 48.1061 -                               struct AllData& alldata)
 48.1062 -{
 48.1063 -  uint64_t timerRes = alldata.timerResolution;
 48.1064 -  sortedCtrMap_t::const_reverse_iterator it = sortedMap.rbegin();
 48.1065 -  sortedCtrMap_t::const_reverse_iterator itend = sortedMap.rend();
 48.1066 -  string ctr_name = "";
 48.1067 -
 48.1068 -  unsigned long int count = 0;
 48.1069 -  unsigned long int max = CTR_TABLE_LEN;
 48.1070 -  
 48.1071 -  if(max > sortedMap.size()) max = sortedMap.size();
 48.1072 -
 48.1073 -  if(itend != it){
 48.1074 -    ctr_name = alldata.counterIdNameMap[it->second->first.a];
 48.1075 -    if(ctr_name.size() > CTR_NAME_MAX_LEN)
 48.1076 -      ctr_name.resize(CTR_NAME_MAX_LEN);
 48.1077 -  }else return;
 48.1078 -
 48.1079 -  /* get the minimum and maximum value to define the unit */
 48.1080 -  double maxVal = 0;
 48.1081 -  double minVal = numeric_limits<double>::max();
 48.1082 -  while(it != itend){
 48.1083 -    double currVal = it->second->second.excl_time.sum/it->first*timerRes;
 48.1084 -    if(currVal < minVal) minVal = currVal;
 48.1085 -    if(currVal > maxVal) maxVal = currVal;
 48.1086 -    
 48.1087 -    it++;
 48.1088 -    count++;
 48.1089 -
 48.1090 -    /* stop after the given maximum number of functions */
 48.1091 -    if(max == count) break;
 48.1092 -  }
 48.1093 -
 48.1094 -  char unit = ' ';
 48.1095 -  string unitL = "";
 48.1096 -  uint64_t divisor = getScaleQuantifierLog10(minVal, maxVal, unit, unitL);
 48.1097 -
 48.1098 -  //cout << "Divisor: " << divisor << " unit: " << string(&unit,1) << endl;
 48.1099 -
 48.1100 -  tex << "\\begin{center}\\small" << endl;
 48.1101 -  tex << "{\\large \\bf \\verb|" << ctr_name << "| [" << unitL << "] (Top "
 48.1102 -      << max << ")}" << endl;
 48.1103 -  tex << "\\begin{longtable}{|l||r|r|}" << endl;
 48.1104 -  tex << "   \\hline" << endl;
 48.1105 -  tex << "   \\bf Function & " << "\\bf excl. time[sec] & \\bf \\verb|"
 48.1106 -                               << ctr_name << "/sec| \\\\" << endl;
 48.1107 -  tex << "   \\hline\\hline" << endl;
 48.1108 -
 48.1109 -  it = sortedMap.rbegin(); // reset iterator
 48.1110 -  count = 0; // reset counter
 48.1111 -  while(it != itend){
 48.1112 -    string func_name = alldata.functionIdNameMap[it->second->first.b];
 48.1113 -    if(func_name.size() > FUNC_NAME_MAX_LEN) func_name.resize(FUNC_NAME_MAX_LEN);
 48.1114 -
 48.1115 -    tex <<
 48.1116 -        "  \\verb|" << func_name << "| & " << // function name
 48.1117 -        "  \\verb|" << it->first/timerRes << "| & " <<  // exclusive time
 48.1118 -        "  \\verb|" << it->second->second.excl_time.sum/it->first*timerRes/divisor << "| \\\\" << endl;
 48.1119 -
 48.1120 -    it++;
 48.1121 -    count++;
 48.1122 -
 48.1123 -    /* stop after the given maximum number of functions */
 48.1124 -    if(max == count) break;
 48.1125 -
 48.1126 -    /* draw a horizontal line every 3 function entries */
 48.1127 -    if((count % 3) == 0) tex << "      \\hline" << endl;
 48.1128 -  }
 48.1129 -
 48.1130 -  tex << "   \\hline" << endl;
 48.1131 -  tex << "\\end{longtable}" << endl << endl;
 48.1132 -  tex << "\\end{center}" << endl;
 48.1133 -  /*tex << "\\bigskip" << endl << endl;*/
 48.1134 -  tex << "\\newpage" << endl << endl;
 48.1135 +        struct AllData& alldata) {
 48.1136 +    uint64_t timerRes = alldata.timerResolution;
 48.1137 +    sortedCtrMap_t::const_reverse_iterator it = sortedMap.rbegin();
 48.1138 +    sortedCtrMap_t::const_reverse_iterator itend = sortedMap.rend();
 48.1139 +    string ctr_name = "";
 48.1140 +
 48.1141 +    unsigned long int count = 0;
 48.1142 +    unsigned long int max = CTR_TABLE_LEN;
 48.1143 +
 48.1144 +    if (max > sortedMap.size())
 48.1145 +        max = sortedMap.size();
 48.1146 +
 48.1147 +    if (itend != it) {
 48.1148 +        ctr_name = alldata.counterIdNameMap[it->second->first.a];
 48.1149 +        if (ctr_name.size() > CTR_NAME_MAX_LEN)
 48.1150 +            ctr_name.resize(CTR_NAME_MAX_LEN);
 48.1151 +    } else
 48.1152 +        return;
 48.1153 +
 48.1154 +    /* get the minimum and maximum value to define the unit */
 48.1155 +    double maxVal = 0;
 48.1156 +    double minVal = numeric_limits<double>::max();
 48.1157 +    while (it != itend) {
 48.1158 +        double currVal = it->second->second.excl_time.sum / it->first
 48.1159 +                * timerRes;
 48.1160 +        if (currVal < minVal)
 48.1161 +            minVal = currVal;
 48.1162 +        if (currVal > maxVal)
 48.1163 +            maxVal = currVal;
 48.1164 +
 48.1165 +        it++;
 48.1166 +        count++;
 48.1167 +
 48.1168 +        /* stop after the given maximum number of functions */
 48.1169 +        if (max == count)
 48.1170 +            break;
 48.1171 +    }
 48.1172 +
 48.1173 +    char unit = ' ';
 48.1174 +    string unitL = "";
 48.1175 +    uint64_t divisor = getScaleQuantifierLog10(minVal, maxVal, unit, unitL);
 48.1176 +
 48.1177 +    //cout << "Divisor: " << divisor << " unit: " << string(&unit,1) << endl;
 48.1178 +
 48.1179 +    tex << "\\begin{center}\\small" << endl;
 48.1180 +    tex << "{\\large \\bf \\verb|" << ctr_name << "| [" << unitL << "] (Top "
 48.1181 +            << max << ")}" << endl;
 48.1182 +    tex << "\\begin{longtable}{|l||r|r|}" << endl;
 48.1183 +    tex << "   \\hline" << endl;
 48.1184 +    tex << "   \\bf Function & " << "\\bf excl. time[sec] & \\bf \\verb|"
 48.1185 +            << ctr_name << "/sec| \\\\" << endl;
 48.1186 +    tex << "   \\hline\\hline" << endl;
 48.1187 +
 48.1188 +    it = sortedMap.rbegin(); // reset iterator
 48.1189 +    count = 0; // reset counter
 48.1190 +    while (it != itend) {
 48.1191 +        string func_name = alldata.functionIdNameMap[it->second->first.b];
 48.1192 +        if (func_name.size() > FUNC_NAME_MAX_LEN)
 48.1193 +            func_name.resize(FUNC_NAME_MAX_LEN);
 48.1194 +
 48.1195 +        tex << "  \\verb|" << func_name << "| & " << // function name
 48.1196 +                "  \\verb|" << it->first / timerRes << "| & " << // exclusive time
 48.1197 +                "  \\verb|" << it->second->second.excl_time.sum / it->first
 48.1198 +                * timerRes / divisor << "| \\\\" << endl;
 48.1199 +
 48.1200 +        it++;
 48.1201 +        count++;
 48.1202 +
 48.1203 +        /* stop after the given maximum number of functions */
 48.1204 +        if (max == count)
 48.1205 +            break;
 48.1206 +
 48.1207 +        /* draw a horizontal line every 3 function entries */
 48.1208 +        if ((count % 3) == 0)
 48.1209 +            tex << "      \\hline" << endl;
 48.1210 +    }
 48.1211 +
 48.1212 +    tex << "   \\hline" << endl;
 48.1213 +    tex << "\\end{longtable}" << endl << endl;
 48.1214 +    tex << "\\end{center}" << endl;
 48.1215 +    /*tex << "\\bigskip" << endl << endl;*/
 48.1216 +    tex << "\\newpage" << endl << endl;
 48.1217  }
 48.1218  
 48.1219  /*
 48.1220 @@ -727,61 +756,63 @@
 48.1221   * FunctionData.excl_time ... exclusive counter values
 48.1222   * FunctionData.incl_time ... inclusive counter values
 48.1223   */
 48.1224 -static void write_counterTables(fstream& tex, struct AllData& alldata)
 48.1225 -{
 48.1226 -  /*
 48.1227 -   * source data map is sorted by counter ID, then function ID
 48.1228 -   * precondition: sorted by counterID !!!
 48.1229 -   */
 48.1230 -  {
 48.1231 -    /* separate into single maps, sorted by exclusive time */ 
 48.1232 -    sortedCtrMap_t currMap;
 48.1233 -    uint64_t lastCtrID;
 48.1234 -    itCtr_t it = alldata.counterMapGlobal.begin();
 48.1235 -    itCtr_t itend = alldata.counterMapGlobal.end();
 48.1236 -
 48.1237 -    /* set the counter ID of the first entry */
 48.1238 -    if(itend != it) lastCtrID = it->first.a;
 48.1239 -
 48.1240 -    /* iterate the source map of counters (sorted by counter ID) */
 48.1241 -    while(itend != it){
 48.1242 -      /* insert only functions with counter values != 0 */
 48.1243 -      if(it->second.count.cnt){
 48.1244 -        itFunc_t itFuncMapEnd = alldata.functionMapGlobal.end();
 48.1245 -        itFunc_t itFuncPos = alldata.functionMapGlobal.find(it->first.b);
 48.1246 -        double excl_time = 0;
 48.1247 -
 48.1248 -        /* check if function ID is in global function table */
 48.1249 -        if(itFuncMapEnd != itFuncPos){
 48.1250 -          excl_time = itFuncPos->second.excl_time.sum;
 48.1251 -        }else
 48.1252 -          cerr << "ERROR: Function ID not in global function table!" << endl;
 48.1253 -
 48.1254 -        /* check for next counter ID */
 48.1255 -        if(lastCtrID == it->first.a){
 48.1256 -          currMap.insert(pair<double, itCtr_t>(excl_time, it));
 48.1257 -        }else{
 48.1258 -          /* create latex output for current counter */
 48.1259 -          write_counterTable(tex, currMap, alldata);
 48.1260 -
 48.1261 -          /* found next counter ID (source map is sorted by counter ID) */
 48.1262 -          currMap.clear();
 48.1263 -          
 48.1264 -          currMap.insert(pair<double, itCtr_t>(excl_time, it));
 48.1265 +static void write_counterTables(fstream& tex, struct AllData& alldata) {
 48.1266 +    /*
 48.1267 +     * source data map is sorted by counter ID, then function ID
 48.1268 +     * precondition: sorted by counterID !!!
 48.1269 +     */
 48.1270 +    {
 48.1271 +        /* separate into single maps, sorted by exclusive time */
 48.1272 +        sortedCtrMap_t currMap;
 48.1273 +        uint64_t lastCtrID;
 48.1274 +        itCtr_t it = alldata.counterMapGlobal.begin();
 48.1275 +        itCtr_t itend = alldata.counterMapGlobal.end();
 48.1276 +
 48.1277 +        /* set the counter ID of the first entry */
 48.1278 +        if (itend != it)
 48.1279 +            lastCtrID = it->first.a;
 48.1280 +
 48.1281 +        /* iterate the source map of counters (sorted by counter ID) */
 48.1282 +        while (itend != it) {
 48.1283 +            /* insert only functions with counter values != 0 */
 48.1284 +            if (it->second.count.cnt) {
 48.1285 +                itFunc_t itFuncMapEnd = alldata.functionMapGlobal.end();
 48.1286 +                itFunc_t itFuncPos =
 48.1287 +                        alldata.functionMapGlobal.find(it->first.b);
 48.1288 +                double excl_time = 0;
 48.1289 +
 48.1290 +                /* check if function ID is in global function table */
 48.1291 +                if (itFuncMapEnd != itFuncPos) {
 48.1292 +                    excl_time = itFuncPos->second.excl_time.sum;
 48.1293 +                } else
 48.1294 +                    cerr << "ERROR: Function ID not in global function table!"
 48.1295 +                            << endl;
 48.1296 +
 48.1297 +                /* check for next counter ID */
 48.1298 +                if (lastCtrID == it->first.a) {
 48.1299 +                    currMap.insert(pair<double, itCtr_t> (excl_time, it));
 48.1300 +                } else {
 48.1301 +                    /* create latex output for current counter */
 48.1302 +                    write_counterTable(tex, currMap, alldata);
 48.1303 +
 48.1304 +                    /* found next counter ID (source map is sorted by counter ID) */
 48.1305 +                    currMap.clear();
 48.1306 +
 48.1307 +                    currMap.insert(pair<double, itCtr_t> (excl_time, it));
 48.1308 +                }
 48.1309 +
 48.1310 +                /* set the last counter ID for next iteration */
 48.1311 +                lastCtrID = it->first.a;
 48.1312 +            }
 48.1313 +
 48.1314 +            it++;
 48.1315          }
 48.1316  
 48.1317 -        /* set the last counter ID for next iteration */
 48.1318 -        lastCtrID = it->first.a;
 48.1319 -      }
 48.1320 -
 48.1321 -      it++;
 48.1322 +        /* write last counter table */
 48.1323 +        write_counterTable(tex, currMap, alldata);
 48.1324      }
 48.1325  
 48.1326 -    /* write last counter table */
 48.1327 -    write_counterTable(tex, currMap, alldata);
 48.1328 -  }
 48.1329 -
 48.1330 -  tex << "\\newpage" << endl << endl;
 48.1331 +    tex << "\\newpage" << endl << endl;
 48.1332  }
 48.1333  
 48.1334  /*
 48.1335 @@ -792,60 +823,25 @@
 48.1336   */
 48.1337  static void write_Dispersion(fstream& tex, struct AllData& alldata)
 48.1338  {
 48.1339 -  map< Pair, FunctionDispersionData, gtPair >::const_iterator it = 
 48.1340 +  map< Pair, FunctionDispersionData, gtPair >::const_iterator it =
 48.1341            alldata.functionDispersionMap.begin();
 48.1342 -  map< Pair, FunctionDispersionData, gtPair >::const_iterator itend = 
 48.1343 +  map< Pair, FunctionDispersionData, gtPair >::const_iterator itend =
 48.1344            alldata.functionDispersionMap.end();
 48.1345 -    
 48.1346 -  /* write boxplot command (vertical standalone)
 48.1347 -  tex << "%#1: position, #2: median, #3: 1/4 quartile, #4: 3/4 quartile, #5: min, #6: max" << endl;
 48.1348 -  tex << "\\newcommand{\\boxplotlv}[6]{" << endl;
 48.1349 -  tex << "  \\filldraw[fill=green!20] (#1,#3) rectangle (#1+0.5,#4);% draw the box" << endl;
 48.1350 -  tex << "  \\draw (#1,#2) -- (#1+0.5,#2);% node[right]{$\\textsc{#2}$};% median" << endl;
 48.1351 -  tex << "  \\draw (#1+0.25,#4) -- (#1+0.25,#6);% node[right]{$\\textsc{#4}$};% draw upper whisker" << endl;
 48.1352 -  tex << "  \\draw (#1+0.25,#3) -- (#1+0.25,#5);% draw lower whisker" << endl;
 48.1353 -  tex << "  \\draw (#1,#5) -- (#1+0.5,#5);% node[right]{$\\textsc{#5}$};% draw min" << endl;
 48.1354 -  tex << "  \\draw (#1,#6) -- (#1+0.5,#6);% node[right]{$\\textsc{#6}$};% draw max" << endl;
 48.1355 -  tex << "  %\\draw (#1,#6) node[above,xshift=0.5cm]{$ \\textsc{Function x}$};" << endl;
 48.1356 -  tex << "}" << endl;*/
 48.1357 -  
 48.1358 -  /*
 48.1359 -  tex << "\\newcommand{\\fdbplot}[6]{%" << endl;
 48.1360 -  tex << "\\begin{tikzpicture}" << endl;
 48.1361 -  tex << "\\begin{axis}[" << endl;
 48.1362 -  tex << "  width=14cm, height=3cm," << endl;
 48.1363 -  tex << "  xmin=#4, xmax=#5, " << endl;
 48.1364 -  tex << "  restrict x to domain=#4:#5," << endl;
 48.1365 -  tex << "  ymin=0.5, ymax=1.5," << endl;
 48.1366 -  tex << "  axis y line=none," << endl;
 48.1367 -  tex << "  axis x line=bottom,x axis line style={-,line width=1pt}," << endl;
 48.1368 -  tex << "  xmode=#6,log basis x=10, enlarge x limits={value=0.02,true}," << endl;
 48.1369 -  tex << "  scaled x ticks" << endl;
 48.1370 -  tex << "]" << endl;
 48.1371 -  tex << "    \\filldraw[fill=green!20,line width=0.2mm] (axis cs:#2,0.85) rectangle (axis cs:#3,1.15);% draw the box " << endl;
 48.1372 -  tex << "    \\draw[line width=0.2mm, color=red] (axis cs:#1,0.85) -- (axis cs:#1,1.15);              % median" << endl;
 48.1373 -  tex << "    \\draw[line width=0.2mm] (axis cs:#3,1) -- (axis cs:#5,1);                         % line right" << endl;
 48.1374 -  tex << "    \\draw[line width=0.2mm] (axis cs:#5,0.85) -- (axis cs:#5,1.15);                   % max" << endl;
 48.1375 -  tex << "    \\draw[line width=0.2mm] (axis cs:#2,1) -- (axis cs:#4,1);                         % line left" << endl;
 48.1376 -  tex << "    \\draw[line width=0.2mm] (axis cs:#4,0.85) -- (axis cs:#4,1.15);                   % min" << endl;
 48.1377 -  tex << "\\end{axis}" << endl;
 48.1378 -  tex << "\\end{tikzpicture}" << endl;
 48.1379 -  tex << "}" << endl;*/
 48.1380 -  
 48.1381 +
 48.1382    const unsigned int BP_WIDTH = 13;
 48.1383    const unsigned int LABEL_WIDTH = 3; /* avoid overlapping of labels */
 48.1384 -  
 48.1385 +
 48.1386    tex << "\\begin{center}" << endl;
 48.1387    tex << "{\\Large \\bf Top 50 Dispersion of Functions (in seconds)}";
 48.1388 -	tex << endl << "\\bigskip" << endl;
 48.1389 +    tex << endl << "\\bigskip" << endl;
 48.1390    tex << "\\end{center}" << endl;
 48.1391 -  
 48.1392 +
 48.1393    // define a counter for label shifting and lengths for \ifdim compare
 48.1394    tex << "\\newcounter{shiftctr}" << endl;
 48.1395    tex << "\\newlength{\\lowqpos}" << endl;
 48.1396    tex << "\\newlength{\\medianpos}" << endl;
 48.1397 -  
 48.1398 -  tex.precision(0);
 48.1399 +
 48.1400 +  tex.precision(6);
 48.1401    tex << "%#1: min, #2: 1/4 quartile, #3: 1/4pos, #4: median, #5: medianpos, #6: 3/4 quartile, #7: 3/4pos, #8: max" << endl;
 48.1402    tex << "\\newcommand{\\boxplotlh}[8]{" << endl;
 48.1403    tex << "\\begin{tikzpicture}" << endl;
 48.1404 @@ -859,7 +855,7 @@
 48.1405    tex << "  \\filldraw[fill=green!20] (#3,0) rectangle (#7,0.5);% box" << endl;
 48.1406    tex << "  \\draw (0,0) node[below]{$t_{min}:#1$} -- (0,0.5);" << endl;
 48.1407    tex << "  \\draw (0,0.25) -- (#3,0.25);% left whisker" << endl << endl;
 48.1408 -  
 48.1409 +
 48.1410    tex << "  % check overlap of lower quartile label" << endl;
 48.1411    tex << "  \\ifdim #3 pt > " << BP_WIDTH-LABEL_WIDTH << "pt" << endl;
 48.1412    tex << "    \\addtocounter{shiftctr}{4}" << endl;
 48.1413 @@ -869,7 +865,7 @@
 48.1414    tex << "    \\fi" << endl;
 48.1415    tex << "  \\fi" << endl;
 48.1416    tex << "  \\node at (#3,0) [below,yshift=-\\theshiftctr mm] {$t_{1/4}:#2$};" << endl << endl;
 48.1417 -  
 48.1418 +
 48.1419    tex << "  % check overlap of median label" << endl;
 48.1420    tex << "  \\ifdim #5 pt > " << BP_WIDTH-LABEL_WIDTH << "pt" << endl;
 48.1421    tex << "    \\addtocounter{shiftctr}{4}" << endl;
 48.1422 @@ -900,133 +896,931 @@
 48.1423    tex << "      \\fi" << endl;
 48.1424    tex << "    \\else" << endl;
 48.1425    tex << "      \\ifdim #7 pt < \\medianpos" << endl;
 48.1426 -	tex << "        \\addtocounter{shiftctr}{4}" << endl;
 48.1427 +    tex << "        \\addtocounter{shiftctr}{4}" << endl;
 48.1428    tex << "      \\fi" << endl;
 48.1429    tex << "    \\fi" << endl;
 48.1430    tex << "  \\fi" << endl;
 48.1431    tex << "  \\node at (#7,0) [below,yshift=-\\theshiftctr mm] {$t_{3/4}:#6$};" << endl << endl;
 48.1432 -            
 48.1433 +
 48.1434    tex << "  \\draw (#7,0.25) -- (" << BP_WIDTH << ",0.25);% right whisker" << endl;
 48.1435    tex << "  \\draw (" << BP_WIDTH << ",0.5) -- (" << BP_WIDTH << ",0) node[below]{$t_{max}:#8$};" << endl;
 48.1436    tex << "\\end{small}" << endl;
 48.1437    tex << "\\end{tikzpicture}" << endl;
 48.1438    tex << "}" << endl;
 48.1439 -  
 48.1440 +
 48.1441    tex.setf(ios::fixed, ios::floatfield);
 48.1442 -  tex.precision(6);
 48.1443 -  
 48.1444 +  tex.precision(7);
 48.1445 +
 48.1446    tex << "\\begin{flushleft}" << endl;
 48.1447 -  
 48.1448 +
 48.1449    unsigned long int count = 1;
 48.1450    while ( itend != it ) {
 48.1451      if(count % 50 == 0) break;
 48.1452 -    
 48.1453 +
 48.1454      // write only 9 plots per page */
 48.1455 -    if(count % 9 == 0){
 48.1456 +    if(count % 8 == 0){
 48.1457        tex << "\\newpage" << endl << endl;
 48.1458      }
 48.1459 -    
 48.1460 +
 48.1461      // init value for plot dimensions
 48.1462      double factor = it->second.excl_time_maximum - it->second.excl_time_minimum;
 48.1463      double lowq =  it->second.excl_time_low_quartile - it->second.excl_time_minimum;
 48.1464      double median = it->second.excl_time_median - it->second.excl_time_minimum;
 48.1465      double topq = it->second.excl_time_top_quartile - it->second.excl_time_minimum;
 48.1466 -    
 48.1467 +
 48.1468      string func_name = alldata.functionIdNameMap[it->first.b];
 48.1469      if(func_name.size() > 2*FUNC_NAME_MAX_LEN)
 48.1470        func_name.resize(2*FUNC_NAME_MAX_LEN);
 48.1471      tex << "\\verb|" << func_name << "|";
 48.1472 -    
 48.1473 +
 48.1474      if((factor <= 0) | (lowq < 0) | (median < 0) | (topq < 0)){
 48.1475        cout.setf(ios::scientific, ios::floatfield);
 48.1476        cout.precision(5);
 48.1477 -      
 48.1478 +
 48.1479        cout << endl << "Cannot create latex output!" << endl
 48.1480            << "min: " << it->second.excl_time_minimum << ", "
 48.1481            << "low quartile: " << it->second.excl_time_low_quartile << ", "
 48.1482            << "median: " << it->second.excl_time_median << ", "
 48.1483            << "top quartile: " << it->second.excl_time_top_quartile << ", "
 48.1484            << "maximum: " << it->second.excl_time_maximum << endl;
 48.1485 -      
 48.1486 +
 48.1487        count++;
 48.1488        it++;
 48.1489      }
 48.1490 -    
 48.1491 +
 48.1492      /*** calculation of the boxplot's positions ***/
 48.1493      /* logarithmic */
 48.1494 -    if( it->second.excl_time_top_quartile < 
 48.1495 -        ((it->second.excl_time_maximum-it->second.excl_time_minimum)/2 
 48.1496 +    if( it->second.excl_time_top_quartile <
 48.1497 +        ((it->second.excl_time_maximum-it->second.excl_time_minimum)/2
 48.1498          + it->second.excl_time_minimum) ){
 48.1499        tex << " ($log_{10}$)";
 48.1500 -      
 48.1501 +
 48.1502        factor = BP_WIDTH / log10(factor);
 48.1503 -      
 48.1504 +
 48.1505        if(lowq > 0) lowq = log10(lowq) * factor;
 48.1506        else lowq = 0;
 48.1507 -      
 48.1508 +
 48.1509        if(median > 0) median = log10(median) * factor;
 48.1510        else median = lowq;
 48.1511 -      
 48.1512 +
 48.1513        if(topq > 0) topq = log10(topq) * factor;
 48.1514        else topq = median;
 48.1515      /* linear */
 48.1516      }else{
 48.1517        factor = BP_WIDTH / factor;
 48.1518 -      
 48.1519 +
 48.1520        if(lowq > 0) lowq = lowq * factor;
 48.1521        else lowq = 0;
 48.1522 -      
 48.1523 +
 48.1524        if(median > 0) median = median * factor;
 48.1525        else median = lowq;
 48.1526 -      
 48.1527 +
 48.1528        if(topq > 0) topq = topq * factor;
 48.1529        else topq = median;
 48.1530      }
 48.1531 -    
 48.1532 +
 48.1533      tex << endl;
 48.1534 -    
 48.1535 +
 48.1536      /* write the values and their plot x-coordinate */
 48.1537      /* min, 1/4 quartile, 1/4pos, median, medianpos, 3/4 quartile, 3/4pos, max */
 48.1538      {
 48.1539        tex << "\\boxplotlh{";
 48.1540 -      tex.precision(5);
 48.1541 +      tex.precision(7);
 48.1542        tex.setf(ios::scientific, ios::floatfield);
 48.1543        tex << it->second.excl_time_minimum/(double)alldata.timerResolution << "}{"
 48.1544            << it->second.excl_time_low_quartile/(double)alldata.timerResolution << "}{";
 48.1545        tex.setf(ios::fixed, ios::floatfield);
 48.1546 -      tex.precision(6);
 48.1547 +      tex.precision(7);
 48.1548        tex << lowq << "}{";
 48.1549        tex.setf(ios::scientific, ios::floatfield);
 48.1550 -      tex.precision(5);
 48.1551 +      tex.precision(7);
 48.1552        tex << it->second.excl_time_median/(double)alldata.timerResolution << "}{";
 48.1553        tex.setf(ios::fixed, ios::floatfield);
 48.1554 -      tex.precision(6);
 48.1555 +      tex.precision(7);
 48.1556        tex << median << "}{";
 48.1557        tex.setf(ios::scientific, ios::floatfield);
 48.1558 -      tex.precision(5);
 48.1559 +      tex.precision(7);
 48.1560        tex << it->second.excl_time_top_quartile/(double)alldata.timerResolution << "}{";
 48.1561 -      tex.precision(6);
 48.1562 +      tex.precision(7);
 48.1563        tex.setf(ios::fixed, ios::floatfield);
 48.1564        tex << topq << "}{";
 48.1565        tex.setf(ios::scientific, ios::floatfield);
 48.1566 -      tex.precision(5);
 48.1567 -      tex << it->second.excl_time_maximum/(double)alldata.timerResolution 
 48.1568 +      tex.precision(7);
 48.1569 +      tex << it->second.excl_time_maximum/(double)alldata.timerResolution
 48.1570            << "}" << endl;
 48.1571        tex << "\\smallskip" << endl << endl;
 48.1572      }
 48.1573 -    
 48.1574 +
 48.1575      count++;
 48.1576      it++;
 48.1577    }
 48.1578 -  
 48.1579 +
 48.1580    tex << "\\end{flushleft}" << endl << endl;
 48.1581 -  
 48.1582 +
 48.1583    tex.setf(ios::floatfield);
 48.1584 -  tex.precision(6);
 48.1585 +  tex.precision(7);
 48.1586  
 48.1587    tex << "\\newpage" << endl << endl;
 48.1588  }
 48.1589  
 48.1590 + /*
 48.1591 + * Write a latex dispersion by function and callpath diagram.
 48.1592 + *
 48.1593 + * @param tex the given file stream (reference)
 48.1594 + * @param alldata the global data
 48.1595 + */
 48.1596 +static void write_Dispersion_callpath(fstream& tex, struct AllData& alldata) {
 48.1597 +    map<Pair, FunctionDispersionData, gtPair>::const_iterator it =
 48.1598 +            alldata.functionDispersionMap.begin();
 48.1599 +    map<Pair, FunctionDispersionData, gtPair>::const_iterator itend =
 48.1600 +            alldata.functionDispersionMap.end();
 48.1601 +    map<TripleCallpath, FunctionDispersionData, gtTripleCallpathSortByCallpath>::const_iterator itc =
 48.1602 +            alldata.functionDispersionCallpathMap.begin();
 48.1603 +    map<TripleCallpath, FunctionDispersionData, gtTripleCallpathSortByCallpath>::const_iterator itcend =
 48.1604 +            alldata.functionDispersionCallpathMap.end();
 48.1605 +
 48.1606 +    map<int, string> label;
 48.1607 +    const unsigned int BP_WIDTH = 15;
 48.1608 +    const unsigned int LABEL_WIDTH = 3; /* avoid overlapping of labels */
 48.1609 +
 48.1610 +    tex << "\\begin{center}" << endl;
 48.1611 +    tex
 48.1612 +            << "{\\Large \\bf Top 50 Dispersion of Functions by Callpath (in seconds)}";
 48.1613 +    tex << endl << "\\bigskip" << endl;
 48.1614 +    tex << "\\end{center}" << endl;
 48.1615 +
 48.1616 +    // define a counter for label shifting and lengths for \ifdim compare
 48.1617 +    tex << "\\newcounter{shiftctr}" << endl;
 48.1618 +    tex << "\\newlength{\\lowqpos}" << endl;
 48.1619 +    tex << "\\newlength{\\medianpos}" << endl;
 48.1620 +
 48.1621 +    tex.precision(5);
 48.1622 +    tex << "\\newcommand{\\boxplotlhs}[6]{" << endl;
 48.1623 +    tex << "\\begin{tikzpicture}" << endl;
 48.1624 +    tex << "\\begin{small}" << endl;
 48.1625 +
 48.1626 +    tex << "  \\draw (0,0.5) -- (" << BP_WIDTH << ",0.5);% left whisker"
 48.1627 +            << endl;
 48.1628 +    tex << "  \\draw (0,0.5) node[above right]{$#1$} -- (0,0.0);% left whisker"
 48.1629 +            << endl;
 48.1630 +    tex << "  \\draw (" << BP_WIDTH / 5
 48.1631 +            << ",0.5) node[above, xshift=6]{$#2$} -- (" << BP_WIDTH / 5
 48.1632 +            << ",0.25);% left whisker" << endl;
 48.1633 +    tex << "  \\draw (" << 2 * BP_WIDTH / 5 << ",0.5) node[above]{$#3$} -- ("
 48.1634 +            << 2 * BP_WIDTH / 5 << ",0.25);% left whisker" << endl;
 48.1635 +    tex << "  \\draw (" << 3 * BP_WIDTH / 5 << ",0.5) node[above]{$#4$} -- ("
 48.1636 +            << 3 * BP_WIDTH / 5 << ",0.25);% left whisker" << endl;
 48.1637 +    tex << "  \\draw (" << 4 * BP_WIDTH / 5 << ",0.5) node[above]{$#5$} -- ("
 48.1638 +            << 4 * BP_WIDTH / 5 << ",0.25);% left whisker" << endl;
 48.1639 +    tex << "  \\draw (" << BP_WIDTH << ",0.5) node[above]{$#6$} -- ("
 48.1640 +            << BP_WIDTH << ",0.25);% left whisker" << endl;
 48.1641 +
 48.1642 +    tex << "\\end{small}" << endl;
 48.1643 +    tex << "\\end{tikzpicture}" << endl;
 48.1644 +    tex << "}" << endl;
 48.1645 +
 48.1646 +    tex
 48.1647 +            << "%#1: minpos, #2: 1/4 quartile, #3: 1/4pos, #4: median, #5: medianpos, #6: 3/4 quartile, #7: 3/4pos, #8: max"
 48.1648 +            << endl;
 48.1649 +    tex << "\\newcommand{\\boxplotlh}[9]{" << endl;
 48.1650 +    tex << "\\begin{tikzpicture}" << endl;
 48.1651 +    tex << "\\begin{small}" << endl;
 48.1652 +    tex << "  % set all counters and lengths to zero" << endl;
 48.1653 +    tex << "  \\setcounter{shiftctr}{0}" << endl;
 48.1654 +    tex << "  \\setlength{\\lowqpos}{#4 pt}" << endl;
 48.1655 +    tex << "  \\addtolength{\\lowqpos}{" << LABEL_WIDTH << "pt}" << endl;
 48.1656 +    tex << "  \\setlength{\\medianpos}{#6 pt}" << endl;
 48.1657 +    tex << "  \\addtolength{\\medianpos}{" << LABEL_WIDTH << "pt}" << endl;
 48.1658 +    tex << "  \\draw (0,0.0) node[below right]{$t_{min}:#2$};" << endl;
 48.1659 +    tex << "  \\filldraw[fill=green!20] (#4,0) rectangle (#8,0.5);% box"
 48.1660 +            << endl;
 48.1661 +    tex << "  \\draw (#1,0)  -- (#1,0.5);" << endl;
 48.1662 +    tex << "  \\draw (#1,0.25) -- (#4,0.25);% left whisker" << endl << endl;
 48.1663 +
 48.1664 +    tex << "  % check overlap of lower quartile label" << endl;
 48.1665 +    tex << "  \\ifdim #4 pt > " << BP_WIDTH - (2 * LABEL_WIDTH) << "pt" << endl;
 48.1666 +    tex << "    \\addtocounter{shiftctr}{4}" << endl;
 48.1667 +    tex << "  \\else" << endl;
 48.1668 +    tex << "    \\ifdim #4 pt < 2pt" << endl;
 48.1669 +    tex << "      \\addtocounter{shiftctr}{4}" << endl;
 48.1670 +    tex << "    \\fi" << endl;
 48.1671 +    tex << "  \\fi" << endl;
 48.1672 +    tex
 48.1673 +            << "  \\node at (#4,0) [below right,yshift=-\\theshiftctr mm] {$t_{1/4}:#3$};"
 48.1674 +            << endl << endl;
 48.1675 +
 48.1676 +    tex << "  % check overlap of median label" << endl;
 48.1677 +    tex << "  \\ifdim #6 pt > " << BP_WIDTH - (2 * LABEL_WIDTH) << "pt" << endl;
 48.1678 +    tex << "    \\addtocounter{shiftctr}{4}" << endl;
 48.1679 +    tex << "  \\else" << endl;
 48.1680 +    tex << "    \\ifnum\\theshiftctr=4" << endl;
 48.1681 +    tex << "      \\ifdim #6 pt < " << LABEL_WIDTH << "pt" << endl;
 48.1682 +    tex << "        \\addtocounter{shiftctr}{4}" << endl;
 48.1683 +    tex << "      \\else" << endl;
 48.1684 +    tex << "        \\setcounter{shiftctr}{0}" << endl;
 48.1685 +    tex << "      \\fi" << endl;
 48.1686 +    tex << "    \\else" << endl;
 48.1687 +    tex << "      \\ifdim #6 pt < \\lowqpos" << endl;
 48.1688 +    tex << "        \\addtocounter{shiftctr}{4}" << endl;
 48.1689 +    tex << "      \\fi" << endl;
 48.1690 +    tex << "    \\fi" << endl;
 48.1691 +    tex << "  \\fi" << endl;
 48.1692 +    tex
 48.1693 +            << "  \\draw[color=red] (#6,0.5) -- (#6,0) node[below right,color=black,yshift=-\\theshiftctr mm]{$t_{med}:#5$};"
 48.1694 +            << endl << endl;
 48.1695 +    tex << "}" << endl;
 48.1696 +
 48.1697 +    tex
 48.1698 +            << "%#1: minpos, #2: 1/4 quartile, #3: 1/4pos, #4: median, #5: medianpos, #6: 3/4 quartile, #7: 3/4pos, #8: max"
 48.1699 +            << endl;
 48.1700 +    tex << "\\newcommand{\\boxplotlhd}[9]{" << endl;
 48.1701 +    tex << "\\begin{tikzpicture}" << endl;
 48.1702 +    tex << "\\begin{small}" << endl;
 48.1703 +    tex << "  % set all counters and lengths to zero" << endl;
 48.1704 +    tex << "  \\setcounter{shiftctr}{0}" << endl;
 48.1705 +    tex << "  \\setlength{\\lowqpos}{#4 pt}" << endl;
 48.1706 +    tex << "  \\addtolength{\\lowqpos}{" << LABEL_WIDTH << "pt}" << endl;
 48.1707 +    tex << "  \\setlength{\\medianpos}{#6 pt}" << endl;
 48.1708 +    tex << "  \\addtolength{\\medianpos}{" << LABEL_WIDTH << "pt}" << endl;
 48.1709 +    tex << "  \\draw (0,0.0);" << endl;
 48.1710 +    tex << "  \\filldraw[fill=green!20] (#4,0) rectangle (#8,0.5);% box"
 48.1711 +            << endl;
 48.1712 +    tex << "  \\draw (#1,0)  -- (#1,0.5);" << endl;
 48.1713 +    tex << "  \\draw (#1,0.25) -- (#4,0.25);% left whisker" << endl << endl;
 48.1714 +
 48.1715 +    tex << "  % check overlap of lower quartile label" << endl;
 48.1716 +    tex << "  \\ifdim #4 pt > " << BP_WIDTH - LABEL_WIDTH << "pt" << endl;
 48.1717 +    tex << "    \\addtocounter{shiftctr}{4}" << endl;
 48.1718 +    tex << "  \\else" << endl;
 48.1719 +    tex << "    \\ifdim #4 pt < 2pt" << endl;
 48.1720 +    tex << "      \\addtocounter{shiftctr}{4}" << endl;
 48.1721 +    tex << "    \\fi" << endl;
 48.1722 +    tex << "  \\fi" << endl;
 48.1723 +
 48.1724 +    tex << "  % check overlap of median label" << endl;
 48.1725 +    tex << "  \\ifdim #6 pt > " << BP_WIDTH - LABEL_WIDTH << "pt" << endl;
 48.1726 +    tex << "    \\addtocounter{shiftctr}{4}" << endl;
 48.1727 +    tex << "  \\else" << endl;
 48.1728 +    tex << "    \\ifnum\\theshiftctr=4" << endl;
 48.1729 +    tex << "      \\ifdim #6 pt < " << LABEL_WIDTH << "pt" << endl;
 48.1730 +    tex << "        \\addtocounter{shiftctr}{4}" << endl;
 48.1731 +    tex << "      \\else" << endl;
 48.1732 +    tex << "        \\setcounter{shiftctr}{0}" << endl;
 48.1733 +    tex << "      \\fi" << endl;
 48.1734 +    tex << "    \\else" << endl;
 48.1735 +    tex << "      \\ifdim #6 pt < \\lowqpos" << endl;
 48.1736 +    tex << "        \\addtocounter{shiftctr}{4}" << endl;
 48.1737 +    tex << "      \\fi" << endl;
 48.1738 +    tex << "    \\fi" << endl;
 48.1739 +    tex << "  \\fi" << endl;
 48.1740 +    tex << "  \\draw[color=red] (#6,0.5) -- (#6,0);" << endl << endl;
 48.1741 +    tex << "}" << endl;
 48.1742 +
 48.1743 +    tex << "\\newcommand{\\boxplotlhdn}[6]{" << endl;
 48.1744 +    tex << "% check overlap of higher quartile label" << endl;
 48.1745 +    tex << "  \\ifdim #2 pt > " << BP_WIDTH - LABEL_WIDTH << "pt" << endl;
 48.1746 +    tex << "    \\addtocounter{shiftctr}{4}" << endl;
 48.1747 +    tex << "  \\else" << endl;
 48.1748 +    tex << "    \\ifnum\\theshiftctr>0" << endl;
 48.1749 +    tex << "      \\ifdim #2 pt < \\lowqpos" << endl;
 48.1750 +    tex << "        \\addtocounter{shiftctr}{4}" << endl;
 48.1751 +    tex << "      \\else" << endl;
 48.1752 +    tex << "        \\setcounter{shiftctr}{0}" << endl;
 48.1753 +    tex << "      \\fi" << endl;
 48.1754 +    tex << "    \\else" << endl;
 48.1755 +    tex << "      \\ifdim #2 pt < \\medianpos" << endl;
 48.1756 +    tex << "        \\addtocounter{shiftctr}{4}" << endl;
 48.1757 +    tex << "      \\fi" << endl;
 48.1758 +    tex << "    \\fi" << endl;
 48.1759 +    tex << "  \\fi" << endl;
 48.1760 +
 48.1761 +    tex << "  \\draw (#2,0.25) -- (#6,0.25);% right whisker" << endl;
 48.1762 +    tex << "  \\draw (#6,0.5) -- (#6,0);" << endl;
 48.1763 +    tex << "  \\draw (" << BP_WIDTH << ",0.25);" << endl;
 48.1764 +    tex
 48.1765 +            << "  \\draw[color=red] (#4,0.5) -- (#4,0)node[below right]{$t_{95}:#5$};"
 48.1766 +            << endl << endl;
 48.1767 +    tex << "\\end{small}" << endl;
 48.1768 +    tex << "\\end{tikzpicture}" << endl;
 48.1769 +    tex << "}" << endl;
 48.1770 +
 48.1771 +    tex << "\\newcommand{\\boxplotlhn}[6]{" << endl;
 48.1772 +    tex << "% check overlap of higher quartile label" << endl;
 48.1773 +    tex << "  \\ifdim #2 pt > " << BP_WIDTH - (2 * LABEL_WIDTH) << "pt" << endl;
 48.1774 +    tex << "    \\addtocounter{shiftctr}{4}" << endl;
 48.1775 +    tex << "  \\else" << endl;
 48.1776 +    tex << "    \\ifnum\\theshiftctr>0" << endl;
 48.1777 +    tex << "      \\ifdim #2 pt < \\lowqpos" << endl;
 48.1778 +    tex << "        \\addtocounter{shiftctr}{4}" << endl;
 48.1779 +    tex << "      \\else" << endl;
 48.1780 +    tex << "        \\setcounter{shiftctr}{0}" << endl;
 48.1781 +    tex << "      \\fi" << endl;
 48.1782 +    tex << "    \\else" << endl;
 48.1783 +    tex << "      \\ifdim #2 pt < \\medianpos" << endl;
 48.1784 +    tex << "        \\addtocounter{shiftctr}{4}" << endl;
 48.1785 +    tex << "      \\fi" << endl;
 48.1786 +    tex << "    \\fi" << endl;
 48.1787 +    tex << "  \\fi" << endl;
 48.1788 +    tex
 48.1789 +            << "  \\node at (#2,0) [below right,yshift=-\\theshiftctr mm] {$t_{3/4}:#1$};"
 48.1790 +            << endl << endl;
 48.1791 +
 48.1792 +    tex << "  \\draw (#2,0.25) -- (#6,0.25);% right whisker" << endl;
 48.1793 +    tex << "  \\draw (#6,0.5) -- (#6,0);" << endl;
 48.1794 +    tex << "  \\draw (" << BP_WIDTH << ",0.0) node[below left]{$t_{max}:#3$};"
 48.1795 +            << endl;
 48.1796 +    tex << "  \\draw[color=red] (#4,0.5) -- (#4,0);" << endl << endl;
 48.1797 +    tex << "\\end{small}" << endl;
 48.1798 +    tex << "\\end{tikzpicture}" << endl;
 48.1799 +    tex << "}" << endl;
 48.1800 +
 48.1801 +    tex.setf(ios::fixed, ios::floatfield);
 48.1802 +    tex.precision(7);
 48.1803 +
 48.1804 +    tex << "\\begin{flushleft}" << endl;
 48.1805 +
 48.1806 +    unsigned long int count = 1, countc = 0;
 48.1807 +    int pageSize = 3;
 48.1808 +    while (itend != it) {
 48.1809 +        if (count % 50 == 0)
 48.1810 +            break;
 48.1811 +
 48.1812 +        // init value for plot dimensions
 48.1813 +        double factor = it->second.excl_time_maximum
 48.1814 +                - it->second.excl_time_minimum;
 48.1815 +        double lowq = it->second.excl_time_low_quartile
 48.1816 +                - it->second.excl_time_minimum;
 48.1817 +        double median = it->second.excl_time_median
 48.1818 +                - it->second.excl_time_minimum;
 48.1819 +        double topq = it->second.excl_time_top_quartile
 48.1820 +                - it->second.excl_time_minimum;
 48.1821 +        double t_95 = it->second.excl_time_95_percent
 48.1822 +                - it->second.excl_time_minimum;
 48.1823 +        string func_name = alldata.functionIdNameMap[it->first.b];
 48.1824 +
 48.1825 +        if (pageSize >= 40) {
 48.1826 +            tex << "\\newpage" << endl << endl;
 48.1827 +            tex.precision(7);
 48.1828 +            pageSize = 0;
 48.1829 +        }
 48.1830 +
 48.1831 +        if (func_name.size() > 2 * FUNC_NAME_MAX_LEN)
 48.1832 +            func_name.resize(2 * FUNC_NAME_MAX_LEN);
 48.1833 +        pageSize += 1;
 48.1834 +        tex << "\\verb|" << func_name << "|";
 48.1835 +        if ((factor <= 0) | (lowq < 0) | (median < 0) | (topq < 0)) {
 48.1836 +            cout.setf(ios::scientific, ios::floatfield);
 48.1837 +            cout.precision(5);
 48.1838 +
 48.1839 +            cout << endl << "Cannot create latex output!" << endl << "min: "
 48.1840 +                    << it->second.excl_time_minimum << ", " << "low quartile: "
 48.1841 +                    << it->second.excl_time_low_quartile << ", " << "median: "
 48.1842 +                    << it->second.excl_time_median << ", " << "top quartile: "
 48.1843 +                    << it->second.excl_time_top_quartile << ", " << "95%: "
 48.1844 +                    << it->second.excl_time_95_percent << ", " << "maximum: "
 48.1845 +                    << it->second.excl_time_maximum << endl;
 48.1846 +
 48.1847 +            count++;
 48.1848 +            it++;
 48.1849 +        }
 48.1850 +        /*** calculation of the boxplot's positions ***/
 48.1851 +        /* logarithmic */
 48.1852 +        double pos[6];
 48.1853 +        if (it->second.excl_time_top_quartile < ((it->second.excl_time_maximum
 48.1854 +                - it->second.excl_time_minimum) / 2
 48.1855 +                + it->second.excl_time_minimum)) {
 48.1856 +            tex << " ($log_{10}$)";
 48.1857 +
 48.1858 +            factor = log10(it->second.excl_time_maximum) - log10(
 48.1859 +                    it->second.excl_time_minimum);
 48.1860 +            lowq = log10(it->second.excl_time_low_quartile) - log10(
 48.1861 +                    it->second.excl_time_minimum);
 48.1862 +            median = log10(it->second.excl_time_median) - log10(
 48.1863 +                    it->second.excl_time_minimum);
 48.1864 +            topq = log10(it->second.excl_time_top_quartile) - log10(
 48.1865 +                    it->second.excl_time_minimum);
 48.1866 +            t_95 = log10(it->second.excl_time_95_percent) - log10(
 48.1867 +                    it->second.excl_time_minimum);
 48.1868 +
 48.1869 +            factor = BP_WIDTH / factor;
 48.1870 +
 48.1871 +            if (lowq > 0)
 48.1872 +                lowq = lowq * factor;
 48.1873 +            else
 48.1874 +                lowq = 0;
 48.1875 +
 48.1876 +            if (median > 0)
 48.1877 +                median = median * factor;
 48.1878 +            else
 48.1879 +                median = lowq;
 48.1880 +
 48.1881 +            if (topq > 0)
 48.1882 +                topq = topq * factor;
 48.1883 +            else
 48.1884 +                topq = median;
 48.1885 +
 48.1886 +            if (t_95 > 0)
 48.1887 +                t_95 = t_95 * factor;
 48.1888 +            else
 48.1889 +                t_95 = topq;
 48.1890 +
 48.1891 +            for (int i = 0; i < 6; i++) {
 48.1892 +                pos[i] = (i * (BP_WIDTH / 5) / factor) + log10(
 48.1893 +                        it->second.excl_time_minimum);
 48.1894 +                pos[i] = pow(10, pos[i]);
 48.1895 +                pos[i] = pos[i] / (double) alldata.timerResolution;
 48.1896 +            }
 48.1897 +            /* linear */
 48.1898 +        } else {
 48.1899 +            factor = BP_WIDTH / factor;
 48.1900 +
 48.1901 +            if (lowq > 0)
 48.1902 +                lowq = lowq * factor;
 48.1903 +            else
 48.1904 +                lowq = 0;
 48.1905 +
 48.1906 +            if (median > 0)
 48.1907 +                median = median * factor;
 48.1908 +            else
 48.1909 +                median = lowq;
 48.1910 +
 48.1911 +            if (topq > 0)
 48.1912 +                topq = topq * factor;
 48.1913 +            else
 48.1914 +                topq = median;
 48.1915 +
 48.1916 +            if (t_95 > 0)
 48.1917 +                t_95 = t_95 * factor;
 48.1918 +            else
 48.1919 +                t_95 = topq;
 48.1920 +
 48.1921 +            for (int i = 0; i < 6; i++) {
 48.1922 +                pos[i] = (i * (BP_WIDTH / 5) / factor)
 48.1923 +                        + it->second.excl_time_minimum;
 48.1924 +                pos[i] = pos[i] / (double) alldata.timerResolution;
 48.1925 +            }
 48.1926 +        }
 48.1927 +
 48.1928 +        tex << " -n " << it->second.count;
 48.1929 +        tex << endl;
 48.1930 +        tex << "\\smallskip" << endl << endl;
 48.1931 +
 48.1932 +        /* write the values and their plot x-coordinate */
 48.1933 +        /* min, 1/4 quartile, 1/4pos, median, medianpos, 3/4 quartile, 3/4pos, max */
 48.1934 +        {
 48.1935 +            tex.precision(5);
 48.1936 +            tex << "\\boxplotlhs" << "{" << pos[0] << "}" << "{" << pos[1]
 48.1937 +                    << "}" << "{" << pos[2] << "}" << "{" << pos[3] << "}"
 48.1938 +                    << "{" << pos[4] << "}" << "{" << pos[5] << "}" << endl;
 48.1939 +
 48.1940 +            pageSize += 2;
 48.1941 +
 48.1942 +            tex << "\\boxplotlh{0}{ ";
 48.1943 +            tex.precision(7);
 48.1944 +            tex.setf(ios::scientific, ios::floatfield);
 48.1945 +            tex << it->second.excl_time_minimum
 48.1946 +                    / (double) alldata.timerResolution << "}{"
 48.1947 +                    << it->second.excl_time_low_quartile
 48.1948 +                            / (double) alldata.timerResolution << "}{";
 48.1949 +            tex.setf(ios::fixed, ios::floatfield);
 48.1950 +            tex.precision(7);
 48.1951 +            tex << lowq << "}{";
 48.1952 +            tex.setf(ios::scientific, ios::floatfield);
 48.1953 +            tex.precision(7);
 48.1954 +            tex << it->second.excl_time_median
 48.1955 +                    / (double) alldata.timerResolution << "}{";
 48.1956 +            tex.setf(ios::fixed, ios::floatfield);
 48.1957 +            tex.precision(7);
 48.1958 +            tex << median << "}{";
 48.1959 +            tex.setf(ios::scientific, ios::floatfield);
 48.1960 +            tex.precision(7);
 48.1961 +            tex << it->second.excl_time_top_quartile
 48.1962 +                    / (double) alldata.timerResolution << "}{";
 48.1963 +            tex.precision(7);
 48.1964 +            tex.setf(ios::fixed, ios::floatfield);
 48.1965 +            tex << topq << "}{";
 48.1966 +            tex.setf(ios::scientific, ios::floatfield);
 48.1967 +            tex.precision(7);
 48.1968 +            tex << it->second.excl_time_maximum
 48.1969 +                    / (double) alldata.timerResolution << "}" << endl;
 48.1970 +            tex << "\\boxplotlhn";
 48.1971 +            tex << "{";
 48.1972 +            tex.setf(ios::scientific, ios::floatfield);
 48.1973 +            tex.precision(7);
 48.1974 +            tex << it->second.excl_time_top_quartile
 48.1975 +                    / (double) alldata.timerResolution << "}{";
 48.1976 +            tex.precision(7);
 48.1977 +            tex.setf(ios::fixed, ios::floatfield);
 48.1978 +            tex << topq << "}{";
 48.1979 +            tex.setf(ios::scientific, ios::floatfield);
 48.1980 +            tex.precision(7);
 48.1981 +            tex << it->second.excl_time_maximum
 48.1982 +                    / (double) alldata.timerResolution << "}{";
 48.1983 +            tex.precision(7);
 48.1984 +            tex.setf(ios::fixed, ios::floatfield);
 48.1985 +            tex << t_95 << "}{";
 48.1986 +            tex.setf(ios::scientific, ios::floatfield);
 48.1987 +            tex.precision(7);
 48.1988 +            tex << it->second.excl_time_95_percent
 48.1989 +                    / (double) alldata.timerResolution << "}{";
 48.1990 +            tex.precision(7);
 48.1991 +            tex << BP_WIDTH << "}" << endl;
 48.1992 +            tex << "\\smallskip" << endl << endl;
 48.1993 +            pageSize += 4;
 48.1994 +        }
 48.1995 +
 48.1996 +        count++;
 48.1997 +        itc = alldata.functionDispersionCallpathMap.find(TripleCallpath(it->first.a,
 48.1998 +                "", it->first.b));
 48.1999 +        itc++;
 48.2000 +        while (itcend != itc && itc->first.c == it->first.b) {
 48.2001 +
 48.2002 +            // init value for plot dimensions
 48.2003 +            double min = itc->second.excl_time_minimum
 48.2004 +                    - it->second.excl_time_minimum;
 48.2005 +            double max = itc->second.excl_time_maximum
 48.2006 +                    - it->second.excl_time_minimum;
 48.2007 +            ;
 48.2008 +            double factor = it->second.excl_time_maximum
 48.2009 +                    - it->second.excl_time_minimum;
 48.2010 +            double lowq = itc->second.excl_time_low_quartile
 48.2011 +                    - it->second.excl_time_minimum;
 48.2012 +            double median = itc->second.excl_time_median
 48.2013 +                    - it->second.excl_time_minimum;
 48.2014 +            double topq = itc->second.excl_time_top_quartile
 48.2015 +                    - it->second.excl_time_minimum;
 48.2016 +            double t_95 = itc->second.excl_time_95_percent
 48.2017 +                    - it->second.excl_time_minimum;
 48.2018 +
 48.2019 +            string func_name = alldata.functionIdNameMap[itc->first.c];
 48.2020 +
 48.2021 +            //callpath
 48.2022 +            if (itc->first.b != "") {
 48.2023 +                string tmp_callpath = "";
 48.2024 +                string word;
 48.2025 +                istringstream iss(itc->first.b, istringstream::in);
 48.2026 +                if (pageSize >= 40) {
 48.2027 +                    tex << "\\newpage" << endl << endl;
 48.2028 +                    tex.precision(5);
 48.2029 +                    tex << "\\boxplotlhs" << "{" << pos[0] << "}" << "{"
 48.2030 +                            << pos[1] << "}" << "{" << pos[2] << "}" << "{"
 48.2031 +                            << pos[3] << "}" << "{" << pos[4] << "}" << "{"
 48.2032 +                            << pos[5] << "}" << endl;
 48.2033 +                    pageSize = 2;
 48.2034 +                }
 48.2035 +                while (iss >> word) {
 48.2036 +                    word = alldata.functionIdNameMap[atoi(word.c_str())];
 48.2037 +                    if (tmp_callpath != "")
 48.2038 +                        tmp_callpath += "/" + word;
 48.2039 +                    else
 48.2040 +                        tmp_callpath += word;
 48.2041 +
 48.2042 +                }
 48.2043 +
 48.2044 +                tex << "\\verb|" << func_name << "|" << endl;
 48.2045 +                pageSize += 1;
 48.2046 +                func_name += ": " + tmp_callpath;
 48.2047 +            }
 48.2048 +
 48.2049 +            string tmp;
 48.2050 +            tex << "\\hypertarget{" << (countc) << "}{}" << endl;
 48.2051 +
 48.2052 +            label[countc] = func_name;
 48.2053 +            if ((factor <= 0) | (lowq < 0) | (median < 0) | (topq < 0)) {
 48.2054 +                cout.setf(ios::scientific, ios::floatfield);
 48.2055 +                cout.precision(5);
 48.2056 +
 48.2057 +                cout << endl << "Cannot create latex output!" << endl
 48.2058 +                        << "min: " << itc->second.excl_time_minimum << ", "
 48.2059 +                        << "low quartile: "
 48.2060 +                        << itc->second.excl_time_low_quartile << ", "
 48.2061 +                        << "median: " << itc->second.excl_time_median << ", "
 48.2062 +                        << "top quartile: "
 48.2063 +                        << itc->second.excl_time_top_quartile << ", "
 48.2064 +                        << "maximum: " << itc->second.excl_time_maximum << endl;
 48.2065 +
 48.2066 +                countc++;
 48.2067 +                itc++;
 48.2068 +                continue;
 48.2069 +            }
 48.2070 +
 48.2071 +            /*** calculation of the boxplot's positions ***/
 48.2072 +            /* logarithmic */
 48.2073 +            if (it->second.excl_time_top_quartile
 48.2074 +                    < ((it->second.excl_time_maximum
 48.2075 +                            - it->second.excl_time_minimum) / 2
 48.2076 +                            + it->second.excl_time_minimum)) {
 48.2077 +                tex << " ($log_{10}$)";
 48.2078 +
 48.2079 +                factor = log10(it->second.excl_time_maximum) - log10(
 48.2080 +                        it->second.excl_time_minimum);
 48.2081 +                lowq = log10(itc->second.excl_time_low_quartile) - log10(
 48.2082 +                        it->second.excl_time_minimum);
 48.2083 +                median = log10(itc->second.excl_time_median) - log10(
 48.2084 +                        it->second.excl_time_minimum);
 48.2085 +                topq = log10(itc->second.excl_time_top_quartile) - log10(
 48.2086 +                        it->second.excl_time_minimum);
 48.2087 +                t_95 = log10(itc->second.excl_time_95_percent) - log10(
 48.2088 +                        it->second.excl_time_minimum);
 48.2089 +                min = log10(itc->second.excl_time_minimum) - log10(
 48.2090 +                        it->second.excl_time_minimum);
 48.2091 +                max = log10(itc->second.excl_time_maximum) - log10(
 48.2092 +                        it->second.excl_time_minimum);
 48.2093 +                factor = BP_WIDTH / factor;
 48.2094 +
 48.2095 +                if (min > 0)
 48.2096 +                    min = min * factor;
 48.2097 +                else
 48.2098 +                    min = 0;
 48.2099 +
 48.2100 +                if (max > 0)
 48.2101 +                    max = max * factor;
 48.2102 +                else
 48.2103 +                    max = 0;
 48.2104 +
 48.2105 +                if (lowq > 0)
 48.2106 +                    lowq = lowq * factor;
 48.2107 +                else
 48.2108 +                    lowq = 0;
 48.2109 +
 48.2110 +                if (median > 0)
 48.2111 +                    median = median * factor;
 48.2112 +                else
 48.2113 +                    median = lowq;
 48.2114 +
 48.2115 +                if (topq > 0)
 48.2116 +                    topq = topq * factor;
 48.2117 +                else
 48.2118 +                    topq = median;
 48.2119 +
 48.2120 +                if (t_95 > 0)
 48.2121 +                    t_95 = t_95 * factor;
 48.2122 +                else
 48.2123 +                    t_95 = topq;
 48.2124 +                /* linear */
 48.2125 +            } else {
 48.2126 +                factor = BP_WIDTH / factor;
 48.2127 +
 48.2128 +                if (min > 0)
 48.2129 +                    min = min * factor;
 48.2130 +                else
 48.2131 +                    min = 0;
 48.2132 +
 48.2133 +                if (max > 0)
 48.2134 +                    max = max * factor;
 48.2135 +                else
 48.2136 +                    max = 0;
 48.2137 +
 48.2138 +                if (lowq > 0)
 48.2139 +                    lowq = lowq * factor;
 48.2140 +                else
 48.2141 +                    lowq = 0;
 48.2142 +
 48.2143 +                if (median > 0)
 48.2144 +                    median = median * factor;
 48.2145 +                else
 48.2146 +                    median = lowq;
 48.2147 +
 48.2148 +                if (topq > 0)
 48.2149 +                    topq = topq * factor;
 48.2150 +                else
 48.2151 +                    topq = median;
 48.2152 +
 48.2153 +                if (t_95 > 0)
 48.2154 +                    t_95 = t_95 * factor;
 48.2155 +                else
 48.2156 +                    t_95 = topq;
 48.2157 +            }
 48.2158 +            tex << " -n " << itc->second.count << "   ";
 48.2159 +            tex << "\\hyperlink{" << (countc) << "_ref}{ ref:" << (countc)
 48.2160 +                    << "}" << endl;
 48.2161 +            tex << endl;
 48.2162 +            /* write the values and their plot x-coordinate */
 48.2163 +            /* min, 1/4 quartile, 1/4pos, median, medianpos, 3/4 quartile, 3/4pos, max */
 48.2164 +            {
 48.2165 +                //  	tex <<"\\newline"<<endl ;
 48.2166 +                tex.precision(7);
 48.2167 +                tex << "\\boxplotlhd{" << min << "}{";
 48.2168 +                tex.precision(7);
 48.2169 +                tex.setf(ios::scientific, ios::floatfield);
 48.2170 +                tex << itc->second.excl_time_minimum
 48.2171 +                        / (double) alldata.timerResolution << "}{"
 48.2172 +                        << itc->second.excl_time_low_quartile
 48.2173 +                                / (double) alldata.timerResolution << "}{";
 48.2174 +                tex.setf(ios::fixed, ios::floatfield);
 48.2175 +                tex.precision(7);
 48.2176 +                tex << lowq << "}{";
 48.2177 +                tex.setf(ios::scientific, ios::floatfield);
 48.2178 +                tex.precision(7);
 48.2179 +                tex << itc->second.excl_time_median
 48.2180 +                        / (double) alldata.timerResolution << "}{";
 48.2181 +                tex.setf(ios::fixed, ios::floatfield);
 48.2182 +                tex.precision(7);
 48.2183 +                tex << median << "}{";
 48.2184 +                tex.setf(ios::scientific, ios::floatfield);
 48.2185 +                tex.precision(7);
 48.2186 +                tex << itc->second.excl_time_top_quartile
 48.2187 +                        / (double) alldata.timerResolution << "}{";
 48.2188 +                tex.precision(7);
 48.2189 +                tex.setf(ios::fixed, ios::floatfield);
 48.2190 +                tex << topq << "}{";
 48.2191 +                tex.setf(ios::scientific, ios::floatfield);
 48.2192 +                tex.precision(7);
 48.2193 +                tex << itc->second.excl_time_maximum
 48.2194 +                        / (double) alldata.timerResolution << "}" << endl;
 48.2195 +                tex << "\\boxplotlhdn";
 48.2196 +                tex << "{";
 48.2197 +                tex.setf(ios::scientific, ios::floatfield);
 48.2198 +                tex.precision(7);
 48.2199 +                tex << itc->second.excl_time_top_quartile
 48.2200 +                        / (double) alldata.timerResolution << "}{";
 48.2201 +                tex.precision(7);
 48.2202 +                tex.setf(ios::fixed, ios::floatfield);
 48.2203 +                tex << topq << "}{";
 48.2204 +                tex.setf(ios::scientific, ios::floatfield);
 48.2205 +                tex.precision(7);
 48.2206 +                tex << itc->second.excl_time_maximum
 48.2207 +                        / (double) alldata.timerResolution << "}{";
 48.2208 +                tex.precision(7);
 48.2209 +                tex.setf(ios::fixed, ios::floatfield);
 48.2210 +                tex << t_95 << "}{";
 48.2211 +                tex.setf(ios::scientific, ios::floatfield);
 48.2212 +                tex.precision(7);
 48.2213 +                tex << itc->second.excl_time_95_percent
 48.2214 +                        / (double) alldata.timerResolution << "}{";
 48.2215 +                tex.precision(7);
 48.2216 +                tex << max << "}" << endl;
 48.2217 +                tex << "\\smallskip" << endl << endl;
 48.2218 +                pageSize += 2;
 48.2219 +            }
 48.2220 +            countc++;
 48.2221 +            itc++;
 48.2222 +        }
 48.2223 +        it++;
 48.2224 +    }
 48.2225 +
 48.2226 +    tex << "\\newpage" << endl;
 48.2227 +    tex << "\\begin{center}" << endl;
 48.2228 +    tex << "{\\Large \\bf Callpath of Functions}";
 48.2229 +    tex << endl << "\\bigskip" << endl;
 48.2230 +    tex << "\\end{center}" << endl;
 48.2231 +    map<int, string>::iterator itl = label.begin();
 48.2232 +    map<int, string>::iterator itlend = label.end();
 48.2233 +
 48.2234 +    string func_name, tmp;
 48.2235 +    while (itl != itlend) {
 48.2236 +        func_name = itl->second;
 48.2237 +        tex << "ref:" << itl->first << ":  " << endl;
 48.2238 +        tex << "\\newline" << endl;
 48.2239 +        while (func_name.size() > 2 * FUNC_NAME_MAX_LEN + 17) {
 48.2240 +            tmp = func_name.substr(0, 2 * FUNC_NAME_MAX_LEN + 17);
 48.2241 +            func_name = func_name.substr(2 * FUNC_NAME_MAX_LEN + 17);
 48.2242 +            tex << "\\verb|" << tmp << "|" << endl;
 48.2243 +            tex << "\\newline" << endl;
 48.2244 +        }
 48.2245 +        tex << "\\verb|" << func_name << "|" << endl;
 48.2246 +        tex << "\\hyperlink{" << itl->first << "}{ref:" << itl->first << "}"
 48.2247 +                << endl;
 48.2248 +        tex << "\\hypertarget{" << itl->first << "_ref}{}" << endl;
 48.2249 +        tex << "\\newline" << endl;
 48.2250 +        tex << "\\newline" << endl;
 48.2251 +        itl++;
 48.2252 +    }
 48.2253 +    tex << "\\end{flushleft}" << endl << endl;
 48.2254 +
 48.2255 +    tex.setf(ios::floatfield);
 48.2256 +    tex.precision(6);
 48.2257 +
 48.2258 +    tex << "\\newpage" << endl << endl;
 48.2259 +}
 48.2260 +
 48.2261 +/*
 48.2262 + * Write a LaTeX diagram of the dispersion histogram by function.
 48.2263 + *
 48.2264 + * @param tex the given file stream (reference)
 48.2265 + * @param alldata the global data
 48.2266 + */
 48.2267 +
 48.2268 +static void write_Dispersion_histogram(fstream& tex, struct AllData& alldata) {
 48.2269 +
 48.2270 +    map<Pair, FunctionDispersionData, gtPair>::const_iterator it =
 48.2271 +            alldata.functionDispersionMap.begin();
 48.2272 +    map<Pair, FunctionDispersionData, gtPair>::const_iterator itend =
 48.2273 +            alldata.functionDispersionMap.end();
 48.2274 +
 48.2275 +    map<Pair, FunctionData, ltPair>::const_iterator itc =
 48.2276 +            alldata.functionDurationSectionMapGlobal.begin();
 48.2277 +    map<Pair, FunctionData, ltPair>::const_iterator itcend =
 48.2278 +            alldata.functionDurationSectionMapGlobal.end();
 48.2279 +
 48.2280 +    const unsigned int BP_WIDTH = 15;
 48.2281 +    const unsigned int BP_HEIGHT = 7;
 48.2282 +
 48.2283 +    tex << "\\begin{center}" << endl;
 48.2284 +    tex << "{\\Large \\bf Top 20 Dispersion of Functions}";
 48.2285 +    tex << endl << "\\bigskip" << endl;
 48.2286 +    tex << "\\end{center}" << endl;
 48.2287 +
 48.2288 +    int count = 1;
 48.2289 +    string tmp = "";
 48.2290 +    while (it != itend) {
 48.2291 +        if (count % 20 == 0)
 48.2292 +            break;
 48.2293 +
 48.2294 +        // draw boxes for the histogram
 48.2295 +        itc = alldata.functionDurationSectionMapGlobal.find(
 48.2296 +                Pair(it->first.b, 0));
 48.2297 +        double sum_max = 0;
 48.2298 +        while (itc != itcend && itc->first.a == it->first.b) {
 48.2299 +            if (sum_max < itc->second.count.sum)
 48.2300 +                sum_max = itc->second.count.sum;
 48.2301 +            itc++;
 48.2302 +        }
 48.2303 +        itc = alldata.functionDurationSectionMapGlobal.find(
 48.2304 +                Pair(it->first.b, 0));
 48.2305 +        std::ostringstream os;
 48.2306 +        double xfactor = (BP_WIDTH - 1.5)
 48.2307 +                / (log10(it->second.excl_time_maximum) - log10(
 48.2308 +                        it->second.excl_time_minimum));
 48.2309 +
 48.2310 +        double yfactor = (BP_HEIGHT - 1) / sum_max;
 48.2311 +        while (itc != itcend && itc->first.a == it->first.b) {
 48.2312 +
 48.2313 +            double ypos = ((itc->second.count.sum) * yfactor) + 0.5;
 48.2314 +            double xpos1 = log10(itc->second.excl_time.min) - log10(
 48.2315 +                    it->second.excl_time_minimum);
 48.2316 +            double xpos2 = log10(itc->second.excl_time.max) - log10(
 48.2317 +                    it->second.excl_time_minimum);
 48.2318 +            if (xpos1 > 0)
 48.2319 +                xpos1 = xpos1 * xfactor;
 48.2320 +            else
 48.2321 +                xpos1 = 0;
 48.2322 +            if (xpos2 > 0)
 48.2323 +                xpos2 = xpos2 * xfactor;
 48.2324 +            else
 48.2325 +                xpos2 = 0;
 48.2326 +
 48.2327 +            os << "   \\filldraw[fill=blue!20] (" << xpos1 + 0.5
 48.2328 +                    << ",0.5) rectangle (" << xpos2 + 0.5 << "," << ypos
 48.2329 +                    << ");% box" << endl;
 48.2330 +            itc++;
 48.2331 +        }
 48.2332 +
 48.2333 +        // draw axes
 48.2334 +        for (int i = 0; i < 6; i++) {
 48.2335 +            os << "    \\draw (" << (i * (double) ((BP_WIDTH - 1.5) / 5)) + 0.5
 48.2336 +                    << ",0.4) node[below right]{$" << (pow(10, ((i
 48.2337 +                    * (double) ((BP_WIDTH - 1.5) / 5)) / xfactor) + log10(
 48.2338 +                    it->second.excl_time_minimum)))
 48.2339 +                    / (double) alldata.timerResolution
 48.2340 +
 48.2341 +            << "$} -- (" << (i * ((BP_WIDTH - 1.5) / 5)) + 0.5 << ",0.5);"
 48.2342 +                    << endl;
 48.2343 +
 48.2344 +            os << "    \\draw (0.4," << (i * ((double) (BP_HEIGHT - 1) / 5))
 48.2345 +                    + 0.5 << ") node[left]{$" << (i * ((BP_HEIGHT - 1) / 5))
 48.2346 +                    * (sum_max / 5) << "$} -- (0.5," << (i
 48.2347 +                    * ((double) (BP_HEIGHT - 1) / 5)) + 0.5 << ");" << endl;
 48.2348 +
 48.2349 +        }
 48.2350 +
 48.2351 +        //draw histograph
 48.2352 +        tex << "\\verb|" << alldata.functionIdNameMap[it->first.b] << "|"
 48.2353 +                << endl;
 48.2354 +        tex << "\\newline" << endl;
 48.2355 +
 48.2356 +        tex << "\\begin{tikzpicture}" << endl;
 48.2357 +        tex << "\\begin{small}" << endl;
 48.2358 +
 48.2359 +        tex << "  \\draw (0.4," << (BP_HEIGHT - 0.5) << ") node[left]{$"
 48.2360 +                << sum_max << "$} -- (0.5," << (BP_HEIGHT - 0.5)
 48.2361 +                << ");% left whisker" << endl;
 48.2362 +        tex << "  \\draw (0,0.5) node[above left]{$0$} -- (" << BP_WIDTH
 48.2363 +                << ",0.5);% left whisker" << endl;
 48.2364 +        tex << "  \\draw (0.5,0.0) -- (0.5," << BP_HEIGHT << ");" << endl;
 48.2365 +
 48.2366 +        tex << "  \\draw (" << BP_WIDTH - 0.2 << ",0.4) -- (" << BP_WIDTH
 48.2367 +                << ",0.5);% left whisker" << endl;
 48.2368 +        tex << "  \\draw (" << BP_WIDTH - 0.2 << ",0.6) -- (" << BP_WIDTH
 48.2369 +                << ",0.5);% left whisker" << endl;
 48.2370 +        tex << "  \\draw (0.4," << BP_HEIGHT - 0.2 << ") -- (0.5," << BP_HEIGHT
 48.2371 +                << ");% left whisker" << endl;
 48.2372 +        tex << "  \\draw (0.6," << BP_HEIGHT - 0.2 << ") -- (0.5," << BP_HEIGHT
 48.2373 +                << ");% left whisker" << endl;
 48.2374 +
 48.2375 +        tex << os.str() << endl;
 48.2376 +
 48.2377 +        tex << "\\end{small}" << endl;
 48.2378 +        tex << "\\end{tikzpicture}" << endl;
 48.2379 +        tex << "\\newline" << endl;
 48.2380 +        tex << "\\newline" << endl;
 48.2381 +        if (count % 3 == 0)
 48.2382 +            tex << "\\newpage" << endl;
 48.2383 +        it++;
 48.2384 +        count++;
 48.2385 +    }
 48.2386 +}
 48.2387 +
 48.2388  /*
 48.2389   * Write header of a ybar chart. This function decides about the y axis scaling
 48.2390   * type (logarithmic basis 10 or 2 or linear scaling)
 48.2391 @@ -1037,144 +1831,150 @@
 48.2392   * @param xLabels the x axis labels
 48.2393   * @param minMax the min/max y values for this chart
 48.2394   */
 48.2395 -template <class type> static void write_ybarPlotHead(
 48.2396 -                 fstream& tex, uint64_t cclassType, metric_t metricType,
 48.2397 -                 vector<string> xLabels, MinMaxPair<type> minMax)
 48.2398 -{
 48.2399 -  tex << "\\begin{flushright}\\ttfamily\\small" << endl;
 48.2400 -  tex << "\\begin{tikzpicture}" << endl;
 48.2401 -  
 48.2402 -  /* define an ymin variable */
 48.2403 -  if(metricType == MSGLENGTH)
 48.2404 -    tex << "\\def \\ymin {0.5}" << endl;
 48.2405 -
 48.2406 -  /* TODO: quick hack to solve min=0 problem for logarithmic scaling */
 48.2407 -  if(metricType == DURATION){
 48.2408 -    double ymin = (double)minMax.min;
 48.2409 -    double ymax = (double)minMax.max;
 48.2410 -
 48.2411 -    /* is scaling logarithmic --> see if below (has to be the same!!!) */
 48.2412 -    if(ymax - ymin > 100 || (ymax - ymin > 0.01 && ymax < 1)){
 48.2413 -      if(ymin <= 0){
 48.2414 -        tex << "\\def \\ymin {1e-1}" << endl;
 48.2415 -      }
 48.2416 +template<class type> static void write_ybarPlotHead(fstream& tex,
 48.2417 +        uint64_t cclassType, metric_t metricType, vector<string> xLabels,
 48.2418 +        MinMaxPair<type> minMax) {
 48.2419 +    tex << "\\begin{flushright}\\ttfamily\\small" << endl;
 48.2420 +    tex << "\\begin{tikzpicture}" << endl;
 48.2421 +
 48.2422 +    /* define an ymin variable */
 48.2423 +    if (metricType == MSGLENGTH)
 48.2424 +        tex << "\\def \\ymin {0.5}" << endl;
 48.2425 +
 48.2426 +    /* TODO: quick hack to solve min=0 problem for logarithmic scaling */
 48.2427 +    if (metricType == DURATION) {
 48.2428 +        double ymin = (double) minMax.min;
 48.2429 +        double ymax = (double) minMax.max;
 48.2430 +
 48.2431 +        /* is scaling logarithmic --> see if below (has to be the same!!!) */
 48.2432 +        if (ymax - ymin > 100 || (ymax - ymin > 0.01 && ymax < 1)) {
 48.2433 +            if (ymin <= 0) {
 48.2434 +                tex << "\\def \\ymin {1e-1}" << endl;
 48.2435 +            }
 48.2436 +        }
 48.2437      }
 48.2438 -  }
 48.2439 -
 48.2440 -  tex << "\\begin{axis}[" << endl;
 48.2441 -  tex << "  width=" << PLOT_WIDTH << "cm, height=" << PLOT_HEIGHT << "cm," << endl;
 48.2442 -  tex << "  axis x line=bottom,x axis line style={-,line width=1pt}," << endl;
 48.2443 -  tex << "  axis y line=left,y axis line style={-,line width=1pt}," << endl;
 48.2444 -  tex << "  enlarge y limits={value=0.02,upper}," << endl;
 48.2445 -
 48.2446 -  // @DEBUG
 48.2447 -  // cout << "ymax=" << minMax.max << "; ymin=" << minMax.min << endl;
 48.2448 -
 48.2449 -  /*** message length y axis settings ***/
 48.2450 -  /* this works only for pgfplots since version 1.3 */
 48.2451 -  // @TODO: ymin == 0, min - max line cannot be drawn
 48.2452 -  if(metricType == MSGLENGTH){
 48.2453 -    if(logaxis){
 48.2454 -      tex << "  ymode=log,log basis y=2,ymin=\\ymin," << endl;
 48.2455 -      tex << "  try min ticks log={8}," << endl;
 48.2456 +
 48.2457 +    tex << "\\begin{axis}[" << endl;
 48.2458 +    tex << "  width=" << PLOT_WIDTH << "cm, height=" << PLOT_HEIGHT << "cm,"
 48.2459 +            << endl;
 48.2460 +    tex << "  axis x line=bottom,x axis line style={-,line width=1pt}," << endl;
 48.2461 +    tex << "  axis y line=left,y axis line style={-,line width=1pt}," << endl;
 48.2462 +    tex << "  enlarge y limits={value=0.02,upper}," << endl;
 48.2463 +
 48.2464 +    // @DEBUG
 48.2465 +    // cout << "ymax=" << minMax.max << "; ymin=" << minMax.min << endl;
 48.2466 +
 48.2467 +    /*** message length y axis settings ***/
 48.2468 +    /* this works only for pgfplots since version 1.3 */
 48.2469 +    // @TODO: ymin == 0, min - max line cannot be drawn
 48.2470 +    if (metricType == MSGLENGTH) {
 48.2471 +        if (logaxis) {
 48.2472 +            tex << "  ymode=log,log basis y=2,ymin=\\ymin," << endl;
 48.2473 +            tex << "  try min ticks log={8}," << endl;
 48.2474 +        }
 48.2475 +
 48.2476 +        /* check for label overlapping */
 48.2477 +        if ((double) minMax.max > (double) 8191)
 48.2478 +            tex << "  extra y ticks={1}, extra y tick labels={1}," << endl;
 48.2479      }
 48.2480 -    
 48.2481 -    /* check for label overlapping */
 48.2482 -    if((double)minMax.max > (double)8191)
 48.2483 -      tex << "  extra y ticks={1}, extra y tick labels={1}," << endl;
 48.2484 -  }
 48.2485 -
 48.2486 -  /*** message duration y axis settings ***/
 48.2487 -  if(metricType == DURATION){
 48.2488 -    double ymin = (double)minMax.min;
 48.2489 -    double ymax = (double)minMax.max;
 48.2490 -
 48.2491 -    // @TODO: ymin == 0, min - max line cannot be drawn
 48.2492 -    if((ymax - ymin > 1000 || (ymax - ymin > 0.01 && ymax < 1)) && logaxis){
 48.2493 -      // logarithmic mode
 48.2494 -      tex << "  ymode=log,log basis y=10,";
 48.2495 -      if(ymin <= 0){
 48.2496 -        tex << "  ymin=1e-1," << endl;
 48.2497 -      }else{
 48.2498 -        tex << "  ymin=1e" << (int)floor(log10((double)ymin)) << "," << endl;
 48.2499 -      }
 48.2500 -    }else{
 48.2501 -      // linear mode
 48.2502 -      tex << "ymin=0," << endl;
 48.2503 -      tex << "  ylabel style={at={(0,"
 48.2504 -          << PLOT_HEIGHT-1 << "cm)},rotate=-90,anchor=north west}," << endl;
 48.2505 +
 48.2506 +    /*** message duration y axis settings ***/
 48.2507 +    if (metricType == DURATION) {
 48.2508 +        double ymin = (double) minMax.min;
 48.2509 +        double ymax = (double) minMax.max;
 48.2510 +
 48.2511 +        // @TODO: ymin == 0, min - max line cannot be drawn
 48.2512 +        if ((ymax - ymin > 1000 || (ymax - ymin > 0.01 && ymax < 1)) && logaxis) {
 48.2513 +            // logarithmic mode
 48.2514 +            tex << "  ymode=log,log basis y=10,";
 48.2515 +            if (ymin <= 0) {
 48.2516 +                tex << "  ymin=1e-1," << endl;
 48.2517 +            } else {
 48.2518 +                tex << "  ymin=1e" << (int) floor(log10((double) ymin)) << ","
 48.2519 +                        << endl;
 48.2520 +            }
 48.2521 +        } else {
 48.2522 +            // linear mode
 48.2523 +            tex << "ymin=0," << endl;
 48.2524 +            tex << "  ylabel style={at={(0," << PLOT_HEIGHT - 1
 48.2525 +                    << "cm)},rotate=-90,anchor=north west}," << endl;
 48.2526 +        }
 48.2527      }
 48.2528 -  }
 48.2529 -
 48.2530 -  /*** invocations y axis settings ***/
 48.2531 -  if(metricType == INVOCATIONS){
 48.2532 -    double ymax = (double)minMax.max;
 48.2533 -    double ymin = (double)minMax.min;
 48.2534 -
 48.2535 -    if(ymax > 1000 && ymin < 100 && ymin > 0){
 48.2536 -      tex << "  ymode=log,log basis y=10,ymin=1e"
 48.2537 -          << (int)floor(log10((double)ymin)) << ",";
 48.2538 -    }else{
 48.2539 -      tex << "  ymin=0,";
 48.2540 -      //tex << "ytickmin={1}," << endl;
 48.2541 -      if(0 < ymax && ymax < 10) 
 48.2542 -        tex << "  ytick={0,...," << (uint64_t)ymax << "}," << endl;
 48.2543 +
 48.2544 +    /*** invocations y axis settings ***/
 48.2545 +    if (metricType == INVOCATIONS) {
 48.2546 +        double ymax = (double) minMax.max;
 48.2547 +        double ymin = (double) minMax.min;
 48.2548 +
 48.2549 +        if (ymax > 1000 && ymin < 100 && ymin > 0) {
 48.2550 +            tex << "  ymode=log,log basis y=10,ymin=1e" << (int) floor(log10(
 48.2551 +                    (double) ymin)) << ",";
 48.2552 +        } else {
 48.2553 +            tex << "  ymin=0,";
 48.2554 +            //tex << "ytickmin={1}," << endl;
 48.2555 +            if (0 < ymax && ymax < 10)
 48.2556 +                tex << "  ytick={0,...," << (uint64_t) ymax << "}," << endl;
 48.2557 +        }
 48.2558      }
 48.2559 -  }
 48.2560 -
 48.2561 -  tex << "ymajorgrids,xminorgrids,minor x tick num=1," << endl;
 48.2562 -
 48.2563 -  string title = ""; // the chart label
 48.2564 -  string metric = ""; // the y label
 48.2565 -
 48.2566 -  if(cclassType == OTF_COLLECTIVE_TYPE_UNKNOWN){
 48.2567 -    title = "P2P";
 48.2568 -  }else{
 48.2569 -    collectiveId2String(cclassType, title);
 48.2570 -  }
 48.2571 -
 48.2572 -  switch(metricType){
 48.2573 -    case INVOCATIONS:  
 48.2574 -      title += " Invocations";
 48.2575 -      metric = "";
 48.2576 -      break;
 48.2577 -    case DURATION:  
 48.2578 -      title += " Duration";
 48.2579 -      metric = "sec";
 48.2580 -      break;
 48.2581 -    case MSGLENGTH:  
 48.2582 -      title += " Message Length";
 48.2583 -      metric = "byte";
 48.2584 -      break;
 48.2585 -    default: break;
 48.2586 -  }
 48.2587 -  
 48.2588 -  tex << "title=" << title;
 48.2589 -  if(grouped) tex << " (average)" << endl;
 48.2590 -
 48.2591 -  tex << ",ylabel={" << metric << "}," << endl;
 48.2592 -  tex << "x tick label style={rotate=90,anchor=east,font=\\ttfamily\\footnotesize}," << endl;
 48.2593 -  tex << "tick align=outside," << endl;
 48.2594 -  tex << "tick style={line cap=round,line width=0.5pt,color=black," << endl;
 48.2595 -	tex << "      major tick length=4pt,minor tick length=8pt}," << endl;
 48.2596 -  tex << "major x tick style={line width=1, color=white}," << endl;
 48.2597 -  tex << "scaled y ticks=true," << endl;
 48.2598 -  tex << "bar width=" << YBAR_SIZE*2 << "pt," << endl;
 48.2599 -  tex << "minor grid style={color=gray, line width=0.5pt, dashed}," << endl;
 48.2600 -  tex << "xmin=-0.5," << endl;
 48.2601 -
 48.2602 -  /* concerning the ticks */
 48.2603 -  uint32_t xticks = xLabels.size();
 48.2604 -  if(xticks == 0) xticks = xLabelNum;
 48.2605 -  
 48.2606 -  tex << "xmax=" << xticks-1 << ".5," << endl;
 48.2607 -  tex << "xtick={0,...," << xticks-1 << "}," << endl;
 48.2608 -  tex << "xticklabels={" << endl;
 48.2609 -  for (unsigned int i = 0; i < xLabels.size(); i++){
 48.2610 -    tex << xLabels[i] << ",";
 48.2611 -  }
 48.2612 -
 48.2613 -  tex << "},]" << endl;
 48.2614 +
 48.2615 +    tex << "ymajorgrids,xminorgrids,minor x tick num=1," << endl;
 48.2616 +
 48.2617 +    string title = ""; // the chart label
 48.2618 +    string metric = ""; // the y label
 48.2619 +
 48.2620 +    if (cclassType == OTF_COLLECTIVE_TYPE_UNKNOWN) {
 48.2621 +        title = "P2P";
 48.2622 +    } else {
 48.2623 +        collectiveId2String(cclassType, title);
 48.2624 +    }
 48.2625 +
 48.2626 +    switch (metricType) {
 48.2627 +    case INVOCATIONS:
 48.2628 +        title += " Invocations";
 48.2629 +        metric = "";
 48.2630 +        break;
 48.2631 +    case DURATION:
 48.2632 +        title += " Duration";
 48.2633 +        metric = "sec";
 48.2634 +        break;
 48.2635 +    case MSGLENGTH:
 48.2636 +        title += " Message Length";
 48.2637 +        metric = "byte";
 48.2638 +        break;
 48.2639 +    default:
 48.2640 +        break;
 48.2641 +    }
 48.2642 +
 48.2643 +    tex << "title=" << title;
 48.2644 +    if (grouped)
 48.2645 +        tex << " (average)" << endl;
 48.2646 +
 48.2647 +    tex << ",ylabel={" << metric << "}," << endl;
 48.2648 +    tex
 48.2649 +            << "x tick label style={rotate=90,anchor=east,font=\\ttfamily\\footnotesize},"
 48.2650 +            << endl;
 48.2651 +    tex << "tick align=outside," << endl;
 48.2652 +    tex << "tick style={line cap=round,line width=0.5pt,color=black," << endl;
 48.2653 +    tex << "      major tick length=4pt,minor tick length=8pt}," << endl;
 48.2654 +    tex << "major x tick style={line width=1, color=white}," << endl;
 48.2655 +    tex << "scaled y ticks=true," << endl;
 48.2656 +    tex << "bar width=" << YBAR_SIZE * 2 << "pt," << endl;
 48.2657 +    tex << "minor grid style={color=gray, line width=0.5pt, dashed}," << endl;
 48.2658 +    tex << "xmin=-0.5," << endl;
 48.2659 +
 48.2660 +    /* concerning the ticks */
 48.2661 +    uint32_t xticks = xLabels.size();
 48.2662 +    if (xticks == 0)
 48.2663 +        xticks = xLabelNum;
 48.2664 +
 48.2665 +    tex << "xmax=" << xticks - 1 << ".5," << endl;
 48.2666 +    tex << "xtick={0,...," << xticks - 1 << "}," << endl;
 48.2667 +    tex << "xticklabels={" << endl;
 48.2668 +    for (unsigned int i = 0; i < xLabels.size(); i++) {
 48.2669 +        tex << xLabels[i] << ",";
 48.2670 +    }
 48.2671 +
 48.2672 +    tex << "},]" << endl;
 48.2673  }
 48.2674  
 48.2675  /*
 48.2676 @@ -1183,36 +1983,38 @@
 48.2677   * @param tex the latex output file stream
 48.2678   * @param legend create legend (1 send-receive; >1 no send receive)
 48.2679   */
 48.2680 -static void write_ybarPlotFoot(fstream& tex, uint8_t legend/* = 1*/)
 48.2681 -{
 48.2682 -  tex << "\\end{axis}" << endl;
 48.2683 -  tex << "\\end{tikzpicture}" << endl << endl;
 48.2684 -
 48.2685 -  tex << "\\end{flushright}" << endl;
 48.2686 -
 48.2687 -  if(legend){
 48.2688 -    tex << "\\begin{flushright}" << endl;
 48.2689 -    tex << "\\bigskip" << endl;
 48.2690 -    tex << "\\begin{tikzpicture}" << endl;
 48.2691 -    
 48.2692 -    /* create send, receive legend */
 48.2693 -    if(legend == 1){
 48.2694 -      tex << "\\node(a) at (0,0) [rectangle, draw, fill=" << COLOR_SEND << "] {};" << endl;
 48.2695 -      tex << "\\node [black,right] at (a.east) {send};" << endl;
 48.2696 -      tex << "\\node(b) at (2,0) [rectangle, draw, fill=" << COLOR_RECV << "] {};" << endl;
 48.2697 -      tex << "\\node [black,right] at (b.east) {receive};" << endl;
 48.2698 +static void write_ybarPlotFoot(fstream& tex, uint8_t legend/* = 1*/) {
 48.2699 +    tex << "\\end{axis}" << endl;
 48.2700 +    tex << "\\end{tikzpicture}" << endl << endl;
 48.2701 +
 48.2702 +    tex << "\\end{flushright}" << endl;
 48.2703 +
 48.2704 +    if (legend) {
 48.2705 +        tex << "\\begin{flushright}" << endl;
 48.2706 +        tex << "\\bigskip" << endl;
 48.2707 +        tex << "\\begin{tikzpicture}" << endl;
 48.2708 +
 48.2709 +        /* create send, receive legend */
 48.2710 +        if (legend == 1) {
 48.2711 +            tex << "\\node(a) at (0,0) [rectangle, draw, fill=" << COLOR_SEND
 48.2712 +                    << "] {};" << endl;
 48.2713 +            tex << "\\node [black,right] at (a.east) {send};" << endl;
 48.2714 +            tex << "\\node(b) at (2,0) [rectangle, draw, fill=" << COLOR_RECV
 48.2715 +                    << "] {};" << endl;
 48.2716 +            tex << "\\node [black,right] at (b.east) {receive};" << endl;
 48.2717 +        }
 48.2718 +
 48.2719 +        if (grouped) {
 48.2720 +            tex << "\\draw[|-|,color=" << COLOR_MINMAX
 48.2721 +                    << ",line width=1pt] (4,-0.2) -- (4,0.2)" << endl;
 48.2722 +            tex << "  node [right,xshift=2pt]{max}" << endl;
 48.2723 +            tex << "  node [below right,yshift=-3pt,xshift=2pt]{ min};" << endl;
 48.2724 +        }
 48.2725 +
 48.2726 +        tex << "\\end{tikzpicture}" << endl;
 48.2727 +        tex << "\\end{flushright}" << endl;
 48.2728 +        tex << "\\newpage" << endl << endl;
 48.2729      }
 48.2730 -
 48.2731 -    if(grouped){
 48.2732 -      tex << "\\draw[|-|,color=" << COLOR_MINMAX << ",line width=1pt] (4,-0.2) -- (4,0.2)" << endl;
 48.2733 -      tex << "  node [right,xshift=2pt]{max}" << endl;
 48.2734 -      tex << "  node [below right,yshift=-3pt,xshift=2pt]{ min};" << endl;
 48.2735 -    }
 48.2736 -
 48.2737 -    tex << "\\end{tikzpicture}" << endl;
 48.2738 -    tex << "\\end{flushright}" << endl;
 48.2739 -    tex << "\\newpage" << endl << endl;
 48.2740 -  }
 48.2741  }
 48.2742  
 48.2743  /*
 48.2744 @@ -1225,335 +2027,374 @@
 48.2745   * @todo maybe use the pgftable method instead of string streams ...
 48.2746   */
 48.2747  static void write_p2pAllPGFplots(fstream& tex, vector<string> xLabels,
 48.2748 -                                 struct AllData& alldata)
 48.2749 -{
 48.2750 -  /* timer resolution */
 48.2751 -  uint64_t tres = alldata.timerResolution;
 48.2752 -
 48.2753 -  /* iterator over data map */
 48.2754 -  std::map< uint64_t, MessageData >::const_iterator it = 
 48.2755 -                                           alldata.messageMapPerGroup.begin();
 48.2756 -  std::map< uint64_t, MessageData >::const_iterator itend = 
 48.2757 -                                           alldata.messageMapPerGroup.end();
 48.2758 -
 48.2759 -  /* counter for the x axis ticks */
 48.2760 -  uint32_t i = 0;
 48.2761 -
 48.2762 -  /* buffer for receive values */
 48.2763 -  stringstream ss_count_recv (stringstream::in | stringstream::out);
 48.2764 -  stringstream ss_bytes_recv (stringstream::in | stringstream::out);
 48.2765 -  /*stringstream ss_duration_recv (stringstream::in | stringstream::out);*/
 48.2766 -
 48.2767 -  /* buffer for remaining send values */
 48.2768 -  stringstream ss_bytes_send (stringstream::in | stringstream::out);
 48.2769 -  stringstream ss_duration_send (stringstream::in | stringstream::out);
 48.2770 -
 48.2771 -  /* buffers for receive and min max values */
 48.2772 -  stringstream ss_duration_send_e (stringstream::in | stringstream::out);
 48.2773 -  /*stringstream ss_duration_recv_e (stringstream::in | stringstream::out);*/
 48.2774 -  stringstream ss_bytes_send_e (stringstream::in | stringstream::out);
 48.2775 -  stringstream ss_bytes_recv_e (stringstream::in | stringstream::out);
 48.2776 -
 48.2777 -  /* plots containing duration available (old or new trace?) */
 48.2778 -  bool byt_avail = false;
 48.2779 -  bool dur_avail = false;
 48.2780 -
 48.2781 -  /* return, if there are no messages available */
 48.2782 -  if(alldata.messageMapPerGroup.empty()) return;
 48.2783 -
 48.2784 -  /*
 48.2785 -   *  Get min and max values to choose correct y axis scaling.
 48.2786 -   */
 48.2787 -  MinMaxMsgData minMax;
 48.2788 -  while ( itend != it ) {
 48.2789 -    
 48.2790 -    if(grouped){
 48.2791 -
 48.2792 -      /* invocations */
 48.2793 -      if(it->second.count_send.cnt){
 48.2794 -        uint64_t val = it->second.count_send.min;
 48.2795 -        if(val < minMax.count.min) minMax.count.min = val;
 48.2796 -
 48.2797 -        val = it->second.count_send.max;
 48.2798 -        if(val > minMax.count.max) minMax.count.max = val;
 48.2799 -      }
 48.2800 -
 48.2801 -      if(it->second.count_recv.cnt){
 48.2802 -        uint64_t val = it->second.count_recv.min;
 48.2803 -        if(val < minMax.count.min) minMax.count.min = val;
 48.2804 -
 48.2805 -        val = it->second.count_recv.max;
 48.2806 -        if(val > minMax.count.max) minMax.count.max = val;
 48.2807 -      }
 48.2808 -
 48.2809 -    }else{
 48.2810 -      /*** if processes are not grouped use the average values ***/
 48.2811 -
 48.2812 -      /* invocations */
 48.2813 -      if(it->second.count_send.cnt){
 48.2814 -        uint64_t val = it->second.count_send.sum/it->second.count_send.cnt;
 48.2815 -
 48.2816 -        if(val < minMax.count.min) minMax.count.min = val;
 48.2817 -        if(val > minMax.count.max) minMax.count.max = val;
 48.2818 -      }
 48.2819 -
 48.2820 -      if(it->second.count_recv.cnt){
 48.2821 -        uint64_t val = it->second.count_recv.sum/it->second.count_recv.cnt;
 48.2822 -
 48.2823 -        if(val < minMax.count.min) minMax.count.min = val;
 48.2824 -        if(val > minMax.count.max) minMax.count.max = val;
 48.2825 -      }
 48.2826 -
 48.2827 -      /* duration 
 48.2828 -      if(it->second.duration_send.sum > 0 && it->second.duration_send.cnt){
 48.2829 -        double val = it->second.duration_send.sum/it->second.duration_send.cnt/tres;
 48.2830 -
 48.2831 -        if(val < minMax.duration.min) minMax.duration.min = val;
 48.2832 -        if(val > minMax.duration.max) minMax.duration.max = val;
 48.2833 -
 48.2834 -        dur_avail = true;
 48.2835 -      }*/
 48.2836 +        struct AllData& alldata) {
 48.2837 +    /* timer resolution */
 48.2838 +    uint64_t tres = alldata.timerResolution;
 48.2839 +
 48.2840 +    /* iterator over data map */
 48.2841 +    std::map<uint64_t, MessageData>::const_iterator it =
 48.2842 +            alldata.messageMapPerGroup.begin();
 48.2843 +    std::map<uint64_t, MessageData>::const_iterator itend =
 48.2844 +            alldata.messageMapPerGroup.end();
 48.2845 +
 48.2846 +    /* counter for the x axis ticks */
 48.2847 +    uint32_t i = 0;
 48.2848 +
 48.2849 +    /* buffer for receive values */
 48.2850 +    stringstream ss_count_recv(stringstream::in | stringstream::out);
 48.2851 +    stringstream ss_bytes_recv(stringstream::in | stringstream::out);
 48.2852 +    /*stringstream ss_duration_recv (stringstream::in | stringstream::out);*/
 48.2853 +
 48.2854 +    /* buffer for remaining send values */
 48.2855 +    stringstream ss_bytes_send(stringstream::in | stringstream::out);
 48.2856 +    stringstream ss_duration_send(stringstream::in | stringstream::out);
 48.2857 +
 48.2858 +    /* buffers for receive and min max values */
 48.2859 +    stringstream ss_duration_send_e(stringstream::in | stringstream::out);
 48.2860 +    /*stringstream ss_duration_recv_e (stringstream::in | stringstream::out);*/
 48.2861 +    stringstream ss_bytes_send_e(stringstream::in | stringstream::out);
 48.2862 +    stringstream ss_bytes_recv_e(stringstream::in | stringstream::out);
 48.2863 +
 48.2864 +    /* plots containing duration available (old or new trace?) */
 48.2865 +    bool byt_avail = false;
 48.2866 +    bool dur_avail = false;
 48.2867 +
 48.2868 +    /* return, if there are no messages available */
 48.2869 +    if (alldata.messageMapPerGroup.empty())
 48.2870 +        return;
 48.2871 +
 48.2872 +    /*
 48.2873 +     *  Get min and max values to choose correct y axis scaling.
 48.2874 +     */
 48.2875 +    MinMaxMsgData minMax;
 48.2876 +    while (itend != it) {
 48.2877 +
 48.2878 +        if (grouped) {
 48.2879 +
 48.2880 +            /* invocations */
 48.2881 +            if (it->second.count_send.cnt) {
 48.2882 +                uint64_t val = it->second.count_send.min;
 48.2883 +                if (val < minMax.count.min)
 48.2884 +                    minMax.count.min = val;
 48.2885 +
 48.2886 +                val = it->second.count_send.max;
 48.2887 +                if (val > minMax.count.max)
 48.2888 +                    minMax.count.max = val;
 48.2889 +            }
 48.2890 +
 48.2891 +            if (it->second.count_recv.cnt) {
 48.2892 +                uint64_t val = it->second.count_recv.min;
 48.2893 +                if (val < minMax.count.min)
 48.2894 +                    minMax.count.min = val;
 48.2895 +
 48.2896 +                val = it->second.count_recv.max;
 48.2897 +                if (val > minMax.count.max)
 48.2898 +                    minMax.count.max = val;
 48.2899 +            }
 48.2900 +
 48.2901 +        } else {
 48.2902 +            /*** if processes are not grouped use the average values ***/
 48.2903 +
 48.2904 +            /* invocations */
 48.2905 +            if (it->second.count_send.cnt) {
 48.2906 +                uint64_t val = it->second.count_send.sum
 48.2907 +                        / it->second.count_send.cnt;
 48.2908 +
 48.2909 +                if (val < minMax.count.min)
 48.2910 +                    minMax.count.min = val;
 48.2911 +                if (val > minMax.count.max)
 48.2912 +                    minMax.count.max = val;
 48.2913 +            }
 48.2914 +
 48.2915 +            if (it->second.count_recv.cnt) {
 48.2916 +                uint64_t val = it->second.count_recv.sum
 48.2917 +                        / it->second.count_recv.cnt;
 48.2918 +
 48.2919 +                if (val < minMax.count.min)
 48.2920 +                    minMax.count.min = val;
 48.2921 +                if (val > minMax.count.max)
 48.2922 +                    minMax.count.max = val;
 48.2923 +            }
 48.2924 +
 48.2925 +            /* duration
 48.2926 +             if(it->second.duration_send.sum > 0 && it->second.duration_send.cnt){
 48.2927 +             double val = it->second.duration_send.sum/it->second.duration_send.cnt/tres;
 48.2928 +
 48.2929 +             if(val < minMax.duration.min) minMax.duration.min = val;
 48.2930 +             if(val > minMax.duration.max) minMax.duration.max = val;
 48.2931 +
 48.2932 +             dur_avail = true;
 48.2933 +             }*/
 48.2934 +        }
 48.2935 +
 48.2936 +        it++;
 48.2937      }
 48.2938  
 48.2939 -    it++;
 48.2940 -  }
 48.2941 -  
 48.2942 -  /* reset iterator to write the data to file output */
 48.2943 -  it = alldata.messageMapPerGroup.begin();
 48.2944 -
 48.2945 -  write_ybarPlotHead(tex, OTF_COLLECTIVE_TYPE_UNKNOWN, INVOCATIONS,
 48.2946 -                     vector<string>()/*xLabels*/, minMax.count);
 48.2947 -  
 48.2948 -  tex.precision(9);
 48.2949 -  ss_count_recv.precision(9);
 48.2950 -  ss_count_recv.setf(ios::floatfield);
 48.2951 -  ss_bytes_recv.precision(9);
 48.2952 -  ss_bytes_recv.setf(ios::floatfield);
 48.2953 -  ss_bytes_send.precision(9);
 48.2954 -  ss_bytes_send.setf(ios::floatfield);
 48.2955 -  ss_duration_send.precision(9);
 48.2956 -  ss_duration_send.setf(ios::floatfield);
 48.2957 -  ss_duration_send_e.precision(9);
 48.2958 -  ss_duration_send_e.setf(ios::floatfield);
 48.2959 -  ss_bytes_send_e.precision(9);
 48.2960 -  ss_bytes_send_e.setf(ios::floatfield);
 48.2961 -  ss_bytes_recv_e.precision(9);
 48.2962 -  ss_bytes_recv_e.setf(ios::floatfield);
 48.2963 -
 48.2964 -  /* first of all write the average values */
 48.2965 -  tex << "\\addplot[ybar, draw=black, mark=none, fill=" << COLOR_SEND 
 48.2966 -      << ", xshift=-" << YBAR_SIZE << "]" << endl;
 48.2967 -  tex << "  coordinates{" << endl;
 48.2968 -
 48.2969 -  while ( itend != it ) {
 48.2970 -    /*** send values ***/
 48.2971 -    if(it->second.count_send.cnt){
 48.2972 -      double val = (double)it->second.count_send.sum/it->second.count_send.cnt;
 48.2973 -      
 48.2974 -      /* directly write the send data */
 48.2975 -      tex << "("<< i << "," << (double)val << ")";
 48.2976 +    /* reset iterator to write the data to file output */
 48.2977 +    it = alldata.messageMapPerGroup.begin();
 48.2978 +
 48.2979 +    write_ybarPlotHead(tex, OTF_COLLECTIVE_TYPE_UNKNOWN, INVOCATIONS, vector<
 48.2980 +            string> ()/*xLabels*/, minMax.count);
 48.2981 +
 48.2982 +    tex.precision(9);
 48.2983 +    ss_count_recv.precision(9);
 48.2984 +    ss_count_recv.setf(ios::floatfield);
 48.2985 +    ss_bytes_recv.precision(9);
 48.2986 +    ss_bytes_recv.setf(ios::floatfield);
 48.2987 +    ss_bytes_send.precision(9);
 48.2988 +    ss_bytes_send.setf(ios::floatfield);
 48.2989 +    ss_duration_send.precision(9);
 48.2990 +    ss_duration_send.setf(ios::floatfield);
 48.2991 +    ss_duration_send_e.precision(9);
 48.2992 +    ss_duration_send_e.setf(ios::floatfield);
 48.2993 +    ss_bytes_send_e.precision(9);
 48.2994 +    ss_bytes_send_e.setf(ios::floatfield);
 48.2995 +    ss_bytes_recv_e.precision(9);
 48.2996 +    ss_bytes_recv_e.setf(ios::floatfield);
 48.2997 +
 48.2998 +    /* first of all write the average values */
 48.2999 +    tex << "\\addplot[ybar, draw=black, mark=none, fill=" << COLOR_SEND
 48.3000 +            << ", xshift=-" << YBAR_SIZE << "]" << endl;
 48.3001 +    tex << "  coordinates{" << endl;
 48.3002 +
 48.3003 +    while (itend != it) {
 48.3004 +        /*** send values ***/
 48.3005 +        if (it->second.count_send.cnt) {
 48.3006 +            double val = (double) it->second.count_send.sum
 48.3007 +                    / it->second.count_send.cnt;
 48.3008 +
 48.3009 +            /* directly write the send data */
 48.3010 +            tex << "(" << i << "," << (double) val << ")";
 48.3011 +        }
 48.3012 +
 48.3013 +        /* buffer the remaining values */
 48.3014 +        if (it->second.bytes_send.sum > 0 && it->second.bytes_send.cnt) {
 48.3015 +            ss_bytes_send << "(" << i << ","
 48.3016 +                    << (double) it->second.bytes_send.sum
 48.3017 +                            / it->second.bytes_send.cnt << ")";
 48.3018 +
 48.3019 +            byt_avail = true;
 48.3020 +        }
 48.3021 +
 48.3022 +        if (it->second.duration_send.sum > 0 && it->second.duration_send.cnt) {
 48.3023 +            double val = it->second.duration_send.sum
 48.3024 +                    / it->second.duration_send.cnt / tres;
 48.3025 +            ss_duration_send << "(" << i << "," << val << ")";
 48.3026 +
 48.3027 +            /* set min/max values for duration */
 48.3028 +            if (val < minMax.duration.min)
 48.3029 +                minMax.duration.min = val;
 48.3030 +            if (val > minMax.duration.max)
 48.3031 +                minMax.duration.max = val;
 48.3032 +
 48.3033 +            dur_avail = true;
 48.3034 +        }
 48.3035 +
 48.3036 +        /*** receive values ***/
 48.3037 +        /* buffer the values for receive in string stream buffer */
 48.3038 +        if (it->second.count_recv.cnt) {
 48.3039 +            double val = (double) it->second.count_recv.sum
 48.3040 +                    / it->second.count_recv.cnt;
 48.3041 +            ss_count_recv << "(" << i << "," << (double) val << ")";
 48.3042 +        }
 48.3043 +
 48.3044 +        if (it->second.bytes_recv.sum > 0 && it->second.bytes_recv.cnt) {
 48.3045 +            ss_bytes_recv << "(" << i << ","
 48.3046 +                    << (double) it->second.bytes_recv.sum
 48.3047 +                            / it->second.bytes_recv.cnt << ")";
 48.3048 +
 48.3049 +            byt_avail = true;
 48.3050 +        }
 48.3051 +
 48.3052 +        it++;
 48.3053 +        i++;
 48.3054      }
 48.3055 -
 48.3056 -    /* buffer the remaining values */
 48.3057 -    if(it->second.bytes_send.sum > 0 && it->second.bytes_send.cnt){
 48.3058 -      ss_bytes_send << "("<< i << ","
 48.3059 -              << (double)it->second.bytes_send.sum/it->second.bytes_send.cnt << ")";
 48.3060 -
 48.3061 -      byt_avail = true;
 48.3062 +    tex << "};" << endl;
 48.3063 +
 48.3064 +    /* write receive count */
 48.3065 +    tex << "\\addplot[ybar, draw=black, mark=none, fill=" << COLOR_RECV
 48.3066 +            << ", xshift=" << YBAR_SIZE << "]" << endl;
 48.3067 +    tex << "  coordinates{" << endl;
 48.3068 +    tex << ss_count_recv.str() << "};" << endl;
 48.3069 +
 48.3070 +    /* check if min/max values shall be written */
 48.3071 +    if (grouped) {
 48.3072 +        it = alldata.messageMapPerGroup.begin();
 48.3073 +        i = 0;
 48.3074 +        while (itend != it) {
 48.3075 +
 48.3076 +            /*** send invocations ***/
 48.3077 +            if (it->second.count_send.cnt) {
 48.3078 +                uint64_t min = it->second.count_send.min;
 48.3079 +                uint64_t max = it->second.count_send.max;
 48.3080 +
 48.3081 +                /* write min/max values only if they differ */
 48.3082 +                if (min != max) {
 48.3083 +                    tex << "\\addplot[color=" << COLOR_MINMAX
 48.3084 +                            << ",mark=-,line width=1pt, xshift=-" << YBAR_SIZE
 48.3085 +                            << "]";
 48.3086 +                    tex << "  coordinates{" << endl;
 48.3087 +                    tex << "(" << i << "," << (double) min << ")";
 48.3088 +                    tex << "(" << i << "," << (double) max << ")";
 48.3089 +                    tex << "};" << endl;
 48.3090 +                }
 48.3091 +            }
 48.3092 +
 48.3093 +            /*** receive invocations ***/
 48.3094 +            if (it->second.count_recv.cnt) {
 48.3095 +                double min = (double) it->second.count_recv.min;
 48.3096 +                double max = (double) it->second.count_recv.max;
 48.3097 +
 48.3098 +                if (min < minMax.count.min)
 48.3099 +                    minMax.count.min = min;
 48.3100 +                if (max > minMax.count.max)
 48.3101 +                    minMax.count.max = max;
 48.3102 +
 48.3103 +                /* write min/max values only if they differ */
 48.3104 +                if (min != max) {
 48.3105 +                    tex << "\\addplot[color=" << COLOR_MINMAX
 48.3106 +                            << ",mark=-,line width=1pt, xshift=" << YBAR_SIZE
 48.3107 +                            << "]";
 48.3108 +                    tex << "  coordinates{" << endl;
 48.3109 +                    tex << "(" << i << "," << (double) min << ")";
 48.3110 +                    tex << "(" << i << "," << (double) max << ")";
 48.3111 +                    tex << "};" << endl;
 48.3112 +                }
 48.3113 +            }
 48.3114 +
 48.3115 +            /*** send message length ***/
 48.3116 +            if (byt_avail && it->second.bytes_send.cnt) {
 48.3117 +                double min = (double) it->second.bytes_send.min;
 48.3118 +                double max = (double) it->second.bytes_send.max;
 48.3119 +
 48.3120 +                if (min < minMax.bytes.min)
 48.3121 +                    minMax.bytes.min = min;
 48.3122 +                if (max > minMax.bytes.max)
 48.3123 +                    minMax.bytes.max = max;
 48.3124 +
 48.3125 +                /* write min/max values only if they differ */
 48.3126 +                if (min != max) {
 48.3127 +                    ss_bytes_send_e << "\\addplot[color=" << COLOR_MINMAX
 48.3128 +                            << ",mark=-,line width=1pt, xshift=-" << YBAR_SIZE
 48.3129 +                            << "]";
 48.3130 +                    ss_bytes_send_e << "  coordinates{" << endl;
 48.3131 +
 48.3132 +                    /* adapt zero min value for logarithmic scaling */
 48.3133 +                    if (min != 0) {
 48.3134 +                        ss_bytes_send_e << "(" << i << "," << (double) min
 48.3135 +                                << ")";
 48.3136 +                    } else
 48.3137 +                        ss_bytes_send_e << "(" << i << ",\\ymin)";
 48.3138 +
 48.3139 +                    ss_bytes_send_e << "(" << i << "," << (double) max << ")";
 48.3140 +                    ss_bytes_send_e << "};" << endl;
 48.3141 +                }
 48.3142 +            }
 48.3143 +
 48.3144 +            /*** receive message length ***/
 48.3145 +            if (byt_avail && it->second.bytes_recv.cnt) {
 48.3146 +                double min = (double) it->second.bytes_recv.min;
 48.3147 +                double max = (double) it->second.bytes_recv.max;
 48.3148 +
 48.3149 +                if (min < minMax.bytes.min)
 48.3150 +                    minMax.bytes.min = min;
 48.3151 +                if (max > minMax.bytes.max)
 48.3152 +                    minMax.bytes.max = max;
 48.3153 +
 48.3154 +                /* write min/max values only if they differ */
 48.3155 +                if (min != max) {
 48.3156 +                    ss_bytes_recv_e << "\\addplot[color=" << COLOR_MINMAX
 48.3157 +                            << ",mark=-,line width=1pt, xshift=" << YBAR_SIZE
 48.3158 +                            << "]";
 48.3159 +                    ss_bytes_recv_e << "  coordinates{" << endl;
 48.3160 +                    ss_bytes_recv_e << "(" << i << "," << (double) min << ")";
 48.3161 +                    ss_bytes_recv_e << "(" << i << "," << (double) max << ")";
 48.3162 +                    ss_bytes_recv_e << "};" << endl;
 48.3163 +                }
 48.3164 +            }
 48.3165 +
 48.3166 +            /*** send duration ***/
 48.3167 +            if (dur_avail && it->second.duration_send.cnt) {
 48.3168 +                double min = it->second.duration_send.min / tres;
 48.3169 +                double max = it->second.duration_send.max / tres;
 48.3170 +
 48.3171 +                if (min < minMax.duration.min)
 48.3172 +                    minMax.duration.min = min;
 48.3173 +                if (max > minMax.duration.max)
 48.3174 +                    minMax.duration.max = max;
 48.3175 +
 48.3176 +                /* write min/max values only if they differ */
 48.3177 +                if (min != max) {
 48.3178 +                    ss_duration_send_e << "\\addplot[color=" << COLOR_MINMAX
 48.3179 +                            << ",mark=-,line width=1pt]";
 48.3180 +                    ss_duration_send_e << "  coordinates{" << endl;
 48.3181 +                    ss_duration_send_e << "(" << i << "," << min << ")";
 48.3182 +                    ss_duration_send_e << "(" << i << "," << max << ")";
 48.3183 +                    ss_duration_send_e << "};" << endl;
 48.3184 +                }
 48.3185 +            }
 48.3186 +
 48.3187 +            it++;
 48.3188 +            i++;
 48.3189 +        }
 48.3190 +
 48.3191      }
 48.3192  
 48.3193 -    if(it->second.duration_send.sum > 0 && it->second.duration_send.cnt){
 48.3194 -      double val = it->second.duration_send.sum/it->second.duration_send.cnt/tres;
 48.3195 -      ss_duration_send << "("<< i << "," << val << ")";
 48.3196 -
 48.3197 -      /* set min/max values for duration */
 48.3198 -      if(val < minMax.duration.min) minMax.duration.min = val;
 48.3199 -      if(val > minMax.duration.max) minMax.duration.max = val;
 48.3200 -
 48.3201 -      dur_avail = true;
 48.3202 +    /* finish invocation chart */
 48.3203 +    if (byt_avail || dur_avail)
 48.3204 +        write_ybarPlotFoot(tex, 0);
 48.3205 +    else
 48.3206 +        write_ybarPlotFoot(tex, 1);
 48.3207 +
 48.3208 +    /*** write message length chart ***/
 48.3209 +    if (byt_avail) {
 48.3210 +        if (dur_avail) {
 48.3211 +            write_ybarPlotHead(tex, OTF_COLLECTIVE_TYPE_UNKNOWN, MSGLENGTH,
 48.3212 +                    vector<string> (), minMax.bytes);
 48.3213 +        } else {
 48.3214 +            write_ybarPlotHead(tex, OTF_COLLECTIVE_TYPE_UNKNOWN, MSGLENGTH,
 48.3215 +                    xLabels, minMax.bytes);
 48.3216 +        }
 48.3217 +
 48.3218 +        tex << "\\addplot[ybar, draw=black, mark=none, fill=" << COLOR_SEND
 48.3219 +                << ", xshift=-" << YBAR_SIZE << "]" << endl;
 48.3220 +        tex << "  coordinates{" << endl;
 48.3221 +        tex << ss_bytes_send.str() << "};" << endl;
 48.3222 +
 48.3223 +        tex << "\\addplot[ybar, draw=black, mark=none, fill=" << COLOR_RECV
 48.3224 +                << ", xshift=" << YBAR_SIZE << "]" << endl;
 48.3225 +        tex << "  coordinates{" << endl;
 48.3226 +        tex << ss_bytes_recv.str() << "};" << endl;
 48.3227 +
 48.3228 +        if (grouped)
 48.3229 +            tex << ss_bytes_send_e.str() << ss_bytes_recv_e.str();
 48.3230 +
 48.3231 +        if (dur_avail)
 48.3232 +            write_ybarPlotFoot(tex, 0);
 48.3233 +        else
 48.3234 +            write_ybarPlotFoot(tex, 1);
 48.3235      }
 48.3236  
 48.3237 -    /*** receive values ***/
 48.3238 -    /* buffer the values for receive in string stream buffer */
 48.3239 -    if(it->second.count_recv.cnt){
 48.3240 -      double val = (double)it->second.count_recv.sum/it->second.count_recv.cnt;
 48.3241 -      ss_count_recv << "("<< i << "," << (double)val << ")";
 48.3242 +    /*
 48.3243 +     * Write message duration chart, if information are available.
 48.3244 +     * No differentiation between send and receive.
 48.3245 +     */
 48.3246 +    if (dur_avail) {
 48.3247 +        write_ybarPlotHead(tex, OTF_COLLECTIVE_TYPE_UNKNOWN, DURATION, xLabels,
 48.3248 +                minMax.duration);
 48.3249 +
 48.3250 +        tex << "\\addplot[ybar, draw=black, mark=none, fill=" << COLOR_SEND
 48.3251 +                << "]" << endl;
 48.3252 +        tex << "  coordinates{" << endl;
 48.3253 +        tex << ss_duration_send.str() << "};" << endl;
 48.3254 +
 48.3255 +        if (grouped)
 48.3256 +            tex << ss_duration_send_e.str();
 48.3257 +
 48.3258 +        write_ybarPlotFoot(tex, 1);
 48.3259      }
 48.3260  
 48.3261 -    if(it->second.bytes_recv.sum > 0 && it->second.bytes_recv.cnt){
 48.3262 -      ss_bytes_recv << "("<< i << ","
 48.3263 -          << (double)it->second.bytes_recv.sum/it->second.bytes_recv.cnt << ")";
 48.3264 -
 48.3265 -      byt_avail = true;
 48.3266 -    }    
 48.3267 -
 48.3268 -    it++;
 48.3269 -    i++;
 48.3270 -  }
 48.3271 -  tex << "};" << endl;
 48.3272 -
 48.3273 -  /* write receive count */
 48.3274 -  tex << "\\addplot[ybar, draw=black, mark=none, fill=" << COLOR_RECV 
 48.3275 -      << ", xshift=" << YBAR_SIZE << "]" << endl;
 48.3276 -  tex << "  coordinates{" << endl;
 48.3277 -  tex << ss_count_recv.str() << "};" << endl;
 48.3278 -
 48.3279 -  /* check if min/max values shall be written */
 48.3280 -  if(grouped){
 48.3281 -    it = alldata.messageMapPerGroup.begin(); i = 0;
 48.3282 -    while ( itend != it ) {
 48.3283 -
 48.3284 -      /*** send invocations ***/
 48.3285 -      if(it->second.count_send.cnt){
 48.3286 -        uint64_t min = it->second.count_send.min;
 48.3287 -        uint64_t max = it->second.count_send.max;
 48.3288 -
 48.3289 -        /* write min/max values only if they differ */
 48.3290 -        if(min != max){
 48.3291 -          tex << "\\addplot[color=" << COLOR_MINMAX
 48.3292 -              << ",mark=-,line width=1pt, xshift=-" << YBAR_SIZE << "]";
 48.3293 -          tex << "  coordinates{" << endl;
 48.3294 -          tex << "("<< i << "," << (double)min << ")";
 48.3295 -          tex << "("<< i << "," << (double)max << ")";
 48.3296 -          tex << "};" << endl;
 48.3297 -        }
 48.3298 -      }
 48.3299 -
 48.3300 -      /*** receive invocations ***/
 48.3301 -      if(it->second.count_recv.cnt){
 48.3302 -        double min = (double)it->second.count_recv.min;
 48.3303 -        double max = (double)it->second.count_recv.max;
 48.3304 -
 48.3305 -        if(min < minMax.count.min) minMax.count.min = min;
 48.3306 -        if(max > minMax.count.max) minMax.count.max = max;
 48.3307 -
 48.3308 -        /* write min/max values only if they differ */
 48.3309 -        if(min != max){
 48.3310 -          tex << "\\addplot[color=" << COLOR_MINMAX << ",mark=-,line width=1pt, xshift=" << YBAR_SIZE << "]";
 48.3311 -          tex << "  coordinates{" << endl;
 48.3312 -          tex << "("<< i << "," << (double)min << ")";
 48.3313 -          tex << "("<< i << "," << (double)max << ")";
 48.3314 -          tex << "};" << endl;
 48.3315 -        }
 48.3316 -      }
 48.3317 -
 48.3318 -      /*** send message length ***/
 48.3319 -      if(byt_avail && it->second.bytes_send.cnt){
 48.3320 -        double min = (double)it->second.bytes_send.min;
 48.3321 -        double max = (double)it->second.bytes_send.max;
 48.3322 -        
 48.3323 -        if(min < minMax.bytes.min) minMax.bytes.min = min;
 48.3324 -        if(max > minMax.bytes.max) minMax.bytes.max = max;
 48.3325 -
 48.3326 -        /* write min/max values only if they differ */
 48.3327 -        if(min != max){
 48.3328 -          ss_bytes_send_e << "\\addplot[color=" << COLOR_MINMAX << ",mark=-,line width=1pt, xshift=-" << YBAR_SIZE << "]";
 48.3329 -          ss_bytes_send_e << "  coordinates{" << endl;
 48.3330 -
 48.3331 -          /* adapt zero min value for logarithmic scaling */
 48.3332 -          if(min != 0){
 48.3333 -            ss_bytes_send_e << "("<< i << "," << (double)min << ")";       
 48.3334 -          }else ss_bytes_send_e << "("<< i << ",\\ymin)";
 48.3335 -
 48.3336 -          ss_bytes_send_e << "("<< i << "," << (double)max << ")";
 48.3337 -          ss_bytes_send_e << "};" << endl;
 48.3338 -        }
 48.3339 -      }
 48.3340 -
 48.3341 -      /*** receive message length ***/
 48.3342 -      if(byt_avail && it->second.bytes_recv.cnt){
 48.3343 -        double min = (double)it->second.bytes_recv.min;
 48.3344 -        double max = (double)it->second.bytes_recv.max;
 48.3345 -
 48.3346 -        if(min < minMax.bytes.min) minMax.bytes.min = min;
 48.3347 -        if(max > minMax.bytes.max) minMax.bytes.max = max;
 48.3348 -
 48.3349 -        /* write min/max values only if they differ */
 48.3350 -        if(min != max){
 48.3351 -          ss_bytes_recv_e << "\\addplot[color=" << COLOR_MINMAX << ",mark=-,line width=1pt, xshift=" << YBAR_SIZE << "]";
 48.3352 -          ss_bytes_recv_e << "  coordinates{" << endl;
 48.3353 -          ss_bytes_recv_e << "("<< i << "," << (double)min << ")";
 48.3354 -          ss_bytes_recv_e << "("<< i << "," << (double)max << ")";
 48.3355 -          ss_bytes_recv_e << "};" << endl;
 48.3356 -        }
 48.3357 -      }
 48.3358 -
 48.3359 -      /*** send duration ***/
 48.3360 -      if(dur_avail && it->second.duration_send.cnt){
 48.3361 -        double min = it->second.duration_send.min/tres;
 48.3362 -        double max = it->second.duration_send.max/tres;
 48.3363 -
 48.3364 -        if(min < minMax.duration.min) minMax.duration.min = min;
 48.3365 -        if(max > minMax.duration.max) minMax.duration.max = max;
 48.3366 -
 48.3367 -        /* write min/max values only if they differ */
 48.3368 -        if(min != max){
 48.3369 -          ss_duration_send_e << "\\addplot[color=" << COLOR_MINMAX << ",mark=-,line width=1pt]";
 48.3370 -          ss_duration_send_e << "  coordinates{" << endl;
 48.3371 -          ss_duration_send_e << "("<< i << "," << min << ")";
 48.3372 -          ss_duration_send_e << "("<< i << "," << max << ")";
 48.3373 -          ss_duration_send_e << "};" << endl;
 48.3374 -        }
 48.3375 -      }
 48.3376 -      
 48.3377 -      it++;
 48.3378 -      i++;
 48.3379 -    }
 48.3380 -
 48.3381 -  }
 48.3382 -
 48.3383 -  /* finish invocation chart */
 48.3384 -  if(byt_avail || dur_avail)
 48.3385 -    write_ybarPlotFoot(tex, 0);
 48.3386 -  else
 48.3387 -    write_ybarPlotFoot(tex, 1);
 48.3388 -
 48.3389 -  /*** write message length chart ***/
 48.3390 -  if(byt_avail){
 48.3391 -    if(dur_avail){
 48.3392 -      write_ybarPlotHead(tex, OTF_COLLECTIVE_TYPE_UNKNOWN, MSGLENGTH,
 48.3393 -                         vector<string>(), minMax.bytes);
 48.3394 -    }else{
 48.3395 -      write_ybarPlotHead(tex, OTF_COLLECTIVE_TYPE_UNKNOWN, MSGLENGTH, xLabels,
 48.3396 -                         minMax.bytes);
 48.3397 -    }
 48.3398 -
 48.3399 -    tex << "\\addplot[ybar, draw=black, mark=none, fill=" << COLOR_SEND
 48.3400 -        << ", xshift=-" << YBAR_SIZE << "]" << endl;
 48.3401 -    tex << "  coordinates{" << endl;
 48.3402 -    tex << ss_bytes_send.str() << "};" << endl;
 48.3403 -
 48.3404 -    tex << "\\addplot[ybar, draw=black, mark=none, fill=" << COLOR_RECV
 48.3405 -        << ", xshift=" << YBAR_SIZE <<  "]" << endl;
 48.3406 -    tex << "  coordinates{" << endl;
 48.3407 -    tex << ss_bytes_recv.str() << "};" << endl;
 48.3408 -
 48.3409 -    if(grouped) tex << ss_bytes_send_e.str() << ss_bytes_recv_e.str();
 48.3410 -
 48.3411 -    if(dur_avail)
 48.3412 -      write_ybarPlotFoot(tex, 0);
 48.3413 -    else
 48.3414 -      write_ybarPlotFoot(tex, 1);
 48.3415 -  }
 48.3416 -
 48.3417 -  /* 
 48.3418 -   * Write message duration chart, if information are available.
 48.3419 -   * No differentiation between send and receive.
 48.3420 -   */
 48.3421 -  if(dur_avail){
 48.3422 -    write_ybarPlotHead(tex, OTF_COLLECTIVE_TYPE_UNKNOWN, DURATION, xLabels,
 48.3423 -                       minMax.duration);
 48.3424 -
 48.3425 -    tex << "\\addplot[ybar, draw=black, mark=none, fill=" << COLOR_SEND << "]" << endl;
 48.3426 -    tex << "  coordinates{" << endl;
 48.3427 -    tex << ss_duration_send.str() << "};" << endl;
 48.3428 -
 48.3429 -    if(grouped) tex << ss_duration_send_e.str();
 48.3430 -
 48.3431 -    write_ybarPlotFoot(tex, 1);
 48.3432 -  }
 48.3433 -  
 48.3434 -  tex.precision(6);
 48.3435 +    tex.precision(6);
 48.3436  }
 48.3437  
 48.3438  /*
 48.3439 @@ -1562,29 +2403,30 @@
 48.3440   * @param alldata structure containing all summarized profiling information
 48.3441   * @param xLabels the resulting vector of process/group labels
 48.3442   */
 48.3443 -static void getXAxisLabels(struct AllData& alldata, vector<string>& xLabels)
 48.3444 -{
 48.3445 -  if(grouped){
 48.3446 -    for(map<uint64_t, set<uint64_t> >::const_iterator it =
 48.3447 -                                alldata.grouping.groupsToProcesses.begin();
 48.3448 -         it != alldata.grouping.groupsToProcesses.end(); it++){
 48.3449 -
 48.3450 -      /* map the first process id of the group to the process name */
 48.3451 -      string procFrom = alldata.processIdNameMap[*(it->second.begin())];
 48.3452 -      if(it->second.size() > 1){
 48.3453 -        string procTo = alldata.processIdNameMap[*(it->second.rbegin())];
 48.3454 -        procFrom = "\\shortstack[r]{"+procFrom+"\\\\"+"-\\\\"+procTo+"}";
 48.3455 -      }
 48.3456 -
 48.3457 -      /* add the created x axis label */
 48.3458 -      xLabels.push_back(procFrom);
 48.3459 +static void getXAxisLabels(struct AllData& alldata, vector<string>& xLabels) { /* appends one x axis label per process group (if 'grouped' is set) or per process to xLabels */
 48.3460 +    if (grouped) {
 48.3461 +        for (map<uint64_t, set<uint64_t> >::const_iterator it =
 48.3462 +                alldata.grouping.groupsToProcesses.begin(); it
 48.3463 +                != alldata.grouping.groupsToProcesses.end(); it++) {
 48.3464 +
 48.3465 +            /* the label starts with the name of the first process in the group's id set; if the group holds more than one process, a "-" row and the name of the last process are stacked below it inside a right-aligned shortstack */
 48.3466 +            string procFrom = alldata.processIdNameMap[*(it->second.begin())];
 48.3467 +            if (it->second.size() > 1) {
 48.3468 +                string procTo =
 48.3469 +                        alldata.processIdNameMap[*(it->second.rbegin())];
 48.3470 +                procFrom = "\\shortstack[r]{" + procFrom + "\\\\" + "-\\\\"
 48.3471 +                        + procTo + "}";
 48.3472 +            }
 48.3473 +
 48.3474 +            /* append the finished label; labels are added in the iteration order of groupsToProcesses */
 48.3475 +            xLabels.push_back(procFrom);
 48.3476 +        }
 48.3477 +    } else { /* not grouped: one label per entry of allProcesses, looked up by its process id */
 48.3478 +        for (set<Process, ltProcess>::const_iterator it =
 48.3479 +                alldata.allProcesses.begin(); it != alldata.allProcesses.end(); it++) {
 48.3480 +            xLabels.push_back(alldata.processIdNameMap[it->process]);
 48.3481 +        }
 48.3482      }
 48.3483 -  }else{
 48.3484 -    for(set< Process, ltProcess >::const_iterator it=
 48.3485 -        alldata.allProcesses.begin(); it != alldata.allProcesses.end(); it++) {
 48.3486 -      xLabels.push_back( alldata.processIdNameMap[ it->process ] );
 48.3487 -    }
 48.3488 -  }
 48.3489  }
 48.3490  
 48.3491  /*
 48.3492 @@ -1594,22 +2436,21 @@
 48.3493   * @param id the process or group id
 48.3494   * @param label the label as string for the given ID
 48.3495   */
 48.3496 -static void getGroupLabel(struct AllData& alldata, uint64_t id,
 48.3497 -                          string& label)
 48.3498 -{
 48.3499 -  if(grouped){
 48.3500 -    set<uint64_t> procs = alldata.grouping.groupsToProcesses.find(id)->second;
 48.3501 -
 48.3502 -    /* map the first process id of the group to the process name */
 48.3503 -    label = alldata.processIdNameMap[*(procs.begin())];
 48.3504 -    if(procs.size() > 1){
 48.3505 -      string procTo = alldata.processIdNameMap[*(procs.rbegin())];
 48.3506 -      label = label+"\\\\"+"-\\\\"+procTo;
 48.3507 +static void getGroupLabel(struct AllData& alldata, uint64_t id, string& label) { /* writes the label text for a group id (if 'grouped' is set) or a process id into 'label' */
 48.3508 +    if (grouped) {
 48.3509 +        set<uint64_t> procs =
 48.3510 +                alldata.grouping.groupsToProcesses.find(id)->second;
 48.3511 +
 48.3512 +        /* label starts with the name of the first process of the group; for groups with more than one process a "-" row and the last process name follow, without a surrounding shortstack (unlike getXAxisLabels). NOTE(review): the find(id) result above is dereferenced without a check against end() - confirm callers only pass known group ids */
 48.3513 +        label = alldata.processIdNameMap[*(procs.begin())];
 48.3514 +        if (procs.size() > 1) {
 48.3515 +            string procTo = alldata.processIdNameMap[*(procs.rbegin())];
 48.3516 +            label = label + "\\\\" + "-\\\\" + procTo;
 48.3517 +        }
 48.3518 +
 48.3519 +    } else { /* not grouped: id is used directly as key into processIdNameMap */
 48.3520 +        label = alldata.processIdNameMap[id];
 48.3521      }
 48.3522 -    
 48.3523 -  }else{
 48.3524 -    label = alldata.processIdNameMap[id];
 48.3525 -  }
 48.3526  }
 48.3527  
 48.3528  /*
 48.3529 @@ -1618,239 +2459,256 @@
 48.3530   * @param tex the latex output file stream
 48.3531   * @param alldata structure containing all summarized profiling information
 48.3532   */
 48.3533 -static void write_p2pMsgRateMatrix(fstream& tex, struct AllData& alldata)
 48.3534 -{
 48.3535 -  std::map<Pair, MessageData, ltPair> msgMap = alldata.messageMapPerGroupPair;
 48.3536 -  std::map<uint64_t,uint64_t> rankToPos;
 48.3537 -
 48.3538 -  float scale = 0.7;
 48.3539 -  uint32_t gridDim = 0;
 48.3540 -  double minDataRate = 0;
 48.3541 -  double maxDataRate = 0;
 48.3542 -  uint64_t tres = alldata.timerResolution;
 48.3543 -
 48.3544 -
 48.3545 -  /* check, if grouped to set the dimension of the matrix */
 48.3546 -  if(grouped){
 48.3547 -    gridDim = alldata.grouping.numGroups();
 48.3548 -  }else{
 48.3549 -    gridDim = alldata.allProcesses.size();
 48.3550 -  }
 48.3551 -
 48.3552 -  /* remove DEBUG output 
 48.3553 -  cout << "gridDim:" << gridDim << " processes_num:" << alldata.allProcesses.size()
 48.3554 -       << " group_num:" << alldata.grouping.numGroups() << endl; */
 48.3555 -
 48.3556 -
 48.3557 -  std::map<Pair, MessageData, ltPair>::const_iterator it = msgMap.begin();
 48.3558 -  std::map<Pair, MessageData, ltPair>::const_iterator itend = msgMap.end();
 48.3559 -  
 48.3560 -  if(it != itend){
 48.3561 -    if(it->second.duration_send.sum == 0) return;
 48.3562 -    minDataRate = it->second.bytes_send.sum/it->second.duration_send.sum*tres;
 48.3563 -    maxDataRate = minDataRate;
 48.3564 -  }else{
 48.3565 -    return;
 48.3566 -  }
 48.3567 -
 48.3568 -  tex << "\\center{\\Large \\bf P2P - Message Rate (average)}" << endl;
 48.3569 -  tex << "\\bigskip" << endl << endl;
 48.3570 -
 48.3571 -  tex << "\\begin{center}" << endl;
 48.3572 -  tex << "\\begin{tikzpicture} [step=1cm,scale=" << scale
 48.3573 -      << ",every node/.style={scale=" << scale << "}]";
 48.3574 -  if(grouped) tex << "\\small" << endl;
 48.3575 -
 48.3576 -  /* preprocess data */
 48.3577 -  uint64_t ctrInt = 0;
 48.3578 -  while(it != itend){
 48.3579 -    double tmp;
 48.3580 -    
 48.3581 -    /* get list of all ranks/groups and map internal id for the grid position */
 48.3582 -
 48.3583 -    /* check if already listed */
 48.3584 -    /* TODO: vector[ctrInt]=rankID ??? */
 48.3585 -    if(rankToPos.find(it->first.a) == rankToPos.end()){
 48.3586 -      /* insert */
 48.3587 -      rankToPos.insert(pair<uint64_t,uint64_t>(it->first.a, ctrInt));
 48.3588 -
 48.3589 -      /* label the matrix */
 48.3590 -      string label;
 48.3591 -      getGroupLabel(alldata,it->first.a, label);
 48.3592 -      tex << "\\node[anchor=east] at (0," << gridDim-ctrInt-1 << ".5)"
 48.3593 -             " {\\shortstack[r]{" << label << "}};" << endl;
 48.3594 -      tex << "\\node[anchor=west,rotate=90] at (" << ctrInt << ".5,"
 48.3595 -          << gridDim << ") {\\shortstack[l]{" << label << "}};" << endl;
 48.3596 -
 48.3597 -      //cout << "Process " << it->first.a << endl;
 48.3598 -      ctrInt++;
 48.3599 +static void write_p2pMsgRateMatrix(fstream& tex, struct AllData& alldata) {
 48.3600 +    std::map<Pair, MessageData, ltPair> msgMap = alldata.messageMapPerGroupPair;
 48.3601 +    std::map<uint64_t, uint64_t> rankToPos;
 48.3602 +
 48.3603 +    float scale = 0.7;
 48.3604 +    uint32_t gridDim = 0;
 48.3605 +    double minDataRate = 0;
 48.3606 +    double maxDataRate = 0;
 48.3607 +    uint64_t tres = alldata.timerResolution;
 48.3608 +
 48.3609 +    /* check, if grouped to set the dimension of the matrix */
 48.3610 +    if (grouped) {
 48.3611 +        gridDim = alldata.grouping.numGroups();
 48.3612 +    } else {
 48.3613 +        gridDim = alldata.allProcesses.size();
 48.3614      }
 48.3615  
 48.3616 -    /* get minimum and maximum data rate for color coding */
 48.3617 -    if(it->second.bytes_send.cnt && it->second.duration_send.cnt && 
 48.3618 -       (it->second.duration_send.sum > 0)){
 48.3619 -      tmp = it->second.bytes_send.sum/it->second.duration_send.sum*tres;
 48.3620 -      if(tmp > maxDataRate) maxDataRate = tmp;
 48.3621 -      if(tmp < minDataRate) minDataRate = tmp;
 48.3622 +    /* remove DEBUG output
 48.3623 +     cout << "gridDim:" << gridDim << " processes_num:" << alldata.allProcesses.size()
 48.3624 +     << " group_num:" << alldata.grouping.numGroups() << endl; */
 48.3625 +
 48.3626 +    std::map<Pair, MessageData, ltPair>::const_iterator it = msgMap.begin();
 48.3627 +    std::map<Pair, MessageData, ltPair>::const_iterator itend = msgMap.end();
 48.3628 +
 48.3629 +    if (it != itend) {
 48.3630 +        if (it->second.duration_send.sum == 0)
 48.3631 +            return;
 48.3632 +        minDataRate = it->second.bytes_send.sum / it->second.duration_send.sum
 48.3633 +                * tres;
 48.3634 +        maxDataRate = minDataRate;
 48.3635 +    } else {
 48.3636 +        return;
 48.3637      }
 48.3638  
 48.3639 -    /* ignore receive values */
 48.3640 -    /*
 48.3641 -    if(it->second.bytes_recv.cnt && it->second.duration_recv.cnt &&
 48.3642 -       (it->second.duration_recv.sum > 0)){
 48.3643 -      tmp = it->second.bytes_recv.sum/it->second.duration_recv.sum;
 48.3644 -      if(tmp > maxDataRate) maxDataRate = tmp;
 48.3645 -      if(tmp < minDataRate) minDataRate = tmp;
 48.3646 -    }*/
 48.3647 -
 48.3648 -    it++;
 48.3649 -  }
 48.3650 -
 48.3651 -  /* @DEBUG
 48.3652 -  cout << "Processes found: " << ctrInt << " -- min: " << minDataRate
 48.3653 -       << " max: " << maxDataRate << endl;*/
 48.3654 -
 48.3655 -  /* Quantifier (K, M, G, T) for large values */
 48.3656 -  uint8_t colorsteps = 20;
 48.3657 -  char quant = ' ';
 48.3658 -  uint64_t div = getScaleQuantifierLog2(minDataRate, maxDataRate, quant);
 48.3659 -  string unit = string(&quant,1);
 48.3660 -  unit.append("Byte/s");
 48.3661 -  maxDataRate /= div;
 48.3662 -  minDataRate /= div;
 48.3663 -
 48.3664 -  //cout << "min: " << minDataRate << " max: " << maxDataRate << " in [" << unit << "]" << endl;
 48.3665 -
 48.3666 -  makeNiceScaleTicks(minDataRate, maxDataRate, colorsteps);
 48.3667 -
 48.3668 -  /* colorize the fields */
 48.3669 -  it = msgMap.begin();
 48.3670 -  while(it != itend){
 48.3671 -    if(it->second.bytes_send.cnt && it->second.duration_send.cnt &&
 48.3672 -       (it->second.duration_send.sum > 0)){
 48.3673 -      uint64_t x = rankToPos.find(it->first.a)->second; //pos for rank
 48.3674 -      uint64_t y = gridDim-1-rankToPos.find(it->first.b)->second; //pos for receiving peer
 48.3675 -      float r,g,b;
 48.3676 -
 48.3677 -      /* @DEBUG
 48.3678 -      cout << " Process " << it->first.a << " to Peer " << it->first.b
 48.3679 -           << " datarate: " << it->second.bytes_send.sum/it->second.duration_send.sum << endl;
 48.3680 -      */
 48.3681 -      
 48.3682 -      if(0){ /* if grouped, how to get min max values??? */
 48.3683 -        /* get maximum color */
 48.3684 -        get_color_gray(minDataRate, maxDataRate,
 48.3685 -            it->second.bytes_send.max/it->second.duration_send.max*tres/div, r, g, b);
 48.3686 -
 48.3687 -        tex << "\\node[minimum size=1cm,anchor=south west] at ("
 48.3688 -            << x << "," << y << ") [rectangle, fill={rgb,1:red,"
 48.3689 -            << r << " ;green," << g  << ";blue," << b << "}] {};" << endl;
 48.3690 -
 48.3691 -        /* get minimum color */
 48.3692 -        get_color_gray(minDataRate, maxDataRate,
 48.3693 -            it->second.bytes_send.min/it->second.duration_send.min*tres/div, r, g, b);
 48.3694 -
 48.3695 -        tex << "\\node[minimum size=0.8cm,anchor=south west] at ("
 48.3696 -            << x << ".1," << y << ".1) [rectangle, fill={rgb,1:red,"
 48.3697 -            << r << " ;green," << g  << ";blue," << b << "}] {};" << endl;
 48.3698 -
 48.3699 -        /* get average color */
 48.3700 -        get_color_gray(minDataRate, maxDataRate,
 48.3701 -            it->second.bytes_send.sum/it->second.duration_send.sum*tres/div, r, g, b);
 48.3702 -
 48.3703 -        tex << "\\node[minimum size=0.6cm,anchor=south west] at ("
 48.3704 -            << x << ".2," << y << ".2) [rectangle, fill={rgb,1:red,"
 48.3705 -            << r << " ;green," << g  << ";blue," << b << "}] {};" << endl;
 48.3706 -      }else{
 48.3707 -        /* get average color */
 48.3708 -        get_color_gray(minDataRate, maxDataRate,
 48.3709 -            it->second.bytes_send.sum/it->second.duration_send.sum*tres/div, r, g, b);
 48.3710 -
 48.3711 -        tex << "\\node[minimum size=1cm,anchor=south west] at ("
 48.3712 -            << x << "," << y << ") [rectangle, fill={rgb,1:red,"
 48.3713 -            << r << " ;green," << g  << ";blue," << b << "}] {};" << endl;
 48.3714 -      }
 48.3715 +    tex << "\\center{\\Large \\bf P2P - Message Rate (average)}" << endl;
 48.3716 +    tex << "\\bigskip" << endl << endl;
 48.3717 +
 48.3718 +    tex << "\\begin{center}" << endl;
 48.3719 +    tex << "\\begin{tikzpicture} [step=1cm,scale=" << scale
 48.3720 +            << ",every node/.style={scale=" << scale << "}]";
 48.3721 +    if (grouped)
 48.3722 +        tex << "\\small" << endl;
 48.3723 +
 48.3724 +    /* preprocess data */
 48.3725 +    uint64_t ctrInt = 0;
 48.3726 +    while (it != itend) {
 48.3727 +        double tmp;
 48.3728 +
 48.3729 +        /* get list of all ranks/groups and map internal id for the grid position */
 48.3730 +
 48.3731 +        /* check if already listed */
 48.3732 +        /* TODO: vector[ctrInt]=rankID ??? */
 48.3733 +        if (rankToPos.find(it->first.a) == rankToPos.end()) {
 48.3734 +            /* insert */
 48.3735 +            rankToPos.insert(pair<uint64_t, uint64_t> (it->first.a, ctrInt));
 48.3736 +
 48.3737 +            /* label the matrix */
 48.3738 +            string label;
 48.3739 +            getGroupLabel(alldata, it->first.a, label);
 48.3740 +            tex << "\\node[anchor=east] at (0," << gridDim - ctrInt - 1
 48.3741 +                    << ".5)"
 48.3742 +                        " {\\shortstack[r]{" << label << "}};" << endl;
 48.3743 +            tex << "\\node[anchor=west,rotate=90] at (" << ctrInt << ".5,"
 48.3744 +                    << gridDim << ") {\\shortstack[l]{" << label << "}};"
 48.3745 +                    << endl;
 48.3746 +
 48.3747 +            //cout << "Process " << it->first.a << endl;
 48.3748 +            ctrInt++;
 48.3749 +        }
 48.3750 +
 48.3751 +        /* get minimum and maximum data rate for color coding */
 48.3752 +        if (it->second.bytes_send.cnt && it->second.duration_send.cnt
 48.3753 +                && (it->second.duration_send.sum > 0)) {
 48.3754 +            tmp = it->second.bytes_send.sum / it->second.duration_send.sum
 48.3755 +                    * tres;
 48.3756 +            if (tmp > maxDataRate)
 48.3757 +                maxDataRate = tmp;
 48.3758 +            if (tmp < minDataRate)
 48.3759 +                minDataRate = tmp;
 48.3760 +        }
 48.3761 +
 48.3762 +        /* ignore receive values */
 48.3763 +        /*
 48.3764 +         if(it->second.bytes_recv.cnt && it->second.duration_recv.cnt &&
 48.3765 +         (it->second.duration_recv.sum > 0)){
 48.3766 +         tmp = it->second.bytes_recv.sum/it->second.duration_recv.sum;
 48.3767 +         if(tmp > maxDataRate) maxDataRate = tmp;
 48.3768 +         if(tmp < minDataRate) minDataRate = tmp;
 48.3769 +         }*/
 48.3770 +
 48.3771 +        it++;
 48.3772      }
 48.3773 -    
 48.3774 -    it++;
 48.3775 -  }
 48.3776 -
 48.3777 -  tex << "\\draw[dotted] (-0.2,0) grid[step=1cm] ("
 48.3778 -      << gridDim << "," << gridDim << ".2);" << endl;
 48.3779 -
 48.3780 -  /* draw sender/receiver description */
 48.3781 -  tex << "\\draw (0," << gridDim << ") -- (-1.2," << gridDim+1 << ".2) "
 48.3782 -         "node [above right=-1, rotate=-45] {Receiver} "
 48.3783 -         "node [below right, rotate=-45] {Sender};" << endl;
 48.3784 -
 48.3785 -  tex << "\\end{tikzpicture}\\bigskip" << endl << endl;
 48.3786 -
 48.3787 -  tex << "\\begin{tikzpicture} [step=1cm,scale=" << scale
 48.3788 -      << ",every node/.style={scale=" << scale << "}]" << endl;
 48.3789 -
 48.3790 -  /* draw the colormap legend */
 48.3791 -  double interval = (maxDataRate - minDataRate)/(colorsteps);
 48.3792 -  int i = 0;
 48.3793 -  
 48.3794 -  tex.setf(ios::fixed, ios::floatfield);
 48.3795 -  tex.precision(4);
 48.3796 -  for(;i < (int)colorsteps+1; i++){
 48.3797 -    float r,g,b;
 48.3798 -    double value = minDataRate+i*interval;
 48.3799 -
 48.3800 -    get_color_gray(minDataRate, maxDataRate, value, r, g, b);
 48.3801 -
 48.3802 -    /* color box */
 48.3803 -    tex << "\\node[minimum size=0.95cm,anchor=south west] at ("
 48.3804 -        << i-1 << ",-2) [rectangle, fill={rgb,1:red,"
 48.3805 -        << r << " ;green," << g  << ";blue," << b << "}] {};" << endl;
 48.3806 -
 48.3807 -    /* datarate description */
 48.3808 -    tex << "\\node[anchor=east,rotate=90] at (" << i-0.5 << ",-2) {"
 48.3809 -        << value << " "<< unit << "};" << endl;
 48.3810 -  }
 48.3811 -
 48.3812 -  tex << "\\end{tikzpicture}" << endl;
 48.3813 -  tex << "\\end{center}" << endl;
 48.3814 -
 48.3815 -  //@TODO: min-max-avg values
 48.3816 -  
 48.3817 -  /* write min-max-avg legend 
 48.3818 -  float r,g,b;
 48.3819 -
 48.3820 -  tex << endl << "\\bigskip" << endl;
 48.3821 -  tex << "\\begin{center}" << endl;
 48.3822 -  tex << "\\begin{tikzpicture} [step=1cm,scale=" << scale
 48.3823 -    << ",every node/.style={scale=" << scale << "}]";
 48.3824 -  tex << "\\footnotesize" << endl;
 48.3825 -
 48.3826 -  get_color_gray(minDataRate, maxDataRate, maxDataRate, r, g, b);
 48.3827 -  tex << "\\node(max)[minimum size=1cm,anchor=center] at (0,0) "
 48.3828 -         "[rectangle, fill={rgb,1:red,"
 48.3829 -      << r << ";green," << g << ";blue," << b << "}] {};" << endl;
 48.3830 -
 48.3831 -  get_color_gray(minDataRate, maxDataRate, minDataRate, r, g, b);
 48.3832 -  tex << "\\node(max)[minimum size=0.8cm,anchor=center] at (0,0)"
 48.3833 -         "[rectangle, fill={rgb,1:red,"
 48.3834 -      << r << ";green," << g << ";blue," << b << "}] {};" << endl;
 48.3835 -
 48.3836 -  get_color_gray(minDataRate, maxDataRate, (maxDataRate+minDataRate)/2, r, g, b);
 48.3837 -  tex << "\\node(max)[minimum size=0.6cm,anchor=center] at (0,0)"
 48.3838 -         "[rectangle, fill={rgb,1:red,"
 48.3839 -      << r << ";green," << g << ";blue," << b << "}] {};" << endl;
 48.3840 -
 48.3841 -  tex << "\\node[anchor=west] at (2,0.4) {Maximum};" << endl;
 48.3842 -  tex << "\\node[anchor=west] at (2,0) {Average};" << endl;
 48.3843 -  tex << "\\node[anchor=west] at (2,-0.35) {Minimum};" << endl;
 48.3844 -  tex << "\\draw[arrows=stealth-](0.45,0.4) -- (2,0.4);" << endl;
 48.3845 -  tex << "\\draw[arrows=stealth-](0,0) -- (2,0);" << endl;
 48.3846 -  tex << "\\draw[arrows=stealth-] (0.35,-0.35) -- (2,-0.35);" << endl;
 48.3847 -  tex << "\\end{tikzpicture}" << endl;
 48.3848 -  tex << "\\end{center}" << endl;*/
 48.3849 -
 48.3850 -  tex << "\\newpage" << endl;
 48.3851 -  tex.setf(ios::floatfield);
 48.3852 -  tex.precision(6);
 48.3853 +
 48.3854 +    /* @DEBUG
 48.3855 +     cout << "Processes found: " << ctrInt << " -- min: " << minDataRate
 48.3856 +     << " max: " << maxDataRate << endl;*/
 48.3857 +
 48.3858 +    /* Quantifier (K, M, G, T) for large values */
 48.3859 +    uint8_t colorsteps = 20;
 48.3860 +    char quant = ' ';
 48.3861 +    uint64_t div = getScaleQuantifierLog2(minDataRate, maxDataRate, quant);
 48.3862 +    string unit = string(&quant, 1);
 48.3863 +    unit.append("Byte/s");
 48.3864 +    maxDataRate /= div;
 48.3865 +    minDataRate /= div;
 48.3866 +
 48.3867 +    /*cout << "min: " << minDataRate << " max: "
 48.3868 +           << maxDataRate << " in [" << unit << "]" << endl;*/
 48.3869 +
 48.3870 +    makeNiceScaleTicks(minDataRate, maxDataRate, colorsteps);
 48.3871 +
 48.3872 +    /* colorize the fields */
 48.3873 +    it = msgMap.begin();
 48.3874 +    while (it != itend) {
 48.3875 +        if (it->second.bytes_send.cnt && it->seco