The reason is that valgrind was complaining about uninitialized values being passed into proc_get_epoch. I saw the same warnings from valgrind when I ran it. I added code to initialize the values to what really should be the default value, and the warnings went away. Since the process_name_t struct isn't an object, it doesn't have an initialization function like so many of the other objects in the code, so explicit assignment is what we have.
FYI: Ralph's out today. He'll be back tomorrow.
I'm not really part of this ORTE discussion, but I am curious about a code style that I see in this commit: assigning ORTE_EPOCH_INVALID to a field, and then immediately overwriting that field with another value.
This technique is used throughout this patch, e.g.:
> peer_name.jobid = ORTE_PROC_MY_NAME->jobid;
> peer_name.vpid = peer_idx;
> + peer_name.epoch = ORTE_EPOCH_INVALID;
> peer_name.epoch = orte_ess.proc_get_epoch(&peer_name);
What is the purpose of this? As I understand it, this won't squash any valgrind warnings, and it may even be eliminated by the compiler as dead code because it seems to be useless.
--
On Aug 8, 2011, at 11:11 AM, wbland@osl.iu.edu wrote:
> Author: wbland
> Date: 2011-08-08 11:11:55 EDT (Mon, 08 Aug 2011)
> New Revision: 25015
> URL: https://svn.open-mpi.org/trac/ompi/changeset/25015
>
> Log:
> Make sure that the epoch is initialized everywhere so we don't get weird output
> during valgrind. This shouldn't have caused any problems with any actual
> execution. Just extra warnings in valgrind.
>
>
> Text files modified:
> trunk/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c | 2 ++
> trunk/ompi/proc/proc.c | 2 +-
> trunk/orte/mca/ess/alps/ess_alps_module.c | 1 +
> trunk/orte/mca/ess/env/ess_env_module.c | 1 +
> trunk/orte/mca/ess/lsf/ess_lsf_module.c | 1 +
> trunk/orte/mca/ess/slave/ess_slave_module.c | 1 +
> trunk/orte/mca/ess/slurm/ess_slurm_module.c | 1 +
> trunk/orte/mca/grpcomm/base/grpcomm_base_coll.c | 12 +++++++-----
> trunk/orte/mca/iof/hnp/iof_hnp.c | 1 +
> trunk/orte/mca/odls/base/odls_base_default_fns.c | 1 +
> trunk/orte/mca/odls/base/odls_base_open.c | 1 +
> trunk/orte/mca/plm/base/plm_base_launch_support.c | 1 +
> trunk/orte/mca/plm/base/plm_base_orted_cmds.c | 2 ++
> trunk/orte/mca/plm/base/plm_base_receive.c | 1 +
> trunk/orte/mca/rmaps/base/rmaps_base_support_fns.c | 3 +++
> trunk/orte/mca/rmaps/rank_file/rmaps_rank_file.c | 1 +
> trunk/orte/mca/rmaps/seq/rmaps_seq.c | 1 +
> trunk/orte/mca/rml/oob/rml_oob_component.c | 4 ----
> trunk/orte/mca/routed/binomial/routed_binomial.c | 4 ++++
> trunk/orte/mca/routed/cm/routed_cm.c | 4 ++++
> trunk/orte/mca/routed/linear/routed_linear.c | 2 ++
> trunk/orte/mca/routed/radix/routed_radix.c | 3 +++
> trunk/orte/mca/routed/slave/routed_slave.c | 1 +
> trunk/orte/mca/sstore/central/sstore_central_global.c | 1 +
> trunk/orte/mca/sstore/stage/sstore_stage_global.c | 1 +
> trunk/orte/orted/orted_comm.c | 1 +
> trunk/orte/test/system/oob_stress.c | 2 +-
> trunk/orte/test/system/orte_ring.c | 2 ++
> trunk/orte/test/system/orte_spawn.c | 1 +
> 29 files changed, 48 insertions(+), 11 deletions(-)
>
> Modified: trunk/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c
> ==============================================================================
> --- trunk/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c (original)
> +++ trunk/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c 2011-08-08 11:11:55 EDT (Mon, 08 Aug 2011)
> @@ -5284,6 +5284,7 @@
> */
> peer_name.jobid = ORTE_PROC_MY_NAME->jobid;
> peer_name.vpid = peer_idx;
> + peer_name.epoch = ORTE_EPOCH_INVALID;
> peer_name.epoch = orte_ess.proc_get_epoch(&peer_name);
>
> if( NULL == (peer_ref = find_peer(peer_name))) {
> @@ -5345,6 +5346,7 @@
>
> peer_name.jobid = ORTE_PROC_MY_NAME->jobid;
> peer_name.vpid = peer_idx;
> + peer_name.epoch = ORTE_EPOCH_INVALID;
> peer_name.epoch = orte_ess.proc_get_epoch(&peer_name);
>
> if ( 0 > (ret = orte_rml.recv_buffer_nb(&peer_name,
>
> Modified: trunk/ompi/proc/proc.c
> ==============================================================================
> --- trunk/ompi/proc/proc.c (original)
> +++ trunk/ompi/proc/proc.c 2011-08-08 11:11:55 EDT (Mon, 08 Aug 2011)
> @@ -362,7 +362,7 @@
>
> /* Does not change: proc->proc_name.vpid */
> proc->proc_name.jobid = ORTE_PROC_MY_NAME->jobid;
> -
> + proc->proc_name.epoch = ORTE_EPOCH_INVALID;
> proc->proc_name.epoch = orte_ess.proc_get_epoch(&proc->proc_name);
>
> /* Make sure to clear the local flag before we set it below */
>
> Modified: trunk/orte/mca/ess/alps/ess_alps_module.c
> ==============================================================================
> --- trunk/orte/mca/ess/alps/ess_alps_module.c (original)
> +++ trunk/orte/mca/ess/alps/ess_alps_module.c 2011-08-08 11:11:55 EDT (Mon, 08 Aug 2011)
> @@ -351,6 +351,7 @@
>
> ORTE_PROC_MY_NAME->jobid = jobid;
> ORTE_PROC_MY_NAME->vpid = (orte_vpid_t) cnos_get_rank() + starting_vpid;
> + ORTE_PROC_MY_NAME->epoch = ORTE_EPOCH_INVALID;
> ORTE_PROC_MY_NAME->epoch = orte_ess.proc_get_epoch(ORTE_PROC_MY_NAME);
>
> OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output,
>
> Modified: trunk/orte/mca/ess/env/ess_env_module.c
> ==============================================================================
> --- trunk/orte/mca/ess/env/ess_env_module.c (original)
> +++ trunk/orte/mca/ess/env/ess_env_module.c 2011-08-08 11:11:55 EDT (Mon, 08 Aug 2011)
> @@ -392,6 +392,7 @@
>
> ORTE_PROC_MY_NAME->jobid = jobid;
> ORTE_PROC_MY_NAME->vpid = vpid;
> + ORTE_PROC_MY_NAME->epoch = ORTE_EPOCH_INVALID;
> ORTE_PROC_MY_NAME->epoch = orte_ess.proc_get_epoch(ORTE_PROC_MY_NAME);
>
> OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output,
>
> Modified: trunk/orte/mca/ess/lsf/ess_lsf_module.c
> ==============================================================================
> --- trunk/orte/mca/ess/lsf/ess_lsf_module.c (original)
> +++ trunk/orte/mca/ess/lsf/ess_lsf_module.c 2011-08-08 11:11:55 EDT (Mon, 08 Aug 2011)
> @@ -357,6 +357,7 @@
>
> ORTE_PROC_MY_NAME->jobid = jobid;
> ORTE_PROC_MY_NAME->vpid = vpid;
> + ORTE_PROC_MY_NAME->epoch = ORTE_EPOCH_INVALID;
> ORTE_PROC_MY_NAME->epoch = orte_ess.proc_get_epoch(ORTE_PROC_MY_NAME);
>
> /* fix up the base name and make it the "real" name */
>
> Modified: trunk/orte/mca/ess/slave/ess_slave_module.c
> ==============================================================================
> --- trunk/orte/mca/ess/slave/ess_slave_module.c (original)
> +++ trunk/orte/mca/ess/slave/ess_slave_module.c 2011-08-08 11:11:55 EDT (Mon, 08 Aug 2011)
> @@ -280,6 +280,7 @@
>
> ORTE_PROC_MY_NAME->jobid = jobid;
> ORTE_PROC_MY_NAME->vpid = vpid;
> + ORTE_PROC_MY_NAME->epoch = ORTE_EPOCH_INVALID;
> ORTE_PROC_MY_NAME->epoch = orte_ess.proc_get_epoch(ORTE_PROC_MY_NAME);
>
> OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output,
>
> Modified: trunk/orte/mca/ess/slurm/ess_slurm_module.c
> ==============================================================================
> --- trunk/orte/mca/ess/slurm/ess_slurm_module.c (original)
> +++ trunk/orte/mca/ess/slurm/ess_slurm_module.c 2011-08-08 11:11:55 EDT (Mon, 08 Aug 2011)
> @@ -368,6 +368,7 @@
> /* fix up the vpid and make it the "real" vpid */
> slurm_nodeid = atoi(getenv("SLURM_NODEID"));
> ORTE_PROC_MY_NAME->vpid = vpid + slurm_nodeid;
> + ORTE_PROC_MY_NAME->epoch = ORTE_EPOCH_INVALID;
> ORTE_PROC_MY_NAME->epoch = orte_ess.proc_get_epoch(ORTE_PROC_MY_NAME);
>
> OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output,
>
> Modified: trunk/orte/mca/grpcomm/base/grpcomm_base_coll.c
> ==============================================================================
> --- trunk/orte/mca/grpcomm/base/grpcomm_base_coll.c (original)
> +++ trunk/orte/mca/grpcomm/base/grpcomm_base_coll.c 2011-08-08 11:11:55 EDT (Mon, 08 Aug 2011)
> @@ -168,7 +168,7 @@
> if (vpids[0] == ORTE_PROC_MY_NAME->vpid) {
> /* I send first */
> peer.vpid = vpids[1];
> -
> + peer.epoch = ORTE_EPOCH_INVALID;
> peer.epoch = orte_ess.proc_get_epoch(&peer);
>
> /* setup a temp buffer so I can inform the other side as to the
> @@ -226,7 +226,7 @@
> opal_dss.pack(&buf, &num_entries, 1, OPAL_INT32);
> opal_dss.copy_payload(&buf, sendbuf);
> peer.vpid = vpids[0];
> -
> + peer.epoch = ORTE_EPOCH_INVALID;
> peer.epoch = orte_ess.proc_get_epoch(&peer);
>
> OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
> @@ -320,7 +320,7 @@
> /* first send my current contents */
> nv = (rank - distance + np) % np;
> peer.vpid = vpids[nv];
> -
> + peer.epoch = ORTE_EPOCH_INVALID;
> peer.epoch = orte_ess.proc_get_epoch(&peer);
>
> OBJ_CONSTRUCT(&buf, opal_buffer_t);
> @@ -340,7 +340,7 @@
> num_recvd = 0;
> nv = (rank + distance) % np;
> peer.vpid = vpids[nv];
> -
> + peer.epoch = ORTE_EPOCH_INVALID;
> peer.epoch = orte_ess.proc_get_epoch(&peer);
>
> OBJ_CONSTRUCT(&bucket, opal_buffer_t);
> @@ -439,7 +439,7 @@
> /* first send my current contents */
> nv = rank ^ distance;
> peer.vpid = vpids[nv];
> -
> + peer.epoch = ORTE_EPOCH_INVALID;
> peer.epoch = orte_ess.proc_get_epoch(&peer);
>
> OBJ_CONSTRUCT(&buf, opal_buffer_t);
> @@ -646,6 +646,7 @@
> proc.jobid = jobid;
> proc.vpid = 0;
> while (proc.vpid < jobdat->num_procs && 0 < opal_list_get_size(&daemon_tree)) {
> + proc.epoch = ORTE_EPOCH_INVALID;
> proc.epoch = orte_ess.proc_get_epoch(&proc);
>
> /* get the daemon that hosts this proc */
> @@ -712,6 +713,7 @@
> /* send it */
> my_parent.jobid = ORTE_PROC_MY_NAME->jobid;
> my_parent.vpid = orte_routed.get_routing_tree(NULL);
> + my_parent.epoch = ORTE_EPOCH_INVALID;
> my_parent.epoch = orte_ess.proc_get_epoch(&my_parent);
>
> OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
>
> Modified: trunk/orte/mca/iof/hnp/iof_hnp.c
> ==============================================================================
> --- trunk/orte/mca/iof/hnp/iof_hnp.c (original)
> +++ trunk/orte/mca/iof/hnp/iof_hnp.c 2011-08-08 11:11:55 EDT (Mon, 08 Aug 2011)
> @@ -281,6 +281,7 @@
> &mca_iof_hnp_component.sinks);
> sink->daemon.jobid = ORTE_PROC_MY_NAME->jobid;
> sink->daemon.vpid = proc->node->daemon->name.vpid;
> + sink->daemon.epoch = ORTE_EPOCH_INVALID;
> sink->daemon.epoch = orte_ess.proc_get_epoch(&sink->daemon);
> }
> }
>
> Modified: trunk/orte/mca/odls/base/odls_base_default_fns.c
> ==============================================================================
> --- trunk/orte/mca/odls/base/odls_base_default_fns.c (original)
> +++ trunk/orte/mca/odls/base/odls_base_default_fns.c 2011-08-08 11:11:55 EDT (Mon, 08 Aug 2011)
> @@ -734,6 +734,7 @@
> proc.jobid = jobdat->jobid;
> for (j=0; j < jobdat->num_procs; j++) {
> proc.vpid = j;
> + proc.epoch = ORTE_EPOCH_INVALID;
> proc.epoch = orte_ess.proc_get_epoch(&proc);
> /* get the vpid of the daemon that is to host this proc */
> if (ORTE_VPID_INVALID == (host_daemon = orte_ess.proc_get_daemon(&proc))) {
>
> Modified: trunk/orte/mca/odls/base/odls_base_open.c
> ==============================================================================
> --- trunk/orte/mca/odls/base/odls_base_open.c (original)
> +++ trunk/orte/mca/odls/base/odls_base_open.c 2011-08-08 11:11:55 EDT (Mon, 08 Aug 2011)
> @@ -200,6 +200,7 @@
> * will be in the job - we'll check later
> */
> nm->name.vpid = rank;
> + nm->name.epoch = ORTE_EPOCH_INVALID;
> nm->name.epoch = orte_ess.proc_get_epoch(&nm->name);
> }
> opal_list_append(&orte_odls_globals.xterm_ranks, &nm->item);
>
> Modified: trunk/orte/mca/plm/base/plm_base_launch_support.c
> ==============================================================================
> --- trunk/orte/mca/plm/base/plm_base_launch_support.c (original)
> +++ trunk/orte/mca/plm/base/plm_base_launch_support.c 2011-08-08 11:11:55 EDT (Mon, 08 Aug 2011)
> @@ -377,6 +377,7 @@
> /* push stdin - the IOF will know what to do with the specified target */
> name.jobid = job;
> name.vpid = jdata->stdin_target;
> + name.epoch = ORTE_EPOCH_INVALID;
> name.epoch = orte_ess.proc_get_epoch(&name);
>
> if (ORTE_SUCCESS != (rc = orte_iof.push(&name, ORTE_IOF_STDIN, 0))) {
>
> Modified: trunk/orte/mca/plm/base/plm_base_orted_cmds.c
> ==============================================================================
> --- trunk/orte/mca/plm/base/plm_base_orted_cmds.c (original)
> +++ trunk/orte/mca/plm/base/plm_base_orted_cmds.c 2011-08-08 11:11:55 EDT (Mon, 08 Aug 2011)
> @@ -163,6 +163,7 @@
> continue;
> }
> peer.vpid = v;
> + peer.epoch = ORTE_EPOCH_INVALID;
> peer.epoch = orte_ess.proc_get_epoch(&peer);
>
> /* don't worry about errors on the send here - just
> @@ -339,6 +340,7 @@
> continue;
> }
> peer.vpid = v;
> + peer.epoch = ORTE_EPOCH_INVALID;
> peer.epoch = orte_ess.proc_get_epoch(&peer);
> /* check to see if this daemon is known to be "dead" */
> if (proc->state > ORTE_PROC_STATE_UNTERMINATED) {
>
> Modified: trunk/orte/mca/plm/base/plm_base_receive.c
> ==============================================================================
> --- trunk/orte/mca/plm/base/plm_base_receive.c (original)
> +++ trunk/orte/mca/plm/base/plm_base_receive.c 2011-08-08 11:11:55 EDT (Mon, 08 Aug 2011)
> @@ -394,6 +394,7 @@
> break;
> }
> name.vpid = vpid;
> + name.epoch = ORTE_EPOCH_INVALID;
> name.epoch = orte_ess.proc_get_epoch(&name);
>
> /* unpack the pid */
>
> Modified: trunk/orte/mca/rmaps/base/rmaps_base_support_fns.c
> ==============================================================================
> --- trunk/orte/mca/rmaps/base/rmaps_base_support_fns.c (original)
> +++ trunk/orte/mca/rmaps/base/rmaps_base_support_fns.c 2011-08-08 11:11:55 EDT (Mon, 08 Aug 2011)
> @@ -559,6 +559,7 @@
> }
> }
> proc->name.vpid = vpid;
> + proc->name.epoch = ORTE_EPOCH_INVALID;
> proc->name.epoch = orte_ess.proc_get_epoch(&proc->name);
> /* If there is an invalid epoch here, it's because it doesn't exist yet. */
> if (ORTE_NODE_RANK_INVALID == proc->name.epoch) {
> @@ -600,6 +601,7 @@
> }
> }
> proc->name.vpid = vpid;
> + proc->name.epoch = ORTE_EPOCH_INVALID;
> proc->name.epoch = orte_ess.proc_get_epoch(&proc->name);
> }
> if (NULL == opal_pointer_array_get_item(jdata->procs, proc->name.vpid)) {
> @@ -1012,6 +1014,7 @@
> return ORTE_ERR_OUT_OF_RESOURCE;
> }
> proc->name.vpid = jdata->num_procs; /* take the next available vpid */
> + proc->name.epoch = ORTE_EPOCH_INVALID;
> proc->name.epoch = orte_ess.proc_get_epoch(&proc->name);
> proc->node = node;
> proc->nodename = node->name;
>
> Modified: trunk/orte/mca/rmaps/rank_file/rmaps_rank_file.c
> ==============================================================================
> --- trunk/orte/mca/rmaps/rank_file/rmaps_rank_file.c (original)
> +++ trunk/orte/mca/rmaps/rank_file/rmaps_rank_file.c 2011-08-08 11:11:55 EDT (Mon, 08 Aug 2011)
> @@ -502,6 +502,7 @@
> }
> proc->name.vpid = rank;
> /* Either init or update the epoch. */
> + proc->name.epoch = ORTE_EPOCH_INVALID;
> proc->name.epoch = orte_ess.proc_get_epoch(&proc->name);
>
> proc->slot_list = strdup(rfmap->slot_list);
>
> Modified: trunk/orte/mca/rmaps/seq/rmaps_seq.c
> ==============================================================================
> --- trunk/orte/mca/rmaps/seq/rmaps_seq.c (original)
> +++ trunk/orte/mca/rmaps/seq/rmaps_seq.c 2011-08-08 11:11:55 EDT (Mon, 08 Aug 2011)
> @@ -235,6 +235,7 @@
> }
> /* assign the vpid */
> proc->name.vpid = vpid++;
> + proc->name.epoch = ORTE_EPOCH_INVALID;
> proc->name.epoch = orte_ess.proc_get_epoch(&proc->name);
>
> /* add to the jdata proc array */
>
> Modified: trunk/orte/mca/rml/oob/rml_oob_component.c
> ==============================================================================
> --- trunk/orte/mca/rml/oob/rml_oob_component.c (original)
> +++ trunk/orte/mca/rml/oob/rml_oob_component.c 2011-08-08 11:11:55 EDT (Mon, 08 Aug 2011)
> @@ -363,7 +363,6 @@
> origin = hdr->origin;
>
> next = orte_routed.get_route(&hdr->destination);
> -#if 0
> if (next.vpid == ORTE_VPID_INVALID) {
> opal_output(0,
> "%s:queued progress tried routing message from %s to %s:%d, can't find route",
> @@ -374,7 +373,6 @@
> opal_backtrace_print(stderr);
> orte_errmgr.abort(ORTE_ERROR_DEFAULT_EXIT_CODE, NULL);
> }
> -#endif
>
> if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &next, ORTE_PROC_MY_NAME)) {
> opal_output(0, "%s:queued progress trying to get message from %s to %s:%d, routing loop",
> @@ -475,7 +473,6 @@
>
> next = orte_routed.get_route(&hdr->destination);
> if (next.vpid == ORTE_VPID_INVALID) {
> -#if 0
> opal_output(0, "%s:route_callback tried routing message from %s to %s:%d, can't find route",
> ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
> ORTE_NAME_PRINT(&origin),
> @@ -483,7 +480,6 @@
> hdr->tag);
> opal_backtrace_print(stderr);
> orte_errmgr.abort(ORTE_ERROR_DEFAULT_EXIT_CODE, NULL);
> -#endif
> return;
> }
>
>
> Modified: trunk/orte/mca/routed/binomial/routed_binomial.c
> ==============================================================================
> --- trunk/orte/mca/routed/binomial/routed_binomial.c (original)
> +++ trunk/orte/mca/routed/binomial/routed_binomial.c 2011-08-08 11:11:55 EDT (Mon, 08 Aug 2011)
> @@ -274,6 +274,7 @@
> ORTE_NAME_PRINT(route)));
> jfam->route.jobid = route->jobid;
> jfam->route.vpid = route->vpid;
> + jfam->route.epoch = ORTE_EPOCH_INVALID;
> jfam->route.epoch = orte_ess.proc_get_epoch(&jfam->route);
>
> return ORTE_SUCCESS;
> @@ -289,6 +290,7 @@
> jfam->job_family = jfamily;
> jfam->route.jobid = route->jobid;
> jfam->route.vpid = route->vpid;
> + jfam->route.epoch = ORTE_EPOCH_INVALID;
> jfam->route.epoch = orte_ess.proc_get_epoch(&jfam->route);
>
> opal_pointer_array_add(&orte_routed_jobfams, jfam);
> @@ -459,6 +461,7 @@
> ret = &daemon;
>
> found:
> + daemon.epoch = ORTE_EPOCH_INVALID;
> daemon.epoch = orte_ess.proc_get_epoch(&daemon);
>
> OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
> @@ -1007,6 +1010,7 @@
> ORTE_PROC_MY_PARENT->vpid = binomial_tree(0, 0, ORTE_PROC_MY_NAME->vpid,
> orte_process_info.max_procs,
> &num_children, &my_children, NULL, true, jobid);
> + ORTE_PROC_MY_PARENT->epoch = ORTE_EPOCH_INVALID;
> ORTE_PROC_MY_PARENT->epoch = orte_ess.proc_get_epoch(ORTE_PROC_MY_PARENT);
>
> if (0 < opal_output_get_verbosity(orte_routed_base_output)) {
>
> Modified: trunk/orte/mca/routed/cm/routed_cm.c
> ==============================================================================
> --- trunk/orte/mca/routed/cm/routed_cm.c (original)
> +++ trunk/orte/mca/routed/cm/routed_cm.c 2011-08-08 11:11:55 EDT (Mon, 08 Aug 2011)
> @@ -257,6 +257,7 @@
> ORTE_NAME_PRINT(route)));
> jfam->route.jobid = route->jobid;
> jfam->route.vpid = route->vpid;
> + jfam->route.epoch = ORTE_EPOCH_INVALID;
> jfam->route.epoch = orte_ess.proc_get_epoch(&jfam->route);
>
> return ORTE_SUCCESS;
> @@ -272,6 +273,7 @@
> jfam->job_family = jfamily;
> jfam->route.jobid = route->jobid;
> jfam->route.vpid = route->vpid;
> + jfam->route.epoch = ORTE_EPOCH_INVALID;
> jfam->route.epoch = orte_ess.proc_get_epoch(&jfam->route);
>
> opal_pointer_array_add(&orte_routed_jobfams, jfam);
> @@ -365,6 +367,7 @@
> }
>
> /* Initialize daemon's epoch, based on its current vpid/jobid */
> + daemon.epoch = ORTE_EPOCH_INVALID;
> daemon.epoch = orte_ess.proc_get_epoch(&daemon);
>
> /* if the daemon is me, then send direct to the target! */
> @@ -811,6 +814,7 @@
> */
> local_lifeline.jobid = proc->jobid;
> local_lifeline.vpid = proc->vpid;
> + local_lifeline.epoch = ORTE_EPOCH_INVALID;
> local_lifeline.epoch = orte_ess.proc_get_epoch(&local_lifeline);
>
> lifeline = &local_lifeline;
>
> Modified: trunk/orte/mca/routed/linear/routed_linear.c
> ==============================================================================
> --- trunk/orte/mca/routed/linear/routed_linear.c (original)
> +++ trunk/orte/mca/routed/linear/routed_linear.c 2011-08-08 11:11:55 EDT (Mon, 08 Aug 2011)
> @@ -373,6 +373,7 @@
> }
>
> /* Initialize daemon's epoch, based on its current vpid/jobid */
> + daemon.epoch = ORTE_EPOCH_INVALID;
> daemon.epoch = orte_ess.proc_get_epoch(&daemon);
>
> /* if the daemon is me, then send direct to the target! */
> @@ -394,6 +395,7 @@
> /* we are at end of chain - wrap around */
> daemon.vpid = 0;
> }
> + daemon.epoch = ORTE_EPOCH_INVALID;
> daemon.epoch = orte_ess.proc_get_epoch(&daemon);
> ret = &daemon;
> }
>
> Modified: trunk/orte/mca/routed/radix/routed_radix.c
> ==============================================================================
> --- trunk/orte/mca/routed/radix/routed_radix.c (original)
> +++ trunk/orte/mca/routed/radix/routed_radix.c 2011-08-08 11:11:55 EDT (Mon, 08 Aug 2011)
> @@ -413,6 +413,7 @@
> if (opal_bitmap_is_set_bit(&child->relatives, daemon.vpid)) {
> /* yep - we need to step through this child */
> daemon.vpid = child->vpid;
> + daemon.epoch = ORTE_EPOCH_INVALID;
> daemon.epoch = orte_ess.proc_get_epoch(&daemon);
> ret = &daemon;
> goto found;
> @@ -424,6 +425,7 @@
> * any of our children, so we have to step up through our parent
> */
> daemon.vpid = ORTE_PROC_MY_PARENT->vpid;
> + daemon.epoch = ORTE_EPOCH_INVALID;
> daemon.epoch = orte_ess.proc_get_epoch(&daemon);
>
> ret = &daemon;
> @@ -879,6 +881,7 @@
> ORTE_PROC_MY_PARENT->vpid = (Ii-Sum) % NInPrevLevel;
> ORTE_PROC_MY_PARENT->vpid += (Sum - NInPrevLevel);
> }
> + ORTE_PROC_MY_PARENT->epoch = ORTE_EPOCH_INVALID;
> ORTE_PROC_MY_PARENT->epoch = orte_ess.proc_get_epoch(ORTE_PROC_MY_PARENT);
>
> /* compute my direct children and the bitmap that shows which vpids
>
> Modified: trunk/orte/mca/routed/slave/routed_slave.c
> ==============================================================================
> --- trunk/orte/mca/routed/slave/routed_slave.c (original)
> +++ trunk/orte/mca/routed/slave/routed_slave.c 2011-08-08 11:11:55 EDT (Mon, 08 Aug 2011)
> @@ -275,6 +275,7 @@
> */
> local_lifeline.jobid = proc->jobid;
> local_lifeline.vpid = proc->vpid;
> + local_lifeline.epoch = ORTE_EPOCH_INVALID;
> local_lifeline.epoch = orte_ess.proc_get_epoch(&local_lifeline);
>
> lifeline = &local_lifeline;
>
> Modified: trunk/orte/mca/sstore/central/sstore_central_global.c
> ==============================================================================
> --- trunk/orte/mca/sstore/central/sstore_central_global.c (original)
> +++ trunk/orte/mca/sstore/central/sstore_central_global.c 2011-08-08 11:11:55 EDT (Mon, 08 Aug 2011)
> @@ -1216,6 +1216,7 @@
>
> vpid_snapshot->process_name.jobid = handle_info->jobid;
> vpid_snapshot->process_name.vpid = i;
> + vpid_snapshot->process_name.epoch = ORTE_EPOCH_INVALID;
> vpid_snapshot->process_name.epoch = orte_ess.proc_get_epoch(&vpid_snapshot->process_name);
>
> vpid_snapshot->crs_comp = NULL;
>
> Modified: trunk/orte/mca/sstore/stage/sstore_stage_global.c
> ==============================================================================
> --- trunk/orte/mca/sstore/stage/sstore_stage_global.c (original)
> +++ trunk/orte/mca/sstore/stage/sstore_stage_global.c 2011-08-08 11:11:55 EDT (Mon, 08 Aug 2011)
> @@ -1706,6 +1706,7 @@
>
> vpid_snapshot->process_name.jobid = handle_info->jobid;
> vpid_snapshot->process_name.vpid = i;
> + vpid_snapshot->process_name.epoch = ORTE_EPOCH_INVALID;
> vpid_snapshot->process_name.epoch = orte_ess.proc_get_epoch(&vpid_snapshot->process_name);
>
> /* JJH: Currently we do not have this information since we do not save
>
> Modified: trunk/orte/orted/orted_comm.c
> ==============================================================================
> --- trunk/orte/orted/orted_comm.c (original)
> +++ trunk/orte/orted/orted_comm.c 2011-08-08 11:11:55 EDT (Mon, 08 Aug 2011)
> @@ -129,6 +129,7 @@
> continue;
> }
>
> + target.epoch = ORTE_EPOCH_INVALID;
> if (ORTE_NODE_RANK_INVALID == (target.epoch = orte_ess.proc_get_epoch(&target))) {
> /* If we are trying to send to a previously failed process it's
> * better to fail silently. */
>
> Modified: trunk/orte/test/system/oob_stress.c
> ==============================================================================
> --- trunk/orte/test/system/oob_stress.c (original)
> +++ trunk/orte/test/system/oob_stress.c 2011-08-08 11:11:55 EDT (Mon, 08 Aug 2011)
> @@ -74,7 +74,7 @@
>
> for (j=1; j < count+1; j++) {
> peer.vpid = (ORTE_PROC_MY_NAME->vpid + j) % orte_process_info.num_procs;
> -
> + peer.epoch = ORTE_EPOCH_INVALID;
> peer.epoch = orte_ess.proc_get_epoch(&peer);
>
> /* rank0 starts ring */
>
> Modified: trunk/orte/test/system/orte_ring.c
> ==============================================================================
> --- trunk/orte/test/system/orte_ring.c (original)
> +++ trunk/orte/test/system/orte_ring.c 2011-08-08 11:11:55 EDT (Mon, 08 Aug 2011)
> @@ -41,6 +41,7 @@
> if( right_peer_orte_name.vpid >= num_peers ) {
> right_peer_orte_name.vpid = 0;
> }
> + right_peer_orte_name.epoch = ORTE_EPOCH_INVALID;
> right_peer_orte_name.epoch = orte_ess.proc_get_epoch(&right_peer_orte_name);
>
> left_peer_orte_name.jobid = ORTE_PROC_MY_NAME->jobid;
> @@ -48,6 +49,7 @@
> if( ORTE_PROC_MY_NAME->vpid == 0 ) {
> left_peer_orte_name.vpid = num_peers - 1;
> }
> + left_peer_orte_name.epoch = ORTE_EPOCH_INVALID;
> left_peer_orte_name.epoch = orte_ess.proc_get_epoch(&left_peer_orte_name);
>
> printf("My name is: %s -- PID %d\tMy Left Peer is %s\tMy Right Peer is %s\n",
>
> Modified: trunk/orte/test/system/orte_spawn.c
> ==============================================================================
> --- trunk/orte/test/system/orte_spawn.c (original)
> +++ trunk/orte/test/system/orte_spawn.c 2011-08-08 11:11:55 EDT (Mon, 08 Aug 2011)
> @@ -74,6 +74,7 @@
> for (i=0; i < app->num_procs; i++) {
> name.vpid = i;
>
> + name.epoch = ORTE_EPOCH_INVALID;
> name.epoch = orte_ess.proc_get_epoch(&name);
> fprintf(stderr, "Parent: sending message to child %s\n", ORTE_NAME_PRINT(&name));
> if (0 > (rc = orte_rml.send(&name, &msg, 1, MY_TAG, 0))) {
> _______________________________________________
> svn-full mailing list
> svn-full@open-mpi.org
> http://www.open-mpi.org/mailman/listinfo.cgi/svn-full
Jeff Squyres
jsquyres@cisco.com
For corporate legal information go to:
http://www.cisco.com/web/about/doing_business/legal/cri/