Open MPI logo

Open MPI Development Mailing List Archives

  |   Home   |   Support   |   FAQ   |   all Development mailing list

Subject: Re: [OMPI devel] [PATCH 4/4] Trying to get the C/R code to compile again. (last)
From: Jeff Squyres (jsquyres) (jsquyres_at_[hidden])
Date: 2013-12-04 11:07:52


On Nov 25, 2013, at 9:59 AM, Adrian Reber <adrian_at_[hidden]> wrote:

> diff --git a/ompi/mca/bml/r2/bml_r2_ft.c b/ompi/mca/bml/r2/bml_r2_ft.c
> index 1448c04..fc16452 100644
> --- a/ompi/mca/bml/r2/bml_r2_ft.c
> +++ b/ompi/mca/bml/r2/bml_r2_ft.c
> @@ -191,7 +191,7 @@ int mca_bml_r2_ft_event(int state)
>
> for(p = 0; p < (int)num_procs; ++p) {
> if( NULL != procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]) {
> - OBJ_RELEASE((mca_bml_base_endpoint_t*) procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]);
> + OBJ_RELEASE(procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]);
> procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML] = NULL;
> }
>
> @@ -263,9 +263,9 @@ int mca_bml_r2_ft_event(int state)
> mca_base_var_get_value(param_type, &btl_list, NULL, NULL);
> opal_output_verbose(11, ompi_cr_output,
> "Restart (New BTL MCA): <%s>\n", btl_list ? btl_list[0] : "");
> - if( NULL != param_list ) {
> - free(param_list);
> - param_list = NULL;
> + if( NULL != btl_list ) {
> + free(btl_list);
> + btl_list = NULL;
> }
>
> /*
> @@ -286,7 +286,7 @@ int mca_bml_r2_ft_event(int state)
>
> for(p = 0; p < (int)num_procs; ++p) {
> if( NULL != procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]) {
> - OBJ_RELEASE((mca_bml_base_endpoint_t*) procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]);
> + OBJ_RELEASE(procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]);
> procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML] = NULL;
> }

Josh -- can you double check the above r2 changes?

> diff --git a/opal/mca/base/mca_base_components_open.c b/opal/mca/base/mca_base_components_open.c
> index e46e0f3..8d5e9da 100644
> --- a/opal/mca/base/mca_base_components_open.c
> +++ b/opal/mca/base/mca_base_components_open.c
> @@ -141,9 +141,7 @@ static int open_components(mca_base_framework_t *framework)
> * NTH: Logic moved to mca_base_components_filter.
> */
> #if (OPAL_ENABLE_FT == 1) && (OPAL_ENABLE_FT_CR == 1)
> - if (mca_base_component_distill_checkpoint_ready) {
> - open_only_flags |= MCA_BASE_METADATA_PARAM_CHECKPOINT;
> - }
> + open_only_flags |= MCA_BASE_METADATA_PARAM_CHECKPOINT;
> #endif /* (OPAL_ENABLE_FT == 1) && (OPAL_ENABLE_FT_CR == 1) */
>
> /* If mca_base_framework_register_components was called with the MCA_BASE_COMPONENTS_ALL flag

Did the mca_base_component_distill_checkpoint_ready variable disappear? (I'm not sure what it was for -- I'm just curious as to why the if block disappeared.

> diff --git a/opal/mca/crs/self/crs_self_component.c b/opal/mca/crs/self/crs_self_component.c
> index e0ca1ab..eb45d59 100644
> --- a/opal/mca/crs/self/crs_self_component.c
> +++ b/opal/mca/crs/self/crs_self_component.c
> @@ -90,9 +90,9 @@ static int crs_self_register (void)
> mca_crs_self_component.super.priority = 20;
> ret = mca_base_component_var_register (&mca_crs_self_component.super.base_version,
> "priority", "Priority of the CRS self component "
> - "(default: 20)", MCA_BASE_VAR_TYPE_INT, NULL,
> + "(default: 20)", MCA_BASE_VAR_TYPE_INT, NULL, 0,
> MCA_BASE_VAR_FLAG_SETTABLE,
> - OPAL_INFO_LVL_9, MPI_BASE_VAR_SCOPE_ALL_EQ,
> + OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL_EQ,
> &mca_crs_self_component.super.priority);

General note: it is probably worth auditing all of these MCA params: should they really be level 9? Or would level 6 or 7 be more appropriate?

    https://svn.open-mpi.org/trac/ompi/wiki/MCAParamLevels

[snip parts that I have no comments for]

> --- a/orte/mca/errmgr/base/errmgr_base_fns.c
> +++ b/orte/mca/errmgr/base/errmgr_base_fns.c
> @@ -342,7 +342,7 @@ void orte_errmgr_base_execute_error_callbacks(opal_pointer_array_t *errors)
> ********************/
> #if OPAL_ENABLE_FT_CR
>
> -void orte_errmgr_base_migrate_state_notify(int state)
> +ORTE_DECLSPEC void orte_errmgr_base_migrate_state_notify(int state)

Now that Windows support is gone, are we still doing DECLSPEC these days?

> {
> switch(state) {
> case ORTE_ERRMGR_MIGRATE_STATE_ERROR:
> @@ -366,7 +366,7 @@ void orte_errmgr_base_migrate_state_notify(int state)
> }
> }
>
> -void orte_errmgr_base_proc_state_notify(orte_proc_state_t state, orte_process_name_t *proc)
> +ORTE_DECLSPEC int orte_errmgr_base_proc_state_notify(orte_proc_state_t state, orte_process_name_t *proc)
> {
> if (NULL != proc) {
> switch(state) {
> @@ -401,7 +401,7 @@ void orte_errmgr_base_proc_state_notify(orte_proc_state_t state, orte_process_na
> }
> }
>
> -int orte_errmgr_base_migrate_state_str(char ** state_str, int state)
> +ORTE_DECLSPEC int orte_errmgr_base_migrate_state_str(char ** state_str, int state)
> {
> switch(state) {
> case ORTE_ERRMGR_MIGRATE_STATE_NONE:
> diff --git a/orte/mca/ess/env/ess_env_module.c b/orte/mca/ess/env/ess_env_module.c
> index 6a71230..52092b5 100644
> --- a/orte/mca/ess/env/ess_env_module.c
> +++ b/orte/mca/ess/env/ess_env_module.c
> @@ -422,7 +422,7 @@ static int rte_ft_event(int state)
> exit_status = ret;
> goto cleanup;
> }
> - if (ORTE_SUCCESS != (ret = orte_db.remove(NULL, NULL))) {
> + if (ORTE_SUCCESS != (ret = opal_db.remove(NULL, NULL))) {
> ORTE_ERROR_LOG(ret);
> exit_status = ret;
> goto cleanup;
> diff --git a/orte/mca/plm/base/plm_base_launch_support.c b/orte/mca/plm/base/plm_base_launch_support.c
> index 3deee11..6aba2c2 100644
> --- a/orte/mca/plm/base/plm_base_launch_support.c
> +++ b/orte/mca/plm/base/plm_base_launch_support.c
> @@ -333,6 +333,7 @@ void orte_plm_base_complete_setup(int fd, short args, void *cbdata)
> {
> orte_job_t *jdata, *jdatorted;
> orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
> + int rc;

Nice. :-) (just a general comment about my amusement/sadness to see how far this code has bit-rotted)

> /* if we don't want to launch the apps, now is the time to leave */
> if (orte_do_not_launch) {
> diff --git a/orte/mca/rml/oob/rml_oob_component.c b/orte/mca/rml/oob/rml_oob_component.c
> index dd539cd..b91f4a3 100644
> --- a/orte/mca/rml/oob/rml_oob_component.c
> +++ b/orte/mca/rml/oob/rml_oob_component.c
> @@ -11,11 +11,7 @@
> * Copyright (c) 2004-2005 The Regents of the University of California.
> * All rights reserved.
> * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
> -<<<<<<< .mine
> - * Copyright (c) 2011-2012 Los Alamos National Security, LLC.
> -=======
> * Copyright (c) 2011-2013 Los Alamos National Security, LLC.
> ->>>>>>> .r28253

Ditto with above. :-)

[snip]

> diff --git a/orte/mca/snapc/full/snapc_full_component.c b/orte/mca/snapc/full/snapc_full_component.c
> index 7815363..b953e17 100644
> --- a/orte/mca/snapc/full/snapc_full_component.c
> +++ b/orte/mca/snapc/full/snapc_full_component.c
> @@ -32,6 +32,7 @@ const char *orte_snapc_full_component_version_string =
> */
> static int snapc_full_open(void);
> static int snapc_full_close(void);
> +static int snapc_full_register(void);
>
> bool orte_snapc_full_skip_app = false;
> bool orte_snapc_full_timing_enabled = false;
> @@ -74,7 +75,7 @@ orte_snapc_full_component_t mca_snapc_full_component = {
> }
> };
>
> -static int snaps_full_register (void)
> +static int snapc_full_register (void)
> {
> mca_base_component_t *component = &mca_snapc_full_component.super.base_version;
> /*
> @@ -129,7 +130,7 @@ static int snaps_full_register (void)
> OPAL_INFO_LVL_9,

Similar comment to above: it's probably worth auditing the CR-related MCA params -- I'm guessing they should not all be level 9.

[snip]

> diff --git a/orte/mca/snapc/full/snapc_full_local.c b/orte/mca/snapc/full/snapc_full_local.c
> index 5df216d..98c6977 100644
> --- a/orte/mca/snapc/full/snapc_full_local.c
> +++ b/orte/mca/snapc/full/snapc_full_local.c
> @@ -1756,7 +1756,7 @@ static void snapc_full_local_comm_read_event(int fd, short flags, void *arg)
> if( currently_migrating && !flushed_modex ) {
> OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
> "Local) Read Event: Flush the modex cached data\n"));
> - if (ORTE_SUCCESS != (ret = orte_db.remove(NULL, NULL))) {
> + if (ORTE_SUCCESS != (ret = opal_db.remove(NULL, NULL))) {

Opal functions should be compared with OPAL_* values, not ORTE_* values. In this case, it's easy: s/ORTE_SUCCESS/OPAL_SUCCESS/.

I didn't notice this particular issue in the rest of the patch, but it's worth double checking for this issue in the places where you s/orte_db/opal_db/ -- I might have missed them.

-- 
Jeff Squyres
jsquyres_at_[hidden]
For corporate legal information go to: http://www.cisco.com/web/about/doing_business/legal/cri/