Skip to content
This repository has been archived by the owner on Sep 30, 2022. It is now read-only.

btl/openib: enable connecting processes from different subnets. #1043

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions opal/mca/btl/openib/btl_openib.c
Original file line number Diff line number Diff line change
Expand Up @@ -845,6 +845,14 @@ static int init_ib_proc_nolock(mca_btl_openib_module_t* openib_btl, mca_btl_open
matching_port = j;
}
rem_port_cnt++;
} else {
if (mca_btl_openib_component.allow_different_subnets) {
BTL_VERBOSE(("Using different subnets!"));
if (rem_port_cnt == btl_rank) {
matching_port = j;
}
rem_port_cnt++;
}
}
}

Expand Down Expand Up @@ -911,6 +919,13 @@ static int init_ib_proc_nolock(mca_btl_openib_module_t* openib_btl, mca_btl_open
break;
else
rem_port_cnt ++;
} else {
if (mca_btl_openib_component.allow_different_subnets) {
if (rem_port_cnt == btl_rank)
break;
else
rem_port_cnt ++;
}
}
}

Expand Down Expand Up @@ -977,6 +992,13 @@ static int get_openib_btl_params(mca_btl_openib_module_t* openib_btl, int *port_
rank = port_cnt;
}
port_cnt++;
} else {
if (mca_btl_openib_component.allow_different_subnets) {
if (openib_btl == mca_btl_openib_component.openib_btls[j]) {
rank = port_cnt;
}
port_cnt++;
}
}
}
*port_cnt_ptr = port_cnt;
Expand Down
3 changes: 3 additions & 0 deletions opal/mca/btl/openib/btl_openib.h
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,9 @@ struct mca_btl_openib_component_t {
char* default_recv_qps;
/** GID index to use */
int gid_index;
/** Whether we want to allow connecting processes from different subnets.
 * Disabled (false) by default. */
bool allow_different_subnets;
/** Whether we want a dynamically resizing srq, enabled by default */
bool enable_srq_resize;
bool allow_max_memory_registration;
Expand Down
5 changes: 5 additions & 0 deletions opal/mca/btl/openib/btl_openib_mca.c
Original file line number Diff line number Diff line change
Expand Up @@ -703,6 +703,11 @@ int btl_openib_register_mca_params(void)
0, &mca_btl_openib_component.gid_index,
REGINT_GE_ZERO));

/* Register the boolean "allow_different_subnets" parameter (default: false).
 * The two adjacent string literals below are concatenated by the compiler,
 * so the first one must end with a space to keep the help text readable. */
CHECK(reg_bool("allow_different_subnets", NULL,
"Allow connecting processes from different IB subnets. "
"(0 = do not allow; 1 = allow)",
false, &mca_btl_openib_component.allow_different_subnets));

#if MEMORY_LINUX_MALLOC_ALIGN_ENABLED
tmp = mca_base_var_find ("opal", "memory", "linux", "memalign");
if (0 <= tmp) {
Expand Down
3 changes: 2 additions & 1 deletion opal/mca/btl/openib/connect/btl_openib_connect_rdmacm.c
Original file line number Diff line number Diff line change
Expand Up @@ -2109,7 +2109,8 @@ static int rdmacm_component_query(mca_btl_openib_module_t *openib_btl, opal_btl_
sin.sin_addr.s_addr = rdmacm_addr;
sin.sin_port = (uint16_t) rdmacm_port;
#else
rc = ibv_query_gid(openib_btl->device->ib_pd->context, openib_btl->port_num, 0, &server->gid);
rc = ibv_query_gid(openib_btl->device->ib_pd->context, openib_btl->port_num,
mca_btl_openib_component.gid_index, &server->gid);
if (0 != rc) {
BTL_ERROR(("local gid query failed"));
goto out4;
Expand Down