diff --git a/NEWS b/NEWS index 4b8928b64bc..1749e519068 100644 --- a/NEWS +++ b/NEWS @@ -36,6 +36,7 @@ documents those changes that are of interest to users and admins. -- Fix logic to optimize GRES topology with respect to allocated CPUs. -- Add job_submit/all_partitions plugin to set a job's default partition to ALL available partitions in the cluster. + -- Modify switch/nrt logic to permit build without libnrt.so library. * Changes in SLURM 2.5.1 ======================== diff --git a/auxdir/x_ac_nrt.m4 b/auxdir/x_ac_nrt.m4 index b8f697a9c30..4b2f29c50da 100644 --- a/auxdir/x_ac_nrt.m4 +++ b/auxdir/x_ac_nrt.m4 @@ -27,7 +27,9 @@ AC_DEFUN([X_AC_NRT], if test -f "$nrt_dir/nrt.h" -a -f "$nrt_dir/permapi.h"; then ac_have_nrt_h="yes" NRT_CPPFLAGS="-I$nrt_dir" - break; + AC_DEFINE(HAVE_NRT_H, 1, [define if you have nrt.h]) + AC_DEFINE(HAVE_PERMAPI_H, 1, [define if you have permapi.h]) + break; fi done if test "x$ac_have_nrt_h" != "xyes" ; then @@ -58,13 +60,12 @@ AC_DEFUN([X_AC_NRT], if test "x$ac_have_libnrt" != "xyes" ; then AC_MSG_RESULT([no]) - AC_MSG_NOTICE([Cannot support IBM NRT API without libnrt.so]) else AC_MSG_RESULT([yes]) AC_DEFINE(HAVE_LIBNRT, 1, [define if you have libnrt.so]) fi - if test "x$ac_have_nrt_h" = "xyes" && test "x$ac_have_libnrt" = "xyes"; then + if test "x$ac_have_nrt_h" = "xyes"; then ac_have_nrt="yes" fi AM_CONDITIONAL(HAVE_NRT, test "x$ac_have_nrt" = "xyes") diff --git a/config.h.in b/config.h.in index 2d40afd9686..f2233a9c8e3 100644 --- a/config.h.in +++ b/config.h.in @@ -182,6 +182,9 @@ /* Define to 1 if you have the <netdb.h> header file. */ #undef HAVE_NETDB_H +/* define if you have nrt.h */ +#undef HAVE_NRT_H + /* define if numa library installed */ #undef HAVE_NUMA @@ -197,6 +200,9 @@ /* Define to 1 if you have the <paths.h> header file. 
*/ #undef HAVE_PATHS_H +/* define if you have permapi.h */ +#undef HAVE_PERMAPI_H + /* Define to 1 if using PostgreSQL libaries */ #undef HAVE_PGSQL diff --git a/configure b/configure index e520698f543..754d5415e94 100755 --- a/configure +++ b/configure @@ -21755,7 +21755,13 @@ $as_echo_n "checking Checking NRT and PERMAPI header files... " >&6; } if test -f "$nrt_dir/nrt.h" -a -f "$nrt_dir/permapi.h"; then ac_have_nrt_h="yes" NRT_CPPFLAGS="-I$nrt_dir" - break; + +$as_echo "#define HAVE_NRT_H 1" >>confdefs.h + + +$as_echo "#define HAVE_PERMAPI_H 1" >>confdefs.h + + break; fi done if test "x$ac_have_nrt_h" != "xyes" ; then @@ -21796,8 +21802,6 @@ $as_echo_n "checking whether to enable IBM NRT support... " >&6; } if test "x$ac_have_libnrt" != "xyes" ; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } - { $as_echo "$as_me:${as_lineno-$LINENO}: Cannot support IBM NRT API without libnrt.so" >&5 -$as_echo "$as_me: Cannot support IBM NRT API without libnrt.so" >&6;} else { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } @@ -21806,7 +21810,7 @@ $as_echo "#define HAVE_LIBNRT 1" >>confdefs.h fi - if test "x$ac_have_nrt_h" = "xyes" && test "x$ac_have_libnrt" = "xyes"; then + if test "x$ac_have_nrt_h" = "xyes"; then ac_have_nrt="yes" fi if test "x$ac_have_nrt" = "xyes"; then diff --git a/doc/html/ibm-pe.shtml b/doc/html/ibm-pe.shtml index 48287416294..245560cb935 100644 --- a/doc/html/ibm-pe.shtml +++ b/doc/html/ibm-pe.shtml @@ -128,8 +128,8 @@ Allocate one switch window per task on each node and every network supporting MPI and a second window supporting PAMI.

--network=devtype=ib,instances=2,lapi,mpi
-On every Infiniband network connection, allocate two switch windows each for -both lapi and mpi interfaces. If each node has one Infinband network connection, +On every InfiniBand network connection, allocate two switch windows each for +both lapi and mpi interfaces. If each node has one InfiniBand network connection, this would result in four switch windows per task.

@@ -249,7 +249,7 @@ $ ls -l ~/.slurm/slurm_cmdfile.* $ rm ~/.slurm/slurm_cmdfile.* -

The -L/--label option differs slighly in that when the output from multiple +

The -L/--label option differs slightly in that when the output from multiple tasks are identical, they are combined on a single line with the prefix identifying which task(s) generated the output. In addition, there is a colon but no space between the task IDs and output. For example:

@@ -270,7 +270,7 @@ but no space between the task IDs and output. For example:

In addition, when srun's --multi-prog option (for Multiple Program, Multiple Data configurations) is used with the -L/--label option then a job -step ID, colon and space will preceed the task ID and colon. For example:

+step ID, colon and space will precede the task ID and colon. For example:

 # SLURM OUTPUT
 0: zero
@@ -284,7 +284,7 @@ step ID, colon and space will preceed the task ID and colon. For example:

The srun command is not able to report task status upon receipt of a SIGINT -signal (ctrl-c interupt from keyboard), however two SIGINT signals within a +signal (ctrl-c interrupt from keyboard), however two SIGINT signals within a one second interval will terminate the job as on other SLURM configurations.

Environment Variables

@@ -381,6 +381,11 @@ ProctrackType=proctrack/cgroup

In order for these plugins to be built, the locations of the POE Resource Manager header file (permapi.h) the NRT header file (nrt.h) and NRT library (libnrt.so) must be identified at the time the SLURM is built. +SLURM may be built and used on nodes without the NRT library (libnrt.so) +installed, but both the permapi.h and nrt.h header files must be available +when SLURM is built, and the switch/nrt plugin built on such a node cannot be +used on a compute node. The permapi.h and nrt.h header files may be installed +in a temporary location, such as /tmp, while SLURM is being built. SLURM searches for the header files in the /usr/include directory by default. If the files are not installed there, you can specify a different location using the --with-nrth=PATH option to the configure program, where "PATH" is @@ -438,12 +443,12 @@ export MP_TIMEOUT=600 Environment. Job's can be explicitly preempted and later resumed using the scontrol suspend <jobid> and scontrol resume <jobid> commands. This functionality relies upon NRT functions to suspend/resume -programs and reset MPI timeouts. Note that SLURM suports the preemption only +programs and reset MPI timeouts. Note that SLURM supports the preemption only of whole jobs rather than individual job steps. A suspended job will relinquish CPU resources, but retain memory and switch window resources. Note that the long term suspension of jobs with any allocated Collective Acceleration Units (CAU) is disabled and an error message to that effect will be generated -in response to such a request. In addition, verion 1200 or higher of IBM's NRT +in response to such a request. In addition, version 1200 or higher of IBM's NRT API is required to support this functionality.

Design Notes

@@ -460,7 +465,7 @@ node.

It is possible to configure SLURM and LoadLeveler to simultaneously exist on a cluster, however each scheduler must be configured to manage different compute nodes (e.g. LoadLeveler can manage compute nodes "tux[1-8]" and SLURM -can manaage compute nodes "tux[9-16]" on the same cluster). In addition, the +can manage compute nodes "tux[9-16]" on the same cluster). In addition, the /etc/poe.limits file on each node must identify the MP_PE_RMLIB appropriate for that node (e.g. IBM's or SLURM's libpermapi.so)

@@ -469,7 +474,7 @@ Then poe uses the launch/slurm plugin to launch the "pmd" process on the compute nodes, so two launch plugins are actually used.

Depending upon job size and network options, allocating and deallocating -switch resources can take multple seconds per node and the process of launching +switch resources can take multiple seconds per node and the process of launching applications on multiple nodes is not well parallelized. This is outside of SLURM's control.

@@ -478,7 +483,7 @@ This is outside of SLURM's control.

It is possible to generate detailed logging of all switch/nrt actions and data by configuring DebugFlags=switch.

-

The envirnoment variable MP_INFOLEVEL can be used to enable the +

The environment variable MP_INFOLEVEL can be used to enable the logging of POE debug messages. To enable fairly detailed logging, set MP_INFOLEVEL=6.

@@ -494,6 +499,6 @@ startsrc -s pnsd -a -D -

Last modified 21 November 2012

+

Last modified 10 January 2013

diff --git a/src/plugins/job_submit/all_partitions/Makefile.in b/src/plugins/job_submit/all_partitions/Makefile.in index 5be8ba55745..791068a24d2 100644 --- a/src/plugins/job_submit/all_partitions/Makefile.in +++ b/src/plugins/job_submit/all_partitions/Makefile.in @@ -130,12 +130,14 @@ am__uninstall_files_from_dir = { \ am__installdirs = "$(DESTDIR)$(pkglibdir)" LTLIBRARIES = $(pkglib_LTLIBRARIES) job_submit_all_partitions_la_LIBADD = -am_job_submit_all_partitions_la_OBJECTS = job_submit_all_partitions.lo +am_job_submit_all_partitions_la_OBJECTS = \ + job_submit_all_partitions.lo job_submit_all_partitions_la_OBJECTS = \ $(am_job_submit_all_partitions_la_OBJECTS) -job_submit_all_partitions_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \ - $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ - $(job_submit_all_partitions_la_LDFLAGS) $(LDFLAGS) -o $@ +job_submit_all_partitions_la_LINK = $(LIBTOOL) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(AM_CFLAGS) $(CFLAGS) $(job_submit_all_partitions_la_LDFLAGS) \ + $(LDFLAGS) -o $@ DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) -I$(top_builddir)/slurm depcomp = $(SHELL) $(top_srcdir)/auxdir/depcomp am__depfiles_maybe = depfiles diff --git a/src/plugins/switch/nrt/libpermapi/shr_64.c b/src/plugins/switch/nrt/libpermapi/shr_64.c index 29c240323f9..7c35ad0c4cb 100644 --- a/src/plugins/switch/nrt/libpermapi/shr_64.c +++ b/src/plugins/switch/nrt/libpermapi/shr_64.c @@ -35,7 +35,6 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. \*****************************************************************************/ -#include #include #include #include @@ -47,6 +46,17 @@ # include "config.h" #endif +#if HAVE_NRT_H +# include <nrt.h> +#else +# error "Must have nrt.h to compile this module!" +#endif +#if HAVE_PERMAPI_H +# include <permapi.h> +#else +# error "Must have permapi.h to compile this module!" 
+#endif + #include "src/common/slurm_xlator.h" #include "slurm/slurm.h" #include "slurm/slurm_errno.h" diff --git a/src/plugins/switch/nrt/nrt.c b/src/plugins/switch/nrt/nrt.c index 05a1f0eb40b..9fad0920b4e 100644 --- a/src/plugins/switch/nrt/nrt.c +++ b/src/plugins/switch/nrt/nrt.c @@ -58,10 +58,10 @@ # include "config.h" #endif -#if HAVE_LIBNRT +#if HAVE_NRT_H # include <nrt.h> #else -# error "Must have libnrt to compile this module!" +# error "Must have nrt.h to compile this module!" #endif #include @@ -76,6 +76,14 @@ #include "src/plugins/switch/nrt/nrt_keys.h" #include "src/plugins/switch/nrt/slurm_nrt.h" +/* If the head node has nrt.h, but no libnrt.so, we need to build the + * switch/nrt plugin in order to manage the nrt data structures, but + * will not make use of the nrt_command function. */ +#if !HAVE_LIBNRT + int nrt_command(int version, nrt_cmd_type_t cmd_type, void *cmd) + { fatal("nrt_command not supported without libnrt"); return 0; } +#endif + extern int drain_nodes ( char *nodes, char *reason, uint32_t reason_uid ); /* diff --git a/src/plugins/switch/nrt/nrt_keys.h b/src/plugins/switch/nrt/nrt_keys.h index 7487176f07c..9c53097ac56 100644 --- a/src/plugins/switch/nrt/nrt_keys.h +++ b/src/plugins/switch/nrt/nrt_keys.h @@ -39,10 +39,10 @@ #ifndef _NRT_KEYS_INCLUDED #define _NRT_KEYS_INCLUDED -#if HAVE_LIBNRT +#if HAVE_NRT_H # include <nrt.h> #else -# error "Must have libnrt to compile this module!" +# error "Must have nrt.h to compile this module!" #endif enum { diff --git a/src/plugins/switch/nrt/slurm_nrt.h b/src/plugins/switch/nrt/slurm_nrt.h index 9a2b48e20e2..0306f80aaa0 100644 --- a/src/plugins/switch/nrt/slurm_nrt.h +++ b/src/plugins/switch/nrt/slurm_nrt.h @@ -47,10 +47,10 @@ #ifndef _SLURM_NRT_INCLUDED #define _SLURM_NRT_INCLUDED -#if HAVE_LIBNRT +#if HAVE_NRT_H # include <nrt.h> #else -# error "Must have libnrt to compile this module!" +# error "Must have nrt.h to compile this module!" #endif /* opaque data structures - no peeking! 
*/ diff --git a/testsuite/expect/test6.13 b/testsuite/expect/test6.13 index 5662114f7b0..edfe57afe31 100755 --- a/testsuite/expect/test6.13 +++ b/testsuite/expect/test6.13 @@ -83,7 +83,7 @@ if {[wait_for_job $job_id RUNNING] != 0} { } # Allow time for the step to start # This could take a while with launch/poe -sleep 10 +sleep 20 # # Test verbose scancel diff --git a/testsuite/expect/test6.7 b/testsuite/expect/test6.7 index 49b420f1c07..e39049e8894 100755 --- a/testsuite/expect/test6.7 +++ b/testsuite/expect/test6.7 @@ -81,7 +81,7 @@ if {[wait_for_job $job_id RUNNING] != 0} { } # Allow time for the step to start # This could take a while with launch/poe -sleep 10 +sleep 20 # # Test verbose scancel