From ea7af3566485b7b2e9ddf22aa36b527b057dd47f Mon Sep 17 00:00:00 2001 From: tabaer Date: Wed, 31 May 2017 15:49:18 -0400 Subject: [PATCH] Add optional GRES used/available/total metrics to moab module. Note that this requires that the user running gmond have permission to run mdiag -n GLOBAL --- moab/conf.d/moab.pyconf | 13 +++++++ moab/python_modules/moab.py | 73 ++++++++++++++++++++++++++++++++++--- 2 files changed, 80 insertions(+), 6 deletions(-) diff --git a/moab/conf.d/moab.pyconf b/moab/conf.d/moab.pyconf index 0dbf05fa..8d0a35ab 100644 --- a/moab/conf.d/moab.pyconf +++ b/moab/conf.d/moab.pyconf @@ -13,6 +13,11 @@ modules { value = False } + # Location of mdiag binary + param mdiag_bin { + value = "/opt/moab/bin/mdiag" + } + # location of Moab config files param moab_home_dir { value = "/var/spool/moab" @@ -28,6 +33,14 @@ modules { # value = 42559 #} + # Whether to query GRESes + # if this is true, make sure mdiag_bin is set + # and the gmond user is authorized to run + # mdiag -n GLOBAL + param query_gres { + value = False + } + # Location of showq binary param showq_bin { value = "/opt/moab/bin/showq" diff --git a/moab/python_modules/moab.py b/moab/python_modules/moab.py index b5256bf7..59644843 100644 --- a/moab/python_modules/moab.py +++ b/moab/python_modules/moab.py @@ -27,9 +27,10 @@ def get_metrics(): units = {} descr = {} prefix="" + query_gres = False + mdiag = None if ( "metric_prefix" in params ): prefix = params["metric_prefix"]+"_" - if ( 'moab_home_dir' in params ): os.environ['MOABHOMEDIR'] = params['moab_home_dir'] command = [ params['showq_bin'], "-s", "--xml" ] @@ -39,14 +40,18 @@ def get_metrics(): command.append("--port=%s" % str(params['moab_port'])) if ( 'timeout' in params ): command.append("--timeout=%s" % str(params['timeout'])) + if ( 'query_gres' in params ): + query_gres = params['query_gres'] + if ( 'mdiag_bin' in params ): + mdiag = params['mdiag_bin'] if ( 'debug' in params ): print str(command) - p = subprocess.Popen(command, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - close_fds=True) try: + p = subprocess.Popen(command, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + close_fds=True) xmldoc = minidom.parseString("\n".join(p.stdout.readlines())) p.stdout.close() xmlclusters = xmldoc.getElementsByTagName("cluster") @@ -115,7 +120,59 @@ def get_metrics(): new_metrics[metric_name] = int(xmlqueue.attributes["count"].value) units[metric_name] = "jobs" descr[metric_name] = "Blocked Jobs" - + + if ( query_gres and mdiag is not None ): + try: + command = [ mdiag,"-n","GLOBAL","--xml" ] + if ( 'moab_server' in params ): + command.append("--host=%s" % params['moab_server']) + if ( 'moab_port' in params ): + command.append("--port=%s" % str(params['moab_port'])) + if ( 'timeout' in params ): + command.append("--timeout=%s" % str(params['timeout'])) + if ( 'debug' in params ): + print str(command) + p = subprocess.Popen(command, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + close_fds=True) + xmldoc = minidom.parseString("\n".join(p.stdout.readlines())) + p.stdout.close() + xmlnodes = xmldoc.getElementsByTagName("node") + for xmlnode in xmlnodes: + if ( xmlnode.hasAttributes() ): + if ( "GRES" in xmlnode.attributes.keys() ): + greses = xmlnode.attributes["GRES"].value + for gres in greses.split(";"): + (name,value) = gres.split("=") + metric_name = "%s%s_gres_total" % (prefix,name.lower()) + new_metrics[metric_name] = int(value) + units[metric_name] = "count" + descr[metric_name] = "%s GRES Total" % name.lower() + # zero out things that might get updated later + metric_name = "%s%s_gres_used" % (prefix,name.lower()) + new_metrics[metric_name] = 0 + units[metric_name] = "count" + descr[metric_name] = "%s GRES Used" % name.lower() + metric_name = "%s%s_gres_avail" % (prefix,name.lower()) + new_metrics[metric_name] = 0 + units[metric_name] = "count" + descr[metric_name] = "%s GRES Available" % name.lower() + if ( "AGRES" in xmlnode.attributes.keys() ): + greses = xmlnode.attributes["AGRES"].value + for gres in greses.split(";"): + (name,value) = gres.split("=") + metric_name = "%s%s_gres_avail" % (prefix,name.lower()) + new_metrics[metric_name] = int(value) + if ( "DEDGRES" in xmlnode.attributes.keys() ): + greses = xmlnode.attributes["DEDGRES"].value + for gres in greses.split(";"): + (name,value) = gres.split("=") + metric_name = "%s%s_gres_used" % (prefix,name.lower()) + new_metrics[metric_name] = int(value) + except Exception as e: + sys.stderr.write("WARNING: %s\n" % str(e)) + pass METRICS = { 'time': time.time(), 'data': new_metrics, @@ -195,10 +252,14 @@ def metric_cleanup(): params = { "metric_prefix" : "moab", #"debug" : True, + "mdiag_bin" : "/opt/moab/bin/mdiag", + #"mdiag_bin" : "/usr/local/moab/default/bin/mdiag", "moab_home_dir" : "/var/spool/moab", #"moab_server" : "moabsrv.mydomain.org", #"moab_port" : 42559, + #"query_gres" : True, "showq_bin" : "/opt/moab/bin/showq", + #"showq_bin" : "/usr/local/moab/default/bin/showq", "timeout" : 30, }