Skip to content

Commit

Permalink
Add optional GRES used/available/total metrics to moab module.
Browse files Browse the repository at this point in the history
Note that this requires that the user running gmond have permission to run "mdiag -n GLOBAL".
  • Loading branch information
tabaer committed May 31, 2017
1 parent d2960eb commit ea7af35
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 6 deletions.
13 changes: 13 additions & 0 deletions moab/conf.d/moab.pyconf
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ modules {
value = False
}

# Location of mdiag binary
param mdiag_bin {
value = "/opt/moab/bin/mdiag"
}

# Location of Moab config files
param moab_home_dir {
value = "/var/spool/moab"
Expand All @@ -28,6 +33,14 @@ modules {
# value = 42559
#}

# Whether to query GRESes.
# If this is True, make sure that mdiag_bin is set
# and that the gmond user is authorized to run
# "mdiag -n GLOBAL".
param query_gres {
value = False
}

# Location of showq binary
param showq_bin {
value = "/opt/moab/bin/showq"
Expand Down
73 changes: 67 additions & 6 deletions moab/python_modules/moab.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,10 @@ def get_metrics():
units = {}
descr = {}
prefix=""
query_gres = False
mdiag = None
if ( "metric_prefix" in params ):
prefix = params["metric_prefix"]+"_"

if ( 'moab_home_dir' in params ):
os.environ['MOABHOMEDIR'] = params['moab_home_dir']
command = [ params['showq_bin'], "-s", "--xml" ]
Expand All @@ -39,14 +40,18 @@ def get_metrics():
command.append("--port=%s" % str(params['moab_port']))
if ( 'timeout' in params ):
command.append("--timeout=%s" % str(params['timeout']))
if ( 'query_gres' in params ):
query_gres = params['query_gres']
if ( 'mdiag_bin' in params ):
mdiag = params['mdiag_bin']
if ( 'debug' in params ):
print str(command)

p = subprocess.Popen(command,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
close_fds=True)
try:
p = subprocess.Popen(command,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
close_fds=True)
xmldoc = minidom.parseString("\n".join(p.stdout.readlines()))
p.stdout.close()
xmlclusters = xmldoc.getElementsByTagName("cluster")
Expand Down Expand Up @@ -115,7 +120,59 @@ def get_metrics():
new_metrics[metric_name] = int(xmlqueue.attributes["count"].value)
units[metric_name] = "jobs"
descr[metric_name] = "Blocked Jobs"


if ( query_gres and mdiag is not None ):
try:
command = [ mdiag,"-n","GLOBAL","--xml" ]
if ( 'moab_server' in params ):
command.append("--host=%s" % params['moab_server'])
if ( 'moab_port' in params ):
command.append("--port=%s" % str(params['moab_port']))
if ( 'timeout' in params ):
command.append("--timeout=%s" % str(params['timeout']))
if ( 'debug' in params ):
print str(command)
p = subprocess.Popen(command,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
close_fds=True)
xmldoc = minidom.parseString("\n".join(p.stdout.readlines()))
p.stdout.close()
xmlnodes = xmldoc.getElementsByTagName("node")
for xmlnode in xmlnodes:
if ( xmlnode.hasAttributes() ):
if ( "GRES" in xmlnode.attributes.keys() ):
greses = xmlnode.attributes["GRES"].value
for gres in greses.split(";"):
(name,value) = gres.split("=")
metric_name = "%s%s_gres_total" % (prefix,name.lower())
new_metrics[metric_name] = int(value)
units[metric_name] = "count"
descr[metric_name] = "%s GRES Total" % name.lower()
# zero out things that might get updated later
metric_name = "%s%s_gres_used" % (prefix,name.lower())
new_metrics[metric_name] = 0
units[metric_name] = "count"
descr[metric_name] = "%s GRES Used" % name.lower()
metric_name = "%s%s_gres_avail" % (prefix,name.lower())
new_metrics[metric_name] = 0
units[metric_name] = "count"
descr[metric_name] = "%s GRES Available" % name.lower()
if ( "AGRES" in xmlnode.attributes.keys() ):
greses = xmlnode.attributes["AGRES"].value
for gres in greses.split(";"):
(name,value) = gres.split("=")
metric_name = "%s%s_gres_avail" % (prefix,name.lower())
new_metrics[metric_name] = int(value)
if ( "DEDGRES" in xmlnode.attributes.keys() ):
greses = xmlnode.attributes["DEDGRES"].value
for gres in greses.split(";"):
(name,value) = gres.split("=")
metric_name = "%s%s_gres_used" % (prefix,name.lower())
new_metrics[metric_name] = int(value)
except Exception as e:
sys.stderr.write("WARNING: %s\n" % str(e))
pass
METRICS = {
'time': time.time(),
'data': new_metrics,
Expand Down Expand Up @@ -195,10 +252,14 @@ def metric_cleanup():
params = {
"metric_prefix" : "moab",
#"debug" : True,
"mdiag_bin" : "/opt/moab/bin/mdiag",
#"mdiag_bin" : "/usr/local/moab/default/bin/mdiag",
"moab_home_dir" : "/var/spool/moab",
#"moab_server" : "moabsrv.mydomain.org",
#"moab_port" : 42559,
#"query_gres" : True,
"showq_bin" : "/opt/moab/bin/showq",
#"showq_bin" : "/usr/local/moab/default/bin/showq",
"timeout" : 30,
}

Expand Down

0 comments on commit ea7af35

Please sign in to comment.