Skip to content

Commit

Permalink
Merge pull request ganglia#222 from tabaer/master
Browse files Browse the repository at this point in the history
Various ipmi improvements
  • Loading branch information
jbuchbinder authored May 11, 2017
2 parents 3288fc8 + 78692e4 commit 6b3ca3b
Show file tree
Hide file tree
Showing 4 changed files with 444 additions and 53 deletions.
37 changes: 25 additions & 12 deletions ipmi/conf.d/ipmi.pyconf
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,34 @@ modules {
value = "ipmi"
}

# IP of the IPMI
param ipmi_ip {
value = "10.1.2.3"
# use sudo
param use_sudo {
value = False
}

# IP of the IPMI (optional)
#param ipmi_ip {
# value = "10.1.2.3"
#}

param username {
value = "admin"
}
# IPMI username (optional)
#param username {
# value = "admin"
#}

param password {
value = "secret"
}
# IPMI password (optional)
#param password {
# value = "secret"
#}

param level {
value = "USER"
# IPMI auth level (optional)
#param level {
# value = "USER"
#}

# timeout on ipmitool command
param timeout {
value = 15
}

# Location of timeout binary
Expand All @@ -31,7 +44,7 @@ modules {
}

# Location of ipmitool binary
param timeout_bin {
param ipmitool_bin {
value = "/usr/bin/ipmitool"
}

Expand Down
198 changes: 157 additions & 41 deletions ipmi/python_modules/ipmi.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import sys
import re
import time
Expand All @@ -7,38 +8,143 @@

METRICS = {
'time' : 0,
'data' : {}
'data' : {},
'units': {},
'descr': {}
}

METRICS_CACHE_MAX = 5

stats_pos = {}

def get_metrics(params):
# Try to make different vendors' sensor names at least somewhat consistent...
# This list is admittedly a bit Dell centric, as I have HP and Dell
# hardware and Dell's sensor names (mostly) make more sense to me than
# HP's... --troy
#
# Lookup table: raw sensor name -> unified sensor name.
# mangle_metric_name() and metric_description() look a sensor up here after
# stripping surrounding whitespace from its name; a sensor that is not
# listed keeps its own name. Several raw names deliberately map to the same
# unified name (e.g. three different keys all become "Inlet Temp") so that
# equivalent sensors from different hardware end up with one name.
unified_metric_names = {
# HP sensor names
"01-Inlet Ambient": "Inlet Temp",
"43-Sys Exhaust": "Exhaust Temp",
"02-CPU 1": "CPU 1 Temp",
"03-CPU 2": "CPU 2 Temp",
"04-P1 DIMM 1-4": "CPU 1 MemBank 1 Temp",
"05-P1 DIMM 5-8": "CPU 1 MemBank 2 Temp",
"06-P2 DIMM 1-4": "CPU 2 MemBank 1 Temp",
"07-P2 DIMM 5-8": "CPU 2 MemBank 2 Temp",
"34-Coprocessor 1": "Coprocessor 1 Temp",
"35-Coprocessor 2": "Coprocessor 2 Temp",
"36-Coprocessor 3": "Coprocessor 3 Temp",
"42-P/S Board": "Pwr Supply 1 Temp",
"Power Meter": "Pwr Consumption",
"Temp 1": "Inlet Temp",
"Temp 2 (CPU 1)": "CPU 1 Temp",
"Temp 3 (CPU 2)": "CPU 2 Temp",
"Temp 4 (MemD1)": "CPU 1 MemBank 1 Temp",
"Temp 5 (MemD2)": "CPU 2 MemBank 1 Temp",
"Temp 16 (GPU2)": "Coprocessor 2 Temp",
"Temp 17 (GPU3)": "Coprocessor 3 Temp",
"Temp 18 (GPU1)": "Coprocessor 1 Temp",
# Dell sensor names
# (these entries only insert a space between "Fan" and the fan id)
"Fan1": "Fan 1",
"Fan2": "Fan 2",
"Fan3": "Fan 3",
"Fan4": "Fan 4",
"Fan5": "Fan 5",
"Fan6": "Fan 6",
"Fan7": "Fan 7",
"Fan8": "Fan 8",
"Fan1A": "Fan 1A",
"Fan1B": "Fan 1B",
"Fan2A": "Fan 2A",
"Fan2B": "Fan 2B",
"Fan3A": "Fan 3A",
"Fan3B": "Fan 3B",
"Fan4A": "Fan 4A",
"Fan4B": "Fan 4B",
"Fan5A": "Fan 5A",
"Fan5B": "Fan 5B",
"Fan6A": "Fan 6A",
"Fan6B": "Fan 6B",
"Fan7A": "Fan 7A",
"Fan7B": "Fan 7B",
"Fan8A": "Fan 8A",
"Fan8B": "Fan 8B",
# Intel(?) sensor names
"Front Panel Temp": "Inlet Temp",
"Exit Air Temp": "Exhaust Temp",
"System Fan 1": "Fan 1",
"System Fan 2": "Fan 2",
"Processor 1 Fan": "Fan 3",
"Processor 2 Fan": "Fan 4",
"PS1 Temperature": "Pwr Supply 1 Temp",
"PS2 Temperature": "Pwr Supply 2 Temp"
}
def mangle_metric_name(metric_name, prefix):
    """Build the metric name for a sensor.

    The sensor name is stripped of surrounding whitespace and, when it has
    an entry in ``unified_metric_names``, replaced by its unified name.
    The result is lower-cased, every "+" is removed, spaces and "-" are
    turned into "_", and ``prefix`` followed by "_" is put in front.

    metric_name -- sensor name as reported by the hardware
    prefix      -- string prepended (with "_") to the mangled name
    """
    sensor = metric_name.strip()
    # Unknown sensors keep their own name; it is stripped again below.
    label = unified_metric_names.get(sensor, metric_name)
    slug = label.strip().lower()
    slug = slug.replace("+", "").replace(" ", "_").replace("-", "_")
    return prefix + "_" + slug
def metric_description(metric_name):
    """Return the description text for a sensor.

    This is the sensor's unified name when the whitespace-stripped
    ``metric_name`` has an entry in ``unified_metric_names``; otherwise it
    is the stripped sensor name itself.
    """
    sensor = metric_name.strip()
    return unified_metric_names.get(sensor, sensor)

def get_metrics():
"""Return all metrics"""

global METRICS

if (time.time() - METRICS['time']) > METRICS_CACHE_MAX:

new_metrics = {}
units = {}

command = [ params['timeout_bin'],
"3", params['ipmitool_bin'],
"-H", params['ipmi_ip'],
"-U", params['username'],
'-P', params['password'],
'-L', params['level'],
'sensor']

params = global_params

# bail out if no ipmi ip address is set and there are no
# ipmi device files available (i.e. ipmitool is guaranteed
# to fail)
if ( 'ipmi_ip' not in params.keys() and
not os.path.exists('/dev/ipmi0') and
not os.path.exists('/dev/ipmi/0') and
not os.path.exists('/dev/ipmidev/0') ):
pass
# otherwise, run ipmitool if we're outside the cache timeout
elif (time.time() - METRICS['time']) > METRICS_CACHE_MAX:
new_metrics = {}
units = {}
descr = {}

command = [ params['timeout_bin'], str(params['timeout']) ]
if ( 'use_sudo' in params.keys() and params['use_sudo'] ):
command.append('sudo')
command.append(params['ipmitool_bin'])
if ( 'ipmi_ip' in params.keys() ):
command.append("-H")
command.append(params['ipmi_ip'])
if ( 'username' in params.keys() ):
command.append("-U")
command.append(params['username'])
if ( 'password' in params.keys() ):
command.append('-P')
command.append(params['password'])
if ('level' in params.keys() ):
command.append('-L')
command.append(params['level'])
command.append('sensor')

p = subprocess.Popen(command,
stdout=subprocess.PIPE).communicate()[0][:-1]

dell_temp_count = 1
for i, v in enumerate(p.split("\n")):
data = v.split("|")
try:
metric_name = data[0].strip().lower().replace("+", "").replace(" ", "_")
if ( data[0].strip()=="Temp" ):
# Dell names all CPU temperature sensors "Temp";
# thus, the following stupidity:
description = "CPU "+str(dell_temp_count)+" Temp"
metric_name = mangle_metric_name(description,params['metric_prefix'])
dell_temp_count = dell_temp_count+1
else:
description = metric_description(data[0])
metric_name = mangle_metric_name(data[0],params['metric_prefix'])
value = data[1].strip()

# Skip missing sensors
Expand All @@ -53,16 +159,18 @@ def get_metrics(params):

new_metrics[metric_name] = metric_value
units[metric_name] = data[2].strip().replace("degrees C", "C")

descr[metric_name] = description

except ValueError:
continue
except IndexError:
continue
METRICS = {
METRICS = {
'time': time.time(),
'data': new_metrics,
'units': units
'units': units,
'descr': descr
}

return [METRICS]
Expand All @@ -72,14 +180,15 @@ def get_value(name):
"""Return a value for the requested metric"""

try:

metrics = get_metrics()[0]

metrics = get_metrics()[0]

name = name.lstrip('ipmi_')
if ( name in metrics['data'].keys() ):
result = metrics['data'][name]
else:
result = 0

result = metrics['data'][name]

except Exception:
except Exception as e:
result = 0

return result
Expand All @@ -91,7 +200,7 @@ def create_desc(skel, prop):
return d

def metric_init(params):
global descriptors, metric_map, Desc_Skel
global descriptors, metric_map, Desc_Skel, global_params

descriptors = []

Expand All @@ -107,34 +216,41 @@ def metric_init(params):
'groups' : 'XXX',
}

metrics = get_metrics(params)[0]
global_params = params

metrics = get_metrics()[0]

for item in metrics['data']:
descriptors.append(create_desc(Desc_Skel, {
"name" : params['metric_prefix'] + "_" + item,
'groups' : params['metric_prefix'],
'units' : metrics['units'][item]
}))
descriptors.append(create_desc(Desc_Skel, {
'name' : item,
'description' : metrics['descr'][item],
'groups' : params['metric_prefix'],
'units' : metrics['units'][item]
}))


return descriptors


def metric_cleanup():
    """Clean up the metric module; there is currently nothing to do."""
    return None


#This code is for debugging and unit testing
if __name__ == '__main__':

params = {
"metric_prefix" : "ipmi",
"ipmi_ip" : "10.1.2.3",
"username" : "ADMIN",
"password" : "secret",
"level" : "USER",
"ipmitool_bin" : "/usr/bin/ipmitool",
"timeout_bin" : "/usr/bin/timeout"
}
"use_sudo" : False,
"metric_prefix" : "ipmi",
#"ipmi_ip" : "10.1.2.3",
#"username" : "ADMIN",
#"password" : "secret",
#"level" : "USER",
"timeout" : 15,
"ipmitool_bin" : "/usr/bin/ipmitool",
"timeout_bin" : "/usr/bin/timeout"
}
descriptors = metric_init(params)

while True:
Expand Down
52 changes: 52 additions & 0 deletions moab/conf.d/moab.pyconf
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
modules {
module {
name = "moab"
language = "python"

# If you change this value, update the name_match pattern in the collection_group section to match
param metric_prefix {
value = "moab"
}

# Debug flag
param debug {
value = False
}

# location of Moab config files
param moab_home_dir {
value = "/var/spool/moab"
}

# Moab server host
#param moab_server {
# value = "moabsrv.mydomain.org"
#}

# Moab server port
#param moab_port {
# value = 42559
#}

# Location of showq binary
param showq_bin {
value = "/opt/moab/bin/showq"
}

# timeout on Moab client commands
param timeout {
value = 15
}
}
}

collection_group {
collect_every = 60
time_threshold = 90

metric {
name_match = "moab_(.+)"
value_threshold = 1.0
}

}
Loading

0 comments on commit 6b3ca3b

Please sign in to comment.