Skip to content

Commit

Permalink
Merge pull request ceph#16487 from liewegas/wip-mgr-module-health
Browse files Browse the repository at this point in the history
mgr: mgr_module interface to report health alerts

Reviewed-by: John Spray <john.spray@redhat.com>
  • Loading branch information
John Spray authored Jul 28, 2017
2 parents c6ef41b + 76a35c1 commit 2c4ad54
Show file tree
Hide file tree
Showing 8 changed files with 166 additions and 2 deletions.
3 changes: 3 additions & 0 deletions src/mgr/DaemonServer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1169,6 +1169,8 @@ void DaemonServer::send_report()
}

auto m = new MMonMgrReport();
py_modules.get_health_checks(&m->health_checks);

cluster_state.with_pgmap([&](const PGMap& pg_map) {
cluster_state.update_delta_stats();

Expand All @@ -1191,6 +1193,7 @@ void DaemonServer::send_report()

pg_map.get_health_checks(g_ceph_context, osdmap,
&m->health_checks);

dout(10) << m->health_checks.checks.size() << " health checks"
<< dendl;
dout(20) << "health checks:\n";
Expand Down
4 changes: 4 additions & 0 deletions src/mgr/MgrPyModule.cc
Original file line number Diff line number Diff line change
Expand Up @@ -365,3 +365,7 @@ int MgrPyModule::handle_command(
return r;
}

/**
 * Contribute this module's stored health checks to an aggregate map.
 *
 * The module's health_checks member is folded into *checks through
 * health_check_map_t::merge(); the module's own copy is passed as the
 * argument to merge().
 * NOTE(review): how merge() resolves entries that share a name is defined
 * in mon/health_check.h and is not visible from here -- confirm there.
 *
 * @param checks destination map; must not be null (it is dereferenced
 *               unconditionally).
 */
void MgrPyModule::get_health_checks(health_check_map_t *checks)
{
  health_check_map_t& aggregate = *checks;
  aggregate.merge(health_checks);
}
9 changes: 9 additions & 0 deletions src/mgr/MgrPyModule.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@

#include "common/cmdparse.h"
#include "common/LogEntry.h"
#include "common/Mutex.h"
#include "mon/health_check.h"

#include <vector>
#include <string>
Expand All @@ -47,6 +49,8 @@ class MgrPyModule
PyThreadState *pMainThreadState;
PyThreadState *pMyThreadState = nullptr;

health_check_map_t health_checks;

std::vector<ModuleCommand> commands;

int load_commands();
Expand Down Expand Up @@ -75,6 +79,11 @@ class MgrPyModule
const cmdmap_t &cmdmap,
std::stringstream *ds,
std::stringstream *ss);

/// Replace this module's stored health checks with c.
/// The argument is taken by rvalue reference and moved into the
/// health_checks member, so the caller's map should be treated as
/// moved-from afterwards. No locking is done here.
void set_health_checks(health_check_map_t&& c) {
health_checks = std::move(c);
}
void get_health_checks(health_check_map_t *checks);
};

std::string handle_pyerror();
Expand Down
18 changes: 18 additions & 0 deletions src/mgr/PyModules.cc
Original file line number Diff line number Diff line change
Expand Up @@ -800,3 +800,21 @@ void PyModules::list_modules(std::set<std::string> *modules)
{
_list_modules(g_conf->mgr_module_path, modules);
}

/**
 * Store a new set of health checks on the module registered as `handle`.
 *
 * Runs with PyModules::lock held. If no module with that handle is
 * present in `modules`, the call does nothing and `checks` is left
 * untouched.
 *
 * @param handle name under which the module is registered in `modules`
 * @param checks health checks to hand over; moved into the module
 */
void PyModules::set_health_checks(const std::string& handle,
                                  health_check_map_t&& checks)
{
  Mutex::Locker guard(lock);

  auto found = modules.find(handle);
  if (found == modules.end()) {
    // Unknown handle: nothing to update.
    return;
  }

  found->second->set_health_checks(std::move(checks));
}

/**
 * Gather the health checks of every registered module into *checks.
 *
 * Runs with PyModules::lock held and asks each entry of `modules`, in
 * container iteration order, to add its checks to the same destination
 * map.
 *
 * @param checks destination map, forwarded as-is to each module
 */
void PyModules::get_health_checks(health_check_map_t *checks)
{
  Mutex::Locker guard(lock);

  for (auto it = modules.begin(); it != modules.end(); ++it) {
    it->second->get_health_checks(checks);
  }
}
5 changes: 5 additions & 0 deletions src/mgr/PyModules.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "ClusterState.h"

class ServeThread;
class health_check_map_t;

class PyModules
{
Expand Down Expand Up @@ -115,6 +116,10 @@ class PyModules
void set_config(const std::string &handle,
const std::string &key, const std::string &val);

void set_health_checks(const std::string& handle,
health_check_map_t&& checks);
void get_health_checks(health_check_map_t *checks);

void log(const std::string &handle,
int level, const std::string &record);

Expand Down
103 changes: 103 additions & 0 deletions src/mgr/PyState.cc
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,107 @@ ceph_send_command(PyObject *self, PyObject *args)
Py_RETURN_NONE;
}

/**
 * Python binding for ceph_state.set_health_checks(handle, checks).
 *
 * Converts a Python dict of the form
 *
 *   { 'CHECK_NAME': { 'severity': 'warning' | 'error',
 *                     'summary':  'summary string',
 *                     'detail':   [ 'detail', 'strings' ] },
 *     ... }
 *
 * into a health_check_map_t and passes it to
 * global_handle->set_health_checks() for the module named by `handle`.
 *
 * Malformed entries are logged with derr and skipped instead of being
 * raised to the Python caller. A severity string other than "warning" or
 * "error" leaves the check at HEALTH_OK.
 *
 * @param self unused
 * @param args Python argument tuple: (str handle, dict checks)
 * @return None on success, and also (after logging) when `checks` is not a
 *         dict; NULL with a Python exception set when the arguments cannot
 *         be parsed or a dict's item list cannot be built.
 */
static PyObject*
ceph_set_health_checks(PyObject *self, PyObject *args)
{
  char *handle = nullptr;
  PyObject *checks = NULL;
  if (!PyArg_ParseTuple(args, "sO:ceph_set_health_checks", &handle, &checks)) {
    return NULL;
  }
  if (!PyDict_Check(checks)) {
    derr << __func__ << " arg not a dict" << dendl;
    Py_RETURN_NONE;
  }

  // PyDict_Items() returns a NEW reference (or NULL with an exception set);
  // it has to be released on every path out of this function.
  PyObject *checksls = PyDict_Items(checks);
  if (!checksls) {
    return NULL;
  }

  health_check_map_t out_checks;
  const Py_ssize_t num_checks = PyList_Size(checksls);
  for (Py_ssize_t i = 0; i < num_checks; ++i) {
    PyObject *kv = PyList_GET_ITEM(checksls, i);  // borrowed
    char *check_name = nullptr;
    PyObject *check_info = nullptr;
    if (!PyArg_ParseTuple(kv, "sO:pair", &check_name, &check_info)) {
      derr << __func__ << " dict item " << i
           << " not a size 2 tuple" << dendl;
      // The entry is skipped, so drop the exception PyArg_ParseTuple raised;
      // otherwise we would return None with an error still pending.
      PyErr_Clear();
      continue;
    }
    if (!PyDict_Check(check_info)) {
      derr << __func__ << " item " << i << " " << check_name
           << " value not a dict" << dendl;
      continue;
    }

    health_status_t severity = HEALTH_OK;
    std::string summary;
    std::list<std::string> detail;

    PyObject *infols = PyDict_Items(check_info);  // new reference
    if (!infols) {
      Py_DECREF(checksls);
      return NULL;
    }
    const Py_ssize_t num_fields = PyList_Size(infols);
    for (Py_ssize_t j = 0; j < num_fields; ++j) {
      PyObject *pair = PyList_GET_ITEM(infols, j);  // borrowed
      if (!PyTuple_Check(pair)) {
        derr << __func__ << " item " << i << " pair " << j
             << " not a tuple" << dendl;
        continue;
      }
      char *k = nullptr;
      PyObject *v = nullptr;
      if (!PyArg_ParseTuple(pair, "sO:pair", &k, &v)) {
        derr << __func__ << " item " << i << " pair " << j
             << " not a size 2 tuple" << dendl;
        PyErr_Clear();  // skipped entry; do not leave the error pending
        continue;
      }
      const std::string ks(k);
      if (ks == "severity") {
        if (!PyString_Check(v)) {
          derr << __func__ << " check " << check_name
               << " severity value not string" << dendl;
          continue;
        }
        const std::string vs(PyString_AsString(v));
        if (vs == "warning") {
          severity = HEALTH_WARN;
        } else if (vs == "error") {
          severity = HEALTH_ERR;
        }
      } else if (ks == "summary") {
        if (!PyString_Check(v)) {
          derr << __func__ << " check " << check_name
               << " summary value not string" << dendl;
          continue;
        }
        summary = PyString_AsString(v);
      } else if (ks == "detail") {
        if (!PyList_Check(v)) {
          derr << __func__ << " check " << check_name
               << " detail value not list" << dendl;
          continue;
        }
        const Py_ssize_t num_details = PyList_Size(v);
        // Index is named `n` so it does not shadow the key pointer `k`.
        for (Py_ssize_t n = 0; n < num_details; ++n) {
          PyObject *di = PyList_GET_ITEM(v, n);  // borrowed
          if (!PyString_Check(di)) {
            derr << __func__ << " check " << check_name
                 << " detail item " << n << " not a string" << dendl;
            continue;
          }
          detail.push_back(PyString_AsString(di));
        }
      } else {
        derr << __func__ << " check " << check_name
             << " unexpected key " << k << dendl;
      }
    }
    // All strings have been copied out; release the per-check item list.
    Py_DECREF(infols);

    auto& d = out_checks.add(check_name, severity, summary);
    d.detail.swap(detail);
  }
  // check_name pointed into objects kept alive by this list; it is no
  // longer used past this point.
  Py_DECREF(checksls);

  JSONFormatter jf(true);
  dout(10) << "module " << handle << " health checks:\n";
  out_checks.dump(&jf);
  jf.flush(*_dout);
  *_dout << dendl;

  global_handle->set_health_checks(handle, std::move(out_checks));

  Py_RETURN_NONE;
}


static PyObject*
ceph_state_get(PyObject *self, PyObject *args)
Expand Down Expand Up @@ -359,6 +460,8 @@ PyMethodDef CephStateMethods[] = {
"Get a service's status"},
{"send_command", ceph_send_command, METH_VARARGS,
"Send a mon command"},
{"set_health_checks", ceph_set_health_checks, METH_VARARGS,
"Set health checks for this module"},
{"get_mgr_id", ceph_get_mgr_id, METH_NOARGS,
"Get the mgr id"},
{"get_config", ceph_config_get, METH_VARARGS,
Expand Down
2 changes: 0 additions & 2 deletions src/mon/PGMap.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2562,8 +2562,6 @@ void PGMap::get_health_checks(
const unsigned max = cct->_conf->mon_health_max_detail;
const auto& pools = osdmap.get_pools();

checks->clear();

typedef enum pg_consequence_t {
UNAVAILABLE = 1, // Client IO to the pool may block
DEGRADED = 2, // Fewer than the requested number of replicas are present
Expand Down
24 changes: 24 additions & 0 deletions src/pybind/mgr/mgr_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,30 @@ def send_command(self, *args, **kwargs):
"""
ceph_state.send_command(self._handle, *args, **kwargs)

def set_health_checks(self, checks):
"""
Set module's health checks
Set the module's current map of health checks. Argument is a
dict of check names to info, in this form:
{
'CHECK_FOO': {
'severity': 'warning', # or 'error'
'summary': 'summary string',
'detail': [ 'list', 'of', 'detail', 'strings' ],
},
'CHECK_BAR': {
'severity': 'error',
'summary': 'bars are bad',
'detail': [ 'too hard' ],
},
}
:param checks: dict of health check dicts
"""
ceph_state.set_health_checks(self._handle, checks)

def handle_command(self, cmd):
"""
Called by ceph-mgr to request the plugin to handle one
Expand Down

0 comments on commit 2c4ad54

Please sign in to comment.