Skip to content

Commit

Permalink
EDAC, MCE: Rework MCE injection
Browse files Browse the repository at this point in the history
Add sysfs injection facilities for testing of the MCE decoding code.
Remove large parts of amd64_edac_dbg.c, as a result, which did only
NB MCE injection anyway and the new injection code supports that
functionality already.

Add an injection module so that MCE decoding code in production kernels
like those in RHEL and SLES can be tested.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
  • Loading branch information
Borislav Petkov authored and Borislav Petkov committed Oct 21, 2010
1 parent 30e1f7a commit 9cdeb40
Show file tree
Hide file tree
Showing 7 changed files with 203 additions and 207 deletions.
14 changes: 12 additions & 2 deletions drivers/edac/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ config EDAC_DEBUG
there're four debug levels (x=0,1,2,3 from low to high).
Usually you should select 'N'.

config EDAC_DECODE_MCE
config EDAC_DECODE_MCE
tristate "Decode MCEs in human-readable form (only on AMD for now)"
depends on CPU_SUP_AMD && X86_MCE
default y
Expand All @@ -51,6 +51,16 @@ config EDAC_DEBUG
which occur really early upon boot, before the module infrastructure
has been initialized.

config EDAC_MCE_INJ
tristate "Simple MCE injection interface over /sysfs"
depends on EDAC_DECODE_MCE
default n
help
This is a simple interface to inject MCEs over /sysfs and test
the MCE decoding code in EDAC.

This is currently AMD-only.

config EDAC_MM_EDAC
tristate "Main Memory EDAC (Error Detection And Correction) reporting"
help
Expand All @@ -72,7 +82,7 @@ config EDAC_AMD64
Families of Memory Controllers (K8, F10h and F11h)

config EDAC_AMD64_ERROR_INJECTION
bool "Sysfs Error Injection facilities"
bool "Sysfs HW Error injection facilities"
depends on EDAC_AMD64
help
Recent Opterons (Family 10h and later) provide for Memory Error
Expand Down
2 changes: 2 additions & 0 deletions drivers/edac/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ ifdef CONFIG_PCI
edac_core-objs += edac_pci.o edac_pci_sysfs.o
endif

obj-$(CONFIG_EDAC_MCE_INJ) += mce_amd_inj.o

obj-$(CONFIG_EDAC_DECODE_MCE) += edac_mce_amd.o

obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o
Expand Down
2 changes: 1 addition & 1 deletion drivers/edac/amd64_edac.h
Original file line number Diff line number Diff line change
Expand Up @@ -486,7 +486,7 @@ extern const char *ext_msgs[32];
extern const char *htlink_msgs[8];

#ifdef CONFIG_EDAC_DEBUG
#define NUM_DBG_ATTRS 9
#define NUM_DBG_ATTRS 5
#else
#define NUM_DBG_ATTRS 0
#endif
Expand Down
213 changes: 12 additions & 201 deletions drivers/edac/amd64_edac_dbg.c
Original file line number Diff line number Diff line change
@@ -1,173 +1,16 @@
#include "amd64_edac.h"

/*
* accept a hex value and store it into the virtual error register file, field:
* nbeal and nbeah. Assume virtual error values have already been set for: NBSL,
* NBSH and NBCFG. Then proceed to map the error values to a MC, CSROW and
* CHANNEL
*/
static ssize_t amd64_nbea_store(struct mem_ctl_info *mci, const char *data,
size_t count)
{
struct amd64_pvt *pvt = mci->pvt_info;
u64 value;
int ret = 0;
struct mce m;

ret = strict_strtoull(data, 16, &value);
if (ret != -EINVAL) {
struct err_regs *regs = &pvt->ctl_error_info;

debugf0("received NBEA= 0x%llx\n", value);

/* place the value into the virtual error packet */
pvt->ctl_error_info.nbeal = (u32) value;
value >>= 32;
pvt->ctl_error_info.nbeah = (u32) value;

m.addr = value;
m.status = regs->nbsl | ((u64)regs->nbsh << 32);

/* Process the Mapping request */
/* TODO: Add race prevention */
amd_decode_nb_mce(pvt->mc_node_id, &m, regs->nbcfg);

return count;
}
return ret;
#define EDAC_DCT_ATTR_SHOW(reg) \
static ssize_t amd64_##reg##_show(struct mem_ctl_info *mci, char *data) \
{ \
struct amd64_pvt *pvt = mci->pvt_info; \
return sprintf(data, "0x%016llx\n", (u64)pvt->reg); \
}

/* display back what the last NBEA (MCA NB Address (MC4_ADDR)) was written */
static ssize_t amd64_nbea_show(struct mem_ctl_info *mci, char *data)
{
struct amd64_pvt *pvt = mci->pvt_info;
u64 value;

value = pvt->ctl_error_info.nbeah;
value <<= 32;
value |= pvt->ctl_error_info.nbeal;

return sprintf(data, "%llx\n", value);
}

/* store the NBSL (MCA NB Status Low (MC4_STATUS)) value user desires */
static ssize_t amd64_nbsl_store(struct mem_ctl_info *mci, const char *data,
size_t count)
{
struct amd64_pvt *pvt = mci->pvt_info;
unsigned long value;
int ret = 0;

ret = strict_strtoul(data, 16, &value);
if (ret != -EINVAL) {
debugf0("received NBSL= 0x%lx\n", value);

pvt->ctl_error_info.nbsl = (u32) value;

return count;
}
return ret;
}

/* display back what the last NBSL value written */
static ssize_t amd64_nbsl_show(struct mem_ctl_info *mci, char *data)
{
struct amd64_pvt *pvt = mci->pvt_info;
u32 value;

value = pvt->ctl_error_info.nbsl;

return sprintf(data, "%x\n", value);
}

/* store the NBSH (MCA NB Status High) value user desires */
static ssize_t amd64_nbsh_store(struct mem_ctl_info *mci, const char *data,
size_t count)
{
struct amd64_pvt *pvt = mci->pvt_info;
unsigned long value;
int ret = 0;

ret = strict_strtoul(data, 16, &value);
if (ret != -EINVAL) {
debugf0("received NBSH= 0x%lx\n", value);

pvt->ctl_error_info.nbsh = (u32) value;

return count;
}
return ret;
}

/* display back what the last NBSH value written */
static ssize_t amd64_nbsh_show(struct mem_ctl_info *mci, char *data)
{
struct amd64_pvt *pvt = mci->pvt_info;
u32 value;

value = pvt->ctl_error_info.nbsh;

return sprintf(data, "%x\n", value);
}

/* accept and store the NBCFG (MCA NB Configuration) value user desires */
static ssize_t amd64_nbcfg_store(struct mem_ctl_info *mci,
const char *data, size_t count)
{
struct amd64_pvt *pvt = mci->pvt_info;
unsigned long value;
int ret = 0;

ret = strict_strtoul(data, 16, &value);
if (ret != -EINVAL) {
debugf0("received NBCFG= 0x%lx\n", value);

pvt->ctl_error_info.nbcfg = (u32) value;

return count;
}
return ret;
}

/* various show routines for the controls of a MCI */
static ssize_t amd64_nbcfg_show(struct mem_ctl_info *mci, char *data)
{
struct amd64_pvt *pvt = mci->pvt_info;

return sprintf(data, "%x\n", pvt->ctl_error_info.nbcfg);
}


static ssize_t amd64_dhar_show(struct mem_ctl_info *mci, char *data)
{
struct amd64_pvt *pvt = mci->pvt_info;

return sprintf(data, "%x\n", pvt->dhar);
}


static ssize_t amd64_dbam_show(struct mem_ctl_info *mci, char *data)
{
struct amd64_pvt *pvt = mci->pvt_info;

return sprintf(data, "%x\n", pvt->dbam0);
}


static ssize_t amd64_topmem_show(struct mem_ctl_info *mci, char *data)
{
struct amd64_pvt *pvt = mci->pvt_info;

return sprintf(data, "%llx\n", pvt->top_mem);
}


static ssize_t amd64_topmem2_show(struct mem_ctl_info *mci, char *data)
{
struct amd64_pvt *pvt = mci->pvt_info;

return sprintf(data, "%llx\n", pvt->top_mem2);
}
EDAC_DCT_ATTR_SHOW(dhar);
EDAC_DCT_ATTR_SHOW(dbam0);
EDAC_DCT_ATTR_SHOW(top_mem);
EDAC_DCT_ATTR_SHOW(top_mem2);

static ssize_t amd64_hole_show(struct mem_ctl_info *mci, char *data)
{
Expand All @@ -186,38 +29,6 @@ static ssize_t amd64_hole_show(struct mem_ctl_info *mci, char *data)
*/
struct mcidev_sysfs_attribute amd64_dbg_attrs[] = {

{
.attr = {
.name = "nbea_ctl",
.mode = (S_IRUGO | S_IWUSR)
},
.show = amd64_nbea_show,
.store = amd64_nbea_store,
},
{
.attr = {
.name = "nbsl_ctl",
.mode = (S_IRUGO | S_IWUSR)
},
.show = amd64_nbsl_show,
.store = amd64_nbsl_store,
},
{
.attr = {
.name = "nbsh_ctl",
.mode = (S_IRUGO | S_IWUSR)
},
.show = amd64_nbsh_show,
.store = amd64_nbsh_store,
},
{
.attr = {
.name = "nbcfg_ctl",
.mode = (S_IRUGO | S_IWUSR)
},
.show = amd64_nbcfg_show,
.store = amd64_nbcfg_store,
},
{
.attr = {
.name = "dhar",
Expand All @@ -231,23 +42,23 @@ struct mcidev_sysfs_attribute amd64_dbg_attrs[] = {
.name = "dbam",
.mode = (S_IRUGO)
},
.show = amd64_dbam_show,
.show = amd64_dbam0_show,
.store = NULL,
},
{
.attr = {
.name = "topmem",
.mode = (S_IRUGO)
},
.show = amd64_topmem_show,
.show = amd64_top_mem_show,
.store = NULL,
},
{
.attr = {
.name = "topmem2",
.mode = (S_IRUGO)
},
.show = amd64_topmem2_show,
.show = amd64_top_mem2_show,
.store = NULL,
},
{
Expand Down
4 changes: 2 additions & 2 deletions drivers/edac/edac_mce_amd.c
Original file line number Diff line number Diff line change
Expand Up @@ -324,8 +324,7 @@ static inline void amd_decode_err_code(u16 ec)
pr_emerg(HW_ERR "Huh? Unknown MCE error 0x%x\n", ec);
}

static int amd_decode_mce(struct notifier_block *nb, unsigned long val,
void *data)
int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
{
struct mce *m = (struct mce *)data;
int node, ecc;
Expand Down Expand Up @@ -379,6 +378,7 @@ static int amd_decode_mce(struct notifier_block *nb, unsigned long val,

return NOTIFY_STOP;
}
EXPORT_SYMBOL_GPL(amd_decode_mce);

static struct notifier_block amd_mce_dec_nb = {
.notifier_call = amd_decode_mce,
Expand Down
4 changes: 3 additions & 1 deletion drivers/edac/edac_mce_amd.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#ifndef _EDAC_MCE_AMD_H
#define _EDAC_MCE_AMD_H

#include <linux/notifier.h>

#include <asm/mce.h>

#define ERROR_CODE(x) ((x) & 0xffff)
Expand Down Expand Up @@ -61,10 +63,10 @@ struct err_regs {
u32 nbeal;
};


void amd_report_gart_errors(bool);
void amd_register_ecc_decoder(void (*f)(int, struct mce *, u32));
void amd_unregister_ecc_decoder(void (*f)(int, struct mce *, u32));
void amd_decode_nb_mce(int, struct mce *, u32);
int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data);

#endif /* _EDAC_MCE_AMD_H */
Loading

0 comments on commit 9cdeb40

Please sign in to comment.