Merge branch 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull RAS updates from Ingo Molnar:
 "Boris is on vacation so I'm sending the RAS bits this time. The main
  changes were:

   - Various RAS/CEC improvements and fixes by Borislav Petkov:
       - error insertion fixes
       - offlining latency fix
       - memory leak fix
       - additional sanity checks
       - cleanups
       - debug output improvements

   - More SMCA enhancements by Yazen Ghannam:
       - make banks truly per-CPU which they are in the hardware
       - don't over-cache certain registers
       - make the number of MCA banks per-CPU variable

     The long term goal with these changes is to support future
     heterogeneous SMCA extensions.

   - Misc fixes and improvements"

* 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mce: Do not check return value of debugfs_create functions
  x86/MCE: Determine MCA banks' init state properly
  x86/MCE: Make the number of MCA banks a per-CPU variable
  x86/MCE/AMD: Don't cache block addresses on SMCA systems
  x86/MCE: Make mce_banks a per-CPU array
  x86/MCE: Make struct mce_banks[] static
  RAS/CEC: Add copyright
  RAS/CEC: Add CONFIG_RAS_CEC_DEBUG and move CEC debug features there
  RAS/CEC: Dump the different array element sections
  RAS/CEC: Rename count_threshold to action_threshold
  RAS/CEC: Sanity-check array on every insertion
  RAS/CEC: Fix potential memory leak
  RAS/CEC: Do not set decay value on error
  RAS/CEC: Check count_threshold unconditionally
  RAS/CEC: Fix pfn insertion
torvalds committed Jul 8, 2019
2 parents e192832 + 6e4f929 commit 090bc5a
Showing 7 changed files with 269 additions and 205 deletions.
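
For orientation before the diff: most hunks below replace the global bank count mca_cfg.banks with the new per-CPU variable mce_num_banks, read via this_cpu_read() on the local CPU and via per_cpu() when a specific CPU is targeted. The following minimal out-of-tree module is a sketch of that per-CPU pattern, not code from the patch; demo_num_banks and the bank count of 8 are invented for illustration, while DEFINE_PER_CPU(), per_cpu() and this_cpu_read() are the kernel APIs the patch itself uses.

// SPDX-License-Identifier: GPL-2.0
/* Hypothetical demo module: one private copy of a variable per CPU. */
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/cpumask.h>

/* Mirrors the role of mce_num_banks in the patch (name is made up). */
static DEFINE_PER_CPU(unsigned int, demo_num_banks);

static int __init demo_init(void)
{
        unsigned int cpu;

        /* Remote access by CPU number, as mce_threshold_create_device() does. */
        for_each_online_cpu(cpu)
                per_cpu(demo_num_banks, cpu) = 8;       /* made-up bank count */

        /* Local access, as mce_amd_feature_init() does. */
        pr_info("demo: this CPU sees %u banks\n",
                this_cpu_read(demo_num_banks));

        return 0;
}

static void __exit demo_exit(void)
{
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

Because every possible CPU gets its own copy, a heterogeneous system can expose a different bank count per core, which the pull request names as the long-term goal of the SMCA work.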
arch/x86/kernel/cpu/mce/amd.c: 92 changes (47 additions, 45 deletions)
@@ -99,11 +99,6 @@ static struct smca_bank_name smca_names[] = {
         [SMCA_PCIE] = { "pcie", "PCI Express Unit" },
 };
 
-static u32 smca_bank_addrs[MAX_NR_BANKS][NR_BLOCKS] __ro_after_init =
-{
-        [0 ... MAX_NR_BANKS - 1] = { [0 ... NR_BLOCKS - 1] = -1 }
-};
-
 static const char *smca_get_name(enum smca_bank_types t)
 {
         if (t >= N_SMCA_BANK_TYPES)
@@ -197,6 +192,9 @@ static char buf_mcatype[MAX_MCATYPE_NAME_LEN];
 static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
 static DEFINE_PER_CPU(unsigned int, bank_map); /* see which banks are on */
 
+/* Map of banks that have more than MCA_MISC0 available. */
+static DEFINE_PER_CPU(u32, smca_misc_banks_map);
+
 static void amd_threshold_interrupt(void);
 static void amd_deferred_error_interrupt(void);
 
@@ -206,6 +204,28 @@ static void default_deferred_error_interrupt(void)
 }
 void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;
 
+static void smca_set_misc_banks_map(unsigned int bank, unsigned int cpu)
+{
+        u32 low, high;
+
+        /*
+         * For SMCA enabled processors, BLKPTR field of the first MISC register
+         * (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4).
+         */
+        if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
+                return;
+
+        if (!(low & MCI_CONFIG_MCAX))
+                return;
+
+        if (rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high))
+                return;
+
+        if (low & MASK_BLKPTR_LO)
+                per_cpu(smca_misc_banks_map, cpu) |= BIT(bank);
+
+}
+
 static void smca_configure(unsigned int bank, unsigned int cpu)
 {
         unsigned int i, hwid_mcatype;
@@ -243,6 +263,8 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
                 wrmsr(smca_config, low, high);
         }
 
+        smca_set_misc_banks_map(bank, cpu);
+
         /* Return early if this bank was already initialized. */
         if (smca_banks[bank].hwid)
                 return;
@@ -453,50 +475,29 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
         wrmsr(MSR_CU_DEF_ERR, low, high);
 }
 
-static u32 smca_get_block_address(unsigned int bank, unsigned int block)
+static u32 smca_get_block_address(unsigned int bank, unsigned int block,
+                                  unsigned int cpu)
 {
-        u32 low, high;
-        u32 addr = 0;
-
-        if (smca_get_bank_type(bank) == SMCA_RESERVED)
-                return addr;
-
         if (!block)
                 return MSR_AMD64_SMCA_MCx_MISC(bank);
 
-        /* Check our cache first: */
-        if (smca_bank_addrs[bank][block] != -1)
-                return smca_bank_addrs[bank][block];
-
-        /*
-         * For SMCA enabled processors, BLKPTR field of the first MISC register
-         * (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4).
-         */
-        if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
-                goto out;
-
-        if (!(low & MCI_CONFIG_MCAX))
-                goto out;
-
-        if (!rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
-            (low & MASK_BLKPTR_LO))
-                addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
+        if (!(per_cpu(smca_misc_banks_map, cpu) & BIT(bank)))
+                return 0;
 
-out:
-        smca_bank_addrs[bank][block] = addr;
-        return addr;
+        return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
 }
 
 static u32 get_block_address(u32 current_addr, u32 low, u32 high,
-                             unsigned int bank, unsigned int block)
+                             unsigned int bank, unsigned int block,
+                             unsigned int cpu)
 {
         u32 addr = 0, offset = 0;
 
-        if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
+        if ((bank >= per_cpu(mce_num_banks, cpu)) || (block >= NR_BLOCKS))
                 return addr;
 
         if (mce_flags.smca)
-                return smca_get_block_address(bank, block);
+                return smca_get_block_address(bank, block, cpu);
 
         /* Fall back to method we used for older processors: */
         switch (block) {
@@ -624,18 +625,19 @@ void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank)
 /* cpu init entry point, called from mce.c with preempt off */
 void mce_amd_feature_init(struct cpuinfo_x86 *c)
 {
-        u32 low = 0, high = 0, address = 0;
         unsigned int bank, block, cpu = smp_processor_id();
+        u32 low = 0, high = 0, address = 0;
         int offset = -1;
 
-        for (bank = 0; bank < mca_cfg.banks; ++bank) {
+
+        for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
                 if (mce_flags.smca)
                         smca_configure(bank, cpu);
 
                 disable_err_thresholding(c, bank);
 
                 for (block = 0; block < NR_BLOCKS; ++block) {
-                        address = get_block_address(address, low, high, bank, block);
+                        address = get_block_address(address, low, high, bank, block, cpu);
                         if (!address)
                                 break;
 
@@ -973,7 +975,7 @@ static void amd_deferred_error_interrupt(void)
 {
         unsigned int bank;
 
-        for (bank = 0; bank < mca_cfg.banks; ++bank)
+        for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank)
                 log_error_deferred(bank);
 }
 
@@ -1014,7 +1016,7 @@ static void amd_threshold_interrupt(void)
         struct threshold_block *first_block = NULL, *block = NULL, *tmp = NULL;
         unsigned int bank, cpu = smp_processor_id();
 
-        for (bank = 0; bank < mca_cfg.banks; ++bank) {
+        for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
                 if (!(per_cpu(bank_map, cpu) & (1 << bank)))
                         continue;
 
@@ -1201,7 +1203,7 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
         u32 low, high;
         int err;
 
-        if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
+        if ((bank >= per_cpu(mce_num_banks, cpu)) || (block >= NR_BLOCKS))
                 return 0;
 
         if (rdmsr_safe_on_cpu(cpu, address, &low, &high))
@@ -1252,7 +1254,7 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
         if (err)
                 goto out_free;
 recurse:
-        address = get_block_address(address, low, high, bank, ++block);
+        address = get_block_address(address, low, high, bank, ++block, cpu);
         if (!address)
                 return 0;
 
@@ -1435,7 +1437,7 @@ int mce_threshold_remove_device(unsigned int cpu)
 {
         unsigned int bank;
 
-        for (bank = 0; bank < mca_cfg.banks; ++bank) {
+        for (bank = 0; bank < per_cpu(mce_num_banks, cpu); ++bank) {
                 if (!(per_cpu(bank_map, cpu) & (1 << bank)))
                         continue;
                 threshold_remove_bank(cpu, bank);
@@ -1456,14 +1458,14 @@ int mce_threshold_create_device(unsigned int cpu)
         if (bp)
                 return 0;
 
-        bp = kcalloc(mca_cfg.banks, sizeof(struct threshold_bank *),
+        bp = kcalloc(per_cpu(mce_num_banks, cpu), sizeof(struct threshold_bank *),
                      GFP_KERNEL);
         if (!bp)
                 return -ENOMEM;
 
         per_cpu(threshold_banks, cpu) = bp;
 
-        for (bank = 0; bank < mca_cfg.banks; ++bank) {
+        for (bank = 0; bank < per_cpu(mce_num_banks, cpu); ++bank) {
                 if (!(per_cpu(bank_map, cpu) & (1 << bank)))
                         continue;
                 err = threshold_create_bank(cpu, bank);
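
Worth noting after this file: the old smca_bank_addrs[][] cache is replaced by one bit per bank in the per-CPU u32 smca_misc_banks_map, set once at init when the BLKPTR probe succeeds and merely tested on every later address lookup. A standalone plain-C sketch of that bit-map idea follows; all names are hypothetical, and where the kernel uses BIT(bank) this uses an explicit shift.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Bit n set: bank n has the extra MISC1-4 blocks. */
static uint32_t misc_banks_map;

static void mark_bank(unsigned int bank)
{
        misc_banks_map |= UINT32_C(1) << bank;  /* kernel: BIT(bank) */
}

static bool bank_has_extra_misc(unsigned int bank)
{
        return misc_banks_map & (UINT32_C(1) << bank);
}

int main(void)
{
        mark_bank(3);   /* pretend the BLKPTR probe succeeded on bank 3 */

        printf("bank 3: %d, bank 5: %d\n",
               bank_has_extra_misc(3), bank_has_extra_misc(5));
        return 0;
}

The trade against the old cache is size and locality: a single u32 per CPU instead of a MAX_NR_BANKS x NR_BLOCKS table shared machine-wide, and no MSR reads left on the lookup path.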
(diffs for the remaining six changed files not shown)
