[PATCH v2 14/24] EDAC, ghes: Rework memory hierarchy detection

From: Robert Richter
Date: Mon Jun 24 2019 - 11:10:36 EST


A later patch will add more information about the memory hierarchy
(NUMA topology, DIMM label information). Rework the memory hierarchy
detection so that the code can be extended for this.

The general approach is roughly as follows:

	mem_info_setup();
	for_each_node(nid) {
		mci = edac_mc_alloc(nid);
		mem_info_prepare_mci(mci);
		edac_mc_add_mc(mci);
	}

This patch introduces mem_info_setup() and mem_info_prepare_mci().

All data about the memory hierarchy is collected in a file-local
(static) struct ghes_mem_info.

Note: Registration per (NUMA) node will be implemented in a later
patch.

Signed-off-by: Robert Richter <rrichter@xxxxxxxxxxx>
---
drivers/edac/ghes_edac.c | 166 ++++++++++++++++++++++++++++++---------
1 file changed, 127 insertions(+), 39 deletions(-)

diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
index 8063996a311d..44bfb499b147 100644
--- a/drivers/edac/ghes_edac.c
+++ b/drivers/edac/ghes_edac.c
@@ -65,17 +65,53 @@ struct memdev_dmi_entry {
u16 conf_mem_clk_speed;
} __attribute__((__packed__));

-struct ghes_edac_dimm_fill {
- struct mem_ctl_info *mci;
- unsigned count;
+struct ghes_dimm_info {
+ struct dimm_info dimm_info; /* DIMM properties, copied into the mci later */
+ int idx; /* position of this entry in mem_info.dimms */
+};
+
+struct ghes_mem_info {
+ int num_dimm; /* number of entries in dimms */
+ struct ghes_dimm_info *dimms; /* array allocated by ghes_dimm_info_init() */
};

+static struct ghes_mem_info mem_info;	/* collected memory hierarchy data */
+
+#define for_each_dimm(dimm) /* walk every entry of mem_info.dimms */ \
+	for ((dimm) = mem_info.dimms;				\
+	     (dimm) < mem_info.dimms + mem_info.num_dimm;	\
+	     (dimm)++)
+
static void ghes_edac_count_dimms(const struct dmi_header *dh, void *arg)
{
- int *num_dimm = arg;
+ int *num = arg;

if (dh->type == DMI_ENTRY_MEM_DEVICE)
- (*num_dimm)++;
+ (*num)++;
+}
+
+/*
+ * Reset mem_info, then allocate @num zeroed DIMM entries numbered 0..num-1.
+ * Returns 0, -EINVAL if @num is not positive, or -ENOMEM.
+ */
+static int ghes_dimm_info_init(int num)
+{
+	int i;
+
+	memset(&mem_info, 0, sizeof(mem_info));
+
+	if (num <= 0)
+		return -EINVAL;
+
+	mem_info.dimms = kcalloc(num, sizeof(*mem_info.dimms), GFP_KERNEL);
+	if (!mem_info.dimms)
+		return -ENOMEM;
+
+	mem_info.num_dimm = num;
+	for (i = 0; i < num; i++)
+		mem_info.dimms[i].idx = i;
+
+	return 0;
}

static int get_dimm_smbios_index(u16 handle)
@@ -92,18 +128,15 @@ static int get_dimm_smbios_index(u16 handle)

static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg)
{
- struct ghes_edac_dimm_fill *dimm_fill = arg;
- struct mem_ctl_info *mci = dimm_fill->mci;
-
if (dh->type == DMI_ENTRY_MEM_DEVICE) {
+ int *idx = arg;
struct memdev_dmi_entry *entry = (struct memdev_dmi_entry *)dh;
- struct dimm_info *dimm = edac_get_dimm(mci, dimm_fill->count,
- 0, 0);
+ struct ghes_dimm_info *mi = &mem_info.dimms[*idx];
+ struct dimm_info *dimm = &mi->dimm_info;
u16 rdr_mask = BIT(7) | BIT(13);

if (entry->size == 0xffff) {
- pr_info("Can't get DIMM%i size\n",
- dimm_fill->count);
+ pr_info("Can't get DIMM%i size\n", mi->idx);
dimm->nr_pages = MiB_TO_PAGES(32);/* Unknown */
} else if (entry->size == 0x7fff) {
dimm->nr_pages = MiB_TO_PAGES(entry->extended_size);
@@ -177,7 +210,7 @@ static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg)

if (dimm->nr_pages) {
edac_dbg(1, "DIMM%i: %s size = %d MB%s\n",
- dimm_fill->count, edac_mem_types[dimm->mtype],
+ mi->idx, edac_mem_types[dimm->mtype],
PAGES_TO_MiB(dimm->nr_pages),
(dimm->edac_mode != EDAC_NONE) ? "(ECC)" : "");
edac_dbg(2, "\ttype %d, detail 0x%02x, width %d(total %d)\n",
@@ -187,10 +220,74 @@ static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg)

dimm->smbios_handle = entry->handle;

- dimm_fill->count++;
+ (*idx)++;
}
}

+/*
+ * Collect the DIMM data from DMI in mem_info; returns 0 or a negative errno.
+ */
+static int mem_info_setup(void)
+{
+	int num_dimms = 0, next = 0;
+	int err;
+
+	/* The first pass sizes the DIMM array, the second one fills it */
+	dmi_walk(ghes_edac_count_dimms, &num_dimms);
+
+	err = ghes_dimm_info_init(num_dimms);
+	if (!err)
+		dmi_walk(ghes_edac_dmidecode, &next);
+
+	return err;
+}
+
+/* Fallback when DMI lists no DIMM: set up a single placeholder DIMM. */
+static int mem_info_setup_fake(void)
+{
+	struct dimm_info *fake;
+	int err = ghes_dimm_info_init(1);
+
+	if (err)
+		return err;
+
+	fake = &mem_info.dimms[0].dimm_info;
+	fake->edac_mode = EDAC_SECDED;
+	fake->dtype = DEV_UNKNOWN;
+	fake->mtype = MEM_UNKNOWN;
+	fake->grain = 128;
+	fake->nr_pages = 1;
+
+	return 0;
+}
+
+/* Copy the DIMM data collected in mem_info into the DIMMs of @mci. */
+static void mem_info_prepare_mci(struct mem_ctl_info *mci)
+{
+	struct ghes_dimm_info *dimm;
+	struct dimm_info *mci_dimm;
+	int index = 0;
+
+	for_each_dimm(dimm) {
+		/* Stop before asking @mci for a DIMM it does not have */
+		if (index >= mci->tot_dimms)
+			break;
+
+		mci_dimm = edac_get_dimm_by_index(mci, index++);
+		mci_dimm->nr_pages = dimm->dimm_info.nr_pages;
+		mci_dimm->mtype = dimm->dimm_info.mtype;
+		mci_dimm->edac_mode = dimm->dimm_info.edac_mode;
+		mci_dimm->dtype = dimm->dimm_info.dtype;
+		mci_dimm->grain = dimm->dimm_info.grain;
+		mci_dimm->smbios_handle = dimm->dimm_info.smbios_handle;
+	}
+
+	/* Compare the real counts; the loop index cannot exceed tot_dimms */
+	if (mem_info.num_dimm != mci->tot_dimms)
+		pr_warn("Unexpected number of DIMMs: %d (exp. %d)\n",
+			mem_info.num_dimm, mci->tot_dimms);
+}
+
void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
{
struct dimm_info *dimm_info;
@@ -450,10 +547,9 @@ static struct acpi_platform_list plat_list[] = {
int ghes_edac_register(struct ghes *ghes, struct device *dev)
{
bool fake = false;
- int rc, num_dimm = 0;
+ int rc;
struct mem_ctl_info *mci;
struct edac_mc_layer layers[1];
- struct ghes_edac_dimm_fill dimm_fill;
int idx = -1;

if (IS_ENABLED(CONFIG_X86)) {
@@ -471,22 +567,24 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
if (atomic_inc_return(&ghes_init) > 1)
return 0;

- /* Get the number of DIMMs */
- dmi_walk(ghes_edac_count_dimms, &num_dimm);
-
- /* Check if we've got a bogus BIOS */
- if (num_dimm == 0) {
+ rc = mem_info_setup();
+ if (rc == -EINVAL) {
+ /* we've got a bogus BIOS */
fake = true;
- num_dimm = 1;
+ rc = mem_info_setup_fake();
+ }
+ if (rc < 0) {
+ pr_err("Can't allocate memory for DIMM data\n");
+ return rc;
}

layers[0].type = EDAC_MC_LAYER_ALL_MEM;
- layers[0].size = num_dimm;
+ layers[0].size = mem_info.num_dimm;
layers[0].is_virt_csrow = true;

mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(struct ghes_edac_pvt));
if (!mci) {
- pr_info("Can't allocate memory for EDAC data\n");
+ pr_err("Can't allocate memory for EDAC data\n");
return -ENOMEM;
}

@@ -512,26 +610,14 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
pr_info("So, the end result of using this driver varies from vendor to vendor.\n");
pr_info("If you find incorrect reports, please contact your hardware vendor\n");
pr_info("to correct its BIOS.\n");
- pr_info("This system has %d DIMM sockets.\n", num_dimm);
+ pr_info("This system has %d DIMM sockets.\n", mem_info.num_dimm);
}

- if (!fake) {
- dimm_fill.count = 0;
- dimm_fill.mci = mci;
- dmi_walk(ghes_edac_dmidecode, &dimm_fill);
- } else {
- struct dimm_info *dimm = edac_get_dimm(mci, 0, 0, 0);
-
- dimm->nr_pages = 1;
- dimm->grain = 128;
- dimm->mtype = MEM_UNKNOWN;
- dimm->dtype = DEV_UNKNOWN;
- dimm->edac_mode = EDAC_SECDED;
- }
+ mem_info_prepare_mci(mci);

rc = edac_mc_add_mc(mci);
if (rc < 0) {
- pr_info("Can't register at EDAC core\n");
+ pr_err("Can't register at EDAC core\n");
edac_mc_free(mci);
return -ENODEV;
}
@@ -548,4 +634,6 @@ void ghes_edac_unregister(struct ghes *ghes)
mci = ghes_pvt->mci;
edac_mc_del_mc(mci->pdev);
edac_mc_free(mci);
+
+ kfree(mem_info.dimms);
}
--
2.20.1