Re: [PATCHv7] EDAC, altera: Add Altera L2 Cache and OCRAM EDAC Support

From: Dinh Nguyen
Date: Mon Jan 04 2016 - 12:24:28 EST


On 11/19/2015 12:34 PM, Borislav Petkov wrote:
> On Tue, Oct 27, 2015 at 03:38:12PM -0500, dinguyen@xxxxxxxxxxxxxxxxxxxxx wrote:
>> From: Thor Thayer <tthayer@xxxxxxxxxxxxxxxxxxxxx>
>>
>> Adding L2 Cache and On-Chip RAM EDAC support for the
>> Altera SoCs using the EDAC device model. The SDRAM
>> controller is using the Memory Controller model.
>>
>> Each type of ECC is individually configurable.
>>
>> The SDRAM ECC is a separate Kconfig option because:
>> 1) the SDRAM preparation can take almost 2 seconds on boot and some
>> customers need a faster boot time.
>> 2) the SDRAM has an ECC initialization dependency on the preloader
>> which is outside the kernel. It is desirable to be able to turn the
>> SDRAM on & off separately.
>>
>> Signed-off-by: Thor Thayer <tthayer@xxxxxxxxxxxxxxxxxxxxx>
>> Signed-off-by: Dinh Nguyen <dinguyen@xxxxxxxxxxxxxxxxxxxxx>
>> ---
>> v7: s/of_get_named_gen_pool/of_gen_pool_get
>> Remove #ifdef for EDAC_DEBUG
>> Use -ENODEV instead of EPROBE_DEFER
>>
>> v6: Convert to nested EDAC in device tree. Force L2 cache
>> on for L2Cache ECC & remove L2 cache syscon for checking
>> enable bit. Update year in header.
>>
>> v5: No Change
>>
>> v4: Change mask defines to use BIT().
>> Fix comment style to agree with kernel coding style.
>> Better printk description for read != write in trigger.
>> Remove SysFS debugging message.
>> Better dci->mod_name
>> Move gen_pool pointer assignment to end of function.
>> Invert logic to reduce indent in ocram depenency check.
>> Change from dev_err() to edac_printk()
>> Replace magic numbers with defines & comments.
>> Improve error injection test.
>> Change Makefile intermediary name to altr (from alt)
>>
>> v3: Move OCRAM and L2 cache EDAC functions into altera_edac.c
>> instead of separate files.
>>
>> v2: Fix L2 dependency comments.
>> ---
>> drivers/edac/Kconfig | 16 ++
>> drivers/edac/Makefile | 5 +-
>> drivers/edac/altera_edac.c | 488 ++++++++++++++++++++++++++++++++++++++++++++-
>> 3 files changed, 507 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
>> index ef25000..b80b4ad 100644
>> --- a/drivers/edac/Kconfig
>> +++ b/drivers/edac/Kconfig

[snip]

>> diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
>> index dbf53e0..8f1c6fc 100644
>> --- a/drivers/edac/Makefile
>> +++ b/drivers/edac/Makefile
>> @@ -67,6 +67,9 @@ obj-$(CONFIG_EDAC_OCTEON_L2C) += octeon_edac-l2c.o
>> obj-$(CONFIG_EDAC_OCTEON_LMC) += octeon_edac-lmc.o
>> obj-$(CONFIG_EDAC_OCTEON_PCI) += octeon_edac-pci.o
>>
>> -obj-$(CONFIG_EDAC_ALTERA_MC) += altera_edac.o
>> +altr_edac-y := altera_edac.o
>> +obj-$(CONFIG_EDAC_ALTERA_MC) += altr_edac.o
>> +obj-$(CONFIG_EDAC_ALTERA_L2C) += altr_edac.o
>> +obj-$(CONFIG_EDAC_ALTERA_OCRAM) += altr_edac.o
>
> What are those supposed to accomplish?
>

We tried to jam the L2 and OCRAM EDAC functionality in the same
altr_edac.c file. It looks like it might be clean if we split out the L2
and OCRAM functions into their appropriate files(altr_edac_l2.c and
altr_edac_ocram.c). Do you agree?

>> obj-$(CONFIG_EDAC_SYNOPSYS) += synopsys_edac.o
>> obj-$(CONFIG_EDAC_XGENE) += xgene_edac.o
>> diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c
>> index 9296409..154ac8c 100644
>> --- a/drivers/edac/altera_edac.c
>> +++ b/drivers/edac/altera_edac.c
>> @@ -17,8 +17,10 @@
>> * Adapted from the highbank_mc_edac driver.
>> */
>>
>> +#include <asm/cacheflush.h>
>> #include <linux/ctype.h>
>> #include <linux/edac.h>
>> +#include <linux/genalloc.h>
>> #include <linux/interrupt.h>
>> #include <linux/kernel.h>
>> #include <linux/mfd/syscon.h>
>> @@ -34,6 +36,7 @@
>>
>> #define EDAC_MOD_STR "altera_edac"
>> #define EDAC_VERSION "1"
>> +#define EDAC_DEVICE "ALTR_MEM"
>
> Let's simply call it "Altera" - it is more human-friendly.
>

Ok.

>> static const struct altr_sdram_prv_data c5_data = {
>> .ecc_ctrl_offset = CV_CTLCFG_OFST,
>> @@ -75,6 +78,33 @@ static const struct altr_sdram_prv_data a10_data = {
>> .ue_set_mask = A10_DIAGINT_TDERRA_MASK,
>> };
>>
>> +/************************** EDAC Device Defines **************************/
>> +
>> +/* OCRAM ECC Management Group Defines */
>> +#define ALTR_MAN_GRP_OCRAM_ECC_OFFSET 0x04
>> +#define ALTR_OCR_ECC_EN_MASK BIT(0)
>> +#define ALTR_OCR_ECC_INJS_MASK BIT(1)
>> +#define ALTR_OCR_ECC_INJD_MASK BIT(2)
>> +#define ALTR_OCR_ECC_SERR_MASK BIT(3)
>> +#define ALTR_OCR_ECC_DERR_MASK BIT(4)
>> +
>> +/* L2 ECC Management Group Defines */
>> +#define ALTR_MAN_GRP_L2_ECC_OFFSET 0x00
>> +#define ALTR_L2_ECC_EN_MASK BIT(0)
>> +#define ALTR_L2_ECC_INJS_MASK BIT(1)
>> +#define ALTR_L2_ECC_INJD_MASK BIT(2)
>
> Single bit masks? You don't need to call them "_MASK" - simply remove
> it.
>

Ok.

>> +#define ALTR_UE_TRIGGER_CHAR 'U' /* Trigger for UE */
>> +#define ALTR_TRIGGER_READ_WRD_CNT 32 /* Line size x 4 */
>> +#define ALTR_TRIG_OCRAM_BYTE_SIZE 128 /* Line size x 4 */
>> +#define ALTR_TRIG_L2C_BYTE_SIZE 4096 /* Full Page */
>> +
>> +/*********************** EDAC Memory Controller Functions ****************/
>> +
>> +/* The SDRAM controller uses the EDAC Memory Controller framework. */
>> +
>> +#ifdef CONFIG_EDAC_ALTERA_MC
>> +
>> static irqreturn_t altr_sdram_mc_err_handler(int irq, void *dev_id)
>> {
>> struct mem_ctl_info *mci = dev_id;
>> @@ -504,6 +534,462 @@ static struct platform_driver altr_sdram_edac_driver = {
>>
>> module_platform_driver(altr_sdram_edac_driver);
>>
>> +#endif /* #ifdef CONFIG_EDAC_ALTERA_MC */
>
> #endif /* CONFIG_EDAC_ALTERA_MC */
>
> is the usual syntax for those.
>

Ok..

>> +/************************* EDAC Parent Probe *************************/
>> +
>> +static const struct of_device_id altr_edac_device_of_match[];
>> +
>> +static const struct of_device_id altr_edac_of_match[] = {
>> + { .compatible = "altr,edac" },
>> + {},
>> +};
>> +MODULE_DEVICE_TABLE(of, altr_edac_of_match);
>> +
>> +static int altr_edac_probe(struct platform_device *pdev)
>> +{
>> + of_platform_populate(pdev->dev.of_node, altr_edac_device_of_match,
>> + NULL, &pdev->dev);
>> + return 0;
>> +}
>> +
>> +static struct platform_driver altr_edac_driver = {
>> + .probe = altr_edac_probe,
>> + .driver = {
>> + .name = "altr_edac",
>> + .of_match_table = altr_edac_of_match,
>> + },
>> +};
>> +module_platform_driver(altr_edac_driver);
>
> WARNING: DT compatible string "altr,edac" appears un-documented -- check ./Documentation/devicetree/bindings/
> #176: FILE: drivers/edac/altera_edac.c:543:
> + { .compatible = "altr,edac" },
>
> WARNING: DT compatible string "altr,l2-edac" appears un-documented -- check ./Documentation/devicetree/bindings/
> #326: FILE: drivers/edac/altera_edac.c:693:
> + { .compatible = "altr,l2-edac", .data = (void *)&l2ecc_data },
>
> WARNING: DT compatible string "altr,ocram-edac" appears un-documented -- check ./Documentation/devicetree/bindings/
> #329: FILE: drivers/edac/altera_edac.c:696:
> + { .compatible = "altr,ocram-edac", .data = (void *)&ocramecc_data },
>

These bindings were in a separate patch that you were not CC'd. I'll
keep you on the entire patch set in the future.


>> +
>> +/************************* EDAC Device Functions *************************/
>> +
>> +/*
>> + * EDAC Device Functions (shared between various IPs).
>> + * The discrete memories use the EDAC Device framework. The probe
>> + * and error handling functions are very similar between memories
>> + * so they are shared. The memory allocation and free for EDAC trigger
>
> and freeing
>
>> + * testing are different for each memory.
>> + */
>> +
>> +const struct edac_device_prv_data ocramecc_data;
>> +const struct edac_device_prv_data l2ecc_data;
>> +
>> +struct edac_device_prv_data {
>> + int (*setup)(struct platform_device *pdev, void __iomem *base);
>> + int ce_clear_mask;
>> + int ue_clear_mask;
>> + struct edac_dev_sysfs_attribute *eccmgr_sysfs_attr;
>> + void * (*alloc_mem)(size_t size, void **other);
>> + void (*free_mem)(void *p, size_t size, void *other);
>> + int ecc_enable_mask;
>> + int ce_set_mask;
>> + int ue_set_mask;
>> + int trig_alloc_sz;
>> +};
>> +
>> +struct altr_edac_device_dev {
>> + void __iomem *base;
>> + int sb_irq;
>> + int db_irq;
>> + const struct edac_device_prv_data *data;
>> + char *edac_dev_name;
>> +};
>> +
>> +static irqreturn_t altr_edac_device_handler(int irq, void *dev_id)
>> +{
>> + struct edac_device_ctl_info *dci = dev_id;
>> + struct altr_edac_device_dev *drvdata = dci->pvt_info;
>> + const struct edac_device_prv_data *priv = drvdata->data;
>> +
>> + if (irq == drvdata->sb_irq) {
>> + if (priv->ce_clear_mask)
>> + writel(priv->ce_clear_mask, drvdata->base);
>> + edac_device_handle_ce(dci, 0, 0, drvdata->edac_dev_name);
>> + }
>> + if (irq == drvdata->db_irq) {
>> + if (priv->ue_clear_mask)
>> + writel(priv->ue_clear_mask, drvdata->base);
>> + edac_device_handle_ue(dci, 0, 0, drvdata->edac_dev_name);
>> + panic("\nEDAC:ECC_DEVICE[Uncorrectable errors]\n");
>> + }
>> +
>> + return IRQ_HANDLED;
>> +}
>> +
>> +ssize_t altr_edac_device_trig(struct edac_device_ctl_info *edac_dci,
>> + const char *buffer, size_t count)
>
> Is that an error injection function? If so, it should be behind
> CONFIG_EDAC_DEBUG or an altera-specific Kconfig item which people can
> enable - you don't want people to be able to inject errors on production
> systems.
>
> Also, those injection facilities should be in debugfs and not sysfs -
> look at xgene_edac for an example.

Yes, there is an injection, and it is using sysfs. Will have to update.

>
> I'll stop here - that's enough TODO for now :-)
>

Yes, I'll address all of the above comments in v9.

Thanks for the review.

Dinh

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/