Re: [PATCH v12 7/7] fpga: sec-mgr: expose hardware error info

From: Moritz Fischer
Date: Mon May 10 2021 - 13:41:09 EST


On Mon, May 03, 2021 at 02:35:46PM -0700, Russ Weight wrote:
> Extend the FPGA Security Manager class driver to include
> an optional update/hw_errinfo sysfs node that can be used
> to retrieve 64 bits of device specific error information
> following a secure update failure.
>
> The underlying driver must provide a get_hw_errinfo() callback
> function to enable this feature. This data is treated as
> opaque by the class driver. It is left to user-space software
> or support personnel to interpret this data.
>
> Signed-off-by: Russ Weight <russell.h.weight@xxxxxxxxx>
> Reviewed-by: Tom Rix <trix@xxxxxxxxxx>
> ---
> v12:
> - Updated Date and KernelVersion fields in ABI documentation
> v11:
> - No change
> v10:
> - Rebased to 5.12-rc2 next
> - Updated Date and KernelVersion in ABI documentation
> v9:
> - Updated Date and KernelVersion in ABI documentation
> v8:
> - No change
> v7:
> - Changed Date in documentation file to December 2020
> v6:
> - No change
> v5:
> - No change
> v4:
> - Changed from "Intel FPGA Security Manager" to "FPGA Security Manager"
> and removed unnecessary references to "Intel".
> - Changed: iops -> sops, imgr -> smgr, IFPGA_ -> FPGA_, ifpga_ to fpga_
> v3:
> - No change
> v2:
> - Bumped documentation date and version
> ---
> .../ABI/testing/sysfs-class-fpga-sec-mgr | 14 +++++++
> drivers/fpga/fpga-sec-mgr.c | 38 +++++++++++++++++++
> include/linux/fpga/fpga-sec-mgr.h | 5 +++
> 3 files changed, 57 insertions(+)
>
> diff --git a/Documentation/ABI/testing/sysfs-class-fpga-sec-mgr b/Documentation/ABI/testing/sysfs-class-fpga-sec-mgr
> index 749f2d4c78d3..f1881ce39c63 100644
> --- a/Documentation/ABI/testing/sysfs-class-fpga-sec-mgr
> +++ b/Documentation/ABI/testing/sysfs-class-fpga-sec-mgr
> @@ -65,3 +65,17 @@ Description: Read-only. Returns a string describing the failure
> idle state. If this file is read while a secure
> update is in progress, then the read will fail with
> EBUSY.
> +
> +What: /sys/class/fpga_sec_mgr/fpga_secX/update/hw_errinfo
> +Date: June 2021
> +KernelVersion: 5.14
> +Contact: Russ Weight <russell.h.weight@xxxxxxxxx>
> +Description: Read-only. Returns a 64 bit error value providing
> + hardware specific information that may be useful in
> + debugging errors that occur during FPGA image updates.
> + This file is only visible if the underlying device
> + supports it. The hw_errinfo value is only accessible
> + when the secure update engine is in the idle state.
> + If this file is read while a secure update is in
> + progress, then the read will fail with EBUSY.
> + Format: "0x%llx".
> diff --git a/drivers/fpga/fpga-sec-mgr.c b/drivers/fpga/fpga-sec-mgr.c
> index 35bd419bd3b9..3c59b142291d 100644
> --- a/drivers/fpga/fpga-sec-mgr.c
> +++ b/drivers/fpga/fpga-sec-mgr.c
> @@ -36,10 +36,17 @@ static void set_error(struct fpga_sec_mgr *smgr, enum fpga_sec_err err_code)
> smgr->err_code = err_code;
> }
>
> +static void set_hw_errinfo(struct fpga_sec_mgr *smgr)
> +{
> + if (smgr->sops->get_hw_errinfo)
> + smgr->hw_errinfo = smgr->sops->get_hw_errinfo(smgr);
> +}

Nit: maybe name this fpga_sec_set_hw_errinfo(), to match the fpga_sec_ prefix used by fpga_sec_dev_error() below?
> +
> static void fpga_sec_dev_error(struct fpga_sec_mgr *smgr,
> enum fpga_sec_err err_code)
> {
> set_error(smgr, err_code);
> + set_hw_errinfo(smgr);
> smgr->sops->cancel(smgr);
> }
>
> @@ -221,6 +228,23 @@ error_show(struct device *dev, struct device_attribute *attr, char *buf)
> }
> static DEVICE_ATTR_RO(error);
>
> +static ssize_t
> +hw_errinfo_show(struct device *dev, struct device_attribute *attr, char *buf)
> +{
> + struct fpga_sec_mgr *smgr = to_sec_mgr(dev);
> + int ret;
> +
> + mutex_lock(&smgr->lock);
> + if (smgr->progress != FPGA_SEC_PROG_IDLE)
> + ret = -EBUSY;
> + else
> + ret = sysfs_emit(buf, "0x%llx\n", smgr->hw_errinfo);
> + mutex_unlock(&smgr->lock);
> +
> + return ret;
> +}
> +static DEVICE_ATTR_RO(hw_errinfo);
> +
> static ssize_t remaining_size_show(struct device *dev,
> struct device_attribute *attr, char *buf)
> {
> @@ -252,6 +276,7 @@ static ssize_t filename_store(struct device *dev, struct device_attribute *attr,
> }
>
> smgr->err_code = FPGA_SEC_ERR_NONE;
> + smgr->hw_errinfo = 0;
> smgr->request_cancel = false;
> smgr->progress = FPGA_SEC_PROG_READING;
> reinit_completion(&smgr->update_done);
> @@ -286,18 +311,31 @@ static ssize_t cancel_store(struct device *dev, struct device_attribute *attr,
> }
> static DEVICE_ATTR_WO(cancel);
>
> +static umode_t
> +sec_mgr_update_visible(struct kobject *kobj, struct attribute *attr, int n)
> +{
> + struct fpga_sec_mgr *smgr = to_sec_mgr(kobj_to_dev(kobj));
> +
> + if (attr == &dev_attr_hw_errinfo.attr && !smgr->sops->get_hw_errinfo)
> + return 0;
> +
> + return attr->mode;
> +}
> +
> static struct attribute *sec_mgr_update_attrs[] = {
> &dev_attr_filename.attr,
> &dev_attr_cancel.attr,
> &dev_attr_status.attr,
> &dev_attr_error.attr,
> &dev_attr_remaining_size.attr,
> + &dev_attr_hw_errinfo.attr,
> NULL,
> };
>
> static struct attribute_group sec_mgr_update_attr_group = {
> .name = "update",
> .attrs = sec_mgr_update_attrs,
> + .is_visible = sec_mgr_update_visible,
> };
>
> static ssize_t name_show(struct device *dev,
> diff --git a/include/linux/fpga/fpga-sec-mgr.h b/include/linux/fpga/fpga-sec-mgr.h
> index 0e1f50434024..a99bfd28f38c 100644
> --- a/include/linux/fpga/fpga-sec-mgr.h
> +++ b/include/linux/fpga/fpga-sec-mgr.h
> @@ -40,6 +40,9 @@ enum fpga_sec_err {
> * function and is called at the completion
> * of the update, whether success or failure,
> * if the prepare function succeeded.
> + * @get_hw_errinfo: Optional: Return u64 hw specific error info.
> + * The software err_code may be used to determine
> + * whether the hw error info is applicable.
> */
> struct fpga_sec_mgr_ops {
> enum fpga_sec_err (*prepare)(struct fpga_sec_mgr *smgr);
> @@ -47,6 +50,7 @@ struct fpga_sec_mgr_ops {
> enum fpga_sec_err (*poll_complete)(struct fpga_sec_mgr *smgr);
> enum fpga_sec_err (*cancel)(struct fpga_sec_mgr *smgr);
> void (*cleanup)(struct fpga_sec_mgr *smgr);
> + u64 (*get_hw_errinfo)(struct fpga_sec_mgr *smgr);
> };
>
> /* Update progress codes */
> @@ -72,6 +76,7 @@ struct fpga_sec_mgr {
> enum fpga_sec_prog progress;
> enum fpga_sec_prog err_state; /* progress state at time of failure */
> enum fpga_sec_err err_code; /* security manager error code */
> + u64 hw_errinfo; /* 64 bits of HW specific error info */
> bool request_cancel;
> bool driver_unload;
> void *priv;
> --
> 2.25.1
>

Thanks,
Moritz