Re: [PATCH] pstore: drop pmsg bounce buffer

From: Kees Cook
Date: Thu Sep 08 2016 - 16:05:02 EST


On Thu, Sep 1, 2016 at 8:13 AM, Mark Salyzyn <salyzyn@xxxxxxxxxxx> wrote:
> Removing a bounce buffer copy operation in the pmsg driver path is
> always better. We also gain in overall performance by not requesting
> a vmalloc on every write as this can cause precious RT tasks, such
> as user facing media operation, to stall while memory is being
> reclaimed. Added a write_buf_user to the pstore functions, a backup
> platform write_buf_user that uses the small buffer that is part of
> the instance, and implemented a ramoops write_buf_user that only
> supports PSTORE_TYPE_PMSG.
>
> Signed-off-by: Mark Salyzyn <salyzyn@xxxxxxxxxxx>

I've applied this, thanks!

> ---
> fs/pstore/platform.c | 36 +++++++++++++++++++++++++++++++++++
> fs/pstore/pmsg.c | 35 ++++++----------------------------
> fs/pstore/ram.c | 19 +++++++++++++++++++
> fs/pstore/ram_core.c | 47 ++++++++++++++++++++++++++++++++++++++++++++--
> include/linux/pstore.h | 11 ++++++++---
> include/linux/pstore_ram.h | 7 +++++--
> 6 files changed, 119 insertions(+), 36 deletions(-)
>
> diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
> index 16ecca5..b9840a6 100644
> --- a/fs/pstore/platform.c
> +++ b/fs/pstore/platform.c
> @@ -623,6 +623,40 @@ static int pstore_write_compat(enum pstore_type_id type,
> size, psi);
> }
>
> +static int pstore_write_buf_user_compat(enum pstore_type_id type,
> + enum kmsg_dump_reason reason,
> + u64 *id, unsigned int part,
> + const char __user *buf,
> + bool compressed, size_t size,
> + struct pstore_info *psi)
> +{
> + unsigned long flags = 0;
> + size_t i, bufsize = size;
> + long ret = 0;
> +
> + if (unlikely(!access_ok(VERIFY_READ, buf, size)))
> + return -EFAULT;
> + if (bufsize > psinfo->bufsize)
> + bufsize = psinfo->bufsize;
> + spin_lock_irqsave(&psinfo->buf_lock, flags);
> + for (i = 0; i < size; ) {
> + size_t c = min(size - i, bufsize);
> +
> + ret = __copy_from_user(psinfo->buf, buf + i, c);
> + if (unlikely(ret != 0)) {
> + ret = -EFAULT;
> + break;
> + }
> + ret = psi->write_buf(type, reason, id, part, psinfo->buf,
> + compressed, c, psi);
> + if (unlikely(ret < 0))
> + break;
> + i += c;
> + }
> + spin_unlock_irqrestore(&psinfo->buf_lock, flags);
> + return unlikely(ret < 0) ? ret : size;
> +}
> +
> /*
> * platform specific persistent storage driver registers with
> * us here. If pstore is already mounted, call the platform
> @@ -645,6 +679,8 @@ int pstore_register(struct pstore_info *psi)
>
> if (!psi->write)
> psi->write = pstore_write_compat;
> + if (!psi->write_buf_user)
> + psi->write_buf_user = pstore_write_buf_user_compat;
> psinfo = psi;
> mutex_init(&psinfo->read_mutex);
> spin_unlock(&pstore_lock);
> diff --git a/fs/pstore/pmsg.c b/fs/pstore/pmsg.c
> index 7de20cd..78f6176 100644
> --- a/fs/pstore/pmsg.c
> +++ b/fs/pstore/pmsg.c
> @@ -19,48 +19,25 @@
> #include "internal.h"
>
> static DEFINE_MUTEX(pmsg_lock);
> -#define PMSG_MAX_BOUNCE_BUFFER_SIZE (2*PAGE_SIZE)
>
> static ssize_t write_pmsg(struct file *file, const char __user *buf,
> size_t count, loff_t *ppos)
> {
> - size_t i, buffer_size;
> - char *buffer;
> + u64 id;
> + int ret;
>
> if (!count)
> return 0;
>
> + /* check outside lock, page in any data. write_buf_user also checks */
> if (!access_ok(VERIFY_READ, buf, count))
> return -EFAULT;
>
> - buffer_size = count;
> - if (buffer_size > PMSG_MAX_BOUNCE_BUFFER_SIZE)
> - buffer_size = PMSG_MAX_BOUNCE_BUFFER_SIZE;
> - buffer = vmalloc(buffer_size);
> - if (!buffer)
> - return -ENOMEM;
> -
> mutex_lock(&pmsg_lock);
> - for (i = 0; i < count; ) {
> - size_t c = min(count - i, buffer_size);
> - u64 id;
> - long ret;
> -
> - ret = __copy_from_user(buffer, buf + i, c);
> - if (unlikely(ret != 0)) {
> - mutex_unlock(&pmsg_lock);
> - vfree(buffer);
> - return -EFAULT;
> - }
> - psinfo->write_buf(PSTORE_TYPE_PMSG, 0, &id, 0, buffer, 0, c,
> - psinfo);
> -
> - i += c;
> - }
> -
> + ret = psinfo->write_buf_user(PSTORE_TYPE_PMSG, 0, &id, 0, buf, 0, count,
> + psinfo);
> mutex_unlock(&pmsg_lock);
> - vfree(buffer);
> - return count;
> + return ret ? ret : count;
> }
>
> static const struct file_operations pmsg_fops = {
> diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
> index 7a034d6..893e040c 100644
> --- a/fs/pstore/ram.c
> +++ b/fs/pstore/ram.c
> @@ -331,6 +331,24 @@ static int notrace ramoops_pstore_write_buf(enum pstore_type_id type,
> return 0;
> }
>
> +static int notrace ramoops_pstore_write_buf_user(enum pstore_type_id type,
> + enum kmsg_dump_reason reason,
> + u64 *id, unsigned int part,
> + const char __user *buf,
> + bool compressed, size_t size,
> + struct pstore_info *psi)
> +{
> + if (type == PSTORE_TYPE_PMSG) {
> + struct ramoops_context *cxt = psi->data;
> +
> + if (!cxt->mprz)
> + return -ENOMEM;
> + return persistent_ram_write_user(cxt->mprz, buf, size);
> + }
> +
> + return -EINVAL;
> +}
> +
> static int ramoops_pstore_erase(enum pstore_type_id type, u64 id, int count,
> struct timespec time, struct pstore_info *psi)
> {
> @@ -369,6 +387,7 @@ static struct ramoops_context oops_cxt = {
> .open = ramoops_pstore_open,
> .read = ramoops_pstore_read,
> .write_buf = ramoops_pstore_write_buf,
> + .write_buf_user = ramoops_pstore_write_buf_user,
> .erase = ramoops_pstore_erase,
> },
> };
> diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
> index 76c3f80..aa9afe5 100644
> --- a/fs/pstore/ram_core.c
> +++ b/fs/pstore/ram_core.c
> @@ -17,15 +17,16 @@
> #include <linux/device.h>
> #include <linux/err.h>
> #include <linux/errno.h>
> -#include <linux/kernel.h>
> #include <linux/init.h>
> #include <linux/io.h>
> +#include <linux/kernel.h>
> #include <linux/list.h>
> #include <linux/memblock.h>
> +#include <linux/pstore_ram.h>
> #include <linux/rslib.h>
> #include <linux/slab.h>
> +#include <linux/uaccess.h>
> #include <linux/vmalloc.h>
> -#include <linux/pstore_ram.h>
> #include <asm/page.h>
>
> struct persistent_ram_buffer {
> @@ -303,6 +304,16 @@ static void notrace persistent_ram_update(struct persistent_ram_zone *prz,
> persistent_ram_update_ecc(prz, start, count);
> }
>
> +static int notrace persistent_ram_update_user(struct persistent_ram_zone *prz,
> + const void __user *s, unsigned int start, unsigned int count)
> +{
> + struct persistent_ram_buffer *buffer = prz->buffer;
> + int ret = unlikely(__copy_from_user(buffer->data + start, s, count)) ?
> + -EFAULT : 0;
> + persistent_ram_update_ecc(prz, start, count);
> + return ret;
> +}
> +
> void persistent_ram_save_old(struct persistent_ram_zone *prz)
> {
> struct persistent_ram_buffer *buffer = prz->buffer;
> @@ -356,6 +367,38 @@ int notrace persistent_ram_write(struct persistent_ram_zone *prz,
> return count;
> }
>
> +int notrace persistent_ram_write_user(struct persistent_ram_zone *prz,
> + const void __user *s, unsigned int count)
> +{
> + int rem, ret = 0, c = count;
> + size_t start;
> +
> + if (unlikely(!access_ok(VERIFY_READ, s, count)))
> + return -EFAULT;
> + if (unlikely(c > prz->buffer_size)) {
> + s += c - prz->buffer_size;
> + c = prz->buffer_size;
> + }
> +
> + buffer_size_add(prz, c);
> +
> + start = buffer_start_add(prz, c);
> +
> + rem = prz->buffer_size - start;
> + if (unlikely(rem < c)) {
> + ret = persistent_ram_update_user(prz, s, start, rem);
> + s += rem;
> + c -= rem;
> + start = 0;
> + }
> + if (likely(!ret))
> + ret = persistent_ram_update_user(prz, s, start, c);
> +
> + persistent_ram_update_header_ecc(prz);
> +
> + return unlikely(ret) ? ret : count;
> +}
> +

As a follow-up, I wonder if ram_write and ram_write_user could be
consolidated in some way since they're identical except for the update
function, and contain a lot of non-obvious work that should stay the
same between both functions (and could probably use more comments).

-Kees

--
Kees Cook
Nexus Security