Re: [PATCH 2/3] qemu: Implement virtio-pstore device
From: Namhyung Kim
Date:  Fri Sep 16 2016 - 06:06:13 EST
On Tue, Sep 13, 2016 at 06:57:10PM +0300, Michael S. Tsirkin wrote:
> On Sat, Aug 20, 2016 at 05:07:43PM +0900, Namhyung Kim wrote:
> > Add virtio pstore device to allow kernel log files saved on the host.
> > It will save the log files on the directory given by pstore device
> > option.
> > 
> >   $ qemu-system-x86_64 -device virtio-pstore,directory=dir-xx ...
> > 
> >   (guest) # echo c > /proc/sysrq-trigger
> > 
> >   $ ls dir-xx
> >   dmesg-1.enc.z  dmesg-2.enc.z
> > 
> > The log files are usually compressed using zlib.  Users can see the log
> > messages directly on the host or on the guest (using pstore filesystem).
> > 
> > The 'directory' property is required for virtio-pstore device to work.
> > It also adds 'bufsize' property to set size of pstore bufer.
> > 
> > Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> > Cc: Radim KrÄmÃÅ <rkrcmar@xxxxxxxxxx>
> > Cc: "Michael S. Tsirkin" <mst@xxxxxxxxxx>
> > Cc: Anthony Liguori <aliguori@xxxxxxxxxx>
> > Cc: Anton Vorontsov <anton@xxxxxxxxxx>
> > Cc: Colin Cross <ccross@xxxxxxxxxxx>
> > Cc: Kees Cook <keescook@xxxxxxxxxxxx>
> > Cc: Tony Luck <tony.luck@xxxxxxxxx>
> > Cc: Steven Rostedt <rostedt@xxxxxxxxxxx>
> > Cc: Ingo Molnar <mingo@xxxxxxxxxx>
> > Cc: Minchan Kim <minchan@xxxxxxxxxx>
> > Cc: Daniel P. Berrange <berrange@xxxxxxxxxx>
> > Cc: kvm@xxxxxxxxxxxxxxx
> > Cc: qemu-devel@xxxxxxxxxx
> > Cc: virtualization@xxxxxxxxxxxxxxxxxxxxxxxxxx
> > Signed-off-by: Namhyung Kim <namhyung@xxxxxxxxxx>
> > ---
> >  hw/virtio/Makefile.objs                        |   2 +-
> >  hw/virtio/virtio-pci.c                         |  52 ++
> >  hw/virtio/virtio-pci.h                         |  14 +
> >  hw/virtio/virtio-pstore.c                      | 699 +++++++++++++++++++++++++
> >  include/hw/pci/pci.h                           |   1 +
> >  include/hw/virtio/virtio-pstore.h              |  36 ++
> >  include/standard-headers/linux/virtio_ids.h    |   1 +
> >  include/standard-headers/linux/virtio_pstore.h |  76 +++
> >  qdev-monitor.c                                 |   1 +
> >  9 files changed, 881 insertions(+), 1 deletion(-)
> >  create mode 100644 hw/virtio/virtio-pstore.c
> >  create mode 100644 include/hw/virtio/virtio-pstore.h
> >  create mode 100644 include/standard-headers/linux/virtio_pstore.h
> > 
> > diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs
> > index 3e2b175..aae7082 100644
> > --- a/hw/virtio/Makefile.objs
> > +++ b/hw/virtio/Makefile.objs
> > @@ -4,4 +4,4 @@ common-obj-y += virtio-bus.o
> >  common-obj-y += virtio-mmio.o
> >  
> >  obj-y += virtio.o virtio-balloon.o 
> > -obj-$(CONFIG_LINUX) += vhost.o vhost-backend.o vhost-user.o
> > +obj-$(CONFIG_LINUX) += vhost.o vhost-backend.o vhost-user.o virtio-pstore.o
> > diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> > index 755f921..c184823 100644
> > --- a/hw/virtio/virtio-pci.c
> > +++ b/hw/virtio/virtio-pci.c
> > @@ -2416,6 +2416,57 @@ static const TypeInfo virtio_host_pci_info = {
> >  };
> >  #endif
> >  
> > +/* virtio-pstore-pci */
> > +
> > +static void virtio_pstore_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
> > +{
> > +    VirtIOPstorePCI *vps = VIRTIO_PSTORE_PCI(vpci_dev);
> > +    DeviceState *vdev = DEVICE(&vps->vdev);
> > +    Error *err = NULL;
> > +
> > +    qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus));
> > +    object_property_set_bool(OBJECT(vdev), true, "realized", &err);
> > +    if (err) {
> > +        error_propagate(errp, err);
> > +        return;
> > +    }
> > +}
> > +
> > +static void virtio_pstore_pci_class_init(ObjectClass *klass, void *data)
> > +{
> > +    DeviceClass *dc = DEVICE_CLASS(klass);
> > +    VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass);
> > +    PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass);
> > +
> > +    k->realize = virtio_pstore_pci_realize;
> > +    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
> > +
> > +    pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
> > +    pcidev_k->device_id = PCI_DEVICE_ID_VIRTIO_PSTORE;
> > +    pcidev_k->revision = VIRTIO_PCI_ABI_VERSION;
> > +    pcidev_k->class_id = PCI_CLASS_OTHERS;
> > +}
> > +
> > +static void virtio_pstore_pci_instance_init(Object *obj)
> > +{
> > +    VirtIOPstorePCI *dev = VIRTIO_PSTORE_PCI(obj);
> > +
> > +    virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
> > +                                TYPE_VIRTIO_PSTORE);
> > +    object_property_add_alias(obj, "directory", OBJECT(&dev->vdev),
> > +                              "directory", &error_abort);
> > +    object_property_add_alias(obj, "bufsize", OBJECT(&dev->vdev),
> > +                              "bufsize", &error_abort);
> > +}
> > +
> > +static const TypeInfo virtio_pstore_pci_info = {
> > +    .name          = TYPE_VIRTIO_PSTORE_PCI,
> > +    .parent        = TYPE_VIRTIO_PCI,
> > +    .instance_size = sizeof(VirtIOPstorePCI),
> > +    .instance_init = virtio_pstore_pci_instance_init,
> > +    .class_init    = virtio_pstore_pci_class_init,
> > +};
> > +
> >  /* virtio-pci-bus */
> >  
> >  static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size,
> > @@ -2485,6 +2536,7 @@ static void virtio_pci_register_types(void)
> >  #ifdef CONFIG_VHOST_SCSI
> >      type_register_static(&vhost_scsi_pci_info);
> >  #endif
> > +    type_register_static(&virtio_pstore_pci_info);
> >  }
> >  
> >  type_init(virtio_pci_register_types)
> > diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h
> > index 25fbf8a..354b2b7 100644
> > --- a/hw/virtio/virtio-pci.h
> > +++ b/hw/virtio/virtio-pci.h
> > @@ -31,6 +31,7 @@
> >  #ifdef CONFIG_VHOST_SCSI
> >  #include "hw/virtio/vhost-scsi.h"
> >  #endif
> > +#include "hw/virtio/virtio-pstore.h"
> >  
> >  typedef struct VirtIOPCIProxy VirtIOPCIProxy;
> >  typedef struct VirtIOBlkPCI VirtIOBlkPCI;
> > @@ -44,6 +45,7 @@ typedef struct VirtIOInputPCI VirtIOInputPCI;
> >  typedef struct VirtIOInputHIDPCI VirtIOInputHIDPCI;
> >  typedef struct VirtIOInputHostPCI VirtIOInputHostPCI;
> >  typedef struct VirtIOGPUPCI VirtIOGPUPCI;
> > +typedef struct VirtIOPstorePCI VirtIOPstorePCI;
> >  
> >  /* virtio-pci-bus */
> >  
> > @@ -324,6 +326,18 @@ struct VirtIOGPUPCI {
> >      VirtIOGPU vdev;
> >  };
> >  
> > +/*
> > + * virtio-pstore-pci: This extends VirtioPCIProxy.
> > + */
> > +#define TYPE_VIRTIO_PSTORE_PCI "virtio-pstore-pci"
> > +#define VIRTIO_PSTORE_PCI(obj) \
> > +        OBJECT_CHECK(VirtIOPstorePCI, (obj), TYPE_VIRTIO_PSTORE_PCI)
> > +
> > +struct VirtIOPstorePCI {
> > +    VirtIOPCIProxy parent_obj;
> > +    VirtIOPstore vdev;
> > +};
> > +
> >  /* Virtio ABI version, if we increment this, we break the guest driver. */
> >  #define VIRTIO_PCI_ABI_VERSION          0
> >  
> > diff --git a/hw/virtio/virtio-pstore.c b/hw/virtio/virtio-pstore.c
> > new file mode 100644
> > index 0000000..b8fb4be
> > --- /dev/null
> > +++ b/hw/virtio/virtio-pstore.c
> > @@ -0,0 +1,699 @@
> > +/*
> > + * Virtio Pstore Device
> > + *
> > + * Copyright (C) 2016  LG Electronics
> > + *
> > + * Authors:
> > + *  Namhyung Kim  <namhyung@xxxxxxxxx>
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> > + * See the COPYING file in the top-level directory.
> > + *
> > + */
> > +
> > +#include <stdio.h>
> > +
> > +#include "qemu/osdep.h"
> > +#include "qemu/iov.h"
> > +#include "qemu-common.h"
> > +#include "qemu/cutils.h"
> > +#include "qemu/error-report.h"
> > +#include "sysemu/kvm.h"
> > +#include "qapi/visitor.h"
> > +#include "qapi-event.h"
> > +#include "io/channel-util.h"
> > +#include "trace.h"
> > +
> > +#include "hw/virtio/virtio.h"
> > +#include "hw/virtio/virtio-bus.h"
> > +#include "hw/virtio/virtio-access.h"
> > +#include "hw/virtio/virtio-pstore.h"
> > +
> > +#define PSTORE_DEFAULT_BUFSIZE   (16 * 1024)
> > +#define PSTORE_DEFAULT_FILE_MAX  5
> > +
> > +/* the index should match to the type value */
> > +static const char *virtio_pstore_file_prefix[] = {
> > +    "unknown-",		/* VIRTIO_PSTORE_TYPE_UNKNOWN */
> 
> Is there value in treating everything unexpected as "unknown"
> and rotating them as if they were logs?
> It might be better to treat everything that's not known
> as guest error.
I was thinking about the version mismatch between the kernel and qemu.
I'd like to make the device can deal with a new kernel version which
might implement a new pstore message type.  It will be saved as
unknown but the kernel can read it properly later.
> 
> 
> > +    "dmesg-",		/* VIRTIO_PSTORE_TYPE_DMESG */
> 
> use named initializers for this instead of comments.
Ok.
> 
> > +};
> > +
> > +static char *virtio_pstore_to_filename(VirtIOPstore *s,
> > +                                       struct virtio_pstore_req *req)
> > +{
> > +    const char *basename;
> > +    unsigned long long id;
> > +    unsigned int type = le16_to_cpu(req->type);
> > +    unsigned int flags = le32_to_cpu(req->flags);
> > +
> > +    if (type < ARRAY_SIZE(virtio_pstore_file_prefix)) {
> > +        basename = virtio_pstore_file_prefix[type];
> > +    } else {
> > +        basename = "unknown-";
> > +    }
> > +
> > +    id = s->id++;
> > +    return g_strdup_printf("%s/%s%llu%s", s->directory, basename, id,
> > +                            flags & VIRTIO_PSTORE_FL_COMPRESSED ? ".enc.z" : "");
> > +}
> > +
> > +static char *virtio_pstore_from_filename(VirtIOPstore *s, char *name,
> > +                                         struct virtio_pstore_fileinfo *info)
> > +{
> > +    char *filename;
> > +    unsigned int idx;
> > +
> > +    filename = g_strdup_printf("%s/%s", s->directory, name);
> > +    if (filename == NULL)
> > +        return NULL;
> > +
> > +    for (idx = 0; idx < ARRAY_SIZE(virtio_pstore_file_prefix); idx++) {
> > +        if (g_str_has_prefix(name, virtio_pstore_file_prefix[idx])) {
> > +            info->type = idx;
> > +            name += strlen(virtio_pstore_file_prefix[idx]);
> > +            break;
> > +        }
> > +    }
> > +
> > +    if (idx == ARRAY_SIZE(virtio_pstore_file_prefix)) {
> > +        g_free(filename);
> > +        return NULL;
> > +    }
> > +
> > +    qemu_strtoull(name, NULL, 0, &info->id);
> 
> What if this fails?
Hmm.. will add a check for return value then.
> 
> > +
> > +    info->flags = 0;
> > +    if (g_str_has_suffix(name, ".enc.z")) {
> > +        info->flags |= VIRTIO_PSTORE_FL_COMPRESSED;
> > +    }
> > +
> > +    return filename;
> > +}
> > +
> > +static int prefix_idx;
> > +static int prefix_count;
> > +static int prefix_len;
> This does not work properly if there are multiple instances
> of it. Pls move everything into device state.
Kernel (currently?) allows only a single pstore device active.  But I
think it'd be better to move them into device state anyway.
> 
> > +
> > +static int filter_pstore(const struct dirent *de)
> > +{
> > +    int i;
> > +
> > +    for (i = 0; i < prefix_count; i++) {
> > +        const char *prefix = virtio_pstore_file_prefix[prefix_idx + i];
> > +
> > +        if (g_str_has_prefix(de->d_name, prefix)) {
> > +            return 1;
> > +        }
> > +    }
> > +    return 0;
> > +}
> > +
> > +static int sort_pstore(const struct dirent **a, const struct dirent **b)
> > +{
> > +    uint64_t id_a, id_b;
> > +
> > +    qemu_strtoull((*a)->d_name + prefix_len, NULL, 0, &id_a);
> > +    qemu_strtoull((*b)->d_name + prefix_len, NULL, 0, &id_b);
> > +
> > +    return id_a - id_b;
> > +}
> > +
> > +static int rotate_pstore_file(VirtIOPstore *s, unsigned short type)
> > +{
> > +    int ret = 0;
> > +    int i, num;
> > +    char *filename;
> > +    struct dirent **files;
> > +
> > +    if (type >= ARRAY_SIZE(virtio_pstore_file_prefix)) {
> > +        type = VIRTIO_PSTORE_TYPE_UNKNOWN;
> > +    }
> > +
> > +    prefix_idx = type;
> > +    prefix_len = strlen(virtio_pstore_file_prefix[type]);
> > +    prefix_count = 1;  /* only scan current type */
> > +
> > +    /* delete the oldest file in the same type */
> > +    num = scandir(s->directory, &files, filter_pstore, sort_pstore);
> > +    if (num < 0)
> > +        return num;
> > +    if (num < (int)s->file_max)
> > +        goto out;
> > +
> > +    filename = g_strdup_printf("%s/%s", s->directory, files[0]->d_name);
> > +    if (filename == NULL) {
> > +        ret = -1;
> > +        goto out;
> > +    }
> > +
> > +    ret = unlink(filename);
> > +
> > +out:
> > +    for (i = 0; i < num; i++) {
> > +        g_free(files[i]);
> > +    }
> > +    g_free(files);
> > +
> > +    return ret;
> > +}
> 
> Pls prefix everything with virtio_pstore or another
> unique prefix. also below.
Ok.
> 
> > +
> > +static ssize_t virtio_pstore_do_open(VirtIOPstore *s)
> > +{
> > +    /* scan all pstore files */
> > +    prefix_idx = 0;
> > +    prefix_count = ARRAY_SIZE(virtio_pstore_file_prefix);
> > +
> > +    s->file_idx = 0;
> > +    s->num_file = scandir(s->directory, &s->files, filter_pstore, alphasort);
> > +
> > +    return s->num_file >= 0 ? 0 : -1;
> > +}
> > +
> > +static ssize_t virtio_pstore_do_close(VirtIOPstore *s)
> > +{
> > +    int i;
> > +
> > +    for (i = 0; i < s->num_file; i++) {
> > +        g_free(s->files[i]);
> > +    }
> > +    g_free(s->files);
> > +    s->files = NULL;
> > +
> > +    s->num_file = 0;
> > +    return 0;
> > +}
> > +
> > +static ssize_t virtio_pstore_do_erase(VirtIOPstore *s,
> > +                                      struct virtio_pstore_req *req)
> > +{
> > +    char *filename;
> > +    int ret;
> > +
> > +    filename = virtio_pstore_to_filename(s, req);
> > +    if (filename == NULL)
> > +        return -1;
> 
> this can't happen.
Why?  The virtio_pstore_to_filename() calls g_strdup_printf().  That
means I don't need to worry about the memory allocation failure?
> 
> also this is a coding style violation.
Oh, I missed the add {}, will fix.
> 
> > +
> > +    ret = unlink(filename);
> > +
> > +    g_free(filename);
> > +    return ret;
> > +}
> > +
> > +struct pstore_read_arg {
> > +    VirtIOPstore *vps;
> > +    VirtQueueElement *elem;
> > +    struct virtio_pstore_fileinfo info;
> > +    QIOChannel *ioc;
> > +};
> > +
> > +static gboolean pstore_async_read_fn(QIOChannel *ioc, GIOCondition condition,
> > +                                     gpointer data)
> > +{
> > +    struct pstore_read_arg *rarg = data;
> > +    struct virtio_pstore_fileinfo *info = &rarg->info;
> > +    VirtIOPstore *vps = rarg->vps;
> > +    VirtQueueElement *elem = rarg->elem;
> > +    struct virtio_pstore_res res;
> > +    size_t offset = sizeof(res) + sizeof(*info);
> > +    struct iovec *sg = elem->in_sg;
> > +    unsigned int sg_num = elem->in_num;
> > +    Error *err = NULL;
> > +    ssize_t len;
> > +    int ret;
> > +
> > +    /* skip res and fileinfo */
> > +    iov_discard_front(&sg, &sg_num, sizeof(res) + sizeof(*info));
> > +
> > +    len = qio_channel_readv(rarg->ioc, sg, sg_num, &err);
> > +    if (len < 0) {
> > +        if (errno == EAGAIN) {
> > +            len = 0;
> > +        }
> > +        ret = -1;
> > +    } else {
> > +        info->len = cpu_to_le32(len);
> > +        ret = 0;
> > +    }
> > +
> > +    res.cmd  = cpu_to_le16(VIRTIO_PSTORE_CMD_READ);
> > +    res.type = cpu_to_le16(VIRTIO_PSTORE_TYPE_UNKNOWN);
> > +    res.ret  = cpu_to_le32(ret);
> > +
> > +    /* now copy res and fileinfo */
> > +    iov_from_buf(elem->in_sg, elem->in_num, 0, &res, sizeof(res));
> > +    iov_from_buf(elem->in_sg, elem->in_num, sizeof(res), info, sizeof(*info));
> > +
> > +    len += offset;
> > +    virtqueue_push(vps->rvq, elem, len);
> > +    virtio_notify(VIRTIO_DEVICE(vps), vps->rvq);
> > +
> > +    return G_SOURCE_REMOVE;
> > +}
> > +
> > +static void free_rarg_fn(gpointer data)
> > +{
> > +    struct pstore_read_arg *rarg = data;
> > +
> > +    qio_channel_close(rarg->ioc, NULL);
> > +
> > +    g_free(rarg->elem);
> > +    g_free(rarg);
> > +}
> > +
> > +static ssize_t virtio_pstore_do_read(VirtIOPstore *s, VirtQueueElement *elem)
> > +{
> > +    char *filename = NULL;
> > +    int fd, idx;
> > +    struct stat stbuf;
> > +    struct pstore_read_arg *rarg = NULL;
> > +    Error *err = NULL;
> > +    int ret = -1;
> > +
> > +    if (s->file_idx >= s->num_file) {
> > +        return 0;
> > +    }
> > +
> > +    rarg = g_malloc(sizeof(*rarg));
> > +    if (rarg == NULL) {
> > +        return -1;
> > +    }
> > +
> > +    idx = s->file_idx++;
> > +    filename = virtio_pstore_from_filename(s, s->files[idx]->d_name,
> > +                                           &rarg->info);
> > +    if (filename == NULL) {
> > +        goto out;
> > +    }
> > +
> > +    fd = open(filename, O_RDONLY);
> > +    if (fd < 0) {
> > +        error_report("cannot open %s", filename);
> > +        goto out;
> > +    }
> 
> I see open here but close nowhere. Does this leak fds?
I guess so.  But this is changed to use qio_channel_file API in v5 and
I hope doing it right.
> 
> > +
> > +    if (fstat(fd, &stbuf) < 0) {
> 
> So we can stat, but can we e.g. read?
It's just being a paranoid, I think it should succeed, no?
> 
> 
> > +        goto out;
> > +    }
> > +
> > +    rarg->vps            = s;
> > +    rarg->elem           = elem;
> > +    rarg->info.id        = cpu_to_le64(rarg->info.id);
> > +    rarg->info.type      = cpu_to_le16(rarg->info.type);
> > +    rarg->info.flags     = cpu_to_le32(rarg->info.flags);
> > +    rarg->info.time_sec  = cpu_to_le64(stbuf.st_ctim.tv_sec);
> 
> Is this seconds since epoch?
> Why ctim specifically?
> Pls add comments.
I think it doesn't matter either ctim or mtim.
> 
> > +    rarg->info.time_nsec = cpu_to_le32(stbuf.st_ctim.tv_nsec);
> 
> Not all hosts support nanosecond precision.
> Do we need some way to tell guest what's reliable?
In fact I'm not sure how much it affects users.  The pstore messages
are occasional and AFAIK pstore keeps it only for users' information.
> 
> Unless you limit this to linux host, you should care about things
> like this (in man fstat)
> 
>            Since  kernel 2.5.48, the stat structure supports nanosecond
>     resolution for the three file timestamp fields.  The nanosecond compoâ
>     nents of each timestamp are available via names of the form
>     st_atim.tv_nsec if the _BSD_SOURCE or _SVID_SOURCE feature  test  macro
>     is  defined.   Nanosecond  timestamps are nowadays standardized,
>     starting with POSIX.1-2008, and, starting with version 2.12, glibc also
>     exposes the nanosecond component names if _POSIX_C_SOURCE is defined
>     with the value 200809L or greater,  or  _XOPEN_SOURCE  is defined  with
>     the  value 700 or greater.  If none of the aforementioned macros are
>     defined, then the nanosecond values are exposed with names of the form
>     st_atimensec.
Thanks for the info.
> 
> 
> 
> 
> > +
> > +    rarg->ioc = qio_channel_new_fd(fd, &err);
> > +    if (err) {
> > +        error_reportf_err(err, "cannot create io channel: ");
> > +        goto out;
> > +    }
> > +
> > +    qio_channel_set_blocking(rarg->ioc, false, &err);
> > +    qio_channel_add_watch(rarg->ioc, G_IO_IN, pstore_async_read_fn, rarg,
> > +                          free_rarg_fn);
> > +    g_free(filename);
> > +    return 1;
> > +
> > +out:
> > +    g_free(filename);
> > +    g_free(rarg);
> > +
> > +    return ret;
> > +}
> > +
> > +struct pstore_write_arg {
> > +    VirtIOPstore *vps;
> > +    VirtQueueElement *elem;
> > +    struct virtio_pstore_req *req;
> > +    QIOChannel *ioc;
> > +};
> > +
> > +static gboolean pstore_async_write_fn(QIOChannel *ioc, GIOCondition condition,
> > +                                      gpointer data)
> > +{
> > +    struct pstore_write_arg *warg = data;
> > +    VirtIOPstore *vps = warg->vps;
> > +    VirtQueueElement *elem = warg->elem;
> > +    struct iovec *sg = elem->out_sg;
> > +    unsigned int sg_num = elem->out_num;
> > +    struct virtio_pstore_res res;
> > +    Error *err = NULL;
> > +    ssize_t len;
> > +    int ret;
> > +
> > +    /* we already consumed the req */
> > +    iov_discard_front(&sg, &sg_num, sizeof(*warg->req));
> > +
> > +    len = qio_channel_writev(warg->ioc, sg, sg_num, &err);
> > +    if (len < 0) {
> > +        ret = -1;
> > +    } else {
> > +        ret = 0;
> > +    }
> 
> This can discard part of the data written.
> Don't we care?
Doing partial write is better than failing out.  But if it's
meaningful to add a retry loop, I'd like to do so.
> 
> > +
> > +    res.cmd  = cpu_to_le16(VIRTIO_PSTORE_CMD_WRITE);
> > +    res.type = warg->req->type;
> > +    res.ret  = cpu_to_le32(ret);
> > +
> > +    /* tell the result to guest */
> > +    iov_from_buf(elem->in_sg, elem->in_num, 0, &res, sizeof(res));
> > +
> > +    virtqueue_push(vps->wvq, elem, sizeof(res));
> > +    virtio_notify(VIRTIO_DEVICE(vps), vps->wvq);
> > +
> > +    return G_SOURCE_REMOVE;
> > +}
> > +
> > +static void free_warg_fn(gpointer data)
> > +{
> > +    struct pstore_write_arg *warg = data;
> > +
> > +    qio_channel_close(warg->ioc, NULL);
> > +
> > +    g_free(warg->elem);
> > +    g_free(warg);
> > +}
> > +
> > +static ssize_t virtio_pstore_do_write(VirtIOPstore *s, VirtQueueElement *elem,
> > +                                      struct virtio_pstore_req *req)
> > +{
> > +    unsigned short type = le16_to_cpu(req->type);
> > +    char *filename = NULL;
> > +    int fd;
> > +    int flags = O_WRONLY | O_CREAT | O_TRUNC;
> > +    struct pstore_write_arg *warg = NULL;
> > +    Error *err = NULL;
> > +    int ret = -1;
> > +
> > +    /* do not keep same type of files more than 'file-max' */
> > +    rotate_pstore_file(s, type);
> 
> If you don't care about failures, should this function
> return a value? How about reporting it to the user?
Did you mean when it failed to delete the oldest file (FYI it's not
really 'rotate').  Hmm.. will add error check and report.
> 
> 
> > +
> > +    filename = virtio_pstore_to_filename(s, req);
> > +    if (filename == NULL) {
> > +        return -1;
> > +    }
> 
> this can't happen
> 
> > +
> > +    warg = g_malloc(sizeof(*warg));
> > +    if (warg == NULL) {
> > +        goto out;
> > +    }
> > +
> > +    fd = open(filename, flags, 0644);
> > +    if (fd < 0) {
> > +        error_report("cannot open %s", filename);
> > +        ret = fd;
> > +        goto out;
> > +    }
> > +
> > +    warg->vps            = s;
> > +    warg->elem           = elem;
> > +    warg->req            = req;
> > +
> > +    warg->ioc = qio_channel_new_fd(fd, &err);
> > +    if (err) {
> > +        error_reportf_err(err, "cannot create io channel: ");
> > +        goto out;
> > +    }
> > +
> > +    qio_channel_set_blocking(warg->ioc, false, &err);
> > +    qio_channel_add_watch(warg->ioc, G_IO_OUT, pstore_async_write_fn, warg,
> > +                          free_warg_fn);
> > +    g_free(filename);
> > +    return 1;
> > +
> > +out:
> > +    g_free(filename);
> > +    g_free(warg);
> > +    return ret;
> > +}
> > +
> > +static void virtio_pstore_handle_io(VirtIODevice *vdev, VirtQueue *vq)
> > +{
> > +    VirtIOPstore *s = VIRTIO_PSTORE(vdev);
> > +    VirtQueueElement *elem;
> > +    struct virtio_pstore_req req;
> > +    struct virtio_pstore_res res;
> > +    ssize_t len = 0;
> > +    int ret;
> > +
> > +    for (;;) {
> > +        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
> > +        if (!elem) {
> > +            return;
> > +        }
> > +
> > +        if (elem->out_num < 1 || elem->in_num < 1) {
> > +            error_report("request or response buffer is missing");
> > +            exit(1);
> > +        }
> > +
> > +        if (elem->out_num > 2 || elem->in_num > 3) {
> > +            error_report("invalid number of input/output buffer");
> > +            exit(1);
> > +        }
> > +
> > +        len = iov_to_buf(elem->out_sg, elem->out_num, 0, &req, sizeof(req));
> > +        if (len != (ssize_t)sizeof(req)) {
> > +            error_report("invalid request size: %ld", (long)len);
> > +            exit(1);
> > +        }
> > +        res.cmd  = req.cmd;
> > +        res.type = req.type;
> > +
> > +        switch (le16_to_cpu(req.cmd)) {
> > +        case VIRTIO_PSTORE_CMD_OPEN:
> > +            ret = virtio_pstore_do_open(s);
> > +            break;
> > +        case VIRTIO_PSTORE_CMD_CLOSE:
> > +            ret = virtio_pstore_do_close(s);
> > +            break;
> > +        case VIRTIO_PSTORE_CMD_ERASE:
> > +            ret = virtio_pstore_do_erase(s, &req);
> > +            break;
> > +        case VIRTIO_PSTORE_CMD_READ:
> > +            ret = virtio_pstore_do_read(s, elem);
> > +            if (ret == 1) {
> > +                /* async channel io */
> > +                continue;
> > +            }
> > +            break;
> > +        case VIRTIO_PSTORE_CMD_WRITE:
> > +            ret = virtio_pstore_do_write(s, elem, &req);
> > +            if (ret == 1) {
> > +                /* async channel io */
> > +                continue;
> > +            }
> > +            break;
> > +        default:
> > +            ret = -1;
> > +            break;
> > +        }
> > +
> > +        res.ret = ret;
> > +
> > +        iov_from_buf(elem->in_sg, elem->in_num, 0, &res, sizeof(res));
> > +        virtqueue_push(vq, elem, sizeof(res) + len);
> > +
> > +        virtio_notify(vdev, vq);
> > +        g_free(elem);
> > +
> > +        if (ret < 0) {
> > +            return;
> 
> what does this do?
If it failed on any processing, reports it to the kernel and stop
processing later commands.  The kernel won't send same kind of command
later.
> 
> > +        }
> > +    }
> > +}
> > +
> > +static void virtio_pstore_device_realize(DeviceState *dev, Error **errp)
> > +{
> > +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > +    VirtIOPstore *s = VIRTIO_PSTORE(dev);
> > +
> > +    virtio_init(vdev, "virtio-pstore", VIRTIO_ID_PSTORE,
> > +                sizeof(struct virtio_pstore_config));
> > +
> > +    s->id = 1;
> > +
> > +    if (!s->bufsize)
> > +        s->bufsize = PSTORE_DEFAULT_BUFSIZE;
> > +    if (!s->file_max)
> > +        s->file_max = PSTORE_DEFAULT_FILE_MAX;
> > +
> > +    s->rvq = virtio_add_queue(vdev, 128, virtio_pstore_handle_io);
> > +    s->wvq = virtio_add_queue(vdev, 128, virtio_pstore_handle_io);
> > +}
> > +
> > +static void virtio_pstore_device_unrealize(DeviceState *dev, Error **errp)
> > +{
> > +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > +
> > +    virtio_cleanup(vdev);
> > +}
> > +
> > +static void virtio_pstore_get_config(VirtIODevice *vdev, uint8_t *config_data)
> > +{
> > +    VirtIOPstore *dev = VIRTIO_PSTORE(vdev);
> > +    struct virtio_pstore_config config;
> 
> Add {} here - you want all fields initialized
> if you add them, to avoid leaking them to guest.
Ok.
> 
> > +
> > +    config.bufsize = cpu_to_le32(dev->bufsize);
> > +
> > +    memcpy(config_data, &config, sizeof(struct virtio_pstore_config));
> > +}
> > +
> > +static void virtio_pstore_set_config(VirtIODevice *vdev,
> > +                                     const uint8_t *config_data)
> > +{
> > +    VirtIOPstore *dev = VIRTIO_PSTORE(vdev);
> > +    struct virtio_pstore_config config;
> > +
> > +    memcpy(&config, config_data, sizeof(struct virtio_pstore_config));
> > +
> > +    dev->bufsize = le32_to_cpu(config.bufsize);
> > +}
> > +
> > +static uint64_t get_features(VirtIODevice *vdev, uint64_t f, Error **errp)
> > +{
> > +    return f;
> > +}
> > +
> > +static void pstore_get_directory(Object *obj, Visitor *v,
> > +                                 const char *name, void *opaque,
> > +                                 Error **errp)
> > +{
> > +    VirtIOPstore *s = opaque;
> > +
> > +    visit_type_str(v, name, &s->directory, errp);
> > +}
> > +
> > +static void pstore_set_directory(Object *obj, Visitor *v,
> > +                                 const char *name, void *opaque,
> > +                                 Error **errp)
> > +{
> > +    VirtIOPstore *s = opaque;
> > +    Error *local_err = NULL;
> > +    char *value;
> > +
> > +    visit_type_str(v, name, &value, &local_err);
> > +    if (local_err) {
> > +        error_propagate(errp, local_err);
> > +        return;
> > +    }
> > +
> > +    g_free(s->directory);
> > +    s->directory = value;
> > +}
> > +
> > +static void pstore_release_directory(Object *obj, const char *name,
> > +                                     void *opaque)
> > +{
> > +    VirtIOPstore *s = opaque;
> > +
> > +    g_free(s->directory);
> > +    s->directory = NULL;
> > +}
> > +
> > +static void pstore_get_bufsize(Object *obj, Visitor *v,
> > +                               const char *name, void *opaque,
> > +                               Error **errp)
> > +{
> > +    VirtIOPstore *s = opaque;
> > +    uint64_t value = s->bufsize;
> > +
> > +    visit_type_size(v, name, &value, errp);
> > +}
> > +
> > +static void pstore_set_bufsize(Object *obj, Visitor *v,
> > +                               const char *name, void *opaque,
> > +                               Error **errp)
> > +{
> > +    VirtIOPstore *s = opaque;
> > +    Error *error = NULL;
> > +    uint64_t value;
> > +
> > +    visit_type_size(v, name, &value, &error);
> > +    if (error) {
> > +        error_propagate(errp, error);
> > +        return;
> > +    }
> > +
> > +    if (value < 4096) {
> > +        error_setg(&error, "Warning: too small buffer size: %"PRIu64, value);
> > +        error_propagate(errp, error);
> > +        return;
> > +    }
> > +
> > +    s->bufsize = value;
> > +}
> > +
> > +static void pstore_get_file_max(Object *obj, Visitor *v,
> > +                                const char *name, void *opaque,
> > +                                Error **errp)
> > +{
> > +    VirtIOPstore *s = opaque;
> > +    int64_t value = s->file_max;
> > +
> > +    visit_type_int(v, name, &value, errp);
> > +}
> > +
> > +static void pstore_set_file_max(Object *obj, Visitor *v,
> > +                                const char *name, void *opaque,
> > +                                Error **errp)
> > +{
> > +    VirtIOPstore *s = opaque;
> > +    Error *error = NULL;
> > +    int64_t value;
> > +
> > +    visit_type_int(v, name, &value, &error);
> > +    if (error) {
> > +        error_propagate(errp, error);
> > +        return;
> > +    }
> > +
> > +    s->file_max = value;
> > +}
> 
> Do you need dynamic properties? There are easier ways
> to define an int property. Same for others.
It was due to my insufficient knowledge about the qemu code base.  I
don't think it needs to be dynamic.
Thanks,
Namhyung
> 
> > +
> > +static Property virtio_pstore_properties[] = {
> > +    DEFINE_PROP_END_OF_LIST(),
> > +};
> > +
> > +static void virtio_pstore_instance_init(Object *obj)
> > +{
> > +    VirtIOPstore *s = VIRTIO_PSTORE(obj);
> > +
> > +    object_property_add(obj, "directory", "str",
> > +                        pstore_get_directory, pstore_set_directory,
> > +                        pstore_release_directory, s, NULL);
> > +    object_property_add(obj, "bufsize", "size",
> > +                        pstore_get_bufsize, pstore_set_bufsize, NULL, s, NULL);
> > +    object_property_add(obj, "file-max", "int",
> > +                        pstore_get_file_max, pstore_set_file_max, NULL, s, NULL);
> > +}
> > +
> > +static void virtio_pstore_class_init(ObjectClass *klass, void *data)
> > +{
> > +    DeviceClass *dc = DEVICE_CLASS(klass);
> > +    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
> > +
> > +    dc->props = virtio_pstore_properties;
> > +    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
> > +    vdc->realize = virtio_pstore_device_realize;
> > +    vdc->unrealize = virtio_pstore_device_unrealize;
> > +    vdc->get_config = virtio_pstore_get_config;
> > +    vdc->set_config = virtio_pstore_set_config;
> > +    vdc->get_features = get_features;
> > +}
> > +
> > +static const TypeInfo virtio_pstore_info = {
> > +    .name = TYPE_VIRTIO_PSTORE,
> > +    .parent = TYPE_VIRTIO_DEVICE,
> > +    .instance_size = sizeof(VirtIOPstore),
> > +    .instance_init = virtio_pstore_instance_init,
> > +    .class_init = virtio_pstore_class_init,
> > +};
> > +
> > +static void virtio_register_types(void)
> > +{
> > +    type_register_static(&virtio_pstore_info);
> > +}
> > +
> > +type_init(virtio_register_types)
> > diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> > index 929ec2f..b31774a 100644
> > --- a/include/hw/pci/pci.h
> > +++ b/include/hw/pci/pci.h
> > @@ -79,6 +79,7 @@
> >  #define PCI_DEVICE_ID_VIRTIO_SCSI        0x1004
> >  #define PCI_DEVICE_ID_VIRTIO_RNG         0x1005
> >  #define PCI_DEVICE_ID_VIRTIO_9P          0x1009
> > +#define PCI_DEVICE_ID_VIRTIO_PSTORE      0x100a
> >  
> >  #define PCI_VENDOR_ID_REDHAT             0x1b36
> >  #define PCI_DEVICE_ID_REDHAT_BRIDGE      0x0001
> > diff --git a/include/hw/virtio/virtio-pstore.h b/include/hw/virtio/virtio-pstore.h
> > new file mode 100644
> > index 0000000..85b1828
> > --- /dev/null
> > +++ b/include/hw/virtio/virtio-pstore.h
> > @@ -0,0 +1,36 @@
> > +/*
> > + * Virtio Pstore Support
> > + *
> > + * Authors:
> > + *  Namhyung Kim      <namhyung@xxxxxxxxx>
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2.  See
> > + * the COPYING file in the top-level directory.
> > + *
> > + */
> > +
> > +#ifndef _QEMU_VIRTIO_PSTORE_H
> > +#define _QEMU_VIRTIO_PSTORE_H
> > +
> > +#include "standard-headers/linux/virtio_pstore.h"
> > +#include "hw/virtio/virtio.h"
> > +#include "hw/pci/pci.h"
> > +
> > +#define TYPE_VIRTIO_PSTORE "virtio-pstore-device"
> > +#define VIRTIO_PSTORE(obj) \
> > +        OBJECT_CHECK(VirtIOPstore, (obj), TYPE_VIRTIO_PSTORE)
> > +
> > +typedef struct VirtIOPstore {
> > +    VirtIODevice    parent_obj;
> > +    VirtQueue      *rvq;
> > +    VirtQueue      *wvq;
> > +    char           *directory;
> > +    int             file_idx;
> > +    int             num_file;
> > +    struct dirent **files;
> > +    uint64_t        id;
> > +    uint64_t        bufsize;
> > +    uint64_t        file_max;
> > +} VirtIOPstore;
> > +
> > +#endif
> > diff --git a/include/standard-headers/linux/virtio_ids.h b/include/standard-headers/linux/virtio_ids.h
> > index 77925f5..c72a9ab 100644
> > --- a/include/standard-headers/linux/virtio_ids.h
> > +++ b/include/standard-headers/linux/virtio_ids.h
> > @@ -41,5 +41,6 @@
> >  #define VIRTIO_ID_CAIF	       12 /* Virtio caif */
> >  #define VIRTIO_ID_GPU          16 /* virtio GPU */
> >  #define VIRTIO_ID_INPUT        18 /* virtio input */
> > +#define VIRTIO_ID_PSTORE       22 /* virtio pstore */
> >  
> >  #endif /* _LINUX_VIRTIO_IDS_H */
> > diff --git a/include/standard-headers/linux/virtio_pstore.h b/include/standard-headers/linux/virtio_pstore.h
> > new file mode 100644
> > index 0000000..2f91839
> > --- /dev/null
> > +++ b/include/standard-headers/linux/virtio_pstore.h
> > @@ -0,0 +1,76 @@
> > +#ifndef _LINUX_VIRTIO_PSTORE_H
> > +#define _LINUX_VIRTIO_PSTORE_H
> > +/* This header is BSD licensed so anyone can use the definitions to implement
> > + * compatible drivers/servers.
> > + *
> > + * Redistribution and use in source and binary forms, with or without
> > + * modification, are permitted provided that the following conditions
> > + * are met:
> > + * 1. Redistributions of source code must retain the above copyright
> > + *    notice, this list of conditions and the following disclaimer.
> > + * 2. Redistributions in binary form must reproduce the above copyright
> > + *    notice, this list of conditions and the following disclaimer in the
> > + *    documentation and/or other materials provided with the distribution.
> > + * 3. Neither the name of IBM nor the names of its contributors
> > + *    may be used to endorse or promote products derived from this software
> > + *    without specific prior written permission.
> > + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
> > + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> > + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> > + * ARE DISCLAIMED.  IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
> > + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> > + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
> > + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> > + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
> > + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> > + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> > + * SUCH DAMAGE. */
> > +#include "standard-headers/linux/types.h"
> > +#include "standard-headers/linux/virtio_types.h"
> > +#include "standard-headers/linux/virtio_ids.h"
> > +#include "standard-headers/linux/virtio_config.h"
> > +
> > +#define VIRTIO_PSTORE_CMD_NULL   0
> > +#define VIRTIO_PSTORE_CMD_OPEN   1
> > +#define VIRTIO_PSTORE_CMD_READ   2
> > +#define VIRTIO_PSTORE_CMD_WRITE  3
> > +#define VIRTIO_PSTORE_CMD_ERASE  4
> > +#define VIRTIO_PSTORE_CMD_CLOSE  5
> > +
> > +#define VIRTIO_PSTORE_TYPE_UNKNOWN  0
> > +#define VIRTIO_PSTORE_TYPE_DMESG    1
> > +
> > +#define VIRTIO_PSTORE_FL_COMPRESSED  1
> > +
> > +struct virtio_pstore_req {
> > +    __virtio16 cmd;
> > +    __virtio16 type;
> > +    __virtio32 flags;
> > +    __virtio64 id;
> > +    __virtio32 count;
> > +    __virtio32 reserved;
> > +};
> > +
> > +struct virtio_pstore_res {
> > +    __virtio16 cmd;
> > +    __virtio16 type;
> > +    __virtio32 ret;
> > +};
> > +
> > +struct virtio_pstore_fileinfo {
> > +    __virtio64 id;
> > +    __virtio32 count;
> > +    __virtio16 type;
> > +    __virtio16 unused;
> > +    __virtio32 flags;
> > +    __virtio32 len;
> > +    __virtio64 time_sec;
> > +    __virtio32 time_nsec;
> > +    __virtio32 reserved;
> > +};
> > +
> > +struct virtio_pstore_config {
> > +    __virtio32 bufsize;
> > +};
> > +
> > +#endif /* _LINUX_VIRTIO_PSTORE_H */
> > diff --git a/qdev-monitor.c b/qdev-monitor.c
> > index e19617f..e1df5a9 100644
> > --- a/qdev-monitor.c
> > +++ b/qdev-monitor.c
> > @@ -73,6 +73,7 @@ static const QDevAlias qdev_alias_table[] = {
> >      { "virtio-serial-pci", "virtio-serial", QEMU_ARCH_ALL & ~QEMU_ARCH_S390X },
> >      { "virtio-tablet-ccw", "virtio-tablet", QEMU_ARCH_S390X },
> >      { "virtio-tablet-pci", "virtio-tablet", QEMU_ARCH_ALL & ~QEMU_ARCH_S390X },
> > +    { "virtio-pstore-pci", "virtio-pstore" },
> >      { }
> >  };
> >  
> > -- 
> > 2.9.3