[PATCH 02/11] blkoops: add blkoops, a warpper for pstore/blk

From: WeiXiong Liao
Date: Thu Jan 16 2020 - 05:01:36 EST


blkoops is a better wrapper for pstore/blk, which provides efficient
configuration mothod. It divides all configurations of pstore/blk into
2 parts, configurations for user and configurations for driver.

Configurations for user detemine how pstore/blk work, such as
dump_oops and dmesg_size. They can be set by Kconfig and module
parameters.

Configurations for driver are all about block/non-block device, such as
total_size of device and read/write operations. They should be provided
by device drivers, calling blkoops_register_device() for non-block
device and blkoops_register_blkdev() for block device.

If device driver support for panic records, @panic_write must be valid.
If panic occurs and pstore/blk does not recover yet, the first zone
of dmesg will be used.

Besides, Block device driver has no need to verify which partition is
used and provides generic read/write operations. Because blkoops has
done it. It also means that if users do not care panic records but
records for oops/console/pmsg/ftrace, block device driver should do
nothing.

Signed-off-by: WeiXiong Liao <liaoweixiong@xxxxxxxxxxxxxxxxx>
---
MAINTAINERS | 2 +-
fs/pstore/Kconfig | 61 +++++++
fs/pstore/Makefile | 2 +
fs/pstore/blkoops.c | 417 ++++++++++++++++++++++++++++++++++++++++++++++++
include/linux/blkoops.h | 58 +++++++
5 files changed, 539 insertions(+), 1 deletion(-)
create mode 100644 fs/pstore/blkoops.c
create mode 100644 include/linux/blkoops.h

diff --git a/MAINTAINERS b/MAINTAINERS
index cc0a4a8ae06a..e4ba97130560 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13381,7 +13381,7 @@ F: drivers/firmware/efi/efi-pstore.c
F: drivers/acpi/apei/erst.c
F: Documentation/admin-guide/ramoops.rst
F: Documentation/devicetree/bindings/reserved-memory/ramoops.txt
-K: \b(pstore|ramoops)
+K: \b(pstore|ramoops|blkoops)

PTP HARDWARE CLOCK SUPPORT
M: Richard Cochran <richardcochran@xxxxxxxxx>
diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig
index 536fde9e13e8..cd15f9322acd 100644
--- a/fs/pstore/Kconfig
+++ b/fs/pstore/Kconfig
@@ -163,3 +163,64 @@ config PSTORE_BLK
where it can be read back at some later point.

If unsure, say N.
+
+config PSTORE_BLKOOPS
+ tristate "pstore block with oops logger"
+ depends on PSTORE_BLK
+ help
+ This is a wrapper for pstore/blk.
+
+ NOTE that, both kconfig and module parameters can configure blkoops,
+ but module parameters have priority over kconfig.
+
+ If unsure, say N.
+
+config PSTORE_BLKOOPS_DMESG_SIZE
+ int "dmesg size in kbytes for blkoops"
+ depends on PSTORE_BLKOOPS
+ default 64
+ help
+ This just sets size of dmesg (dmesg_size) for pstore/blk. The value
+ must be a multiple of 4096.
+
+ NOTE that, both kconfig and module parameters can configure blkoops,
+ but module parameters have priority over kconfig.
+
+config PSTORE_BLKOOPS_BLKDEV
+ string "block device for blkoops"
+ depends on PSTORE_BLKOOPS
+ default ""
+ help
+ Which block device should be used for pstore/blk.
+
+ It accept the following variants:
+ 1) <hex_major><hex_minor> device number in hexadecimal represents
+ itself no leading 0x, for example b302.
+ 2) /dev/<disk_name> represents the device number of disk
+ 3) /dev/<disk_name><decimal> represents the device number
+ of partition - device number of disk plus the partition number
+ 4) /dev/<disk_name>p<decimal> - same as the above, this form is
+ used when disk name of partitioned disk ends with a digit.
+ 5) PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF representing the
+ unique id of a partition if the partition table provides it.
+ The UUID may be either an EFI/GPT UUID, or refer to an MSDOS
+ partition using the format SSSSSSSS-PP, where SSSSSSSS is a zero-
+ filled hex representation of the 32-bit "NT disk signature", and PP
+ is a zero-filled hex representation of the 1-based partition number.
+ 6) PARTUUID=<UUID>/PARTNROFF=<int> to select a partition in relation
+ to a partition with a known unique id.
+ 7) <major>:<minor> major and minor number of the device separated by
+ a colon.
+
+ NOTE that, both kconfig and module parameters can configure blkoops,
+ but module parameters have priority over kconfig.
+
+config PSTORE_BLKOOPS_DUMP_OOPS
+ bool "dump oops"
+ depends on PSTORE_BLKOOPS
+ default y
+ help
+ Whether blkoops dumps oops or not.
+
+ NOTE that, both kconfig and module parameters can configure blkoops,
+ but module parameters have priority over kconfig.
diff --git a/fs/pstore/Makefile b/fs/pstore/Makefile
index 0ee2fc8d1bfb..24b3d488d2f0 100644
--- a/fs/pstore/Makefile
+++ b/fs/pstore/Makefile
@@ -15,3 +15,5 @@ obj-$(CONFIG_PSTORE_RAM) += ramoops.o

obj-$(CONFIG_PSTORE_BLK) += pstore_blk.o
pstore_blk-y += blkzone.o
+
+obj-$(CONFIG_PSTORE_BLKOOPS) += blkoops.o
diff --git a/fs/pstore/blkoops.c b/fs/pstore/blkoops.c
new file mode 100644
index 000000000000..69904fdeab6b
--- /dev/null
+++ b/fs/pstore/blkoops.c
@@ -0,0 +1,417 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * blkoops.c: Block device Oops logger
+ *
+ * Copyright (C) 2019 WeiXiong Liao <liaoweixiong@xxxxxxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#define pr_fmt(fmt) "blkoops : " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/blkoops.h>
+#include <linux/mount.h>
+#include <linux/uio.h>
+
+static long dmesg_size = -1;
+module_param(dmesg_size, long, 0400);
+MODULE_PARM_DESC(dmesg_size, "demsg size in kbytes");
+
+static int dump_oops = -1;
+module_param(dump_oops, int, 0400);
+MODULE_PARM_DESC(total_size, "whether dump oops");
+
+/**
+ * The block device to use. Most of the time, it is a partition of block
+ * device. It's fine to ignore it if you are not block device and register
+ * to blkoops by blkoops_register_device(). In this case, @blkdev is
+ * useless and @read, @write and @total_size must be supplied.
+ *
+ * @blkdev accepts the following variants:
+ * 1) <hex_major><hex_minor> device number in hexadecimal represents itself
+ * no leading 0x, for example b302.
+ * 2) /dev/<disk_name> represents the device number of disk
+ * 3) /dev/<disk_name><decimal> represents the device number
+ * of partition - device number of disk plus the partition number
+ * 4) /dev/<disk_name>p<decimal> - same as the above, that form is
+ * used when disk name of partitioned disk ends on a digit.
+ * 5) PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF representing the
+ * unique id of a partition if the partition table provides it.
+ * The UUID may be either an EFI/GPT UUID, or refer to an MSDOS
+ * partition using the format SSSSSSSS-PP, where SSSSSSSS is a zero-
+ * filled hex representation of the 32-bit "NT disk signature", and PP
+ * is a zero-filled hex representation of the 1-based partition number.
+ * 6) PARTUUID=<UUID>/PARTNROFF=<int> to select a partition in relation to
+ * a partition with a known unique id.
+ * 7) <major>:<minor> major and minor number of the device separated by
+ * a colon.
+ */
+static char blkdev[80];
+module_param_string(blkdev, blkdev, 80, 0400);
+MODULE_PARM_DESC(blkdev, "the block device for general read/write");
+
+static DEFINE_MUTEX(blkz_lock);
+static struct block_device *blkoops_bdev;
+static struct blkz_info *bzinfo;
+static blkoops_blk_panic_write_op blkdev_panic_write;
+
+#ifdef CONFIG_PSTORE_BLKOOPS_DMESG_SIZE
+#define DEFAULT_DMESG_SIZE CONFIG_PSTORE_BLKOOPS_DMESG_SIZE
+#else
+#define DEFAULT_DMESG_SIZE 0
+#endif
+
+#ifdef CONFIG_PSTORE_BLKOOPS_DUMP_OOPS
+#define DEFAULT_DUMP_OOPS CONFIG_PSTORE_BLKOOPS_DUMP_OOPS
+#else
+#define DEFAULT_DUMP_OOPS 1
+#endif
+
+#ifdef CONFIG_PSTORE_BLKOOPS_BLKDEV
+#define DEFAULT_BLKDEV CONFIG_PSTORE_BLKOOPS_BLKDEV
+#else
+#define DEFAULT_BLKDEV ""
+#endif
+
+/**
+ * register device to blkoops
+ *
+ * Drivers, not only block drivers but also non-block drivers can call this
+ * function to register to blkoops. It will pack for blkzone and pstore.
+ */
+int blkoops_register_device(struct blkoops_device *bo_dev)
+{
+ int ret;
+
+ if (!bo_dev || !bo_dev->total_size || !bo_dev->read || !bo_dev->write)
+ return -EINVAL;
+
+ mutex_lock(&blkz_lock);
+
+ /* someone already registered before */
+ if (bzinfo) {
+ mutex_unlock(&blkz_lock);
+ return -EBUSY;
+ }
+ bzinfo = kzalloc(sizeof(struct blkz_info), GFP_KERNEL);
+ if (!bzinfo) {
+ mutex_unlock(&blkz_lock);
+ return -ENOMEM;
+ }
+
+#define verify_size(name, defsize, alignsize) { \
+ long _##name_ = (name); \
+ if (_##name_ < 0) \
+ _##name_ = (defsize); \
+ _##name_ = _##name_ <= 0 ? 0 : (_##name_ * 1024); \
+ if (_##name_ & (alignsize - 1)) { \
+ pr_info(#name " must align to %d\n", \
+ (alignsize)); \
+ _##name_ = ALIGN(name, alignsize); \
+ } \
+ name = _##name_ / 1024; \
+ bzinfo->name = _##name_; \
+ }
+
+ verify_size(dmesg_size, DEFAULT_DMESG_SIZE, 4096);
+#undef verify_size
+ dump_oops = !!(dump_oops < 0 ? DEFAULT_DUMP_OOPS : dump_oops);
+
+ bzinfo->total_size = bo_dev->total_size;
+ bzinfo->dump_oops = dump_oops;
+ bzinfo->read = bo_dev->read;
+ bzinfo->write = bo_dev->write;
+ bzinfo->panic_write = bo_dev->panic_write;
+ bzinfo->name = "blkoops";
+ bzinfo->owner = THIS_MODULE;
+
+ ret = blkz_register(bzinfo);
+ if (ret) {
+ kfree(bzinfo);
+ bzinfo = NULL;
+ }
+ mutex_unlock(&blkz_lock);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(blkoops_register_device);
+
+void blkoops_unregister_device(struct blkoops_device *bo_dev)
+{
+ mutex_lock(&blkz_lock);
+ if (bzinfo && bzinfo->read == bo_dev->read) {
+ blkz_unregister(bzinfo);
+ kfree(bzinfo);
+ bzinfo = NULL;
+ }
+ mutex_unlock(&blkz_lock);
+}
+EXPORT_SYMBOL_GPL(blkoops_unregister_device);
+
+/**
+ * get block_device of @blkdev
+ * @holder: exclusive holder identifier
+ *
+ * On success, @blkoops_bdev will save the block_device and the returned
+ * block_device has reference count of one.
+ */
+static struct block_device *blkoops_get_bdev(void *holder)
+{
+ struct block_device *bdev = ERR_PTR(-ENODEV);
+ fmode_t mode = FMODE_READ | FMODE_WRITE;
+
+ if (!blkdev[0] && strlen(DEFAULT_BLKDEV))
+ strncpy(blkdev, DEFAULT_BLKDEV, 80);
+ if (!blkdev[0])
+ return ERR_PTR(-ENODEV);
+
+ mutex_lock(&blkz_lock);
+ if (bzinfo)
+ goto out;
+ if (holder)
+ mode |= FMODE_EXCL;
+ bdev = blkdev_get_by_path(blkdev, mode, holder);
+ if (IS_ERR(bdev)) {
+ dev_t devt;
+
+ devt = name_to_dev_t(blkdev);
+ if (devt == 0) {
+ bdev = ERR_PTR(-ENODEV);
+ goto out;
+ }
+ bdev = blkdev_get_by_dev(devt, mode, holder);
+ }
+out:
+ mutex_unlock(&blkz_lock);
+ return bdev;
+}
+
+static void blkoops_put_bdev(struct block_device *bdev, void *holder)
+{
+ fmode_t mode = FMODE_READ | FMODE_WRITE;
+
+ if (!bdev)
+ return;
+
+ mutex_lock(&blkz_lock);
+ if (holder)
+ mode |= FMODE_EXCL;
+ blkdev_put(bdev, mode);
+ mutex_unlock(&blkz_lock);
+}
+
+static ssize_t blkoops_generic_blk_read(char *buf, size_t bytes, loff_t pos)
+{
+ ssize_t ret;
+ struct block_device *bdev = blkoops_bdev;
+ struct file filp;
+ mm_segment_t ofs;
+ struct kiocb kiocb;
+ struct iov_iter iter;
+ struct iovec iov = {
+ .iov_base = (void __user *)buf,
+ .iov_len = bytes
+ };
+
+ if (!bdev)
+ return -ENODEV;
+
+ memset(&filp, 0, sizeof(struct file));
+ filp.f_mapping = bdev->bd_inode->i_mapping;
+ filp.f_flags = O_DSYNC | __O_SYNC | O_NOATIME;
+ filp.f_inode = bdev->bd_inode;
+
+ init_sync_kiocb(&kiocb, &filp);
+ kiocb.ki_pos = pos;
+ iov_iter_init(&iter, READ, &iov, 1, bytes);
+
+ ofs = get_fs();
+ set_fs(KERNEL_DS);
+ ret = generic_file_read_iter(&kiocb, &iter);
+ set_fs(ofs);
+ return ret;
+}
+
+static ssize_t blkoops_generic_blk_write(const char *buf, size_t bytes,
+ loff_t pos)
+{
+ struct block_device *bdev = blkoops_bdev;
+ struct iov_iter iter;
+ struct kiocb kiocb;
+ struct file filp;
+ mm_segment_t ofs;
+ ssize_t ret;
+ struct iovec iov = {
+ .iov_base = (void __user *)buf,
+ .iov_len = bytes
+ };
+
+ if (!bdev)
+ return -ENODEV;
+
+ /* Console/Ftrace recorder may handle buffer until flush dirty zones */
+ if (in_interrupt() || irqs_disabled())
+ return -EBUSY;
+
+ memset(&filp, 0, sizeof(struct file));
+ filp.f_mapping = bdev->bd_inode->i_mapping;
+ filp.f_flags = O_DSYNC | __O_SYNC | O_NOATIME;
+ filp.f_inode = bdev->bd_inode;
+
+ init_sync_kiocb(&kiocb, &filp);
+ kiocb.ki_pos = pos;
+ iov_iter_init(&iter, WRITE, &iov, 1, bytes);
+
+ ofs = get_fs();
+ set_fs(KERNEL_DS);
+
+ inode_lock(bdev->bd_inode);
+ ret = generic_write_checks(&kiocb, &iter);
+ if (ret > 0)
+ ret = generic_perform_write(&filp, &iter, pos);
+ inode_unlock(bdev->bd_inode);
+
+ if (likely(ret > 0)) {
+ const struct file_operations f_op = {.fsync = blkdev_fsync};
+
+ filp.f_op = &f_op;
+ kiocb.ki_pos += ret;
+ ret = generic_write_sync(&kiocb, ret);
+ }
+ set_fs(ofs);
+ return ret;
+}
+
+static inline unsigned long blkoops_bdev_size(struct block_device *bdev)
+{
+ return (unsigned long)part_nr_sects_read(bdev->bd_part) << SECTOR_SHIFT;
+}
+
+static ssize_t blkoops_blk_panic_write(const char *buf, size_t size,
+ loff_t off)
+{
+ int ret;
+
+ if (!blkdev_panic_write)
+ return -EOPNOTSUPP;
+
+ /* size and off must align to SECTOR_SIZE for block device */
+ ret = blkdev_panic_write(buf, off >> SECTOR_SHIFT,
+ size >> SECTOR_SHIFT);
+ return ret ? -EIO : size;
+}
+
+/**
+ * register block device to blkoops
+ * @major: the major device number of registering device
+ * @panic_write: the write interface for panic case.
+ *
+ * It is ONLY used for block device to register to blkoops. In this case,
+ * the module parameter @blkdev must be valid. Generic read/write interfaces
+ * will be used.
+ *
+ * Block driver has no need to verify which partition is used. Block driver
+ * should only tell me what major number is, so blkoops can get the matching
+ * driver for @blkdev.
+ *
+ * If block driver support for panic records, @panic_write must be valid. If
+ * panic occurs but pstore/blk does not recover yet, the first zone of dmesg
+ * will be used.
+ */
+int blkoops_register_blkdev(unsigned int major,
+ blkoops_blk_panic_write_op panic_write)
+{
+ struct block_device *bdev;
+ struct blkoops_device bo_dev = {0};
+ int ret = -ENODEV;
+ void *holder = blkdev;
+
+ bdev = blkoops_get_bdev(holder);
+ if (IS_ERR(bdev))
+ return PTR_ERR(bdev);
+
+ blkoops_bdev = bdev;
+ blkdev_panic_write = panic_write;
+
+ /* only allow driver matching the @blkdev */
+ if (!bdev->bd_dev || MAJOR(bdev->bd_dev) != major)
+ goto err_put_bdev;
+
+ bo_dev.total_size = blkoops_bdev_size(bdev);
+ if (bo_dev.total_size == 0)
+ goto err_put_bdev;
+ bo_dev.panic_write = panic_write ? blkoops_blk_panic_write : NULL;
+ bo_dev.read = blkoops_generic_blk_read;
+ bo_dev.write = blkoops_generic_blk_write;
+
+ ret = blkoops_register_device(&bo_dev);
+ if (ret)
+ goto err_put_bdev;
+ return 0;
+
+err_put_bdev:
+ blkdev_panic_write = NULL;
+ blkoops_bdev = NULL;
+ blkoops_put_bdev(bdev, holder);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(blkoops_register_blkdev);
+
+void blkoops_unregister_blkdev(unsigned int major)
+{
+ struct blkoops_device bo_dev = {.read = blkoops_generic_blk_read};
+ void *holder = blkdev;
+
+ if (blkoops_bdev && MAJOR(blkoops_bdev->bd_dev) == major) {
+ blkoops_unregister_device(&bo_dev);
+ blkoops_put_bdev(blkoops_bdev, holder);
+ blkdev_panic_write = NULL;
+ blkoops_bdev = NULL;
+ }
+}
+EXPORT_SYMBOL_GPL(blkoops_unregister_blkdev);
+
+/**
+ * get information of @blkdev
+ * @devt: the block device num of @blkdev
+ * @nr_sectors: the sector count of @blkdev
+ * @start_sect: the start sector of @blkdev
+ *
+ * Block driver needs the follow information for @panic_write.
+ */
+int blkoops_blkdev_info(dev_t *devt, sector_t *nr_sects, sector_t *start_sect)
+{
+ struct block_device *bdev;
+
+ bdev = blkoops_get_bdev(NULL);
+ if (IS_ERR(bdev))
+ return PTR_ERR(bdev);
+
+ if (devt)
+ *devt = bdev->bd_dev;
+ if (nr_sects)
+ *nr_sects = part_nr_sects_read(bdev->bd_part);
+ if (start_sect)
+ *start_sect = get_start_sect(bdev);
+
+ blkoops_put_bdev(bdev, NULL);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(blkoops_blkdev_info);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("WeiXiong Liao <liaoweixiong@xxxxxxxxxxxxxxxxx>");
+MODULE_DESCRIPTION("Wrapper for Pstore BLK with Oops logger");
diff --git a/include/linux/blkoops.h b/include/linux/blkoops.h
new file mode 100644
index 000000000000..fe63739309aa
--- /dev/null
+++ b/include/linux/blkoops.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __BLKOOPS_H_
+#define __BLKOOPS_H_
+
+#include <linux/types.h>
+#include <linux/blkdev.h>
+#include <linux/pstore_blk.h>
+
+/**
+ * struct blkoops_device - backend blkoops driver structure.
+ *
+ * This structure is ONLY used for non-block device by
+ * blkoops_register_device(). If block device, you are strongly recommended
+ * to use blkoops_register_blkdev().
+ *
+ * @total_size:
+ * The total size in bytes pstore/blk can use. It must be greater than
+ * 4096 and be multiple of 4096.
+ * @read, @write:
+ * The general (not panic) read/write operation.
+ *
+ * Both of the @size and @offset parameters on this interface are
+ * the relative size of the space provided, not the whole disk/flash.
+ *
+ * On success, the number of bytes read should be returned.
+ * On error, negative number should be returned.
+ * @panic_write:
+ * The write operation only used for panic.
+ *
+ * Both of the @size and @offset parameters on this interface are
+ * the relative size of the space provided, not the whole disk/flash.
+ *
+ * On success, the number of bytes read should be returned.
+ * On error, negative number should be returned.
+ */
+struct blkoops_device {
+ unsigned long total_size;
+ blkz_read_op read;
+ blkz_write_op write;
+ blkz_write_op panic_write;
+};
+
+/*
+ * Panic write for block device who should write alignmemt to SECTOR_SIZE.
+ * On success, zero should be returned. Others mean error.
+ */
+typedef int (*blkoops_blk_panic_write_op)(const char *buf, sector_t start_sect,
+ sector_t sects);
+
+int blkoops_register_device(struct blkoops_device *bo_dev);
+void blkoops_unregister_device(struct blkoops_device *bo_dev);
+int blkoops_register_blkdev(unsigned int major,
+ blkoops_blk_panic_write_op panic_write);
+void blkoops_unregister_blkdev(unsigned int major);
+int blkoops_blkdev_info(dev_t *devt, sector_t *nr_sects, sector_t *start_sect);
+
+#endif
--
1.9.1