On Mon, Jan 22, 2024 at 03:38:59PM +0800, Lu Baolu wrote:
--- /dev/null
+++ b/drivers/iommu/iommufd/fault.c
@@ -0,0 +1,255 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2024 Intel Corporation
+ */
+#define pr_fmt(fmt) "iommufd: " fmt
+
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/iommufd.h>
+#include <linux/poll.h>
+#include <linux/anon_inodes.h>
+#include <uapi/linux/iommufd.h>
+
+#include "iommufd_private.h"
+
+static int device_add_fault(struct iopf_group *group)
+{
+ struct iommufd_device *idev = group->cookie->private;
+ void *curr;
+
+ curr = xa_cmpxchg(&idev->faults, group->last_fault.fault.prm.grpid,
+ NULL, group, GFP_KERNEL);
+
+ return curr ? xa_err(curr) : 0;
+}
+
+static void device_remove_fault(struct iopf_group *group)
+{
+ struct iommufd_device *idev = group->cookie->private;
+
+ xa_store(&idev->faults, group->last_fault.fault.prm.grpid,
+ NULL, GFP_KERNEL);
xa_erase()?

Is grpid OK to use this way? Doesn't it come from the originating
device, so we don't control how unique it is?
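Something like this is what I'd expect (untested, keeping your xarray
layout for the moment):

static void device_remove_fault(struct iopf_group *group)
{
	struct iommufd_device *idev = group->cookie->private;

	/* xa_erase() removes the entry and cannot fail, no GFP needed */
	xa_erase(&idev->faults, group->last_fault.fault.prm.grpid);
}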
+void iommufd_fault_destroy(struct iommufd_object *obj)
+{
+ struct iommufd_fault *fault = container_of(obj, struct iommufd_fault, obj);
+ struct iopf_group *group, *next;
+
+ mutex_lock(&fault->mutex);
+ list_for_each_entry_safe(group, next, &fault->deliver, node) {
+ list_del(&group->node);
+ iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
+ iopf_free_group(group);
+ }
+ list_for_each_entry_safe(group, next, &fault->response, node) {
+ list_del(&group->node);
+ device_remove_fault(group);
+ iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
+ iopf_free_group(group);
+ }
+ mutex_unlock(&fault->mutex);
+
+ mutex_destroy(&fault->mutex);
It is really weird to lock a mutex we are about to destroy. At this
point the refcount on the iommufd_object is zero, so there had better
not be any other threads with access to this pointer!
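IOW, just drop the lock/unlock pair around the teardown (sketch):

	/*
	 * Refcount is already zero here, no other thread can hold this
	 * pointer, so walk the deliver/response lists lock-free.
	 */
	list_for_each_entry_safe(group, next, &fault->deliver, node) {
		...
	}
	...
	mutex_destroy(&fault->mutex);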
+static ssize_t iommufd_fault_fops_read(struct file *filep, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ size_t fault_size = sizeof(struct iommu_hwpt_pgfault);
+ struct iommufd_fault *fault = filep->private_data;
+ struct iommu_hwpt_pgfault data;
+ struct iommufd_device *idev;
+ struct iopf_group *group;
+ struct iopf_fault *iopf;
+ size_t done = 0;
+ int rc;
+
+ if (*ppos || count % fault_size)
+ return -ESPIPE;
+
+ mutex_lock(&fault->mutex);
+ while (!list_empty(&fault->deliver) && count > done) {
+ group = list_first_entry(&fault->deliver,
+ struct iopf_group, node);
+
+ if (list_count_nodes(&group->faults) * fault_size > count - done)
+ break;
+
+ idev = (struct iommufd_device *)group->cookie->private;
+ list_for_each_entry(iopf, &group->faults, list) {
+ iommufd_compose_fault_message(&iopf->fault, &data, idev);
+ rc = copy_to_user(buf + done, &data, fault_size);
+ if (rc)
+ goto err_unlock;
+ done += fault_size;
+ }
+
+ rc = device_add_fault(group);
I wonder if this should use xa_alloc() or something similar instead of
trying to key on the grpid?
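Roughly this, letting the xarray pick the index instead of trusting the
device-provided grpid (untested, names are placeholders; the allocated
index would then be what gets reported and looked up on the response
side):

static int device_add_fault(struct iopf_group *group, u32 *out_index)
{
	struct iommufd_device *idev = group->cookie->private;

	/* idev->faults would need XA_FLAGS_ALLOC when it is initialized */
	return xa_alloc(&idev->faults, out_index, group, xa_limit_32b,
			GFP_KERNEL);
}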
+ while (!list_empty(&fault->response) && count > done) {
+ rc = copy_from_user(&response, buf + done, response_size);
+ if (rc)
+ break;
+
+ idev = container_of(iommufd_get_object(fault->ictx,
+ response.dev_id,
+ IOMMUFD_OBJ_DEVICE),
+ struct iommufd_device, obj);
It seems unfortunate that we do this lookup on every iteration of the
loop; I would lift it up and cache it..
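Something along these lines, so the get/put only happens when dev_id
actually changes between responses (untested sketch, error handling
hand-waved):

	struct iommufd_device *idev = NULL;
	u32 cur_dev_id = 0;

	while (!list_empty(&fault->response) && count > done) {
		rc = copy_from_user(&response, buf + done, response_size);
		if (rc)
			break;

		/* Only redo the object lookup when dev_id changes */
		if (!idev || response.dev_id != cur_dev_id) {
			if (idev)
				iommufd_put_object(fault->ictx, &idev->obj);
			idev = container_of(iommufd_get_object(fault->ictx,
							       response.dev_id,
							       IOMMUFD_OBJ_DEVICE),
					    struct iommufd_device, obj);
			if (IS_ERR(idev)) {
				idev = NULL;
				break;
			}
			cur_dev_id = response.dev_id;
		}
		...
	}
	if (idev)
		iommufd_put_object(fault->ictx, &idev->obj);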
+ if (IS_ERR(idev))
+ break;
+
+ group = device_get_fault(idev, response.grpid);
The locking looks wrong here: this should hold the fault mutex, and we
should probably use an xchg-style fetch-and-clear so the entry is
removed at the same time we look it up.
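i.e. something like (untested):

	mutex_lock(&fault->mutex);
	/* xa_erase() hands back the old entry, fetch and clear in one go */
	group = xa_erase(&idev->faults, response.grpid);
	if (group)
		list_del(&group->node);
	mutex_unlock(&fault->mutex);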
+ if (!group ||
+ response.addr != group->last_fault.fault.prm.addr ||
+ ((group->last_fault.fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) &&
+ response.pasid != group->last_fault.fault.prm.pasid)) {
Why? If grpid is unique then just trust it.
+ iommufd_put_object(fault->ictx, &idev->obj);
+ break;
+ }
+
+ iopf_group_response(group, response.code);
+
+ mutex_lock(&fault->mutex);
+ list_del(&group->node);
+ mutex_unlock(&fault->mutex);
+
+ device_remove_fault(group);
+ iopf_free_group(group);
+ done += response_size;
+
+ iommufd_put_object(fault->ictx, &idev->obj);
+ }
+
+ return done;
+}
+
+static __poll_t iommufd_fault_fops_poll(struct file *filep,
+ struct poll_table_struct *wait)
+{
+ struct iommufd_fault *fault = filep->private_data;
+ __poll_t pollflags = 0;
+
+ poll_wait(filep, &fault->wait_queue, wait);
+ mutex_lock(&fault->mutex);
+ if (!list_empty(&fault->deliver))
+ pollflags = EPOLLIN | EPOLLRDNORM;
+ mutex_unlock(&fault->mutex);
+
+ return pollflags;
+}
+
+static const struct file_operations iommufd_fault_fops = {
+ .owner = THIS_MODULE,
+ .open = nonseekable_open,
+ .read = iommufd_fault_fops_read,
+ .write = iommufd_fault_fops_write,
+ .poll = iommufd_fault_fops_poll,
+ .llseek = no_llseek,
+};
No release? That seems wrong..
+static int get_fault_fd(struct iommufd_fault *fault)
+{
+ struct file *filep;
+ int fdno;
+
+ fdno = get_unused_fd_flags(O_CLOEXEC);
+ if (fdno < 0)
+ return fdno;
+
+ filep = anon_inode_getfile("[iommufd-pgfault]", &iommufd_fault_fops,
+ fault, O_RDWR);
See here you stick the iommufd_object into the FD but we don't
refcount it...
And iommufd_fault_destroy() doesn't do anything about this, so it just
UAFs the fault memory in the FD.
You need to refcount the iommufd_object here and add a release
function for the fd to put it back
*and* this FD needs to take a reference on the base iommufd_ctx fd too
as we can't tolerate them being destroyed out of sequence.
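Something like this on the release side (rough sketch, assuming the
matching refcount_inc(&fault->obj.users) and iommufd_ctx_get() are
taken next to the anon_inode_getfile()):

static int iommufd_fault_fops_release(struct inode *inode, struct file *filep)
{
	struct iommufd_fault *fault = filep->private_data;

	/* Pairs with the references taken when the fd was created */
	refcount_dec(&fault->obj.users);
	iommufd_ctx_put(fault->ictx);
	return 0;
}

and wire it into iommufd_fault_fops as .release.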
+int iommufd_fault_alloc(struct iommufd_ucmd *ucmd)
+{
+ struct iommu_fault_alloc *cmd = ucmd->cmd;
+ struct iommufd_fault *fault;
+ int rc;
+
+ if (cmd->flags)
+ return -EOPNOTSUPP;
+
+ fault = iommufd_object_alloc(ucmd->ictx, fault, IOMMUFD_OBJ_FAULT);
+ if (IS_ERR(fault))
+ return PTR_ERR(fault);
+
+ fault->ictx = ucmd->ictx;
+ INIT_LIST_HEAD(&fault->deliver);
+ INIT_LIST_HEAD(&fault->response);
+ mutex_init(&fault->mutex);
+ init_waitqueue_head(&fault->wait_queue);
+
+ rc = get_fault_fd(fault);
+ if (rc < 0)
+ goto out_abort;
And this is ordered wrong: fd_install() has to happen after the return
to user space is guaranteed, as it cannot be undone (rough sketch of
what I mean below the quoted function).
+ cmd->out_fault_id = fault->obj.id;
+ cmd->out_fault_fd = rc;
+
+ rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
+ if (rc)
+ goto out_abort;
+ iommufd_object_finalize(ucmd->ictx, &fault->obj);
fd_install() goes here
+ return 0;
+out_abort:
+ iommufd_object_abort_and_destroy(ucmd->ictx, &fault->obj);
+
+ return rc;
+}
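Putting the ordering comments together, the tail of the allocation path
would look roughly like this (untested; assumes filep and fdno were
already created earlier in the function with the references taken, and
the error unwind is elided):

	cmd->out_fault_id = fault->obj.id;
	cmd->out_fault_fd = fdno;

	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
	if (rc)
		goto out_put_fdno;
	iommufd_object_finalize(ucmd->ictx, &fault->obj);

	/* Nothing can fail after this point, the fd becomes visible now */
	fd_install(fdno, filep);
	return 0;

out_put_fdno:
	put_unused_fd(fdno);
	...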
Jason