[PATCH v3 24/30] drivers: hv: dxgkrnl: Ioctl to put device to error state

From: Iouri Tarassov
Date: Tue Mar 01 2022 - 14:48:47 EST


Implement the ioctl to put the virtual compute device to the error
state (LX_DXMARKDEVICEASERROR).

This ioctl is used by the user mode driver when it detects an
unrecoverable error condition.

When a compute device is put to the error state, all subsequent
ioctl calls to the device will fail.

Signed-off-by: Iouri Tarassov <iourit@xxxxxxxxxxxxxxxxxxx>
---
drivers/hv/dxgkrnl/dxgkrnl.h | 3 +++
drivers/hv/dxgkrnl/dxgvmbus.c | 25 ++++++++++++++++++++++
drivers/hv/dxgkrnl/dxgvmbus.h | 5 +++++
drivers/hv/dxgkrnl/ioctl.c | 39 +++++++++++++++++++++++++++++++++++
include/uapi/misc/d3dkmthk.h | 12 +++++++++++
5 files changed, 84 insertions(+)

diff --git a/drivers/hv/dxgkrnl/dxgkrnl.h b/drivers/hv/dxgkrnl/dxgkrnl.h
index 875e336c11d3..03acf8ce3baa 100644
--- a/drivers/hv/dxgkrnl/dxgkrnl.h
+++ b/drivers/hv/dxgkrnl/dxgkrnl.h
@@ -836,6 +836,9 @@ int dxgvmb_send_update_alloc_property(struct dxgprocess *process,
struct d3dddi_updateallocproperty *args,
struct d3dddi_updateallocproperty *__user
inargs);
+int dxgvmb_send_mark_device_as_error(struct dxgprocess *process,
+ struct dxgadapter *adapter,
+ struct d3dkmt_markdeviceaserror *args);
int dxgvmb_send_set_allocation_priority(struct dxgprocess *process,
struct dxgadapter *adapter,
struct d3dkmt_setallocationpriority *a);
diff --git a/drivers/hv/dxgkrnl/dxgvmbus.c b/drivers/hv/dxgkrnl/dxgvmbus.c
index c76ab993e9ba..bf65599635c6 100644
--- a/drivers/hv/dxgkrnl/dxgvmbus.c
+++ b/drivers/hv/dxgkrnl/dxgvmbus.c
@@ -2708,6 +2708,31 @@ int dxgvmb_send_update_alloc_property(struct dxgprocess *process,
return ret;
}

+int dxgvmb_send_mark_device_as_error(struct dxgprocess *process,
+ struct dxgadapter *adapter,
+ struct d3dkmt_markdeviceaserror *args)
+{
+ struct dxgkvmb_command_markdeviceaserror *command;
+ int ret;
+ struct dxgvmbusmsg msg = {.hdr = NULL};
+
+ ret = init_message(&msg, adapter, process, sizeof(*command));
+ if (ret)
+ goto cleanup;
+ command = (void *)msg.msg;
+
+ command_vgpu_to_host_init2(&command->hdr,
+ DXGK_VMBCOMMAND_MARKDEVICEASERROR,
+ process->host_handle);
+ command->args = *args;
+ ret = dxgvmb_send_sync_msg_ntstatus(msg.channel, msg.hdr, msg.size);
+cleanup:
+ free_message(&msg, process);
+ if (ret)
+ pr_debug("err: %s %d", __func__, ret);
+ return ret;
+}
+
int dxgvmb_send_set_allocation_priority(struct dxgprocess *process,
struct dxgadapter *adapter,
struct d3dkmt_setallocationpriority *args)
diff --git a/drivers/hv/dxgkrnl/dxgvmbus.h b/drivers/hv/dxgkrnl/dxgvmbus.h
index d2d22775645b..615a163f836e 100644
--- a/drivers/hv/dxgkrnl/dxgvmbus.h
+++ b/drivers/hv/dxgkrnl/dxgvmbus.h
@@ -627,6 +627,11 @@ struct dxgkvmb_command_updateallocationproperty_return {
struct ntstatus status;
};

+struct dxgkvmb_command_markdeviceaserror {
+ struct dxgkvmb_command_vgpu_to_host hdr;
+ struct d3dkmt_markdeviceaserror args;
+};
+
/* Returns ntstatus */
struct dxgkvmb_command_changevideomemoryreservation {
struct dxgkvmb_command_vgpu_to_host hdr;
diff --git a/drivers/hv/dxgkrnl/ioctl.c b/drivers/hv/dxgkrnl/ioctl.c
index 763bd76382a0..b2cc2c6fc725 100644
--- a/drivers/hv/dxgkrnl/ioctl.c
+++ b/drivers/hv/dxgkrnl/ioctl.c
@@ -3380,6 +3380,43 @@ dxgk_update_alloc_property(struct dxgprocess *process, void *__user inargs)
return ret;
}

+static int
+dxgk_mark_device_as_error(struct dxgprocess *process, void *__user inargs)
+{
+ struct d3dkmt_markdeviceaserror args;
+ struct dxgadapter *adapter = NULL;
+ struct dxgdevice *device = NULL;
+ int ret;
+
+ pr_debug("ioctl: %s", __func__);
+ ret = copy_from_user(&args, inargs, sizeof(args));
+ if (ret) {
+ pr_err("%s failed to copy input args", __func__);
+ ret = -EINVAL;
+ goto cleanup;
+ }
+ device = dxgprocess_device_by_handle(process, args.device);
+ if (device == NULL) {
+ ret = -EINVAL;
+ goto cleanup;
+ }
+ adapter = device->adapter;
+ ret = dxgadapter_acquire_lock_shared(adapter);
+ if (ret < 0) {
+ adapter = NULL;
+ goto cleanup;
+ }
+ device->execution_state = _D3DKMT_DEVICEEXECUTION_RESET;
+ ret = dxgvmb_send_mark_device_as_error(process, adapter, &args);
+cleanup:
+ if (adapter)
+ dxgadapter_release_lock_shared(adapter);
+ if (device)
+ kref_put(&device->device_kref, dxgdevice_release);
+ pr_debug("ioctl:%s %s %d", errorstr(ret), __func__, ret);
+ return ret;
+}
+
static int
dxgk_query_alloc_residency(struct dxgprocess *process, void *__user inargs)
{
@@ -4515,6 +4552,8 @@ void init_ioctls(void)
LX_DXFLUSHHEAPTRANSITIONS);
SET_IOCTL(/*0x25 */ dxgk_lock2,
LX_DXLOCK2);
+ SET_IOCTL(/*0x26 */ dxgk_mark_device_as_error,
+ LX_DXMARKDEVICEASERROR);
SET_IOCTL(/*0x2a */ dxgk_query_alloc_residency,
LX_DXQUERYALLOCATIONRESIDENCY);
SET_IOCTL(/*0x2e */ dxgk_set_allocation_priority,
diff --git a/include/uapi/misc/d3dkmthk.h b/include/uapi/misc/d3dkmthk.h
index cf08166f4e5d..fb4ff4b905c4 100644
--- a/include/uapi/misc/d3dkmthk.h
+++ b/include/uapi/misc/d3dkmthk.h
@@ -786,6 +786,16 @@ struct d3dkmt_unlock2 {
struct d3dkmthandle allocation;
};

+enum d3dkmt_device_error_reason {
+ _D3DKMT_DEVICE_ERROR_REASON_GENERIC = 0x80000000,
+ _D3DKMT_DEVICE_ERROR_REASON_DRIVER_ERROR = 0x80000006,
+};
+
+struct d3dkmt_markdeviceaserror {
+ struct d3dkmthandle device;
+ enum d3dkmt_device_error_reason reason;
+};
+
enum d3dkmt_standardallocationtype {
_D3DKMT_STANDARDALLOCATIONTYPE_EXISTINGHEAP = 1,
_D3DKMT_STANDARDALLOCATIONTYPE_CROSSADAPTER = 2,
@@ -1286,6 +1296,8 @@ struct d3dkmt_shareobjectwithhost {
_IOWR(0x47, 0x1f, struct d3dkmt_flushheaptransitions)
#define LX_DXLOCK2 \
_IOWR(0x47, 0x25, struct d3dkmt_lock2)
+#define LX_DXMARKDEVICEASERROR \
+ _IOWR(0x47, 0x26, struct d3dkmt_markdeviceaserror)
#define LX_DXQUERYALLOCATIONRESIDENCY \
_IOWR(0x47, 0x2a, struct d3dkmt_queryallocationresidency)
#define LX_DXSETALLOCATIONPRIORITY \
--
2.35.1