Re: [PATCH 4/5] kernel: Prepare set_kthread_struct to be used for setup_thread_fn

From: Mike Christie
Date: Mon Feb 13 2023 - 19:50:54 EST


On 2/13/23 1:54 PM, Linus Torvalds wrote:
> IOW, this patch is just being much too complicated for no good reason.
> The point was to make it _simpler_ to do thread setup, not more
> complicated.

Ah ok, I think I better understand your original question/suggestion.
I was thinking we couldn't do:

diff --git a/kernel/fork.c b/kernel/fork.c
index 9f7fe3541897..e94dd5a838d6 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2109,6 +2109,11 @@ static __latent_entropy struct task_struct *copy_process(
siginitsetinv(&p->blocked, sigmask(SIGKILL)|sigmask(SIGSTOP));
}

+
+ if (args->name)
+ snprintf(p->comm, TASK_COMM_LEN, "%s-%d", args->name,
+ current->pid);
+
p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? args->child_tid : NULL;
/*
* Clear TID on mm_release()?


because it would only support vhost and io_uring's sqpoll.c case which use the
parent pid in the name. I went wild trying to support every case and also kthread's
struct setup :)

I could balance it by doing the following which would support vhost, io_uring sqpoll
and kthread's name setting:


diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 357e0068497c..81179faa943d 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -23,6 +23,7 @@ struct kernel_clone_args {
int __user *pidfd;
int __user *child_tid;
int __user *parent_tid;
+ const char *name;
int exit_signal;
unsigned long stack;
unsigned long stack_size;
@@ -89,9 +90,11 @@ extern void exit_files(struct task_struct *);
extern void exit_itimers(struct task_struct *);

extern pid_t kernel_clone(struct kernel_clone_args *kargs);
-struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node);
+struct task_struct *create_io_thread(int (*fn)(void *), void *arg,
+ const char *name, int node);
struct task_struct *fork_idle(int);
-extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
+extern pid_t kernel_thread(int (*fn)(void *), void *arg, const char *name,
+ unsigned long flags);
extern pid_t user_mode_thread(int (*fn)(void *), void *arg, unsigned long flags);
extern long kernel_wait4(pid_t, int __user *, int, struct rusage *);
int kernel_wait(pid_t pid, int *stat);
diff --git a/init/main.c b/init/main.c
index e1c3911d7c70..9dc816aa904f 100644
--- a/init/main.c
+++ b/init/main.c
@@ -707,7 +707,7 @@ noinline void __ref rest_init(void)
rcu_read_unlock();

numa_default_policy();
- pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);
+ pid = kernel_thread(kthreadd, NULL, NULL, CLONE_FS | CLONE_FILES);
rcu_read_lock();
kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
rcu_read_unlock();
diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c
index 411bb2d1acd4..b399ef30882d 100644
--- a/io_uring/io-wq.c
+++ b/io_uring/io-wq.c
@@ -748,7 +748,7 @@ static void create_worker_cont(struct callback_head *cb)
worker = container_of(cb, struct io_worker, create_work);
clear_bit_unlock(0, &worker->create_state);
wqe = worker->wqe;
- tsk = create_io_thread(io_wqe_worker, worker, wqe->node);
+ tsk = create_io_thread(io_wqe_worker, worker, NULL, wqe->node);
if (!IS_ERR(tsk)) {
io_init_new_worker(wqe, worker, tsk);
io_worker_release(worker);
@@ -817,7 +817,7 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
if (index == IO_WQ_ACCT_BOUND)
worker->flags |= IO_WORKER_F_BOUND;

- tsk = create_io_thread(io_wqe_worker, worker, wqe->node);
+ tsk = create_io_thread(io_wqe_worker, worker, NULL, wqe->node);
if (!IS_ERR(tsk)) {
io_init_new_worker(wqe, worker, tsk);
} else if (!io_should_retry_thread(PTR_ERR(tsk))) {
diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c
index 559652380672..4b0388e15671 100644
--- a/io_uring/sqpoll.c
+++ b/io_uring/sqpoll.c
@@ -223,12 +223,8 @@ static int io_sq_thread(void *data)
struct io_sq_data *sqd = data;
struct io_ring_ctx *ctx;
unsigned long timeout = 0;
- char buf[TASK_COMM_LEN];
DEFINE_WAIT(wait);

- snprintf(buf, sizeof(buf), "iou-sqp-%d", sqd->task_pid);
- set_task_comm(current, buf);
-
if (sqd->sq_cpu != -1)
set_cpus_allowed_ptr(current, cpumask_of(sqd->sq_cpu));
else
@@ -350,6 +346,7 @@ __cold int io_sq_offload_create(struct io_ring_ctx *ctx,
fdput(f);
}
if (ctx->flags & IORING_SETUP_SQPOLL) {
+ char name[TASK_COMM_LEN];
struct task_struct *tsk;
struct io_sq_data *sqd;
bool attached;
@@ -395,7 +392,8 @@ __cold int io_sq_offload_create(struct io_ring_ctx *ctx,

sqd->task_pid = current->pid;
sqd->task_tgid = current->tgid;
- tsk = create_io_thread(io_sq_thread, sqd, NUMA_NO_NODE);
+ snprintf(name, sizeof(name), "iou-sqp-%d", sqd->task_pid);
+ tsk = create_io_thread(io_sq_thread, sqd, name, NUMA_NO_NODE);
if (IS_ERR(tsk)) {
ret = PTR_ERR(tsk);
goto err_sqpoll;
diff --git a/kernel/fork.c b/kernel/fork.c
index 9f7fe3541897..c566512281e0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2109,6 +2109,9 @@ static __latent_entropy struct task_struct *copy_process(
siginitsetinv(&p->blocked, sigmask(SIGKILL)|sigmask(SIGSTOP));
}

+ if (args->name)
+ snprintf(p->comm, TASK_COMM_LEN, "%s", args->name);
+
p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? args->child_tid : NULL;
/*
* Clear TID on mm_release()?
@@ -2613,7 +2616,8 @@ struct task_struct * __init fork_idle(int cpu)
* The returned task is inactive, and the caller must fire it up through
* wake_up_new_task(p). All signals are blocked in the created task.
*/
-struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node)
+struct task_struct *create_io_thread(int (*fn)(void *), void *arg,
+ const char *name, int node)
{
unsigned long flags = CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|
CLONE_IO;
@@ -2624,6 +2628,7 @@ struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node)
.fn = fn,
.fn_arg = arg,
.io_thread = 1,
+ .name = name,
};

return copy_process(NULL, 0, node, &args);
@@ -2727,7 +2732,8 @@ pid_t kernel_clone(struct kernel_clone_args *args)
/*
* Create a kernel thread.
*/
-pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
+pid_t kernel_thread(int (*fn)(void *), void *arg, const char *name,
+ unsigned long flags)
{
struct kernel_clone_args args = {
.flags = ((lower_32_bits(flags) | CLONE_VM |
@@ -2735,6 +2741,7 @@ pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
.exit_signal = (lower_32_bits(flags) & CSIGNAL),
.fn = fn,
.fn_arg = arg,
+ .name = name,
.kthread = 1,
};

diff --git a/kernel/kthread.c b/kernel/kthread.c
index f97fd01a2932..20fdab8c6b25 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -38,6 +38,7 @@ struct task_struct *kthreadd_task;
struct kthread_create_info
{
/* Information passed to kthread() from kthreadd. */
+ char *full_name;
int (*threadfn)(void *data);
void *data;
int node;
@@ -343,10 +344,12 @@ static int kthread(void *_create)
/* Release the structure when caller killed by a fatal signal. */
done = xchg(&create->done, NULL);
if (!done) {
+ kfree(create->full_name);
kfree(create);
kthread_exit(-EINTR);
}

+ self->full_name = create->full_name;
self->threadfn = threadfn;
self->data = data;

@@ -396,12 +399,14 @@ static void create_kthread(struct kthread_create_info *create)
current->pref_node_fork = create->node;
#endif
/* We want our own signal handler (we take no signals by default). */
- pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD);
+ pid = kernel_thread(kthread, create, create->full_name,
+ CLONE_FS | CLONE_FILES | SIGCHLD);
if (pid < 0) {
/* Release the structure when caller killed by a fatal signal. */
struct completion *done = xchg(&create->done, NULL);

if (!done) {
+ kfree(create->full_name);
kfree(create);
return;
}
@@ -427,6 +432,11 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
create->data = data;
create->node = node;
create->done = &done;
+ create->full_name = kvasprintf(GFP_KERNEL, namefmt, args);
+ if (!create->full_name) {
+ task = ERR_PTR(-ENOMEM);
+ goto free_create;
+ }

spin_lock(&kthread_create_lock);
list_add_tail(&create->list, &kthread_create_list);
@@ -453,26 +463,7 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
wait_for_completion(&done);
}
task = create->result;
- if (!IS_ERR(task)) {
- char name[TASK_COMM_LEN];
- va_list aq;
- int len;
-
- /*
- * task is already visible to other tasks, so updating
- * COMM must be protected.
- */
- va_copy(aq, args);
- len = vsnprintf(name, sizeof(name), namefmt, aq);
- va_end(aq);
- if (len >= TASK_COMM_LEN) {
- struct kthread *kthread = to_kthread(task);
-
- /* leave it truncated when out of memory. */
- kthread->full_name = kvasprintf(GFP_KERNEL, namefmt, args);
- }
- set_task_comm(task, name);
- }
+free_create:
kfree(create);
return task;
}