Re: [RESEND PATCH net-next 4/6] af_unix: stash pidfs dentry when needed

From: Kuniyuki Iwashima
Date: Mon Jun 30 2025 - 16:03:34 EST


On Sun, Jun 29, 2025 at 2:45 PM Alexander Mikhalitsyn
<aleksandr.mikhalitsyn@xxxxxxxxxxxxx> wrote:
>
> We need to ensure that the pidfs dentry is allocated when we meet any
> struct pid for the first time. This will allow us to open a pidfd
> even after the task it corresponds to is reaped.
>
> Basically, we need to identify all places where we fill skb/scm_cookie
> with struct pid reference for the first time and call pidfs_register_pid().
>
> The tricky thing here is that there are a few places where this happens,
> depending on what userspace is doing:
> - [__scm_replace_pid()] explicitly sending an SCM_CREDENTIALS message
> and specifying the pid in numeric format
> - [unix_maybe_add_creds()] enabled SO_PASSCRED/SO_PASSPIDFD but
> didn't send SCM_CREDENTIALS explicitly
> - [scm_send()] forcecreds is true. Netlink case.
>
> Cc: linux-kernel@xxxxxxxxxxxxxxx
> Cc: netdev@xxxxxxxxxxxxxxx
> Cc: "David S. Miller" <davem@xxxxxxxxxxxxx>
> Cc: Eric Dumazet <edumazet@xxxxxxxxxx>
> Cc: Jakub Kicinski <kuba@xxxxxxxxxx>
> Cc: Paolo Abeni <pabeni@xxxxxxxxxx>
> Cc: Simon Horman <horms@xxxxxxxxxx>
> Cc: Leon Romanovsky <leon@xxxxxxxxxx>
> Cc: Arnd Bergmann <arnd@xxxxxxxx>
> Cc: Christian Brauner <brauner@xxxxxxxxxx>
> Cc: Kuniyuki Iwashima <kuniyu@xxxxxxxxxx>
> Cc: Lennart Poettering <mzxreary@xxxxxxxxxxx>
> Cc: Luca Boccassi <bluca@xxxxxxxxxx>
> Cc: David Rheinsberg <david@xxxxxxxxxxxx>
> Signed-off-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@xxxxxxxxxxxxx>
> ---
> include/net/scm.h | 35 ++++++++++++++++++++++++++++++-----
> net/unix/af_unix.c | 36 +++++++++++++++++++++++++++++++++---
> 2 files changed, 63 insertions(+), 8 deletions(-)
>
> diff --git a/include/net/scm.h b/include/net/scm.h
> index 856eb3a380f6..d1ae0704f230 100644
> --- a/include/net/scm.h
> +++ b/include/net/scm.h
> @@ -8,6 +8,7 @@
> #include <linux/file.h>
> #include <linux/security.h>
> #include <linux/pid.h>
> +#include <linux/pidfs.h>
> #include <linux/nsproxy.h>
> #include <linux/sched/signal.h>
> #include <net/compat.h>
> @@ -66,19 +67,37 @@ static __inline__ void unix_get_peersec_dgram(struct socket *sock, struct scm_co
> { }
> #endif /* CONFIG_SECURITY_NETWORK */
>
> -static __inline__ void scm_set_cred(struct scm_cookie *scm,
> - struct pid *pid, kuid_t uid, kgid_t gid)
> +static __inline__ int __scm_set_cred(struct scm_cookie *scm,
> + struct pid *pid, bool pidfs_register,
> + kuid_t uid, kgid_t gid)

scm_set_cred() is only called from 3 places, and I think you can simply
pass pidfs_register == false from one of the places.

While at it, please s/__inline__/inline/.

> {
> - scm->pid = get_pid(pid);
> + if (pidfs_register) {
> + int err;
> +
> + err = pidfs_register_pid(pid);

nit: int err = pidfs_...();

> + if (err)
> + return err;
> + }
> +
> + scm->pid = get_pid(pid);
> +
> scm->creds.pid = pid_vnr(pid);
> scm->creds.uid = uid;
> scm->creds.gid = gid;
> + return 0;
> +}
> +
> +static __inline__ void scm_set_cred(struct scm_cookie *scm,
> + struct pid *pid, kuid_t uid, kgid_t gid)
> +{
> + /* __scm_set_cred() can't fail when pidfs_register == false */
> + (void) __scm_set_cred(scm, pid, false, uid, gid);

I think this (void) style is unnecessary for recent compilers.

> }
>
> static __inline__ void scm_destroy_cred(struct scm_cookie *scm)
> {
> put_pid(scm->pid);
> - scm->pid = NULL;
> + scm->pid = NULL;
> }
>
> static __inline__ void scm_destroy(struct scm_cookie *scm)
> @@ -90,9 +109,15 @@ static __inline__ void scm_destroy(struct scm_cookie *scm)
>
> static __inline__ int __scm_replace_pid(struct scm_cookie *scm, struct pid *pid)
> {
> + int err;
> +
> /* drop all previous references */
> scm_destroy_cred(scm);
>
> + err = pidfs_register_pid(pid);
> + if (err)
> + return err;
> +
> scm->pid = get_pid(pid);
> scm->creds.pid = pid_vnr(pid);
> return 0;
> @@ -105,7 +130,7 @@ static __inline__ int scm_send(struct socket *sock, struct msghdr *msg,
> scm->creds.uid = INVALID_UID;
> scm->creds.gid = INVALID_GID;
> if (forcecreds)
> - scm_set_cred(scm, task_tgid(current), current_uid(), current_gid());
> + __scm_set_cred(scm, task_tgid(current), true, current_uid(), current_gid());
> unix_get_peersec_dgram(sock, scm);
> if (msg->msg_controllen <= 0)
> return 0;
> diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
> index 5efe6e44abdf..1f4a5fe8a1f7 100644
> --- a/net/unix/af_unix.c
> +++ b/net/unix/af_unix.c
> @@ -1924,12 +1924,34 @@ static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
> scm->fp = scm_fp_dup(UNIXCB(skb).fp);
> }
>
> +static int __skb_set_pid(struct sk_buff *skb, struct pid *pid, bool pidfs_register)

unix_set_pid_to_skb ?

> +{
> + if (pidfs_register) {
> + int err;
> +
> + err = pidfs_register_pid(pid);
> + if (err)
> + return err;
> + }
> +
> + UNIXCB(skb).pid = get_pid(pid);
> + return 0;
> +}
> +
> static void unix_destruct_scm(struct sk_buff *skb)
> {
> struct scm_cookie scm;
>
> memset(&scm, 0, sizeof(scm));
> - scm.pid = UNIXCB(skb).pid;
> +
> + /* Pass ownership of struct pid from skb to scm cookie.
> + *
> + * We rely on scm_destroy() -> scm_destroy_cred() to properly
> + * release everything.
> + */
> + scm.pid = UNIXCB(skb).pid;
> + UNIXCB(skb).pid = NULL;

The skb is under destruction and we no longer touch it, so
this chunk is not needed.


> +
> if (UNIXCB(skb).fp)
> unix_detach_fds(&scm, skb);
>
> @@ -1943,7 +1965,10 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen
> {
> int err = 0;
>
> - UNIXCB(skb).pid = get_pid(scm->pid);
> + err = __skb_set_pid(skb, scm->pid, false);
> + if (unlikely(err))
> + return err;
> +
> UNIXCB(skb).uid = scm->creds.uid;
> UNIXCB(skb).gid = scm->creds.gid;
> UNIXCB(skb).fp = NULL;
> @@ -1976,7 +2001,12 @@ static int unix_maybe_add_creds(struct sk_buff *skb, const struct sock *sk,
> return 0;
>
> if (unix_may_passcred(sk) || unix_may_passcred(other)) {
> - UNIXCB(skb).pid = get_pid(task_tgid(current));
> + int err;
> +
> + err = __skb_set_pid(skb, task_tgid(current), true);
> + if (unlikely(err))
> + return err;
> +
> current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
> }
>
> --
> 2.43.0
>