Re: [PATCH net-next v2 2/3] net: core: add getsockopt SO_PEERPIDFD

From: Aleksandr Mikhalitsyn
Date: Wed Mar 22 2023 - 12:17:10 EST


On Wed, Mar 22, 2023 at 4:35 PM Christian Brauner <brauner@xxxxxxxxxx> wrote:
>
> On Tue, Mar 21, 2023 at 07:33:41PM +0100, Alexander Mikhalitsyn wrote:
> > Add SO_PEERPIDFD, which allows getting a pidfd for the peer socket holder.
> > This is a direct analog of SO_PEERCRED, which allows getting the plain PID.
> >
> > Cc: "David S. Miller" <davem@xxxxxxxxxxxxx>
> > Cc: Eric Dumazet <edumazet@xxxxxxxxxx>
> > Cc: Jakub Kicinski <kuba@xxxxxxxxxx>
> > Cc: Paolo Abeni <pabeni@xxxxxxxxxx>
> > Cc: Leon Romanovsky <leon@xxxxxxxxxx>
> > Cc: David Ahern <dsahern@xxxxxxxxxx>
> > Cc: Arnd Bergmann <arnd@xxxxxxxx>
> > Cc: Kees Cook <keescook@xxxxxxxxxxxx>
> > Cc: Christian Brauner <brauner@xxxxxxxxxx>
> > Cc: Kuniyuki Iwashima <kuniyu@xxxxxxxxxx>
> > Cc: Lennart Poettering <mzxreary@xxxxxxxxxxx>
> > Cc: linux-kernel@xxxxxxxxxxxxxxx
> > Cc: netdev@xxxxxxxxxxxxxxx
> > Cc: linux-arch@xxxxxxxxxxxxxxx
> > Signed-off-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@xxxxxxxxxxxxx>
> > ---
> > v2:
> > According to review comments from Kuniyuki Iwashima and Christian Brauner:
> > - use pidfd_create(..) retval as a result
> > - whitespace change
> > ---
> > arch/alpha/include/uapi/asm/socket.h | 1 +
> > arch/mips/include/uapi/asm/socket.h | 1 +
> > arch/parisc/include/uapi/asm/socket.h | 1 +
> > arch/sparc/include/uapi/asm/socket.h | 1 +
> > include/uapi/asm-generic/socket.h | 1 +
> > net/core/sock.c | 21 +++++++++++++++++++++
> > tools/include/uapi/asm-generic/socket.h | 1 +
> > 7 files changed, 27 insertions(+)
> >
> > diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
> > index ff310613ae64..e94f621903fe 100644
> > --- a/arch/alpha/include/uapi/asm/socket.h
> > +++ b/arch/alpha/include/uapi/asm/socket.h
> > @@ -138,6 +138,7 @@
> > #define SO_RCVMARK 75
> >
> > #define SO_PASSPIDFD 76
> > +#define SO_PEERPIDFD 77
> >
> > #if !defined(__KERNEL__)
> >
> > diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
> > index 762dcb80e4ec..60ebaed28a4c 100644
> > --- a/arch/mips/include/uapi/asm/socket.h
> > +++ b/arch/mips/include/uapi/asm/socket.h
> > @@ -149,6 +149,7 @@
> > #define SO_RCVMARK 75
> >
> > #define SO_PASSPIDFD 76
> > +#define SO_PEERPIDFD 77
> >
> > #if !defined(__KERNEL__)
> >
> > diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
> > index df16a3e16d64..be264c2b1a11 100644
> > --- a/arch/parisc/include/uapi/asm/socket.h
> > +++ b/arch/parisc/include/uapi/asm/socket.h
> > @@ -130,6 +130,7 @@
> > #define SO_RCVMARK 0x4049
> >
> > #define SO_PASSPIDFD 0x404A
> > +#define SO_PEERPIDFD 0x404B
> >
> > #if !defined(__KERNEL__)
> >
> > diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
> > index 6e2847804fea..682da3714686 100644
> > --- a/arch/sparc/include/uapi/asm/socket.h
> > +++ b/arch/sparc/include/uapi/asm/socket.h
> > @@ -131,6 +131,7 @@
> > #define SO_RCVMARK 0x0054
> >
> > #define SO_PASSPIDFD 0x0055
> > +#define SO_PEERPIDFD 0x0056
> >
> > #if !defined(__KERNEL__)
> >
> > diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
> > index b76169fdb80b..8ce8a39a1e5f 100644
> > --- a/include/uapi/asm-generic/socket.h
> > +++ b/include/uapi/asm-generic/socket.h
> > @@ -133,6 +133,7 @@
> > #define SO_RCVMARK 75
> >
> > #define SO_PASSPIDFD 76
> > +#define SO_PEERPIDFD 77
> >
> > #if !defined(__KERNEL__)
> >
> > diff --git a/net/core/sock.c b/net/core/sock.c
> > index 3f974246ba3e..85c269ca9d8a 100644
> > --- a/net/core/sock.c
> > +++ b/net/core/sock.c
> > @@ -1763,6 +1763,27 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
> > goto lenout;
> > }
> >
> > + case SO_PEERPIDFD:
> > + {
> > + struct pid *peer_pid;
> > + int pidfd;
> > +
> > + if (len > sizeof(pidfd))
> > + len = sizeof(pidfd);
> > +
> > + spin_lock(&sk->sk_peer_lock);
> > + peer_pid = get_pid(sk->sk_peer_pid);
> > + spin_unlock(&sk->sk_peer_lock);
> > +
> > + pidfd = pidfd_create(peer_pid, 0);
> > +
> > + put_pid(peer_pid);
> > +
> > + if (copy_to_sockptr(optval, &pidfd, len))
> > + return -EFAULT;
>
> This leaks the pidfd. We could do:
>
> if (copy_to_sockptr(optval, &pidfd, len)) {
> close_fd(pidfd);
> return -EFAULT;
> }

Ah, my bad. Thanks for pointing this out!

>
> but it's a nasty anti-pattern to install the fd in the caller's fdtable
> and then close it again. So let's avoid it if we can. Since you can only
> set one socket option per setsockopt() syscall we should be able to
> reserve an fd and pidfd_file, do the stuff that might fail, and then
> call fd_install. So that would roughly be:
>
> peer_pid = get_pid(sk->sk_peer_pid);
> pidfd_file = pidfd_file_create(peer_pid, 0, &pidfd);
> if (copy_to_sockptr(optval, &pidfd, len))
> return -EFAULT;
> goto lenout;
>
> .
> .
> .
>
> lenout:
> if (copy_to_sockptr(optlen, &len, sizeof(int)))
> return -EFAULT;
>
> // Made it safely, install pidfd now.
> fd_install(pidfd, pidfd_file);
>
> (See below for the associated api I'm going to publish independent of
> this as kernel/fork.c and fanotify both could use it.)
>
> But now, let's look at net/socket.c there's another wrinkle. So let's say you
> have successfully installed the pidfd then it seems you can still fail later:
>
> if (level == SOL_SOCKET)
> err = sock_getsockopt(sock, level, optname, optval, optlen);
> else if (unlikely(!sock->ops->getsockopt))
> err = -EOPNOTSUPP;
> else
> err = sock->ops->getsockopt(sock, level, optname, optval,
> optlen);
>
> if (!in_compat_syscall())
> err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
> optval, optlen, max_optlen,
> err);
>
> out_put:
> fput_light(sock->file, fput_needed);
> return err;
>
> If the bpf hook returns an error we've placed an fd into the caller's sockopt
> buffer without their knowledge.

yes, so we need to postpone fd_install to the end of __sys_getsockopt.
I'll think about that.

>
> From 4fee16f0920308bee2531fd3b08484f607eb5830 Mon Sep 17 00:00:00 2001
> From: Christian Brauner <brauner@xxxxxxxxxx>
> Date: Wed, 22 Mar 2023 15:59:02 +0100
> Subject: [PATCH 1/3] [HERE BE DRAGONS - DRAFT - __UNTESTED__] pid: add
> pidfd_file_create()
>
> Reserve an fd and pidfile, do stuff that might fail, and install the fd once
> the point of no return is reached.
>
> [HERE BE DRAGONS - DRAFT - __UNTESTED__] pid: add pidfd_file_create()
>
> Signed-off-by: Christian Brauner <brauner@xxxxxxxxxx>
> ---
> include/linux/pid.h | 1 +
> kernel/pid.c | 45 +++++++++++++++++++++++++++++++++------------
> 2 files changed, 34 insertions(+), 12 deletions(-)
>
> diff --git a/include/linux/pid.h b/include/linux/pid.h
> index 343abf22092e..c486dbc4d7b6 100644
> --- a/include/linux/pid.h
> +++ b/include/linux/pid.h
> @@ -80,6 +80,7 @@ extern struct pid *pidfd_pid(const struct file *file);
> struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags);
> struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags);
> int pidfd_create(struct pid *pid, unsigned int flags);
> +struct file *pidfd_file_create(struct pid *pid, unsigned int flags, int *pidfd);
>
> static inline struct pid *get_pid(struct pid *pid)
> {
> diff --git a/kernel/pid.c b/kernel/pid.c
> index 3fbc5e46b721..8d0924f1dbf6 100644
> --- a/kernel/pid.c
> +++ b/kernel/pid.c
> @@ -576,6 +576,32 @@ struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags)
> return task;
> }
>
> +struct file *pidfd_file_create(struct pid *pid, unsigned int flags, int *pidfd)
> +{
> + int fd;
> + struct file *pidfile;
> +
> + if (!pid || !pid_has_task(pid, PIDTYPE_TGID))
> + return ERR_PTR(-EINVAL);
> +
> + if (flags & ~(O_NONBLOCK | O_RDWR | O_CLOEXEC))
> + return ERR_PTR(-EINVAL);
> +
> + fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
> + if (fd < 0)
> + return ERR_PTR(fd);
> +
> + pidfile = anon_inode_getfile("[pidfd]", &pidfd_fops, pid,
> + flags | O_RDWR | O_CLOEXEC);
> + if (IS_ERR(pidfile)) {
> + put_unused_fd(fd);
> + return pidfile;
> + }
> + get_pid(pid); /* held by pidfile now */
> + *pidfd = fd;
> + return pidfile;
> +}
> +
> /**
> * pidfd_create() - Create a new pid file descriptor.
> *
> @@ -594,20 +620,15 @@ struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags)
> */
> int pidfd_create(struct pid *pid, unsigned int flags)
> {
> - int fd;
> + int pidfd;
> + struct file *pidfile;
>
> - if (!pid || !pid_has_task(pid, PIDTYPE_TGID))
> - return -EINVAL;
> + pidfile = pidfd_file_create(pid, flags, &pidfd);
> + if (IS_ERR(pidfile))
> + return PTR_ERR(pidfile);
>
> - if (flags & ~(O_NONBLOCK | O_RDWR | O_CLOEXEC))
> - return -EINVAL;
> -
> - fd = anon_inode_getfd("[pidfd]", &pidfd_fops, get_pid(pid),
> - flags | O_RDWR | O_CLOEXEC);
> - if (fd < 0)
> - put_pid(pid);
> -
> - return fd;
> + fd_install(pidfd, pidfile);
> + return pidfd;
> }
>
> /**
> --
> 2.34.1
>
> From c336f1c6cc39faa5aef4fbedd3c4f8eca51d8436 Mon Sep 17 00:00:00 2001
> From: Christian Brauner <brauner@xxxxxxxxxx>
> Date: Wed, 22 Mar 2023 15:59:54 +0100
> Subject: [PATCH 2/3] [HERE BE DRAGONS - DRAFT - __UNTESTED__] fork: use
> pidfd_file_create()
>
> Signed-off-by: Christian Brauner <brauner@xxxxxxxxxx>
> ---
> kernel/fork.c | 11 +----------
> 1 file changed, 1 insertion(+), 10 deletions(-)
>
> diff --git a/kernel/fork.c b/kernel/fork.c
> index f68954d05e89..c8dc78ee0a74 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -2296,20 +2296,11 @@ static __latent_entropy struct task_struct *copy_process(
> * if the fd table isn't shared).
> */
> if (clone_flags & CLONE_PIDFD) {
> - retval = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
> - if (retval < 0)
> - goto bad_fork_free_pid;
> -
> - pidfd = retval;
> -
> - pidfile = anon_inode_getfile("[pidfd]", &pidfd_fops, pid,
> - O_RDWR | O_CLOEXEC);
> + pidfile = pidfd_file_create(pid, O_RDWR | O_CLOEXEC, &pidfd);
> if (IS_ERR(pidfile)) {
> - put_unused_fd(pidfd);
> retval = PTR_ERR(pidfile);
> goto bad_fork_free_pid;
> }
> - get_pid(pid); /* held by pidfile now */
>
> retval = put_user(pidfd, args->pidfd);
> if (retval)
> --
> 2.34.1
>
> From 0897f68fe06a8777d8ec600fdc719143f76095b1 Mon Sep 17 00:00:00 2001
> From: Christian Brauner <brauner@xxxxxxxxxx>
> Date: Wed, 22 Mar 2023 16:02:50 +0100
> Subject: [PATCH 3/3] [HERE BE DRAGONS - DRAFT - __UNTESTED__] fanotify: use
> pidfd_file_create()
>
> Signed-off-by: Christian Brauner <brauner@xxxxxxxxxx>
> ---
> fs/notify/fanotify/fanotify_user.c | 15 +++++++++++----
> 1 file changed, 11 insertions(+), 4 deletions(-)
>
> diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
> index 8f430bfad487..4a8db6b5f690 100644
> --- a/fs/notify/fanotify/fanotify_user.c
> +++ b/fs/notify/fanotify/fanotify_user.c
> @@ -665,6 +665,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
> unsigned int pidfd_mode = info_mode & FAN_REPORT_PIDFD;
> struct file *f = NULL;
> int ret, pidfd = FAN_NOPIDFD, fd = FAN_NOFD;
> + struct file *pidfd_file = NULL;
>
> pr_debug("%s: group=%p event=%p\n", __func__, group, event);
>
> @@ -718,9 +719,11 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
> !pid_has_task(event->pid, PIDTYPE_TGID)) {
> pidfd = FAN_NOPIDFD;
> } else {
> - pidfd = pidfd_create(event->pid, 0);
> - if (pidfd < 0)
> + pidfd_file = pidfd_file_create(event->pid, 0, &pidfd);
> + if (IS_ERR(pidfd_file)) {
> pidfd = FAN_EPIDFD;
> + pidfd_file = NULL;
> + }
> }
> }
>
> @@ -750,6 +753,8 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
>
> if (f)
> fd_install(fd, f);
> + if (pidfd_file)
> + fd_install(pidfd, pidfd_file);
>
> return metadata.event_len;
>
> @@ -759,8 +764,10 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
> fput(f);
> }
>
> - if (pidfd >= 0)
> - close_fd(pidfd);
> + if (pidfd >= 0) {
> + put_unused_fd(pidfd);
> + fput(pidfd_file);
> + }
>
> return ret;
> }
> --
> 2.34.1
>