Re: [PATCH v2] proc: add missing '\0' back to /proc/$pid/cmdline

From: Alexey Dobriyan
Date: Tue Jun 19 2018 - 13:14:54 EST


On Tue, Jun 19, 2018 at 06:28:40PM +0200, Michal Kubecek wrote:
> Recent rewrite introduced a regression, /proc/$pid/cmdline is missing the
> trailing null character:
>
> mike@lion:/tmp> cat /proc/self/cmdline | od -t c
> 0000000 c a t \0 / p r o c / s e l f / c
> 0000020 m d l i n e
> 0000026
>
> This is because strnlen() is used to search for the null character but it
> returns the length without it so that we need to copy one byte more (but
> only if we actually found a null character). And once we pass the null
> character to userspace we need to make sure next read returns 0 (EOF).
>
> This is another problem, even if rather theoretical one: if userspace seeks
> to arg_end or past it, we only start checking for null character from that
> position so that we may return random segment of environment rather then
> EOF.
>
> Resolve both by always starting no later than at arg_end-1 (so that we
> always catch the right null character) but don't copy data until we reach
> the requested start position.

See what's happening?

Code was made allegedly simpler by removing special case for "c == '\0'"
but that didn't work and now you're adding complexity back with another
variable and branches.

> Fixes: 5ab827189965 ("fs/proc: simplify and clarify get_mm_cmdline() function")

> --- a/fs/proc/base.c
> +++ b/fs/proc/base.c
> @@ -209,7 +209,8 @@ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
> size_t count, loff_t *ppos)
> {
> unsigned long arg_start, arg_end, env_start, env_end;
> - unsigned long pos, len;
> + unsigned long req_pos, pos, len;
> + bool end_found = false;
> char *page;
>
> /* Check if process spawned far enough to have cmdline. */
> @@ -236,25 +237,27 @@ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
> env_start = env_end = arg_end;
>
> /* We're not going to care if "*ppos" has high bits set */
> - pos = arg_start + *ppos;
> + req_pos = arg_start + *ppos;
>
> /* .. but we do check the result is in the proper range */
> - if (pos < arg_start || pos >= env_end)
> + if (req_pos < arg_start || req_pos >= env_end)
> return 0;
>
> /* .. and we never go past env_end */
> - if (env_end - pos < count)
> - count = env_end - pos;
> + if (env_end - req_pos < count)
> + count = env_end - req_pos;
>
> + pos = min_t(unsigned long, req_pos, arg_end - 1);
> page = (char *)__get_free_page(GFP_KERNEL);
> if (!page)
> return -ENOMEM;
>
> len = 0;
> - while (count) {
> + while (count && !end_found) {
> int got;
> - size_t size = min_t(size_t, PAGE_SIZE, count);
> + size_t size = count + (pos < req_pos ? req_pos - pos : 0);
>
> + size = min_t(size_t, PAGE_SIZE, size);
> got = access_remote_vm(mm, pos, page, size, FOLL_ANON);
> if (got <= 0)
> break;
> @@ -276,12 +279,26 @@ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
> n = arg_end - pos - 1;
>
> /* Cut off at first NUL after 'n' */
> - got = n + strnlen(page+n, got-n);
> + n += strnlen(page + n, got - n);
> + got = min_t(int, got, n + 1);
> + end_found = !page[n];
> if (!got)
> break;
> }
>
> - got -= copy_to_user(buf, page, got);
> + if (pos + got <= req_pos) {
> + /* got > 0 here so that pos always advances */
> + pos += got;
> + continue;
> + }
> +
> + if (pos < req_pos) {
> + got -= (req_pos - pos);
> + got -= copy_to_user(buf, page + req_pos - pos, got);
> + pos = req_pos;
> + } else {
> + got -= copy_to_user(buf, page, got);
> + }
> if (unlikely(!got)) {
> if (!len)
> len = -EFAULT;