Re: [PATCH v3 2/3] lib/string_helpers.c: Refactor string_escape_mem

From: Andy Shevchenko
Date: Tue Feb 10 2015 - 07:16:53 EST


On Tue, 2015-02-10 at 00:44 +0100, Rasmus Villemoes wrote:
> When printf is given the format specifier %pE, it needs a way of
> obtaining the total output size that would be generated if the buffer
> was large enough, and string_escape_mem doesn't easily provide
> that. This is a refactorization of string_escape_mem in preparation of
> changing its external API to provide that information.
>
> The somewhat ugly 'goto skip;'s and subsequent seemingly redundant
> conditionals are to make the following patch touch as little as
> possible in string_helpers.c while still preserving the current
> behaviour of never outputting partial escape sequences. That behaviour
> must also change for %pE to work as one expects from every other
> printf specifier.
>

Thanks for the update.
My comment is below.

> Signed-off-by: Rasmus Villemoes <linux@xxxxxxxxxxxxxxxxxx>
> ---
> lib/string_helpers.c | 202 +++++++++++++++++++++++----------------------------
> 1 file changed, 90 insertions(+), 112 deletions(-)
>
> diff --git a/lib/string_helpers.c b/lib/string_helpers.c
> index 58b78ba57439..7e2fef1eb40e 100644
> --- a/lib/string_helpers.c
> +++ b/lib/string_helpers.c
> @@ -243,29 +243,21 @@ int string_unescape(char *src, char *dst, size_t size, unsigned int flags)
> }
> EXPORT_SYMBOL(string_unescape);
>
> -static int escape_passthrough(unsigned char c, char **dst, size_t *osz)
> +static bool escape_passthrough(unsigned char c, char **dst, char *end)
> {
> char *out = *dst;
>
> - if (*osz < 1)
> - return -ENOMEM;
> -
> - *out++ = c;
> -
> - *dst = out;
> - *osz -= 1;
> -
> - return 1;
> + if (out < end)
> + *out = c;
> + *dst = out + 1;
> + return true;
> }
>
> -static int escape_space(unsigned char c, char **dst, size_t *osz)
> +static bool escape_space(unsigned char c, char **dst, char *end)
> {
> char *out = *dst;
> unsigned char to;
>
> - if (*osz < 2)
> - return -ENOMEM;
> -
> switch (c) {
> case '\n':
> to = 'n';
> @@ -283,26 +275,26 @@ static int escape_space(unsigned char c, char **dst, size_t *osz)
> to = 'f';
> break;
> default:
> - return 0;
> + return false;
> }
>
> - *out++ = '\\';
> - *out++ = to;
> -
> - *dst = out;
> - *osz -= 2;
> + if (out + 1 >= end)
> + goto skip;
> + if (out + 0 < end)
> + out[0] = '\\';
> + if (out + 1 < end)
> + out[1] = to;
>
> - return 1;
> +skip:
> + *dst = out + 2;
> + return true;

Why couldn't we use the three-line idiom here?
You are going to remove the 'skip' logic in the next patch anyway, but the
assignment to *dst will remain. So, what about:

if (out + 2 > end) {
*dst = out + 2;
return true;
}

if (out < end)
*out = '\\';
++out;
if (out < end)
*out = to;
++out;

*dst = out;
return true;


> }
>
> -static int escape_special(unsigned char c, char **dst, size_t *osz)
> +static bool escape_special(unsigned char c, char **dst, char *end)
> {
> char *out = *dst;
> unsigned char to;
>
> - if (*osz < 2)
> - return -ENOMEM;
> -
> switch (c) {
> case '\\':
> to = '\\';
> @@ -314,71 +306,78 @@ static int escape_special(unsigned char c, char **dst, size_t *osz)
> to = 'e';
> break;
> default:
> - return 0;
> + return false;
> }
>
> - *out++ = '\\';
> - *out++ = to;
> -
> - *dst = out;
> - *osz -= 2;
> + if (out + 1 >= end)
> + goto skip;
> + if (out + 0 < end)
> + out[0] = '\\';
> + if (out + 1 < end)
> + out[1] = to;
>
> - return 1;
> +skip:
> + *dst = out + 2;
> + return true;
> }
>
> -static int escape_null(unsigned char c, char **dst, size_t *osz)
> +static bool escape_null(unsigned char c, char **dst, char *end)
> {
> char *out = *dst;
>
> - if (*osz < 2)
> - return -ENOMEM;
> -
> if (c)
> - return 0;
> -
> - *out++ = '\\';
> - *out++ = '0';
> + return false;
>
> - *dst = out;
> - *osz -= 2;
> + if (out + 1 >= end)
> + goto skip;
> + if (out + 0 < end)
> + out[0] = '\\';
> + if (out + 1 < end)
> + out[1] = '0';
>
> - return 1;
> +skip:
> + *dst = out + 2;
> + return true;
> }
>
> -static int escape_octal(unsigned char c, char **dst, size_t *osz)
> +static bool escape_octal(unsigned char c, char **dst, char *end)
> {
> char *out = *dst;
>
> - if (*osz < 4)
> - return -ENOMEM;
> -
> - *out++ = '\\';
> - *out++ = ((c >> 6) & 0x07) + '0';
> - *out++ = ((c >> 3) & 0x07) + '0';
> - *out++ = ((c >> 0) & 0x07) + '0';
> -
> - *dst = out;
> - *osz -= 4;
> -
> - return 1;
> + if (out + 3 >= end)
> + goto skip;
> + if (out + 0 < end)
> + out[0] = '\\';
> + if (out + 1 < end)
> + out[1] = ((c >> 6) & 0x07) + '0';
> + if (out + 2 < end)
> + out[2] = ((c >> 3) & 0x07) + '0';
> + if (out + 3 < end)
> + out[3] = ((c >> 0) & 0x07) + '0';
> +
> +skip:
> + *dst = out + 4;
> + return true;
> }
>
> -static int escape_hex(unsigned char c, char **dst, size_t *osz)
> +static bool escape_hex(unsigned char c, char **dst, char *end)
> {
> char *out = *dst;
>
> - if (*osz < 4)
> - return -ENOMEM;
> -
> - *out++ = '\\';
> - *out++ = 'x';
> - *out++ = hex_asc_hi(c);
> - *out++ = hex_asc_lo(c);
> -
> - *dst = out;
> - *osz -= 4;
> -
> - return 1;
> + if (out + 3 >= end)
> + goto skip;
> + if (out + 0 < end)
> + out[0] = '\\';
> + if (out + 1 < end)
> + out[1] = 'x';
> + if (out + 2 < end)
> + out[2] = hex_asc_hi(c);
> + if (out + 3 < end)
> + out[3] = hex_asc_lo(c);
> +
> +skip:
> + *dst = out + 4;
> + return true;
> }
>
> /**
> @@ -440,9 +439,10 @@ static int escape_hex(unsigned char c, char **dst, size_t *osz)
> int string_escape_mem(const char *src, size_t isz, char **dst, size_t osz,
> unsigned int flags, const char *esc)
> {
> - char *out = *dst, *p = out;
> + char *p = *dst;
> + char *end = p + osz;
> bool is_dict = esc && *esc;
> - int ret = 0;
> + int ret;
>
> while (isz--) {
> unsigned char c = *src++;
> @@ -462,55 +462,33 @@ int string_escape_mem(const char *src, size_t isz, char **dst, size_t osz,
> (is_dict && !strchr(esc, c))) {
> /* do nothing */
> } else {
> - if (flags & ESCAPE_SPACE) {
> - ret = escape_space(c, &p, &osz);
> - if (ret < 0)
> - break;
> - if (ret > 0)
> - continue;
> - }
> -
> - if (flags & ESCAPE_SPECIAL) {
> - ret = escape_special(c, &p, &osz);
> - if (ret < 0)
> - break;
> - if (ret > 0)
> - continue;
> - }
> -
> - if (flags & ESCAPE_NULL) {
> - ret = escape_null(c, &p, &osz);
> - if (ret < 0)
> - break;
> - if (ret > 0)
> - continue;
> - }
> + if (flags & ESCAPE_SPACE && escape_space(c, &p, end))
> + continue;
> +
> + if (flags & ESCAPE_SPECIAL && escape_special(c, &p, end))
> + continue;
> +
> + if (flags & ESCAPE_NULL && escape_null(c, &p, end))
> + continue;
>
> /* ESCAPE_OCTAL and ESCAPE_HEX always go last */
> - if (flags & ESCAPE_OCTAL) {
> - ret = escape_octal(c, &p, &osz);
> - if (ret < 0)
> - break;
> + if (flags & ESCAPE_OCTAL && escape_octal(c, &p, end))
> continue;
> - }
> - if (flags & ESCAPE_HEX) {
> - ret = escape_hex(c, &p, &osz);
> - if (ret < 0)
> - break;
> +
> + if (flags & ESCAPE_HEX && escape_hex(c, &p, end))
> continue;
> - }
> }
>
> - ret = escape_passthrough(c, &p, &osz);
> - if (ret < 0)
> - break;
> + escape_passthrough(c, &p, end);
> }
>
> - *dst = p;
> -
> - if (ret < 0)
> - return ret;
> + if (p > end) {
> + *dst = end;
> + return -ENOMEM;
> + }
>
> - return p - out;
> + ret = p - *dst;
> + *dst = p;
> + return ret;
> }
> EXPORT_SYMBOL(string_escape_mem);


--
Andy Shevchenko <andriy.shevchenko@xxxxxxxxx>
Intel Finland Oy

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/