Re: 64-bit divide cleanup (tested on ppc)

From: Gabriel Paubert (paubert@iram.es)
Date: Thu Feb 07 2002 - 11:11:33 EST


        Hi Troy,

sorry for the delay, I was sick :-(

Troy Benjegerdes wrote:

> Attached is a patch to get rid of asm/div64.h on arches that don't have
> optimized asm routines.
>
> I didn't include removeing the various arch/div64.h file yet, since I want
> some comments on this.
>
[snipped]
> ===================================================================
> RCS file: /cvsdev/hhl-2.4.17/linux/fs/ntfs/util.c,v
> retrieving revision 1.1
> diff -u -r1.1 util.c
> --- fs/ntfs/util.c 2001/11/30 22:28:59 1.1
> +++ fs/ntfs/util.c 2002/01/29 00:40:14
> @@ -13,7 +13,8 @@
> #include "util.h"
> #include <linux/string.h>
> #include <linux/errno.h>
> -#include <asm/div64.h> /* For do_div(). */
> +#define USE_SLOW_64BIT_DIVIDES
> +#include <linux/div64.h> /* For do_div(). */
> #include "support.h"
>
> /*
> @@ -233,7 +234,7 @@
> {
> /* Subtract the NTFS time offset, then convert to 1s intervals. */
> ntfs_time64_t t = ntutc - NTFS_TIME_OFFSET;
> - do_div(t, 10000000);
> + do_div(&t, 10000000);
> return (ntfs_time_t)t;
> }

>
> Index: fs/smbfs/proc.c
> ===================================================================
> RCS file: /cvsdev/hhl-2.4.17/linux/fs/smbfs/proc.c,v
> retrieving revision 1.1
> diff -u -r1.1 proc.c
> --- fs/smbfs/proc.c 2001/11/30 22:28:58 1.1
> +++ fs/smbfs/proc.c 2002/01/29 00:40:17
> @@ -18,12 +18,14 @@
> #include <linux/dirent.h>
> #include <linux/nls.h>
>
> +#define USE_SLOW_64BIT_DIVIDES
> +#include <linux/div64.h>
> +
> #include <linux/smb_fs.h>
> #include <linux/smbno.h>
> #include <linux/smb_mount.h>
>
> #include <asm/string.h>
> -#include <asm/div64.h>
>
> #include "smb_debug.h"
> #include "proto.h"
> @@ -375,7 +377,7 @@
> /* FIXME: what about the timezone difference? */
> /* Subtract the NTFS time offset, then convert to 1s intervals. */
> u64 t = ntutc - NTFS_TIME_OFFSET;
> - do_div(t, 10000000);
> + do_div(&t, 10000000);
> return (time_t)t;
> }

At least for these 2, your patch is wrong. 10000000 is not especially
small and the algorithm you propose does not work for these. It is
limited to about 65536 actually.

>
> Index: lib/vsprintf.c
> ===================================================================
> RCS file: /cvsdev/hhl-2.4.17/linux/lib/vsprintf.c,v
> retrieving revision 1.1
> diff -u -r1.1 vsprintf.c
> --- lib/vsprintf.c 2001/11/30 22:28:59 1.1
> +++ lib/vsprintf.c 2002/01/29 00:40:24
> @@ -19,9 +19,9 @@
> #include <linux/string.h>
> #include <linux/ctype.h>
> #include <linux/kernel.h>
> +/* #define USE_SLOW_64BIT_DIVIDE */
> +#include <linux/div64.h>
>
> -#include <asm/div64.h>
> -
> /**
> * simple_strtoul - convert a string to an unsigned long
> * @cp: The start of the string
> @@ -165,7 +165,7 @@
> if (num == 0)
> tmp[i++]='0';
> else while (num != 0)
> - tmp[i++] = digits[do_div(num,base)];
> + tmp[i++] = digits[do_div(&num,base)];

Forcing to use slow do_div version even when base is 8 or 16 is not
nice. Heck I believe that seperating it into several cases and having a
different 2 or 3 distinct loops (one for base 10, the other or 2 others
for shifts by 3 or 4) could actually result in smaller code. On PPC and
Alpha at lesat the compiler knows how to do a divide by 10 with a
multiply high or however it's called instruction.

Oh, and what abour removing the if and doing a do {...} while(num!=0)
instead ?

> if (i > precision)
> precision = i;
> size -= precision;
> @@ -426,22 +426,31 @@
> }
> continue;
> }
> - if (qualifier == 'L')
> +
> + switch (qualifier) {
> + case 'L':
> num = va_arg(args, long long);
> - else if (qualifier == 'l') {
> - num = va_arg(args, unsigned long);
> + break;
> + case 'l':
> if (flags & SIGN)
> - num = (signed long) num;
> - } else if (qualifier == 'Z') {
> + num = (signed long long) va_arg(args, long);
> + else
> + num = va_arg(args, unsigned long);
> + break;
> + case 'Z':
> num = va_arg(args, size_t);
> - } else if (qualifier == 'h') {
> - num = (unsigned short) va_arg(args, int);
> + break;
> + case 'h':
> if (flags & SIGN)
> - num = (signed short) num;
> - } else {
> - num = va_arg(args, unsigned int);
> + num = (signed long long) va_arg(args, int);
> + else
> + num = va_arg(args, unsigned int);
> + break;
> + default:
> if (flags & SIGN)
> - num = (signed int) num;
> + num = (signed long long) va_arg(args, int);
> + else
> + num = va_arg(args, unsigned int);
> }
> str = number(str, end, num, base,
> field_width, precision, flags);
> Index: include/linux/div64.h
> ===================================================================
> RCS file: /cvsdev/hhl-2.4.17/linux/include/linux/div64.h
> diff -N div64.h
> --- /dev/null Tue May 5 13:32:27 1998
> +++ include/linux/div64.h Mon Jan 28 16:59:28 2002
> @@ -0,0 +1,85 @@
> +/*
> + * include/linux/div64.h
> + *
> + * Primarily used by vsprintf to divide a 64 bit int N by a small integer base

                                                                ^^^^^
Read the comments, here goes you 10000000 factor. The modulo once
shifted left by 16 bits can easily overflow.... Perhaps you should patch
it s/small/_small_/ to better see it.

> + * We really do NOT want to encourage people to do slow 64 bit divides in
> + * the kernel, so the 'default' version of this function panics if you
> + * try and divide a 64 bit number by anything other than 8 or 16.
> + *
> + * If you really *really* need this, and are prepared to be flamed by
> + * lkml, #define USE_SLOW_64BIT_DIVIDES before including this file.
> + */
> +#ifndef __DIV64
> +#define __DIV64
> +
> +#include <linux/config.h>
> +
> +/* configurable */
> +#undef __USE_ASM
> +
> +
> +#ifdef __USE_ASM
> +/* yeah, this is a mess, and leaves out m68k.... */
> +# if defined(CONFIG_X86) || define(CONFIG_ARCH_S390) || defined(CONFIG_MIPS)
> +# define __USE_ASM__
> +# endif
> +#endif
> +
> +#ifdef __USE_ASM__
> +#include <asm/div64.h>
> +#else /* __USE_ASM__ */
> +static inline int do_div(unsigned long long * n, unsigned long base)
> +{
> + int res = 0;
> + unsigned long long t = *n;
> + if ( t == (unsigned long)t ){ /* this should handle 64 bit platforms */
> + res = ((unsigned long) t) % base;
> + t = ((unsigned long) t) / base;
> + } else {
> +#ifndef USE_SLOW_64BIT_DIVIDES
> + switch (base) {
> + case 8:
> + res = ((unsigned long) t & 0x7);
> + t = t >> 3;
> + break;
> + case 16:
> + res = ((unsigned long) t & 0xf);
> + t = t >> 4;
> + break;
> + default:
> + panic("do_div called with 64 bit arg and unsupported base\n", base);
> + }
> +#else /* USE_SLOW_64BIT_DIVIDES */
> + /* this was stolen from the old asm-parisc/div64.h */
> + /*
> + * Copyright (C) 1999 Hewlett-Packard Co
> + * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com>
> + *
> + * vsprintf uses this to divide a 64-bit integer N by a small

s/small/_small_/ just to be sure that the comment is understood.

For the 10000000 case, I believe a simple stupid looping algorithm is
the only solution which does not result in code size explosion.

        Regards,
        Gabriel.

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Thu Feb 07 2002 - 21:01:03 EST