Re: [PATCH] kernel/sys.c : Get rid of expensive divides in groups_sort()

From: Eric Dumazet
Date: Tue Dec 18 2007 - 19:36:56 EST


Andrew Morton a écrit :
On Wed, 19 Dec 2007 01:14:33 +0100
Eric Dumazet <dada1@xxxxxxxxxxxxx> wrote:

groups_sort() can be quite long if user loads a large gid table.

This is because GROUP_AT(group_info, some_integer) uses an integer divide.
So having to do XXX thousand divides during one syscall can lead to very high latencies. (NGROUPS_MAX=65536)

In the past (25 Mar 2006), an analog problem was found in groups_search()
(commit d74beb9f33a5f16d2965f11b275e401f225c949d ) and at that time I changed some variables to unsigned int.

I believe that a more generic fix is to make sure NGROUPS_PER_BLOCK is unsigned.


diff --git a/include/linux/sched.h b/include/linux/sched.h
index ac3d496..725a491 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -775,7 +775,7 @@ static inline int above_background_load(void)
struct io_context; /* See blkdev.h */
#define NGROUPS_SMALL 32
-#define NGROUPS_PER_BLOCK ((int)(PAGE_SIZE / sizeof(gid_t)))

That was a dopey thing to do. Both PAGE_SIZE and sizeof() are unsigned and
we went and cast it to a signed thing, even though it is a nonsense to ever
consider a negative value of this.

Maybe the author intention was to cast it to an "int", instead of "long", I dont know...


+#define NGROUPS_PER_BLOCK ((unsigned int)(PAGE_SIZE / sizeof(gid_t)))
struct group_info {
int ngroups;
atomic_t usage;
diff --git a/kernel/sys.c b/kernel/sys.c
index d1fe71e..091e58f 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1148,7 +1148,7 @@ static int groups_to_user(gid_t __user *grouplist,
int count = group_info->ngroups;
for (i = 0; i < group_info->nblocks; i++) {
- int cp_count = min(NGROUPS_PER_BLOCK, count);
+ int cp_count = min_t(int, NGROUPS_PER_BLOCK, count);
int off = i * NGROUPS_PER_BLOCK;
int len = cp_count * sizeof(*grouplist);
@@ -1168,7 +1168,7 @@ static int groups_from_user(struct group_info *group_info,
int count = group_info->ngroups;
for (i = 0; i < group_info->nblocks; i++) {
- int cp_count = min(NGROUPS_PER_BLOCK, count);
+ int cp_count = min_t(int, NGROUPS_PER_BLOCK, count);
int off = i * NGROUPS_PER_BLOCK;
int len = cp_count * sizeof(*grouplist);

We now have a jumble of signed and unsigned, stuck together with min_t
sticky tape.

Please, take a closer look in there and see if there's anything which
*needs* to be signed: is there anything which can actually, sensibly have a
negative quantity? I bet there isn't, and I bet the code would be cleaner,
faster and more obviously correct if it was converted to unsigned throughout.

I blame C. Negative quantities are rare, and C's default of treating
scalars as signed was a mistake. Oh well.



Yes I agree that 'unsigned' should be the default...

Well, I was trying to keep patch small, but as you have some time, I will make it bigger :)

[PATCH] kernel/sys.c : Get rid of expensive divides in groups_sort()

groups_sort() can be quite long if user loads a large gid table.

This is because GROUP_AT(group_info, some_integer) uses an integer divide.
So having to do XXX thousand divides during one syscall can lead to very high latencies. (NGROUPS_MAX=65536)

In the past (25 Mar 2006), an analog problem was found in groups_search()
(commit d74beb9f33a5f16d2965f11b275e401f225c949d ) and at that time I changed some variables to unsigned int.

I believe that a more generic fix is to make sure NGROUPS_PER_BLOCK is unsigned.

Signed-off-by: Eric Dumazet <dada1@xxxxxxxxxxxxx> diff --git a/include/linux/sched.h b/include/linux/sched.h
index ac3d496..725a491 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -775,7 +775,7 @@ static inline int above_background_load(void)

struct io_context; /* See blkdev.h */
#define NGROUPS_SMALL 32
-#define NGROUPS_PER_BLOCK ((int)(PAGE_SIZE / sizeof(gid_t)))
+#define NGROUPS_PER_BLOCK ((unsigned int)(PAGE_SIZE / sizeof(gid_t)))
struct group_info {
int ngroups;
atomic_t usage;
diff --git a/kernel/sys.c b/kernel/sys.c
index d1fe71e..d632352 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1145,16 +1145,16 @@ static int groups_to_user(gid_t __user *grouplist,
struct group_info *group_info)
{
int i;
- int count = group_info->ngroups;
+ unsigned int count = group_info->ngroups;

for (i = 0; i < group_info->nblocks; i++) {
- int cp_count = min(NGROUPS_PER_BLOCK, count);
- int off = i * NGROUPS_PER_BLOCK;
- int len = cp_count * sizeof(*grouplist);
+ unsigned int cp_count = min(NGROUPS_PER_BLOCK, count);
+ unsigned int len = cp_count * sizeof(*grouplist);

- if (copy_to_user(grouplist+off, group_info->blocks[i], len))
+ if (copy_to_user(grouplist, group_info->blocks[i], len))
return -EFAULT;

+ grouplist += NGROUPS_PER_BLOCK;
count -= cp_count;
}
return 0;
@@ -1165,16 +1165,16 @@ static int groups_from_user(struct group_info *group_info,
gid_t __user *grouplist)
{
int i;
- int count = group_info->ngroups;
+ unsigned int count = group_info->ngroups;

for (i = 0; i < group_info->nblocks; i++) {
- int cp_count = min(NGROUPS_PER_BLOCK, count);
- int off = i * NGROUPS_PER_BLOCK;
- int len = cp_count * sizeof(*grouplist);
+ unsigned int cp_count = min(NGROUPS_PER_BLOCK, count);
+ unsigned int len = cp_count * sizeof(*grouplist);

- if (copy_from_user(group_info->blocks[i], grouplist+off, len))
+ if (copy_from_user(group_info->blocks[i], grouplist, len))
return -EFAULT;

+ grouplist += NGROUPS_PER_BLOCK;
count -= cp_count;
}
return 0;