patch for 2.1.122 dquota -- please test!

Bill Hawes (whawes@star.net)
Mon, 21 Sep 1998 11:32:39 -0400


This is a multi-part message in MIME format.
--------------539501046038E08D8FEC6291
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit

I've put together a patch that should fix the recently reported race
problems in the dquota code.

There were several problems with the existing code:
(1) dquots can change lists while being synced or invalidated, and the
successor node obtained before or after the blocking operation isn't
guaranteed to be on the list being processed.
(2) the clear_dquot operation wasn't preserving use counts.
(3) get_empty_dquot wasn't preserving the wait queue, and a potentially
blocking operation after selection could allow the dquot to be reused.

The patch addresses the sync and invalidate problems by restarting the
operations if the use count changes, or if there were any blocking
operations. This guarantees that when the call completes all of the
dquots will have been sync'ed or invalidated.

The clear_dquot routine has been changed to clear only those fields that
need to be cleared, preserving the use count, queue pointers, and wait
queue.

In get_empty_dquot, a check is made for dirty or locked dquots, and the
search is repeated if a blocking operation occurs after selection. This
ensures that the selected dquot is still on the free list, and then
clear_dquot is used to clear the structure without destroying the wait
queue.

I've also added code to selectively shrink the dcache if no free dquots
were found, which should help avoid the "no free dquots" message. The
patch also fixes several copy_xx_user error returns to properly return
-EFAULT.

The patch has been compiled but not tested, so I'd appreciate some test
reports from anyone using disk quota, especially if you've had problems
with hanging processes.

Regards,
Bill
--------------539501046038E08D8FEC6291
Content-Type: text/plain; charset=us-ascii; name="dquot_122-patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline; filename="dquot_122-patch"

--- linux-2.1.122/fs/dquot.c.old Tue Sep 1 21:40:17 1998
+++ linux-2.1.122/fs/dquot.c Mon Sep 21 11:25:54 1998
@@ -274,73 +274,130 @@
dqstats.reads++;
}

+/*
+ * Unhash and selectively clear the dquot structure,
+ * but preserve the use count, queue, and wait list.
+ */
void clear_dquot(struct dquot *dquot)
{
- struct wait_queue *wait;
-
- /* So we don't disappear. */
- dquot->dq_count++;
-
- wait_on_dquot(dquot);
-
- if (--dquot->dq_count > 0)
- remove_inuse(dquot);
- else
- remove_free_dquot(dquot);
+ /* unhash it first */
unhash_dquot(dquot);
- wait = dquot->dq_wait;
- memset(dquot, 0, sizeof(*dquot)); barrier();
- dquot->dq_wait = wait;
- put_dquot_head(dquot);
+ dquot->dq_flags = 0;
+ dquot->dq_dev = 0;
+ dquot->dq_referenced = 0;
+ dquot->dq_mnt = NULL;
+ memset(&dquot->dq_dqb, 0, sizeof(struct dqblk));
}

void invalidate_dquots(kdev_t dev, short type)
{
- struct dquot *dquot, *next = NULL;
- int pass = 0;
+ struct dquot **head = &free_dquots.head;
+ struct dquot *dquot, *next;
+ int need_restart, old_count;
+
+restart:
+ need_restart = 0;
+ head = &free_dquots.head;

- dquot = free_dquots.head;
repeat:
- while (dquot) {
+ next = *head;
+ while ((dquot = next) != NULL) {
next = dquot->dq_next;
- if (dquot->dq_dev != dev || dquot->dq_type != type)
- goto next;
+ if (dquot->dq_dev != dev)
+ continue;
+ if (dquot->dq_type != type)
+ continue;
+ if (dquot->dq_flags & DQ_LOCKED) {
+ /* save the current use count */
+ old_count = dquot->dq_count;
+ __wait_on_dquot(dquot);
+ /*
+ * If the dquot changed lists, it may not need
+ * invalidation. Handle this by restarting ...
+ */
+ if (dquot->dq_count != old_count) {
+printk("invalidate_dquots: count changed %d to %d, restarting\n",
+old_count, dquot->dq_count);
+ goto restart;
+ }
+ /* update the successor ... */
+ next = dquot->dq_next;
+ need_restart = 1;
+ }
clear_dquot(dquot);
- next:
- dquot = next;
}
-
- if (pass == 0) {
- dquot = inuse_list;
- pass = 1;
+ /* repeat for the inuse list */
+ if (head != &inuse_list) {
+ head = &inuse_list;
goto repeat;
}
+
+ /*
+ * If anything blocked, restart the operation
+ * to ensure we don't miss any dquots.
+ */
+ if (need_restart)
+ goto restart;
}

int sync_dquots(kdev_t dev, short type)
{
+ struct dquot **head;
struct dquot *dquot, *next;
- int pass = 0;
+ int need_restart, old_count;
+
+restart:
+ need_restart = 0;
+ head = &free_dquots.head;

- dquot = free_dquots.head;
repeat:
- while (dquot) {
+ next = *head;
+ while ((dquot = next) != NULL) {
next = dquot->dq_next;
- if ((dev && dquot->dq_dev != dev) ||
- (type != -1 && dquot->dq_type != type))
- goto next;
- wait_on_dquot(dquot);
- if (dquot->dq_flags & DQ_MOD)
- write_dquot(dquot);
- next:
- dquot = next;
- }
+ if (dev && dquot->dq_dev != dev)
+ continue;
+ if (type != -1 && dquot->dq_type != type)
+ continue;
+ if (!(dquot->dq_flags & (DQ_LOCKED | DQ_MOD)))
+ continue;
+ /*
+ * The dquot might switch lists while we sync it,
+ * so we check the use count before and after.
+ */
+ old_count = dquot->dq_count;
+ do {
+ if (dquot->dq_flags & DQ_LOCKED)
+ __wait_on_dquot(dquot);
+ if (dquot->dq_flags & DQ_MOD)
+ write_dquot(dquot);
+ } while (dquot->dq_flags & (DQ_LOCKED | DQ_MOD));

- if (pass == 0) {
- dquot = inuse_list;
- pass = 1;
+ /* restart now if the dquot changed lists */
+ if (dquot->dq_count != old_count) {
+printk("sync_dquots: count changed %d to %d, restarting\n",
+old_count, dquot->dq_count);
+ goto restart;
+ }
+ /*
+ * Update the successor and set the 'restart' flag,
+ * as the lists may have changed while we slept.
+ */
+ next = dquot->dq_next;
+ need_restart = 1;
+ }
+ /* repeat for the inuse list */
+ if (head != &inuse_list) {
+ head = &inuse_list;
goto repeat;
}
+
+ /*
+ * If anything blocked, restart the operation
+ * to ensure we don't miss any dquots.
+ */
+ if (need_restart)
+ goto restart;
+
dqstats.syncs++;
return(0);
}
@@ -349,6 +406,13 @@
{
if (!dquot)
return;
+ if (!dquot->dq_count) {
+ printk("VFS: dqput: trying to free free dquot\n");
+ printk("VFS: device %s, dquot of %s %d\n",
+ kdevname(dquot->dq_dev), quotatypes[dquot->dq_type],
+ dquot->dq_id);
+ return;
+ }

/*
* If the dq_mnt pointer isn't initialized this entry needs no
@@ -357,32 +421,22 @@
*/
if (dquot->dq_mnt != (struct vfsmount *)NULL) {
dqstats.drops++;
- wait_on_dquot(dquot);
-
- if (!dquot->dq_count) {
- printk("VFS: dqput: trying to free free dquot\n");
- printk("VFS: device %s, dquot of %s %d\n", kdevname(dquot->dq_dev),
- quotatypes[dquot->dq_type], dquot->dq_id);
- return;
- }
we_slept:
+ wait_on_dquot(dquot);
if (dquot->dq_count > 1) {
dquot->dq_count--;
return;
- } else {
- wake_up(&dquot_wait);
-
- if (dquot->dq_flags & DQ_MOD) {
- write_dquot(dquot);
- wait_on_dquot(dquot);
- goto we_slept;
- }
+ }
+ if (dquot->dq_flags & DQ_MOD) {
+ write_dquot(dquot);
+ goto we_slept;
}
}

if (--dquot->dq_count == 0) {
remove_inuse(dquot);
put_dquot_last(dquot); /* Place at end of LRU free queue */
+ wake_up(&dquot_wait);
}

return;
@@ -405,40 +459,33 @@
}
}

-static struct dquot *find_best_candidate_weighted(struct dquot *dquot)
+static struct dquot *find_best_candidate_weighted(struct dquot *next)
{
- int limit, myscore;
- unsigned long bestscore;
- struct dquot *best = NULL;
-
- if (dquot) {
- bestscore = 2147483647;
- limit = nr_free_dquots >> 2;
- do {
- if (!((dquot->dq_flags & DQ_LOCKED) || (dquot->dq_flags & DQ_MOD))) {
- myscore = dquot->dq_referenced;
- if (myscore < bestscore) {
- bestscore = myscore;
- best = dquot;
- }
- }
- dquot = dquot->dq_next;
- } while (dquot && --limit);
+ struct dquot *dquot, *best = NULL;
+ unsigned long myscore, bestscore = ~0U;
+ int limit = (nr_free_dquots > 128) ? nr_free_dquots >> 2 : 32;
+
+ while ((dquot = next) != NULL && --limit) {
+ next = dquot->dq_next;
+ if (dquot->dq_flags & (DQ_LOCKED | DQ_MOD))
+ continue;
+ myscore = dquot->dq_referenced;
+ if (myscore < bestscore) {
+ bestscore = myscore;
+ best = dquot;
+ }
}
return best;
}

static inline struct dquot *find_best_free(struct dquot *dquot)
{
- int limit;
+ int limit = (nr_free_dquots > 1024) ? nr_free_dquots >> 5 : 32;

- if (dquot) {
- limit = nr_free_dquots >> 5;
- do {
- if (dquot->dq_referenced == 0)
- return dquot;
- dquot = dquot->dq_next;
- } while (dquot && --limit);
+ while (dquot && --limit) {
+ if (dquot->dq_referenced == 0)
+ return dquot;
+ dquot = dquot->dq_next;
}
return NULL;
}
@@ -446,22 +493,37 @@
struct dquot *get_empty_dquot(void)
{
struct dquot *dquot;
+ int count;

repeat:
dquot = find_best_free(free_dquots.head);
if (!dquot)
goto pressure;
got_it:
- dquot->dq_count++;
- wait_on_dquot(dquot);
- unhash_dquot(dquot);
- remove_free_dquot(dquot);
+ if (dquot->dq_flags & (DQ_LOCKED | DQ_MOD)) {
+ do {
+ if (dquot->dq_flags & DQ_LOCKED)
+ __wait_on_dquot(dquot);
+ if (dquot->dq_flags & DQ_MOD)
+ write_dquot(dquot);
+ } while (dquot->dq_flags & (DQ_LOCKED | DQ_MOD));
+ /*
+ * The dquot may be back in use now, so we
+ * must recheck the free list.
+ */
+ goto repeat;
+ }
+ /* sanity check ... */
+ if (dquot->dq_count != 0)
+ printk(KERN_ERR "VFS: free dquot count=%d\n", dquot->dq_count);

- memset(dquot, 0, sizeof(*dquot));
+ remove_free_dquot(dquot);
+ /* unhash and selectively clear the structure */
+ clear_dquot(dquot);
dquot->dq_count = 1;
-
put_inuse(dquot);
return dquot;
+
pressure:
if (nr_dquots < max_dquots) {
grow_dquots();
@@ -469,19 +531,22 @@
}

dquot = find_best_candidate_weighted(free_dquots.head);
- if (!dquot) {
- printk("VFS: No free dquots, contact mvw@planets.elm.net\n");
- sleep_on(&dquot_wait);
- goto repeat;
- }
- if (dquot->dq_flags & DQ_LOCKED) {
- wait_on_dquot(dquot);
- goto repeat;
- } else if (dquot->dq_flags & DQ_MOD) {
- write_dquot(dquot);
+ if (dquot)
+ goto got_it;
+ /*
+ * Try pruning the dcache to free up some dquots ...
+ */
+ count = select_dcache(128, 0);
+ if (count) {
+ printk(KERN_DEBUG "get_empty_dquot: pruning %d\n", count);
+ prune_dcache(count);
+ free_inode_memory(count);
goto repeat;
}
- goto got_it;
+
+ printk("VFS: No free dquots, contact mvw@planets.elm.net\n");
+ sleep_on(&dquot_wait);
+ goto repeat;
}

struct dquot *dqget(kdev_t dev, unsigned int id, short type)
@@ -507,8 +572,9 @@
dquot->dq_type = type;
dquot->dq_dev = dev;
dquot->dq_mnt = vfsmnt;
- read_dquot(dquot);
+ /* hash it first so it can be found */
hash_dquot(dquot);
+ read_dquot(dquot);
} else {
if (!dquot->dq_count++) {
remove_free_dquot(dquot);
@@ -793,26 +859,30 @@
static int get_quota(kdev_t dev, int id, short type, struct dqblk *dqblk)
{
struct dquot *dquot;
- int error;
+ int error = -ESRCH;

- if (dev_has_quota_enabled(dev, type)) {
- if (dqblk == (struct dqblk *)NULL)
- return(-EFAULT);
-
- if ((dquot = dqget(dev, id, type)) != NODQUOT) {
- error = copy_to_user((caddr_t)dqblk, (caddr_t)&dquot->dq_dqb, sizeof(struct dqblk));
- dqput(dquot);
- return(error);
- }
- }
- return(-ESRCH);
+ if (!dev_has_quota_enabled(dev, type))
+ goto out;
+ dquot = dqget(dev, id, type);
+ if (dquot == NODQUOT)
+ goto out;
+
+ error = -EFAULT;
+ if (dqblk && !copy_to_user(dqblk, &dquot->dq_dqb, sizeof(struct dqblk)))
+ error = 0;
+ dqput(dquot);
+out:
+ return error;
}

static int get_stats(caddr_t addr)
{
+ int error = -EFAULT;
dqstats.allocated_dquots = nr_dquots;
dqstats.free_dquots = nr_free_dquots;
- return(copy_to_user(addr, (caddr_t)&dqstats, sizeof(struct dqstats)));
+ if (!copy_to_user(addr, &dqstats, sizeof(struct dqstats)))
+ error = 0;
+ return error;
}

static int quota_root_squash(kdev_t dev, short type, int *addr)
@@ -823,11 +893,12 @@
if ((vfsmnt = lookup_vfsmnt(dev)) == (struct vfsmount *)NULL)
return(-ENODEV);

- if ((error = copy_from_user((caddr_t)&new_value, (caddr_t)addr, sizeof(int))) != 0)
- return(error);
-
- vfsmnt->mnt_dquot.rsquash[type] = new_value;
- return(0);
+ error = -EFAULT;
+ if (!copy_from_user(&new_value, addr, sizeof(int))) {
+ vfsmnt->mnt_dquot.rsquash[type] = new_value;
+ error = 0;
+ }
+ return error;
}

/*

--------------539501046038E08D8FEC6291--

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/