[patch 3/3] mm: bdi: allow to set a maximum to the bdi dirty limit

From: Miklos Szeredi
Date: Thu Nov 29 2007 - 06:16:26 EST


[mszeredi@xxxxxxx]
- Fix parsing in max_ratio_store().
- Export bdi_set_max_ratio() to modules (for fuse)
- Limit bdi_dirty with bdi->max_ratio

Signed-off-by: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
---

Index: linux/include/linux/backing-dev.h
===================================================================
--- linux.orig/include/linux/backing-dev.h 2007-11-27 20:55:54.000000000 +0100
+++ linux/include/linux/backing-dev.h 2007-11-27 20:56:02.000000000 +0100
@@ -52,6 +52,7 @@ struct backing_dev_info {
int dirty_exceeded;

unsigned int min_ratio;
+ unsigned int max_ratio, max_prop_frac;

struct device *dev;
};
@@ -150,6 +151,7 @@ static inline unsigned long bdi_stat_err
}

int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio);
+int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);

/*
* Flags in backing_dev_info::capability
Index: linux/include/linux/proportions.h
===================================================================
--- linux.orig/include/linux/proportions.h 2007-11-27 12:45:32.000000000 +0100
+++ linux/include/linux/proportions.h 2007-11-27 20:55:56.000000000 +0100
@@ -78,6 +78,19 @@ void prop_inc_percpu(struct prop_descrip
}

/*
+ * Limit the time part in order to ensure there are some bits left for the
+ * cycle counter and fraction multiply.
+ */
+#define PROP_MAX_SHIFT (3*BITS_PER_LONG/4)
+
+#define PROP_FRAC_SHIFT (BITS_PER_LONG - PROP_MAX_SHIFT - 1)
+#define PROP_FRAC_BASE (1UL << PROP_FRAC_SHIFT)
+
+void __prop_inc_percpu_max(struct prop_descriptor *pd,
+ struct prop_local_percpu *pl, long frac);
+
+
+/*
* ----- SINGLE ------
*/

Index: linux/lib/proportions.c
===================================================================
--- linux.orig/lib/proportions.c 2007-11-27 12:45:32.000000000 +0100
+++ linux/lib/proportions.c 2007-11-27 20:55:56.000000000 +0100
@@ -73,12 +73,6 @@
#include <linux/proportions.h>
#include <linux/rcupdate.h>

-/*
- * Limit the time part in order to ensure there are some bits left for the
- * cycle counter.
- */
-#define PROP_MAX_SHIFT (3*BITS_PER_LONG/4)
-
int prop_descriptor_init(struct prop_descriptor *pd, int shift)
{
int err;
@@ -273,6 +267,38 @@ void __prop_inc_percpu(struct prop_descr
}

/*
+ * identical to __prop_inc_percpu, except that it limits this pl's fraction to
+ * @frac/PROP_FRAC_BASE by ignoring events when this limit has been exceeded.
+ */
+void __prop_inc_percpu_max(struct prop_descriptor *pd,
+ struct prop_local_percpu *pl, long frac)
+{
+ struct prop_global *pg = prop_get_global(pd);
+
+ prop_norm_percpu(pg, pl);
+
+ if (unlikely(frac != PROP_FRAC_BASE)) {
+ unsigned long period_2 = 1UL << (pg->shift - 1);
+ unsigned long counter_mask = period_2 - 1;
+ unsigned long global_count;
+ long numerator, denominator;
+
+ numerator = percpu_counter_read_positive(&pl->events);
+ global_count = percpu_counter_read(&pg->events);
+ denominator = period_2 + (global_count & counter_mask);
+
+ if (numerator > ((denominator * frac) >> PROP_FRAC_SHIFT))
+ goto out_put;
+ }
+
+ percpu_counter_add(&pl->events, 1);
+ percpu_counter_add(&pg->events, 1);
+
+out_put:
+ prop_put_global(pd, pg);
+}
+
+/*
* Obtain a fraction of this proportion
*
* p_{j} = x_{j} / (period/2 + t % period/2)
Index: linux/mm/backing-dev.c
===================================================================
--- linux.orig/mm/backing-dev.c 2007-11-27 20:55:54.000000000 +0100
+++ linux/mm/backing-dev.c 2007-11-27 20:55:56.000000000 +0100
@@ -68,6 +68,24 @@ static ssize_t min_ratio_store(struct de
}
BDI_SHOW(min_ratio, bdi->min_ratio)

+static ssize_t max_ratio_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct backing_dev_info *bdi = dev_get_drvdata(dev);
+ char *end;
+ unsigned int ratio;
+ ssize_t ret = -EINVAL;
+
+ ratio = simple_strtoul(buf, &end, 10);
+ if (*buf && (end[0] == '\0' || (end[0] == '\n' && end[1] == '\0'))) {
+ ret = bdi_set_max_ratio(bdi, ratio);
+ if (!ret)
+ ret = count;
+ }
+ return ret;
+}
+BDI_SHOW(max_ratio, bdi->max_ratio)
+
#define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store)

static struct device_attribute bdi_dev_attrs[] = {
@@ -77,6 +95,7 @@ static struct device_attribute bdi_dev_a
__ATTR_RO(dirty_kb),
__ATTR_RO(bdi_dirty_kb),
__ATTR_RW(min_ratio),
+ __ATTR_RW(max_ratio),
__ATTR_NULL,
};

@@ -136,6 +155,10 @@ int bdi_init(struct backing_dev_info *bd

bdi->dev = NULL;

+ bdi->min_ratio = 0;
+ bdi->max_ratio = 100;
+ bdi->max_prop_frac = PROP_FRAC_BASE;
+
for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
err = percpu_counter_init_irq(&bdi->bdi_stat[i], 0);
if (err)
Index: linux/mm/page-writeback.c
===================================================================
--- linux.orig/mm/page-writeback.c 2007-11-27 20:55:54.000000000 +0100
+++ linux/mm/page-writeback.c 2007-11-27 21:54:43.000000000 +0100
@@ -158,7 +158,7 @@ int dirty_ratio_handler(struct ctl_table
*/
static inline void __bdi_writeout_inc(struct backing_dev_info *bdi)
{
- __prop_inc_percpu(&vm_completions, &bdi->completions);
+ __prop_inc_percpu_max(&vm_completions, &bdi->completions, bdi->max_prop_frac);
}

static inline void task_dirty_inc(struct task_struct *tsk)
@@ -252,17 +252,43 @@ int bdi_set_min_ratio(struct backing_dev
unsigned long flags;

spin_lock_irqsave(&bdi_lock, flags);
- min_ratio -= bdi->min_ratio;
- if (bdi_min_ratio + min_ratio < 100) {
- bdi_min_ratio += min_ratio;
- bdi->min_ratio += min_ratio;
- } else
+ if (min_ratio > bdi->max_ratio) {
ret = -EINVAL;
+ } else {
+ min_ratio -= bdi->min_ratio;
+ if (bdi_min_ratio + min_ratio < 100) {
+ bdi_min_ratio += min_ratio;
+ bdi->min_ratio += min_ratio;
+ } else {
+ ret = -EINVAL;
+ }
+ }
spin_unlock_irqrestore(&bdi_lock, flags);

return ret;
}

+int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
+{
+ unsigned long flags;
+ int ret = 0;
+
+ if (max_ratio > 100)
+ return -EINVAL;
+
+ spin_lock_irqsave(&bdi_lock, flags);
+ if (bdi->min_ratio > max_ratio) {
+ ret = -EINVAL;
+ } else {
+ bdi->max_ratio = max_ratio;
+ bdi->max_prop_frac = (PROP_FRAC_BASE * max_ratio) / 100;
+ }
+ spin_unlock_irqrestore(&bdi_lock, flags);
+
+ return 0;
+}
+EXPORT_SYMBOL(bdi_set_max_ratio);
+
/*
* Work out the current dirty-memory clamping and background writeout
* thresholds.
@@ -360,6 +386,8 @@ get_dirty_limits(long *pbackground, long
bdi_dirty *= numerator;
do_div(bdi_dirty, denominator);
bdi_dirty += (dirty * bdi->min_ratio) / 100;
+ if (bdi_dirty > (dirty * bdi->max_ratio) / 100)
+ bdi_dirty = dirty * bdi->max_ratio / 100;

*pbdi_dirty = bdi_dirty;
clip_bdi_dirty_limit(bdi, dirty, pbdi_dirty);
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/