[PATCH 3/4] cfq-iosched: Enable both hierarchical mode and flat mode for cfq group scheduling

From: Gui Jianfeng
Date: Wed Oct 20 2010 - 22:36:37 EST


This patch enables both hierarchical mode and flat mode for cfq group scheduling.
Users can switch between the two modes at runtime through the "use_hierarchy"
interface of the blkio cgroup.
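
For example, assuming this series exposes the knob as blkio.use_hierarchy and
the blkio cgroup is mounted at /cgroup/blkio (both names are illustrative):

  # echo 1 > /cgroup/blkio/blkio.use_hierarchy    # hierarchical mode
  # echo 0 > /cgroup/blkio/blkio.use_hierarchy    # flat mode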

Signed-off-by: Gui Jianfeng <guijianfeng@xxxxxxxxxxxxxx>
---
block/cfq-iosched.c | 256 +++++++++++++++++++++++++++++++++++++++------------
1 files changed, 196 insertions(+), 60 deletions(-)

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index f781e4d..98c9191 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -240,6 +240,9 @@ struct cfq_data {
/* cfq group schedule in flat or hierarchy manner. */
bool use_hierarchy;

+ /* Service tree for cfq group flat scheduling mode. */
+ struct cfq_rb_root grp_service_tree;
+
/*
* The priority currently being served
*/
@@ -635,10 +638,20 @@ static inline unsigned cfq_group_get_avg_queues(struct cfq_data *cfqd,
static inline unsigned
cfq_group_slice(struct cfq_data *cfqd, struct cfq_group *cfqg)
{
- struct io_sched_entity *queue_entity = &cfqg->queue_se;
- struct cfq_rb_root *st = queue_entity->st;
+ struct cfq_rb_root *st;
+ unsigned int weight;
+
+ if (cfqd->use_hierarchy) {
+ struct io_sched_entity *queue_entity = &cfqg->queue_se;
+ st = queue_entity->st;
+ weight = queue_entity->weight;
+ } else {
+ struct io_sched_entity *group_entity = &cfqg->group_se;
+ st = &cfqd->grp_service_tree;
+ weight = group_entity->weight;
+ }

- return cfq_target_latency * queue_entity->weight / st->total_weight;
+ return cfq_target_latency * weight / st->total_weight;
}

static inline void
@@ -932,16 +945,30 @@ cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg)

cfqg->nr_cfqq++;

- io_sched_entity_add(queue_entity->st, queue_entity);
+ if (cfqd->use_hierarchy) {
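+ /*
+ * Hierarchical mode: put the queue entity on its group's service
+ * tree, then walk up and enqueue each group entity that is not yet
+ * on its parent's tree, bumping the parent's nr_subgp. The root
+ * group itself is never enqueued.
+ */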
+ io_sched_entity_add(queue_entity->st, queue_entity);

- while (group_entity && group_entity->parent) {
+ while (group_entity && group_entity->parent) {
+ if (group_entity->on_st)
+ return;
+ io_sched_entity_add(group_entity->st, group_entity);
+ group_entity = group_entity->parent;
+ __cfqg = cfqg_of_group_entity(group_entity);
+ __cfqg->nr_subgp++;
+ }
+ } else {
if (group_entity->on_st)
return;
+
+ /*
+ * In flat mode, all cfq groups are scheduled on the global
+ * service tree (cfqd->grp_service_tree).
+ */
io_sched_entity_add(group_entity->st, group_entity);
- group_entity = group_entity->parent;
- __cfqg = cfqg_of_group_entity(group_entity);
- __cfqg->nr_subgp++;
}
}

static void io_sched_entity_del(struct io_sched_entity *se)
@@ -975,24 +1002,32 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
if (cfqg->nr_cfqq)
return;

- /* dequeue queue se from group */
- io_sched_entity_del(queue_entity);
+ /* For the cfq group hierarchical scheduling case */
+ if (cfqd->use_hierarchy) {
+ /* dequeue queue se from group */
+ io_sched_entity_del(queue_entity);

- if (cfqg->nr_subgp)
- return;
+ if (cfqg->nr_subgp)
+ return;

- /* prevent from dequeuing root group */
- while (group_entity && group_entity->parent) {
- __cfqg = cfqg_of_group_entity(group_entity);
- p_cfqg = cfqg_of_group_entity(group_entity->parent);
+ /* never dequeue the root group */
+ while (group_entity && group_entity->parent) {
+ __cfqg = cfqg_of_group_entity(group_entity);
+ p_cfqg = cfqg_of_group_entity(group_entity->parent);
+ io_sched_entity_del(group_entity);
+ cfq_blkiocg_update_dequeue_stats(&__cfqg->blkg, 1);
+ cfq_log_cfqg(cfqd, __cfqg, "del_from_rr group");
+ __cfqg->saved_workload_slice = 0;
+ group_entity = group_entity->parent;
+ p_cfqg->nr_subgp--;
+ if (p_cfqg->nr_cfqq || p_cfqg->nr_subgp)
+ return;
+ }
+ } else {
+ cfq_log_cfqg(cfqd, cfqg, "del_from_rr group");
io_sched_entity_del(group_entity);
- cfq_blkiocg_update_dequeue_stats(&__cfqg->blkg, 1);
- cfq_log_cfqg(cfqd, __cfqg, "del_from_rr group");
- __cfqg->saved_workload_slice = 0;
- group_entity = group_entity->parent;
- p_cfqg->nr_subgp--;
- if (p_cfqg->nr_cfqq || p_cfqg->nr_subgp)
- return;
+ cfqg->saved_workload_slice = 0;
+ cfq_blkiocg_update_dequeue_stats(&cfqg->blkg, 1);
}
}

@@ -1026,7 +1061,7 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
struct cfq_queue *cfqq)
{
struct io_sched_entity *group_entity = &cfqg->group_se;
- struct io_sched_entity *queue_entity = &cfqg->queue_se;
+ struct io_sched_entity *queue_entity;
unsigned int used_sl, charge;
int nr_sync = cfqg->nr_cfqq - cfqg_busy_async_queues(cfqd, cfqg)
- cfqg->service_tree_idle.count;
@@ -1039,25 +1074,33 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
else if (!cfq_cfqq_sync(cfqq) && !nr_sync)
charge = cfqq->allocated_slice;

- /*
- * update queue se's vdisktime.
- * Can't update vdisktime while group is on service tree.
- */
-
- cfq_rb_erase(&queue_entity->rb_node, queue_entity->st);
- queue_entity->vdisktime += cfq_scale_slice(charge, queue_entity);
- __io_sched_entity_add(queue_entity->st, queue_entity);
- if (&queue_entity->rb_node == queue_entity->st->active)
- queue_entity->st->active = NULL;
-
- while (group_entity && group_entity->parent) {
+ if (cfqd->use_hierarchy) {
+ /*
+ * Update the queue entity's vdisktime. It can't be changed while
+ * the entity is on a service tree, so erase, update and re-add.
+ */
+ queue_entity = &cfqg->queue_se;
+ cfq_rb_erase(&queue_entity->rb_node, queue_entity->st);
+ queue_entity->vdisktime += cfq_scale_slice(charge,
+ queue_entity);
+ __io_sched_entity_add(queue_entity->st, queue_entity);
+ if (&queue_entity->rb_node == queue_entity->st->active)
+ queue_entity->st->active = NULL;
+
+ while (group_entity && group_entity->parent) {
+ cfq_rb_erase(&group_entity->rb_node, group_entity->st);
+ group_entity->vdisktime += cfq_scale_slice(charge,
+ group_entity);
+ __io_sched_entity_add(group_entity->st, group_entity);
+ if (&group_entity->rb_node == group_entity->st->active)
+ group_entity->st->active = NULL;
+ group_entity = group_entity->parent;
+ }
+ } else {
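+ /*
+ * Flat mode: the group entity itself is charged on the global
+ * service tree.
+ */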
cfq_rb_erase(&group_entity->rb_node, group_entity->st);
group_entity->vdisktime += cfq_scale_slice(charge,
group_entity);
__io_sched_entity_add(group_entity->st, group_entity);
- if (&group_entity->rb_node == group_entity->st->active)
- group_entity->st->active = NULL;
- group_entity = group_entity->parent;
}

/* This group is being expired. Save the context */
@@ -1125,13 +1168,35 @@ static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg)
cfq_put_cfqg(cfqg);
}

-void
-cfq_update_blkio_use_hierarchy(struct blkio_group *blkg, bool val)
+static int cfq_forced_dispatch(struct cfq_data *cfqd);
+
+void cfq_update_blkio_use_hierarchy(struct blkio_group *blkg, bool val)
{
+ unsigned long flags;
struct cfq_group *cfqg;
+ struct cfq_data *cfqd;
+ struct io_sched_entity *group_entity;
+ int nr;

+ /* Get root group here */
cfqg = cfqg_of_blkg(blkg);
- cfqg->cfqd->use_hierarchy = val;
+ cfqd = cfqg->cfqd;
+
+ spin_lock_irqsave(cfqd->queue->queue_lock, flags);
+
+ /* Drain all requests */
+ nr = cfq_forced_dispatch(cfqd);
+
+ group_entity = &cfqg->group_se;
+
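+ /*
+ * Re-parent the root group's entity: flat mode schedules it on
+ * the global service tree, while in hierarchical mode the root
+ * group itself is never enqueued.
+ */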
+ if (!val)
+ group_entity->st = &cfqd->grp_service_tree;
+ else
+ group_entity->st = NULL;
+
+ cfqd->use_hierarchy = val;
+
+ spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
}

static void init_group_queue_entity(struct blkio_cgroup *blkcg,
@@ -1202,11 +1267,21 @@ static void uninit_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg)
cfq_destroy_cfqg(cfqd, cfqg);
}

-static void cfqg_set_parent(struct cfq_group *cfqg, struct cfq_group *p_cfqg)
+static void cfqg_set_parent(struct cfq_data *cfqd, struct cfq_group *cfqg,
+ struct cfq_group *p_cfqg)
{
- struct io_sched_entity *group_entity = &cfqg->group_se;
- struct io_sched_entity *queue_entity = &cfqg->queue_se;
- struct io_sched_entity *p_group_entity = &p_cfqg->group_se;
+ struct io_sched_entity *group_entity, *queue_entity, *p_group_entity;
+
+ group_entity = &cfqg->group_se;
+
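+ /* No parent: attach directly to the global (flat) service tree. */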
+ if (!p_cfqg) {
+ group_entity->st = &cfqd->grp_service_tree;
+ group_entity->parent = NULL;
+ return;
+ }
+
+ queue_entity = &cfqg->queue_se;
+ p_group_entity = &p_cfqg->group_se;

group_entity->parent = p_group_entity;
group_entity->st = &p_cfqg->grp_service_tree;
@@ -1258,10 +1333,39 @@ int cfqg_chain_alloc(struct cfq_data *cfqd, struct cgroup *cgroup)
p_cfqg = cfqg_of_blkg(blkiocg_lookup_group(p_blkcg, key));
BUG_ON(p_cfqg == NULL);

- cfqg_set_parent(cfqg, p_cfqg);
+ cfqg_set_parent(cfqd, cfqg, p_cfqg);
return 0;
}

+static struct cfq_group *cfqg_alloc(struct cfq_data *cfqd,
+ struct cgroup *cgroup)
+{
+ struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
+ struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info;
+ unsigned int major, minor;
+ struct cfq_group *cfqg;
+ void *key = cfqd;
+
+ cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, key));
+ if (cfqg) {
+ if (!cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) {
+ sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
+ cfqg->blkg.dev = MKDEV(major, minor);
+ }
+ return cfqg;
+ }
+
+ cfqg = kzalloc_node(sizeof(*cfqg), GFP_ATOMIC, cfqd->queue->node);
+ if (!cfqg)
+ return NULL;
+
+ init_cfqg(cfqd, blkcg, cfqg);
+
+ cfqg_set_parent(cfqd, cfqg, NULL);
+
+ return cfqg;
+}
+
static struct cfq_group *
cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
{
@@ -1281,11 +1385,26 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
if (cfqg || !create)
goto done;

- ret = cfqg_chain_alloc(cfqd, cgroup);
- if (!ret) {
- cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, key));
- BUG_ON(cfqg == NULL);
- goto done;
+ if (!cfqd->use_hierarchy) {
+ /*
+ * For flat cfq group scheduling, we just need to allocate a
+ * single cfq group.
+ */
+ cfqg = cfqg_alloc(cfqd, cgroup);
+ if (!cfqg)
+ goto done;
+ return cfqg;
+ } else {
+ /*
+ * For hierarchical cfq group scheduling, we need to allocate
+ * the whole cfq group chain.
+ */
+ ret = cfqg_chain_alloc(cfqd, cgroup);
+ if (!ret) {
+ cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, key));
+ BUG_ON(cfqg == NULL);
+ goto done;
+ }
}
done:
return cfqg;
@@ -2404,23 +2523,37 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)

static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd)
{
- struct cfq_group *root_group = &cfqd->root_group;
- struct cfq_rb_root *st = &root_group->grp_service_tree;
+ struct cfq_rb_root *st;
struct cfq_group *cfqg;
struct io_sched_entity *se;

- do {
+ if (cfqd->use_hierarchy) {
+ struct cfq_group *root_group = &cfqd->root_group;
+ st = &root_group->grp_service_tree;
+
+ do {
+ se = cfq_rb_first_se(st);
+ if (!se)
+ return NULL;
+ st->active = &se->rb_node;
+ update_min_vdisktime(st);
+ cfqg = cfqg_of_queue_entity(se);
+ if (cfqg)
+ return cfqg;
+ cfqg = cfqg_of_group_entity(se);
+ st = &cfqg->grp_service_tree;
+ } while (1);
+ } else {
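+ /*
+ * Flat mode: every group sits on the global service tree, so the
+ * leftmost entity directly yields the next group.
+ */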
+ st = &cfqd->grp_service_tree;
se = cfq_rb_first_se(st);
if (!se)
return NULL;
st->active = &se->rb_node;
update_min_vdisktime(st);
- cfqg = cfqg_of_queue_entity(se);
- if (cfqg)
- return cfqg;
cfqg = cfqg_of_group_entity(se);
- st = &cfqg->grp_service_tree;
- } while (1);
+ BUG_ON(!cfqg);
+ return cfqg;
+ }
}

static void cfq_choose_cfqg(struct cfq_data *cfqd)
@@ -4089,6 +4222,9 @@ static void *cfq_init_queue(struct request_queue *q)

cfqd->cic_index = i;

+ /* Init flat service tree */
+ cfqd->grp_service_tree = CFQ_RB_ROOT;
+
/* Init root group */
cfqg = &cfqd->root_group;
cfqg->cfqd = cfqd;
--
1.6.5.2