[PATCH 17/24] GFS2: Use RCU/hlist_bl based hash for quotas

From: Steven Whitehouse
Date: Mon Jan 20 2014 - 07:25:07 EST


Prior to this patch, GFS2 kept all the quotas for each
super block in a single linked list. This is rather slow
when there are large numbers of quotas.

This patch introduces a hlist_bl based hash table, similar
to the one used for glocks. The initial look up of the quota
is now lockless in the case where it is already cached,
although we still have to take the per quota spinlock in
order to bump the ref count. Either way though, this is a
big improvement on what was there before.

The qd_lock and the per super block list is preserved, for
the time being. However it is intended that since this is no
longer used for its original role, it should be possible to
shrink the number of items on that list in due course and
remove the requirement to take qd_lock in qd_get.

Signed-off-by: Steven Whitehouse <swhiteho@xxxxxxxxxx>
Cc: Abhijith Das <adas@xxxxxxxxxx>
Cc: Paul E. McKenney <paulmck@xxxxxxxxxxxxxxxxxx>

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index a99f60c..59d99ec 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -428,10 +428,13 @@ enum {
};

struct gfs2_quota_data {
+ struct hlist_bl_node qd_hlist;
struct list_head qd_list;
struct kqid qd_id;
+ struct gfs2_sbd *qd_sbd;
struct lockref qd_lockref;
struct list_head qd_lru;
+ unsigned qd_hash;

unsigned long qd_flags; /* QDF_... */

@@ -450,6 +453,7 @@ struct gfs2_quota_data {

u64 qd_sync_gen;
unsigned long qd_last_warn;
+ struct rcu_head qd_rcu;
};

struct gfs2_trans {
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 0650db2..c272e73 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -76,6 +76,7 @@ static int __init init_gfs2_fs(void)

gfs2_str2qstr(&gfs2_qdot, ".");
gfs2_str2qstr(&gfs2_qdotdot, "..");
+ gfs2_quota_hash_init();

error = gfs2_sys_init();
if (error)
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 1b6b367..a1df01d 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -52,6 +52,10 @@
#include <linux/dqblk_xfs.h>
#include <linux/lockref.h>
#include <linux/list_lru.h>
+#include <linux/rcupdate.h>
+#include <linux/rculist_bl.h>
+#include <linux/bit_spinlock.h>
+#include <linux/jhash.h>

#include "gfs2.h"
#include "incore.h"
@@ -67,10 +71,43 @@
#include "inode.h"
#include "util.h"

-/* Lock order: qd_lock -> qd->lockref.lock -> lru lock */
+#define GFS2_QD_HASH_SHIFT 12
+#define GFS2_QD_HASH_SIZE (1 << GFS2_QD_HASH_SHIFT)
+#define GFS2_QD_HASH_MASK (GFS2_QD_HASH_SIZE - 1)
+
+/* Lock order: qd_lock -> bucket lock -> qd->lockref.lock -> lru lock */
static DEFINE_SPINLOCK(qd_lock);
struct list_lru gfs2_qd_lru;

+static struct hlist_bl_head qd_hash_table[GFS2_QD_HASH_SIZE];
+
+static unsigned int gfs2_qd_hash(const struct gfs2_sbd *sdp,
+ const struct kqid qid)
+{
+ unsigned int h;
+
+ h = jhash(&sdp, sizeof(struct gfs2_sbd *), 0);
+ h = jhash(&qid, sizeof(struct kqid), h);
+
+ return h & GFS2_QD_HASH_MASK;
+}
+
+static inline void spin_lock_bucket(unsigned int hash)
+{
+ hlist_bl_lock(&qd_hash_table[hash]);
+}
+
+static inline void spin_unlock_bucket(unsigned int hash)
+{
+ hlist_bl_unlock(&qd_hash_table[hash]);
+}
+
+static void gfs2_qd_dealloc(struct rcu_head *rcu)
+{
+ struct gfs2_quota_data *qd = container_of(rcu, struct gfs2_quota_data, qd_rcu);
+ kmem_cache_free(gfs2_quotad_cachep, qd);
+}
+
static void gfs2_qd_dispose(struct list_head *list)
{
struct gfs2_quota_data *qd;
@@ -87,6 +124,10 @@ static void gfs2_qd_dispose(struct list_head *list)
list_del(&qd->qd_list);
spin_unlock(&qd_lock);

+ spin_lock_bucket(qd->qd_hash);
+ hlist_bl_del_rcu(&qd->qd_hlist);
+ spin_unlock_bucket(qd->qd_hash);
+
gfs2_assert_warn(sdp, !qd->qd_change);
gfs2_assert_warn(sdp, !qd->qd_slot_count);
gfs2_assert_warn(sdp, !qd->qd_bh_count);
@@ -95,7 +136,7 @@ static void gfs2_qd_dispose(struct list_head *list)
atomic_dec(&sdp->sd_quota_count);

/* Delete it from the common reclaim list */
- kmem_cache_free(gfs2_quotad_cachep, qd);
+ call_rcu(&qd->qd_rcu, gfs2_qd_dealloc);
}
}

@@ -165,83 +206,95 @@ static u64 qd2offset(struct gfs2_quota_data *qd)
return offset;
}

-static int qd_alloc(struct gfs2_sbd *sdp, struct kqid qid,
- struct gfs2_quota_data **qdp)
+static struct gfs2_quota_data *qd_alloc(unsigned hash, struct gfs2_sbd *sdp, struct kqid qid)
{
struct gfs2_quota_data *qd;
int error;

qd = kmem_cache_zalloc(gfs2_quotad_cachep, GFP_NOFS);
if (!qd)
- return -ENOMEM;
+ return NULL;

+ qd->qd_sbd = sdp;
qd->qd_lockref.count = 1;
spin_lock_init(&qd->qd_lockref.lock);
qd->qd_id = qid;
qd->qd_slot = -1;
INIT_LIST_HEAD(&qd->qd_lru);
+ qd->qd_hash = hash;

error = gfs2_glock_get(sdp, qd2index(qd),
&gfs2_quota_glops, CREATE, &qd->qd_gl);
if (error)
goto fail;

- *qdp = qd;
-
- return 0;
+ return qd;

fail:
kmem_cache_free(gfs2_quotad_cachep, qd);
- return error;
+ return NULL;
}

-static int qd_get(struct gfs2_sbd *sdp, struct kqid qid,
- struct gfs2_quota_data **qdp)
+static struct gfs2_quota_data *gfs2_qd_search_bucket(unsigned int hash,
+ const struct gfs2_sbd *sdp,
+ struct kqid qid)
{
- struct gfs2_quota_data *qd = NULL, *new_qd = NULL;
- int error, found;
-
- *qdp = NULL;
+ struct gfs2_quota_data *qd;
+ struct hlist_bl_node *h;

- for (;;) {
- found = 0;
- spin_lock(&qd_lock);
- list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) {
- if (qid_eq(qd->qd_id, qid) &&
- lockref_get_not_dead(&qd->qd_lockref)) {
- list_lru_del(&gfs2_qd_lru, &qd->qd_lru);
- found = 1;
- break;
- }
+ hlist_bl_for_each_entry_rcu(qd, h, &qd_hash_table[hash], qd_hlist) {
+ if (!qid_eq(qd->qd_id, qid))
+ continue;
+ if (qd->qd_sbd != sdp)
+ continue;
+ if (lockref_get_not_dead(&qd->qd_lockref)) {
+ list_lru_del(&gfs2_qd_lru, &qd->qd_lru);
+ return qd;
}
+ }

- if (!found)
- qd = NULL;
+ return NULL;
+}

- if (!qd && new_qd) {
- qd = new_qd;
- list_add(&qd->qd_list, &sdp->sd_quota_list);
- atomic_inc(&sdp->sd_quota_count);
- new_qd = NULL;
- }

- spin_unlock(&qd_lock);
+static int qd_get(struct gfs2_sbd *sdp, struct kqid qid,
+ struct gfs2_quota_data **qdp)
+{
+ struct gfs2_quota_data *qd, *new_qd;
+ unsigned int hash = gfs2_qd_hash(sdp, qid);

- if (qd) {
- if (new_qd) {
- gfs2_glock_put(new_qd->qd_gl);
- kmem_cache_free(gfs2_quotad_cachep, new_qd);
- }
- *qdp = qd;
- return 0;
- }
+ rcu_read_lock();
+ *qdp = qd = gfs2_qd_search_bucket(hash, sdp, qid);
+ rcu_read_unlock();

- error = qd_alloc(sdp, qid, &new_qd);
- if (error)
- return error;
+ if (qd)
+ return 0;
+
+ new_qd = qd_alloc(hash, sdp, qid);
+ if (!new_qd)
+ return -ENOMEM;
+
+ spin_lock(&qd_lock);
+ spin_lock_bucket(hash);
+ *qdp = qd = gfs2_qd_search_bucket(hash, sdp, qid);
+ if (qd == NULL) {
+ *qdp = new_qd;
+ list_add(&new_qd->qd_list, &sdp->sd_quota_list);
+ hlist_bl_add_head_rcu(&new_qd->qd_hlist, &qd_hash_table[hash]);
+ atomic_inc(&sdp->sd_quota_count);
+ }
+ spin_unlock_bucket(hash);
+ spin_unlock(&qd_lock);
+
+ if (qd) {
+ gfs2_glock_put(new_qd->qd_gl);
+ kmem_cache_free(gfs2_quotad_cachep, new_qd);
}
+
+ return 0;
}

+
static void qd_hold(struct gfs2_quota_data *qd)
{
struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
@@ -1215,6 +1268,7 @@ int gfs2_quota_init(struct gfs2_sbd *sdp)
unsigned int blocks = size >> sdp->sd_sb.sb_bsize_shift;
unsigned int x, slot = 0;
unsigned int found = 0;
+ unsigned int hash;
u64 dblock;
u32 extlen = 0;
int error;
@@ -1272,8 +1326,9 @@ int gfs2_quota_init(struct gfs2_sbd *sdp)
if (!qc_change)
continue;

- error = qd_alloc(sdp, qc_id, &qd);
- if (error) {
+ hash = gfs2_qd_hash(sdp, qc_id);
+ qd = qd_alloc(hash, sdp, qc_id);
+ if (qd == NULL) {
brelse(bh);
goto fail;
}
@@ -1289,6 +1344,10 @@ int gfs2_quota_init(struct gfs2_sbd *sdp)
atomic_inc(&sdp->sd_quota_count);
spin_unlock(&qd_lock);

+ spin_lock_bucket(hash);
+ hlist_bl_add_head_rcu(&qd->qd_hlist, &qd_hash_table[hash]);
+ spin_unlock_bucket(hash);
+
found++;
}

@@ -1335,11 +1394,16 @@ void gfs2_quota_cleanup(struct gfs2_sbd *sdp)
spin_unlock(&qd->qd_lockref.lock);

list_del(&qd->qd_list);
+
/* Also remove if this qd exists in the reclaim list */
list_lru_del(&gfs2_qd_lru, &qd->qd_lru);
atomic_dec(&sdp->sd_quota_count);
spin_unlock(&qd_lock);

+ spin_lock_bucket(qd->qd_hash);
+ hlist_bl_del_rcu(&qd->qd_hlist);
+ spin_unlock_bucket(qd->qd_hash);
+
if (!qd->qd_lockref.count) {
gfs2_assert_warn(sdp, !qd->qd_change);
gfs2_assert_warn(sdp, !qd->qd_slot_count);
@@ -1348,7 +1412,7 @@ void gfs2_quota_cleanup(struct gfs2_sbd *sdp)
gfs2_assert_warn(sdp, !qd->qd_bh_count);

gfs2_glock_put(qd->qd_gl);
- kmem_cache_free(gfs2_quotad_cachep, qd);
+ call_rcu(&qd->qd_rcu, gfs2_qd_dealloc);

spin_lock(&qd_lock);
}
@@ -1643,3 +1707,11 @@ const struct quotactl_ops gfs2_quotactl_ops = {
.get_dqblk = gfs2_get_dqblk,
.set_dqblk = gfs2_set_dqblk,
};
+
+void __init gfs2_quota_hash_init(void)
+{
+ unsigned i;
+
+ for(i = 0; i < GFS2_QD_HASH_SIZE; i++)
+ INIT_HLIST_BL_HEAD(&qd_hash_table[i]);
+}
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index 96e4f34..55d506e 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -57,5 +57,6 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
extern const struct quotactl_ops gfs2_quotactl_ops;
extern struct shrinker gfs2_qd_shrinker;
extern struct list_lru gfs2_qd_lru;
+extern void __init gfs2_quota_hash_init(void);

#endif /* __QUOTA_DOT_H__ */
--
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/