[PATCH 06/11] fs/dcache: directory opportunistically stores # of positive dentries

From: Waiman Long
Date: Wed Feb 26 2020 - 11:15:39 EST


For directories that contain large number of files (e.g. in thousands),
the number of positive dentries that will never be reclaimed by
the negative dentry reclaiming process may approach or even exceed
"dentry-dir-max". That will unnecessary cause the triggering of the
reclaim process even if there aren't that many negative dentries that
can be reclaimed.

This can impact system performance. One possible way to solve this
problem is to somehow store the number of positive or negative dentries
in the directory dentry itself. The negative dentry count can change
frequently, whereas the positive dentry count is relatively more stable,

Keeping an accurate count of positive or negative dentries can be
costly. Instead, an estimate of the positive dentry count is computed
in the scan loop of the negative dentry reclaim work function.

That computed value is then stored in the trailing end of the d_iname[]
array if there is enough space for it. This value is then used to
estimate the number of negative dentries in the directory to be compare
against the "dentry-dir-max" value.

Signed-off-by: Waiman Long <longman@xxxxxxxxxx>
---
fs/dcache.c | 90 +++++++++++++++++++++++++++++++++++++++++++++++------
1 file changed, 81 insertions(+), 9 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index 01c6d7277244..c5364c2ed5d8 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1294,6 +1294,60 @@ struct reclaim_dentry
struct dentry *parent_dir;
};

+/*
+ * If there is enough space at the end of d_iname[] of a directory dentry
+ * to hold an integer. The space will be used to hold an estimate of the
+ * number of positive dentries in the directory. That number will be
+ * subtracted from d_nchildren to see if the limit has been exceeded and
+ * the number of excess negative dentries to be reclaimed.
+ */
+struct d_iname_count {
+ unsigned char d_dummy[DNAME_INLINE_LEN - sizeof(int)];
+ unsigned int d_npositive;
+};
+
+static inline bool dentry_has_npositive(struct dentry *dentry)
+{
+ int len = dentry->d_name.len;
+
+ BUILD_BUG_ON(sizeof(struct d_iname_count) != sizeof(dentry->d_iname));
+
+ return (len >= DNAME_INLINE_LEN) ||
+ (len < DNAME_INLINE_LEN - sizeof(int));
+}
+
+static inline unsigned int read_dentry_npositive(struct dentry *dentry)
+{
+ struct d_iname_count *p = (struct d_iname_count *)dentry->d_iname;
+
+ return p->d_npositive;
+}
+
+static inline void set_dentry_npositive(struct dentry *dentry,
+ unsigned int npositive)
+{
+ struct d_iname_count *p = (struct d_iname_count *)dentry->d_iname;
+
+ p->d_npositive = npositive;
+}
+
+/*
+ * Get an estimated negative dentry count
+ */
+static inline unsigned int read_dentry_nnegative(struct dentry *dentry)
+{
+ return dentry->d_nchildren - (dentry_has_npositive(dentry)
+ ? read_dentry_npositive(dentry) : 0);
+}
+
+/*
+ * Initialize d_iname[] to have null bytes at the end of the array.
+ */
+static inline void init_dentry_iname(struct dentry *dentry)
+{
+ set_dentry_npositive(dentry, 0);
+}
+
/*
* Reclaim excess negative dentries in a directory
*/
@@ -1302,11 +1356,11 @@ static void reclaim_negative_dentry(struct dentry *parent,
{
struct dentry *child;
int limit = READ_ONCE(dcache_dentry_dir_max_sysctl);
- int cnt;
+ int cnt, npositive;

lockdep_assert_held(&parent->d_lock);

- cnt = parent->d_nchildren;
+ cnt = read_dentry_nnegative(parent);

/*
* Compute # of negative dentries to be reclaimed
@@ -1316,6 +1370,7 @@ static void reclaim_negative_dentry(struct dentry *parent,
return;
cnt -= limit;
cnt += (limit >> 3);
+ npositive = 0;

/*
* The d_subdirs is treated like a kind of LRU where
@@ -1327,8 +1382,10 @@ static void reclaim_negative_dentry(struct dentry *parent,
/*
* This check is racy and so it may not be accurate.
*/
- if (!d_is_negative(child))
+ if (!d_is_negative(child)) {
+ npositive++;
continue;
+ }

if (!spin_trylock(&child->d_lock))
continue;
@@ -1368,7 +1425,17 @@ static void reclaim_negative_dentry(struct dentry *parent,
list_cut_before(&list, &parent->d_subdirs,
&child->d_child);
list_splice_tail(&list, &parent->d_subdirs);
+
+ /*
+ * Update positive dentry count estimate
+ * Don't allow npositive to decay by more than 1/2.
+ */
+ if (dentry_has_npositive(parent) &&
+ (read_dentry_npositive(parent) > 2 * npositive))
+ npositive = read_dentry_npositive(parent) / 2;
}
+ if (dentry_has_npositive(parent))
+ set_dentry_npositive(parent, npositive);
}

/*
@@ -1430,16 +1497,14 @@ static void negative_reclaim_check(struct dentry *parent)
*/
if (!can_reclaim_dentry(parent->d_flags) ||
(parent->d_flags & DCACHE_RECLAIMING) ||
- (READ_ONCE(parent->d_nchildren) <= limit))
+ (read_dentry_nnegative(parent) <= limit))
goto rcu_unlock_out;

spin_lock(&parent->d_lock);
if (!can_reclaim_dentry(parent->d_flags) ||
(parent->d_flags & DCACHE_RECLAIMING) ||
- (READ_ONCE(parent->d_nchildren) <= limit))
- goto unlock_out;
-
- if (!d_is_dir(parent))
+ (read_dentry_nnegative(parent) <= limit) ||
+ !d_is_dir(parent))
goto unlock_out;

dentry_node = kzalloc(sizeof(*dentry_node), GFP_NOWAIT);
@@ -1921,7 +1986,7 @@ static struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
* will still always have a NUL at the end, even if we might
* be overwriting an internal NUL character
*/
- dentry->d_iname[DNAME_INLINE_LEN-1] = 0;
+ init_dentry_iname(dentry);
if (unlikely(!name)) {
name = &slash_name;
dname = dentry->d_iname;
@@ -2991,11 +3056,18 @@ static void swap_names(struct dentry *dentry, struct dentry *target)
}
}
swap(dentry->d_name.hash_len, target->d_name.hash_len);
+
+ if (dentry_has_npositive(dentry))
+ set_dentry_npositive(dentry, 0);
+ if (dentry_has_npositive(target))
+ set_dentry_npositive(target, 0);
}

static void copy_name(struct dentry *dentry, struct dentry *target)
{
struct external_name *old_name = NULL;
+
+ init_dentry_iname(dentry);
if (unlikely(dname_external(dentry)))
old_name = external_name(dentry);
if (unlikely(dname_external(target))) {
--
2.18.1