[PATCH 10/32] ocfs2: Add total entry count to dx_root_block

From: Mark Fasheh
Date: Tue Mar 17 2009 - 19:37:23 EST


This little bit of extra accounting speeds up ocfs2_empty_dir()
dramatically by allowing us to short-circuit the full directory scan.

Signed-off-by: Mark Fasheh <mfasheh@xxxxxxxx>
---
fs/ocfs2/dir.c | 162 +++++++++++++++++++++++++++++++++++++--------------
fs/ocfs2/ocfs2_fs.h | 6 ++-
2 files changed, 124 insertions(+), 44 deletions(-)

diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 52df912..5e6aeb0 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -1322,16 +1322,15 @@ static int ocfs2_delete_entry_dx(handle_t *handle, struct inode *dir,
* entry yet. Likewise, successful return means we *must*
* remove the indexed entry.
*
- * We're also careful to journal the root tree block here if
- * we're going to be adding to the start of the free list.
+ * We're also careful to journal the root tree block here as
+ * the entry count needs to be updated. Also, we might be
+ * adding to the start of the free list.
*/
- if (add_to_free_list || ocfs2_dx_root_inline(dx_root)) {
- ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
- if (ret) {
- mlog_errno(ret);
- goto out;
- }
+ ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
}

if (!ocfs2_dx_root_inline(dx_root)) {
@@ -1365,11 +1364,12 @@ static int ocfs2_delete_entry_dx(handle_t *handle, struct inode *dir,
/* leaf_bh was journal_accessed for us in __ocfs2_delete_entry */
ocfs2_journal_dirty(handle, leaf_bh);

+ le32_add_cpu(&dx_root->dr_num_entries, -1);
+ ocfs2_journal_dirty(handle, dx_root_bh);
+
ocfs2_dx_list_remove_entry(entry_list, index);

- if (ocfs2_dx_root_inline(dx_root))
- ocfs2_journal_dirty(handle, dx_root_bh);
- else
+ if (!ocfs2_dx_root_inline(dx_root))
ocfs2_journal_dirty(handle, lookup->dl_dx_leaf_bh);

out:
@@ -1508,13 +1508,20 @@ out:
return ret;
}

-static int ocfs2_dx_inline_root_insert(struct inode *dir, handle_t *handle,
- struct ocfs2_dx_hinfo *hinfo,
- u64 dirent_blk,
- struct buffer_head *dx_root_bh)
+static void ocfs2_dx_inline_root_insert(struct inode *dir, handle_t *handle,
+ struct ocfs2_dx_hinfo *hinfo,
+ u64 dirent_blk,
+ struct ocfs2_dx_root_block *dx_root)
{
- int ret;
+ ocfs2_dx_entry_list_insert(&dx_root->dr_entries, hinfo, dirent_blk);
+}
+
+static int ocfs2_dx_dir_insert(struct inode *dir, handle_t *handle,
+ struct ocfs2_dir_lookup_result *lookup)
+{
+ int ret = 0;
struct ocfs2_dx_root_block *dx_root;
+ struct buffer_head *dx_root_bh = lookup->dl_dx_root_bh;

ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
@@ -1523,31 +1530,27 @@ static int ocfs2_dx_inline_root_insert(struct inode *dir, handle_t *handle,
goto out;
}

- dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
- ocfs2_dx_entry_list_insert(&dx_root->dr_entries, hinfo, dirent_blk);
+ dx_root = (struct ocfs2_dx_root_block *)lookup->dl_dx_root_bh->b_data;
+ if (ocfs2_dx_root_inline(dx_root)) {
+ ocfs2_dx_inline_root_insert(dir, handle,
+ &lookup->dl_hinfo,
+ lookup->dl_leaf_bh->b_blocknr,
+ dx_root);
+ } else {
+ ret = __ocfs2_dx_dir_leaf_insert(dir, handle, &lookup->dl_hinfo,
+ lookup->dl_leaf_bh->b_blocknr,
+ lookup->dl_dx_leaf_bh);
+ if (ret)
+ goto out;
+ }
+
+ le32_add_cpu(&dx_root->dr_num_entries, 1);
ocfs2_journal_dirty(handle, dx_root_bh);

out:
return ret;
}

-static int ocfs2_dx_dir_insert(struct inode *dir, handle_t *handle,
- struct ocfs2_dir_lookup_result *lookup)
-{
- struct ocfs2_dx_root_block *dx_root;
-
- dx_root = (struct ocfs2_dx_root_block *)lookup->dl_dx_root_bh->b_data;
- if (ocfs2_dx_root_inline(dx_root))
- return ocfs2_dx_inline_root_insert(dir, handle,
- &lookup->dl_hinfo,
- lookup->dl_leaf_bh->b_blocknr,
- lookup->dl_dx_root_bh);
-
- return __ocfs2_dx_dir_leaf_insert(dir, handle, &lookup->dl_hinfo,
- lookup->dl_leaf_bh->b_blocknr,
- lookup->dl_dx_leaf_bh);
-}
-
static void ocfs2_remove_block_from_free_list(struct inode *dir,
handle_t *handle,
struct ocfs2_dir_lookup_result *lookup)
@@ -2121,6 +2124,7 @@ struct ocfs2_empty_dir_priv {
unsigned seen_dot;
unsigned seen_dot_dot;
unsigned seen_other;
+ unsigned dx_dir;
};
static int ocfs2_empty_dir_filldir(void *priv, const char *name, int name_len,
loff_t pos, u64 ino, unsigned type)
@@ -2130,6 +2134,13 @@ static int ocfs2_empty_dir_filldir(void *priv, const char *name, int name_len,
/*
* Check the positions of "." and ".." records to be sure
* they're in the correct place.
+ *
+ * Indexed directories don't need to proceed past the first
+ * two entries, so we end the scan after seeing '..'. Despite
+ * that, we allow the scan to proceed In the event that we
+ * have a corrupted indexed directory (no dot or dot dot
+ * entries). This allows us to double check for existing
+ * entries which might not have been found in the index.
*/
if (name_len == 1 && !strncmp(".", name, 1) && pos == 0) {
p->seen_dot = 1;
@@ -2139,18 +2150,57 @@ static int ocfs2_empty_dir_filldir(void *priv, const char *name, int name_len,
if (name_len == 2 && !strncmp("..", name, 2) &&
pos == OCFS2_DIR_REC_LEN(1)) {
p->seen_dot_dot = 1;
+
+ if (p->dx_dir && p->seen_dot)
+ return 1;
+
return 0;
}

p->seen_other = 1;
return 1;
}
+
+static int ocfs2_empty_dir_dx(struct inode *inode,
+ struct ocfs2_empty_dir_priv *priv)
+{
+ int ret;
+ struct buffer_head *di_bh = NULL;
+ struct buffer_head *dx_root_bh = NULL;
+ struct ocfs2_dinode *di;
+ struct ocfs2_dx_root_block *dx_root;
+
+ priv->dx_dir = 1;
+
+ ret = ocfs2_read_inode_block(inode, &di_bh);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+ di = (struct ocfs2_dinode *)di_bh->b_data;
+
+ ret = ocfs2_read_dx_root(inode, di, &dx_root_bh);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+ dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
+
+ if (le32_to_cpu(dx_root->dr_num_entries) != 2)
+ priv->seen_other = 1;
+
+out:
+ brelse(di_bh);
+ brelse(dx_root_bh);
+ return ret;
+}
+
/*
* routine to check that the specified directory is empty (for rmdir)
*
* Returns 1 if dir is empty, zero otherwise.
*
- * XXX: This is a performance problem
+ * XXX: This is a performance problem for unindexed directories.
*/
int ocfs2_empty_dir(struct inode *inode)
{
@@ -2160,6 +2210,16 @@ int ocfs2_empty_dir(struct inode *inode)

memset(&priv, 0, sizeof(priv));

+ if (ocfs2_dir_indexed(inode)) {
+ ret = ocfs2_empty_dir_dx(inode, &priv);
+ if (ret)
+ mlog_errno(ret);
+ /*
+ * We still run ocfs2_dir_foreach to get the checks
+ * for "." and "..".
+ */
+ }
+
ret = ocfs2_dir_foreach(inode, &start, &priv, ocfs2_empty_dir_filldir);
if (ret)
mlog_errno(ret);
@@ -2329,7 +2389,7 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
struct buffer_head *di_bh,
struct buffer_head *dirdata_bh,
struct ocfs2_alloc_context *meta_ac,
- int dx_inline,
+ int dx_inline, u32 num_entries,
struct buffer_head **ret_dx_root_bh)
{
int ret;
@@ -2375,6 +2435,7 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
dx_root->dr_fs_generation = cpu_to_le32(osb->fs_generation);
dx_root->dr_blkno = cpu_to_le64(dr_blkno);
dx_root->dr_dir_blkno = cpu_to_le64(OCFS2_I(dir)->ip_blkno);
+ dx_root->dr_num_entries = cpu_to_le32(num_entries);
if (le16_to_cpu(trailer->db_free_rec_len))
dx_root->dr_free_blk = cpu_to_le64(dirdata_bh->b_blocknr);
else
@@ -2586,7 +2647,7 @@ static int ocfs2_fill_new_dir_dx(struct ocfs2_super *osb,
}

ret = ocfs2_dx_dir_attach_index(osb, handle, inode, di_bh, leaf_bh,
- meta_ac, 1, &dx_root_bh);
+ meta_ac, 1, 2, &dx_root_bh);
if (ret) {
mlog_errno(ret);
goto out;
@@ -2633,6 +2694,7 @@ static int ocfs2_dx_dir_index_block(struct inode *dir,
handle_t *handle,
struct buffer_head **dx_leaves,
int num_dx_leaves,
+ u32 *num_dx_entries,
struct buffer_head *dirent_bh)
{
int ret, namelen, i;
@@ -2664,6 +2726,8 @@ static int ocfs2_dx_dir_index_block(struct inode *dir,
goto out;
}

+ *num_dx_entries = *num_dx_entries + 1;
+
inc:
de_buf += le16_to_cpu(de->rec_len);
}
@@ -2707,6 +2771,8 @@ static void ocfs2_dx_dir_index_root_block(struct inode *dir,

ocfs2_dx_entry_list_insert(&dx_root->dr_entries, &hinfo,
dirent_blk);
+
+ le32_add_cpu(&dx_root->dr_num_entries, 1);
inc:
de_buf += le16_to_cpu(de->rec_len);
}
@@ -2810,7 +2876,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
struct ocfs2_dir_lookup_result *lookup,
struct buffer_head **first_block_bh)
{
- u32 alloc, dx_alloc, bit_off, len;
+ u32 alloc, dx_alloc, bit_off, len, num_dx_entries = 0;
struct super_block *sb = dir->i_sb;
int ret, i, num_dx_leaves = 0, dx_inline = 0,
credits = ocfs2_inline_to_extents_credits(sb);
@@ -2972,10 +3038,14 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
/*
* Dx dirs with an external cluster need to do this up
* front. Inline dx root's get handled later, after
- * we've allocated our root block.
+ * we've allocated our root block. We get passed back
+ * a total number of items so that dr_num_entries can
+ * be correctly set once the dx_root has been
+ * allocated.
*/
ret = ocfs2_dx_dir_index_block(dir, handle, dx_leaves,
- num_dx_leaves, dirdata_bh);
+ num_dx_leaves, &num_dx_entries,
+ dirdata_bh);
if (ret) {
mlog_errno(ret);
goto out_commit;
@@ -3037,7 +3107,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
if (ocfs2_supports_indexed_dirs(osb)) {
ret = ocfs2_dx_dir_attach_index(osb, handle, dir, di_bh,
dirdata_bh, meta_ac, dx_inline,
- &dx_root_bh);
+ num_dx_entries, &dx_root_bh);
if (ret) {
mlog_errno(ret);
goto out_commit;
@@ -4202,6 +4272,12 @@ static int ocfs2_prepare_dx_dir_for_insert(struct inode *dir,
}

dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
+ if (le32_to_cpu(dx_root->dr_num_entries) == OCFS2_DX_ENTRIES_MAX) {
+ ret = -ENOSPC;
+ mlog_errno(ret);
+ goto out;
+ }
+
if (ocfs2_dx_root_inline(dx_root)) {
ret = ocfs2_inline_dx_has_space(dx_root_bh);

diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 43da76e..f7a5201 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -421,6 +421,7 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = {
#define OCFS2_LINK_MAX 32000
#define OCFS2_DX_LINK_MAX ((1U << 31) - 1U)
#define OCFS2_LINKS_HI_SHIFT 16
+#define OCFS2_DX_ENTRIES_MAX (0xffffffffU)

#define S_SHIFT 12
static unsigned char ocfs2_type_by_mode[S_IFMT >> S_SHIFT] = {
@@ -844,7 +845,10 @@ struct ocfs2_dx_root_block {
__u8 dr_reserved0;
__le16 dr_reserved1;
__le64 dr_dir_blkno; /* Pointer to parent inode */
- __le64 dr_reserved2;
+ __le32 dr_num_entries; /* Total number of
+ * names stored in
+ * this directory.*/
+ __le32 dr_reserved2;
__le64 dr_free_blk; /* Pointer to head of free
* unindexed block list. */
__le64 dr_reserved3[15];
--
1.5.6

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/