Re: [Ocfs2-devel] [PATCH] ocfs2: Implement llseek()

From: Tristan Ye
Date: Thu May 19 2011 - 05:14:13 EST


Sunil Mushran wrote:
> ocfs2 implements its own llseek() to provide the SEEK_HOLE/SEEK_DATA
> functionality.
>
> SEEK_HOLE sets the file pointer to the start of either a hole or an unwritten
> (preallocated) extent, that is greater than or equal to the supplied offset.
>
> SEEK_DATA sets the file pointer to the start of an allocated extent (not
> unwritten) that is greater than or equal to the supplied offset.
>
> If the supplied offset is on a desired region, then the file pointer is set
> to it. Offsets greater than or equal to the file size return -ENXIO.
>
> Unwritten (preallocated) extents are considered holes because the file system
> treats reads to such regions in the same way as it does to holes.
>
> Signed-off-by: Sunil Mushran <sunil.mushran@xxxxxxxxxx>
> ---
> fs/ocfs2/extent_map.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++++
> fs/ocfs2/extent_map.h | 2 +
> fs/ocfs2/file.c | 53 ++++++++++++++++++++++++++-
> 3 files changed, 150 insertions(+), 2 deletions(-)
>
> diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
> index 23457b4..6942c21 100644
> --- a/fs/ocfs2/extent_map.c
> +++ b/fs/ocfs2/extent_map.c
> @@ -832,6 +832,103 @@ out:
> return ret;
> }
>
> +int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int origin)
> +{
> + struct inode *inode = file->f_mapping->host;
> + int ret;
> + unsigned int is_last = 0, is_data = 0;
> + u16 cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits;
> + u32 cpos, cend, clen, hole_size;
> + u64 extoff, extlen;
> + struct buffer_head *di_bh = NULL;
> + struct ocfs2_extent_rec rec;
> +
> + BUG_ON(origin != SEEK_DATA && origin != SEEK_HOLE);
> +
> + ret = ocfs2_inode_lock(inode, &di_bh, 0);
> + if (ret) {
> + mlog_errno(ret);
> + goto out;
> + }
> +
> + down_read(&OCFS2_I(inode)->ip_alloc_sem);
> +
> + if (inode->i_size == 0 || *offset >= inode->i_size) {
> + ret = -ENXIO;
> + goto out_unlock;
> + }

Why not using if (*offset >= inode->i_size) directly?

> +
> + if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
> + if (origin == SEEK_HOLE)
> + *offset = inode->i_size;
> + goto out_unlock;
> + }
> +
> + clen = 0;
> + cpos = *offset >> cs_bits;
> + cend = ocfs2_clusters_for_bytes(inode->i_sb, inode->i_size);
> +
> + while (cpos < cend && !is_last) {
> + ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, &hole_size,
> + &rec, &is_last);
> + if (ret) {
> + mlog_errno(ret);
> + goto out_unlock;
> + }
> +
> + extoff = cpos;
> + extoff <<= cs_bits;
> +
> + if (rec.e_blkno == 0ULL) {
> + clen = hole_size;
> + is_data = 0;
> + } else {
> + BUG_ON(cpos < le32_to_cpu(rec.e_cpos));


A same assert has already been performed inside ocfs2_get_clusters_nocache(),
does it make sense to do it again here?


> + clen = le16_to_cpu(rec.e_leaf_clusters) -
> + (cpos - le32_to_cpu(rec.e_cpos));
> + is_data = (rec.e_flags & OCFS2_EXT_UNWRITTEN) ? 0 : 1;
> + }
> +
> + if ((!is_data && origin == SEEK_HOLE) ||
> + (is_data && origin == SEEK_DATA)) {
> + if (extoff > *offset)
> + *offset = extoff;
> + goto out_unlock;

Seems above logic is going to stop at the first time we find a hole.

How about the offset was within the range of a hole already when we doing
SEEK_HOLE, shouldn't we proceed detecting until the next hole gets found, whose
start_offset was greater than supplied offset, according to semantics described
by the the header of this patch, should it be like following?

if (extoff > *offset) {
*offset = extoff;
goto out_unlock;
}

> + }
> +
> + if (!is_last)
> + cpos += clen;
> + }
> +
> + if (origin == SEEK_HOLE) {
> + extoff = cpos;
> + extoff <<= cs_bits;

extoff already has been assigned properly above in while loop?

> + extlen = clen;
> + extlen <<= cs_bits;
> +
> + if ((extoff + extlen) > inode->i_size)
> + extlen = inode->i_size - extoff;
> + extoff += extlen;
> + if (extoff > *offset)
> + *offset = extoff;
> + goto out_unlock;
> + }
> +
> + ret = -ENXIO;
> +
> +out_unlock:
> +
> + brelse(di_bh);
> +
> + up_read(&OCFS2_I(inode)->ip_alloc_sem);
> +
> + ocfs2_inode_unlock(inode, 0);
> +out:
> + if (ret && ret != -ENXIO)
> + ret = -ENXIO;
> + return ret;
> +}
> +
> int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
> struct buffer_head *bhs[], int flags,
> int (*validate)(struct super_block *sb,
> diff --git a/fs/ocfs2/extent_map.h b/fs/ocfs2/extent_map.h
> index e79d41c..67ea57d 100644
> --- a/fs/ocfs2/extent_map.h
> +++ b/fs/ocfs2/extent_map.h
> @@ -53,6 +53,8 @@ int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno,
> int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
> u64 map_start, u64 map_len);
>
> +int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int origin);
> +
> int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
> u32 *p_cluster, u32 *num_clusters,
> struct ocfs2_extent_list *el,
> diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
> index cce8c2b..5349a4b 100644
> --- a/fs/ocfs2/file.c
> +++ b/fs/ocfs2/file.c
> @@ -2572,6 +2572,55 @@ bail:
> return ret;
> }
>
> +/* Refer generic_file_llseek_unlocked() */
> +static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int origin)
> +{
> + struct inode *inode = file->f_mapping->host;
> + int ret = 0;
> +
> + mutex_lock(&inode->i_mutex);
> +
> + switch (origin) {
> + case SEEK_END:
> + offset += inode->i_size;
> + break;
> + case SEEK_CUR:
> + if (offset == 0) {
> + offset = file->f_pos;
> + goto out;
> + }
> + offset += file->f_pos;
> + break;
> + case SEEK_DATA:
> + case SEEK_HOLE:
> + ret = ocfs2_seek_data_hole_offset(file, &offset, origin);
> + if (ret)
> + goto out;
> + break;
> + default:
> + ret = -EINVAL;
> + goto out;
> + }
> +
> + if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
> + ret = -EINVAL;
> + if (!ret && offset > inode->i_sb->s_maxbytes)
> + ret = -EINVAL;
> + if (ret)
> + goto out;
> +
> + if (offset != file->f_pos) {
> + file->f_pos = offset;
> + file->f_version = 0;
> + }
> +
> +out:
> + mutex_unlock(&inode->i_mutex);
> + if (ret)
> + return ret;
> + return offset;
> +}
> +
> const struct inode_operations ocfs2_file_iops = {
> .setattr = ocfs2_setattr,
> .getattr = ocfs2_getattr,
> @@ -2594,7 +2643,7 @@ const struct inode_operations ocfs2_special_file_iops = {
> * ocfs2_fops_no_plocks and ocfs2_dops_no_plocks!
> */
> const struct file_operations ocfs2_fops = {
> - .llseek = generic_file_llseek,
> + .llseek = ocfs2_file_llseek,
> .read = do_sync_read,
> .write = do_sync_write,
> .mmap = ocfs2_mmap,
> @@ -2642,7 +2691,7 @@ const struct file_operations ocfs2_dops = {
> * the cluster.
> */
> const struct file_operations ocfs2_fops_no_plocks = {
> - .llseek = generic_file_llseek,
> + .llseek = ocfs2_file_llseek,
> .read = do_sync_read,
> .write = do_sync_write,
> .mmap = ocfs2_mmap,

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/