Re: [PATCH] Fix generic_block_fiemap for files bigger than 4GB
From: Josef Bacik
Date:  Thu Oct 29 2009 - 08:41:37 EST
On Thu, Oct 29, 2009 at 08:31:54AM -0400, Josef Bacik wrote:
> On Wed, Oct 28, 2009 at 11:00:22PM -0700, Andrew Morton wrote:
> > On Mon, 26 Oct 2009 18:24:28 +0100 Mike Hommey <mh@xxxxxxxxxxxx> wrote:
> > 
> > > Because of an integer overflow on start_blk, various kind of wrong results
> > > would be returned by the generic_block_fiemap handler, such as no extents
> > > when there is a 4GB+ hole at the beginning of the file, or wrong fe_logical
> > > when an extent starts after the first 4GB.
> > > 
> > > Signed-off-by: Mike Hommey <mh@xxxxxxxxxxxx>
> > > ---
> > >  fs/ioctl.c |    2 +-
> > >  1 files changed, 1 insertions(+), 1 deletions(-)
> > > 
> > > diff --git a/fs/ioctl.c b/fs/ioctl.c
> > > index 7b17a14..6c75110 100644
> > > --- a/fs/ioctl.c
> > > +++ b/fs/ioctl.c
> > > @@ -254,7 +254,7 @@ int __generic_block_fiemap(struct inode *inode,
> > >  			   u64 len, get_block_t *get_block)
> > >  {
> > >  	struct buffer_head tmp;
> > > -	unsigned int start_blk;
> > > +	unsigned long long start_blk;
> > >  	long long length = 0, map_len = 0;
> > >  	u64 logical = 0, phys = 0, size = 0;
> > >  	u32 flags = FIEMAP_EXTENT_MERGED;
> > 
> > Well.  Should it be unsigned long long, or u64 or sector_t?  Or even loff_t.
> > 
> > The code's a bit confused about types in there.  And it's made much
> > more confusing by the moronic and wholly unnecessary use of macros for
> > blk_to_logical() and logical_to_blk().
> > 
> > It's also unhelpful that the `u64 start' argument forgot to get itself
> > documented in the kerneldoc comment.  Sigh.
> > 
> > Ah, generic_block_fiemap() has it:
> > 
> >  * @start: The initial block to map
> > 
> > I guess u64 was logical there as it comes in from userspace.  But at
> > some boundary we should start talking kernel types so I suspect the
> > correct thing to do here is to use sector_t?
> > 
> 
> Hmm this is strange, I had sent a patch a few months ago after you chewed me out
> the first time for this to change all the types and such and make the macro's
> inlined functions.  I will see if I can find it and resend it.  Thanks,
> 
Ok here it is.  I've fixed all the type issues and there were other problems
with not setting FIEMAP_EXTENT_LAST last properly.  If this is more reasonable
to you I will update it and send it along properly.  Thanks,
Josef
diff --git a/fs/ioctl.c b/fs/ioctl.c
index ac2d47e..ee9fba0 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -228,13 +228,22 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
 
 #ifdef CONFIG_BLOCK
 
-#define blk_to_logical(inode, blk) (blk << (inode)->i_blkbits)
-#define logical_to_blk(inode, offset) (offset >> (inode)->i_blkbits);
+static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
+{
+	return (offset >> inode->i_blkbits);
+}
+
+static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
+{
+	return (blk << inode->i_blkbits);
+}
 
 /**
  * __generic_block_fiemap - FIEMAP for block based inodes (no locking)
  * @inode - the inode to map
- * @arg - the pointer to userspace where we copy everything to
+ * @fieinfo - the fiemap info struct that will be passed back to userspace
+ * @start - where to start mapping in the inode
+ * @len - how much space to map
  * @get_block - the fs's get_block function
  *
  * This does FIEMAP for block based inodes.  Basically it will just loop
@@ -250,43 +259,63 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
  */
 
 int __generic_block_fiemap(struct inode *inode,
-			   struct fiemap_extent_info *fieinfo, u64 start,
-			   u64 len, get_block_t *get_block)
+			   struct fiemap_extent_info *fieinfo, loff_t start,
+			   loff_t len, get_block_t *get_block)
 {
-	struct buffer_head tmp;
-	unsigned int start_blk;
-	long long length = 0, map_len = 0;
+	struct buffer_head map_bh;
+	sector_t start_blk;
+	loff_t end;
 	u64 logical = 0, phys = 0, size = 0;
 	u32 flags = FIEMAP_EXTENT_MERGED;
+	bool past_eof = false, whole_file = false;
 	int ret = 0;
 
-	if ((ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC)))
+	ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
+	if (ret)
 		return ret;
 
 	start_blk = logical_to_blk(inode, start);
 
-	length = (long long)min_t(u64, len, i_size_read(inode));
-	map_len = length;
+	if (len >= i_size_read(inode)) {
+		whole_file = true;
+		len = i_size_read(inode);
+	}
+
+	end = start + len;
 
 	do {
 		/*
-		 * we set b_size to the total size we want so it will map as
+		 * We set b_size to the total size we want so it will map as
 		 * many contiguous blocks as possible at once
 		 */
-		memset(&tmp, 0, sizeof(struct buffer_head));
-		tmp.b_size = map_len;
+		memset(&map_bh, 0, sizeof(struct buffer_head));
+		map_bh.b_size = len;
 
-		ret = get_block(inode, start_blk, &tmp, 0);
+		ret = get_block(inode, start_blk, &map_bh, 0);
 		if (ret)
 			break;
 
 		/* HOLE */
-		if (!buffer_mapped(&tmp)) {
+		if (!buffer_mapped(&map_bh)) {
+			start_blk++;
+
 			/*
-			 * first hole after going past the EOF, this is our
+			 * We want to handle the case where there is an
+			 * allocated block at the front of the file, and then
+			 * nothing but holes up to the end of the file properly,
+			 * to make sure that extent at the front gets properly
+			 * marked with FIEMAP_EXTENT_LAST
+			 */
+			if (!past_eof &&
+			    blk_to_logical(inode, start_blk) >=
+			    blk_to_logical(inode, 0) + i_size_read(inode))
+				past_eof = true;
+
+			/*
+			 * First hole after going past the EOF, this is our
 			 * last extent
 			 */
-			if (length <= 0) {
+			if (past_eof && size) {
 				flags = FIEMAP_EXTENT_MERGED|FIEMAP_EXTENT_LAST;
 				ret = fiemap_fill_next_extent(fieinfo, logical,
 							      phys, size,
@@ -294,15 +323,39 @@ int __generic_block_fiemap(struct inode *inode,
 				break;
 			}
 
-			length -= blk_to_logical(inode, 1);
-
-			/* if we have holes up to/past EOF then we're done */
-			if (length <= 0)
+			/* If we have holes up to/past EOF then we're done */
+			if (blk_to_logical(inode, start_blk) >= end
+			    || past_eof)
 				break;
-
-			start_blk++;
 		} else {
-			if (length <= 0 && size) {
+			/*
+			 * We have gone over the length of what we wanted to
+			 * map, and it wasn't the entire file, so add the extent
+			 * we got last time and exit.
+			 *
+			 * This is for the case where say we want to map all the
+			 * way up to the second to the last block in a file, but
+			 * the last block is a hole, making the second to last
+			 * block FIEMAP_EXTENT_LAST.  In this case we want to
+			 * see if there is a hole after the second to last block
+			 * so we can mark it properly.  If we found data after
+			 * we exceeded the length we were requesting, then we
+			 * are good to go, just add the extent to the fieinfo
+			 * and break
+			 */
+			if (blk_to_logical(inode, start_blk) >= end
+			    && !whole_file) {
+				ret = fiemap_fill_next_extent(fieinfo, logical,
+							      phys, size,
+							      flags);
+				break;
+			}
+
+			/*
+			 * If size != 0 then we know we already have an extent
+			 * to add, so add it.
+			 */
+			if (size) {
 				ret = fiemap_fill_next_extent(fieinfo, logical,
 							      phys, size,
 							      flags);
@@ -311,32 +364,26 @@ int __generic_block_fiemap(struct inode *inode,
 			}
 
 			logical = blk_to_logical(inode, start_blk);
-			phys = blk_to_logical(inode, tmp.b_blocknr);
-			size = tmp.b_size;
+			phys = blk_to_logical(inode, map_bh.b_blocknr);
+			size = map_bh.b_size;
 			flags = FIEMAP_EXTENT_MERGED;
 
-			length -= tmp.b_size;
 			start_blk += logical_to_blk(inode, size);
 
 			/*
-			 * if we are past the EOF we need to loop again to see
-			 * if there is a hole so we can mark this extent as the
-			 * last one, and if not keep mapping things until we
-			 * find a hole, or we run out of slots in the extent
-			 * array
+			 * If we are past the EOF, then we need to make sure as
+			 * soon as we find a hole that the last extent we found
+			 * is marked with FIEMAP_EXTENT_LAST
 			 */
-			if (length <= 0)
-				continue;
-
-			ret = fiemap_fill_next_extent(fieinfo, logical, phys,
-						      size, flags);
-			if (ret)
-				break;
+			if (!past_eof &&
+			    logical + size >=
+			    blk_to_logical(inode, 0) + i_size_read(inode))
+				past_eof = true;
 		}
 		cond_resched();
 	} while (1);
 
-	/* if ret is 1 then we just hit the end of the extent array */
+	/* If ret is 1 then we just hit the end of the extent array */
 	if (ret == 1)
 		ret = 0;
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5bed436..b23c725 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2305,8 +2305,9 @@ extern int vfs_fstatat(int , char __user *, struct kstat *, int);
 extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
 		    unsigned long arg);
 extern int __generic_block_fiemap(struct inode *inode,
-				  struct fiemap_extent_info *fieinfo, u64 start,
-				  u64 len, get_block_t *get_block);
+				  struct fiemap_extent_info *fieinfo,
+				  loff_t start, loff_t len,
+				  get_block_t *get_block);
 extern int generic_block_fiemap(struct inode *inode,
 				struct fiemap_extent_info *fieinfo, u64 start,
 				u64 len, get_block_t *get_block);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/