[PATCH 4.9 46/51] xfs: fix COW writeback race

From: Greg Kroah-Hartman
Date: Thu Feb 02 2017 - 13:44:04 EST


4.9-stable review patch. If anyone has any objections, please let me know.

------------------

From: Christoph Hellwig <hch@xxxxxx>

commit d2b3964a0780d2d2994eba57f950d6c9fe489ed8 upstream.

Due to the way how xfs_iomap_write_allocate tries to convert the whole
found extents from delalloc to real space we can run into a race
condition with multiple threads doing writes to this same extent.
For the non-COW case that is harmless as the only thing that can happen
is that we call xfs_bmapi_write on an extent that has already been
converted to a real allocation. For COW writes where we move the extent
from the COW to the data fork after I/O completion the race is, however,
not quite as harmless. In the worst case we are now calling
xfs_bmapi_write on a region that contains hole in the COW work, which
will trip up an assert in debug builds or lead to file system corruption
in non-debug builds. This seems to be reproducible with workloads of
small O_DSYNC write, although so far I've not managed to come up with
a with an isolated reproducer.

The fix for the issue is relatively simple: tell xfs_bmapi_write
that we are only asked to convert delayed allocations and skip holes
in that case.

Signed-off-by: Christoph Hellwig <hch@xxxxxx>
Reviewed-by: Brian Foster <bfoster@xxxxxxxxxx>
Reviewed-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
---
fs/xfs/libxfs/xfs_bmap.c | 44 ++++++++++++++++++++++++++++++++------------
fs/xfs/libxfs/xfs_bmap.h | 6 +++++-
fs/xfs/xfs_iomap.c | 2 +-
3 files changed, 38 insertions(+), 14 deletions(-)

--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -4607,8 +4607,6 @@ xfs_bmapi_write(
int n; /* current extent index */
xfs_fileoff_t obno; /* old block number (offset) */
int whichfork; /* data or attr fork */
- char inhole; /* current location is hole in file */
- char wasdelay; /* old extent was delayed */

#ifdef DEBUG
xfs_fileoff_t orig_bno; /* original block number value */
@@ -4694,22 +4692,44 @@ xfs_bmapi_write(
bma.firstblock = firstblock;

while (bno < end && n < *nmap) {
- inhole = eof || bma.got.br_startoff > bno;
- wasdelay = !inhole && isnullstartblock(bma.got.br_startblock);
+ bool need_alloc = false, wasdelay = false;

- /*
- * Make sure we only reflink into a hole.
- */
- if (flags & XFS_BMAPI_REMAP)
- ASSERT(inhole);
- if (flags & XFS_BMAPI_COWFORK)
- ASSERT(!inhole);
+ /* in hole or beyoned EOF? */
+ if (eof || bma.got.br_startoff > bno) {
+ if (flags & XFS_BMAPI_DELALLOC) {
+ /*
+ * For the COW fork we can reasonably get a
+ * request for converting an extent that races
+ * with other threads already having converted
+ * part of it, as there converting COW to
+ * regular blocks is not protected using the
+ * IOLOCK.
+ */
+ ASSERT(flags & XFS_BMAPI_COWFORK);
+ if (!(flags & XFS_BMAPI_COWFORK)) {
+ error = -EIO;
+ goto error0;
+ }
+
+ if (eof || bno >= end)
+ break;
+ } else {
+ need_alloc = true;
+ }
+ } else {
+ /*
+ * Make sure we only reflink into a hole.
+ */
+ ASSERT(!(flags & XFS_BMAPI_REMAP));
+ if (isnullstartblock(bma.got.br_startblock))
+ wasdelay = true;
+ }

/*
* First, deal with the hole before the allocated space
* that we found, if any.
*/
- if (inhole || wasdelay) {
+ if (need_alloc || wasdelay) {
bma.eof = eof;
bma.conv = !!(flags & XFS_BMAPI_CONVERT);
bma.wasdel = wasdelay;
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -110,6 +110,9 @@ struct xfs_extent_free_item
/* Map something in the CoW fork. */
#define XFS_BMAPI_COWFORK 0x200

+/* Only convert delalloc space, don't allocate entirely new extents */
+#define XFS_BMAPI_DELALLOC 0x400
+
#define XFS_BMAPI_FLAGS \
{ XFS_BMAPI_ENTIRE, "ENTIRE" }, \
{ XFS_BMAPI_METADATA, "METADATA" }, \
@@ -120,7 +123,8 @@ struct xfs_extent_free_item
{ XFS_BMAPI_CONVERT, "CONVERT" }, \
{ XFS_BMAPI_ZERO, "ZERO" }, \
{ XFS_BMAPI_REMAP, "REMAP" }, \
- { XFS_BMAPI_COWFORK, "COWFORK" }
+ { XFS_BMAPI_COWFORK, "COWFORK" }, \
+ { XFS_BMAPI_DELALLOC, "DELALLOC" }


static inline int xfs_bmapi_aflag(int w)
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -681,7 +681,7 @@ xfs_iomap_write_allocate(
xfs_trans_t *tp;
int nimaps;
int error = 0;
- int flags = 0;
+ int flags = XFS_BMAPI_DELALLOC;
int nres;

if (whichfork == XFS_COW_FORK)