Re: [PATCH V2 2/2] fs: Move swap_[de]activate to file_operations

From: Darrick J. Wong
Date: Tue Nov 12 2019 - 19:59:15 EST


On Tue, Nov 12, 2019 at 04:42:44PM -0800, ira.weiny@xxxxxxxxx wrote:
> From: Ira Weiny <ira.weiny@xxxxxxxxx>
>
> swap_activate() and swap_deactivate() have nothing to do with address
> spaces. We want to be able to change the address space operations on
> the fly to allow changing inode flags dynamically.
>
> Switching address space operations can be difficult to do reliably.[1]
> Therefore, to simplify switching address space operations we reduce the
> number of functions in those operations by moving swap_activate() and
> swap_deactivate() out of the address space operations.
>
> No functionality is changed with this patch.
>
> This has been tested with XFS but not NFS, f2fs, or btrfs.
>
> Also note we move some functions to facilitate compilation. But there
> are no functional changes are contained within those diffs.
>
> [1] https://lkml.org/lkml/2019/11/11/572
>
> Cc: Dave Chinner <david@xxxxxxxxxxxxx>
> Cc: linux-fsdevel@xxxxxxxxxxxxxxx
> Cc: linux-kernel@xxxxxxxxxxxxxxx
> Suggested-by: Jan Kara <jack@xxxxxxx>
> Signed-off-by: Ira Weiny <ira.weiny@xxxxxxxxx>
>
> ---
> Changes from V0:
> Update cover letter.
> fix btrfs as per Andrew's comments
> change xfs_iomap_swapfile_activate() to xfs_file_swap_activate()
>
> Changes from V1:
> Update recipients list
>
>
> fs/btrfs/file.c | 341 +++++++++++++++++++++++++++++++++++++++++++++
> fs/btrfs/inode.c | 340 --------------------------------------------
> fs/f2fs/data.c | 122 ----------------
> fs/f2fs/file.c | 122 ++++++++++++++++
> fs/nfs/file.c | 4 +-
> fs/xfs/xfs_aops.c | 13 --
> fs/xfs/xfs_file.c | 12 ++
> include/linux/fs.h | 10 +-
> mm/swapfile.c | 12 +-

For /only/ the xfs and mm parts,
Acked-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx>

--D

> 9 files changed, 487 insertions(+), 489 deletions(-)
>
> diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
> index 0cb43b682789..117502311fe0 100644
> --- a/fs/btrfs/file.c
> +++ b/fs/btrfs/file.c
> @@ -16,6 +16,7 @@
> #include <linux/btrfs.h>
> #include <linux/uio.h>
> #include <linux/iversion.h>
> +#include <linux/swap.h>
> #include "ctree.h"
> #include "disk-io.h"
> #include "transaction.h"
> @@ -27,6 +28,7 @@
> #include "qgroup.h"
> #include "compression.h"
> #include "delalloc-space.h"
> +#include "block-group.h"
>
> static struct kmem_cache *btrfs_inode_defrag_cachep;
> /*
> @@ -3444,6 +3446,343 @@ static int btrfs_file_open(struct inode *inode, struct file *filp)
> return generic_file_open(inode, filp);
> }
>
> +#ifdef CONFIG_SWAP
> +/*
> + * Add an entry indicating a block group or device which is pinned by a
> + * swapfile. Returns 0 on success, 1 if there is already an entry for it, or a
> + * negative errno on failure.
> + */
> +static int btrfs_add_swapfile_pin(struct inode *inode, void *ptr,
> + bool is_block_group)
> +{
> + struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
> + struct btrfs_swapfile_pin *sp, *entry;
> + struct rb_node **p;
> + struct rb_node *parent = NULL;
> +
> + sp = kmalloc(sizeof(*sp), GFP_NOFS);
> + if (!sp)
> + return -ENOMEM;
> + sp->ptr = ptr;
> + sp->inode = inode;
> + sp->is_block_group = is_block_group;
> +
> + spin_lock(&fs_info->swapfile_pins_lock);
> + p = &fs_info->swapfile_pins.rb_node;
> + while (*p) {
> + parent = *p;
> + entry = rb_entry(parent, struct btrfs_swapfile_pin, node);
> + if (sp->ptr < entry->ptr ||
> + (sp->ptr == entry->ptr && sp->inode < entry->inode)) {
> + p = &(*p)->rb_left;
> + } else if (sp->ptr > entry->ptr ||
> + (sp->ptr == entry->ptr && sp->inode > entry->inode)) {
> + p = &(*p)->rb_right;
> + } else {
> + spin_unlock(&fs_info->swapfile_pins_lock);
> + kfree(sp);
> + return 1;
> + }
> + }
> + rb_link_node(&sp->node, parent, p);
> + rb_insert_color(&sp->node, &fs_info->swapfile_pins);
> + spin_unlock(&fs_info->swapfile_pins_lock);
> + return 0;
> +}
> +
> +/* Free all of the entries pinned by this swapfile. */
> +static void btrfs_free_swapfile_pins(struct inode *inode)
> +{
> + struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
> + struct btrfs_swapfile_pin *sp;
> + struct rb_node *node, *next;
> +
> + spin_lock(&fs_info->swapfile_pins_lock);
> + node = rb_first(&fs_info->swapfile_pins);
> + while (node) {
> + next = rb_next(node);
> + sp = rb_entry(node, struct btrfs_swapfile_pin, node);
> + if (sp->inode == inode) {
> + rb_erase(&sp->node, &fs_info->swapfile_pins);
> + if (sp->is_block_group)
> + btrfs_put_block_group(sp->ptr);
> + kfree(sp);
> + }
> + node = next;
> + }
> + spin_unlock(&fs_info->swapfile_pins_lock);
> +}
> +
> +struct btrfs_swap_info {
> + u64 start;
> + u64 block_start;
> + u64 block_len;
> + u64 lowest_ppage;
> + u64 highest_ppage;
> + unsigned long nr_pages;
> + int nr_extents;
> +};
> +
> +static int btrfs_add_swap_extent(struct swap_info_struct *sis,
> + struct btrfs_swap_info *bsi)
> +{
> + unsigned long nr_pages;
> + u64 first_ppage, first_ppage_reported, next_ppage;
> + int ret;
> +
> + first_ppage = ALIGN(bsi->block_start, PAGE_SIZE) >> PAGE_SHIFT;
> + next_ppage = ALIGN_DOWN(bsi->block_start + bsi->block_len,
> + PAGE_SIZE) >> PAGE_SHIFT;
> +
> + if (first_ppage >= next_ppage)
> + return 0;
> + nr_pages = next_ppage - first_ppage;
> +
> + first_ppage_reported = first_ppage;
> + if (bsi->start == 0)
> + first_ppage_reported++;
> + if (bsi->lowest_ppage > first_ppage_reported)
> + bsi->lowest_ppage = first_ppage_reported;
> + if (bsi->highest_ppage < (next_ppage - 1))
> + bsi->highest_ppage = next_ppage - 1;
> +
> + ret = add_swap_extent(sis, bsi->nr_pages, nr_pages, first_ppage);
> + if (ret < 0)
> + return ret;
> + bsi->nr_extents += ret;
> + bsi->nr_pages += nr_pages;
> + return 0;
> +}
> +
> +static void btrfs_swap_deactivate(struct file *file)
> +{
> + struct inode *inode = file_inode(file);
> +
> + btrfs_free_swapfile_pins(inode);
> + atomic_dec(&BTRFS_I(inode)->root->nr_swapfiles);
> +}
> +
> +static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
> + sector_t *span)
> +{
> + struct inode *inode = file_inode(file);
> + struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
> + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
> + struct extent_state *cached_state = NULL;
> + struct extent_map *em = NULL;
> + struct btrfs_device *device = NULL;
> + struct btrfs_swap_info bsi = {
> + .lowest_ppage = (sector_t)-1ULL,
> + };
> + int ret = 0;
> + u64 isize;
> + u64 start;
> +
> + /*
> + * If the swap file was just created, make sure delalloc is done. If the
> + * file changes again after this, the user is doing something stupid and
> + * we don't really care.
> + */
> + ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
> + if (ret)
> + return ret;
> +
> + /*
> + * The inode is locked, so these flags won't change after we check them.
> + */
> + if (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS) {
> + btrfs_warn(fs_info, "swapfile must not be compressed");
> + return -EINVAL;
> + }
> + if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW)) {
> + btrfs_warn(fs_info, "swapfile must not be copy-on-write");
> + return -EINVAL;
> + }
> + if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
> + btrfs_warn(fs_info, "swapfile must not be checksummed");
> + return -EINVAL;
> + }
> +
> + /*
> + * Balance or device remove/replace/resize can move stuff around from
> + * under us. The EXCL_OP flag makes sure they aren't running/won't run
> + * concurrently while we are mapping the swap extents, and
> + * fs_info->swapfile_pins prevents them from running while the swap file
> + * is active and moving the extents. Note that this also prevents a
> + * concurrent device add which isn't actually necessary, but it's not
> + * really worth the trouble to allow it.
> + */
> + if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
> + btrfs_warn(fs_info,
> + "cannot activate swapfile while exclusive operation is running");
> + return -EBUSY;
> + }
> + /*
> + * Snapshots can create extents which require COW even if NODATACOW is
> + * set. We use this counter to prevent snapshots. We must increment it
> + * before walking the extents because we don't want a concurrent
> + * snapshot to run after we've already checked the extents.
> + */
> + atomic_inc(&BTRFS_I(inode)->root->nr_swapfiles);
> +
> + isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize);
> +
> + lock_extent_bits(io_tree, 0, isize - 1, &cached_state);
> + start = 0;
> + while (start < isize) {
> + u64 logical_block_start, physical_block_start;
> + struct btrfs_block_group_cache *bg;
> + u64 len = isize - start;
> +
> + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0);
> + if (IS_ERR(em)) {
> + ret = PTR_ERR(em);
> + goto out;
> + }
> +
> + if (em->block_start == EXTENT_MAP_HOLE) {
> + btrfs_warn(fs_info, "swapfile must not have holes");
> + ret = -EINVAL;
> + goto out;
> + }
> + if (em->block_start == EXTENT_MAP_INLINE) {
> + /*
> + * It's unlikely we'll ever actually find ourselves
> + * here, as a file small enough to fit inline won't be
> + * big enough to store more than the swap header, but in
> + * case something changes in the future, let's catch it
> + * here rather than later.
> + */
> + btrfs_warn(fs_info, "swapfile must not be inline");
> + ret = -EINVAL;
> + goto out;
> + }
> + if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
> + btrfs_warn(fs_info, "swapfile must not be compressed");
> + ret = -EINVAL;
> + goto out;
> + }
> +
> + logical_block_start = em->block_start + (start - em->start);
> + len = min(len, em->len - (start - em->start));
> + free_extent_map(em);
> + em = NULL;
> +
> + ret = can_nocow_extent(inode, start, &len, NULL, NULL, NULL);
> + if (ret < 0) {
> + goto out;
> + } else if (ret) {
> + ret = 0;
> + } else {
> + btrfs_warn(fs_info,
> + "swapfile must not be copy-on-write");
> + ret = -EINVAL;
> + goto out;
> + }
> +
> + em = btrfs_get_chunk_map(fs_info, logical_block_start, len);
> + if (IS_ERR(em)) {
> + ret = PTR_ERR(em);
> + goto out;
> + }
> +
> + if (em->map_lookup->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
> + btrfs_warn(fs_info,
> + "swapfile must have single data profile");
> + ret = -EINVAL;
> + goto out;
> + }
> +
> + if (device == NULL) {
> + device = em->map_lookup->stripes[0].dev;
> + ret = btrfs_add_swapfile_pin(inode, device, false);
> + if (ret == 1)
> + ret = 0;
> + else if (ret)
> + goto out;
> + } else if (device != em->map_lookup->stripes[0].dev) {
> + btrfs_warn(fs_info, "swapfile must be on one device");
> + ret = -EINVAL;
> + goto out;
> + }
> +
> + physical_block_start = (em->map_lookup->stripes[0].physical +
> + (logical_block_start - em->start));
> + len = min(len, em->len - (logical_block_start - em->start));
> + free_extent_map(em);
> + em = NULL;
> +
> + bg = btrfs_lookup_block_group(fs_info, logical_block_start);
> + if (!bg) {
> + btrfs_warn(fs_info,
> + "could not find block group containing swapfile");
> + ret = -EINVAL;
> + goto out;
> + }
> +
> + ret = btrfs_add_swapfile_pin(inode, bg, true);
> + if (ret) {
> + btrfs_put_block_group(bg);
> + if (ret == 1)
> + ret = 0;
> + else
> + goto out;
> + }
> +
> + if (bsi.block_len &&
> + bsi.block_start + bsi.block_len == physical_block_start) {
> + bsi.block_len += len;
> + } else {
> + if (bsi.block_len) {
> + ret = btrfs_add_swap_extent(sis, &bsi);
> + if (ret)
> + goto out;
> + }
> + bsi.start = start;
> + bsi.block_start = physical_block_start;
> + bsi.block_len = len;
> + }
> +
> + start += len;
> + }
> +
> + if (bsi.block_len)
> + ret = btrfs_add_swap_extent(sis, &bsi);
> +
> +out:
> + if (!IS_ERR_OR_NULL(em))
> + free_extent_map(em);
> +
> + unlock_extent_cached(io_tree, 0, isize - 1, &cached_state);
> +
> + if (ret)
> + btrfs_swap_deactivate(file);
> +
> + clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
> +
> + if (ret)
> + return ret;
> +
> + if (device)
> + sis->bdev = device->bdev;
> + *span = bsi.highest_ppage - bsi.lowest_ppage + 1;
> + sis->max = bsi.nr_pages;
> + sis->pages = bsi.nr_pages - 1;
> + sis->highest_bit = bsi.nr_pages - 1;
> + return bsi.nr_extents;
> +}
> +#else
> +static void btrfs_swap_deactivate(struct file *file)
> +{
> +}
> +
> +static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
> + sector_t *span)
> +{
> + return -EOPNOTSUPP;
> +}
> +#endif
> +
> const struct file_operations btrfs_file_operations = {
> .llseek = btrfs_file_llseek,
> .read_iter = generic_file_read_iter,
> @@ -3459,6 +3798,8 @@ const struct file_operations btrfs_file_operations = {
> .compat_ioctl = btrfs_compat_ioctl,
> #endif
> .remap_file_range = btrfs_remap_file_range,
> + .swap_activate = btrfs_swap_activate,
> + .swap_deactivate = btrfs_swap_deactivate,
> };
>
> void __cold btrfs_auto_defrag_exit(void)
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 6d159df7b536..c11b86f2bf24 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -27,7 +27,6 @@
> #include <linux/uio.h>
> #include <linux/magic.h>
> #include <linux/iversion.h>
> -#include <linux/swap.h>
> #include <linux/sched/mm.h>
> #include <asm/unaligned.h>
> #include "misc.h"
> @@ -10629,343 +10628,6 @@ void btrfs_set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
> }
> }
>
> -#ifdef CONFIG_SWAP
> -/*
> - * Add an entry indicating a block group or device which is pinned by a
> - * swapfile. Returns 0 on success, 1 if there is already an entry for it, or a
> - * negative errno on failure.
> - */
> -static int btrfs_add_swapfile_pin(struct inode *inode, void *ptr,
> - bool is_block_group)
> -{
> - struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
> - struct btrfs_swapfile_pin *sp, *entry;
> - struct rb_node **p;
> - struct rb_node *parent = NULL;
> -
> - sp = kmalloc(sizeof(*sp), GFP_NOFS);
> - if (!sp)
> - return -ENOMEM;
> - sp->ptr = ptr;
> - sp->inode = inode;
> - sp->is_block_group = is_block_group;
> -
> - spin_lock(&fs_info->swapfile_pins_lock);
> - p = &fs_info->swapfile_pins.rb_node;
> - while (*p) {
> - parent = *p;
> - entry = rb_entry(parent, struct btrfs_swapfile_pin, node);
> - if (sp->ptr < entry->ptr ||
> - (sp->ptr == entry->ptr && sp->inode < entry->inode)) {
> - p = &(*p)->rb_left;
> - } else if (sp->ptr > entry->ptr ||
> - (sp->ptr == entry->ptr && sp->inode > entry->inode)) {
> - p = &(*p)->rb_right;
> - } else {
> - spin_unlock(&fs_info->swapfile_pins_lock);
> - kfree(sp);
> - return 1;
> - }
> - }
> - rb_link_node(&sp->node, parent, p);
> - rb_insert_color(&sp->node, &fs_info->swapfile_pins);
> - spin_unlock(&fs_info->swapfile_pins_lock);
> - return 0;
> -}
> -
> -/* Free all of the entries pinned by this swapfile. */
> -static void btrfs_free_swapfile_pins(struct inode *inode)
> -{
> - struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
> - struct btrfs_swapfile_pin *sp;
> - struct rb_node *node, *next;
> -
> - spin_lock(&fs_info->swapfile_pins_lock);
> - node = rb_first(&fs_info->swapfile_pins);
> - while (node) {
> - next = rb_next(node);
> - sp = rb_entry(node, struct btrfs_swapfile_pin, node);
> - if (sp->inode == inode) {
> - rb_erase(&sp->node, &fs_info->swapfile_pins);
> - if (sp->is_block_group)
> - btrfs_put_block_group(sp->ptr);
> - kfree(sp);
> - }
> - node = next;
> - }
> - spin_unlock(&fs_info->swapfile_pins_lock);
> -}
> -
> -struct btrfs_swap_info {
> - u64 start;
> - u64 block_start;
> - u64 block_len;
> - u64 lowest_ppage;
> - u64 highest_ppage;
> - unsigned long nr_pages;
> - int nr_extents;
> -};
> -
> -static int btrfs_add_swap_extent(struct swap_info_struct *sis,
> - struct btrfs_swap_info *bsi)
> -{
> - unsigned long nr_pages;
> - u64 first_ppage, first_ppage_reported, next_ppage;
> - int ret;
> -
> - first_ppage = ALIGN(bsi->block_start, PAGE_SIZE) >> PAGE_SHIFT;
> - next_ppage = ALIGN_DOWN(bsi->block_start + bsi->block_len,
> - PAGE_SIZE) >> PAGE_SHIFT;
> -
> - if (first_ppage >= next_ppage)
> - return 0;
> - nr_pages = next_ppage - first_ppage;
> -
> - first_ppage_reported = first_ppage;
> - if (bsi->start == 0)
> - first_ppage_reported++;
> - if (bsi->lowest_ppage > first_ppage_reported)
> - bsi->lowest_ppage = first_ppage_reported;
> - if (bsi->highest_ppage < (next_ppage - 1))
> - bsi->highest_ppage = next_ppage - 1;
> -
> - ret = add_swap_extent(sis, bsi->nr_pages, nr_pages, first_ppage);
> - if (ret < 0)
> - return ret;
> - bsi->nr_extents += ret;
> - bsi->nr_pages += nr_pages;
> - return 0;
> -}
> -
> -static void btrfs_swap_deactivate(struct file *file)
> -{
> - struct inode *inode = file_inode(file);
> -
> - btrfs_free_swapfile_pins(inode);
> - atomic_dec(&BTRFS_I(inode)->root->nr_swapfiles);
> -}
> -
> -static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
> - sector_t *span)
> -{
> - struct inode *inode = file_inode(file);
> - struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
> - struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
> - struct extent_state *cached_state = NULL;
> - struct extent_map *em = NULL;
> - struct btrfs_device *device = NULL;
> - struct btrfs_swap_info bsi = {
> - .lowest_ppage = (sector_t)-1ULL,
> - };
> - int ret = 0;
> - u64 isize;
> - u64 start;
> -
> - /*
> - * If the swap file was just created, make sure delalloc is done. If the
> - * file changes again after this, the user is doing something stupid and
> - * we don't really care.
> - */
> - ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
> - if (ret)
> - return ret;
> -
> - /*
> - * The inode is locked, so these flags won't change after we check them.
> - */
> - if (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS) {
> - btrfs_warn(fs_info, "swapfile must not be compressed");
> - return -EINVAL;
> - }
> - if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW)) {
> - btrfs_warn(fs_info, "swapfile must not be copy-on-write");
> - return -EINVAL;
> - }
> - if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
> - btrfs_warn(fs_info, "swapfile must not be checksummed");
> - return -EINVAL;
> - }
> -
> - /*
> - * Balance or device remove/replace/resize can move stuff around from
> - * under us. The EXCL_OP flag makes sure they aren't running/won't run
> - * concurrently while we are mapping the swap extents, and
> - * fs_info->swapfile_pins prevents them from running while the swap file
> - * is active and moving the extents. Note that this also prevents a
> - * concurrent device add which isn't actually necessary, but it's not
> - * really worth the trouble to allow it.
> - */
> - if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
> - btrfs_warn(fs_info,
> - "cannot activate swapfile while exclusive operation is running");
> - return -EBUSY;
> - }
> - /*
> - * Snapshots can create extents which require COW even if NODATACOW is
> - * set. We use this counter to prevent snapshots. We must increment it
> - * before walking the extents because we don't want a concurrent
> - * snapshot to run after we've already checked the extents.
> - */
> - atomic_inc(&BTRFS_I(inode)->root->nr_swapfiles);
> -
> - isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize);
> -
> - lock_extent_bits(io_tree, 0, isize - 1, &cached_state);
> - start = 0;
> - while (start < isize) {
> - u64 logical_block_start, physical_block_start;
> - struct btrfs_block_group_cache *bg;
> - u64 len = isize - start;
> -
> - em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0);
> - if (IS_ERR(em)) {
> - ret = PTR_ERR(em);
> - goto out;
> - }
> -
> - if (em->block_start == EXTENT_MAP_HOLE) {
> - btrfs_warn(fs_info, "swapfile must not have holes");
> - ret = -EINVAL;
> - goto out;
> - }
> - if (em->block_start == EXTENT_MAP_INLINE) {
> - /*
> - * It's unlikely we'll ever actually find ourselves
> - * here, as a file small enough to fit inline won't be
> - * big enough to store more than the swap header, but in
> - * case something changes in the future, let's catch it
> - * here rather than later.
> - */
> - btrfs_warn(fs_info, "swapfile must not be inline");
> - ret = -EINVAL;
> - goto out;
> - }
> - if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
> - btrfs_warn(fs_info, "swapfile must not be compressed");
> - ret = -EINVAL;
> - goto out;
> - }
> -
> - logical_block_start = em->block_start + (start - em->start);
> - len = min(len, em->len - (start - em->start));
> - free_extent_map(em);
> - em = NULL;
> -
> - ret = can_nocow_extent(inode, start, &len, NULL, NULL, NULL);
> - if (ret < 0) {
> - goto out;
> - } else if (ret) {
> - ret = 0;
> - } else {
> - btrfs_warn(fs_info,
> - "swapfile must not be copy-on-write");
> - ret = -EINVAL;
> - goto out;
> - }
> -
> - em = btrfs_get_chunk_map(fs_info, logical_block_start, len);
> - if (IS_ERR(em)) {
> - ret = PTR_ERR(em);
> - goto out;
> - }
> -
> - if (em->map_lookup->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
> - btrfs_warn(fs_info,
> - "swapfile must have single data profile");
> - ret = -EINVAL;
> - goto out;
> - }
> -
> - if (device == NULL) {
> - device = em->map_lookup->stripes[0].dev;
> - ret = btrfs_add_swapfile_pin(inode, device, false);
> - if (ret == 1)
> - ret = 0;
> - else if (ret)
> - goto out;
> - } else if (device != em->map_lookup->stripes[0].dev) {
> - btrfs_warn(fs_info, "swapfile must be on one device");
> - ret = -EINVAL;
> - goto out;
> - }
> -
> - physical_block_start = (em->map_lookup->stripes[0].physical +
> - (logical_block_start - em->start));
> - len = min(len, em->len - (logical_block_start - em->start));
> - free_extent_map(em);
> - em = NULL;
> -
> - bg = btrfs_lookup_block_group(fs_info, logical_block_start);
> - if (!bg) {
> - btrfs_warn(fs_info,
> - "could not find block group containing swapfile");
> - ret = -EINVAL;
> - goto out;
> - }
> -
> - ret = btrfs_add_swapfile_pin(inode, bg, true);
> - if (ret) {
> - btrfs_put_block_group(bg);
> - if (ret == 1)
> - ret = 0;
> - else
> - goto out;
> - }
> -
> - if (bsi.block_len &&
> - bsi.block_start + bsi.block_len == physical_block_start) {
> - bsi.block_len += len;
> - } else {
> - if (bsi.block_len) {
> - ret = btrfs_add_swap_extent(sis, &bsi);
> - if (ret)
> - goto out;
> - }
> - bsi.start = start;
> - bsi.block_start = physical_block_start;
> - bsi.block_len = len;
> - }
> -
> - start += len;
> - }
> -
> - if (bsi.block_len)
> - ret = btrfs_add_swap_extent(sis, &bsi);
> -
> -out:
> - if (!IS_ERR_OR_NULL(em))
> - free_extent_map(em);
> -
> - unlock_extent_cached(io_tree, 0, isize - 1, &cached_state);
> -
> - if (ret)
> - btrfs_swap_deactivate(file);
> -
> - clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
> -
> - if (ret)
> - return ret;
> -
> - if (device)
> - sis->bdev = device->bdev;
> - *span = bsi.highest_ppage - bsi.lowest_ppage + 1;
> - sis->max = bsi.nr_pages;
> - sis->pages = bsi.nr_pages - 1;
> - sis->highest_bit = bsi.nr_pages - 1;
> - return bsi.nr_extents;
> -}
> -#else
> -static void btrfs_swap_deactivate(struct file *file)
> -{
> -}
> -
> -static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
> - sector_t *span)
> -{
> - return -EOPNOTSUPP;
> -}
> -#endif
> -
> static const struct inode_operations btrfs_dir_inode_operations = {
> .getattr = btrfs_getattr,
> .lookup = btrfs_lookup,
> @@ -11032,8 +10694,6 @@ static const struct address_space_operations btrfs_aops = {
> .releasepage = btrfs_releasepage,
> .set_page_dirty = btrfs_set_page_dirty,
> .error_remove_page = generic_error_remove_page,
> - .swap_activate = btrfs_swap_activate,
> - .swap_deactivate = btrfs_swap_deactivate,
> };
>
> static const struct inode_operations btrfs_file_inode_operations = {
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 3c7777bfae17..04b2a8f44fa9 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -14,7 +14,6 @@
> #include <linux/pagevec.h>
> #include <linux/blkdev.h>
> #include <linux/bio.h>
> -#include <linux/swap.h>
> #include <linux/prefetch.h>
> #include <linux/uio.h>
> #include <linux/cleancache.h>
> @@ -3142,125 +3141,6 @@ int f2fs_migrate_page(struct address_space *mapping,
> }
> #endif
>
> -#ifdef CONFIG_SWAP
> -/* Copied from generic_swapfile_activate() to check any holes */
> -static int check_swap_activate(struct file *swap_file, unsigned int max)
> -{
> - struct inode *inode = swap_file->f_mapping->host;
> - unsigned blocks_per_page;
> - unsigned long page_no;
> - unsigned blkbits;
> - sector_t probe_block;
> - sector_t last_block;
> - sector_t lowest_block = -1;
> - sector_t highest_block = 0;
> -
> - blkbits = inode->i_blkbits;
> - blocks_per_page = PAGE_SIZE >> blkbits;
> -
> - /*
> - * Map all the blocks into the extent list. This code doesn't try
> - * to be very smart.
> - */
> - probe_block = 0;
> - page_no = 0;
> - last_block = i_size_read(inode) >> blkbits;
> - while ((probe_block + blocks_per_page) <= last_block && page_no < max) {
> - unsigned block_in_page;
> - sector_t first_block;
> -
> - cond_resched();
> -
> - first_block = bmap(inode, probe_block);
> - if (first_block == 0)
> - goto bad_bmap;
> -
> - /*
> - * It must be PAGE_SIZE aligned on-disk
> - */
> - if (first_block & (blocks_per_page - 1)) {
> - probe_block++;
> - goto reprobe;
> - }
> -
> - for (block_in_page = 1; block_in_page < blocks_per_page;
> - block_in_page++) {
> - sector_t block;
> -
> - block = bmap(inode, probe_block + block_in_page);
> - if (block == 0)
> - goto bad_bmap;
> - if (block != first_block + block_in_page) {
> - /* Discontiguity */
> - probe_block++;
> - goto reprobe;
> - }
> - }
> -
> - first_block >>= (PAGE_SHIFT - blkbits);
> - if (page_no) { /* exclude the header page */
> - if (first_block < lowest_block)
> - lowest_block = first_block;
> - if (first_block > highest_block)
> - highest_block = first_block;
> - }
> -
> - page_no++;
> - probe_block += blocks_per_page;
> -reprobe:
> - continue;
> - }
> - return 0;
> -
> -bad_bmap:
> - pr_err("swapon: swapfile has holes\n");
> - return -EINVAL;
> -}
> -
> -static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
> - sector_t *span)
> -{
> - struct inode *inode = file_inode(file);
> - int ret;
> -
> - if (!S_ISREG(inode->i_mode))
> - return -EINVAL;
> -
> - if (f2fs_readonly(F2FS_I_SB(inode)->sb))
> - return -EROFS;
> -
> - ret = f2fs_convert_inline_inode(inode);
> - if (ret)
> - return ret;
> -
> - ret = check_swap_activate(file, sis->max);
> - if (ret)
> - return ret;
> -
> - set_inode_flag(inode, FI_PIN_FILE);
> - f2fs_precache_extents(inode);
> - f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
> - return 0;
> -}
> -
> -static void f2fs_swap_deactivate(struct file *file)
> -{
> - struct inode *inode = file_inode(file);
> -
> - clear_inode_flag(inode, FI_PIN_FILE);
> -}
> -#else
> -static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
> - sector_t *span)
> -{
> - return -EOPNOTSUPP;
> -}
> -
> -static void f2fs_swap_deactivate(struct file *file)
> -{
> -}
> -#endif
> -
> const struct address_space_operations f2fs_dblock_aops = {
> .readpage = f2fs_read_data_page,
> .readpages = f2fs_read_data_pages,
> @@ -3273,8 +3153,6 @@ const struct address_space_operations f2fs_dblock_aops = {
> .releasepage = f2fs_release_page,
> .direct_IO = f2fs_direct_IO,
> .bmap = f2fs_bmap,
> - .swap_activate = f2fs_swap_activate,
> - .swap_deactivate = f2fs_swap_deactivate,
> #ifdef CONFIG_MIGRATION
> .migratepage = f2fs_migrate_page,
> #endif
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 483ad22a0946..de7f9cf36689 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -21,6 +21,7 @@
> #include <linux/uuid.h>
> #include <linux/file.h>
> #include <linux/nls.h>
> +#include <linux/swap.h>
>
> #include "f2fs.h"
> #include "node.h"
> @@ -3466,6 +3467,125 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
> }
> #endif
>
> +#ifdef CONFIG_SWAP
> +/* Copied from generic_swapfile_activate() to check any holes */
> +static int check_swap_activate(struct file *swap_file, unsigned int max)
> +{
> + struct inode *inode = swap_file->f_mapping->host;
> + unsigned blocks_per_page;
> + unsigned long page_no;
> + unsigned blkbits;
> + sector_t probe_block;
> + sector_t last_block;
> + sector_t lowest_block = -1;
> + sector_t highest_block = 0;
> +
> + blkbits = inode->i_blkbits;
> + blocks_per_page = PAGE_SIZE >> blkbits;
> +
> + /*
> + * Map all the blocks into the extent list. This code doesn't try
> + * to be very smart.
> + */
> + probe_block = 0;
> + page_no = 0;
> + last_block = i_size_read(inode) >> blkbits;
> + while ((probe_block + blocks_per_page) <= last_block && page_no < max) {
> + unsigned block_in_page;
> + sector_t first_block;
> +
> + cond_resched();
> +
> + first_block = bmap(inode, probe_block);
> + if (first_block == 0)
> + goto bad_bmap;
> +
> + /*
> + * It must be PAGE_SIZE aligned on-disk
> + */
> + if (first_block & (blocks_per_page - 1)) {
> + probe_block++;
> + goto reprobe;
> + }
> +
> + for (block_in_page = 1; block_in_page < blocks_per_page;
> + block_in_page++) {
> + sector_t block;
> +
> + block = bmap(inode, probe_block + block_in_page);
> + if (block == 0)
> + goto bad_bmap;
> + if (block != first_block + block_in_page) {
> + /* Discontiguity */
> + probe_block++;
> + goto reprobe;
> + }
> + }
> +
> + first_block >>= (PAGE_SHIFT - blkbits);
> + if (page_no) { /* exclude the header page */
> + if (first_block < lowest_block)
> + lowest_block = first_block;
> + if (first_block > highest_block)
> + highest_block = first_block;
> + }
> +
> + page_no++;
> + probe_block += blocks_per_page;
> +reprobe:
> + continue;
> + }
> + return 0;
> +
> +bad_bmap:
> + pr_err("swapon: swapfile has holes\n");
> + return -EINVAL;
> +}
> +
> +static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
> + sector_t *span)
> +{
> + struct inode *inode = file_inode(file);
> + int ret;
> +
> + if (!S_ISREG(inode->i_mode))
> + return -EINVAL;
> +
> + if (f2fs_readonly(F2FS_I_SB(inode)->sb))
> + return -EROFS;
> +
> + ret = f2fs_convert_inline_inode(inode);
> + if (ret)
> + return ret;
> +
> + ret = check_swap_activate(file, sis->max);
> + if (ret)
> + return ret;
> +
> + set_inode_flag(inode, FI_PIN_FILE);
> + f2fs_precache_extents(inode);
> + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
> + return 0;
> +}
> +
> +static void f2fs_swap_deactivate(struct file *file)
> +{
> + struct inode *inode = file_inode(file);
> +
> + clear_inode_flag(inode, FI_PIN_FILE);
> +}
> +#else
> +static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
> + sector_t *span)
> +{
> + return -EOPNOTSUPP;
> +}
> +
> +static void f2fs_swap_deactivate(struct file *file)
> +{
> +}
> +#endif
> +
> const struct file_operations f2fs_file_operations = {
> .llseek = f2fs_llseek,
> .read_iter = generic_file_read_iter,
> @@ -3482,4 +3602,6 @@ const struct file_operations f2fs_file_operations = {
> #endif
> .splice_read = generic_file_splice_read,
> .splice_write = iter_file_splice_write,
> + .swap_activate = f2fs_swap_activate,
> + .swap_deactivate = f2fs_swap_deactivate,
> };
> diff --git a/fs/nfs/file.c b/fs/nfs/file.c
> index 95dc90570786..1f82f92185d6 100644
> --- a/fs/nfs/file.c
> +++ b/fs/nfs/file.c
> @@ -520,8 +520,6 @@ const struct address_space_operations nfs_file_aops = {
> .launder_page = nfs_launder_page,
> .is_dirty_writeback = nfs_check_dirty_writeback,
> .error_remove_page = generic_error_remove_page,
> - .swap_activate = nfs_swap_activate,
> - .swap_deactivate = nfs_swap_deactivate,
> };
>
> /*
> @@ -847,5 +845,7 @@ const struct file_operations nfs_file_operations = {
> .splice_write = iter_file_splice_write,
> .check_flags = nfs_check_flags,
> .setlease = simple_nosetlease,
> + .swap_activate = nfs_swap_activate,
> + .swap_deactivate = nfs_swap_deactivate,
> };
> EXPORT_SYMBOL_GPL(nfs_file_operations);
> diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
> index 3a688eb5c5ae..99f578a9ed90 100644
> --- a/fs/xfs/xfs_aops.c
> +++ b/fs/xfs/xfs_aops.c
> @@ -631,17 +631,6 @@ xfs_vm_readpages(
> return iomap_readpages(mapping, pages, nr_pages, &xfs_read_iomap_ops);
> }
>
> -static int
> -xfs_iomap_swapfile_activate(
> - struct swap_info_struct *sis,
> - struct file *swap_file,
> - sector_t *span)
> -{
> - sis->bdev = xfs_inode_buftarg(XFS_I(file_inode(swap_file)))->bt_bdev;
> - return iomap_swapfile_activate(sis, swap_file, span,
> - &xfs_read_iomap_ops);
> -}
> -
> const struct address_space_operations xfs_address_space_operations = {
> .readpage = xfs_vm_readpage,
> .readpages = xfs_vm_readpages,
> @@ -655,7 +644,6 @@ const struct address_space_operations xfs_address_space_operations = {
> .migratepage = iomap_migrate_page,
> .is_partially_uptodate = iomap_is_partially_uptodate,
> .error_remove_page = generic_error_remove_page,
> - .swap_activate = xfs_iomap_swapfile_activate,
> };
>
> const struct address_space_operations xfs_dax_aops = {
> @@ -663,5 +651,4 @@ const struct address_space_operations xfs_dax_aops = {
> .direct_IO = noop_direct_IO,
> .set_page_dirty = noop_set_page_dirty,
> .invalidatepage = noop_invalidatepage,
> - .swap_activate = xfs_iomap_swapfile_activate,
> };
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 865543e41fb4..225f58561f06 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -1294,6 +1294,17 @@ xfs_file_mmap(
> return 0;
> }
>
> +static int
> +xfs_file_swap_activate(
> + struct swap_info_struct *sis,
> + struct file *swap_file,
> + sector_t *span)
> +{
> + sis->bdev = xfs_inode_buftarg(XFS_I(file_inode(swap_file)))->bt_bdev;
> + return iomap_swapfile_activate(sis, swap_file, span,
> + &xfs_read_iomap_ops);
> +}
> +
> const struct file_operations xfs_file_operations = {
> .llseek = xfs_file_llseek,
> .read_iter = xfs_file_read_iter,
> @@ -1314,6 +1325,7 @@ const struct file_operations xfs_file_operations = {
> .fallocate = xfs_file_fallocate,
> .fadvise = xfs_file_fadvise,
> .remap_file_range = xfs_file_remap_range,
> + .swap_activate = xfs_file_swap_activate,
> };
>
> const struct file_operations xfs_dir_file_operations = {
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 83e011e0df7f..1175815da3df 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -402,11 +402,6 @@ struct address_space_operations {
> unsigned long);
> void (*is_dirty_writeback) (struct page *, bool *, bool *);
> int (*error_remove_page)(struct address_space *, struct page *);
> -
> - /* swapfile support */
> - int (*swap_activate)(struct swap_info_struct *sis, struct file *file,
> - sector_t *span);
> - void (*swap_deactivate)(struct file *file);
> };
>
> extern const struct address_space_operations empty_aops;
> @@ -1858,6 +1853,11 @@ struct file_operations {
> struct file *file_out, loff_t pos_out,
> loff_t len, unsigned int remap_flags);
> int (*fadvise)(struct file *, loff_t, loff_t, int);
> +
> + /* swapfile support */
> + int (*swap_activate)(struct swap_info_struct *sis, struct file *file,
> + sector_t *span);
> + void (*swap_deactivate)(struct file *file);
> } __randomize_layout;
>
> struct inode_operations {
> diff --git a/mm/swapfile.c b/mm/swapfile.c
> index bb3261d45b6a..d2de8d668708 100644
> --- a/mm/swapfile.c
> +++ b/mm/swapfile.c
> @@ -2293,11 +2293,10 @@ static void destroy_swap_extents(struct swap_info_struct *sis)
>
> if (sis->flags & SWP_ACTIVATED) {
> struct file *swap_file = sis->swap_file;
> - struct address_space *mapping = swap_file->f_mapping;
>
> sis->flags &= ~SWP_ACTIVATED;
> - if (mapping->a_ops->swap_deactivate)
> - mapping->a_ops->swap_deactivate(swap_file);
> + if (swap_file->f_op->swap_deactivate)
> + swap_file->f_op->swap_deactivate(swap_file);
> }
> }
>
> @@ -2381,8 +2380,7 @@ EXPORT_SYMBOL_GPL(add_swap_extent);
> static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span)
> {
> struct file *swap_file = sis->swap_file;
> - struct address_space *mapping = swap_file->f_mapping;
> - struct inode *inode = mapping->host;
> + struct inode *inode = swap_file->f_mapping->host;
> int ret;
>
> if (S_ISBLK(inode->i_mode)) {
> @@ -2391,8 +2389,8 @@ static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span)
> return ret;
> }
>
> - if (mapping->a_ops->swap_activate) {
> - ret = mapping->a_ops->swap_activate(sis, swap_file, span);
> + if (swap_file->f_op->swap_activate) {
> + ret = swap_file->f_op->swap_activate(sis, swap_file, span);
> if (ret >= 0)
> sis->flags |= SWP_ACTIVATED;
> if (!ret) {
> --
> 2.21.0
>