Re: [PATCH 2/2] mm, zswap: Support THP

From: Dan Streetman
Date: Fri Nov 08 2019 - 11:16:11 EST


On Mon, Oct 14, 2019 at 11:49 PM Hui Zhu <teawater@xxxxxxxxx> wrote:
>
> This commit lets zswap treat a THP as a series of contiguous normal
> pages in zswap_frontswap_store.
> Each page is stored in its own "zswap_entry", and these "zswap_entry"
> are inserted into the "zswap_tree" together.

Why does zswap need to carry the added complexity of splitting a THP
into separate normal-sized pages? That should be done higher up, in
the swap layer.
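
For example, if the swap path split the THP before calling into
frontswap, zswap would only ever see normal pages and none of the
per-entry bookkeeping below would be needed. A rough, untested sketch
of where I'd expect the split to live ("page_list" and the "fallback"
label are just stand-ins for whatever the calling context provides):

	/* in the swap-out path, before frontswap_store() is called */
	if (PageTransHuge(page)) {
		/*
		 * Split the huge page into normal pages; on failure,
		 * fall back to the regular swap path.
		 */
		if (split_huge_page_to_list(page, page_list))
			goto fallback;
	}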

>
> Signed-off-by: Hui Zhu <teawaterz@xxxxxxxxxxxxxxxxx>
> ---
> mm/zswap.c | 170 +++++++++++++++++++++++++++++++++++++++----------------------
> 1 file changed, 109 insertions(+), 61 deletions(-)
>
> diff --git a/mm/zswap.c b/mm/zswap.c
> index 46a3223..36aa10d 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -316,11 +316,7 @@ static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
> }
> }
>
> -/*
> - * Carries out the common pattern of freeing an entry's zpool allocation,
> - * freeing the entry itself, and decrementing the number of stored pages.
> - */
> -static void zswap_free_entry(struct zswap_entry *entry)
> +static void zswap_free_entry_1(struct zswap_entry *entry)
> {
> if (!entry->length)
> atomic_dec(&zswap_same_filled_pages);
> @@ -329,6 +325,15 @@ static void zswap_free_entry(struct zswap_entry *entry)
> zswap_pool_put(entry->pool);
> }
> zswap_entry_cache_free(entry);
> +}
> +
> +/*
> + * Carries out the common pattern of freeing an entry's zpool allocation,
> + * freeing the entry itself, and decrementing the number of stored pages.
> + */
> +static void zswap_free_entry(struct zswap_entry *entry)
> +{
> + zswap_free_entry_1(entry);
> atomic_dec(&zswap_stored_pages);
> zswap_update_total_size();
> }
> @@ -980,15 +985,11 @@ static void zswap_fill_page(void *ptr, unsigned long value)
> memset_l(page, value, PAGE_SIZE / sizeof(unsigned long));
> }
>
> -/*********************************
> -* frontswap hooks
> -**********************************/
> -/* attempts to compress and store a single page */
> -static int zswap_frontswap_store(unsigned type, pgoff_t offset,
> - struct page *page)
> +static int zswap_frontswap_store_1(unsigned type, pgoff_t offset,
> + struct page *page,
> + struct zswap_entry **entry_pointer)
> {
> - struct zswap_tree *tree = zswap_trees[type];
> - struct zswap_entry *entry, *dupentry;
> + struct zswap_entry *entry;
> struct crypto_comp *tfm;
> int ret;
> unsigned int hlen, dlen = PAGE_SIZE;
> @@ -998,36 +999,6 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
> struct zswap_header zhdr = { .swpentry = swp_entry(type, offset) };
> gfp_t gfp;
>
> - /* THP isn't supported */
> - if (PageTransHuge(page)) {
> - ret = -EINVAL;
> - goto reject;
> - }
> -
> - if (!zswap_enabled || !tree) {
> - ret = -ENODEV;
> - goto reject;
> - }
> -
> - /* reclaim space if needed */
> - if (zswap_is_full()) {
> - zswap_pool_limit_hit++;
> - if (zswap_shrink()) {
> - zswap_reject_reclaim_fail++;
> - ret = -ENOMEM;
> - goto reject;
> - }
> -
> - /* A second zswap_is_full() check after
> - * zswap_shrink() to make sure it's now
> - * under the max_pool_percent
> - */
> - if (zswap_is_full()) {
> - ret = -ENOMEM;
> - goto reject;
> - }
> - }
> -
> /* allocate entry */
> entry = zswap_entry_cache_alloc(GFP_KERNEL);
> if (!entry) {
> @@ -1035,6 +1006,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
> ret = -ENOMEM;
> goto reject;
> }
> + *entry_pointer = entry;
>
> if (zswap_same_filled_pages_enabled) {
> src = kmap_atomic(page);
> @@ -1044,7 +1016,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
> entry->length = 0;
> entry->value = value;
> atomic_inc(&zswap_same_filled_pages);
> - goto insert_entry;
> + goto out;
> }
> kunmap_atomic(src);
> }
> @@ -1093,31 +1065,105 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
> entry->handle = handle;
> entry->length = dlen;
>
> -insert_entry:
> +out:
> + return 0;
> +
> +put_dstmem:
> + put_cpu_var(zswap_dstmem);
> + zswap_pool_put(entry->pool);
> +freepage:
> + zswap_entry_cache_free(entry);
> +reject:
> + return ret;
> +}
> +
> +/*********************************
> +* frontswap hooks
> +**********************************/
> +/* attempts to compress and store a single page */
> +static int zswap_frontswap_store(unsigned type, pgoff_t offset,
> + struct page *page)
> +{
> + struct zswap_tree *tree = zswap_trees[type];
> + struct zswap_entry **entries = NULL, *dupentry;
> + struct zswap_entry *single_entry[1];
> + int ret;
> + int i, nr;
> +
> + if (!zswap_enabled || !tree) {
> + ret = -ENODEV;
> + goto reject;
> + }
> +
> + /* reclaim space if needed */
> + if (zswap_is_full()) {
> + zswap_pool_limit_hit++;
> + if (zswap_shrink()) {
> + zswap_reject_reclaim_fail++;
> + ret = -ENOMEM;
> + goto reject;
> + }
> +
> + /* A second zswap_is_full() check after
> + * zswap_shrink() to make sure it's now
> + * under the max_pool_percent
> + */
> + if (zswap_is_full()) {
> + ret = -ENOMEM;
> + goto reject;
> + }
> + }
> +
> + nr = hpage_nr_pages(page);
> +
> + if (unlikely(nr > 1)) {
> + entries = kvmalloc(sizeof(struct zswap_entry *) * nr,
> + GFP_KERNEL);
> + if (!entries) {
> + ret = -ENOMEM;
> + goto reject;
> + }
> + } else
> + entries = single_entry;
> +
> + for (i = 0; i < nr; i++) {
> + ret = zswap_frontswap_store_1(type, offset + i, page + i,
> + &entries[i]);
> + if (ret)
> + goto freepage;
> + }
> +
> /* map */
> spin_lock(&tree->lock);
> - do {
> - ret = zswap_rb_insert(&tree->rbroot, entry, &dupentry);
> - if (ret == -EEXIST) {
> - zswap_duplicate_entry++;
> - /* remove from rbtree */
> - zswap_rb_erase(&tree->rbroot, dupentry);
> - zswap_entry_put(tree, dupentry);
> - }
> - } while (ret == -EEXIST);
> + for (i = 0; i < nr; i++) {
> + do {
> + ret = zswap_rb_insert(&tree->rbroot, entries[i],
> + &dupentry);
> + if (ret == -EEXIST) {
> + zswap_duplicate_entry++;
> + /* remove from rbtree */
> + zswap_rb_erase(&tree->rbroot, dupentry);
> + zswap_entry_put(tree, dupentry);
> + }
> + } while (ret == -EEXIST);
> + }
> spin_unlock(&tree->lock);
>
> /* update stats */
> - atomic_inc(&zswap_stored_pages);
> + atomic_add(nr, &zswap_stored_pages);
> zswap_update_total_size();
>
> - return 0;
> -
> -put_dstmem:
> - put_cpu_var(zswap_dstmem);
> - zswap_pool_put(entry->pool);
> + ret = 0;
> freepage:
> - zswap_entry_cache_free(entry);
> + if (unlikely(nr > 1)) {
> + if (ret) {
> + int j;
> +
> + for (j = 0; j < i; j++)
> + zswap_free_entry_1(entries[j]);
> + }
> + kvfree(entries);
> + }
> reject:
> return ret;
> }
> @@ -1136,6 +1182,8 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
> unsigned int dlen;
> int ret;
>
> + BUG_ON(PageTransHuge(page));
> +
> /* find */
> spin_lock(&tree->lock);
> entry = zswap_entry_find_get(&tree->rbroot, offset);
> --
> 2.7.4
>