Re: [PATCH 3/3] zsmalloc: add page table mapping method

From: Minchan Kim
Date: Sun Jul 22 2012 - 20:26:36 EST


On Wed, Jul 18, 2012 at 11:55:56AM -0500, Seth Jennings wrote:
> This patchset provides page mapping via the page table.
> On some archs, most notably ARM, this method has been
> demonstrated to be faster than copying.
>
> The logic controlling the method selection (copy vs page table)
> is controlled by the definition of USE_PGTABLE_MAPPING which
> is/can be defined for any arch that performs better with page
> table mapping.
>
> Signed-off-by: Seth Jennings <sjenning@xxxxxxxxxxxxxxxxxx>
> ---
> drivers/staging/zsmalloc/zsmalloc-main.c | 182 ++++++++++++++++++++++--------
> drivers/staging/zsmalloc/zsmalloc_int.h | 6 -
> 2 files changed, 134 insertions(+), 54 deletions(-)
>
> diff --git a/drivers/staging/zsmalloc/zsmalloc-main.c b/drivers/staging/zsmalloc/zsmalloc-main.c
> index b86133f..defe350 100644
> --- a/drivers/staging/zsmalloc/zsmalloc-main.c
> +++ b/drivers/staging/zsmalloc/zsmalloc-main.c
> @@ -89,6 +89,30 @@
> #define CLASS_IDX_MASK ((1 << CLASS_IDX_BITS) - 1)
> #define FULLNESS_MASK ((1 << FULLNESS_BITS) - 1)
>
> +/*
> + * By default, zsmalloc uses a copy-based object mapping method to access
> + * allocations that span two pages. However, if a particular architecture
> + * 1) Implements local_flush_tlb_kernel_range() and 2) Performs VM mapping
> + * faster than copying, then it should be added here so that

How about adding the URL of your benchmark results here?

> + * USE_PGTABLE_MAPPING is defined. This causes zsmalloc to use page table
> + * mapping rather than copying
> + * for object mapping.

Unnecessary line break; "for object mapping." fits on the previous line.

> +*/
> +#if defined(CONFIG_ARM)
> +#define USE_PGTABLE_MAPPING
> +#endif

I have no better idea, and I would like to get zsmalloc into mainline,
so no objection.
Nitin?

> +
> +struct mapping_area {
> +#ifdef USE_PGTABLE_MAPPING
> + struct vm_struct *vm; /* vm area for mapping object that span pages */
> +#else
> + char *vm_buf; /* copy buffer for objects that span pages */
> +#endif
> + char *vm_addr; /* address of kmap_atomic()'ed pages */
> + enum zs_mapmode vm_mm; /* mapping mode */
> +};
> +
> +
> /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
> static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
>
> @@ -471,16 +495,83 @@ static struct page *find_get_zspage(struct size_class *class)
> return page;
> }
>
> -static void zs_copy_map_object(char *buf, struct page *page,
> - int off, int size)
> +#ifdef USE_PGTABLE_MAPPING
> +static inline int __zs_cpu_up(struct mapping_area *area)
> +{
> + /*
> + * Make sure we don't leak memory if a cpu UP notification
> + * and zs_init() race and both call zs_cpu_up() on the same cpu
> + */
> + if (area->vm)
> + return 0;
> + area->vm = alloc_vm_area(PAGE_SIZE * 2, NULL);
> + if (!area->vm)
> + return -ENOMEM;
> + return 0;
> +}
> +
> +static inline void __zs_cpu_down(struct mapping_area *area)
> +{
> + if (area->vm)
> + free_vm_area(area->vm);
> + area->vm = NULL;
> +}
> +
> +static inline void *__zs_map_object(struct mapping_area *area,
> + struct page *pages[2], int off, int size)
> +{
> + BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, &pages));
> + area->vm_addr = area->vm->addr;
> + return area->vm_addr + off;
> +}
> +
> +static inline void __zs_unmap_object(struct mapping_area *area,
> + struct page *pages[2], int off, int size)
> +{
> + unsigned long addr = (unsigned long)area->vm_addr;
> + unsigned long end = addr + (PAGE_SIZE * 2);
> +
> + flush_cache_vunmap(addr, end);
> + unmap_kernel_range_noflush(addr, PAGE_SIZE * 2);
> + local_flush_tlb_kernel_range(addr, end);
> +}
> +
> +#else /* USE_PGTABLE_MAPPING */
> +
> +static inline int __zs_cpu_up(struct mapping_area *area)
> +{
> + /*
> + * Make sure we don't leak memory if a cpu UP notification
> + * and zs_init() race and both call zs_cpu_up() on the same cpu
> + */
> + if (area->vm_buf)
> + return 0;
> + area->vm_buf = (char *)__get_free_page(GFP_KERNEL);
> + if (!area->vm_buf)
> + return -ENOMEM;
> + return 0;
> +}
> +
> +static inline void __zs_cpu_down(struct mapping_area *area)
> +{
> + if (area->vm_buf)
> + free_page((unsigned long)area->vm_buf);
> + area->vm_buf = NULL;
> +}
> +
> +static void *__zs_map_object(struct mapping_area *area,
> + struct page *pages[2], int off, int size)
> {
> - struct page *pages[2];
> int sizes[2];
> void *addr;
> + char *buf = area->vm_buf;
>
> - pages[0] = page;
> - pages[1] = get_next_page(page);
> - BUG_ON(!pages[1]);
> + /* disable page faults to match kmap_atomic() return conditions */
> + pagefault_disable();
> +
> + /* no read fastpath */
> + if (area->vm_mm == ZS_MM_WO)
> + goto out;
>
> sizes[0] = PAGE_SIZE - off;
> sizes[1] = size - sizes[0];
> @@ -492,18 +583,20 @@ static void zs_copy_map_object(char *buf, struct page *page,
> addr = kmap_atomic(pages[1]);
> memcpy(buf + sizes[0], addr, sizes[1]);
> kunmap_atomic(addr);
> +out:
> + return area->vm_buf;
> }
>
> -static void zs_copy_unmap_object(char *buf, struct page *page,
> - int off, int size)
> +static void __zs_unmap_object(struct mapping_area *area,
> + struct page *pages[2], int off, int size)
> {
> - struct page *pages[2];
> int sizes[2];
> void *addr;
> + char *buf = area->vm_buf;
>
> - pages[0] = page;
> - pages[1] = get_next_page(page);
> - BUG_ON(!pages[1]);
> + /* no write fastpath */
> + if (area->vm_mm == ZS_MM_RO)
> + goto out;
>
> sizes[0] = PAGE_SIZE - off;
> sizes[1] = size - sizes[0];
> @@ -515,34 +608,31 @@ static void zs_copy_unmap_object(char *buf, struct page *page,
> addr = kmap_atomic(pages[1]);
> memcpy(addr, buf + sizes[0], sizes[1]);
> kunmap_atomic(addr);
> +
> +out:
> + /* enable page faults to match kunmap_atomic() return conditions */
> + pagefault_enable();
> }
>
> +#endif /* USE_PGTABLE_MAPPING */
> +
> static int zs_cpu_notifier(struct notifier_block *nb, unsigned long action,
> void *pcpu)
> {
> - int cpu = (long)pcpu;
> + int ret, cpu = (long)pcpu;
> struct mapping_area *area;
>
> switch (action) {
> case CPU_UP_PREPARE:
> area = &per_cpu(zs_map_area, cpu);
> - /*
> - * Make sure we don't leak memory if a cpu UP notification
> - * and zs_init() race and both call zs_cpu_up() on the same cpu
> - */
> - if (area->vm_buf)
> - return 0;
> - area->vm_buf = (char *)__get_free_page(GFP_KERNEL);
> - if (!area->vm_buf)
> - return -ENOMEM;
> - return 0;
> + ret = __zs_cpu_up(area);
> + if (ret)
> + return notifier_from_errno(ret);
> break;
> case CPU_DEAD:
> case CPU_UP_CANCELED:
> area = &per_cpu(zs_map_area, cpu);
> - if (area->vm_buf)
> - free_page((unsigned long)area->vm_buf);
> - area->vm_buf = NULL;
> + __zs_cpu_down(area);
> break;
> }
>
> @@ -759,6 +849,7 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
> enum fullness_group fg;
> struct size_class *class;
> struct mapping_area *area;
> + struct page *pages[2];
>
> BUG_ON(!handle);
>
> @@ -775,19 +866,19 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
> off = obj_idx_to_offset(page, obj_idx, class->size);
>
> area = &get_cpu_var(zs_map_area);
> + area->vm_mm = mm;
> if (off + class->size <= PAGE_SIZE) {
> /* this object is contained entirely within a page */
> area->vm_addr = kmap_atomic(page);
> return area->vm_addr + off;
> }
>
> - /* disable page faults to match kmap_atomic() return conditions */
> - pagefault_disable();
> + /* this object spans two pages */
> + pages[0] = page;
> + pages[1] = get_next_page(page);
> + BUG_ON(!pages[1]);
>
> - if (mm != ZS_MM_WO)
> - zs_copy_map_object(area->vm_buf, page, off, class->size);
> - area->vm_addr = NULL;
> - return area->vm_buf;
> + return __zs_map_object(area, pages, off, class->size);
> }
> EXPORT_SYMBOL_GPL(zs_map_object);
>
> @@ -801,17 +892,6 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
> struct size_class *class;
> struct mapping_area *area;
>
> - area = &__get_cpu_var(zs_map_area);
> - /* single-page object fastpath */
> - if (area->vm_addr) {
> - kunmap_atomic(area->vm_addr);
> - goto out;
> - }
> -
> - /* no write fastpath */
> - if (area->vm_mm == ZS_MM_RO)
> - goto pfenable;
> -
> BUG_ON(!handle);
>
> obj_handle_to_location(handle, &page, &obj_idx);
> @@ -819,12 +899,18 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
> class = &pool->size_class[class_idx];
> off = obj_idx_to_offset(page, obj_idx, class->size);
>
> - zs_copy_unmap_object(area->vm_buf, page, off, class->size);
> + area = &__get_cpu_var(zs_map_area);
> + if (off + class->size <= PAGE_SIZE)
> + kunmap_atomic(area->vm_addr);
> + else {
> + struct page *pages[2];
> +
> + pages[0] = page;
> + pages[1] = get_next_page(page);
> + BUG_ON(!pages[1]);
>
> -pfenable:
> - /* enable page faults to match kunmap_atomic() return conditions */
> - pagefault_enable();
> -out:
> + __zs_unmap_object(area, pages, off, class->size);
> + }
> put_cpu_var(zs_map_area);
> }
> EXPORT_SYMBOL_GPL(zs_unmap_object);
> diff --git a/drivers/staging/zsmalloc/zsmalloc_int.h b/drivers/staging/zsmalloc/zsmalloc_int.h
> index 52805176..8c0b344 100644
> --- a/drivers/staging/zsmalloc/zsmalloc_int.h
> +++ b/drivers/staging/zsmalloc/zsmalloc_int.h
> @@ -109,12 +109,6 @@ enum fullness_group {
> */
> static const int fullness_threshold_frac = 4;
>
> -struct mapping_area {
> - char *vm_buf; /* copy buffer for objects that span pages */
> - char *vm_addr; /* address of kmap_atomic()'ed pages */
> - enum zs_mapmode vm_mm; /* mapping mode */
> -};
> -
> struct size_class {
> /*
> * Size of objects stored in this class. Must be multiple
> --
> 1.7.9.5
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@xxxxxxxxxx For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"dont@xxxxxxxxx";> email@xxxxxxxxx </a>

--
Kind regards,
Minchan Kim
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/