From 82454842b92cb79024395d3d2872c138c61058d8 Mon Sep 17 00:00:00 2001
From: Octavian Purdila
Date: Mon, 17 Jun 2013 17:11:16 +0300
Subject: [PATCH] aio: use ring->id and IDR to speed up lookups

Replace the per-mm hlist of kioctx structures with an IDR. When a
context is created, an id is allocated for it and published in the
user-visible aio_ring header, so that lookup_ioctx() can read the id
back from the ring with get_user() and find the kioctx with a single
idr_find() instead of walking a list. The value returned to user space
is still the address of the mmap'ed aio_ring, because libaio
dereferences that handle directly.
---
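Notes (below the ---, so not part of the commit message):

Why keep the ring address as the user handle instead of returning the
new ctx->id? Because user space reads the ring header directly. Below
is a minimal, illustrative sketch of that fast path; the struct layout
mirrors the kernel's struct aio_ring in fs/aio.c, and
ring_events_ready() is a hypothetical helper, not an actual libaio
function:

	#include <linux/aio_abi.h>	/* aio_context_t, struct io_event */

	/* user-space mirror of the kernel's struct aio_ring (fs/aio.c) */
	struct aio_ring {
		unsigned id;		/* after this patch: the kernel's IDR id */
		unsigned nr;		/* number of io_events */
		unsigned head;
		unsigned tail;
		unsigned magic;
		unsigned compat_features;
		unsigned incompat_features;
		unsigned header_length;	/* size of aio_ring */
		struct io_event io_events[0];
	};

	/*
	 * io_setup() stores ctx->mmap_base in *ctxp, so the handle is
	 * the address of the mmap'ed ring and can be dereferenced
	 * directly, e.g. to poll for completions without a syscall.
	 */
	static int ring_events_ready(aio_context_t ctx)
	{
		struct aio_ring *ring = (struct aio_ring *)ctx;

		return ring->head != ring->tail;
	}

Since the id now lives in the first word of that same shared page,
lookup_ioctx() can recover it with a single get_user() and then
validate the handle against ctx->mmap_base.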
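A second note, on the allocation side: the id is allocated with the
idr_preload() pattern so that idr_alloc() can run under mm->ioctx_lock
(a spinlock) without sleeping. A condensed sketch of the pattern, with
generic names that are not from this patch:

	#include <linux/gfp.h>
	#include <linux/idr.h>
	#include <linux/spinlock.h>

	static DEFINE_SPINLOCK(obj_lock);
	static DEFINE_IDR(obj_idr);

	/* returns the new id, or a negative errno */
	static int obj_install(void *obj)
	{
		int id;

		idr_preload(GFP_KERNEL);	/* may sleep: preallocates idr layers */
		spin_lock(&obj_lock);
		/* GFP_NOWAIT: only preallocated memory may be used here */
		id = idr_alloc(&obj_idr, obj, 0, 0, GFP_NOWAIT);
		spin_unlock(&obj_lock);
		idr_preload_end();

		return id;
	}

idr_alloc() can still fail even after a preload, which is why
ioctx_alloc() gains the out_undo_aio_nr error path to roll back the
aio_nr accounting.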
 arch/s390/mm/pgtable.c   |  5 ++--
 fs/aio.c                 | 76 ++++++++++++++++++++++++++++++++----------------
 include/linux/idr.h      | 11 +++++++
 include/linux/mm_types.h |  3 ++-
 kernel/fork.c            |  2 +-
 5 files changed, 69 insertions(+), 28 deletions(-)

diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index a938b54..085f317 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -17,6 +17,7 @@
 #include <linux/quicklist.h>
 #include <linux/rcupdate.h>
 #include <linux/slab.h>
+#include <linux/idr.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -1028,7 +1029,7 @@ int s390_enable_sie(void)
 	task_lock(tsk);
 	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
 #ifdef CONFIG_AIO
-	    !hlist_empty(&tsk->mm->ioctx_list) ||
+	    !idr_empty(&tsk->mm->ioctx_idr) ||
 #endif
 	    tsk->mm != tsk->active_mm) {
 		task_unlock(tsk);
@@ -1055,7 +1056,7 @@ int s390_enable_sie(void)
 	task_lock(tsk);
 	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
 #ifdef CONFIG_AIO
-	    !hlist_empty(&tsk->mm->ioctx_list) ||
+	    !idr_empty(&tsk->mm->ioctx_idr) ||
 #endif
 	    tsk->mm != tsk->active_mm) {
 		mmput(mm);
diff --git a/fs/aio.c b/fs/aio.c
index 2bbcacf..d4298d0 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -63,9 +63,7 @@ struct kioctx {
 	atomic_t		users;
 	atomic_t		dead;
 
-	/* This needs improving */
-	unsigned long		user_id;
-	struct hlist_node	list;
+	unsigned		id;
 
 	/*
 	 * This is what userspace passed to io_setup(), it's not used for
@@ -199,12 +197,11 @@ static int aio_setup_ring(struct kioctx *ctx)
 	if (populate)
 		mm_populate(ctx->mmap_base, populate);
 
-	ctx->user_id = ctx->mmap_base;
 	ctx->nr_events = nr_events; /* trusted copy */
 
 	ring = kmap_atomic(ctx->ring_pages[0]);
 	ring->nr = nr_events;	/* user copy */
-	ring->id = ctx->user_id;
+	ring->id = ~0U;
 	ring->head = ring->tail = 0;
 	ring->magic = AIO_RING_MAGIC;
 	ring->compat_features = AIO_RING_COMPAT_FEATURES;
@@ -343,6 +340,7 @@ static void put_ioctx(struct kioctx *ctx)
 static struct kioctx *ioctx_alloc(unsigned nr_events)
 {
 	struct mm_struct *mm = current->mm;
+	struct aio_ring *ring;
 	struct kioctx *ctx;
 	int err = -ENOMEM;
 
@@ -379,22 +377,44 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 	if (aio_nr + nr_events > aio_max_nr ||
 	    aio_nr + nr_events < aio_nr) {
 		spin_unlock(&aio_nr_lock);
+		err = -EAGAIN;
 		goto out_cleanup;
 	}
 	aio_nr += ctx->max_reqs;
 	spin_unlock(&aio_nr_lock);
 
-	/* now link into global list. */
+	/* Allocate an id for the ioctx, save it in the aio_ring, and
+	 * link it with the current context. This will allow us to
+	 * quickly locate the kioctx based on the user handle, which
+	 * currently is a pointer to the aio_ring.
+	 *
+	 * NOTE: a simpler approach would be to use ctx->id as the
+	 * user handle. However, libaio uses the aio_ring directly to
+	 * check the head/tail pointers and thus relies on the handle
+	 * being a pointer to the aio_ring.
+	 */
+	idr_preload(GFP_KERNEL);
 	spin_lock(&mm->ioctx_lock);
-	hlist_add_head_rcu(&ctx->list, &mm->ioctx_list);
+	err = idr_alloc(&mm->ioctx_idr, ctx, 0, 0, GFP_NOWAIT);
 	spin_unlock(&mm->ioctx_lock);
+	idr_preload_end();
+	if (err < 0)
+		goto out_undo_aio_nr;
+	ctx->id = err;
+
+	ring = kmap_atomic(ctx->ring_pages[0]);
+	ring->id = ctx->id;
+	kunmap_atomic(ring);
 
-	pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n",
-		 ctx, ctx->user_id, mm, ctx->nr_events);
+	pr_debug("allocated ioctx %p[%lu]: mm=%p mask=0x%x id=%u\n",
+		 ctx, ctx->mmap_base, mm, ctx->nr_events, ctx->id);
 	return ctx;
 
+out_undo_aio_nr:
+	spin_lock(&aio_nr_lock);
+	aio_nr -= ctx->max_reqs;
+	spin_unlock(&aio_nr_lock);
 out_cleanup:
-	err = -EAGAIN;
 	aio_free_ring(ctx);
 out_freectx:
 	kmem_cache_free(kioctx_cachep, ctx);
@@ -423,10 +442,12 @@ static void kill_ioctx_rcu(struct rcu_head *head)
  * when the processes owning a context have all exited to encourage
  * the rapid destruction of the kioctx.
  */
-static void kill_ioctx(struct kioctx *ctx)
+static void kill_ioctx(struct kioctx *ctx, struct mm_struct *mm)
 {
 	if (!atomic_xchg(&ctx->dead, 1)) {
-		hlist_del_rcu(&ctx->list);
+		spin_lock(&mm->ioctx_lock);
+		idr_remove(&mm->ioctx_idr, ctx->id);
+		spin_unlock(&mm->ioctx_lock);
 
 		/*
 		 * It'd be more correct to do this in free_ioctx(), after all
@@ -475,9 +496,9 @@ EXPORT_SYMBOL(wait_on_sync_kiocb);
 void exit_aio(struct mm_struct *mm)
 {
 	struct kioctx *ctx;
-	struct hlist_node *n;
+	int id;
 
-	hlist_for_each_entry_safe(ctx, n, &mm->ioctx_list, list) {
+	idr_for_each_entry(&mm->ioctx_idr, ctx, id) {
 		if (1 != atomic_read(&ctx->users))
 			printk(KERN_DEBUG
 				"exit_aio:ioctx still alive: %d %d %d\n",
@@ -494,7 +515,7 @@ void exit_aio(struct mm_struct *mm)
 		 */
 		ctx->mmap_size = 0;
 
-		kill_ioctx(ctx);
+		kill_ioctx(ctx, mm);
 	}
 }
@@ -553,20 +574,26 @@ EXPORT_SYMBOL(aio_put_req);
 static struct kioctx *lookup_ioctx(unsigned long ctx_id)
 {
+	struct aio_ring __user *ring = (void __user *)ctx_id;
 	struct mm_struct *mm = current->mm;
 	struct kioctx *ctx, *ret = NULL;
+	unsigned id;
 
-	rcu_read_lock();
+	if (get_user(id, &ring->id))
+		return NULL;
 
-	hlist_for_each_entry_rcu(ctx, &mm->ioctx_list, list) {
-		if (ctx->user_id == ctx_id) {
+	rcu_read_lock();
+	ctx = idr_find(&mm->ioctx_idr, id);
+	if (ctx) {
+		if (ctx->mmap_base == ctx_id) {
 			atomic_inc(&ctx->users);
 			ret = ctx;
-			break;
-		}
+		} else
+			WARN_ONCE(1, "aio: invalid ctx_id %lu: mmap_base = %lu, id = %u\n",
+				  ctx_id, ctx->mmap_base, id);
 	}
-
 	rcu_read_unlock();
+
 	return ret;
 }
@@ -850,9 +877,9 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
 	ioctx = ioctx_alloc(nr_events);
 	ret = PTR_ERR(ioctx);
 	if (!IS_ERR(ioctx)) {
-		ret = put_user(ioctx->user_id, ctxp);
+		ret = put_user(ioctx->mmap_base, ctxp);
 		if (ret)
-			kill_ioctx(ioctx);
+			kill_ioctx(ioctx, current->mm);
 		put_ioctx(ioctx);
 	}
@@ -870,7 +897,7 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
 {
 	struct kioctx *ioctx = lookup_ioctx(ctx);
 	if (likely(NULL != ioctx)) {
-		kill_ioctx(ioctx);
+		kill_ioctx(ioctx, current->mm);
 		put_ioctx(ioctx);
 		return 0;
 	}
diff --git a/include/linux/idr.h b/include/linux/idr.h
index 871a213..c28b9ab 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -132,6 +132,17 @@ static inline void *idr_find(struct idr *idr, int id)
 #define idr_for_each_entry(idp, entry, id)			\
 	for (id = 0; ((entry) = idr_get_next(idp, &(id))) != NULL; ++id)
+
+/**
+ * idr_empty - returns true if there are no elements in the given idr
+ * @idp: idr handle
+ */
+static inline bool idr_empty(struct idr *idp)
+{
+	int id = 0;
+	return idr_get_next(idp, &id) == NULL;
+}
+
 /*
  * Don't use the following functions.  These exist only to suppress
  * deprecated warnings on EXPORT_SYMBOL()s.
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index ace9a5f..f97eb1e 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -13,6 +13,7 @@
 #include <linux/page-debug-flags.h>
 #include <linux/uprobes.h>
 #include <linux/page-flags-layout.h>
+#include <linux/idr.h>
 #include <asm/page.h>
 #include <asm/mmu.h>
 
@@ -386,7 +387,7 @@ struct mm_struct {
 	struct core_state *core_state; /* coredumping support */
 #ifdef CONFIG_AIO
 	spinlock_t		ioctx_lock;
-	struct hlist_head	ioctx_list;
+	struct idr		ioctx_idr;
 #endif
 #ifdef CONFIG_MM_OWNER
 	/*
diff --git a/kernel/fork.c b/kernel/fork.c
index 987b28a..82a84a2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -524,7 +524,7 @@
 static void mm_init_aio(struct mm_struct *mm)
 {
 #ifdef CONFIG_AIO
 	spin_lock_init(&mm->ioctx_lock);
-	INIT_HLIST_HEAD(&mm->ioctx_list);
+	idr_init(&mm->ioctx_idr);
 #endif
 }
-- 
1.7.10.4