[PATCH v2] xen-blkback: allocate list of pending reqs in small chunks
From: Roger Pau Monne
Date:  Fri Apr 26 2013 - 13:52:50 EST
Allocate pending requests in smaller chunks instead of allocating them
all at the same time.
This change also removes the global array of pending_reqs, since it is
no longer necessary.
Variables related to the grant mapping have been grouped into a struct
called "grant_page"; this allows allocating them in smaller chunks and
also improves memory locality.
Signed-off-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
Reported-by: Sander Eikelenboom <linux@xxxxxxxxxxxxxx>
Reviewed-by: David Vrabel <david.vrabel@xxxxxxxxxx>
Cc: David Vrabel <david.vrabel@xxxxxxxxxx>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
---
Changes since v1:
 * Remove stray pr_alert.
---
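For context only (not part of the patch to be applied): the sketch below is a
minimal userspace illustration of the chunked allocation and unwind-on-failure
pattern that xen_blkif_alloc() adopts in this change. The names, array sizes
and plain calloc()/free() calls are simplified stand-ins for the kernel's
kzalloc()/kfree() and list handling, chosen so the example compiles on its own.

/*
 * Illustrative sketch: allocate each request and its per-segment
 * structures individually, and unwind everything allocated so far if
 * any allocation fails. NR_REQS/NR_SEGS are placeholder constants.
 */
#include <stdio.h>
#include <stdlib.h>

#define NR_REQS 64
#define NR_SEGS 256

struct grant_page { void *page; int handle; unsigned int gref; };

struct pending_req {
	struct grant_page *segments[NR_SEGS];
};

static void free_req(struct pending_req *req)
{
	for (int j = 0; j < NR_SEGS; j++)
		free(req->segments[j]);	/* free(NULL) is a no-op */
	free(req);
}

static struct pending_req **alloc_reqs(void)
{
	/* zeroed array of request pointers, so unallocated slots stay NULL */
	struct pending_req **reqs = calloc(NR_REQS, sizeof(*reqs));

	if (!reqs)
		return NULL;

	for (int i = 0; i < NR_REQS; i++) {
		reqs[i] = calloc(1, sizeof(*reqs[i]));
		if (!reqs[i])
			goto fail;
		for (int j = 0; j < NR_SEGS; j++) {
			reqs[i]->segments[j] =
				calloc(1, sizeof(struct grant_page));
			if (!reqs[i]->segments[j])
				goto fail;
		}
	}
	return reqs;

fail:
	/* unwind every request allocated so far, including the partial one */
	for (int i = 0; i < NR_REQS && reqs[i]; i++)
		free_req(reqs[i]);
	free(reqs);
	return NULL;
}

int main(void)
{
	struct pending_req **reqs = alloc_reqs();

	printf("allocation %s\n", reqs ? "succeeded" : "failed");
	if (reqs) {
		for (int i = 0; i < NR_REQS; i++)
			free_req(reqs[i]);
		free(reqs);
	}
	return 0;
}

The same idea drives the new error path in xen_blkif_alloc(): because each
pending_req is linked into pending_free as soon as it is allocated, the
failure path can walk that list and free whatever was set up before bailing
out with -ENOMEM.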
 drivers/block/xen-blkback/blkback.c |   92 +++++++++++++++--------------------
 drivers/block/xen-blkback/common.h  |   18 +++---
 drivers/block/xen-blkback/xenbus.c  |   74 +++++++++++++++++++++------
 3 files changed, 106 insertions(+), 78 deletions(-)
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 1ebc0aa..e79ab45 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -641,9 +641,7 @@ purge_gnt_list:
  * used in the 'pending_req'.
  */
 static void xen_blkbk_unmap(struct xen_blkif *blkif,
-                            grant_handle_t handles[],
-                            struct page *pages[],
-                            struct persistent_gnt *persistent_gnts[],
+                            struct grant_page *pages[],
                             int num)
 {
 	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
@@ -652,16 +650,16 @@ static void xen_blkbk_unmap(struct xen_blkif *blkif,
 	int ret;
 
 	for (i = 0; i < num; i++) {
-		if (persistent_gnts[i] != NULL) {
-			put_persistent_gnt(blkif, persistent_gnts[i]);
+		if (pages[i]->persistent_gnt != NULL) {
+			put_persistent_gnt(blkif, pages[i]->persistent_gnt);
 			continue;
 		}
-		if (handles[i] == BLKBACK_INVALID_HANDLE)
+		if (pages[i]->handle == BLKBACK_INVALID_HANDLE)
 			continue;
-		unmap_pages[invcount] = pages[i];
-		gnttab_set_unmap_op(&unmap[invcount], vaddr(pages[i]),
-				    GNTMAP_host_map, handles[i]);
-		handles[i] = BLKBACK_INVALID_HANDLE;
+		unmap_pages[invcount] = pages[i]->page;
+		gnttab_set_unmap_op(&unmap[invcount], vaddr(pages[i]->page),
+				    GNTMAP_host_map, pages[i]->handle);
+		pages[i]->handle = BLKBACK_INVALID_HANDLE;
 		if (++invcount == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
 			ret = gnttab_unmap_refs(unmap, NULL, unmap_pages,
 			                        invcount);
@@ -677,10 +675,8 @@ static void xen_blkbk_unmap(struct xen_blkif *blkif,
 	}
 }
 
-static int xen_blkbk_map(struct xen_blkif *blkif, grant_ref_t grefs[],
-			 struct persistent_gnt *persistent_gnts[],
-			 grant_handle_t handles[],
-			 struct page *pages[],
+static int xen_blkbk_map(struct xen_blkif *blkif,
+			 struct grant_page *pages[],
 			 int num, bool ro)
 {
 	struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
@@ -707,26 +703,26 @@ again:
 		if (use_persistent_gnts)
 			persistent_gnt = get_persistent_gnt(
 				blkif,
-				grefs[i]);
+				pages[i]->gref);
 
 		if (persistent_gnt) {
 			/*
 			 * We are using persistent grants and
 			 * the grant is already mapped
 			 */
-			pages[i] = persistent_gnt->page;
-			persistent_gnts[i] = persistent_gnt;
+			pages[i]->page = persistent_gnt->page;
+			pages[i]->persistent_gnt = persistent_gnt;
 		} else {
-			if (get_free_page(blkif, &pages[i]))
+			if (get_free_page(blkif, &pages[i]->page))
 				goto out_of_memory;
-			addr = vaddr(pages[i]);
-			pages_to_gnt[segs_to_map] = pages[i];
-			persistent_gnts[i] = NULL;
+			addr = vaddr(pages[i]->page);
+			pages_to_gnt[segs_to_map] = pages[i]->page;
+			pages[i]->persistent_gnt = NULL;
 			flags = GNTMAP_host_map;
 			if (!use_persistent_gnts && ro)
 				flags |= GNTMAP_readonly;
 			gnttab_set_map_op(&map[segs_to_map++], addr,
-					  flags, grefs[i],
+					  flags, pages[i]->gref,
 					  blkif->domid);
 		}
 		map_until = i + 1;
@@ -745,16 +741,16 @@ again:
 	 * the page from the other domain.
 	 */
 	for (seg_idx = last_map, new_map_idx = 0; seg_idx < map_until; seg_idx++) {
-		if (!persistent_gnts[seg_idx]) {
+		if (!pages[seg_idx]->persistent_gnt) {
 			/* This is a newly mapped grant */
 			BUG_ON(new_map_idx >= segs_to_map);
 			if (unlikely(map[new_map_idx].status != 0)) {
 				pr_debug(DRV_PFX "invalid buffer -- could not remap it\n");
-				handles[seg_idx] = BLKBACK_INVALID_HANDLE;
+				pages[seg_idx]->handle = BLKBACK_INVALID_HANDLE;
 				ret |= 1;
 				goto next;
 			}
-			handles[seg_idx] = map[new_map_idx].handle;
+			pages[seg_idx]->handle = map[new_map_idx].handle;
 		} else {
 			continue;
 		}
@@ -776,14 +772,14 @@ again:
 			}
 			persistent_gnt->gnt = map[new_map_idx].ref;
 			persistent_gnt->handle = map[new_map_idx].handle;
-			persistent_gnt->page = pages[seg_idx];
+			persistent_gnt->page = pages[seg_idx]->page;
 			if (add_persistent_gnt(blkif,
 			                       persistent_gnt)) {
 				kfree(persistent_gnt);
 				persistent_gnt = NULL;
 				goto next;
 			}
-			persistent_gnts[seg_idx] = persistent_gnt;
+			pages[seg_idx]->persistent_gnt = persistent_gnt;
 			pr_debug(DRV_PFX " grant %u added to the tree of persistent grants, using %u/%u\n",
 				 persistent_gnt->gnt, blkif->persistent_gnt_c,
 				 xen_blkif_max_pgrants);
@@ -814,15 +810,11 @@ out_of_memory:
 	return -ENOMEM;
 }
 
-static int xen_blkbk_map_seg(struct pending_req *pending_req,
-			     struct seg_buf seg[],
-			     struct page *pages[])
+static int xen_blkbk_map_seg(struct pending_req *pending_req)
 {
 	int rc;
 
-	rc = xen_blkbk_map(pending_req->blkif, pending_req->grefs,
-	                   pending_req->persistent_gnts,
-	                   pending_req->grant_handles, pending_req->pages,
+	rc = xen_blkbk_map(pending_req->blkif, pending_req->segments,
 			   pending_req->nr_pages,
 	                   (pending_req->operation != BLKIF_OP_READ));
 
@@ -834,9 +826,7 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req,
 				    struct seg_buf seg[],
 				    struct phys_req *preq)
 {
-	struct persistent_gnt **persistent =
-		pending_req->indirect_persistent_gnts;
-	struct page **pages = pending_req->indirect_pages;
+	struct grant_page **pages = pending_req->indirect_pages;
 	struct xen_blkif *blkif = pending_req->blkif;
 	int indirect_grefs, rc, n, nseg, i;
 	struct blkif_request_segment_aligned *segments = NULL;
@@ -845,9 +835,10 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req,
 	indirect_grefs = INDIRECT_PAGES(nseg);
 	BUG_ON(indirect_grefs > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST);
 
-	rc = xen_blkbk_map(blkif, req->u.indirect.indirect_grefs,
-			   persistent, pending_req->indirect_handles,
-			   pages, indirect_grefs, true);
+	for (i = 0; i < indirect_grefs; i++)
+		pages[i]->gref = req->u.indirect.indirect_grefs[i];
+
+	rc = xen_blkbk_map(blkif, pages, indirect_grefs, true);
 	if (rc)
 		goto unmap;
 
@@ -856,10 +847,10 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req,
 			/* Map indirect segments */
 			if (segments)
 				kunmap_atomic(segments);
-			segments = kmap_atomic(pages[n/SEGS_PER_INDIRECT_FRAME]);
+			segments = kmap_atomic(pages[n/SEGS_PER_INDIRECT_FRAME]->page);
 		}
 		i = n % SEGS_PER_INDIRECT_FRAME;
-		pending_req->grefs[n] = segments[i].gref;
+		pending_req->segments[n]->gref = segments[i].gref;
 		seg[n].nsec = segments[i].last_sect -
 			segments[i].first_sect + 1;
 		seg[n].offset = (segments[i].first_sect << 9);
@@ -874,8 +865,7 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req,
 unmap:
 	if (segments)
 		kunmap_atomic(segments);
-	xen_blkbk_unmap(blkif, pending_req->indirect_handles,
-			pages, persistent, indirect_grefs);
+	xen_blkbk_unmap(blkif, pages, indirect_grefs);
 	return rc;
 }
 
@@ -965,9 +955,8 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
 	 * the proper response on the ring.
 	 */
 	if (atomic_dec_and_test(&pending_req->pendcnt)) {
-		xen_blkbk_unmap(pending_req->blkif, pending_req->grant_handles,
-		                pending_req->pages,
-		                pending_req->persistent_gnts,
+		xen_blkbk_unmap(pending_req->blkif,
+		                pending_req->segments,
 		                pending_req->nr_pages);
 		make_response(pending_req->blkif, pending_req->id,
 			      pending_req->operation, pending_req->status);
@@ -1104,7 +1093,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 	int operation;
 	struct blk_plug plug;
 	bool drain = false;
-	struct page **pages = pending_req->pages;
+	struct grant_page **pages = pending_req->segments;
 	unsigned short req_operation;
 
 	req_operation = req->operation == BLKIF_OP_INDIRECT ?
@@ -1165,7 +1154,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 		preq.dev               = req->u.rw.handle;
 		preq.sector_number     = req->u.rw.sector_number;
 		for (i = 0; i < nseg; i++) {
-			pending_req->grefs[i] = req->u.rw.seg[i].gref;
+			pages[i]->gref = req->u.rw.seg[i].gref;
 			seg[i].nsec = req->u.rw.seg[i].last_sect -
 				req->u.rw.seg[i].first_sect + 1;
 			seg[i].offset = (req->u.rw.seg[i].first_sect << 9);
@@ -1216,7 +1205,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 	 * the hypercall to unmap the grants - that is all done in
 	 * xen_blkbk_unmap.
 	 */
-	if (xen_blkbk_map_seg(pending_req, seg, pages))
+	if (xen_blkbk_map_seg(pending_req))
 		goto fail_flush;
 
 	/*
@@ -1228,7 +1217,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 	for (i = 0; i < nseg; i++) {
 		while ((bio == NULL) ||
 		       (bio_add_page(bio,
-				     pages[i],
+				     pages[i]->page,
 				     seg[i].nsec << 9,
 				     seg[i].offset) == 0)) {
 
@@ -1277,8 +1266,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 	return 0;
 
  fail_flush:
-	xen_blkbk_unmap(blkif, pending_req->grant_handles,
-	                pending_req->pages, pending_req->persistent_gnts,
+	xen_blkbk_unmap(blkif, pending_req->segments,
 	                pending_req->nr_pages);
  fail_response:
 	/* Haven't submitted any bio's yet. */
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index 1ac53da..c6b4cb9 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -297,8 +297,6 @@ struct xen_blkif {
 	int			free_pages_num;
 	struct list_head	free_pages;
 
-	/* Allocation of pending_reqs */
-	struct pending_req	*pending_reqs;
 	/* List of all 'pending_req' available */
 	struct list_head	pending_free;
 	/* And its spinlock. */
@@ -323,6 +321,13 @@ struct seg_buf {
 	unsigned int nsec;
 };
 
+struct grant_page {
+	struct page 		*page;
+	struct persistent_gnt	*persistent_gnt;
+	grant_handle_t		handle;
+	grant_ref_t		gref;
+};
+
 /*
  * Each outstanding request that we've passed to the lower device layers has a
  * 'pending_req' allocated to it. Each buffer_head that completes decrements
@@ -337,14 +342,9 @@ struct pending_req {
 	unsigned short		operation;
 	int			status;
 	struct list_head	free_list;
-	struct page		*pages[MAX_INDIRECT_SEGMENTS];
-	struct persistent_gnt	*persistent_gnts[MAX_INDIRECT_SEGMENTS];
-	grant_handle_t		grant_handles[MAX_INDIRECT_SEGMENTS];
-	grant_ref_t		grefs[MAX_INDIRECT_SEGMENTS];
+	struct grant_page	*segments[MAX_INDIRECT_SEGMENTS];
 	/* Indirect descriptors */
-	struct persistent_gnt	*indirect_persistent_gnts[MAX_INDIRECT_PAGES];
-	struct page		*indirect_pages[MAX_INDIRECT_PAGES];
-	grant_handle_t		indirect_handles[MAX_INDIRECT_PAGES];
+	struct grant_page	*indirect_pages[MAX_INDIRECT_PAGES];
 	struct seg_buf		seg[MAX_INDIRECT_SEGMENTS];
 	struct bio		*biolist[MAX_INDIRECT_SEGMENTS];
 };
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index afab208..4a4749c 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -105,7 +105,8 @@ static void xen_update_blkif_status(struct xen_blkif *blkif)
 static struct xen_blkif *xen_blkif_alloc(domid_t domid)
 {
 	struct xen_blkif *blkif;
-	int i;
+	struct pending_req *req, *n;
+	int i, j;
 
 	BUILD_BUG_ON(MAX_INDIRECT_PAGES > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST);
 
@@ -127,22 +128,51 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
 	blkif->free_pages_num = 0;
 	atomic_set(&blkif->persistent_gnt_in_use, 0);
 
-	blkif->pending_reqs = kcalloc(XEN_BLKIF_REQS,
-	                              sizeof(blkif->pending_reqs[0]),
-	                              GFP_KERNEL);
-	if (!blkif->pending_reqs) {
-		kmem_cache_free(xen_blkif_cachep, blkif);
-		return ERR_PTR(-ENOMEM);
-	}
 	INIT_LIST_HEAD(&blkif->pending_free);
+
+	for (i = 0; i < XEN_BLKIF_REQS; i++) {
+		req = kzalloc(sizeof(*req), GFP_KERNEL);
+		if (!req)
+			goto fail;
+		list_add_tail(&req->free_list,
+		              &blkif->pending_free);
+		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
+			req->segments[j] = kzalloc(sizeof(*req->segments[0]),
+			                           GFP_KERNEL);
+			if (!req->segments[j])
+				goto fail;
+		}
+		for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
+			req->indirect_pages[j] = kzalloc(sizeof(*req->indirect_pages[0]),
+			                                 GFP_KERNEL);
+			if (!req->indirect_pages[j])
+				goto fail;
+		}
+	}
 	spin_lock_init(&blkif->pending_free_lock);
 	init_waitqueue_head(&blkif->pending_free_wq);
 
-	for (i = 0; i < XEN_BLKIF_REQS; i++)
-		list_add_tail(&blkif->pending_reqs[i].free_list,
-			      &blkif->pending_free);
-
 	return blkif;
+
+fail:
+	list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) {
+		list_del(&req->free_list);
+		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
+			if (!req->segments[j])
+				break;
+			kfree(req->segments[j]);
+		}
+		for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
+			if (!req->indirect_pages[j])
+				break;
+			kfree(req->indirect_pages[j]);
+		}
+		kfree(req);
+	}
+
+	kmem_cache_free(xen_blkif_cachep, blkif);
+
+	return ERR_PTR(-ENOMEM);
 }
 
 static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
@@ -221,18 +251,28 @@ static void xen_blkif_disconnect(struct xen_blkif *blkif)
 
 static void xen_blkif_free(struct xen_blkif *blkif)
 {
-	struct pending_req *req;
-	int i = 0;
+	struct pending_req *req, *n;
+	int i = 0, j;
 
 	if (!atomic_dec_and_test(&blkif->refcnt))
 		BUG();
 
 	/* Check that there is no request in use */
-	list_for_each_entry(req, &blkif->pending_free, free_list)
+	list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) {
+		list_del(&req->free_list);
+
+		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++)
+			kfree(req->segments[j]);
+
+		for (j = 0; j < MAX_INDIRECT_PAGES; j++)
+			kfree(req->indirect_pages[j]);
+
+		kfree(req);
 		i++;
-	BUG_ON(i != XEN_BLKIF_REQS);
+	}
+
+	WARN_ON(i != XEN_BLKIF_REQS);
 
-	kfree(blkif->pending_reqs);
 	kmem_cache_free(xen_blkif_cachep, blkif);
 }
 
-- 
1.7.7.5 (Apple Git-26)