Re: [PATCH v3 7/9] xen/blkback: separate ring information out of struct xen_blkif

From: Bob Liu
Date: Sat Oct 10 2015 - 00:09:50 EST



On 10/05/2015 10:55 PM, Roger Pau Monné wrote:
> El 05/09/15 a les 14.39, Bob Liu ha escrit:
>> Split per-ring information into a new structure, xen_blkif_ring, so that one vbd
>> device can be associated with one or more rings/hardware queues.
>>
>> This patch is a preparation for supporting multi hardware queues/rings.
>>
>> Signed-off-by: Arianna Avanzini <avanzini.arianna@xxxxxxxxx>
>> Signed-off-by: Bob Liu <bob.liu@xxxxxxxxxx>
>> ---
>> drivers/block/xen-blkback/blkback.c | 365 ++++++++++++++++++-----------------
>> drivers/block/xen-blkback/common.h | 52 +++--
>> drivers/block/xen-blkback/xenbus.c | 130 +++++++------
>> 3 files changed, 295 insertions(+), 252 deletions(-)
>>
>> diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
>> index 954c002..fd02240 100644
>> --- a/drivers/block/xen-blkback/blkback.c
>> +++ b/drivers/block/xen-blkback/blkback.c
>> @@ -113,71 +113,71 @@ module_param(log_stats, int, 0644);
>> /* Number of free pages to remove on each call to gnttab_free_pages */
>> #define NUM_BATCH_FREE_PAGES 10
>>
>> -static inline int get_free_page(struct xen_blkif *blkif, struct page **page)
>> +static inline int get_free_page(struct xen_blkif_ring *ring, struct page **page)
>> {
>> unsigned long flags;
>>
>> - spin_lock_irqsave(&blkif->free_pages_lock, flags);
>> - if (list_empty(&blkif->free_pages)) {
>> - BUG_ON(blkif->free_pages_num != 0);
>> - spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
>> + spin_lock_irqsave(&ring->free_pages_lock, flags);
>> + if (list_empty(&ring->free_pages)) {
>
> I'm afraid the pool of free pages should be per-device, not per-ring.
>
>> + BUG_ON(ring->free_pages_num != 0);
>> + spin_unlock_irqrestore(&ring->free_pages_lock, flags);
>> return gnttab_alloc_pages(1, page);
>> }
>> - BUG_ON(blkif->free_pages_num == 0);
>> - page[0] = list_first_entry(&blkif->free_pages, struct page, lru);
>> + BUG_ON(ring->free_pages_num == 0);
>> + page[0] = list_first_entry(&ring->free_pages, struct page, lru);
>> list_del(&page[0]->lru);
>> - blkif->free_pages_num--;
>> - spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
>> + ring->free_pages_num--;
>> + spin_unlock_irqrestore(&ring->free_pages_lock, flags);
>>
>> return 0;
>> }
>>
>> -static inline void put_free_pages(struct xen_blkif *blkif, struct page **page,
>> +static inline void put_free_pages(struct xen_blkif_ring *ring, struct page **page,
>> int num)
>> {
>> unsigned long flags;
>> int i;
>>
>> - spin_lock_irqsave(&blkif->free_pages_lock, flags);
>> + spin_lock_irqsave(&ring->free_pages_lock, flags);
>> for (i = 0; i < num; i++)
>> - list_add(&page[i]->lru, &blkif->free_pages);
>> - blkif->free_pages_num += num;
>> - spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
>> + list_add(&page[i]->lru, &ring->free_pages);
>> + ring->free_pages_num += num;
>> + spin_unlock_irqrestore(&ring->free_pages_lock, flags);
>> }
>>
>> -static inline void shrink_free_pagepool(struct xen_blkif *blkif, int num)
>> +static inline void shrink_free_pagepool(struct xen_blkif_ring *ring, int num)
>> {
>> /* Remove requested pages in batches of NUM_BATCH_FREE_PAGES */
>> struct page *page[NUM_BATCH_FREE_PAGES];
>> unsigned int num_pages = 0;
>> unsigned long flags;
>>
>> - spin_lock_irqsave(&blkif->free_pages_lock, flags);
>> - while (blkif->free_pages_num > num) {
>> - BUG_ON(list_empty(&blkif->free_pages));
>> - page[num_pages] = list_first_entry(&blkif->free_pages,
>> + spin_lock_irqsave(&ring->free_pages_lock, flags);
>> + while (ring->free_pages_num > num) {
>> + BUG_ON(list_empty(&ring->free_pages));
>> + page[num_pages] = list_first_entry(&ring->free_pages,
>> struct page, lru);
>> list_del(&page[num_pages]->lru);
>> - blkif->free_pages_num--;
>> + ring->free_pages_num--;
>> if (++num_pages == NUM_BATCH_FREE_PAGES) {
>> - spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
>> + spin_unlock_irqrestore(&ring->free_pages_lock, flags);
>> gnttab_free_pages(num_pages, page);
>> - spin_lock_irqsave(&blkif->free_pages_lock, flags);
>> + spin_lock_irqsave(&ring->free_pages_lock, flags);
>> num_pages = 0;
>> }
>> }
>> - spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
>> + spin_unlock_irqrestore(&ring->free_pages_lock, flags);
>> if (num_pages != 0)
>> gnttab_free_pages(num_pages, page);
>> }
>>
>> #define vaddr(page) ((unsigned long)pfn_to_kaddr(page_to_pfn(page)))
>>
>> -static int do_block_io_op(struct xen_blkif *blkif);
>> -static int dispatch_rw_block_io(struct xen_blkif *blkif,
>> +static int do_block_io_op(struct xen_blkif_ring *ring);
>> +static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
>> struct blkif_request *req,
>> struct pending_req *pending_req);
>> -static void make_response(struct xen_blkif *blkif, u64 id,
>> +static void make_response(struct xen_blkif_ring *ring, u64 id,
>> unsigned short op, int st);
>>
>> #define foreach_grant_safe(pos, n, rbtree, node) \
>> @@ -198,19 +198,19 @@ static void make_response(struct xen_blkif *blkif, u64 id,
>> * bit operations to modify the flags of a persistent grant and to count
>> * the number of used grants.
>> */
>> -static int add_persistent_gnt(struct xen_blkif *blkif,
>> +static int add_persistent_gnt(struct xen_blkif_ring *ring,
>> struct persistent_gnt *persistent_gnt)
>> {
>> struct rb_node **new = NULL, *parent = NULL;
>> struct persistent_gnt *this;
>>
>> - if (blkif->persistent_gnt_c >= xen_blkif_max_pgrants) {
>> - if (!blkif->vbd.overflow_max_grants)
>> - blkif->vbd.overflow_max_grants = 1;
>> + if (ring->persistent_gnt_c >= xen_blkif_max_pgrants) {
>> + if (!ring->blkif->vbd.overflow_max_grants)
>> + ring->blkif->vbd.overflow_max_grants = 1;
>
> The same for the pool of persistent grants, it should be per-device and
> not per-ring.
>
> And I think this issue is far worse than the others, because a frontend
> might use a persistent grant on different queues, forcing the backend
> to map the grant several times for each queue; this is not acceptable IMO.
>

Hi Roger,

I realize that making the persistent grant pool per-device instead of per-queue would complicate things:
extra locks would be required to protect the per-device pool in both blkfront and blkback.

AFAIR, there was a discussion before about dropping persistent grant mapping altogether.
The only reason we kept this feature was backward compatibility.
So I think we should not complicate the xen-block code any further for the sake of a feature that is going to be dropped.

How about disabling feature-persistent when multi-queue is used?

--
Regards,
-Bob
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/