Re: hard-coded limit on unresolved multicast route cache in ipv4/ipmr.c causes slow, unreliable creation of multicast routes on busy networks

From: Hangbin Liu
Date: Tue Nov 20 2018 - 04:23:35 EST


Hi David,

On Sat, Jul 21, 2018 at 10:03:09PM -0700, David Miller wrote:
> Yeah that limit is bogus for several reasons.
...
>
> Therefore, it probably is safe and correct to remove this
> cache_resolve_queue_len altogether.
>
> Something like this:
>
> diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h
> index d633f737b3c6..b166465d7c05 100644
> --- a/include/linux/mroute_base.h
> +++ b/include/linux/mroute_base.h
> @@ -234,7 +234,6 @@ struct mr_table_ops {
> * @mfc_hash: Hash table of all resolved routes for easy lookup
> * @mfc_cache_list: list of resovled routes for possible traversal
> * @maxvif: Identifier of highest value vif currently in use
> - * @cache_resolve_queue_len: current size of unresolved queue
> * @mroute_do_assert: Whether to inform userspace on wrong ingress
> * @mroute_do_pim: Whether to receive IGMP PIMv1
> * @mroute_reg_vif_num: PIM-device vif index
> @@ -251,7 +250,6 @@ struct mr_table {
> struct rhltable mfc_hash;
> struct list_head mfc_cache_list;
> int maxvif;
> - atomic_t cache_resolve_queue_len;
> bool mroute_do_assert;
> bool mroute_do_pim;
> int mroute_reg_vif_num;
> diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
> index 9f79b9803a16..c007cf9bfe82 100644
> --- a/net/ipv4/ipmr.c
> +++ b/net/ipv4/ipmr.c
> @@ -747,8 +747,6 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
> struct sk_buff *skb;
> struct nlmsgerr *e;
>
> - atomic_dec(&mrt->cache_resolve_queue_len);
> -
> while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved))) {
> if (ip_hdr(skb)->version == 0) {
> struct nlmsghdr *nlh = skb_pull(skb,
> @@ -1135,9 +1133,11 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
> }
>
> if (!found) {
> + bool was_empty;
> +
> /* Create a new entry if allowable */
> - if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
> - (c = ipmr_cache_alloc_unres()) == NULL) {
> + c = ipmr_cache_alloc_unres();
> + if (!c) {
> spin_unlock_bh(&mfc_unres_lock);
>
> kfree_skb(skb);
> @@ -1163,11 +1163,11 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
> return err;
> }
>
> - atomic_inc(&mrt->cache_resolve_queue_len);
> + was_empty = list_empty(&mrt->mfc_unres_queue);
> list_add(&c->_c.list, &mrt->mfc_unres_queue);
> mroute_netlink_event(mrt, c, RTM_NEWROUTE);
>
> - if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
> + if (was_empty)
> mod_timer(&mrt->ipmr_expire_timer,
> c->_c.mfc_un.unres.expires);

In ipmr_expire_process() and ipmr_do_expire_process(), mod_timer() is called
when !list_empty(&mrt->mfc_unres_queue). Should the check here also be !was_empty?

BTW, do you have any plans to apply this patch to the kernel?

Regards
Hangbin

> }
> @@ -1274,7 +1274,6 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
> if (uc->mfc_origin == c->mfc_origin &&
> uc->mfc_mcastgrp == c->mfc_mcastgrp) {
> list_del(&_uc->list);
> - atomic_dec(&mrt->cache_resolve_queue_len);
> found = true;
> break;
> }
> @@ -1322,7 +1321,7 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all)
> mr_cache_put(c);
> }
>
> - if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
> + if (!list_empty(&mrt->mfc_unres_queue)) {
> spin_lock_bh(&mfc_unres_lock);
> list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
> list_del(&c->list);
> @@ -2648,9 +2647,19 @@ static int ipmr_rtm_route(struct sk_buff *skb, struct nlmsghdr *nlh,
> return ipmr_mfc_delete(tbl, &mfcc, parent);
> }
>
> +static int queue_count(struct mr_table *mrt)
> +{
> + struct list_head *pos;
> + int count = 0;
> +
> + list_for_each(pos, &mrt->mfc_unres_queue)
> + count++;
> + return count;
> +}
> +
> static bool ipmr_fill_table(struct mr_table *mrt, struct sk_buff *skb)
> {
> - u32 queue_len = atomic_read(&mrt->cache_resolve_queue_len);
> + u32 queue_len = queue_count(mrt);
>
> if (nla_put_u32(skb, IPMRA_TABLE_ID, mrt->id) ||
> nla_put_u32(skb, IPMRA_TABLE_CACHE_RES_QUEUE_LEN, queue_len) ||
> diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
> index 0d0f0053bb11..75e9c5a3e7ea 100644
> --- a/net/ipv6/ip6mr.c
> +++ b/net/ipv6/ip6mr.c
> @@ -759,8 +759,6 @@ static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
> struct net *net = read_pnet(&mrt->net);
> struct sk_buff *skb;
>
> - atomic_dec(&mrt->cache_resolve_queue_len);
> -
> while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
> if (ipv6_hdr(skb)->version == 0) {
> struct nlmsghdr *nlh = skb_pull(skb,
> @@ -1139,8 +1137,8 @@ static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
> * Create a new entry if allowable
> */
>
> - if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
> - (c = ip6mr_cache_alloc_unres()) == NULL) {
> + c = ip6mr_cache_alloc_unres();
> + if (!c) {
> spin_unlock_bh(&mfc_unres_lock);
>
> kfree_skb(skb);
> @@ -1167,7 +1165,6 @@ static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
> return err;
> }
>
> - atomic_inc(&mrt->cache_resolve_queue_len);
> list_add(&c->_c.list, &mrt->mfc_unres_queue);
> mr6_netlink_event(mrt, c, RTM_NEWROUTE);
>
> @@ -1455,7 +1452,6 @@ static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
> if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
> ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
> list_del(&_uc->list);
> - atomic_dec(&mrt->cache_resolve_queue_len);
> found = true;
> break;
> }
> @@ -1502,7 +1498,7 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all)
> mr_cache_put(c);
> }
>
> - if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
> + if (!list_empty(&mrt->mfc_unres_queue)) {
> spin_lock_bh(&mfc_unres_lock);
> list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
> list_del(&c->list);