Re: [PATCH V5 3/3] tuntap: allow polling/writing/reading when detached

From: Michael S. Tsirkin
Date: Wed Jan 16 2013 - 05:55:06 EST


On Wed, Jan 16, 2013 at 06:34:01PM +0800, Jason Wang wrote:
> We forbid polling, writing and reading when the file is detached; this may
> complicate things for the user in several cases:
>
> - when the guest passes some buffers to vhost/qemu and then disables some
> queues, host/qemu needs to do its own cleanup on those buffers, which is
> sometimes complex. We can do this simply by allowing a user to still write to
> a disabled queue. Writing to a disabled queue will cause the packet to be
> passed to the kernel, and a read will get nothing.
> - align the polling behavior with macvtap, which never fails when the queue is
> created. This can simplify the polling error handling of its users (e.g. vhost)
>
> In order to achieve this, tfile->tun is not assigned NULL when detached, and
> tfile->tun is converted to be RCU protected so that the data path can check
> whether the file is detached in a lockless manner. This will be used to
> prevent the flow caches from being updated for a detached queue.
>
> Signed-off-by: Jason Wang <jasowang@xxxxxxxxxx>


NAK

> ---
> drivers/net/tun.c | 43 +++++++++++++++++++++++++------------------
> 1 files changed, 25 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index c81680d..3f011e0 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -139,7 +139,7 @@ struct tun_file {
> unsigned int flags;
> u16 queue_index;
> struct list_head next;
> - struct tun_struct *detached;
> + struct tun_struct __rcu *detached;
> };
>
> struct tun_flow_entry {
> @@ -295,11 +295,12 @@ static void tun_flow_cleanup(unsigned long data)
> }
>
> static void tun_flow_update(struct tun_struct *tun, u32 rxhash,
> - u16 queue_index)
> + struct tun_file *tfile)
> {
> struct hlist_head *head;
> struct tun_flow_entry *e;
> unsigned long delay = tun->ageing_time;
> + u16 queue_index = tfile->queue_index;
>
> if (!rxhash)
> return;
> @@ -308,7 +309,7 @@ static void tun_flow_update(struct tun_struct *tun, u32 rxhash,
>
> rcu_read_lock();
>
> - if (tun->numqueues == 1)
> + if (tun->numqueues == 1 || !rtnl_dereference(tfile->detached))
> goto unlock;
>
> e = tun_flow_find(head, rxhash);

Did you try to run this with lockdep enabled?
tun_flow_update is called from tun_get_user without rtnl held, so
rtnl_dereference is arguably wrong and will cause a lockdep warning.

> @@ -384,16 +385,16 @@ static void tun_set_real_num_queues(struct tun_struct *tun)
>
> static void tun_disable_queue(struct tun_struct *tun, struct tun_file *tfile)
> {
> - tfile->detached = tun;
> + rcu_assign_pointer(tfile->detached, tun);
> list_add_tail(&tfile->next, &tun->disabled);
> ++tun->numdisabled;
> }
>
> static struct tun_struct *tun_enable_queue(struct tun_file *tfile)
> {
> - struct tun_struct *tun = tfile->detached;
> + struct tun_struct *tun = rtnl_dereference(tfile->detached);
>
> - tfile->detached = NULL;
> + rcu_assign_pointer(tfile->detached, NULL);
> list_del_init(&tfile->next);
> --tun->numdisabled;
> return tun;
> @@ -402,26 +403,27 @@ static struct tun_struct *tun_enable_queue(struct tun_file *tfile)
> static void __tun_detach(struct tun_file *tfile, bool clean)
> {
> struct tun_file *ntfile;
> - struct tun_struct *tun;
> + struct tun_struct *tun, *detached;
> struct net_device *dev;
>
> tun = rtnl_dereference(tfile->tun);
> + detached = rtnl_dereference(tfile->detached);
>
> - if (tun) {
> + if (tun && !detached) {
> u16 index = tfile->queue_index;
> BUG_ON(index >= tun->numqueues);
> dev = tun->dev;
>
> rcu_assign_pointer(tun->tfiles[index],
> tun->tfiles[tun->numqueues - 1]);
> - rcu_assign_pointer(tfile->tun, NULL);
> ntfile = rtnl_dereference(tun->tfiles[index]);
> ntfile->queue_index = index;
>
> --tun->numqueues;
> - if (clean)
> + if (clean) {
> + rcu_assign_pointer(tfile->tun, NULL);
> sock_put(&tfile->sk);
> - else
> + } else
> tun_disable_queue(tun, tfile);
>
> synchronize_net();
> @@ -429,7 +431,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
> /* Drop read queue */
> skb_queue_purge(&tfile->sk.sk_receive_queue);
> tun_set_real_num_queues(tun);
> - } else if (tfile->detached && clean) {
> + } else if (detached && clean) {
> tun = tun_enable_queue(tfile);
> sock_put(&tfile->sk);
> }
> @@ -466,6 +468,10 @@ static void tun_detach_all(struct net_device *dev)
> rcu_assign_pointer(tfile->tun, NULL);
> --tun->numqueues;
> }
> + list_for_each_entry(tfile, &tun->disabled, next) {
> + wake_up_all(&tfile->wq.wait);
> + rcu_assign_pointer(tfile->tun, NULL);
> + }
> BUG_ON(tun->numqueues != 0);
>
> synchronize_net();
> @@ -496,7 +502,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file)
> goto out;
>
> err = -EINVAL;
> - if (rtnl_dereference(tfile->tun))
> + if (rtnl_dereference(tfile->tun) && !rtnl_dereference(tfile->detached))
> goto out;
>
> err = -EBUSY;
> @@ -504,7 +510,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file)
> goto out;
>
> err = -E2BIG;
> - if (!tfile->detached &&
> + if (!rtnl_dereference(tfile->detached) &&
> tun->numqueues + tun->numdisabled == MAX_TAP_QUEUES)
> goto out;
>
> @@ -521,7 +527,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file)
> rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile);
> tun->numqueues++;
>
> - if (tfile->detached)
> + if (rtnl_dereference(tfile->detached))
> tun_enable_queue(tfile);
> else
> sock_hold(&tfile->sk);
> @@ -1195,7 +1201,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
> tun->dev->stats.rx_packets++;
> tun->dev->stats.rx_bytes += len;
>
> - tun_flow_update(tun, rxhash, tfile->queue_index);
> + tun_flow_update(tun, rxhash, tfile);
> return total_len;
> }
>
> @@ -1796,7 +1802,7 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr)
> rtnl_lock();
>
> if (ifr->ifr_flags & IFF_ATTACH_QUEUE) {
> - tun = tfile->detached;
> + tun = rtnl_dereference(tfile->detached);
> if (!tun) {
> ret = -EINVAL;
> goto unlock;
> @@ -1807,7 +1813,8 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr)
> ret = tun_attach(tun, file);
> } else if (ifr->ifr_flags & IFF_DETACH_QUEUE) {
> tun = rtnl_dereference(tfile->tun);
> - if (!tun || !(tun->flags & TUN_TAP_MQ))
> + if (!tun || !(tun->flags & TUN_TAP_MQ) ||
> + rtnl_dereference(tfile->detached))
> ret = -EINVAL;
> else
> __tun_detach(tfile, false);
> --
> 1.7.1
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/