Re: [genirq/msi] 495c66aca3: BUG:sleeping_function_called_from_invalid_context_at_kernel/locking/mutex.c

From: Heiner Kallweit
Date: Tue Dec 28 2021 - 14:26:38 EST


On 28.12.2021 19:40, Thomas Gleixner wrote:
> On Mon, Dec 27 2021 at 23:05, kernel test robot wrote:
>>
>> FYI, we noticed the following commit (built with gcc-9):
>>
>> commit: 495c66aca3da704e063fa373fdbe371e71d3f4ee ("genirq/msi: Convert to new functions")
>> https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git irq/msi
>> kern :err : [ 126.209306] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:280
>> kern :err : [ 126.209308] in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 5183, name: ls
>> kern :err : [ 126.209311] preempt_count: 2, expected: 0
>> kern :warn : [ 126.209312] CPU: 2 PID: 5183 Comm: ls Not tainted 5.16.0-rc5-00091-g495c66aca3da #1
>> kern :warn : [ 126.209315] Hardware name: Hewlett-Packard HP Pro 3340 MT/17A1, BIOS 8.07 01/24/2013
>> kern :warn : [ 126.209316] Call Trace:
>> kern :warn : [ 126.209318] <TASK>
>> kern :warn : [ 126.209319] dump_stack_lvl (lib/dump_stack.c:107)
>> kern :warn : [ 126.209323] __might_resched.cold (kernel/sched/core.c:9539 kernel/sched/core.c:9492)
>> kern :warn : [ 126.209326] ? kasan_unpoison (mm/kasan/shadow.c:108 mm/kasan/shadow.c:142)
>> kern :warn : [ 126.209330] mutex_lock (kernel/locking/mutex.c:280)
>> kern :warn : [ 126.209335] ? __mutex_lock_slowpath (kernel/locking/mutex.c:279)
>> kern :warn : [ 126.209339] ? _raw_spin_lock_irqsave (arch/x86/include/asm/atomic.h:202 include/linux/atomic/atomic-instrumented.h:513 include/asm-generic/qspinlock.h:82 include/linux/spinlock.h:185 include/linux/spinlock_api_smp.h:111 kernel/locking/spinlock.c:162)
>> kern :warn : [ 126.209342] ? _raw_read_unlock_irqrestore (kernel/locking/spinlock.c:161)
>> kern :warn : [ 126.209344] msi_get_virq (kernel/irq/msi.c:332)
>> kern :warn : [ 126.209349] pci_irq_vector (drivers/pci/msi/msi.c:1085 drivers/pci/msi/msi.c:1077)
>> kern :warn : [ 126.209354] rtl8169_netpoll (drivers/net/ethernet/realtek/r8169_main.c:4722)
>> kern :warn : [ 126.209358] netpoll_poll_dev (net/core/netpoll.c:166 net/core/netpoll.c:195)
>> kern :warn : [ 126.209363] netpoll_send_skb (net/core/netpoll.c:350 net/core/netpoll.c:376)
>> kern :warn : [ 126.209367] write_msg (drivers/net/netconsole.c:862 drivers/net/netconsole.c:836) netconsole
>
> Fix below.
>
> Thanks,
>
> tglx
> ---
> drivers/net/ethernet/realtek/r8169_main.c | 14 +++++++-------
> 1 file changed, 7 insertions(+), 7 deletions(-)
>
> --- a/drivers/net/ethernet/realtek/r8169_main.c
> +++ b/drivers/net/ethernet/realtek/r8169_main.c
> @@ -615,6 +615,7 @@ struct rtl8169_private {
> struct ring_info tx_skb[NUM_TX_DESC]; /* Tx data buffers */
> u16 cp_cmd;
> u32 irq_mask;
> + int irq;
> struct clk *clk;
>
> struct {
> @@ -4698,7 +4699,7 @@ static int rtl8169_close(struct net_devi
>
> cancel_work_sync(&tp->wk.work);
>
> - free_irq(pci_irq_vector(pdev, 0), tp);
> + free_irq(tp->irq, tp);
>
> phy_disconnect(tp->phydev);
>
> @@ -4719,7 +4720,7 @@ static void rtl8169_netpoll(struct net_d
> {
> struct rtl8169_private *tp = netdev_priv(dev);
>
> - rtl8169_interrupt(pci_irq_vector(tp->pci_dev, 0), tp);
> + rtl8169_interrupt(tp->irq, tp);
> }
> #endif
>
> @@ -4753,8 +4754,7 @@ static int rtl_open(struct net_device *d
> rtl_request_firmware(tp);
>
> irqflags = pci_dev_msi_enabled(pdev) ? IRQF_NO_THREAD : IRQF_SHARED;
> - retval = request_irq(pci_irq_vector(pdev, 0), rtl8169_interrupt,
> - irqflags, dev->name, tp);
> + retval = request_irq(tp->irq, rtl8169_interrupt, irqflags, dev->name, tp);
> if (retval < 0)
> goto err_release_fw_2;
>
> @@ -4771,7 +4771,7 @@ static int rtl_open(struct net_device *d
> return retval;
>
> err_free_irq:
> - free_irq(pci_irq_vector(pdev, 0), tp);
> + free_irq(tp->irq, tp);
> err_release_fw_2:
> rtl_release_firmware(tp);
> rtl8169_rx_clear(tp);
> @@ -5341,6 +5341,7 @@ static int rtl_init_one(struct pci_dev *
> dev_err(&pdev->dev, "Can't allocate interrupt\n");
> return rc;
> }
> + tp->irq = pci_irq_vector(pdev, 0);
>
> INIT_WORK(&tp->wk.work, rtl_task);
>
> @@ -5416,8 +5417,7 @@ static int rtl_init_one(struct pci_dev *
> return rc;
>
> netdev_info(dev, "%s, %pM, XID %03x, IRQ %d\n",
> - rtl_chip_infos[chipset].name, dev->dev_addr, xid,
> - pci_irq_vector(pdev, 0));
> + rtl_chip_infos[chipset].name, dev->dev_addr, xid, tp->irq);
>
> if (jumbo_max)
> netdev_info(dev, "jumbo features [frames: %d bytes, tx checksumming: %s]\n",

Thanks for the patch; I'll submit it with your Signed-off-by.

Apart from pci_irq_vector() and the underlying msi_get_virq(), are there other functions
that must no longer be called from atomic context? Maybe the new constraint
should be added to the kernel-doc of the affected functions?