[RFC PATCH 5/8] irq_work: Make self-IPIs optional

From: Frederic Weisbecker
Date: Sat Oct 20 2012 - 12:24:00 EST


When queueing an irq work, let the caller choose between raising
a self-IPI right away, provided the arch is able to do so, and
waiting for the next timer interrupt to run the work.

Non-urgent enqueuers such as printk may prefer not to raise an
IPI storm when they queue works frequently over a short period
of time.

Signed-off-by: Frederic Weisbecker <fweisbec@xxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Steven Rostedt <rostedt@xxxxxxxxxxx>
Cc: Paul Gortmaker <paul.gortmaker@xxxxxxxxxxxxx>
---
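A quick caller-side usage sketch (not for the changelog), assuming
this series is applied; the work instance and function names below
are illustrative, not part of the patch:

	#include <linux/irq_work.h>

	static void my_work_func(struct irq_work *work)
	{
		/* Called from irq_work_run() in hardirq context */
	}

	static struct irq_work my_work = {
		.func = my_work_func,
	};

	static void my_urgent_poke(void)
	{
		/* Raise the self-IPI right away (previous behaviour) */
		irq_work_queue(&my_work, true);
	}

	static void my_lazy_poke(void)
	{
		/* No IPI: the work runs from the next timer tick */
		irq_work_queue(&my_work, false);
	}

In the lazy case, if the tick is already stopped at queueing time the
IPI is raised anyway, and irq_work_needs_cpu() prevents the tick from
being stopped while works are pending, so the work is not delayed
indefinitely.
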
arch/x86/kernel/cpu/mcheck/mce.c | 2 +-
arch/x86/kvm/pmu.c | 2 +-
drivers/acpi/apei/ghes.c | 2 +-
drivers/staging/iio/trigger/iio-trig-sysfs.c | 2 +-
include/linux/irq_work.h | 8 +++++-
kernel/events/core.c | 4 +-
kernel/events/ring_buffer.c | 2 +-
kernel/irq_work.c | 32 +++++++++++++++++++++-----
kernel/time/tick-sched.c | 2 +-
9 files changed, 41 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 29e87d3..3020e95 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -549,7 +549,7 @@ static void mce_report_event(struct pt_regs *regs)
return;
}

- irq_work_queue(&__get_cpu_var(mce_irq_work));
+ irq_work_queue(&__get_cpu_var(mce_irq_work), true);
}

/*
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index cfc258a..0dfc716 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -128,7 +128,7 @@ static void kvm_perf_overflow_intr(struct perf_event *perf_event,
* NMI context. Do it from irq work instead.
*/
if (!kvm_is_in_guest())
- irq_work_queue(&pmc->vcpu->arch.pmu.irq_work);
+ irq_work_queue(&pmc->vcpu->arch.pmu.irq_work, true);
else
kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
}
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 1599566..44be554 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -874,7 +874,7 @@ next:
ghes_clear_estatus(ghes);
}
#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
- irq_work_queue(&ghes_proc_irq_work);
+ irq_work_queue(&ghes_proc_irq_work, true);
#endif

out:
diff --git a/drivers/staging/iio/trigger/iio-trig-sysfs.c b/drivers/staging/iio/trigger/iio-trig-sysfs.c
index 3bac972..7d6f9a9 100644
--- a/drivers/staging/iio/trigger/iio-trig-sysfs.c
+++ b/drivers/staging/iio/trigger/iio-trig-sysfs.c
@@ -105,7 +105,7 @@ static ssize_t iio_sysfs_trigger_poll(struct device *dev,
struct iio_trigger *trig = to_iio_trigger(dev);
struct iio_sysfs_trig *sysfs_trig = trig->private_data;

- irq_work_queue(&sysfs_trig->work);
+ irq_work_queue(&sysfs_trig->work, true);

return count;
}
diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h
index b39ea0b..71a33b7 100644
--- a/include/linux/irq_work.h
+++ b/include/linux/irq_work.h
@@ -17,8 +17,14 @@ void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *))
work->func = func;
}

-bool irq_work_queue(struct irq_work *work);
+bool irq_work_queue(struct irq_work *work, bool ipi);
void irq_work_run(void);
void irq_work_sync(struct irq_work *work);

+#ifdef CONFIG_IRQ_WORK
+bool irq_work_needs_cpu(void);
+#else
+static inline bool irq_work_needs_cpu(void) { return false; }
+#endif
+
#endif /* _LINUX_IRQ_WORK_H */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index cda3ebd..e7cbbcc 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4900,7 +4900,7 @@ static int __perf_event_overflow(struct perf_event *event,
ret = 1;
event->pending_kill = POLL_HUP;
event->pending_disable = 1;
- irq_work_queue(&event->pending);
+ irq_work_queue(&event->pending, true);
}

if (event->overflow_handler)
@@ -4910,7 +4910,7 @@ static int __perf_event_overflow(struct perf_event *event,

if (event->fasync && event->pending_kill) {
event->pending_wakeup = 1;
- irq_work_queue(&event->pending);
+ irq_work_queue(&event->pending, true);
}

return ret;
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 23cb34f..620df7a 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -39,7 +39,7 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
atomic_set(&handle->rb->poll, POLL_IN);

handle->event->pending_wakeup = 1;
- irq_work_queue(&handle->event->pending);
+ irq_work_queue(&handle->event->pending, true);
}

/*
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index 44a5b19..19f537b 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -12,6 +12,8 @@
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/irqflags.h>
+#include <linux/tick.h>
+#include <linux/sched.h>
#include <asm/processor.h>

/*
@@ -52,7 +54,7 @@ static bool irq_work_claim(struct irq_work *work)
/*
* Queue the entry and raise the IPI if needed.
*/
-static void __irq_work_queue(struct irq_work *work)
+static void __irq_work_queue(struct irq_work *work, bool ipi)
{
bool empty;

@@ -60,9 +62,16 @@ static void __irq_work_queue(struct irq_work *work)

empty = llist_add(&work->llnode, &__get_cpu_var(irq_work_list));
/* The list was empty, raise self-interrupt to start processing. */
- if (empty)
- arch_irq_work_raise();
-
+ if (empty) {
+ /*
+ * If an IPI is requested, raise it right away. Otherwise wait
+ * for the next tick, unless the tick is stopped. And if the arch
+ * raises irq works through some mechanism other than an IPI,
+ * just raise and don't think further.
+ */
+ if (ipi || !arch_irq_work_has_ipi() || tick_nohz_tick_stopped())
+ arch_irq_work_raise();
+ }
preempt_enable();
}

@@ -72,7 +81,7 @@ static void __irq_work_queue(struct irq_work *work)
*
* Can be re-enqueued while the callback is still in progress.
*/
-bool irq_work_queue(struct irq_work *work)
+bool irq_work_queue(struct irq_work *work, bool ipi)
{
if (!irq_work_claim(work)) {
/*
@@ -81,11 +90,22 @@ bool irq_work_queue(struct irq_work *work)
return false;
}

- __irq_work_queue(work);
+ __irq_work_queue(work, ipi);
return true;
}
EXPORT_SYMBOL_GPL(irq_work_queue);

+bool irq_work_needs_cpu(void)
+{
+ struct llist_head *this_list;
+
+ this_list = &__get_cpu_var(irq_work_list);
+ if (llist_empty(this_list))
+ return false;
+
+ return true;
+}
+
/*
* Run the irq_work entries on this cpu. Requires to be ran from hardirq
* context with local IRQs disabled.
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index ccc1971..5f87bb5 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -289,7 +289,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
} while (read_seqretry(&xtime_lock, seq));

if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || printk_needs_cpu(cpu) ||
- arch_needs_cpu(cpu)) {
+ arch_needs_cpu(cpu) || irq_work_needs_cpu()) {
next_jiffies = last_jiffies + 1;
delta_jiffies = 1;
} else {
--
1.7.5.4
