Re: libata/sata_nv latency on NVIDIA CK804 [was Re: AMD64 X2 lost ticks on PM timer]

From: Bill Rugolsky Jr.
Date: Wed Mar 22 2006 - 18:05:11 EST


On Wed, Mar 22, 2006 at 05:09:08PM +0100, Andi Kleen wrote:
> Also my latest patchkit has a debugging patch for lost tries
>
> ftp://ftp.firstfloor.org/pub/ak/x86_64/quilt/patches/lost-cli-debug
>
> Can you test it with this patch?

It didn't apply cleanly against vanilla 2.6.16; rediffed patch below.

Typical output:

time.c: Lost 12 timer tick(s)! rip 10:__do_softirq+0x4b/0xdf
last cli handle_IRQ_event+0x62/0x71
last cli caller __do_IRQ+0xa6/0x104
time.c: Lost 5 timer tick(s)! rip 10:__do_softirq+0x4b/0xdf
last cli handle_IRQ_event+0x62/0x71
last cli caller __do_IRQ+0xa6/0x104
time.c: Lost 4 timer tick(s)! rip 10:__do_softirq+0x4b/0xdf
last cli handle_IRQ_event+0x62/0x71
last cli caller __do_IRQ+0xa6/0x104
time.c: Lost 8 timer tick(s)! rip 10:__do_softirq+0x4b/0xdf
last cli handle_IRQ_event+0x62/0x71
last cli caller __do_IRQ+0xa6/0x104
time.c: Lost 3 timer tick(s)! rip 10:__do_softirq+0x4b/0xdf
last cli handle_IRQ_event+0x62/0x71
last cli caller __do_IRQ+0xa6/0x104

Some statistics:

rugolsky@ti94: grep 'last cli' /var/log/messages | sed -n -e '1p;$p'
Mar 22 17:37:06 ti94 kernel: last cli 0x0
Mar 22 17:43:48 ti94 kernel: last cli caller __do_IRQ+0xa6/0x104

rugolsky@ti94: grep 'last cli' /var/log/messages | cut -d' ' -f6- | sort | uniq -c | sort -nr
782 last cli handle_IRQ_event+0x62/0x71
782 last cli caller __do_IRQ+0xa6/0x104
2 last cli _spin_lock_irqsave+0x16/0x27
1 last cli setup_boot_APIC_clock+0x48/0x12e
1 last cli pci_direct_init+0x47/0x190
1 last cli kmem_cache_free+0x1d/0x62
1 last cli caller smp_prepare_cpus+0x36a/0x394
1 last cli caller release_console_sem+0x1a/0x1c9
1 last cli caller init+0x1c3/0x338
1 last cli caller acpi_os_release_object+0x9/0xd
1 last cli caller __up_read+0x19/0x9e
1 last cli caller 0x0
1 last cli 0x0

Thanks.

-Bill


--- linux-2.6.16/arch/x86_64/kernel/time.c.lost-cli-debug 2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16/arch/x86_64/kernel/time.c 2006-03-22 17:09:14.000000000 -0500
@@ -43,7 +43,7 @@
#endif

#ifdef CONFIG_CPU_FREQ
-static void cpufreq_delayed_get(void);
+static void cpufreq_delayed_get(struct pt_regs *);
#endif
extern void i8254_timer_resume(void);
extern int using_apic_timer;
@@ -63,7 +63,7 @@ static unsigned long hpet_period; /* f
unsigned long hpet_tick; /* HPET clocks / interrupt */
int hpet_use_timer; /* Use counter of hpet for time keeping, otherwise PIT */
unsigned long vxtime_hz = PIT_TICK_RATE;
-int report_lost_ticks; /* command line option */
+int report_lost_ticks = 1; /* command line option */
unsigned long long monotonic_base;

struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */
@@ -309,15 +309,20 @@ unsigned long long monotonic_clock(void)
}
EXPORT_SYMBOL(monotonic_clock);

+extern unsigned long last_clier, last_clier_caller;
+
static noinline void handle_lost_ticks(int lost, struct pt_regs *regs)
{
static long lost_count;
static int warned;

if (report_lost_ticks) {
- printk(KERN_WARNING "time.c: Lost %d timer "
- "tick(s)! ", lost);
- print_symbol("rip %s)\n", regs->rip);
+ printk(KERN_WARNING
+ "time.c: Lost %d timer tick(s)! rip %02lx",
+ lost, regs->cs);
+ print_symbol(":%s\n", regs->rip);
+ print_symbol("last cli %s\n", last_clier);
+ print_symbol("last cli caller %s\n", last_clier_caller);
}

if (lost_count == 1000 && !warned) {
@@ -345,7 +350,7 @@ static noinline void handle_lost_ticks(i
(like going into thermal throttle)
Give cpufreq a change to catch up. */
if ((lost_count+1) % 25 == 0) {
- cpufreq_delayed_get();
+ cpufreq_delayed_get(regs);
}
#endif
}
@@ -599,14 +604,15 @@ static void handle_cpufreq_delayed_get(v
* to verify the CPU frequency the timing core thinks the CPU is running
* at is still correct.
*/
-static void cpufreq_delayed_get(void)
+static void cpufreq_delayed_get(struct pt_regs *regs)
{
static int warned;
if (cpufreq_init && !cpufreq_delayed_issched) {
cpufreq_delayed_issched = 1;
if (!warned) {
warned = 1;
- printk(KERN_DEBUG "Losing some ticks... checking if CPU frequency changed.\n");
+ print_symbol(KERN_DEBUG
+ "Losing some ticks... checking if CPU frequency changed (rip=%s)\n", regs->rip);
}
schedule_work(&cpufreq_delayed_get_work);
}
--- linux-2.6.16/arch/x86_64/kernel/process.c.lost-cli-debug 2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16/arch/x86_64/kernel/process.c 2006-03-22 17:12:01.000000000 -0500
@@ -841,3 +841,16 @@ unsigned long arch_align_stack(unsigned
sp -= get_random_int() % 8192;
return sp & ~0xf;
}
+
+unsigned long last_clier, last_clier_caller;
+
+void __local_irq_disable(void *caller)
+{
+ if (!irqs_disabled()) {
+ last_clier = __builtin_return_address(0);
+ last_clier = (unsigned long)__builtin_return_address(0);
+ last_clier_caller = (unsigned long)caller;
+ asm volatile("cli":::"memory");
+ }
+}
+EXPORT_SYMBOL(__local_irq_disable);
--- linux-2.6.16/include/asm-x86_64/system.h.lost-cli-debug 2006-03-20 00:53:29.000000000 -0500
+++ linux-2.6.16/include/asm-x86_64/system.h 2006-03-22 17:01:50.000000000 -0500
@@ -351,7 +351,10 @@ static inline unsigned long __cmpxchg(vo
/* For spinlocks etc */
#define local_irq_save(x) do { local_save_flags(x); local_irq_restore((x & ~(1 << 9)) | (1 << 18)); } while (0)
#else /* CONFIG_X86_VSMP */
-#define local_irq_disable() __asm__ __volatile__("cli": : :"memory")
+
+extern void __local_irq_disable(void *caller);
+#define local_irq_disable() __local_irq_disable(__builtin_return_address(0))
+
#define local_irq_enable() __asm__ __volatile__("sti": : :"memory")

#define irqs_disabled() \
@@ -362,7 +365,7 @@ static inline unsigned long __cmpxchg(vo
})

/* For spinlocks etc */
-#define local_irq_save(x) do { warn_if_not_ulong(x); __asm__ __volatile__("# local_irq_save \n\t pushfq ; popq %0 ; cli":"=g" (x): /* no input */ :"memory"); } while (0)
+#define local_irq_save(x) do { warn_if_not_ulong(x); __asm__ __volatile__("# local_irq_save \n\t pushfq ; popq %0":"=g" (x): /* no input */ :"memory"); local_irq_disable(); } while (0)
#endif

/* used in the idle loop; sti takes one instruction cycle to complete */
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/