Re: [PATCH v5 1/4] usb: dbc: early driver for xhci debug capability

From: Lu Baolu
Date: Mon Jan 23 2017 - 23:44:51 EST


Hi Ingo,

On 01/22/2017 05:04 PM, Ingo Molnar wrote:
> * Lu Baolu <baolu.lu@xxxxxxxxxxxxxxx> wrote:
>
>>>> +static void xdbc_runtime_delay(unsigned long count)
>>>> +{
>>>> + udelay(count);
>>>> +}
>>>> +static void (*xdbc_delay)(unsigned long) = xdbc_early_delay;
>>> Is this udelay() complication really necessary? udelay() should work fine even in
>>> early code. It might not be precisely calibrated, but should be good enough.
>> I tried udelay() in the early code. It's not precise enough for the
>> hardware handshaking.
> Possibly because on x86 early udelay() did not work at all - i.e. there's no delay
> whatsoever.

Yes.

>
> Could you try it on top of this commit in tip:timers/core:
>
> 4c45c5167c95 x86/timer: Make delay() work during early bootup
>
> ?

I tried tip:timers/core. It's not precise enough for my context either.

The relevant code is __const_udelay():

157 inline void __const_udelay(unsigned long xloops)
158 {
159 unsigned long lpj = this_cpu_read(cpu_info.loops_per_jiffy) ? : loops_per_jiffy;
160 int d0;
161
162 xloops *= 4;
163 asm("mull %%edx"
164 :"=d" (xloops), "=&a" (d0)
165 :"1" (xloops), "0" (lpj * (HZ / 4)));
166
167 __delay(++xloops);
168 }


In my early code, loops_per_jiffy is not initialized yet. Hence "lpj" for the asm line
is 4096 (default value).

The cpu_info.loops_per_jiffy actually reads 8832000 after initialization. The two
values differ by a factor of about 2000.

I did a hacky test in the kernel to check the difference between these two
"lpj" values. (The hacky patch is attached.) Below is the output for a 100ms delay.

[ 2.494751] udelay_test uninitialized ---->start
[ 2.494820] udelay_test uninitialized ---->end
[ 2.494828] udelay_test initialized ---->start
[ 2.595234] udelay_test initialized ---->end

For a 100ms delay, udelay() with uninitialized loops_per_jiffy gives a delay of
only 69us.

Best regards,
Lu Baolu
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index a8e91ae..ffc2874 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -168,6 +168,36 @@ inline void __const_udelay(unsigned long xloops)
}
EXPORT_SYMBOL(__const_udelay);

+void udelay_uninitialized(unsigned long xloops)
+{
+ unsigned long lpj = (1<<12);
+ int d0;
+
+ xloops *= 0x10c7ul;
+ xloops *= 4;
+ asm("mull %%edx"
+ :"=d" (xloops), "=&a" (d0)
+ :"1" (xloops), "0" (lpj * (HZ / 4)));
+
+ delay_loop(++xloops);
+}
+EXPORT_SYMBOL(udelay_uninitialized);
+
+void udelay_initialized(unsigned long xloops)
+{
+ unsigned long lpj = this_cpu_read(cpu_info.loops_per_jiffy);
+ int d0;
+
+ xloops *= 0x10c7ul;
+ xloops *= 4;
+ asm("mull %%edx"
+ :"=d" (xloops), "=&a" (d0)
+ :"1" (xloops), "0" (lpj * (HZ / 4)));
+
+ delay_loop(++xloops);
+}
+EXPORT_SYMBOL(udelay_initialized);
+
void __udelay(unsigned long usecs)
{
__const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 954abfd..b6a7437 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -302,6 +302,21 @@ static int xhci_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
/* USB-2 and USB-3 roothubs initialized, allow runtime pm suspend */
pm_runtime_put_noidle(&dev->dev);

+ do {
+ int count = 1000;
+
+ pr_notice("udelay_test uninitialized ---->start\n");
+ while (count-- > 0)
+ udelay_uninitialized(100);
+ pr_notice("udelay_test uninitialized ---->end\n");
+
+ count = 1000;
+ pr_notice("udelay_test initialized ---->start\n");
+ while (count-- > 0)
+ udelay_initialized(100);
+ pr_notice("udelay_test initialized ---->end\n");
+ } while (0);
+
return 0;

put_usb3_hcd:
diff --git a/include/asm-generic/delay.h b/include/asm-generic/delay.h
index 0f79054..200ab55 100644
--- a/include/asm-generic/delay.h
+++ b/include/asm-generic/delay.h
@@ -9,6 +9,8 @@ extern void __udelay(unsigned long usecs);
extern void __ndelay(unsigned long nsecs);
extern void __const_udelay(unsigned long xloops);
extern void __delay(unsigned long loops);
+extern void udelay_uninitialized(unsigned long xloops);
+extern void udelay_initialized(unsigned long xloops);

/*
* The weird n/20000 thing suppresses a "comparison is always false due to