Re: [PATCH] New x86_64 time code for 2.5.70

From: Bryan O'Sullivan (bos@serpentine.com)
Date: Thu Jun 12 2003 - 16:16:40 EST


On Thu, 2003-06-12 at 12:39, john stultz wrote:

> One little tweak, you're still subtracting tick_usec when calculating
> offset.

Well spotted, thanks.

I'm leaving in the test for negative offset for now, in spite of the
fix.

        <b

 arch/x86_64/Kconfig | 12 +
 arch/x86_64/kernel/acpi/boot.c | 6
 arch/x86_64/kernel/apic.c | 84 +++++-----
 arch/x86_64/kernel/smpboot.c | 11 +
 arch/x86_64/kernel/time.c | 306 +++++++++++++++++++++++++++++++--------
 arch/x86_64/kernel/vsyscall.c | 28 ++-
 arch/x86_64/vmlinux.lds.S | 6
 include/asm-x86_64/fixmap.h | 2
 include/asm-x86_64/mc146818rtc.h | 5
 include/asm-x86_64/timex.h | 30 +++
 include/asm-x86_64/vsyscall.h | 18 +-
 11 files changed, 384 insertions(+), 124 deletions(-)


# This is a BitKeeper generated patch for the following project:
# Project Name: Linux kernel tree
# This patch format is intended for GNU patch command version 2.5 or higher.
# This patch includes the following deltas:
# ChangeSet 1.1436 -> 1.1441
# arch/x86_64/vmlinux.lds.S 1.16 -> 1.17
# arch/x86_64/kernel/apic.c 1.20 -> 1.21
# include/asm-x86_64/fixmap.h 1.3 -> 1.4
# arch/x86_64/kernel/acpi/boot.c 1.1 -> 1.2
# include/asm-x86_64/mc146818rtc.h 1.1 -> 1.2
# include/asm-x86_64/timex.h 1.6 -> 1.7
# arch/x86_64/kernel/time.c 1.15 -> 1.19
# arch/x86_64/kernel/vsyscall.c 1.10 -> 1.11
# arch/x86_64/Kconfig 1.22 -> 1.23
# include/asm-x86_64/vsyscall.h 1.5 -> 1.6
# arch/x86_64/kernel/smpboot.c 1.19 -> 1.20
#
# The following is the BitKeeper ChangeSet Log
# --------------------------------------------
# 03/06/11 bos@serpentine.com 1.1437
# Forward port 2.4 time code. Optionally uses HPET instead of PIT/RTC for
# gettimeofday calculations. Far more stable than the current 2.5 code.
#
# Current caveat: this code doesn't track lost interrupts properly, so we
# can very slowly lose a jiffy here or there.
# --------------------------------------------
# 03/06/11 bos@serpentine.com 1.1438
# Fix residual bogons.
# --------------------------------------------
# 03/06/12 bos@serpentine.com 1.1440
# Further fixes to 2.5 time code.
# --------------------------------------------
# 03/06/12 bos@serpentine.com 1.1441
# Fix offset calculation.
# --------------------------------------------
#
diff -Nru a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
--- a/arch/x86_64/Kconfig Thu Jun 12 14:14:27 2003
+++ b/arch/x86_64/Kconfig Thu Jun 12 14:14:27 2003
@@ -52,6 +52,18 @@
           klogd/syslogd or the X server. You should normally N here, unless
           you want to debug such a crash.
           
+config HPET_TIMER
+ bool
+ default y
+ help
+ Use the IA-PC HPET (High Precision Event Timer) to manage
+ time in preference to the PIT and RTC, if a HPET is
+ present. The HPET provides a stable time base on SMP
+ systems, unlike the RTC, but it is more expensive to access,
+ as it is off-chip. You can find the HPET spec at
+ <http://www.intel.com/labs/platcomp/hpet/hpetspec.htm>.
+
+ If unsure, say Y.
 
 config GENERIC_ISA_DMA
         bool
diff -Nru a/arch/x86_64/kernel/acpi/boot.c b/arch/x86_64/kernel/acpi/boot.c
--- a/arch/x86_64/kernel/acpi/boot.c Thu Jun 12 14:14:27 2003
+++ b/arch/x86_64/kernel/acpi/boot.c Thu Jun 12 14:14:27 2003
@@ -244,9 +244,11 @@
                 return -1;
         }
 
- hpet.address = hpet_tbl->addr.addrl | ((long) hpet_tbl->addr.addrh << 32);
+ vxtime.hpet_address = hpet_tbl->addr.addrl |
+ ((long) hpet_tbl->addr.addrh << 32);
 
- printk(KERN_INFO "acpi: HPET id: %#x base: %#lx\n", hpet_tbl->id, hpet.address);
+ printk(KERN_INFO "acpi: HPET id: %#x base: %#lx\n",
+ hpet_tbl->id, vxtime.hpet_address);
 
         return 0;
 }
diff -Nru a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c
--- a/arch/x86_64/kernel/apic.c Thu Jun 12 14:14:27 2003
+++ b/arch/x86_64/kernel/apic.c Thu Jun 12 14:14:27 2003
@@ -46,7 +46,7 @@
 void enable_NMI_through_LVT0 (void * dummy)
 {
         unsigned int v, ver;
-
+
         ver = apic_read(APIC_LVR);
         ver = GET_APIC_VERSION(ver);
         v = APIC_DM_NMI; /* unmask and set to NMI */
@@ -297,7 +297,7 @@
          * Double-check whether this APIC is really registered.
          * This is meaningless in clustered apic mode, so we skip it.
          */
- if (!clustered_apic_mode &&
+ if (!clustered_apic_mode &&
             !test_bit(GET_APIC_ID(apic_read(APIC_ID)), &phys_cpu_present_map))
                 BUG();
 
@@ -309,7 +309,7 @@
 
         if (!clustered_apic_mode) {
                 /*
- * In clustered apic mode, the firmware does this for us
+ * In clustered apic mode, the firmware does this for us
                  * Put the APIC into flat delivery mode.
                  * Must be "all ones" explicitly for 82489DX.
                  */
@@ -422,15 +422,15 @@
                 value = apic_read(APIC_ESR);
                 Dprintk("ESR value after enabling vector: %08x\n", value);
         } else {
- if (esr_disable)
- /*
- * Something untraceble is creating bad interrupts on
+ if (esr_disable)
+ /*
+ * Something untraceble is creating bad interrupts on
                          * secondary quads ... for the moment, just leave the
                          * ESR disabled - we can't do anything useful with the
                          * errors anyway - mbligh
                          */
                         printk("Leaving ESR disabled.\n");
- else
+ else
                         printk("No ESR for 82489DX.\n");
         }
 
@@ -580,7 +580,7 @@
  * Detect and enable local APICs on non-SMP boards.
  * Original code written by Keir Fraser.
  * On AMD64 we trust the BIOS - if it says no APIC it is likely
- * not correctly set up (usually the APIC timer won't work etc.)
+ * not correctly set up (usually the APIC timer won't work etc.)
  */
 
 static int __init detect_init_APIC (void)
@@ -683,19 +683,25 @@
 
         local_irq_save(flags);
 
- /* For some reasons this doesn't work on Simics, so fake it for now */
- if (!strstr(boot_cpu_data.x86_model_id, "Screwdriver")) {
+ /* For some reasons this doesn't work on Simics, so fake it for now */
+ if (!strstr(boot_cpu_data.x86_model_id, "Screwdriver")) {
         __setup_APIC_LVTT(clocks);
                 return;
- }
+ }
 
         /* wait for irq slice */
- {
+ if (vxtime.hpet_address) {
+ int trigger = hpet_readl(HPET_T0_CMP);
+ while (hpet_readl(HPET_COUNTER) >= trigger)
+ /* do nothing */ ;
+ while (hpet_readl(HPET_COUNTER) < trigger)
+ /* do nothing */ ;
+ } else {
                 int c1, c2;
                 outb_p(0x00, 0x43);
                 c2 = inb_p(0x40);
                 c2 |= inb_p(0x40) << 8;
- do {
+ do {
                         c1 = c2;
                         outb_p(0x00, 0x43);
                         c2 = inb_p(0x40);
@@ -754,10 +760,10 @@
 
 void __init setup_boot_APIC_clock (void)
 {
- if (disable_apic_timer) {
- printk(KERN_INFO "Disabling APIC timer\n");
- return;
- }
+ if (disable_apic_timer) {
+ printk(KERN_INFO "Disabling APIC timer\n");
+ return;
+ }
 
         printk(KERN_INFO "Using local APIC timer interrupts.\n");
         using_apic_timer = 1;
@@ -816,7 +822,7 @@
         if ( (!multiplier) || (calibration_result/multiplier < 500))
                 return -EINVAL;
 
- /*
+ /*
          * Set the new multiplier for each CPU. CPUs don't start using the
          * new values until the next timer interrupt in which they do process
          * accounting. At that time they also adjust their APIC timers
@@ -856,7 +862,7 @@
                  * Interrupts are already masked off at this point.
                  */
                 per_cpu(prof_counter, cpu) = per_cpu(prof_multiplier, cpu);
- if (per_cpu(prof_counter, cpu) !=
+ if (per_cpu(prof_counter, cpu) !=
                     per_cpu(prof_old_multiplier, cpu)) {
                         __setup_APIC_LVTT(calibration_result/
                                         per_cpu(prof_counter, cpu));
@@ -928,19 +934,19 @@
                 ack_APIC_irq();
 
 #if 0
- static unsigned long last_warning;
- static unsigned long skipped;
+ static unsigned long last_warning;
+ static unsigned long skipped;
 
         /* see sw-dev-man vol 3, chapter 7.4.13.5 */
- if (time_before(last_warning+30*HZ,jiffies)) {
+ if (time_before(last_warning+30*HZ,jiffies)) {
                 printk(KERN_INFO "spurious APIC interrupt on CPU#%d, %ld skipped.\n",
                        smp_processor_id(), skipped);
- last_warning = jiffies;
+ last_warning = jiffies;
                 skipped = 0;
- } else {
- skipped++;
- }
-#endif
+ } else {
+ skipped++;
+ }
+#endif
         irq_exit();
 }
 
@@ -975,7 +981,7 @@
         irq_exit();
 }
 
-int disable_apic;
+int disable_apic;
 
 /*
  * This initializes the IO-APIC and APIC hardware if this is
@@ -983,11 +989,11 @@
  */
 int __init APIC_init_uniprocessor (void)
 {
- if (disable_apic) {
+ if (disable_apic) {
                 printk(KERN_INFO "Apic disabled\n");
- return -1;
+ return -1;
         }
- if (!cpu_has_apic) {
+ if (!cpu_has_apic) {
                 disable_apic = 1;
                 printk(KERN_INFO "Apic disabled by BIOS\n");
                 return -1;
@@ -1015,18 +1021,18 @@
         return 0;
 }
 
-static __init int setup_disableapic(char *str)
-{
+static __init int setup_disableapic(char *str)
+{
         disable_apic = 1;
         return 0;
-}
+}
 
-static __init int setup_noapictimer(char *str)
-{
+static __init int setup_noapictimer(char *str)
+{
         disable_apic_timer = 1;
         return 0;
-}
+}
 
-__setup("disableapic", setup_disableapic);
-__setup("noapictimer", setup_noapictimer);
+__setup("disableapic", setup_disableapic);
+__setup("noapictimer", setup_noapictimer);
 
diff -Nru a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
--- a/arch/x86_64/kernel/smpboot.c Thu Jun 12 14:14:27 2003
+++ b/arch/x86_64/kernel/smpboot.c Thu Jun 12 14:14:27 2003
@@ -67,6 +67,8 @@
 /* Set when the idlers are all forked */
 int smp_threads_ready;
 
+extern void time_init_smp(void);
+
 /*
  * Trampoline 80x86 program as an array.
  */
@@ -760,7 +762,7 @@
                 if (APIC_init_uniprocessor())
                         printk(KERN_NOTICE "Local APIC not detected."
                                            " Using dummy APIC emulation.\n");
- return;
+ goto smp_done;
         }
 
         /*
@@ -784,7 +786,7 @@
                 cpu_online_map = phys_cpu_present_map = 1;
                 phys_cpu_present_map = 1;
                 disable_apic = 1;
- return;
+ goto smp_done;
         }
 
         verify_local_APIC();
@@ -799,7 +801,7 @@
                 cpu_online_map = phys_cpu_present_map = 1;
                 phys_cpu_present_map = 1;
                 disable_apic = 1;
- return;
+ goto smp_done;
         }
 
         connect_bsp_APIC();
@@ -883,6 +885,9 @@
          */
         if (cpu_has_tsc && cpucount)
                 synchronize_tsc_bp();
+
+ smp_done:
+ time_init_smp();
 }
 
 /* These are wrappers to interface to the new boot process. Someone
diff -Nru a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
--- a/arch/x86_64/kernel/time.c Thu Jun 12 14:14:27 2003
+++ b/arch/x86_64/kernel/time.c Thu Jun 12 14:14:27 2003
@@ -24,6 +24,7 @@
 #include <linux/module.h>
 #include <linux/device.h>
 #include <linux/bcd.h>
+#include <asm/pgtable.h>
 #include <asm/vsyscall.h>
 #include <asm/timex.h>
 #ifdef CONFIG_X86_LOCAL_APIC
@@ -35,41 +36,61 @@
 extern int using_apic_timer;
 
 spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
+spinlock_t i8253_lock = SPIN_LOCK_UNLOCKED;
 
 extern int using_apic_timer;
 extern void smp_local_timer_interrupt(struct pt_regs * regs);
 
+#undef HPET_HACK_ENABLE_DANGEROUS
 
-unsigned int cpu_khz; /* TSC clocks / usec, not used here */
-unsigned long hpet_period; /* fsecs / HPET clock */
-unsigned long hpet_tick; /* HPET clocks / interrupt */
-int hpet_report_lost_ticks; /* command line option */
 
-struct hpet_data __hpet __section_hpet; /* address, quotient, trigger, hz */
+unsigned int cpu_khz; /* TSC clocks / usec, not used here */
+unsigned long hpet_period; /* fsecs / HPET clock */
+unsigned long hpet_tick; /* HPET clocks / interrupt */
+unsigned long vxtime_hz = 1193182;
+int report_lost_ticks; /* command line option */
+
+struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */
 
 volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
 unsigned long __wall_jiffies __section_wall_jiffies = INITIAL_JIFFIES;
 struct timespec __xtime __section_xtime;
 struct timezone __sys_tz __section_sys_tz;
 
+static inline void rdtscll_sync(unsigned long *tsc)
+{
+#ifdef CONFIG_SMP
+ sync_core();
+#endif
+ rdtscll(*tsc);
+}
+
 /*
  * do_gettimeoffset() returns microseconds since last timer interrupt was
  * triggered by hardware. A memory read of HPET is slower than a register read
  * of TSC, but much more reliable. It's also synchronized to the timer
  * interrupt. Note that do_gettimeoffset() may return more than hpet_tick, if a
- * timer interrupt has happened already, but hpet.trigger wasn't updated yet.
+ * timer interrupt has happened already, but vxtime.trigger wasn't updated yet.
  * This is not a problem, because jiffies hasn't updated either. They are bound
  * together by xtime_lock.
- */
+ */
 
-inline unsigned int do_gettimeoffset(void)
+static inline unsigned int do_gettimeoffset_tsc(void)
 {
         unsigned long t;
- sync_core();
- rdtscll(t);
- return (t - hpet.last_tsc) * (1000000L / HZ) / hpet.ticks + hpet.offset;
+ unsigned long x;
+ rdtscll_sync(&t);
+ x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> 32;
+ return x;
+}
+
+static inline unsigned int do_gettimeoffset_hpet(void)
+{
+ return ((hpet_readl(HPET_COUNTER) - vxtime.last) * vxtime.quot) >> 32;
 }
 
+unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc;
+
 /*
  * This version of gettimeofday() has microsecond resolution and better than
  * microsecond precision, as we're using at least a 10 MHz (usually 14.31818
@@ -87,7 +108,8 @@
                 sec = xtime.tv_sec;
                 usec = xtime.tv_nsec / 1000;
 
- t = (jiffies - wall_jiffies) * (1000000L / HZ) + do_gettimeoffset();
+ t = (jiffies - wall_jiffies) * (1000000L / HZ) +
+ do_gettimeoffset();
                 usec += t;
 
         } while (read_seqretry(&xtime_lock, seq));
@@ -169,17 +191,17 @@
         real_seconds = nowtime % 60;
         real_minutes = nowtime / 60;
         if (((abs(real_minutes - cmos_minutes) + 15) / 30) & 1)
- real_minutes += 30; /* correct for half hour time zone */
+ real_minutes += 30; /* correct for half hour time zone */
         real_minutes %= 60;
 
         if (abs(real_minutes - cmos_minutes) < 30) {
- BIN_TO_BCD(real_seconds);
- BIN_TO_BCD(real_minutes);
+ BIN_TO_BCD(real_seconds);
+ BIN_TO_BCD(real_minutes);
                 CMOS_WRITE(real_seconds, RTC_SECONDS);
                 CMOS_WRITE(real_minutes, RTC_MINUTES);
         } else
- printk(KERN_WARNING "time.c: can't update CMOS clock from %d to %d\n",
- cmos_minutes, real_minutes);
+ printk(KERN_WARNING "time.c: can't update CMOS clock "
+ "from %d to %d\n", cmos_minutes, real_minutes);
 
 /*
  * The following flags have to be released exactly in this order, otherwise the
@@ -198,27 +220,65 @@
 static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 {
         static unsigned long rtc_update = 0;
+ unsigned long tsc, lost = 0;
+ int delay, offset = 0;
 
 /*
  * Here we are in the timer irq handler. We have irqs locally disabled (so we
  * don't need spin_lock_irqsave()) but we don't know if the timer_bh is running
  * on the other CPU, so we need a lock. We also need to lock the vsyscall
  * variables, because both do_timer() and us change them -arca+vojtech
- */
+ */
 
         write_seqlock(&xtime_lock);
 
- {
- unsigned long t;
+ if (vxtime.hpet_address) {
+ offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
+ delay = hpet_readl(HPET_COUNTER) - offset;
+ } else {
+ spin_lock(&i8253_lock);
+ outb_p(0x00, 0x43);
+ delay = inb_p(0x40);
+ delay |= inb(0x40) << 8;
+ spin_unlock(&i8253_lock);
+ delay = LATCH - 1 - delay;
+ }
+
+ rdtscll_sync(&tsc);
 
- sync_core();
- rdtscll(t);
- hpet.offset = (t - hpet.last_tsc) * (1000000L / HZ) / hpet.ticks + hpet.offset - 1000000L / HZ;
- if (hpet.offset >= 1000000L / HZ)
- hpet.offset = 0;
- hpet.ticks = min_t(long, max_t(long, (t - hpet.last_tsc) * (1000000L / HZ) / (1000000L / HZ - hpet.offset),
- cpu_khz * 1000/HZ * 15 / 16), cpu_khz * 1000/HZ * 16 / 15);
- hpet.last_tsc = t;
+ if (vxtime.mode == VXTIME_HPET) {
+ if (offset - vxtime.last > hpet_tick) {
+ lost = (offset - vxtime.last) / hpet_tick - 1;
+ }
+
+ vxtime.last = offset;
+ } else {
+ offset = (((tsc - vxtime.last_tsc) *
+ vxtime.tsc_quot) >> 32) - (USEC_PER_SEC / HZ);
+
+ if (offset < 0)
+ offset = 0;
+
+ if (offset > (USEC_PER_SEC / HZ)) {
+ lost = offset / (USEC_PER_SEC / HZ);
+ offset %= (USEC_PER_SEC / HZ);
+ }
+
+ vxtime.last_tsc = tsc - vxtime.quot * delay / vxtime.tsc_quot;
+
+ if ((((tsc - vxtime.last_tsc) *
+ vxtime.tsc_quot) >> 32) < offset)
+ vxtime.last_tsc = tsc -
+ (((long) offset << 32) / vxtime.tsc_quot) - 1;
+ }
+
+ if (lost) {
+ if (report_lost_ticks)
+ printk(KERN_WARNING "time.c: Lost %ld timer "
+ "tick(s)! (rip %016lx)\n",
+ (offset - vxtime.last) / hpet_tick - 1,
+ regs->rip);
+ jiffies += lost;
         }
 
 /*
@@ -244,16 +304,16 @@
  * If we have an externally synchronized Linux clock, then update CMOS clock
  * accordingly every ~11 minutes. set_rtc_mmss() will be called in the jiffy
  * closest to exactly 500 ms before the next second. If the update fails, we
- * don'tcare, as it'll be updated on the next turn, and the problem (time way
+ * don't care, as it'll be updated on the next turn, and the problem (time way
  * off) isn't likely to go away much sooner anyway.
  */
 
         if ((~time_status & STA_UNSYNC) && xtime.tv_sec > rtc_update &&
- abs(xtime.tv_nsec - 500000000) <= tick_nsec / 2) {
+ abs(xtime.tv_nsec - 500000000) <= tick_nsec / 2) {
                 set_rtc_mmss(xtime.tv_sec);
                 rtc_update = xtime.tv_sec + 660;
         }
-
+
         write_sequnlock(&xtime_lock);
 
         return IRQ_HANDLED;
@@ -263,6 +323,7 @@
 {
         unsigned int timeout, year, mon, day, hour, min, sec;
         unsigned char last, this;
+ unsigned long flags;
 
 /*
  * The Linux interpretation of the CMOS clock register contents: When the
@@ -272,7 +333,7 @@
  * standard 8.3 MHz ISA bus.
  */
 
- spin_lock(&rtc_lock);
+ spin_lock_irqsave(&rtc_lock, flags);
 
         timeout = 1000000;
         last = this = 0;
@@ -286,28 +347,28 @@
 /*
  * Here we are safe to assume the registers won't change for a whole second, so
  * we just go ahead and read them.
- */
+ */
 
- sec = CMOS_READ(RTC_SECONDS);
- min = CMOS_READ(RTC_MINUTES);
- hour = CMOS_READ(RTC_HOURS);
- day = CMOS_READ(RTC_DAY_OF_MONTH);
- mon = CMOS_READ(RTC_MONTH);
- year = CMOS_READ(RTC_YEAR);
+ sec = CMOS_READ(RTC_SECONDS);
+ min = CMOS_READ(RTC_MINUTES);
+ hour = CMOS_READ(RTC_HOURS);
+ day = CMOS_READ(RTC_DAY_OF_MONTH);
+ mon = CMOS_READ(RTC_MONTH);
+ year = CMOS_READ(RTC_YEAR);
 
- spin_unlock(&rtc_lock);
+ spin_unlock_irqrestore(&rtc_lock, flags);
 
 /*
  * We know that x86-64 always uses BCD format, no need to check the config
  * register.
  */
 
- BCD_TO_BIN(sec);
- BCD_TO_BIN(min);
- BCD_TO_BIN(hour);
- BCD_TO_BIN(day);
- BCD_TO_BIN(mon);
- BCD_TO_BIN(year);
+ BCD_TO_BIN(sec);
+ BCD_TO_BIN(min);
+ BCD_TO_BIN(hour);
+ BCD_TO_BIN(day);
+ BCD_TO_BIN(mon);
+ BCD_TO_BIN(year);
 
 /*
  * This will work up to Dec 31, 2069.
@@ -326,6 +387,32 @@
 
 #define TICK_COUNT 100000000
 
+static unsigned int __init hpet_calibrate_tsc(void)
+{
+ int tsc_start, hpet_start;
+ int tsc_now, hpet_now;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ local_irq_disable();
+
+ hpet_start = hpet_readl(HPET_COUNTER);
+ rdtscl(tsc_start);
+
+ do {
+ local_irq_disable();
+ hpet_now = hpet_readl(HPET_COUNTER);
+ sync_core();
+ rdtscl(tsc_now);
+ local_irq_restore(flags);
+ } while ((tsc_now - tsc_start) < TICK_COUNT &&
+ (hpet_now - hpet_start) < TICK_COUNT);
+
+ return (tsc_now - tsc_start) * 1000000000L
+ / ((hpet_now - hpet_start) * hpet_period / 1000);
+}
+
+
 /*
  * pit_calibrate_tsc() uses the speaker output (channel 2) of
  * the PIT. This is better than using the timer interrupt output,
@@ -339,10 +426,9 @@
         unsigned long start, end;
         unsigned long flags;
 
- outb((inb(0x61) & ~0x02) | 0x01, 0x61);
+ spin_lock_irqsave(&i8253_lock, flags);
 
- local_irq_save(flags);
- local_irq_disable();
+ outb((inb(0x61) & ~0x02) | 0x01, 0x61);
 
         outb(0xb0, 0x43);
         outb((1193182 / (1000 / 50)) & 0xff, 0x42);
@@ -353,42 +439,146 @@
         sync_core();
         rdtscll(end);
 
+ spin_unlock_irqrestore(&i8253_lock, flags);
 
- local_irq_restore(flags);
-
         return (end - start) / 50;
 }
 
+static int hpet_init(void)
+{
+ unsigned int cfg, id;
+
+ if (!vxtime.hpet_address)
+ return -1;
+ set_fixmap_nocache(FIX_HPET_BASE, vxtime.hpet_address);
+
+/*
+ * Read the period, compute tick and quotient.
+ */
+
+ id = hpet_readl(HPET_ID);
+
+ if (!(id & HPET_ID_VENDOR) || !(id & HPET_ID_NUMBER) ||
+ !(id & HPET_ID_LEGSUP))
+ return -1;
+
+ hpet_period = hpet_readl(HPET_PERIOD);
+ if (hpet_period < 100000 || hpet_period > 100000000)
+ return -1;
+
+ hpet_tick = (1000000000L * (USEC_PER_SEC / HZ) + hpet_period / 2) /
+ hpet_period;
+
+/*
+ * Stop the timers and reset the main counter.
+ */
+
+ cfg = hpet_readl(HPET_CFG);
+ cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY);
+ hpet_writel(cfg, HPET_CFG);
+ hpet_writel(0, HPET_COUNTER);
+ hpet_writel(0, HPET_COUNTER + 4);
+
+/*
+ * Set up timer 0, as periodic with first interrupt to happen at hpet_tick,
+ * and period also hpet_tick.
+ */
+
+ hpet_writel(HPET_T0_ENABLE | HPET_T0_PERIODIC | HPET_T0_SETVAL |
+ HPET_T0_32BIT, HPET_T0_CFG);
+ hpet_writel(hpet_tick, HPET_T0_CMP);
+ hpet_writel(hpet_tick, HPET_T0_CMP);
+
+/*
+ * Go!
+ */
+
+ cfg |= HPET_CFG_ENABLE | HPET_CFG_LEGACY;
+ hpet_writel(cfg, HPET_CFG);
+
+ return 0;
+}
+
 void __init pit_init(void)
 {
+ unsigned long flags;
+
+ spin_lock_irqsave(&i8253_lock, flags);
         outb_p(0x34, 0x43); /* binary, mode 2, LSB/MSB, ch 0 */
         outb_p(LATCH & 0xff, 0x40); /* LSB */
         outb_p(LATCH >> 8, 0x40); /* MSB */
+ spin_unlock_irqrestore(&i8253_lock, flags);
 }
 
 int __init time_setup(char *str)
 {
- hpet_report_lost_ticks = 1;
+ report_lost_ticks = 1;
         return 1;
 }
 
-static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT, 0, "timer", NULL, NULL};
+static struct irqaction irq0 = {
+ timer_interrupt, SA_INTERRUPT, 0, "timer", NULL, NULL
+};
 
 extern void __init config_acpi_tables(void);
 
 void __init time_init(void)
 {
+ char *timename;
+
+#ifdef HPET_HACK_ENABLE_DANGEROUS
+ if (!vxtime.hpet_address) {
+ printk(KERN_WARNING "time.c: WARNING: Enabling HPET base "
+ "manually!\n");
+ outl(0x800038a0, 0xcf8);
+ outl(0xff000001, 0xcfc);
+ outl(0x800038a0, 0xcf8);
+ hpet_address = inl(0xcfc) & 0xfffffffe;
+ printk(KERN_WARNING "time.c: WARNING: Enabled HPET "
+ "at %#lx.\n", hpet_address);
+ }
+#endif
+
         xtime.tv_sec = get_cmos_time();
         xtime.tv_nsec = 0;
 
- pit_init();
- printk(KERN_INFO "time.c: Using 1.1931816 MHz PIT timer.\n");
- cpu_khz = pit_calibrate_tsc();
+ if (!hpet_init()) {
+ vxtime_hz = (1000000000000000L + hpet_period / 2) /
+ hpet_period;
+ cpu_khz = hpet_calibrate_tsc();
+ timename = "HPET";
+ } else {
+ pit_init();
+ cpu_khz = pit_calibrate_tsc();
+ timename = "PIT";
+ }
+
+ printk(KERN_INFO "time.c: Using %ld.%06ld MHz %s timer.\n",
+ vxtime_hz / 1000000, vxtime_hz % 1000000, timename);
         printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n",
                 cpu_khz / 1000, cpu_khz % 1000);
- hpet.ticks = cpu_khz * (1000 / HZ);
- rdtscll(hpet.last_tsc);
+ vxtime.mode = VXTIME_TSC;
+ vxtime.quot = (1000000L << 32) / vxtime_hz;
+ vxtime.tsc_quot = (1000L << 32) / cpu_khz;
+ vxtime.hz = vxtime_hz;
+ rdtscll_sync(&vxtime.last_tsc);
         setup_irq(0, &irq0);
+}
+
+void __init time_init_smp(void)
+{
+ char *timetype;
+
+ if (vxtime.hpet_address) {
+ timetype = "HPET";
+ vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick;
+ vxtime.mode = VXTIME_HPET;
+ do_gettimeoffset = do_gettimeoffset_hpet;
+ } else {
+ timetype = "PIT/TSC";
+ vxtime.mode = VXTIME_TSC;
+ }
+ printk(KERN_INFO "time.c: Using %s based timekeeping.\n", timetype);
 }
 
 __setup("report_lost_ticks", time_setup);
diff -Nru a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c
--- a/arch/x86_64/kernel/vsyscall.c Thu Jun 12 14:14:27 2003
+++ b/arch/x86_64/kernel/vsyscall.c Thu Jun 12 14:14:27 2003
@@ -33,7 +33,7 @@
  *
  * Add HPET support (port from 2.4). Still needed?
  * Nop out vsyscall syscall to avoid anchor for buffer overflows when sysctl off.
- *
+ *
  * These are not urgent things that we need to address only before shipping the first
  * production binary kernels.
  */
@@ -77,14 +77,22 @@
 
         do {
                 sequence = read_seqbegin(&__xtime_lock);
-
- sync_core();
- rdtscll(t);
+
                 sec = __xtime.tv_sec;
                 usec = (__xtime.tv_nsec / 1000) +
- (__jiffies - __wall_jiffies) * (1000000 / HZ) +
- (t - __hpet.last_tsc) * (1000000 / HZ) / __hpet.ticks + __hpet.offset;
+ (__jiffies - __wall_jiffies) * (1000000 / HZ);
 
+ if (__vxtime.mode == VXTIME_TSC) {
+ sync_core();
+ rdtscll(t);
+ usec += ((t - __vxtime.last_tsc) *
+ __vxtime.tsc_quot) >> 32;
+ } else {
+#if 0
+ usec += ((readl(fix_to_virt(VSYSCALL_HPET) + 0xf0) -
+ __vxtime.last) * __vxtime.quot) >> 32;
+#endif
+ }
         } while (read_seqretry(&__xtime_lock, sequence));
 
         tv->tv_sec = sec + usec / 1000000;
@@ -100,7 +108,7 @@
 static force_inline int gettimeofday(struct timeval *tv, struct timezone *tz)
 {
         int ret;
- asm volatile("syscall"
+ asm volatile("syscall"
                 : "=a" (ret)
                 : "0" (__NR_gettimeofday),"D" (tv),"S" (tz) : __syscall_clobber );
         return ret;
@@ -109,7 +117,7 @@
 static int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
 {
         if (unlikely(!__sysctl_vsyscall))
- return gettimeofday(tv,tz);
+ return gettimeofday(tv,tz);
         if (tv)
                 do_vgettimeofday(tv);
         if (tz)
@@ -119,13 +127,13 @@
 
 static time_t __vsyscall(1) vtime(time_t * t)
 {
- struct timeval tv;
+ struct timeval tv;
         if (unlikely(!__sysctl_vsyscall))
                 gettimeofday(&tv, NULL);
         else
                 do_vgettimeofday(&tv);
         if (t)
- *t = tv.tv_sec;
+ *t = tv.tv_sec;
         return tv.tv_sec;
 }
 
diff -Nru a/arch/x86_64/vmlinux.lds.S b/arch/x86_64/vmlinux.lds.S
--- a/arch/x86_64/vmlinux.lds.S Thu Jun 12 14:14:27 2003
+++ b/arch/x86_64/vmlinux.lds.S Thu Jun 12 14:14:27 2003
@@ -50,10 +50,10 @@
   .xtime_lock : AT ((LOADADDR(.vsyscall_0) + SIZEOF(.vsyscall_0) + 63) & ~(63)) { *(.xtime_lock) }
   xtime_lock = LOADADDR(.xtime_lock);
   . = ALIGN(16);
- .hpet : AT ((LOADADDR(.xtime_lock) + SIZEOF(.xtime_lock) + 15) & ~(15)) { *(.hpet) }
- hpet = LOADADDR(.hpet);
+ .vxtime : AT ((LOADADDR(.xtime_lock) + SIZEOF(.xtime_lock) + 15) & ~(15)) { *(.vxtime) }
+ vxtime = LOADADDR(.vxtime);
   . = ALIGN(16);
- .wall_jiffies : AT ((LOADADDR(.hpet) + SIZEOF(.hpet) + 15) & ~(15)) { *(.wall_jiffies) }
+ .wall_jiffies : AT ((LOADADDR(.vxtime) + SIZEOF(.vxtime) + 15) & ~(15)) { *(.wall_jiffies) }
   wall_jiffies = LOADADDR(.wall_jiffies);
   . = ALIGN(16);
   .sys_tz : AT ((LOADADDR(.wall_jiffies) + SIZEOF(.wall_jiffies) + 15) & ~(15)) { *(.sys_tz) }
diff -Nru a/include/asm-x86_64/fixmap.h b/include/asm-x86_64/fixmap.h
--- a/include/asm-x86_64/fixmap.h Thu Jun 12 14:14:27 2003
+++ b/include/asm-x86_64/fixmap.h Thu Jun 12 14:14:27 2003
@@ -35,6 +35,8 @@
 enum fixed_addresses {
         VSYSCALL_LAST_PAGE,
         VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1,
+ VSYSCALL_HPET,
+ FIX_HPET_BASE,
 #ifdef CONFIG_X86_LOCAL_APIC
         FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
 #endif
diff -Nru a/include/asm-x86_64/mc146818rtc.h b/include/asm-x86_64/mc146818rtc.h
--- a/include/asm-x86_64/mc146818rtc.h Thu Jun 12 14:14:27 2003
+++ b/include/asm-x86_64/mc146818rtc.h Thu Jun 12 14:14:27 2003
@@ -24,6 +24,11 @@
 outb_p((val),RTC_PORT(1)); \
 })
 
+#ifndef CONFIG_HPET_TIMER
 #define RTC_IRQ 8
+#else
+/* Temporary workaround due to IRQ routing problem. */
+#define RTC_IRQ 0
+#endif
 
 #endif /* _ASM_MC146818RTC_H */
diff -Nru a/include/asm-x86_64/timex.h b/include/asm-x86_64/timex.h
--- a/include/asm-x86_64/timex.h Thu Jun 12 14:14:27 2003
+++ b/include/asm-x86_64/timex.h Thu Jun 12 14:14:27 2003
@@ -30,6 +30,34 @@
 
 extern unsigned int cpu_khz;
 
-extern struct hpet_data hpet;
+/*
+ * Documentation on HPET can be found at:
+ * http://www.intel.com/ial/home/sp/pcmmspec.htm
+ * ftp://download.intel.com/ial/home/sp/mmts098.pdf
+ */
+
+#define HPET_ID 0x000
+#define HPET_PERIOD 0x004
+#define HPET_CFG 0x010
+#define HPET_STATUS 0x020
+#define HPET_COUNTER 0x0f0
+#define HPET_T0_CFG 0x100
+#define HPET_T0_CMP 0x108
+#define HPET_T0_ROUTE 0x110
+
+#define HPET_ID_VENDOR 0xffff0000
+#define HPET_ID_LEGSUP 0x00008000
+#define HPET_ID_NUMBER 0x00000f00
+#define HPET_ID_REV 0x000000ff
+
+#define HPET_CFG_ENABLE 0x001
+#define HPET_CFG_LEGACY 0x002
+
+#define HPET_T0_ENABLE 0x004
+#define HPET_T0_PERIODIC 0x008
+#define HPET_T0_SETVAL 0x040
+#define HPET_T0_32BIT 0x100
+
+extern struct vxtime_data vxtime;
 
 #endif
diff -Nru a/include/asm-x86_64/vsyscall.h b/include/asm-x86_64/vsyscall.h
--- a/include/asm-x86_64/vsyscall.h Thu Jun 12 14:14:27 2003
+++ b/include/asm-x86_64/vsyscall.h Thu Jun 12 14:14:27 2003
@@ -15,7 +15,7 @@
 
 #ifdef __KERNEL__
 
-#define __section_hpet __attribute__ ((unused, __section__ (".hpet"), aligned(16)))
+#define __section_vxtime __attribute__ ((unused, __section__ (".vxtime"), aligned(16)))
 #define __section_wall_jiffies __attribute__ ((unused, __section__ (".wall_jiffies"), aligned(16)))
 #define __section_jiffies __attribute__ ((unused, __section__ (".jiffies"), aligned(16)))
 #define __section_sys_tz __attribute__ ((unused, __section__ (".sys_tz"), aligned(16)))
@@ -23,22 +23,24 @@
 #define __section_xtime __attribute__ ((unused, __section__ (".xtime"), aligned(16)))
 #define __section_xtime_lock __attribute__ ((unused, __section__ (".xtime_lock"), aligned(L1_CACHE_BYTES)))
 
+#define VXTIME_TSC 1
+#define VXTIME_HPET 2
 
-struct hpet_data {
- long address; /* base address */
+struct vxtime_data {
+ long hpet_address; /* HPET base address */
         unsigned long hz; /* HPET clocks / sec */
- int trigger; /* value at last interrupt */
         int last;
- int offset;
         unsigned long last_tsc;
- long ticks;
+ long quot;
+ long tsc_quot;
+ int mode;
 };
 
 #define hpet_readl(a) readl(fix_to_virt(FIX_HPET_BASE) + a)
 #define hpet_writel(d,a) writel(d, fix_to_virt(FIX_HPET_BASE) + a)
 
 /* vsyscall space (readonly) */
-extern struct hpet_data __hpet;
+extern struct vxtime_data __vxtime;
 extern struct timespec __xtime;
 extern volatile unsigned long __jiffies;
 extern unsigned long __wall_jiffies;
@@ -46,7 +48,7 @@
 extern seqlock_t __xtime_lock;
 
 /* kernel space (writeable) */
-extern struct hpet_data hpet;
+extern struct vxtime_data vxtime;
 extern unsigned long wall_jiffies;
 extern struct timezone sys_tz;
 extern int sysctl_vsyscall;

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Sun Jun 15 2003 - 22:00:34 EST