Re: gettimeofday non-monotonic on 2.2.7 SMP

dave madden (dhm@webvision.com)
Thu, 20 May 1999 11:13:08 -0700


=>From: Andrea Arcangeli <andrea@suse.de>
=>...
=>To discover which is the task that is masking irq for a so long time just
=>apply this patch against 2.2.*_andrea*.bz2:
=>
=>[extra logging patch omitted]

OK, with 2.3.3 + 2.3.3_andrea1.bz2 + your extra log message, I find
that a lot of ticks are lost by X (which is XiG's Xaccel server, not
XF86), but I also see netscape, swapper, afterstep, and even my
gettimeofday() test program (see portion of /var/log/messages below).

I also added some static variables that track the time returned by
do_gettimeofday, and if an earlier time is about to be returned, it
returns the latest time ever returned instead. It also counts the
number of times this happens and the size of the largest error, and
prints a message every 30 seconds or so. Most of the errors are very
small -- the result of xntpd -- but a few are in the 1-5ms range.
Could these indicate an error in your recover_lost_timer calculations,
or maybe that the TSCs have become unsynchronized?

I'll also append arch/i386/kernel/time.c so you can see my
modifications, which are #ifdef'd TIMEWARP_INFO.

regards,
d.

May 20 10:52:27 vheissu kernel: recover_lost_timer: lost 1 tick from 0826268a process netscape pid 499
May 20 10:52:27 vheissu kernel: recover_lost_timer: lost 1 tick from c010de4f process netscape pid 499
May 20 10:52:28 vheissu kernel: recover_lost_timer: lost 1 tick from c011a964 process netscape pid 499
May 20 10:52:28 vheissu kernel: recover_lost_timer: lost 1 tick from 08090077 process X pid 410
May 20 10:52:28 vheissu kernel: recover_lost_timer: lost 1 tick from 401c6533 process X pid 410
May 20 10:52:28 vheissu kernel: recover_lost_timer: lost 1 tick from c010dda1 process time pid 562
May 20 10:52:28 vheissu kernel: recover_lost_timer: lost 1 tick from 401ca1c5 process X pid 410
May 20 10:52:28 vheissu kernel: recover_lost_timer: lost 1 tick from 401ca18a process X pid 410
May 20 10:52:28 vheissu kernel: recover_lost_timer: lost 1 tick from 401ca18a process X pid 410
May 20 10:52:28 vheissu kernel: recover_lost_timer: lost 1 tick from 401ca1ba process X pid 410
May 20 10:52:28 vheissu kernel: recover_lost_timer: lost 1 tick from 401c6452 process X pid 410
May 20 10:52:28 vheissu kernel: recover_lost_timer: lost 1 tick from 401ca18a process X pid 410
May 20 10:52:53 vheissu kernel: 165475 time warps evaded in last 30 seconds (worst 0.000024)

/*
* linux/arch/i386/kernel/time.c
*
* Copyright (C) 1991, 1992, 1995 Linus Torvalds
*
* This file contains the PC-specific time handling details:
* reading the RTC at bootup, etc..
* 1994-07-02 Alan Modra
* fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime
* 1995-03-26 Markus Kuhn
* fixed 500 ms bug at call to set_rtc_mmss, fixed DS12887
* precision CMOS clock update
* 1996-05-03 Ingo Molnar
* fixed time warps in do_[slow|fast]_gettimeoffset()
* 1997-09-10 Updated NTP code according to technical memorandum Jan '96
* "A Kernel Model for Precision Timekeeping" by Dave Mills
* 1998-09-05 (Various)
* More robust do_fast_gettimeoffset() algorithm implemented
* (works with APM, Cyrix 6x86MX and Centaur C6),
* monotonic gettimeofday() with fast_get_timeoffset(),
* drift-proof precision TSC calibration on boot
* (C. Scott Ananian <cananian@alumni.princeton.edu>, Andrew D.
* Balsa <andrebalsa@altern.org>, Philip Gladstone <philip@raptor.com>;
* ported from 2.0.35 Jumbo-9 by Michael Krause <m.krause@tu-harburg.de>).
* 1998-12-16 Andrea Arcangeli
* Fixed Jumbo-9 code in 2.1.131: do_gettimeofday was missing 1 jiffy
* because was not accounting lost_ticks.
* 1998-12-24 Copyright (C) 1998 Andrea Arcangeli
* Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
* serialize accesses to xtime/lost_ticks).
* 1999-03-14 Andrea Arcangeli
* recover_lost_timer(): using the TSC information we won't
* ever miss a timer irq anymore. Some fix in calibrate_tsc().
*/

#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/param.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/time.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/smp.h>

#include <asm/processor.h>
#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/irq.h>
#include <asm/delay.h>
#include <asm/msr.h>

#include <linux/mc146818rtc.h>
#include <linux/timex.h>
#include <linux/config.h>

#include <asm/fixmap.h>
#include <asm/cobalt.h>

/*
* for x86_do_profile()
*/
#include "irq.h"

unsigned long cpu_hz; /* Detected as we calibrate the TSC */

/* Number of usecs that the last interrupt was delayed */
static int delay_at_last_interrupt;

static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */

/* Cached *multiplier* to convert TSC counts to microseconds.
* (see the equation below).
* Equal to 2^32 * (1 / (clocks per usec) ).
* Initialized in time_init.
*/
static unsigned long fast_gettimeoffset_quotient=0;

extern rwlock_t xtime_lock;
extern volatile unsigned long lost_ticks;

/*
 * Return the number of microseconds elapsed since the last timer
 * interrupt, derived from the Time Stamp Counter.
 *
 * The low 32 bits of the TSC are compared against last_tsc_low; the
 * cycle delta is scaled to microseconds with the cached multiplier
 * fast_gettimeoffset_quotient and the result is added to
 * delay_at_last_interrupt.
 */
static inline unsigned long do_fast_gettimeoffset(void)
{
/* Pinned to %eax/%edx because rdtsc and mull both use that register pair */
register unsigned long eax asm("ax");
register unsigned long edx asm("dx");

/* Read the Time Stamp Counter */

rdtsc(eax,edx);

/* .. relative to previous jiffy (32 bits is enough) */
eax -= last_tsc_low; /* tsc_low delta */

/*
* Time offset = (tsc_low delta) * fast_gettimeoffset_quotient
* = (tsc_low delta) * (usecs_per_clock)
* = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy)
*
* Using a mull instead of a divl saves up to 31 clock cycles
* in the critical path.
*
* mull leaves the 64-bit product in edx:eax.  Because the quotient is
* scaled by 2^32, the upper half (edx) is the offset in whole
* microseconds; the lower half (eax) is discarded.
*/

__asm__("mull %2"
:"=a" (eax), "=d" (edx)
:"g" (fast_gettimeoffset_quotient),
"0" (eax));

/* our adjusted time offset in microseconds */
return delay_at_last_interrupt + edx;
}

#define TICK_SIZE tick

#ifndef CONFIG_X86_TSC

/* This function must be called with interrupts disabled
* It was inspired by Steve McCanne's microtime-i386 for BSD. -- jrs
*
* However, the pc-audio speaker driver changes the divisor so that
* it gets interrupted rather more often - it loads 64 into the
* counter rather than 11932! This has an adverse impact on
* do_gettimeoffset() -- it stops working! What is also not
* good is that the interval that our timer function gets called
* is no longer 10.0002 ms, but 9.9767 ms. To get around this
* would require using a different timing source. Maybe someone
* could use the RTC - I know that this can interrupt at frequencies
* ranging from 8192Hz to 2Hz. If I had the energy, I'd somehow fix
* it so that at startup, the timer code in sched.c would select
* using either the RTC or the 8253 timer. The decision would be
* based on whether there was any other device around that needed
* to trample on the 8253. I'd set up the RTC to interrupt at 1024 Hz,
* and then do some jiggery to have a version of do_timer that
* advanced the clock by 1/1024 s. Every time that reached over 1/100
* of a second, then do all the old code. If the time was kept correct
* then do_gettimeoffset could just return 0 - there is no low order
* divider that can be accessed.
*
* Ideally, you would be able to use the RTC for the speaker driver,
* but it appears that the speaker driver really needs interrupt more
* often than every 120 us or so.
*
* Anyway, this needs more thought.... pjsg (1993-08-28)
*
* If you are really that interested, you should be reading
* comp.protocols.time.ntp!
*/

/*
 * Return the time elapsed since the last timer tick, computed from the
 * current 8254 counter value: (LATCH-1 - count) is scaled by
 * TICK_SIZE / LATCH with rounding.
 *
 * The previous counter value and jiffies value are remembered in static
 * variables so that a counter that appears to have moved backwards
 * within the same jiffy can be detected and corrected.
 */
static unsigned long do_slow_gettimeoffset(void)
{
int count;

static int count_p = LATCH; /* for the first call after boot */
static unsigned long jiffies_p = 0;

/*
* cache volatile jiffies temporarily; we have IRQs turned off.
*/
unsigned long jiffies_t;

/* timer count may underflow right here */
count = get_8254_timer_count();

jiffies_t = jiffies;

/*
* avoiding timer inconsistencies (they are rare, but they happen)...
* there are two kinds of problems that must be avoided here:
* 1. the timer counter underflows
* 2. hardware problem with the timer, not giving us continuous time,
* the counter does small "jumps" upwards on some Pentium systems,
* (see c't 95/10 page 335 for Neptun bug.)
*/

/* you can safely undefine this if you don't have the Neptune chipset */

#define BUGGY_NEPTUN_TIMER

if( jiffies_t == jiffies_p ) {
if( count > count_p ) {
/*
* the nutcase: same jiffy, yet the down-counter reads higher than
* it did on the previous call
*/

outb_p(0x0A, 0x20);

/*
* Tests bit 0 of the byte read back from port 0x20.  The original
* comment here said "assumption about timer being IRQ1";
* NOTE(review): bit 0 presumably corresponds to the timer interrupt
* line -- confirm against the interrupt controller documentation.
*/
if( inb(0x20) & 0x01 ) {
/*
* We cannot detect lost timer interrupts ...
* well, that's why we call them lost, don't we? :)
* [hmm, on the Pentium and Alpha we can ... sort of]
*/
count -= LATCH;
} else {
#ifdef BUGGY_NEPTUN_TIMER
/*
* for the Neptun bug we know that the 'latch'
* command doesn't latch the high and low value
* of the counter atomically. Thus we have to
* subtract 256 from the counter
* ... funny, isn't it? :)
*/

count -= 256;
#else
printk("do_slow_gettimeoffset(): hardware timer problem?\n");
#endif
}
}
} else
jiffies_p = jiffies_t;

/* remember the (possibly corrected) reading for the next call */
count_p = count;

/* convert the remaining count to elapsed time, rounding to nearest */
count = ((LATCH-1) - count) * TICK_SIZE;
count = (count + LATCH/2) / LATCH;

return count;
}

static unsigned long (*do_gettimeoffset)(void) = do_slow_gettimeoffset;

#else

#define do_gettimeoffset() do_fast_gettimeoffset()

#endif

static struct timeval latest_returned = { 0, 0 };

#define TIMEWARP_INFO

/*
 * This version of gettimeofday has microsecond resolution
 * and better than microsecond precision on fast x86 machines with TSC.
 *
 * The time is assembled from xtime, the not yet accounted lost_ticks and
 * the intra-tick offset returned by do_gettimeoffset(), all sampled under
 * the xtime_lock read lock.  The result is then compared against
 * latest_returned under the write lock: if it would be earlier than a
 * value already handed out, the latest value is returned instead, so the
 * values stored through tv never go backwards.
 *
 * With TIMEWARP_INFO defined, the number of such "time warps" and the
 * largest one seen are reported through printk roughly every 30 seconds.
 * The statistics are updated and snapshotted while the write lock is
 * held; the printk itself is issued after the lock has been dropped.
 *
 * tv: receives the current time (tv_usec normalised to 0..999999).
 */
void do_gettimeofday(struct timeval *tv)
{
	unsigned long flags;
	unsigned long usec, sec;
#ifdef TIMEWARP_INFO
	static int time_warps = 0;
	static unsigned long warp_log = 0;
	static struct timeval worst_warp = {0,0};
	int report = 0;
	int reported_warps = 0;
	long reported_sec = 0, reported_usec = 0;
#endif /* defined(TIMEWARP_INFO) */

	read_lock_irqsave(&xtime_lock, flags);
	usec = do_gettimeoffset();
	{
		/* ticks that have fired but are not yet folded into xtime */
		unsigned long lost = lost_ticks;
		if (lost)
			usec += lost * (1000000 / HZ);
	}
	sec = xtime.tv_sec;
	usec += xtime.tv_usec;
	read_unlock_irqrestore(&xtime_lock, flags);

	while (usec >= 1000000) {
		usec -= 1000000;
		sec++;
	}

	write_lock_irqsave(&xtime_lock, flags);
	if (sec < latest_returned.tv_sec ||
	    (sec == latest_returned.tv_sec && usec < latest_returned.tv_usec)) {
#ifdef TIMEWARP_INFO
		/*
		 * Size of the warp = latest_returned - (sec, usec).  usec is
		 * below 1000000 here, so the signed subtraction cannot
		 * overflow; borrow one second when the usec part is negative.
		 */
		long warp_sec = (long) (latest_returned.tv_sec - sec);
		long warp_usec = (long) latest_returned.tv_usec - (long) usec;

		if (warp_usec < 0) {
			warp_sec -= 1;
			warp_usec += 1000000;
		}

		++time_warps;

		/* keep the maximum, not merely the most recent warp */
		if (warp_sec > worst_warp.tv_sec ||
		    (warp_sec == worst_warp.tv_sec &&
		     warp_usec > worst_warp.tv_usec)) {
			worst_warp.tv_sec = warp_sec;
			worst_warp.tv_usec = warp_usec;
		}
#endif /* defined(TIMEWARP_INFO) */
		sec = latest_returned.tv_sec;
		usec = latest_returned.tv_usec;
	} else {
		latest_returned.tv_sec = sec;
		latest_returned.tv_usec = usec;
	}

#ifdef TIMEWARP_INFO
	/*
	 * Every 30 seconds: snapshot and reset the statistics while still
	 * holding the lock, so concurrent callers cannot lose or double
	 * count warps.
	 */
	if (sec > warp_log + 30) {
		report = 1;
		reported_warps = time_warps;
		reported_sec = worst_warp.tv_sec;
		reported_usec = worst_warp.tv_usec;
		warp_log = sec;
		time_warps = 0;
		worst_warp.tv_sec = worst_warp.tv_usec = 0;
	}
#endif /* defined(TIMEWARP_INFO) */
	write_unlock_irqrestore(&xtime_lock, flags);

	tv->tv_sec = sec;
	tv->tv_usec = usec;

#ifdef TIMEWARP_INFO
	/* Print time warp info every 30 seconds (outside the lock) */
	if (report)
		printk("%d time warps evaded in last 30 seconds (worst %ld.%06ld)\n",
		       reported_warps, reported_sec, reported_usec);
#endif /* defined(TIMEWARP_INFO) */
}

/*
 * Set the system time to *tv.
 *
 * xtime holds the time as of the last tick, whereas the caller supplies
 * the time as of "now".  The intra-tick offset that gettimeofday would
 * add is therefore subtracted first (this modifies *tv in place), and
 * the result is stored both in xtime and in latest_returned.  Any
 * adjtime() in progress is cancelled and the clock is marked
 * unsynchronised.
 */
void do_settimeofday(struct timeval *tv)
{
	write_lock_irq(&xtime_lock);

	/* undo the correction do_gettimeofday would apply on top of xtime */
	tv->tv_usec -= do_gettimeoffset();

	/* borrow whole seconds until the microsecond field is non-negative */
	for (; tv->tv_usec < 0; tv->tv_sec--)
		tv->tv_usec += 1000000;

	latest_returned = *tv;
	xtime = latest_returned;

	/* the clock was stepped by hand: reset the NTP state */
	time_adjust = 0; /* stop active adjtime() */
	time_status |= STA_UNSYNC;
	time_maxerror = NTP_PHASE_LIMIT;
	time_esterror = NTP_PHASE_LIMIT;

	write_unlock_irq(&xtime_lock);
}

/*
* In order to set the CMOS clock precisely, set_rtc_mmss has to be
* called 500 ms after the second nowtime has started, because when
* nowtime is written into the registers of the CMOS clock, it will
* jump to the next second precisely 500 ms later. Check the Motorola
* MC146818A or Dallas DS12887 data sheet for details.
*
* BUG: This routine does not handle hour overflow properly; it just
* sets the minutes. Usually you'll only notice that after reboot!
*/
/*
 * Write the minutes and seconds of nowtime into the CMOS clock.
 *
 * nowtime: time in seconds; only its minutes and seconds are used.
 *
 * Returns 0 when the clock was updated, or -1 (after logging a warning)
 * when the CMOS minutes differ from the computed minutes by 30 or more
 * and the update is refused.
 */
static int set_rtc_mmss(unsigned long nowtime)
{
int retval = 0;
int real_seconds, real_minutes, cmos_minutes;
unsigned char save_control, save_freq_select;

save_control = CMOS_READ(RTC_CONTROL); /* tell the clock it's being set */
CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL);

save_freq_select = CMOS_READ(RTC_FREQ_SELECT); /* stop and reset prescaler */
CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT);

/* current CMOS minutes, converted to binary if the clock stores BCD */
cmos_minutes = CMOS_READ(RTC_MINUTES);
if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
BCD_TO_BIN(cmos_minutes);

/*
* since we're only adjusting minutes and seconds,
* don't interfere with hour overflow. This avoids
* messing with unknown time zones but requires your
* RTC not to be off by more than 15 minutes
*/
real_seconds = nowtime % 60;
real_minutes = nowtime / 60;
if (((abs(real_minutes - cmos_minutes) + 15)/30) & 1)
real_minutes += 30; /* correct for half hour time zone */
real_minutes %= 60;

if (abs(real_minutes - cmos_minutes) < 30) {
/* convert back to BCD if that is what the clock stores */
if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
BIN_TO_BCD(real_seconds);
BIN_TO_BCD(real_minutes);
}
CMOS_WRITE(real_seconds,RTC_SECONDS);
CMOS_WRITE(real_minutes,RTC_MINUTES);
} else {
printk(KERN_WARNING
"set_rtc_mmss: can't update from %d to %d\n",
cmos_minutes, real_minutes);
retval = -1;
}

/* The following flags have to be released exactly in this order,
* otherwise the DS12887 (popular MC146818A clone with integrated
* battery and quartz) will not reset the oscillator and will not
* update precisely 500 ms later. You won't find this mentioned in
* the Dallas Semiconductor data sheets, but who believes data
* sheets anyway ... -- Markus Kuhn
*/
CMOS_WRITE(save_control, RTC_CONTROL);
CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);

return retval;
}

/* last time the cmos clock got updated */
static long last_rtc_update = 0;

/*
* timer_interrupt() needs to keep up the real-time clock,
* as well as call the "do_timer()" routine every clocktick
*/
/*
 * Per-tick work shared by the timer interrupt handler: run do_timer(),
 * feed the profiler (or the simulated local timer on SMP kernels without
 * an SMP configuration), periodically write the time back to the CMOS
 * clock, and acknowledge the interrupt on MCA machines.
 *
 * irq and dev_id are not used for dispatching here; regs is the
 * interrupted register state.
 */
static inline void do_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
#ifdef CONFIG_VISWS
	/* Clear the interrupt */
	co_cpu_write(CO_CPU_STAT, co_cpu_read(CO_CPU_STAT) & ~CO_STAT_TIMEINTR);
#endif

	do_timer(regs);

	/*
	 * Profiling: SMP kernels normally profile from the local APIC timer
	 * interrupt.  Only when SMP mode is simulated on a uniprocessor
	 * system does the local handler have to be called from here.
	 */
#ifdef __SMP__
	if (!smp_found_config)
		smp_local_timer_interrupt(regs);
#else
	if (!user_mode(regs))
		x86_do_profile(regs->eip);
#endif

	/*
	 * With an externally synchronized clock, refresh the CMOS clock
	 * every ~11 minutes.  set_rtc_mmss() has to run as close as
	 * possible to 500 ms before the new second starts, hence the
	 * half-tick window around tv_usec == 500000.
	 */
	if (!(time_status & STA_UNSYNC) &&
	    xtime.tv_sec > last_rtc_update + 660 &&
	    xtime.tv_usec >= 500000 - ((unsigned) tick) / 2 &&
	    xtime.tv_usec <= 500000 + ((unsigned) tick) / 2) {
		if (set_rtc_mmss(xtime.tv_sec) != 0)
			last_rtc_update = xtime.tv_sec - 600; /* do it again in 60 s */
		else
			last_rtc_update = xtime.tv_sec;
	}

#ifdef CONFIG_MCA
	if (MCA_bus) {
		/*
		 * The PS/2 uses level-triggered interrupts, so the timer
		 * interrupt must be acknowledged explicitly: IRQ 0 is reset
		 * by setting the high bit of PPI port B (0x61).  Some PS/2s,
		 * notably the 55SX, work fine without this.
		 */
		int port_b = inb_p(0x61);	/* read the current state */

		outb_p(port_b | 0x80, 0x61);	/* reset the IRQ */
	}
#endif
}

static int use_tsc = 0;

/*
* By making better use of the TSC information we are now also able to
* recover from lost timer interrupts. -Andrea
*/
/*
 * Detect timer interrupts that were lost since the previous one.
 *
 * delta_cycles: TSC cycles elapsed between the previous and the current
 *               timer interrupt (low 32 bits).
 * delay_usec:   difference between the interrupt delay measured at this
 *               interrupt and the delay measured at the previous one.
 * regs:         interrupted register state; regs->eip is only logged.
 *
 * When more than one tick's worth of time has passed, the surplus ticks
 * are logged together with the current process.  They are added to
 * lost_ticks and jiffies only under the preprocessor condition below.
 */
static inline void recover_lost_timer(unsigned long delta_cycles,
int delay_usec, struct pt_regs *regs)
{
/*
* The algorithm I invented to know if we lost an irq in the meantime
* works this way:
*
* - convert delta from cycles to usec
* - remove from the delta the latency of the irqs
* - convert from usec to timer ticks
*
* -Andrea
*/

register unsigned long delta;

/* delta = upper 32 bits of delta_cycles * fast_gettimeoffset_quotient */
__asm__("mull %2"
:"=a" (delta_cycles), "=d" (delta)
:"g" (fast_gettimeoffset_quotient), "0" (delta_cycles));
delta -= delay_usec;
/* round to the nearest whole number of ticks */
delta = (delta + 500000/HZ) / (1000000/HZ);

/* one tick (or less) elapsed: nothing was lost */
if ((long) delta <= 1)
return;

/* the current interrupt accounts for one tick; the rest were lost */
delta -= 1;
printk(KERN_NOTICE "recover_lost_timer: lost %lu tick%c from %08lx "
"process %s pid %d\n", delta, delta > 1 ? 's' : ' ', regs->eip,
current->comm, current->pid);
/*
* With APM enabled the TSC can break, so we don't trust it. -Andrea
*
* NOTE(review): as written, the recovery below is compiled in only
* when CONFIG_APM is defined and CONFIG_SMP is not, which reads as the
* opposite of the sentence above.  Confirm which condition is intended.
*/
#if defined(CONFIG_APM) && ! defined(CONFIG_SMP)
lost_ticks += delta;
jiffies += delta;
#endif
}

/*
 * Timer interrupt handler (installed through irq0).
 *
 * When the TSC is in use, the current Time Stamp Counter value and the
 * delay of this interrupt (derived from the 8254 counter) are saved so
 * that the time of day can later be estimated more exactly, and
 * recover_lost_timer() is given the chance to detect missed ticks.  The
 * regular per-tick work is then done by do_timer_interrupt().
 *
 * Everything runs under the xtime_lock write lock.
 */
static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
	/*
	 * Here we are in the timer irq handler. We just have irqs locally
	 * disabled but we don't know if the timer_bh is running on the other
	 * CPU. We need to avoid an SMP race with it. NOTE: we don't need
	 * the irq version of write_lock because, as just said, we have irqs
	 * locally disabled. -Andrea
	 */
	write_lock(&xtime_lock);

	if (use_tsc) {
		/*
		 * It is important that the TSC read and the 8254 read happen
		 * almost at the same time. We do the RDTSC stuff first, since
		 * it's faster. To avoid any inconsistencies, we need
		 * interrupts disabled locally.
		 *
		 * Interrupts are just disabled locally since the timer irq
		 * has the SA_INTERRUPT flag set. -Andrea
		 */
		unsigned long old_cycles = last_tsc_low;
		int old_delay, count;

		/*
		 * Read the Pentium cycle counter exactly once.  The TSC used
		 * to be read twice back to back into last_tsc_low here, which
		 * only threw the first value away and pushed the 8254 read
		 * further from the TSC sample.
		 */
		rdtscl(last_tsc_low);

		/* how far into the current tick the 8254 already is */
		count = get_8254_timer_count();
		count = ((LATCH-1) - count) * TICK_SIZE;

		old_delay = delay_at_last_interrupt;
		delay_at_last_interrupt = (count + LATCH/2) / LATCH;

		recover_lost_timer(last_tsc_low - old_cycles,
				   delay_at_last_interrupt - old_delay,
				   regs);
	}

	do_timer_interrupt(irq, NULL, regs);

	write_unlock(&xtime_lock);
}

/* Converts Gregorian date to seconds since 1970-01-01 00:00:00.
* Assumes input in normal date format, i.e. 1980-12-31 23:59:59
* => year=1980, mon=12, day=31, hour=23, min=59, sec=59.
*
* [For the Julian calendar (which was used in Russia before 1917,
* Britain & colonies before 1752, anywhere else before 1582,
* and is still in use by some communities) leave out the
* -year/100+year/400 terms, and add 10.]
*
* This algorithm was first published by Gauss (I think).
*
* WARNING: this function will overflow on 2106-02-07 06:28:16 on
* machines where long is 32-bit! (However, as time_t is signed, we
* will already get problems at other places on 2038-01-19 03:14:08)
*/
/*
 * Convert a Gregorian calendar date and time of day into seconds since
 * 1970-01-01 00:00:00.
 *
 * year: full year (e.g. 1980), mon: 1..12, day: day of month,
 * hour/min/sec: time of day.
 */
static inline unsigned long mktime(unsigned int year, unsigned int mon,
				   unsigned int day, unsigned int hour,
				   unsigned int min, unsigned int sec)
{
	unsigned long days, hours, minutes;

	/*
	 * Shift the calendar so that the year starts in March:
	 * 1..12 -> 11,12,1..10.  This puts February last, since it is the
	 * month that carries the leap day.
	 */
	if ((int) (mon -= 2) <= 0) {
		mon += 12;
		year -= 1;
	}

	/* whole days since the epoch */
	days = (unsigned long)(year/4 - year/100 + year/400 + 367*mon/12 + day) +
		year*365 - 719499;

	hours = days*24 + hour;
	minutes = hours*60 + min;

	return minutes*60 + sec;
}

/* not static: needed by APM */
/*
 * Read the date and time from the CMOS real-time clock and return it as
 * seconds since 1970-01-01 00:00:00 (via mktime()).
 *
 * The function first waits for an update cycle of the clock to start and
 * finish, so it may block for up to about a second.  Two-digit years
 * below 70 are taken to be in the 2000s.
 */
unsigned long get_cmos_time(void)
{
unsigned int year, mon, day, hour, min, sec;
int i;

/* The Linux interpretation of the CMOS clock register contents:
* When the Update-In-Progress (UIP) flag goes from 1 to 0, the
* RTC registers show the second which has precisely just started.
* Let's hope other operating systems interpret the RTC the same way.
*/
/* read RTC exactly on falling edge of update flag */
/* first wait (bounded) until UIP becomes set ... */
for (i = 0 ; i < 1000000 ; i++) /* may take up to 1 second... */
if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP)
break;
/* ... then wait (bounded) until it clears again */
for (i = 0 ; i < 1000000 ; i++) /* must try at least 2.228 ms */
if (!(CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP))
break;
/* re-read everything if the seconds register changed underneath us */
do { /* Isn't this overkill ? UIP above should guarantee consistency */
sec = CMOS_READ(RTC_SECONDS);
min = CMOS_READ(RTC_MINUTES);
hour = CMOS_READ(RTC_HOURS);
day = CMOS_READ(RTC_DAY_OF_MONTH);
mon = CMOS_READ(RTC_MONTH);
year = CMOS_READ(RTC_YEAR);
} while (sec != CMOS_READ(RTC_SECONDS));
/* convert from BCD unless the clock is in binary mode */
if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
{
BCD_TO_BIN(sec);
BCD_TO_BIN(min);
BCD_TO_BIN(hour);
BCD_TO_BIN(day);
BCD_TO_BIN(mon);
BCD_TO_BIN(year);
}
/* the register holds a two-digit year: 70..99 -> 19xx, 00..69 -> 20xx */
if ((year += 1900) < 1970)
year += 100;
return mktime(year, mon, day, hour, min, sec);
}

static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT, 0, "timer", NULL, NULL};

/* ------ Calibrate the TSC -------
* Return 2^32 * (1 / (TSC clocks per usec)) for do_fast_gettimeoffset().
* Too much 64-bit arithmetic here to do this cleanly in C, and for
* accuracy's sake we want to keep the overhead on the CTC speaker (channel 2)
* output busy loop as low as possible. We avoid reading the CTC registers
* directly because of the awkward 8-bit access mechanism of the 82C54
* device.
*/

#define CALIBRATE_LATCH (5 * LATCH)
#define CALIBRATE_TIME (5 * 1000020/HZ)

/*
 * Measure how many TSC cycles elapse while CTC channel 2 counts down
 * CALIBRATE_LATCH ticks, and return (CALIBRATE_TIME * 2^32) divided by
 * that cycle count, i.e. the multiplier used by do_fast_gettimeoffset().
 *
 * Side effect: last_tsc_low is set to the low TSC word read at the end
 * of the measurement.
 *
 * Returns 0 when the measurement is unusable (see the bad_ctc cases).
 */
__initfunc(static unsigned long calibrate_tsc(void))
{
/* Set the Gate high, disable speaker */
outb((inb(0x61) & ~0x02) | 0x01, 0x61);

/*
* Now let's take care of CTC channel 2
*
* Set the Gate high, program CTC channel 2 for mode 0,
* (interrupt on terminal count mode), binary count,
* load 5 * LATCH count, (LSB and MSB) to begin countdown.
*/
outb(0xb0, 0x43); /* binary, mode 0, LSB/MSB, Ch 2 */
outb(CALIBRATE_LATCH & 0xff, 0x42); /* LSB of count */
outb(CALIBRATE_LATCH >> 8, 0x42); /* MSB of count */

{
unsigned long startlow, starthigh;
unsigned long endlow, endhigh;
unsigned long count;

/* busy-wait until bit 0x20 of port 0x61 becomes set, counting the polls */
rdtsc(startlow,starthigh);
count = 0;
do {
count++;
} while ((inb(0x61) & 0x20) == 0);
rdtsc(endlow,endhigh);

last_tsc_low = endlow;

/* Error: ECTCNEVERSET */
if (count <= 1)
goto bad_ctc;

/* 64-bit subtract - gcc just messes up with long longs */
/* endhigh:endlow -= starthigh:startlow */
__asm__("subl %2,%0\n\t"
"sbbl %3,%1"
:"=a" (endlow), "=d" (endhigh)
:"g" (startlow), "g" (starthigh),
"0" (endlow), "1" (endhigh));

/* Error: ECPUTOOFAST */
if (endhigh)
goto bad_ctc;

/* Error: ECPUTOOSLOW */
if (endlow <= CALIBRATE_TIME)
goto bad_ctc;

/*
* Divide the 64-bit value CALIBRATE_TIME:0 (edx:eax) by the cycle
* count; the check above guarantees the quotient fits in 32 bits.
*/
__asm__("divl %2"
:"=a" (endlow), "=d" (endhigh)
:"r" (endlow), "0" (0), "1" (CALIBRATE_TIME));

return endlow;
}

/*
* The CTC wasn't reliable: we got a hit on the very first read,
* or the CPU was so fast/slow that the quotient wouldn't fit in
* 32 bits..
*/
bad_ctc:
return 0;
}

/*
 * Boot-time initialisation of timekeeping: seed xtime from the CMOS
 * clock, calibrate and enable TSC-based time offsets when the boot CPU
 * has a usable TSC, and install the timer interrupt handler (irq0).
 */
__initfunc(void time_init(void))
{
xtime.tv_sec = get_cmos_time();
xtime.tv_usec = 0;

/*
* If we have APM enabled or the CPU clock speed is variable
* (CPU stops clock on HLT or slows clock to save power)
* then the TSC timestamps may diverge by up to 1 jiffy from
* 'real time' but nothing will break.
* The most frequent case is that the CPU is "woken" from a halt
* state by the timer interrupt itself, so we get 0 error. In the
* rare cases where a driver would "wake" the CPU and request a
* timestamp, the maximum error is < 1 jiffy. But timestamps are
* still perfectly ordered.
* Note that the TSC counter will be reset if APM suspends
* to disk; this won't break the kernel, though, 'cuz we're
* smart. See arch/i386/kernel/apm.c.
*/
/*
* Firstly we have to do a CPU check for chips with
* a potentially buggy TSC. At this point we haven't run
* the ident/bugs checks so we must run this hook as it
* may turn off the TSC flag.
*
* NOTE: this doesn't yet handle SMP 486 machines where only
* some CPUs have a TSC. That's never worked and nobody has
* moaned; if you have the only one in the world - you fix it!
*/

dodgy_tsc();

if (boot_cpu_data.x86_capability & X86_FEATURE_TSC) {
unsigned long tsc_quotient = calibrate_tsc();
/* a zero quotient means calibration failed: stay with the slow path */
if (tsc_quotient) {
fast_gettimeoffset_quotient = tsc_quotient;
use_tsc = 1;
/* do_gettimeoffset is a function pointer unless it was #defined as a macro */
#ifndef do_gettimeoffset
do_gettimeoffset = do_fast_gettimeoffset;
#endif
do_get_fast_time = do_gettimeofday;

/* report CPU clock rate in Hz.
* The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
* clock/second. Our precision is about 100 ppm.
*/
/* edx:eax = 1000000:0, i.e. 10^6 * 2^32, divided by tsc_quotient */
{ unsigned long eax=0, edx=1000000;
__asm__("divl %2"
:"=a" (cpu_hz), "=d" (edx)
:"r" (tsc_quotient),
"0" (eax), "1" (edx));
printk("Detected %ld Hz processor.\n", cpu_hz);
}
}
}

#ifdef CONFIG_VISWS
printk("Starting Cobalt Timer system clock\n");

/* Set the countdown value */
co_cpu_write(CO_CPU_TIMEVAL, CO_TIME_HZ/HZ);

/* Start the timer */
co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) | CO_CTRL_TIMERUN);

/* Enable (unmask) the timer interrupt */
co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) & ~CO_CTRL_TIMEMASK);

/* Wire cpu IDT entry to s/w handler (and Cobalt APIC to IDT) */
setup_x86_irq(CO_IRQ_TIMER, &irq0);
#else
setup_x86_irq(0, &irq0);
#endif
}

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/