Re: [GIT pull] printk updates for 4.15

From: Thomas Gleixner
Date: Tue Nov 14 2017 - 17:51:16 EST


n Tue, 14 Nov 2017, Linus Torvalds wrote:
> On Tue, Nov 14, 2017 at 2:10 PM, Mark Salyzyn <salyzyn@xxxxxxxxxxx> wrote:
> > On 11/14/2017 01:29 PM, Linus Torvalds wrote:
> >>
> >> And then maybe you can send a single patch that changes *ONLY*
> >> print_time(), with an explanation of how Android uses
> >> CONFIG_PRINTK_TIME and would like that to be realtime instead of the
> >> monotonic clock that we traditionally use.
> >
> > print_time() uses msg->ts_nsec; and AFAIK that needs to be in reference to
> > the timebase.
>
> I'd suggest simply adding another time to the internal 'struct
> printk_log' (and probably just set it in "log_store()").
>
> Doesn't that seem much simpler and more targeted?
>
> And exactly because it's targeted and only changes that one thing, now
> people that don't run Android, and have systemd that actually looks at
> that timestamp, can _also_ enable that wall clock output if they want
> to, without it impacting timestamps at all.
>
> In fact, I would expect that at that point you actually would want to
> have multiple different formats that don't necessarily have anything
> to do with the clock source, but simply with the formatting.
>
> Do you want wallclock to be shown in UTC? Do you want to show it in
> the system timezeone? Do you want to always show the full resolution,
> or do you want to show something that takes the previous log entry
> into account, so it only shows the date if it has actually changed,
> and maybe shows the time as just a delta if it is small?
>
> Because now it's clearly about the freeflowing _text_, not about the
> timestamp that is processed by tools.
>
> The text field done by print_time() really is _that_ different from
> the timestamp field that is exposed to tools.
>
> Yes, somebody might still be parsing it, and maybe then you need to
> set that text field to a legacy format anyway, but at least it's a
> whole lot easier to explain what's going on.

Something like the untested patch below should do the storage and converts
the primary timestamp from local_clock() to clock monotonic.

The print out and /dev/kmsg is unchanged. That's a separate story.

Thanks,

tglx

8<-------------------
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -352,7 +352,11 @@ enum log_flags {
};

struct printk_log {
- u64 ts_nsec; /* timestamp in nanoseconds */
+ /* Timestamps in nanoseconds */
+ union {
+ u64 ts_nsec;
+ struct timestamps ts;
+ };
u16 len; /* length of entire record */
u16 text_len; /* length of text buffer */
u16 dict_len; /* length of dictionary buffer */
@@ -578,7 +582,7 @@ static u32 truncate_msg(u16 *text_len, u

/* insert record into the buffer, discard old ones, update heads */
static int log_store(int facility, int level,
- enum log_flags flags, u64 ts_nsec,
+ enum log_flags flags, struct timestamps *ts,
const char *dict, u16 dict_len,
const char *text, u16 text_len)
{
@@ -621,10 +625,10 @@ static int log_store(int facility, int l
msg->facility = facility;
msg->level = level & 7;
msg->flags = flags & 0x1f;
- if (ts_nsec > 0)
- msg->ts_nsec = ts_nsec;
+ if (ts && ts->mono > 0)
+ msg->ts = *ts;
else
- msg->ts_nsec = local_clock();
+ ktime_get_fast_timestamps(&msg->ts);
memset(log_dict(msg) + dict_len, 0, pad_len);
msg->len = size;

@@ -687,7 +691,7 @@ static void append_char(char **pp, char
static ssize_t msg_print_ext_header(char *buf, size_t size,
struct printk_log *msg, u64 seq)
{
- u64 ts_usec = msg->ts_nsec;
+ u64 ts_usec = msg->ts.mono;

do_div(ts_usec, 1000);

@@ -1022,6 +1026,7 @@ void log_buf_vmcoreinfo_setup(void)
*/
VMCOREINFO_STRUCT_SIZE(printk_log);
VMCOREINFO_OFFSET(printk_log, ts_nsec);
+ VMCOREINFO_OFFSET(printk_log, ts);
VMCOREINFO_OFFSET(printk_log, len);
VMCOREINFO_OFFSET(printk_log, text_len);
VMCOREINFO_OFFSET(printk_log, dict_len);
@@ -1204,17 +1209,18 @@ static inline void boot_delay_msec(int l
static bool printk_time = IS_ENABLED(CONFIG_PRINTK_TIME);
module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR);

-static size_t print_time(u64 ts, char *buf)
+static size_t print_time(const struct timestamps *ts, char *buf)
{
unsigned long rem_nsec;
+ u64 mono = ts->mono;

if (!printk_time)
return 0;

- rem_nsec = do_div(ts, 1000000000);
+ rem_nsec = do_div(mono, 1000000000);

if (!buf)
- return snprintf(NULL, 0, "[%5lu.000000] ", (unsigned long)ts);
+ return snprintf(NULL, 0, "[%5lu.000000] ", (unsigned long)mono);

return sprintf(buf, "[%5lu.%06lu] ",
(unsigned long)ts, rem_nsec / 1000);
@@ -1239,7 +1245,7 @@ static size_t print_prefix(const struct
}
}

- len += print_time(msg->ts_nsec, buf ? buf + len : NULL);
+ len += print_time(&msg->ts, buf ? buf + len : NULL);
return len;
}

@@ -1599,7 +1605,7 @@ static struct cont {
char buf[LOG_LINE_MAX];
size_t len; /* length == 0 means unused buffer */
struct task_struct *owner; /* task of first print*/
- u64 ts_nsec; /* time of first print */
+ struct timestamps ts; /* time of first print */
u8 level; /* log level of first message */
u8 facility; /* log facility of first message */
enum log_flags flags; /* prefix, newline flags */
@@ -1610,7 +1616,7 @@ static void cont_flush(void)
if (cont.len == 0)
return;

- log_store(cont.facility, cont.level, cont.flags, cont.ts_nsec,
+ log_store(cont.facility, cont.level, cont.flags, &cont.ts,
NULL, 0, cont.buf, cont.len);
cont.len = 0;
}
@@ -1631,7 +1637,7 @@ static bool cont_add(int facility, int l
cont.facility = facility;
cont.level = level;
cont.owner = current;
- cont.ts_nsec = local_clock();
+ ktime_get_fast_timestamps(&cont.ts);
cont.flags = flags;
}

@@ -1677,7 +1683,8 @@ static size_t log_output(int facility, i
}

/* Store it in the record log */
- return log_store(facility, level, lflags, 0, dict, dictlen, text, text_len);
+ return log_store(facility, level, lflags, NULL, dict, dictlen, text,
+ text_len);
}

asmlinkage int vprintk_emit(int facility, int level,
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -146,7 +146,19 @@ extern void timekeeping_inject_sleeptime
* PPS accessor
*/
extern void ktime_get_raw_and_real_ts64(struct timespec64 *ts_raw,
- struct timespec64 *ts_real);
+ struct timespec64 *ts_real);
+
+/*
+ * struct timestanps - Simultaneous mono/boot/real timestamps
+ * @mono: Monotonic timestamp
+ * @boot: Boottime timestamp
+ * @real: Realtime timestamp
+ */
+struct timestamps {
+ u64 mono;
+ u64 boot;
+ u64 real;
+};

/*
* struct system_time_snapshot - simultaneous raw/real time capture with
@@ -206,6 +218,9 @@ extern int get_device_system_crosststamp
*/
extern void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot);

+/* NMI safe mono/boot/realtime timestamps */
+extern void ktime_get_fast_timestamps(struct timestamps *snap);
+
/*
* Persistent clock related interfaces
*/
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -496,29 +496,29 @@ u64 notrace ktime_get_boot_fast_ns(void)
}
EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns);

-
/*
* See comment for __ktime_get_fast_ns() vs. timestamp ordering
*/
-static __always_inline u64 __ktime_get_real_fast_ns(struct tk_fast *tkf)
+static notrace u64 __ktime_get_real_fast(struct tk_fast *tkf, u64 *mono)
{
struct tk_read_base *tkr;
+ u64 basem, baser, delta;
unsigned int seq;
- u64 now;

do {
seq = raw_read_seqcount_latch(&tkf->seq);
tkr = tkf->base + (seq & 0x01);
- now = ktime_to_ns(tkr->base_real);
+ basem = ktime_to_ns(tkr->base);
+ baser = ktime_to_ns(tkr->base_real);

- now += timekeeping_delta_to_ns(tkr,
- clocksource_delta(
- tk_clock_read(tkr),
- tkr->cycle_last,
- tkr->mask));
+ delta = timekeeping_delta_to_ns(tkr,
+ clocksource_delta(tk_clock_read(tkr),
+ tkr->cycle_last, tkr->mask));
} while (read_seqcount_retry(&tkf->seq, seq));

- return now;
+ if (mono)
+ *mono = basem + delta;
+ return baser + delta;
}

/**
@@ -526,11 +526,25 @@ static __always_inline u64 __ktime_get_r
*/
u64 ktime_get_real_fast_ns(void)
{
- return __ktime_get_real_fast_ns(&tk_fast_mono);
+ return __ktime_get_real_fast(&tk_fast_mono, NULL);
}
EXPORT_SYMBOL_GPL(ktime_get_real_fast_ns);

/**
+ * ktime_get_fast_timestamps: - NMI safe timestamps
+ * @snap: Pointer to timestamp storage
+ *
+ * Stores clock monotonic, boottime and realtime time stamps
+ */
+void ktime_get_fast_timestamps(struct timestamps *snap)
+{
+ struct timekeeper *tk = &tk_core.timekeeper;
+
+ snap->real = __ktime_get_real_fast(&tk_fast_mono, &snap->mono);
+ snap->boot = snap->mono + ktime_to_ns(tk->offs_boot);
+}
+
+/**
* halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource.
* @tk: Timekeeper to snapshot.
*