[PATCH] kcov: fix race caused by unblocked interrupt

From: Congyu Liu
Date: Tue May 17 2022 - 17:07:05 EST


Some code that runs in interrupt context cannot be excluded by the `in_task()` check.
In some unfortunate interleavings, such interrupt is raised during
serializing trace data and the incoming nested trace function could
lead to loss of previous trace data. For instance, in
`__sanitizer_cov_trace_pc`, if such interrupt is raised between
`area[pos] = ip;` and `WRITE_ONCE(area[0], pos);`, then trace data in
`area[pos]` could be overwritten by the nested call before the count in `area[0]` is updated.

The fix is done by adding a flag indicating if the trace buffer is being
updated. No modification to trace buffer is allowed when the flag is set.

Signed-off-by: Congyu Liu <liu3101@xxxxxxxxxx>
---
include/linux/sched.h | 3 +++
kernel/kcov.c | 16 ++++++++++++++++
2 files changed, 19 insertions(+)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index a8911b1f35aa..d06cedd9595f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1408,6 +1408,9 @@ struct task_struct {

/* Collect coverage from softirq context: */
unsigned int kcov_softirq;
+
+ /* Flag of if KCOV area is being written: */
+ bool kcov_writing;
#endif

#ifdef CONFIG_MEMCG
diff --git a/kernel/kcov.c b/kernel/kcov.c
index b3732b210593..a595a8ad5d8a 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -165,6 +165,8 @@ static notrace bool check_kcov_mode(enum kcov_mode needed_mode, struct task_stru
*/
if (!in_task() && !(in_serving_softirq() && t->kcov_softirq))
return false;
+ if (READ_ONCE(t->kcov_writing))
+ return false;
mode = READ_ONCE(t->kcov_mode);
/*
* There is some code that runs in interrupts but for which
@@ -201,12 +203,19 @@ void notrace __sanitizer_cov_trace_pc(void)
return;

area = t->kcov_area;
+
+ /* Prevent race from unblocked interrupt. */
+ WRITE_ONCE(t->kcov_writing, true);
+ barrier();
+
/* The first 64-bit word is the number of subsequent PCs. */
pos = READ_ONCE(area[0]) + 1;
if (likely(pos < t->kcov_size)) {
area[pos] = ip;
WRITE_ONCE(area[0], pos);
}
+ barrier();
+ WRITE_ONCE(t->kcov_writing, false);
}
EXPORT_SYMBOL(__sanitizer_cov_trace_pc);

@@ -230,6 +239,10 @@ static void notrace write_comp_data(u64 type, u64 arg1, u64 arg2, u64 ip)
area = (u64 *)t->kcov_area;
max_pos = t->kcov_size * sizeof(unsigned long);

+ /* Prevent race from unblocked interrupt. */
+ WRITE_ONCE(t->kcov_writing, true);
+ barrier();
+
count = READ_ONCE(area[0]);

/* Every record is KCOV_WORDS_PER_CMP 64-bit words. */
@@ -242,6 +255,8 @@ static void notrace write_comp_data(u64 type, u64 arg1, u64 arg2, u64 ip)
area[start_index + 3] = ip;
WRITE_ONCE(area[0], count + 1);
}
+ barrier();
+ WRITE_ONCE(t->kcov_writing, false);
}

void notrace __sanitizer_cov_trace_cmp1(u8 arg1, u8 arg2)
@@ -335,6 +350,7 @@ static void kcov_start(struct task_struct *t, struct kcov *kcov,
t->kcov_size = size;
t->kcov_area = area;
t->kcov_sequence = sequence;
+ t->kcov_writing = false;
/* See comment in check_kcov_mode(). */
barrier();
WRITE_ONCE(t->kcov_mode, mode);
--
2.34.1