RE: [PATCH 3/3] modules: fix longstanding /proc/kallsyms vs module insertion race.

From: åæéå / HIRAMATUïMASAMI
Date: Fri Feb 05 2016 - 01:41:26 EST


From: Rusty Russell [mailto:rusty@xxxxxxxxxxxxxxx]
>
>For CONFIG_KALLSYMS, we keep two symbol tables and two string tables.
>There's one full copy, marked SHF_ALLOC and laid out at the end of the
>module's init section. There's also a cut-down version that only
>contains core symbols and strings, and lives in the module's core
>section.
>
>After module init (and before we free the module memory), we switch
>the mod->symtab, mod->num_symtab and mod->strtab to point to the core
>versions. We do this under the module_mutex.
>
>However, kallsyms doesn't take the module_mutex: it uses
>preempt_disable() and rcu tricks to walk through the modules, because
>it's used in the oops path. It's also used in /proc/kallsyms.
>There's nothing atomic about the change of these variables, so we can
>get the old (larger!) num_symtab and the new symtab pointer; in fact
>this is what I saw when trying to reproduce.
>
>By grouping these variables together, we can use a
>carefully-dereferenced pointer to ensure we always get one or the
>other (the free of the module init section is already done in an RCU
>callback, so that's safe). We allocate the init one at the end of the
>module init section, and keep the core one inside the struct module
>itself (it could also have been allocated at the end of the module
>core, but that's probably overkill).
>
>Reported-by: Weilong Chen <chenweilong@xxxxxxxxxx>
>Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=111541
>Cc: stable@xxxxxxxxxx
>Signed-off-by: Rusty Russell <rusty@xxxxxxxxxxxxxxx>

Looks good to me.

Reviewed-by: Masami Hiramatsu <masami.hiramatsu.pt@xxxxxxxxxxx>

Thanks!

>---
> include/linux/module.h | 19 +++++----
> kernel/module.c | 112 ++++++++++++++++++++++++++++++-------------------
> 2 files changed, 79 insertions(+), 52 deletions(-)
>
>diff --git a/include/linux/module.h b/include/linux/module.h
>index 4560d8f1545d..2bb0c3085706 100644
>--- a/include/linux/module.h
>+++ b/include/linux/module.h
>@@ -324,6 +324,12 @@ struct module_layout {
> #define __module_layout_align
> #endif
>
>+struct mod_kallsyms {
>+ Elf_Sym *symtab;
>+ unsigned int num_symtab;
>+ char *strtab;
>+};
>+
> struct module {
> enum module_state state;
>
>@@ -405,15 +411,10 @@ struct module {
> #endif
>
> #ifdef CONFIG_KALLSYMS
>- /*
>- * We keep the symbol and string tables for kallsyms.
>- * The core_* fields below are temporary, loader-only (they
>- * could really be discarded after module init).
>- */
>- Elf_Sym *symtab, *core_symtab;
>- unsigned int num_symtab, core_num_syms;
>- char *strtab, *core_strtab;
>-
>+ /* Protected by RCU and/or module_mutex: use rcu_dereference() */
>+ struct mod_kallsyms *kallsyms;
>+ struct mod_kallsyms core_kallsyms;
>+
> /* Section attributes */
> struct module_sect_attrs *sect_attrs;
>
>diff --git a/kernel/module.c b/kernel/module.c
>index 1e79d8157712..9537da37ce87 100644
>--- a/kernel/module.c
>+++ b/kernel/module.c
>@@ -303,6 +303,9 @@ struct load_info {
> struct _ddebug *debug;
> unsigned int num_debug;
> bool sig_ok;
>+#ifdef CONFIG_KALLSYMS
>+ unsigned long mod_kallsyms_init_off;
>+#endif
> struct {
> unsigned int sym, str, mod, vers, info, pcpu;
> } index;
>@@ -2480,10 +2483,21 @@ static void layout_symtab(struct module *mod, struct load_info *info)
> strsect->sh_flags |= SHF_ALLOC;
> strsect->sh_entsize = get_offset(mod, &mod->init_layout.size, strsect,
> info->index.str) | INIT_OFFSET_MASK;
>- mod->init_layout.size = debug_align(mod->init_layout.size);
> pr_debug("\t%s\n", info->secstrings + strsect->sh_name);
>+
>+ /* We'll tack temporary mod_kallsyms on the end. */
>+ mod->init_layout.size = ALIGN(mod->init_layout.size,
>+ __alignof__(struct mod_kallsyms));
>+ info->mod_kallsyms_init_off = mod->init_layout.size;
>+ mod->init_layout.size += sizeof(struct mod_kallsyms);
>+ mod->init_layout.size = debug_align(mod->init_layout.size);
> }
>
>+/*
>+ * We use the full symtab and strtab which layout_symtab arranged to
>+ * be appended to the init section. Later we switch to the cut-down
>+ * core-only ones.
>+ */
> static void add_kallsyms(struct module *mod, const struct load_info *info)
> {
> unsigned int i, ndst;
>@@ -2492,29 +2506,34 @@ static void add_kallsyms(struct module *mod, const struct load_info *info)
> char *s;
> Elf_Shdr *symsec = &info->sechdrs[info->index.sym];
>
>- mod->symtab = (void *)symsec->sh_addr;
>- mod->num_symtab = symsec->sh_size / sizeof(Elf_Sym);
>+ /* Set up to point into init section. */
>+ mod->kallsyms = mod->init_layout.base + info->mod_kallsyms_init_off;
>+
>+ mod->kallsyms->symtab = (void *)symsec->sh_addr;
>+ mod->kallsyms->num_symtab = symsec->sh_size / sizeof(Elf_Sym);
> /* Make sure we get permanent strtab: don't use info->strtab. */
>- mod->strtab = (void *)info->sechdrs[info->index.str].sh_addr;
>+ mod->kallsyms->strtab = (void *)info->sechdrs[info->index.str].sh_addr;
>
> /* Set types up while we still have access to sections. */
>- for (i = 0; i < mod->num_symtab; i++)
>- mod->symtab[i].st_info = elf_type(&mod->symtab[i], info);
>-
>- mod->core_symtab = dst = mod->core_layout.base + info->symoffs;
>- mod->core_strtab = s = mod->core_layout.base + info->stroffs;
>- src = mod->symtab;
>- for (ndst = i = 0; i < mod->num_symtab; i++) {
>+ for (i = 0; i < mod->kallsyms->num_symtab; i++)
>+ mod->kallsyms->symtab[i].st_info
>+ = elf_type(&mod->kallsyms->symtab[i], info);
>+
>+ /* Now populate the cut down core kallsyms for after init. */
>+ mod->core_kallsyms.symtab = dst = mod->core_layout.base + info->symoffs;
>+ mod->core_kallsyms.strtab = s = mod->core_layout.base + info->stroffs;
>+ src = mod->kallsyms->symtab;
>+ for (ndst = i = 0; i < mod->kallsyms->num_symtab; i++) {
> if (i == 0 ||
> is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum,
> info->index.pcpu)) {
> dst[ndst] = src[i];
>- dst[ndst++].st_name = s - mod->core_strtab;
>- s += strlcpy(s, &mod->strtab[src[i].st_name],
>+ dst[ndst++].st_name = s - mod->core_kallsyms.strtab;
>+ s += strlcpy(s, &mod->kallsyms->strtab[src[i].st_name],
> KSYM_NAME_LEN) + 1;
> }
> }
>- mod->core_num_syms = ndst;
>+ mod->core_kallsyms.num_symtab = ndst;
> }
> #else
> static inline void layout_symtab(struct module *mod, struct load_info *info)
>@@ -3263,9 +3282,8 @@ static noinline int do_init_module(struct module *mod)
> module_put(mod);
> trim_init_extable(mod);
> #ifdef CONFIG_KALLSYMS
>- mod->num_symtab = mod->core_num_syms;
>- mod->symtab = mod->core_symtab;
>- mod->strtab = mod->core_strtab;
>+ /* Switch to core kallsyms now init is done: kallsyms may be walking! */
>+ rcu_assign_pointer(mod->kallsyms, &mod->core_kallsyms);
> #endif
> mod_tree_remove_init(mod);
> disable_ro_nx(&mod->init_layout);
>@@ -3627,9 +3645,9 @@ static inline int is_arm_mapping_symbol(const char *str)
> && (str[2] == '\0' || str[2] == '.');
> }
>
>-static const char *symname(struct module *mod, unsigned int symnum)
>+static const char *symname(struct mod_kallsyms *kallsyms, unsigned int symnum)
> {
>- return mod->strtab + mod->symtab[symnum].st_name;
>+ return kallsyms->strtab + kallsyms->symtab[symnum].st_name;
> }
>
> static const char *get_ksymbol(struct module *mod,
>@@ -3639,6 +3657,7 @@ static const char *get_ksymbol(struct module *mod,
> {
> unsigned int i, best = 0;
> unsigned long nextval;
>+ struct mod_kallsyms *kallsyms = rcu_dereference_sched(mod->kallsyms);
>
> /* At worse, next value is at end of module */
> if (within_module_init(addr, mod))
>@@ -3648,32 +3667,32 @@ static const char *get_ksymbol(struct module *mod,
>
> /* Scan for closest preceding symbol, and next symbol. (ELF
> starts real symbols at 1). */
>- for (i = 1; i < mod->num_symtab; i++) {
>- if (mod->symtab[i].st_shndx == SHN_UNDEF)
>+ for (i = 1; i < kallsyms->num_symtab; i++) {
>+ if (kallsyms->symtab[i].st_shndx == SHN_UNDEF)
> continue;
>
> /* We ignore unnamed symbols: they're uninformative
> * and inserted at a whim. */
>- if (*symname(mod, i) == '\0'
>- || is_arm_mapping_symbol(symname(mod, i)))
>+ if (*symname(kallsyms, i) == '\0'
>+ || is_arm_mapping_symbol(symname(kallsyms, i)))
> continue;
>
>- if (mod->symtab[i].st_value <= addr
>- && mod->symtab[i].st_value > mod->symtab[best].st_value)
>+ if (kallsyms->symtab[i].st_value <= addr
>+ && kallsyms->symtab[i].st_value > kallsyms->symtab[best].st_value)
> best = i;
>- if (mod->symtab[i].st_value > addr
>- && mod->symtab[i].st_value < nextval)
>- nextval = mod->symtab[i].st_value;
>+ if (kallsyms->symtab[i].st_value > addr
>+ && kallsyms->symtab[i].st_value < nextval)
>+ nextval = kallsyms->symtab[i].st_value;
> }
>
> if (!best)
> return NULL;
>
> if (size)
>- *size = nextval - mod->symtab[best].st_value;
>+ *size = nextval - kallsyms->symtab[best].st_value;
> if (offset)
>- *offset = addr - mod->symtab[best].st_value;
>- return symname(mod, best);
>+ *offset = addr - kallsyms->symtab[best].st_value;
>+ return symname(kallsyms, best);
> }
>
> /* For kallsyms to ask for address resolution. NULL means not found. Careful
>@@ -3763,18 +3782,21 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
>
> preempt_disable();
> list_for_each_entry_rcu(mod, &modules, list) {
>+ struct mod_kallsyms *kallsyms;
>+
> if (mod->state == MODULE_STATE_UNFORMED)
> continue;
>- if (symnum < mod->num_symtab) {
>- *value = mod->symtab[symnum].st_value;
>- *type = mod->symtab[symnum].st_info;
>- strlcpy(name, symname(mod, symnum), KSYM_NAME_LEN);
>+ kallsyms = rcu_dereference_sched(mod->kallsyms);
>+ if (symnum < kallsyms->num_symtab) {
>+ *value = kallsyms->symtab[symnum].st_value;
>+ *type = kallsyms->symtab[symnum].st_info;
>+ strlcpy(name, symname(kallsyms, symnum), KSYM_NAME_LEN);
> strlcpy(module_name, mod->name, MODULE_NAME_LEN);
> *exported = is_exported(name, *value, mod);
> preempt_enable();
> return 0;
> }
>- symnum -= mod->num_symtab;
>+ symnum -= kallsyms->num_symtab;
> }
> preempt_enable();
> return -ERANGE;
>@@ -3783,11 +3805,12 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
> static unsigned long mod_find_symname(struct module *mod, const char *name)
> {
> unsigned int i;
>+ struct mod_kallsyms *kallsyms = rcu_dereference_sched(mod->kallsyms);
>
>- for (i = 0; i < mod->num_symtab; i++)
>- if (strcmp(name, symname(mod, i)) == 0 &&
>- mod->symtab[i].st_info != 'U')
>- return mod->symtab[i].st_value;
>+ for (i = 0; i < kallsyms->num_symtab; i++)
>+ if (strcmp(name, symname(kallsyms, i)) == 0 &&
>+ kallsyms->symtab[i].st_info != 'U')
>+ return kallsyms->symtab[i].st_value;
> return 0;
> }
>
>@@ -3826,11 +3849,14 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
> module_assert_mutex();
>
> list_for_each_entry(mod, &modules, list) {
>+ /* We hold module_mutex: no need for rcu_dereference_sched */
>+ struct mod_kallsyms *kallsyms = mod->kallsyms;
>+
> if (mod->state == MODULE_STATE_UNFORMED)
> continue;
>- for (i = 0; i < mod->num_symtab; i++) {
>- ret = fn(data, symname(mod, i),
>- mod, mod->symtab[i].st_value);
>+ for (i = 0; i < kallsyms->num_symtab; i++) {
>+ ret = fn(data, symname(kallsyms, i),
>+ mod, kallsyms->symtab[i].st_value);
> if (ret != 0)
> return ret;
> }
>--
>2.5.0