[PATCH] fs, elf: drop MAP_FIXED usage from elf_map

From: Michal Hocko
Date: Mon Oct 23 2017 - 04:26:21 EST


From: Michal Hocko <mhocko@xxxxxxxx>

Both load_elf_interp and load_elf_binary rely on elf_map to map segments
on a controlled address and they use MAP_FIXED to enforce that. This is
however dangerous thing prone to silent data corruption which can be
even exploitable. Let's take CVE-2017-1000253 as an example. At the time
(before eab09532d400 ("binfmt_elf: use ELF_ET_DYN_BASE only for PIE"))
ELF_ET_DYN_BASE was at TASK_SIZE / 3 * 2 which is not that far away from
the stack top on 32b (legacy) memory layout (only 1GB away). Therefore
we could end up mapping over the existing stack with some luck.

The issue has been fixed since then (a87938b2e246 ("fs/binfmt_elf.c:
fix bug in loading of PIE binaries")), ELF_ET_DYN_BASE moved moved much
further from the stack (eab09532d400 and later by c715b72c1ba4 ("mm:
revert x86_64 and arm64 ELF_ET_DYN_BASE base changes")) and excessive
stack consumption early during execve fully stopped by da029c11e6b1
("exec: Limit arg stack to at most 75% of _STK_LIM"). So we should be
safe and any attack should be impractical. On the other hand this is
just too subtle assumption so it can break quite easily and hard to
spot.

I believe that the MAP_FIXED usage in load_elf_binary (et. al) is still
fundamentally dangerous. Moreover it shouldn't be even needed. We are
at the early process stage and so there shouldn't be unrelated mappings
(except for stack and loader) existing so mmap for a given address
should succeed even without MAP_FIXED. Something is terribly wrong if
this is not the case and we should rather fail than silently corrupt the
underlying mapping.

Address this issue by adding a helper elf_vm_mmap used by elf_map which
drops MAP_FIXED when asking for the mapping and check whether the
returned address really matches what the caller asked for and complain
loudly if this is not the case and fail. Such a failure would be a
kernel bug and it should alarm us to look what has gone wrong.

Changes since v1
- metag is duplicating elf_map to reflect its tightly coupled memory
(TCM) segments. In case the mapping is not TCM based we still have
to be MAP_FIXED careful so duplicated elf_vm_mmap (reusing the generic
helper seems to be rather problematic due to include header dependency
hell).

Cc: Kees Cook <keescook@xxxxxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Jiri Kosina <jkosina@xxxxxxx>
Cc: Al Viro <viro@xxxxxxxxxxxxxxxxxx>
Cc: Oleg Nesterov <oleg@xxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Baoquan He <bhe@xxxxxxxxxx>
Cc: James Hogan <james.hogan@xxxxxxxx>
Signed-off-by: Michal Hocko <mhocko@xxxxxxxx>
---
Hi,
I've posted this more as an RFC previously [1] and it seems there were
no fundamental objections. I have fixed up metag issue pointed by Kees
in this version. I have also dropped the second patch because Kees was
envisioning a potential danger [2]. I cannot say I would be convinced
but the second patch is not really required for this one to go

I believe this is a more preferred way to handle potential early process
address space conflicts than a silent corruption with potentially
security drawbacks. I haven't marked this patch for stable because it
doesn't fix any real issue right now but I would recommend applying this
patch for a prevention because PIE vs. stack randomization has seen some
exploitable issues in the recent path.

I am not sure which tree to push this through. Andrew, would you be
willing to take it via mmotm (once acked of course)?

[1] http://lkml.kernel.org/r/20171016134446.19910-1-mhocko@xxxxxxxxxx
[2] http://lkml.kernel.org/r/20171016184335.hj6osq7su24e75jz@xxxxxxxxxxxxxx

arch/metag/kernel/process.c | 27 +++++++++++++++++++++++++--
fs/binfmt_elf.c | 29 ++++++++++++++++++++++++++---
2 files changed, 51 insertions(+), 5 deletions(-)

diff --git a/arch/metag/kernel/process.c b/arch/metag/kernel/process.c
index c4606ce743d2..b20596b4c4c2 100644
--- a/arch/metag/kernel/process.c
+++ b/arch/metag/kernel/process.c
@@ -378,6 +378,29 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu)

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

+static unsigned long elf_vm_mmap(struct file *filep, unsigned long addr,
+ unsigned long size, int prot, int type, unsigned long off)
+{
+ unsigned long map_addr;
+
+ /*
+ * If caller requests the mapping at a specific place, make sure we fail
+ * rather than potentially clobber an existing mapping which can have
+ * security consequences (e.g. smash over the stack area).
+ */
+ map_addr = vm_mmap(filep, addr, size, prot, type & ~MAP_FIXED, off);
+ if (BAD_ADDR(map_addr))
+ return map_addr;
+
+ if ((type & MAP_FIXED) && map_addr != addr) {
+ pr_info("Uhuuh, elf segement at %p requested but the memory is mapped already\n",
+ (void*)addr);
+ return -EAGAIN;
+ }
+
+ return map_addr;
+}
+
unsigned long __metag_elf_map(struct file *filep, unsigned long addr,
struct elf_phdr *eppnt, int prot, int type,
unsigned long total_size)
@@ -410,11 +433,11 @@ unsigned long __metag_elf_map(struct file *filep, unsigned long addr,
*/
if (total_size) {
total_size = ELF_PAGEALIGN(total_size);
- map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
+ map_addr = elf_vm_mmap(filep, addr, total_size, prot, type, off);
if (!BAD_ADDR(map_addr))
vm_munmap(map_addr+size, total_size-size);
} else
- map_addr = vm_mmap(filep, addr, size, prot, type, off);
+ map_addr = elf_vm_mmap(filep, addr, size, prot, type, off);

if (!BAD_ADDR(map_addr) && tcm_tag != TCM_INVALID_TAG) {
struct tcm_allocation *tcm;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 6466153f2bf0..09456e2add18 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -341,6 +341,29 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,

#ifndef elf_map

+static unsigned long elf_vm_mmap(struct file *filep, unsigned long addr,
+ unsigned long size, int prot, int type, unsigned long off)
+{
+ unsigned long map_addr;
+
+ /*
+ * If caller requests the mapping at a specific place, make sure we fail
+ * rather than potentially clobber an existing mapping which can have
+ * security consequences (e.g. smash over the stack area).
+ */
+ map_addr = vm_mmap(filep, addr, size, prot, type & ~MAP_FIXED, off);
+ if (BAD_ADDR(map_addr))
+ return map_addr;
+
+ if ((type & MAP_FIXED) && map_addr != addr) {
+ pr_info("Uhuuh, elf segement at %p requested but the memory is mapped already\n",
+ (void*)addr);
+ return -EAGAIN;
+ }
+
+ return map_addr;
+}
+
static unsigned long elf_map(struct file *filep, unsigned long addr,
struct elf_phdr *eppnt, int prot, int type,
unsigned long total_size)
@@ -366,11 +389,11 @@ static unsigned long elf_map(struct file *filep, unsigned long addr,
*/
if (total_size) {
total_size = ELF_PAGEALIGN(total_size);
- map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
+ map_addr = elf_vm_mmap(filep, addr, total_size, prot, type, off);
if (!BAD_ADDR(map_addr))
vm_munmap(map_addr+size, total_size-size);
} else
- map_addr = vm_mmap(filep, addr, size, prot, type, off);
+ map_addr = elf_vm_mmap(filep, addr, size, prot, type, off);

return(map_addr);
}
@@ -1215,7 +1238,7 @@ static int load_elf_library(struct file *file)
eppnt++;

/* Now use mmap to map the library into memory. */
- error = vm_mmap(file,
+ error = elf_vm_mmap(file,
ELF_PAGESTART(eppnt->p_vaddr),
(eppnt->p_filesz +
ELF_PAGEOFFSET(eppnt->p_vaddr)),
--
2.14.2