Re: [RFC, PATCH 4/4] elf_core_dump(): Add extended numbering support

From: Américo Wang
Date: Tue Dec 15 2009 - 02:56:48 EST


On Tue, Dec 15, 2009 at 10:41 AM, Daisuke HATAYAMA
<d.hatayama@xxxxxxxxxxxxxx> wrote:
> The current ELF dumper implementation can produce broken corefiles
> if the number of program headers exceeds 65535. This number is
> determined by the number of vmas which the process has. In particular,
> some extreme programs may use more than 65535 vmas. (If you google
> max_map_count, you can find some users facing this problem.) This kind
> of program will never be able to generate correct coredumps.
>
> This patch implements ``extended numbering'', which uses the sh_info
> field of the first section header instead of the e_phnum field in order
> to represent up to 4294967295 vmas.
>
> This is supported by AMD64-ABI(http://www.x86-64.org/documentation.html)
> and Solaris(http://docs.sun.com/app/docs/doc/817-1984/). Of course,
> we are preparing patches for gdb and binutils.
>
> Signed-off-by: Daisuke HATAYAMA <d.hatayama@xxxxxxxxxxxxxx>


Hi,

Can you reorder your patches please?
Your patch 0/4 depends on 1/4, I am afraid. :-/

Thanks!

> ---
>  arch/ia64/kernel/elfcore.c |   16 ++++++++
>  arch/um/sys-i386/elfcore.c |   18 +++++++++
>  fs/binfmt_elf.c            |   88 +++++++++++++++++++++++++++++++++++++++-----
>  include/linux/elf.h        |   26 ++++++++++++-
>  4 files changed, 137 insertions(+), 11 deletions(-)
>
> diff --git a/arch/ia64/kernel/elfcore.c b/arch/ia64/kernel/elfcore.c
> index 9c0dd8b..a15d8d4 100644
> --- a/arch/ia64/kernel/elfcore.c
> +++ b/arch/ia64/kernel/elfcore.c
> @@ -73,3 +73,19 @@ int elf_core_write_extra_data(struct file *file, size_t *size,
> Â Â Â Â}
> Â Â Â Âreturn 1;
> Â}
> +
> +size_t elf_core_extra_data_size(void)
> +{
> + Â Â Â const struct elf_phdr *const gate_phdrs =
> + Â Â Â Â Â Â Â (const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);
> + Â Â Â int i;
> + Â Â Â size_t size = 0;
> +
> + Â Â Â for (i = 0; i < GATE_EHDR->e_phnum; ++i) {
> + Â Â Â Â Â Â Â if (gate_phdrs[i].p_type == PT_LOAD) {
> + Â Â Â Â Â Â Â Â Â Â Â size += PAGE_ALIGN(gate_phdrs[i].p_memsz);
> + Â Â Â Â Â Â Â Â Â Â Â break;
> + Â Â Â Â Â Â Â }
> + Â Â Â }
> + Â Â Â return size;
> +}
> diff --git a/arch/um/sys-i386/elfcore.c b/arch/um/sys-i386/elfcore.c
> index 4e320f0..4e34e47 100644
> --- a/arch/um/sys-i386/elfcore.c
> +++ b/arch/um/sys-i386/elfcore.c
> @@ -76,3 +76,21 @@ int elf_core_write_extra_data(struct file *file, size_t *size,
> Â Â Â Â}
> Â Â Â Âreturn 1;
> Â}
> +
> +size_t elf_core_extra_data_size(void)
> +{
> + Â Â Â if ( vsyscall_ehdr ) {
> + Â Â Â Â Â Â Â const struct elfhdr *const ehdrp =
> + Â Â Â Â Â Â Â Â Â Â Â (struct elfhdr *)vsyscall_ehdr;
> + Â Â Â Â Â Â Â const struct elf_phdr *const phdrp =
> + Â Â Â Â Â Â Â Â Â Â Â (const struct elf_phdr *) (vsyscall_ehdr + ehdrp->e_phoff);
> + Â Â Â Â Â Â Â int i;
> +
> + Â Â Â Â Â Â Â for (i = 0; i < ehdrp->e_phnum; ++i) {
> + Â Â Â Â Â Â Â Â Â Â Â if (phdrp[i].p_type == PT_LOAD) {
> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â return (size_t) phdrp[i].p_filesz;
> + Â Â Â Â Â Â Â Â Â Â Â }
> + Â Â Â Â Â Â Â }
> + Â Â Â }
> + Â Â Â return 0;
> +}
> diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
> index cded1ba..ad2ad5f 100644
> --- a/fs/binfmt_elf.c
> +++ b/fs/binfmt_elf.c
> @@ -1895,6 +1895,38 @@ static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
> Â Â Â Âreturn gate_vma;
> Â}
>
> +static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
> + Â Â Â Â Â Â Â Â Â Â Â Â Â Âelf_addr_t e_shoff, int segs)
> +{
> + Â Â Â elf->e_shoff = e_shoff;
> + Â Â Â elf->e_shentsize = sizeof(*shdr4extnum);
> + Â Â Â elf->e_shnum = 1;
> + Â Â Â elf->e_shstrndx = SHN_UNDEF;
> +
> + Â Â Â shdr4extnum->sh_name = 0;
> + Â Â Â shdr4extnum->sh_addr = 0;
> + Â Â Â shdr4extnum->sh_offset = 0;
> + Â Â Â shdr4extnum->sh_type = SHT_NULL;
> + Â Â Â shdr4extnum->sh_flags = 0;
> + Â Â Â shdr4extnum->sh_size = elf->e_shnum;
> + Â Â Â shdr4extnum->sh_link = elf->e_shstrndx;
> + Â Â Â shdr4extnum->sh_info = segs;
> + Â Â Â shdr4extnum->sh_addralign = 0;
> + Â Â Â shdr4extnum->sh_entsize = 0;
> +}
> +
> +static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Âunsigned long mm_flags)
> +{
> + Â Â Â struct vm_area_struct *vma;
> + Â Â Â size_t size = 0;
> +
> + Â Â Â for (vma = first_vma(current, gate_vma); vma != NULL;
> + Â Â Â Â Â Âvma = next_vma(vma, gate_vma))
> + Â Â Â Â Â Â Â size += vma_dump_size(vma, mm_flags);
> + Â Â Â return size;
> +}
> +
> Â/*
> Â* It's been implemented that some architectures write out some extra
> Â* data into segments. On the other hand, other architechtures use
> @@ -1917,6 +1949,11 @@ int __weak elf_core_write_extra_data(struct file *file, size_t *size,
> Â Â Â Âreturn 1;
> Â}
>
> +size_t __weak elf_core_extra_data_size(void)
> +{
> + Â Â Â return 0;
> +}
> +
> Â/*
> Â* Actual dumper
> Â*
> @@ -1936,6 +1973,9 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
> Â Â Â Âunsigned long mm_flags;
> Â Â Â Âstruct elf_note_info info;
> Â Â Â Âstruct elf_phdr *phdr4note = NULL;
> + Â Â Â struct elf_shdr *shdr4extnum = NULL;
> + Â Â Â Elf_Half e_phnum = 0;
> + Â Â Â elf_addr_t e_shoff;
>
> Â Â Â Â/*
> Â Â Â Â * We no longer stop all VM operations.
> @@ -1964,12 +2004,19 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
> Â Â Â Âif (gate_vma != NULL)
> Â Â Â Â Â Â Â Âsegs++;
>
> + Â Â Â /* for notes section */
> + Â Â Â segs++;
> +
> + Â Â Â /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
> + Â Â Â Â* this, kernel supports extended numbering. Have a look at
> + Â Â Â Â* include/linux/elf.h for further information. */
> + Â Â Â e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
> +
> Â Â Â Â/*
> Â Â Â Â * Collect all the non-memory information about the process for the
> Â Â Â Â * notes. ÂThis also sets up the file header.
> Â Â Â Â */
> - Â Â Â if (!fill_note_info(elf, segs + 1, /* including notes section */
> - Â Â Â Â Â Â Â Â Â Â Â Â Â &info, signr, regs))
> + Â Â Â if (!fill_note_info(elf, e_phnum, &info, signr, regs))
> Â Â Â Â Â Â Â Âgoto cleanup;
>
> Â Â Â Âhas_dumped = 1;
> @@ -1979,7 +2026,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
> Â Â Â Âset_fs(KERNEL_DS);
>
> Â Â Â Âoffset += sizeof(*elf); Â Â Â Â Â Â Â Â Â Â Â Â /* Elf header */
> - Â Â Â offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
> + Â Â Â offset += segs * sizeof(struct elf_phdr); /* Program headers */
> Â Â Â Âfoffset = offset;
>
> Â Â Â Â/* Write notes phdr entry */
> @@ -1998,6 +2045,26 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
>
> Â Â Â Âdataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
>
> + Â Â Â /*
> + Â Â Â Â* We must use the same mm->flags while dumping core to avoid
> + Â Â Â Â* inconsistency between the program headers and bodies, otherwise an
> + Â Â Â Â* unusable core file can be generated.
> + Â Â Â Â*/
> + Â Â Â mm_flags = current->mm->flags;
> +
> + Â Â Â offset += elf_core_vma_data_size(gate_vma, mm_flags);
> + Â Â Â offset += elf_core_extra_data_size();
> + Â Â Â e_shoff = offset;
> +
> + Â Â Â if (e_phnum == PN_XNUM) {
> + Â Â Â Â Â Â Â shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
> + Â Â Â Â Â Â Â if (!shdr4extnum)
> + Â Â Â Â Â Â Â Â Â Â Â goto end_coredump;
> + Â Â Â Â Â Â Â fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
> + Â Â Â }
> +
> + Â Â Â offset = dataoff;
> +
> Â Â Â Âsize += sizeof(*elf);
> Â Â Â Âif (size > limit || !dump_write(file, elf, sizeof(*elf)))
> Â Â Â Â Â Â Â Âgoto end_coredump;
> @@ -2006,13 +2073,6 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
> Â Â Â Âif (size > limit || !dump_write(file, phdr4note, sizeof(*phdr4note)))
> Â Â Â Â Â Â Â Âgoto end_coredump;
>
> - Â Â Â /*
> - Â Â Â Â* We must use the same mm->flags while dumping core to avoid
> - Â Â Â Â* inconsistency between the program headers and bodies, otherwise an
> - Â Â Â Â* unusable core file can be generated.
> - Â Â Â Â*/
> - Â Â Â mm_flags = current->mm->flags;
> -
> Â Â Â Â/* Write program headers for segments dump */
> Â Â Â Âfor (vma = first_vma(current, gate_vma); vma != NULL;
> Â Â Â Â Â Â Â Â Â Â Â Âvma = next_vma(vma, gate_vma)) {
> @@ -2079,11 +2139,19 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
> Â Â Â Âif (!elf_core_write_extra_data(file, &size, limit))
> Â Â Â Â Â Â Â Âgoto end_coredump;
>
> + Â Â Â if (e_phnum == PN_XNUM) {
> + Â Â Â Â Â Â Â size += sizeof(*shdr4extnum);
> + Â Â Â Â Â Â Â if (size > limit
> + Â Â Â Â Â Â Â Â Â || !dump_write(file, shdr4extnum, sizeof(*shdr4extnum)))
> + Â Â Â Â Â Â Â Â Â Â Â goto end_coredump;
> + Â Â Â }
> +
> Âend_coredump:
> Â Â Â Âset_fs(fs);
>
> Âcleanup:
> Â Â Â Âfree_note_info(&info);
> + Â Â Â kfree(shdr4extnum);
> Â Â Â Âkfree(phdr4note);
> Â Â Â Âkfree(elf);
> Âout:
> diff --git a/include/linux/elf.h b/include/linux/elf.h
> index d103127..027fdfe 100644
> --- a/include/linux/elf.h
> +++ b/include/linux/elf.h
> @@ -50,6 +50,28 @@ typedef __s64 Â Â Â ÂElf64_Sxword;
>
> Â#define PT_GNU_STACK Â (PT_LOOS + 0x474e551)
>
> +/*
> + * Extended Numbering
> + *
> + * If the real number of program header table entries is larger than
> + * or equal to PN_XNUM(0xffff), it is set to sh_info field of the
> + * section header at index 0, and PN_XNUM is set to e_phnum
> + * field. Otherwise, the section header at index 0 is zero
> + * initialized, if it exists.
> + *
> + * Specifications are available in:
> + *
> + * - Sun microsystems: Linker and Libraries.
> + * Â Part No: 817-1984-17, September 2008.
> + * Â URL: http://docs.sun.com/app/docs/doc/817-1984
> + *
> + * - System V ABI AMD64 Architecture Processor Supplement
> + * Â Draft Version 0.99.,
> + * Â May 11, 2009.
> + * Â URL: http://www.x86-64.org/
> + */
> +#define PN_XNUM 0xffff
> +
> Â/* These constants define the different elf file types */
> Â#define ET_NONE Â 0
> Â#define ET_REL Â Â1
> @@ -286,7 +308,7 @@ typedef struct elf64_phdr {
> Â#define SHN_COMMON Â Â 0xfff2
> Â#define SHN_HIRESERVE Â0xffff
>
> -typedef struct {
> +typedef struct elf32_shdr {
>  Elf32_Word  sh_name;
>  Elf32_Word  sh_type;
>  Elf32_Word  sh_flags;
> @@ -384,6 +406,7 @@ typedef struct elf64_note {
> Âextern Elf32_Dyn _DYNAMIC [];
> Â#define elfhdr     elf32_hdr
> Â#define elf_phdr    elf32_phdr
> +#define elf_shdr    elf32_shdr
> Â#define elf_note    elf32_note
> Â#define elf_addr_t   Elf32_Off
> Â#define Elf_Half    Elf32_Half
> @@ -393,6 +416,7 @@ extern Elf32_Dyn _DYNAMIC [];
> Âextern Elf64_Dyn _DYNAMIC [];
> Â#define elfhdr     elf64_hdr
> Â#define elf_phdr    elf64_phdr
> +#define elf_shdr    elf64_shdr
> Â#define elf_note    elf64_note
> Â#define elf_addr_t   Elf64_Off
> Â#define Elf_Half    Elf64_Half
> --
> 1.6.5.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/