Re: [PATCH] i386: fix suspend/resume with dynamically allocated irq stacks

From: Bill Irwin
Date: Thu May 03 2007 - 02:42:06 EST


On Wed, May 02, 2007 at 10:48:09PM -0700, Bill Irwin wrote:
> Updated patch follows. Please add your Signed-off-by: if it meets your
> approval; I am operating on the assumption I should never do so myself.
> I'm a bit unsure of how to handle cpu 0 vs. potential freeing of per_cpu
> areas and error returns from __cpuinit affairs, but anyhow:
>
> This fixes three bugs:
> - the stack allocation must be marked __cpuinit, since it gets called
> on resume as well.
> - presumably the interrupt stack should be freed on unplug if its
> going to get reallocated on every plug.
> - the vm_struct got leaked by thread info freeing callbacks.
> Signed-off-by: William Irwin <bill.irwin@xxxxxxxxxx>

I think just normalizing cpu 0's stack suffices here. Still no idea what
to do about backpropagating errors from __cpuinit bits just yet.

Index: stack-paranoia/arch/i386/kernel/irq.c
===================================================================
--- stack-paranoia.orig/arch/i386/kernel/irq.c 2007-05-02 19:33:23.937945981 -0700
+++ stack-paranoia/arch/i386/kernel/irq.c 2007-05-02 23:37:07.879316906 -0700
@@ -142,78 +142,138 @@
* These should really be __section__(".bss.page_aligned") as well, but
* gcc's 3.0 and earlier don't handle that correctly.
*/
-static DEFINE_PER_CPU(char *, softirq_stack);
-static DEFINE_PER_CPU(char *, hardirq_stack);
-
+struct irq_stack_info {
+ char *stack;
#ifdef CONFIG_DEBUG_STACK
-static void * __init irq_remap_stack(void *stack)
-{
- int i;
struct page *pages[THREAD_SIZE/PAGE_SIZE];
+#endif /* CONFIG_DEBUG_STACK */
+};
+static DEFINE_PER_CPU(struct irq_stack_info, softirq_stack_info);
+static DEFINE_PER_CPU(struct irq_stack_info, hardirq_stack_info);

- for (i = 0; i < ARRAY_SIZE(pages); ++i)
- pages[i] = virt_to_page(stack + PAGE_SIZE*i);
- return vmap(pages, THREAD_SIZE/PAGE_SIZE, VM_IOREMAP, PAGE_KERNEL);
-}
-
-static int __init irq_guard_cpu0(void)
+#ifdef CONFIG_DEBUG_STACK
+static int __init irq_remap_stack(struct irq_stack_info *info)
{
+ struct page *page, *pages[THREAD_SIZE/PAGE_SIZE];
unsigned long flags;
+ int i;
void *tmp;

- tmp = irq_remap_stack(per_cpu(softirq_stack, 0));
- if (!tmp)
- return -ENOMEM;
- else {
- local_irq_save(flags);
- per_cpu(softirq_stack, 0) = tmp;
- local_irq_restore(flags);
+ for (i = 0; i < ARRAY_SIZE(pages); ++i) {
+ pages[i] = alloc_page(GFP_HIGHUSER);
+ if (!pages[i])
+ goto out_free_pages;
}
- tmp = irq_remap_stack(per_cpu(hardirq_stack, 0));
+ tmp = vmap(pages, ARRAY_SIZE(info->pages), VM_IOREMAP, PAGE_KERNEL);
if (!tmp)
- return -ENOMEM;
+ goto out_free_pages;
else {
local_irq_save(flags);
- per_cpu(hardirq_stack, 0) = tmp;
+ memcpy(info->pages, pages, sizeof(pages));
+ page = virt_to_page(info->stack);
+ for (i = 0; i < THREAD_SIZE/PAGE_SIZE; ++i) {
+ ClearPageReserved(&page[i]);
+ init_page_count(&page[i]);
+ __free_page(&page[i]);
+ }
+ info->stack = tmp;
local_irq_restore(flags);
}
return 0;
+out_free_pages:
+ for (--i; i >= 0; --i)
+ __free_page(pages[i]);
+ return -1;
+}
+
+static int __init irq_guard_cpu0(void)
+{
+ if (irq_remap_stack(&per_cpu(softirq_stack_info, 0)))
+ return -ENOMEM;
+ if (irq_remap_stack(&per_cpu(hardirq_stack_info, 0)))
+ return -ENOMEM;
+ return 0;
}
core_initcall(irq_guard_cpu0);

-static void * __init __alloc_irqstack(int cpu)
+static int __cpuinit __alloc_irqstack(int cpu, struct irq_stack_info *info)
{
int i;
- struct page *pages[THREAD_SIZE/PAGE_SIZE], **tmp = pages;
- struct vm_struct *area;

- if (!slab_is_available())
- return __alloc_bootmem(THREAD_SIZE, THREAD_SIZE,
+ if (!slab_is_available()) {
+ info->stack = __alloc_bootmem(THREAD_SIZE, THREAD_SIZE,
__pa(MAX_DMA_ADDRESS));
+ info->pages[0] = virt_to_page(info->stack);
+ for (i = 1; i < ARRAY_SIZE(info->pages); ++i)
+ info->pages[i] = info->pages[0] + i;
+ return 0;
+ }
+ for (i = 0; i < ARRAY_SIZE(info->pages); ++i) {
+ info->pages[i] = alloc_page(GFP_HIGHUSER);
+ if (!info->pages[i])
+ goto out;
+ }
+ info->stack = vmap(info->pages, ARRAY_SIZE(info->pages), VM_IOREMAP,
+ PAGE_KERNEL);
+ if (info->stack)
+ return 0;
+out:
+ for (--i; i >= 0; --i) {
+ __free_page(info->pages[i]);
+ info->pages[i] = NULL;
+ }
+ return -1;
+}

- /* failures here are unrecoverable anyway */
- area = get_vm_area(THREAD_SIZE, VM_IOREMAP);
- for (i = 0; i < ARRAY_SIZE(pages); ++i)
- pages[i] = alloc_page(GFP_HIGHUSER);
- map_vm_area(area, PAGE_KERNEL, &tmp);
- return area->addr;
+static void __cpuinit __free_irqstack(int cpu, struct irq_stack_info *info)
+{
+ int i;
+
+ vunmap(info->stack);
+ for (i = 0; i < ARRAY_SIZE(info->pages); ++i) {
+ if (!PageReserved(info->pages[i]))
+ __free_page(info->pages[i]);
+ info->pages[i] = NULL;
+ }
+ info->stack = NULL;
}
#else /* !CONFIG_DEBUG_STACK */
-static void * __init __alloc_irqstack(int cpu)
+static int __cpuinit __alloc_irqstack(int cpu, struct irq_stack_info *info)
{
if (!slab_is_available())
- return __alloc_bootmem(THREAD_SIZE, THREAD_SIZE,
+ info->stack = __alloc_bootmem(THREAD_SIZE, THREAD_SIZE,
__pa(MAX_DMA_ADDRESS));
-
- return (void *)__get_free_pages(GFP_KERNEL,
+ else
+ info->stack = (void *)__get_free_pages(GFP_KERNEL,
ilog2(THREAD_SIZE/PAGE_SIZE));
+ return info->stack ? 0 : -1;
+}
+
+static void __cpuinit __free_irqstack(int cpu, struct irq_stack_info *info)
+{
+ struct page *page = virt_to_page(info->stack);
+
+ if (!PageReserved(page))
+ __free_pages(page, ilog2(THREAD_SIZE/PAGE_SIZE));
+ info->stack = NULL;
}
#endif /* !CONFIG_DEBUG_STACK */

-static void __init alloc_irqstacks(int cpu)
+static int __cpuinit alloc_irqstacks(int cpu)
+{
+ if (__alloc_irqstack(cpu, &per_cpu(softirq_stack_info, cpu)))
+ return -1;
+ if (__alloc_irqstack(cpu, &per_cpu(hardirq_stack_info, cpu))) {
+ __free_irqstack(cpu, &per_cpu(softirq_stack_info, cpu));
+ return -1;
+ }
+ return 0;
+}
+
+static void __cpuinit free_irqstacks(int cpu)
{
- per_cpu(softirq_stack, cpu) = __alloc_irqstack(cpu);
- per_cpu(hardirq_stack, cpu) = __alloc_irqstack(cpu);
+ __free_irqstack(cpu, &per_cpu(softirq_stack_info, cpu));
+ __free_irqstack(cpu, &per_cpu(hardirq_stack_info, cpu));
}

/*
@@ -228,7 +288,7 @@

alloc_irqstacks(cpu);

- irqctx = (union irq_ctx*)per_cpu(hardirq_stack, cpu);
+ irqctx = (union irq_ctx*)per_cpu(hardirq_stack_info, cpu).stack;
irqctx->tinfo.task = NULL;
irqctx->tinfo.exec_domain = NULL;
irqctx->tinfo.cpu = cpu;
@@ -237,7 +297,7 @@

per_cpu(hardirq_ctx, cpu) = irqctx;

- irqctx = (union irq_ctx*)per_cpu(softirq_stack, cpu);
+ irqctx = (union irq_ctx*)per_cpu(softirq_stack_info, cpu).stack;
irqctx->tinfo.task = NULL;
irqctx->tinfo.exec_domain = NULL;
irqctx->tinfo.cpu = cpu;
@@ -252,6 +312,7 @@

void irq_ctx_exit(int cpu)
{
+ free_irqstacks(cpu);
per_cpu(hardirq_ctx, cpu) = NULL;
}

Index: stack-paranoia/arch/i386/kernel/process.c
===================================================================
--- stack-paranoia.orig/arch/i386/kernel/process.c 2007-05-02 20:15:05.412496892 -0700
+++ stack-paranoia/arch/i386/kernel/process.c 2007-05-02 21:15:15.958250168 -0700
@@ -327,43 +327,38 @@
struct thread_info *alloc_thread_info(struct task_struct *unused)
{
int i;
- struct page *pages[THREAD_SIZE/PAGE_SIZE], **tmp = pages;
- struct vm_struct *area;
+ struct page *pages[THREAD_SIZE/PAGE_SIZE];
+ struct thread_info *info;

/*
* passing VM_IOREMAP for the sake of alignment is why
* all this is done by hand.
*/
- area = get_vm_area(THREAD_SIZE, VM_IOREMAP);
- if (!area)
- return NULL;
for (i = 0; i < THREAD_SIZE/PAGE_SIZE; ++i) {
pages[i] = alloc_page(GFP_HIGHUSER);
if (!pages[i])
goto out_free_pages;
}
- /* implicitly transfer page refcounts to the vm_struct */
- if (map_vm_area(area, PAGE_KERNEL, &tmp))
- goto out_remove_area;
- /* it may be worth poisoning, save thread_info proper */
- return (struct thread_info *)area->addr;
-out_remove_area:
- remove_vm_area(area);
+ info = vmap(pages, THREAD_SIZE/PAGE_SIZE, VM_IOREMAP, PAGE_KERNEL);
+ if (info)
+ return info;
out_free_pages:
- do {
- __free_page(pages[--i]);
- } while (i >= 0);
+ for (--i; i >= 0; --i)
+ __free_page(pages[i]);
return NULL;
}

static void work_free_thread_info(struct work_struct *work)
{
int i;
+ struct page *pages[THREAD_SIZE/PAGE_SIZE];
void *p = work;

for (i = 0; i < THREAD_SIZE/PAGE_SIZE; ++i)
- __free_page(vmalloc_to_page(p + PAGE_SIZE*i));
- vfree(p);
+ pages[i] = vmalloc_to_page(p + PAGE_SIZE*i);
+ vunmap(work);
+ for (i = 0; i < THREAD_SIZE/PAGE_SIZE; ++i)
+ __free_page(pages[i]);
}

void free_thread_info(struct thread_info *info)
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/