diff -ruNp linux/arch/alpha/Makefile 2.5.3/arch/alpha/Makefile --- linux/arch/alpha/Makefile Sun Dec 3 17:45:20 2000 +++ 2.5.3/arch/alpha/Makefile Sat Feb 2 16:32:39 2002 @@ -117,11 +117,13 @@ srmboot: archclean: @$(MAKE) -C arch/alpha/kernel clean @$(MAKEBOOT) clean - rm -f arch/alpha/vmlinux.lds archmrproper: + rm -f arch/alpha/vmlinux.lds + rm -f include/asm-alpha/asm_offsets.h archdep: + $(MAKE) -C arch/alpha/kernel asm_offsets @$(MAKEBOOT) dep vmlinux: arch/alpha/vmlinux.lds diff -ruNp linux/arch/alpha/kernel/Makefile 2.5.3/arch/alpha/kernel/Makefile --- linux/arch/alpha/kernel/Makefile Sun Aug 12 10:51:41 2001 +++ 2.5.3/arch/alpha/kernel/Makefile Sat Feb 2 15:29:33 2002 @@ -8,9 +8,9 @@ # Note 2! The CFLAGS definitions are now in the main makefile... .S.s: - $(CPP) $(AFLAGS) -o $*.s $< + $(CPP) $(CFLAGS) $(AFLAGS) -o $*.s $< .S.o: - $(CC) $(AFLAGS) -c -o $*.o $< + $(CC) $(CFLAGS) $(AFLAGS) -c -o $*.o $< O_TARGET := kernel.o @@ -102,11 +102,15 @@ endif # GENERIC all: kernel.o head.o -asm_offsets: check_asm - ./check_asm > $(TOPDIR)/include/asm-alpha/asm_offsets.h - -check_asm: check_asm.c - $(HOSTCC) -o $@ $< $(CPPFLAGS) -ffixed-8 +ASM_OFFSETS_H = $(TOPDIR)/include/asm-alpha/asm_offsets.h +asm_offsets: + $(CC) $(CFLAGS) -S -o - check_asm.c | \ + sed -e '/xyzzy/ { s/xyzzy //; p; }; d;' > asm_offsets.tmp + @if cmp -s asm_offsets.tmp $(ASM_OFFSETS_H); then \ + set -x; rm asm_offsets.tmp; \ + else \ + set -x; mv asm_offsets.tmp $(ASM_OFFSETS_H); \ + fi clean:: rm -f check_asm diff -ruNp linux/arch/alpha/kernel/check_asm.c 2.5.3/arch/alpha/kernel/check_asm.c --- linux/arch/alpha/kernel/check_asm.c Mon Jan 28 15:14:22 2002 +++ 2.5.3/arch/alpha/kernel/check_asm.c Sat Feb 2 15:27:37 2002 @@ -3,30 +3,38 @@ #include #include -int main() +#define OUT(x) \ + asm ("\nxyzzy " x) +#define DEF(name, val) \ + asm volatile ("\nxyzzy #define " name " %0" : : "i"(val)) + +void foo(void) { - printf("#ifndef __ASM_OFFSETS_H__\n#define __ASM_OFFSETS_H__\n"); + OUT("#ifndef __ASM_OFFSETS_H__"); + OUT("#define __ASM_OFFSETS_H__"); + OUT(""); - printf("#define TASK_STATE %ld\n", - (long)offsetof(struct task_struct, state)); - printf("#define TASK_FLAGS %ld\n", - (long)offsetof(struct task_struct, flags)); - printf("#define TASK_SIGPENDING %ld\n", - (long)offsetof(struct task_struct, sigpending)); - printf("#define TASK_ADDR_LIMIT %ld\n", - (long)offsetof(struct task_struct, addr_limit)); - printf("#define TASK_EXEC_DOMAIN %ld\n", - (long)offsetof(struct task_struct, exec_domain)); - printf("#define TASK_NEED_RESCHED %ld\n", - (long)offsetof(struct task_struct, work.need_resched)); - printf("#define TASK_SIZE %ld\n", sizeof(struct task_struct)); - printf("#define STACK_SIZE %ld\n", sizeof(union task_union)); + DEF("TASK_STATE", offsetof(struct task_struct, state)); + DEF("TASK_FLAGS", offsetof(struct task_struct, flags)); + DEF("TASK_WORK", offsetof(struct task_struct, work)); + DEF("TASK_WORK_NEED_RESCHED", + offsetof(struct task_struct, work.need_resched)); + DEF("TASK_WORK_SYSCALL_TRACE", + offsetof(struct task_struct, work.syscall_trace)); + DEF("TASK_WORK_SIGPENDING", + offsetof(struct task_struct, work.sigpending)); + DEF("TASK_ADDR_LIMIT", offsetof(struct task_struct, addr_limit)); + DEF("TASK_EXEC_DOMAIN", offsetof(struct task_struct, exec_domain)); + DEF("TASK_PTRACE", offsetof(struct task_struct, ptrace)); + DEF("TASK_SIZE", sizeof(struct task_struct)); + DEF("STACK_SIZE", sizeof(union task_union)); + DEF("PT_PTRACED", PT_PTRACED); + DEF("CLONE_VM", CLONE_VM); + DEF("SIGCHLD", SIGCHLD); -
printf("#define HAE_CACHE %ld\n", - (long)offsetof(struct alpha_machine_vector, hae_cache)); - printf("#define HAE_REG %ld\n", - (long)offsetof(struct alpha_machine_vector, hae_register)); + DEF("HAE_CACHE", offsetof(struct alpha_machine_vector, hae_cache)); + DEF("HAE_REG", offsetof(struct alpha_machine_vector, hae_register)); - printf("#endif /* __ASM_OFFSETS_H__ */\n"); - return 0; + OUT(""); + OUT("#endif /* __ASM_OFFSETS_H__ */"); } diff -ruNp linux/arch/alpha/kernel/entry.S 2.5.3/arch/alpha/kernel/entry.S --- linux/arch/alpha/kernel/entry.S Mon Jan 28 15:14:22 2002 +++ 2.5.3/arch/alpha/kernel/entry.S Sat Feb 2 15:27:37 2002 @@ -7,42 +7,15 @@ #include #include #include - -#define SIGCHLD 20 +#include #define NR_SYSCALLS 378 /* - * These offsets must match with alpha_mv in . - */ -#define HAE_CACHE 0 -#define HAE_REG 8 - -/* * stack offsets */ -#define SP_OFF 184 - -#define SWITCH_STACK_SIZE 320 - -/* - * task structure offsets - */ -#define TASK_STATE 0 -#define TASK_FLAGS 8 -#define TASK_SIGPENDING 16 -#define TASK_ADDR_LIMIT 24 -#define TASK_EXEC_DOMAIN 32 -#define TASK_NEED_RESCHED 40 -#define TASK_PTRACE 48 -#define TASK_PROCESSOR 100 - -/* - * task flags (must match include/linux/sched.h): - */ -#define PT_PTRACED 0x00000001 - -#define CLONE_VM 0x00000100 +#define SP_OFF 184 +#define SWITCH_STACK_SIZE 320 /* * This defines the normal kernel pt-regs layout. @@ -121,9 +94,6 @@ .text .set noat -#if defined(__linux__) && !defined(__ELF__) - .set singlegp -#endif .align 3 .globl entInt @@ -559,10 +529,14 @@ entSys: lda $5,sys_call_table lda $27,sys_ni_syscall cmpult $0,$4,$4 - ldq $3,TASK_PTRACE($8) +#ifdef __alpha_bwx__ + ldbu $3,TASK_WORK_SYSCALL_TRACE($8) +#else + ldl $3,TASK_WORK($8) + extbl $3,TASK_WORK_SYSCALL_TRACE,$3 +#endif stq $17,SP_OFF+32($30) s8addq $0,$5,$5 - and $3,PT_PTRACED,$3 stq $18,SP_OFF+40($30) bne $3,strace beq $4,1f @@ -579,18 +553,47 @@ ret_from_sys_call: ldq $0,SP_OFF($30) and $0,8,$0 beq $0,restore_all -ret_from_reschedule: - ldq $2,TASK_NEED_RESCHED($8) - lda $4,init_task_union - bne $2,reschedule - xor $4,$8,$4 - ldl $5,TASK_SIGPENDING($8) - beq $4,restore_all - bne $5,signal_return + /* Make sure need_resched and sigpending don't change between + sampling and the rti. */ + lda $16,7 + call_pal PAL_swpipl + ldl $5,TASK_WORK($8) + zapnot $5,0x0D,$5 /* clear syscall trace counter */ + bne $5,work_pending restore_all: RESTORE_ALL call_pal PAL_rti +.align 3 +work_pending: + extbl $5,TASK_WORK_NEED_RESCHED,$2 + beq $2,work_notifysig + + subq $30,16,$30 + stq $19,0($30) /* save syscall nr */ + stq $20,8($30) /* and error indication (a3) */ + jsr $26,schedule + ldq $19,0($30) + ldq $20,8($30) + addq $30,16,$30 + /* Make sure need_resched and sigpending don't change between + sampling and the rti. */
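The new asm_offsets rule above compiles check_asm.c with the target compiler and -S, then filters the compiler's assembly output through sed, so the offsets header is generated without ever building or running a helper binary. A stand-alone sketch of the same trick follows; the file, struct, and macro names are made up for illustration and are not part of the patch. On Alpha the "i" constraint prints as a bare number; other targets may prefix the immediate, in which case the sed script needs an extra substitution.

/* offsets_demo.c -- hypothetical stand-alone file, not part of the patch. */
#include <stddef.h>

struct example {
        long state;
        long flags;
        char name[16];
};

#define OUT(x) \
        asm ("\nxyzzy " x)
#define DEF(name, val) \
        asm volatile ("\nxyzzy #define " name " %0" : : "i" (val))

void foo(void)
{
        OUT("#ifndef __EXAMPLE_OFFSETS_H__");
        OUT("#define __EXAMPLE_OFFSETS_H__");
        DEF("EXAMPLE_STATE", offsetof(struct example, state));
        DEF("EXAMPLE_FLAGS", offsetof(struct example, flags));
        DEF("EXAMPLE_SIZE", sizeof(struct example));
        OUT("#endif");
}

/* Generate the header the same way the Makefile rule does:
 *   gcc -S -o - offsets_demo.c | \
 *       sed -e '/xyzzy/ { s/xyzzy //; p; }; d;' > example_offsets.h
 */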
+ lda $16,7 + call_pal PAL_swpipl + ldl $5,TASK_WORK($8) + zapnot $5,0x0D,$5 /* clear syscall trace counter */ + bne $5,work_pending + +work_notifysig: + extbl $5,TASK_WORK_SIGPENDING,$2 + beq $2,restore_all + mov $30,$17 + br $1,do_switch_stack + mov $30,$18 + mov $31,$16 + jsr $26,do_signal + bsr $1,undo_switch_stack + br restore_all /* PTRACE syscall handler */ .align 3 @@ -677,16 +680,6 @@ ret_success: stq $0,0($30) stq $31,72($30) /* a3=0 => no error */ br ret_from_sys_call - -.align 3 -signal_return: - mov $30,$17 - br $1,do_switch_stack - mov $30,$18 - mov $31,$16 - jsr $26,do_signal - bsr $1,undo_switch_stack - br restore_all .end entSys .globl ret_from_fork @@ -697,19 +690,6 @@ ret_from_fork: mov $17,$16 jsr $31,schedule_tail .end ret_from_fork - -.align 3 -.ent reschedule -reschedule: - subq $30,16,$30 - stq $19,0($30) /* save syscall nr */ - stq $20,8($30) /* and error indication (a3) */ - jsr $26,schedule - ldq $19,0($30) - ldq $20,8($30) - addq $30,16,$30 - br ret_from_reschedule -.end reschedule .align 3 .ent sys_sigreturn diff -ruNp linux/arch/alpha/kernel/process.c 2.5.3/arch/alpha/kernel/process.c --- linux/arch/alpha/kernel/process.c Mon Jan 28 15:14:22 2002 +++ 2.5.3/arch/alpha/kernel/process.c Sat Feb 2 14:51:47 2002 @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -74,17 +75,12 @@ void cpu_idle(void) { /* An endless idle loop with no priority at all. */ - current->nice = 20; - while (1) { /* FIXME -- EV6 and LCA45 know how to power down the CPU. */ - /* Although we are an idle CPU, we do not want to - get into the scheduler unnecessarily. */ - long oldval = xchg(&current->work.need_resched, -1UL); - if (!oldval) - while (current->work.need_resched < 0); + while (! need_resched()) + barrier(); schedule(); check_pgt_cache(); } } diff -ruNp linux/arch/alpha/kernel/ptrace.c 2.5.3/arch/alpha/kernel/ptrace.c --- linux/arch/alpha/kernel/ptrace.c Tue Sep 18 17:03:51 2001 +++ 2.5.3/arch/alpha/kernel/ptrace.c Sat Feb 2 15:27:37 2002 @@ -334,10 +334,17 @@ sys_ptrace(long request, long pid, long ret = -EIO; if ((unsigned long) data > _NSIG) goto out; - if (request == PTRACE_SYSCALL) - child->ptrace |= PT_TRACESYS; - else - child->ptrace &= ~PT_TRACESYS; + if (request == PTRACE_SYSCALL) { + if (!(child->ptrace & PT_SYSCALLTRACE)) { + child->ptrace |= PT_SYSCALLTRACE; + child->work.syscall_trace++; + } + } else { + if (child->ptrace & PT_SYSCALLTRACE) { + child->ptrace &= ~PT_SYSCALLTRACE; + child->work.syscall_trace--; + } + } child->exit_code = data; wake_up_process(child); /* make sure single-step breakpoint is gone. */ @@ -365,7 +372,10 @@ sys_ptrace(long request, long pid, long if ((unsigned long) data > _NSIG) goto out; child->thread.bpt_nsaved = -1; /* mark single-stepping */ - child->ptrace &= ~PT_TRACESYS; + if (child->ptrace & PT_SYSCALLTRACE) { + child->ptrace &= ~PT_SYSCALLTRACE; + child->work.syscall_trace--; + } wake_up_process(child); child->exit_code = data; /* give it a chance to run.
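For reference, here is a hedged user-space illustration of the packed per-task "work" word that the reworked entSys / ret_from_sys_call / work_pending path above tests. The byte positions (need_resched in byte 0, syscall_trace in byte 1, sigpending in byte 2) are inferred from the extbl operands and the zapnot 0x0D mask, not copied from a header, so treat them as an assumption; in the real code zapnot also clears the upper bytes of the sign-extended 64-bit register.

/* work_word_demo.c -- illustration only; byte positions are an assumption. */
#include <assert.h>
#include <stdint.h>

#define NEED_RESCHED_BYTE  0    /* extbl index, from TASK_WORK_NEED_RESCHED */
#define SYSCALL_TRACE_BYTE 1    /* the byte zapnot 0x0D throws away */
#define SIGPENDING_BYTE    2    /* extbl index, from TASK_WORK_SIGPENDING */

/* extbl $w,n,$r: pull byte n out of the loaded work word. */
static unsigned int extbl(uint32_t w, unsigned int n)
{
        return (w >> (8 * n)) & 0xff;
}

/* zapnot $w,0x0D,$r: keep bytes 0, 2, 3; zero the syscall trace counter. */
static uint32_t zapnot_0d(uint32_t w)
{
        return w & ~(0xffu << (8 * SYSCALL_TRACE_BYTE));
}

int main(void)
{
        uint32_t work = 0;

        work |= 1u << (8 * SYSCALL_TRACE_BYTE); /* being traced, nothing else */
        assert(zapnot_0d(work) == 0);           /* fast path: straight to rti */

        work |= 1u << (8 * SIGPENDING_BYTE);    /* a signal arrives */
        assert(zapnot_0d(work) != 0);           /* slow path: work_pending */
        assert(extbl(work, NEED_RESCHED_BYTE) == 0);    /* -> work_notifysig */
        assert(extbl(work, SIGPENDING_BYTE) == 1);      /* -> do_signal */
        return 0;
}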
*/ @@ -390,8 +400,8 @@ sys_ptrace(long request, long pid, long asmlinkage void syscall_trace(void) { - if ((current->ptrace & (PT_PTRACED|PT_TRACESYS)) - != (PT_PTRACED|PT_TRACESYS)) + if ((current->ptrace & (PT_PTRACED|PT_SYSCALLTRACE)) + != (PT_PTRACED|PT_SYSCALLTRACE)) return; current->exit_code = SIGTRAP; current->state = TASK_STOPPED; diff -ruNp linux/arch/alpha/kernel/setup.c 2.5.3/arch/alpha/kernel/setup.c --- linux/arch/alpha/kernel/setup.c Tue Dec 25 15:39:20 2001 +++ 2.5.3/arch/alpha/kernel/setup.c Thu Feb 7 15:25:40 2002 @@ -68,7 +68,7 @@ int boot_cpuid; * Using SRM callbacks for initial console output. This works from * setup_arch() time through the end of time_init(), as those places * are under our (Alpha) control. - + * * "srmcons" specified in the boot command arguments allows us to * see kernel messages during the period of time before the true * console device is "registered" during console_init(). As of this @@ -436,8 +436,8 @@ static void srm_console_write(struct con static kdev_t srm_console_device(struct console *c) { - /* Huh? */ - return MKDEV(TTY_MAJOR, 64 + c->index); + /* Huh? */ + return mk_kdev(TTY_MAJOR, 64 + c->index); } static int __init srm_console_setup(struct console *co, char *options) diff -ruNp linux/arch/alpha/kernel/smp.c 2.5.3/arch/alpha/kernel/smp.c --- linux/arch/alpha/kernel/smp.c Tue Jan 15 10:56:35 2002 +++ 2.5.3/arch/alpha/kernel/smp.c Thu Feb 7 14:51:03 2002 @@ -45,7 +45,7 @@ #include "irq_impl.h" -#define DEBUG_SMP 0 +#define DEBUG_SMP 1 #if DEBUG_SMP #define DBGS(args) printk args #else @@ -62,6 +62,7 @@ static struct { enum ipi_message_type { IPI_RESCHEDULE, + IPI_MIGRATION, IPI_CALL_FUNC, IPI_CPU_STOP, }; @@ -69,7 +70,7 @@ enum ipi_message_type { spinlock_t kernel_flag __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; /* Set to a secondary's cpuid when it comes online. */ -static unsigned long smp_secondary_alive; +static int smp_secondary_alive __initdata = 0; /* Which cpus ids came online. */ unsigned long cpu_present_mask; @@ -82,6 +83,7 @@ int smp_num_probed; /* Internal process int smp_num_cpus = 1; /* Number that came online. */ int smp_threads_ready; /* True once the per process idle is forked. */ cycles_t cacheflush_time; +unsigned long cache_decay_ticks; int __cpu_number_map[NR_CPUS]; int __cpu_logical_map[NR_CPUS]; @@ -132,7 +134,7 @@ smp_setup_percpu_timer(int cpuid) cpu_data[cpuid].prof_multiplier = 1; } -static void __init +static inline void __init wait_boot_cpu_to_stop(int cpuid) { long stop = jiffies + 10*HZ; @@ -156,13 +158,6 @@ smp_callin(void) { int cpuid = hard_smp_processor_id(); - if (current != init_tasks[cpu_number_map(cpuid)]) { - printk("BUG: smp_calling: cpu %d current %p init_tasks[cpu_number_map(cpuid)] %p\n", - cpuid, current, init_tasks[cpu_number_map(cpuid)]); - } - - DBGS(("CALLIN %d state 0x%lx\n", cpuid, current->state)); - /* Turn on machine checks. */ wrmces(7); @@ -178,19 +173,15 @@ smp_callin(void) /* Must have completely accurate bogos. */ __sti(); - /* - * Wait boot CPU to stop with irq enabled before - * running calibrate_delay(). - */ + /* Wait boot CPU to stop with irq enabled before running + calibrate_delay. */ wait_boot_cpu_to_stop(cpuid); mb(); calibrate_delay(); smp_store_cpu_info(cpuid); - /* - * Allow master to continue only after we written - * the loops_per_jiffy. - */ + + /* Allow master to continue only after we written loops_per_jiffy. 
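The sys_ptrace() and syscall_trace() changes above keep PT_SYSCALLTRACE in ->ptrace and the work.syscall_trace byte moving in lockstep, so the syscall entry path only has to test a single byte. A small self-contained sketch of that invariant, using toy types and an illustrative flag value rather than the kernel's:

/* syscall_trace_demo.c -- toy types; the PT_SYSCALLTRACE value is made up. */
#include <assert.h>

#define PT_SYSCALLTRACE 0x4

struct toy_task {
        unsigned long ptrace;
        struct { unsigned char syscall_trace; } work;
};

static void set_syscall_trace(struct toy_task *child)
{
        if (!(child->ptrace & PT_SYSCALLTRACE)) {
                child->ptrace |= PT_SYSCALLTRACE;
                child->work.syscall_trace++;
        }
}

static void clear_syscall_trace(struct toy_task *child)
{
        if (child->ptrace & PT_SYSCALLTRACE) {
                child->ptrace &= ~PT_SYSCALLTRACE;
                child->work.syscall_trace--;
        }
}

int main(void)
{
        struct toy_task t = { 0, { 0 } };

        set_syscall_trace(&t);
        set_syscall_trace(&t);          /* guarded: still only counted once */
        assert(t.work.syscall_trace == 1);
        clear_syscall_trace(&t);
        clear_syscall_trace(&t);        /* likewise only decremented once */
        assert(t.work.syscall_trace == 0);
        return 0;
}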
*/ wmb(); smp_secondary_alive = 1; @@ -201,12 +192,6 @@ smp_callin(void) DBGS(("smp_callin: commencing CPU %d current %p\n", cpuid, current)); - /* Setup the scheduler for this processor. */ - init_idle(); - - /* ??? This should be in init_idle. */ - atomic_inc(&init_mm.mm_count); - current->active_mm = &init_mm; /* Do nothing. */ cpu_idle(); } @@ -222,8 +207,9 @@ static void __init smp_tune_scheduling (int cpuid) { struct percpu_struct *cpu; - unsigned long on_chip_cache; - unsigned long freq; + unsigned long on_chip_cache; /* kB */ + unsigned long freq; /* Hz */ + unsigned long bandwidth = 350; /* MB/s */ cpu = (struct percpu_struct*)((char*)hwrpb + hwrpb->processor_offset + cpuid * hwrpb->processor_size); @@ -244,43 +230,54 @@ smp_tune_scheduling (int cpuid) case EV6_CPU: case EV67_CPU: - on_chip_cache = 64 + 64; - break; - default: - on_chip_cache = 8 + 8; + on_chip_cache = 64 + 64; break; } freq = hwrpb->cycle_freq ? : est_cycle_freq; -#if 0 - /* Magic estimation stolen from x86 port. */ - cacheflush_time = freq / 1024L * on_chip_cache / 5000L; + cacheflush_time = (freq / 1000000) * (on_chip_cache << 10) / bandwidth; + cache_decay_ticks = cacheflush_time / (freq / 1000) * HZ / 1000; - printk("Using heuristic of %d cycles.\n", - cacheflush_time); -#else - /* Magic value to force potential preemption of other CPUs. */ - cacheflush_time = INT_MAX; + printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n", + cacheflush_time/(freq/1000000), + (cacheflush_time*100/(freq/1000000)) % 100); + printk("task migration cache decay timeout: %ld msecs.\n", + (cache_decay_ticks + 1) * 1000 / HZ); +} - printk("Using heuristic of %d cycles.\n", - cacheflush_time); -#endif +/* Wait until hwrpb->txrdy is clear for cpu. Return -1 on timeout. */ +static int __init +wait_for_txrdy (unsigned long cpumask) +{ + unsigned long timeout; + + if (!(hwrpb->txrdy & cpumask)) + return 0; + + timeout = jiffies + 10*HZ; + while (time_before(jiffies, timeout)) { + if (!(hwrpb->txrdy & cpumask)) + return 0; + udelay(10); + barrier(); + } + + return -1; } /* * Send a message to a secondary's console. "START" is one such * interesting message. ;-) */ -static void +static void __init send_secondary_console_msg(char *str, int cpuid) { struct percpu_struct *cpu; register char *cp1, *cp2; unsigned long cpumask; size_t len; - long timeout; cpu = (struct percpu_struct *) ((char*)hwrpb @@ -288,9 +285,8 @@ send_secondary_console_msg(char *str, in + cpuid * hwrpb->processor_size); cpumask = (1UL << cpuid); - if (hwrpb->txrdy & cpumask) - goto delay1; - ready1: + if (wait_for_txrdy(cpumask)) + goto timeout; cp2 = str; len = strlen(cp2); @@ -302,34 +298,12 @@ send_secondary_console_msg(char *str, in wmb(); set_bit(cpuid, &hwrpb->rxrdy); - if (hwrpb->txrdy & cpumask) - goto delay2; - ready2: + if (wait_for_txrdy(cpumask)) + goto timeout; return; -delay1: - /* Wait 10 seconds. Note that jiffies aren't ticking yet. */ - for (timeout = 1000000; timeout > 0; --timeout) { - if (!(hwrpb->txrdy & cpumask)) - goto ready1; - udelay(10); - barrier(); - } - goto timeout; - -delay2: - /* Wait 10 seconds. */ - for (timeout = 1000000; timeout > 0; --timeout) { - if (!(hwrpb->txrdy & cpumask)) - goto ready2; - udelay(10); - barrier(); - } - goto timeout; - -timeout: + timeout: printk("Processor %x not ready\n", cpuid); - return; } /* @@ -439,9 +413,9 @@ secondary_cpu_start(int cpuid, struct ta send_secondary_console_msg("START\r\n", cpuid); - /* Wait 10 seconds for an ACK from the console. Note that jiffies - aren't ticking yet. 
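The smp_tune_scheduling() arithmetic above is easier to sanity-check outside the kernel. A sketch with illustrative inputs (a 500 MHz EV6-class CPU, 64+64 kB of on-chip cache, the assumed 350 MB/s bandwidth, and Alpha's HZ of 1024); with these numbers the cutoff works out to a few hundred microseconds and the decay timeout rounds down to less than one tick:

/* tune_demo.c -- user-space rerun of the computation; inputs are assumptions. */
#include <stdio.h>

int main(void)
{
        unsigned long freq = 500000000UL;       /* Hz, hypothetical EV6 */
        unsigned long on_chip_cache = 64 + 64;  /* kB */
        unsigned long bandwidth = 350;          /* MB/s, as assumed above */
        unsigned long hz = 1024;                /* Alpha's HZ */
        unsigned long cacheflush_time, cache_decay_ticks;

        cacheflush_time = (freq / 1000000) * (on_chip_cache << 10) / bandwidth;
        cache_decay_ticks = cacheflush_time / (freq / 1000) * hz / 1000;

        printf("per-CPU timeslice cutoff: %lu.%02lu usecs.\n",
               cacheflush_time / (freq / 1000000),
               (cacheflush_time * 100 / (freq / 1000000)) % 100);
        printf("task migration cache decay timeout: %lu msecs.\n",
               (cache_decay_ticks + 1) * 1000 / hz);
        return 0;
}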
*/ - for (timeout = 1000000; timeout > 0; timeout--) { + /* Wait 10 seconds for an ACK from the console. */ + timeout = jiffies + 10*HZ; + while (time_before(jiffies, timeout)) { if (cpu->flags & 1) goto started; udelay(10); @@ -455,13 +429,12 @@ started: return 0; } -static int __init fork_by_hand(void) +static inline int __init +fork_by_hand(void) { struct pt_regs regs; - /* - * don't care about the regs settings since - * we'll never reschedule the forked task. - */ + /* Don't care about the regs settings since we'll never + reschedule the forked task. */ return do_fork(CLONE_VM|CLONE_PID, 0, &regs, 0); } @@ -477,66 +450,53 @@ smp_boot_one_cpu(int cpuid, int cpunum) /* Cook up an idler for this guy. Note that the address we give to kernel_thread is irrelevant -- it's going to start where HWRPB.CPU_restart says to start. But this gets all the other - task-y sort of data structures set up like we wish. */ - /* - * We can't use kernel_thread since we must avoid to - * reschedule the child. - */ + task-y sort of data structures set up like we wish. We can't + use kernel_thread since we must avoid rescheduling the child. */ if (fork_by_hand() < 0) panic("failed fork for CPU %d", cpuid); idle = init_task.prev_task; if (!idle) panic("No idle process for CPU %d", cpuid); - if (idle == &init_task) - panic("idle process is init_task for CPU %d", cpuid); + init_idle(idle, cpuid); - idle->processor = cpuid; - idle->cpus_runnable = 1 << cpuid; /* we schedule the first task manually */ __cpu_logical_map[cpunum] = cpuid; __cpu_number_map[cpuid] = cpunum; - del_from_runqueue(idle); unhash_process(idle); - init_tasks[cpunum] = idle; DBGS(("smp_boot_one_cpu: CPU %d state 0x%lx flags 0x%lx\n", cpuid, idle->state, idle->flags)); - /* The secondary will change this once it is happy. Note that - secondary_cpu_start contains the necessary memory barrier. */ + /* Signal the secondary CPU to wait a moment. */ smp_secondary_alive = -1; /* Whirrr, whirrr, whirrrrrrrrr... */ if (secondary_cpu_start(cpuid, idle)) return -1; + /* Notify the secondary CPU it can run calibrate_delay. */ mb(); - /* Notify the secondary CPU it can run calibrate_delay() */ smp_secondary_alive = 0; - /* We've been acked by the console; wait one second for the task - to start up for real. Note that jiffies aren't ticking yet. */ - for (timeout = 0; timeout < 1000000; timeout++) { - if (smp_secondary_alive == 1) - goto alive; + /* We've been acked by the console; wait one second for + the task to start up for real. */ + timeout = jiffies + 1*HZ; + while (time_before(jiffies, timeout)) { + if (smp_secondary_alive == 1) { + /* Another "Red Snapper". */ + return 0; + } udelay(10); barrier(); } - /* we must invalidate our stuff as we failed to boot the CPU */ + /* We must invalidate our stuff as we failed to boot the CPU. */ __cpu_logical_map[cpunum] = -1; __cpu_number_map[cpuid] = -1; - /* the idle task is local to us so free it as we don't use it */ - free_task_struct(idle); - printk(KERN_ERR "SMP: Processor %d is stuck.\n", cpuid); return -1; - -alive: - /* Another "Red Snapper". */ - return 0; } /* @@ -605,21 +565,16 @@ smp_boot_cpus(void) __cpu_number_map[boot_cpuid] = 0; __cpu_logical_map[0] = boot_cpuid; - current->processor = boot_cpuid; + current->cpu = boot_cpuid; smp_store_cpu_info(boot_cpuid); smp_tune_scheduling(boot_cpuid); smp_setup_percpu_timer(boot_cpuid); - init_idle(); - - /* ??? This should be in init_idle. */ - atomic_inc(&init_mm.mm_count); - current->active_mm = &init_mm; - /* Nothing to do on a UP box, or when told not to.
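The converted waits above poll jiffies with time_before() instead of counting udelay() iterations, which stays correct across jiffies wrap-around because the comparison is done as a signed subtraction. A tiny demonstration, with the two macros re-declared locally just for the example:

/* time_before_demo.c -- the macros are re-declared here only for the demo. */
#include <stdio.h>

#define time_after(a, b)        ((long)(b) - (long)(a) < 0)
#define time_before(a, b)       time_after(b, a)

int main(void)
{
        unsigned long jiffies = (unsigned long)-5;      /* 5 ticks before wrap */
        unsigned long timeout = jiffies + 10;           /* wraps past zero */

        printf("naive comparison says expired: %d\n", !(jiffies < timeout));
        printf("time_before says still waiting: %d\n",
               time_before(jiffies, timeout));
        return 0;
}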
*/ if (smp_num_probed == 1 || max_cpus == 0) { printk(KERN_INFO "SMP mode deactivated.\n"); + cpu_present_mask = 1UL << boot_cpuid; return; } @@ -707,26 +662,38 @@ setup_profiling_timer(unsigned int multi static void send_ipi_message(unsigned long to_whom, enum ipi_message_type operation) { - long i, j; - - /* Reduce the number of memory barriers by doing two loops, - one to set the bits, one to invoke the interrupts. */ + unsigned long i, set, n; - mb(); /* Order out-of-band data and bit setting. */ - - for (i = 0, j = 1; i < NR_CPUS; ++i, j <<= 1) { - if (to_whom & j) - set_bit(operation, &ipi_data[i].bits); - } + /* Special case sending to one cpu. */ - mb(); /* Order bit setting and interrupt. */ + set = to_whom & -to_whom; + if (to_whom == set) { + n = __ffs(set); + mb(); + set_bit(operation, &ipi_data[n].bits); + mb(); + wripir(n); + } else { + mb(); + for (i = to_whom; i ; i &= ~set) { + set = i & -i; + n = __ffs(set); + set_bit(operation, &ipi_data[n].bits); + } - for (i = 0, j = 1; i < NR_CPUS; ++i, j <<= 1) { - if (to_whom & j) - wripir(i); + mb(); + for (i = to_whom; i ; i &= ~set) { + set = i & -i; + n = __ffs(set); + wripir(n); + } } } +/* Data for IPI_MIGRATION. */ +static spinlock_t migration_lock = SPIN_LOCK_UNLOCKED; +static task_t *migration_task; + /* Structure and data for smp_call_function. This is designed to minimize static memory requirements. Plus it looks cleaner. */ @@ -792,13 +759,23 @@ handle_ipi(struct pt_regs *regs) which = ops & -ops; ops &= ~which; - which = ffz(~which); + which = __ffs(which); - if (which == IPI_RESCHEDULE) { + switch (which) { + case IPI_RESCHEDULE: /* Reschedule callback. Everything to be done is done by the interrupt return path. */ - } - else if (which == IPI_CALL_FUNC) { + break; + case IPI_MIGRATION: + { + task_t *t = migration_task; + mb(); + spin_unlock(&migration_lock); + sched_task_migrated(t); + break; + } + case IPI_CALL_FUNC: + { struct smp_call_struct *data; void (*func)(void *info); void *info; @@ -821,13 +798,16 @@ handle_ipi(struct pt_regs *regs) /* Notify the sending CPU that the task is done. */ mb(); if (wait) atomic_dec (&data->unfinished_count); - } - else if (which == IPI_CPU_STOP) { + break; + } + case IPI_CPU_STOP: halt(); - } - else { + break; + + default: printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n", this_cpu, which); + break; } } while (ops); @@ -849,6 +829,20 @@ smp_send_reschedule(int cpu) "smp_send_reschedule: Sending IPI to self.\n"); #endif send_ipi_message(1UL << cpu, IPI_RESCHEDULE); +} + +void +smp_migrate_task(int cpu, task_t *t) +{ +#if DEBUG_IPI_MSG + if (cpu == hard_smp_processor_id()) + printk(KERN_WARNING + "smp_migrate_task: Sending IPI to self.\n"); +#endif + /* The target CPU will unlock the migration spinlock. */ + spin_lock(&migration_lock); + migration_task = t; + send_ipi_message(1UL << cpu, IPI_MIGRATION); } void diff -ruNp linux/arch/alpha/mm/fault.c 2.5.3/arch/alpha/mm/fault.c --- linux/arch/alpha/mm/fault.c Mon Sep 17 16:15:02 2001 +++ 2.5.3/arch/alpha/mm/fault.c Sat Feb 2 15:38:22 2002 @@ -196,8 +196,7 @@ no_context: */ out_of_memory: if (current->pid == 1) { - current->policy |= SCHED_YIELD; - schedule(); + yield(); down_read(&mm->mmap_sem); goto survive; } diff -ruNp linux/arch/alpha/vmlinux.lds.in 2.5.3/arch/alpha/vmlinux.lds.in --- linux/arch/alpha/vmlinux.lds.in Wed Jan 23 10:54:42 2002 +++ 2.5.3/arch/alpha/vmlinux.lds.in Sat Feb 2 16:18:49 2002 @@ -32,6 +32,8 @@ SECTIONS . 
= ALIGN(8192); __init_begin = .; .text.init : { *(.text.init) } + /* Don't discard .text.exit yet -- too many problems remaining. */ + .text.exit : { *(.text.exit) } .data.init : { *(.data.init) } . = ALIGN(16); @@ -100,5 +102,5 @@ SECTIONS .debug_typenames 0 : { *(.debug_typenames) } .debug_varnames 0 : { *(.debug_varnames) } - /DISCARD/ : { *(.text.exit) *(.data.exit) *(.exitcall.exit) } + /DISCARD/ : { *(.data.exit) *(.exitcall.exit) } } diff -ruNp linux/drivers/base/core.c 2.5.3/drivers/base/core.c --- linux/drivers/base/core.c Mon Jan 28 16:57:27 2002 +++ 2.5.3/drivers/base/core.c Sat Feb 2 14:31:08 2002 @@ -7,7 +7,8 @@ #include #include -#include +#include +#include #undef DEBUG diff -ruNp linux/drivers/base/fs.c 2.5.3/drivers/base/fs.c --- linux/drivers/base/fs.c Thu Jan 24 14:07:46 2002 +++ 2.5.3/drivers/base/fs.c Sat Feb 2 14:45:19 2002 @@ -8,7 +8,7 @@ #include #include #include -#include +#include extern struct driver_file_entry * device_default_files[]; diff -ruNp linux/drivers/block/ll_rw_blk.c 2.5.3/drivers/block/ll_rw_blk.c --- linux/drivers/block/ll_rw_blk.c Sat Jan 19 16:49:38 2002 +++ 2.5.3/drivers/block/ll_rw_blk.c Thu Feb 7 14:41:15 2002 @@ -182,12 +182,14 @@ void blk_queue_bounce_limit(request_queu static request_queue_t *last_q; /* - * set appropriate bounce gfp mask -- unfortunately we don't have a + * Set appropriate bounce gfp mask -- unfortunately we don't have a * full 4GB zone, so we have to resort to low memory for any bounces. * ISA has its own < 16MB zone. */ if (bounce_pfn < blk_max_low_pfn) { - BUG_ON(dma_addr < BLK_BOUNCE_ISA); + /* Note that some systems have an ISA bridge that frobs + addresses, so there may be no 16MB limit. */ + BUG_ON(dma_addr < BLK_BOUNCE_ISA && BLK_BOUNCE_ISA < ~0UL); init_emergency_isa_pool(); q->bounce_gfp = GFP_NOIO | GFP_DMA; } else diff -ruNp linux/include/asm-alpha/asm_offsets.h 2.5.3/include/asm-alpha/asm_offsets.h --- linux/include/asm-alpha/asm_offsets.h Mon Oct 12 11:40:12 1998 +++ 2.5.3/include/asm-alpha/asm_offsets.h Wed Dec 31 16:00:00 1969 @@ -1,13 +0,0 @@ -#ifndef __ASM_OFFSETS_H__ -#define __ASM_OFFSETS_H__ -#define TASK_STATE 0 -#define TASK_FLAGS 8 -#define TASK_SIGPENDING 16 -#define TASK_ADDR_LIMIT 24 -#define TASK_EXEC_DOMAIN 32 -#define TASK_NEED_RESCHED 40 -#define TASK_SIZE 1096 -#define STACK_SIZE 16384 -#define HAE_CACHE 0 -#define HAE_REG 8 -#endif /* __ASM_OFFSETS_H__ */ diff -ruNp linux/include/asm-alpha/bitops.h 2.5.3/include/asm-alpha/bitops.h --- linux/include/asm-alpha/bitops.h Fri Oct 12 15:35:54 2001 +++ 2.5.3/include/asm-alpha/bitops.h Thu Feb 7 14:52:42 2002 @@ -74,11 +74,11 @@ clear_bit(unsigned long nr, volatile voi * WARNING: non atomic version. */ static __inline__ void -__change_bit(unsigned long nr, volatile void * addr) +__clear_bit(unsigned long nr, volatile void * addr) { int *m = ((int *) addr) + (nr >> 5); - *m ^= 1 << (nr & 31); + *m &= ~(1 << (nr & 31)); } static inline void @@ -99,6 +99,17 @@ change_bit(unsigned long nr, volatile vo :"Ir" (1UL << (nr & 31)), "m" (*m)); } +/* + * WARNING: non atomic version. + */ +static __inline__ void +__change_bit(unsigned long nr, volatile void * addr) +{ + int *m = ((int *) addr) + (nr >> 5); + + *m ^= 1 << (nr & 31); +} + static inline int test_and_set_bit(unsigned long nr, volatile void *addr) { @@ -181,20 +192,6 @@ __test_and_clear_bit(unsigned long nr, v return (old & mask) != 0; } -/* - * WARNING: non atomic version. 
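The bitops.h hunks above add a non-atomic __clear_bit() and keep __change_bit() as a pure toggle, moved next to their atomic counterparts. A quick user-space check of the intended semantics; the helpers are copied in simplified form and atomicity is deliberately ignored here:

/* bit_demo.c -- simplified copies of the non-atomic helpers, user space only. */
#include <assert.h>

static void __clear_bit(unsigned long nr, volatile void *addr)
{
        int *m = ((int *) addr) + (nr >> 5);

        *m &= ~(1 << (nr & 31));
}

static void __change_bit(unsigned long nr, volatile void *addr)
{
        int *m = ((int *) addr) + (nr >> 5);

        *m ^= 1 << (nr & 31);
}

int main(void)
{
        unsigned int word = 0;

        __change_bit(3, &word);         /* toggle: 0 -> 1 */
        assert(word == 0x8);
        __change_bit(3, &word);         /* toggle back: 1 -> 0 */
        assert(word == 0);

        word = 0x8;
        __clear_bit(3, &word);          /* clear unconditionally */
        assert(word == 0);
        __clear_bit(3, &word);          /* clearing again is a no-op */
        assert(word == 0);
        return 0;
}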
- */ -static __inline__ int -__test_and_change_bit(unsigned long nr, volatile void * addr) -{ - unsigned long mask = 1 << (nr & 0x1f); - int *m = ((int *) addr) + (nr >> 5); - int old = *m; - - *m = old ^ mask; - return (old & mask) != 0; -} - static inline int test_and_change_bit(unsigned long nr, volatile void * addr) { @@ -220,6 +217,20 @@ test_and_change_bit(unsigned long nr, vo return oldbit != 0; } +/* + * WARNING: non atomic version. + */ +static __inline__ int +__test_and_change_bit(unsigned long nr, volatile void * addr) +{ + unsigned long mask = 1 << (nr & 0x1f); + int *m = ((int *) addr) + (nr >> 5); + int old = *m; + + *m = old ^ mask; + return (old & mask) != 0; +} + static inline int test_bit(int nr, volatile void * addr) { @@ -264,17 +275,39 @@ static inline unsigned long ffz(unsigned #endif } +/* + * __ffs = Find First set bit in word. Undefined if no set bit exists. + */ +static inline unsigned long __ffs(unsigned long word) +{ +#if defined(__alpha_cix__) && defined(__alpha_fix__) + /* Whee. EV67 can calculate it directly. */ + unsigned long result; + __asm__("cttz %1,%0" : "=r"(result) : "r"(word)); + return result; +#else + unsigned long bits, qofs, bofs; + + __asm__("cmpbge $31,%1,%0" : "=r"(bits) : "r"(word)); + qofs = ffz_b(bits); + __asm__("extbl %1,%2,%0" : "=r"(bits) : "r"(word), "r"(qofs)); + bofs = ffz_b(~bits); + + return qofs*8 + bofs; +#endif +} + #ifdef __KERNEL__ /* * ffs: find first bit set. This is defined the same way as * the libc and compiler builtin ffs routines, therefore - * differs in spirit from the above ffz (man ffs). + * differs in spirit from the above ffo (man ffs). */ static inline int ffs(int word) { - int result = ffz(~word); + int result = __ffs(word); return word ? result+1 : 0; } @@ -365,10 +398,53 @@ found_middle: } /* - * The optimizer actually does good code for this case.. + * Find next one bit in a bitmap reasonably efficiently. + */ +static inline unsigned long +find_next_bit(void * addr, unsigned long size, unsigned long offset) +{ + unsigned long * p = ((unsigned long *) addr) + (offset >> 6); + unsigned long result = offset & ~63UL; + unsigned long tmp; + + if (offset >= size) + return size; + size -= result; + offset &= 63UL; + if (offset) { + tmp = *(p++); + tmp &= ~0UL << offset; + if (size < 64) + goto found_first; + if (tmp) + goto found_middle; + size -= 64; + result += 64; + } + while (size & ~63UL) { + if ((tmp = *(p++))) + goto found_middle; + result += 64; + size -= 64; + } + if (!size) + return result; + tmp = *p; +found_first: + tmp &= ~0UL >> (64 - size); + if (!tmp) + return result + size; +found_middle: + return result + __ffs(tmp); +} + +/* + * The optimizer actually does good code for this case. */ #define find_first_zero_bit(addr, size) \ find_next_zero_bit((addr), (size), 0) +#define find_first_bit(addr, size) \ + find_next_bit((addr), (size), 0) #ifdef __KERNEL__ diff -ruNp linux/include/asm-alpha/io.h 2.5.3/include/asm-alpha/io.h --- linux/include/asm-alpha/io.h Tue Nov 27 09:23:27 2001 +++ 2.5.3/include/asm-alpha/io.h Thu Feb 7 14:52:42 2002 @@ -18,6 +18,7 @@ #include #include #include +#include #include /* @@ -60,7 +61,10 @@ static inline void * phys_to_virt(unsign return (void *) (address + IDENT_ADDR); } -#define page_to_phys(page) (((page) - (page)->zone->zone_mem_map) << PAGE_SHIFT) +#define page_to_phys(page) PAGE_TO_PA(page) + +/* This depends on working iommu. */ +#define BIO_VMERGE_BOUNDARY (alpha_mv.mv_pci_tbi ? 
PAGE_SIZE : 0) /* * Change addresses as seen by the kernel (virtual) to addresses as diff -ruNp linux/include/asm-alpha/mmu_context.h 2.5.3/include/asm-alpha/mmu_context.h --- linux/include/asm-alpha/mmu_context.h Fri Dec 29 14:07:23 2000 +++ 2.5.3/include/asm-alpha/mmu_context.h Thu Feb 7 14:52:48 2002 @@ -21,6 +21,32 @@ #include #endif +/* ??? This does not belong here. */ +/* + * Every architecture must define this function. It's the fastest + * way of searching a 168-bit bitmap where the first 128 bits are + * unlikely to be set. It's guaranteed that at least one of the 168 + * bits is set. + */ +#if MAX_RT_PRIO != 128 || MAX_PRIO > 192 +# error update this function. +#endif + +static inline int +sched_find_first_bit(unsigned long *b) +{ + unsigned long b0 = b[0], b1 = b[1], b2 = b[2]; + unsigned long offset = 128; + + if (unlikely(b0 | b1)) { + b2 = (b0 ? b0 : b1); + offset = (b0 ? 0 : 64); + } + + return __ffs(b2) + offset; +} + + extern inline unsigned long __reload_thread(struct thread_struct *pcb) { diff -ruNp linux/include/asm-alpha/smp.h 2.5.3/include/asm-alpha/smp.h --- linux/include/asm-alpha/smp.h Thu Sep 13 15:21:32 2001 +++ 2.5.3/include/asm-alpha/smp.h Thu Feb 7 14:52:42 2002 @@ -55,7 +55,7 @@ extern int __cpu_logical_map[NR_CPUS]; #define cpu_logical_map(cpu) __cpu_logical_map[cpu] #define hard_smp_processor_id() __hard_smp_processor_id() -#define smp_processor_id() (current->processor) +#define smp_processor_id() (current->cpu) extern unsigned long cpu_present_mask; #define cpu_online_map cpu_present_mask diff -ruNp linux/include/asm-alpha/unistd.h 2.5.3/include/asm-alpha/unistd.h --- linux/include/asm-alpha/unistd.h Fri Nov 9 13:45:35 2001 +++ 2.5.3/include/asm-alpha/unistd.h Thu Feb 7 14:52:42 2002 @@ -575,6 +575,9 @@ static inline long sync(void) return sys_sync(); } +struct rusage; +extern asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, + int options, struct rusage * ru); static inline pid_t waitpid(int pid, int * wait_stat, int flags) { return sys_wait4(pid, wait_stat, flags, NULL); diff -ruNp linux/include/linux/dcache.h 2.5.3/include/linux/dcache.h --- linux/include/linux/dcache.h Tue Jan 29 21:41:09 2002 +++ 2.5.3/include/linux/dcache.h Thu Feb 7 13:16:40 2002 @@ -5,6 +5,8 @@ #include #include +#include /* BUG */ + /* * linux/include/linux/dcache.h diff -ruNp linux/include/linux/nfs_fs.h 2.5.3/include/linux/nfs_fs.h --- linux/include/linux/nfs_fs.h Tue Jan 29 21:42:01 2002 +++ 2.5.3/include/linux/nfs_fs.h Thu Feb 7 14:52:44 2002 @@ -22,6 +22,8 @@ #include #include +#include /* BUG */ + /* * Enable debugging support for nfs client. * Requires RPC_DEBUG.
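The sched_find_first_bit() added above special-cases the first two 64-bit words because the realtime part of the priority bitmap is rarely populated. A user-space cross-check against a naive scan; unlikely() is dropped and __ffs() is replaced by a portable loop so the sketch compiles anywhere, and 64-bit longs are assumed, as on Alpha:

/* sched_bitmap_demo.c -- assumes 64-bit unsigned long; demo code only. */
#include <assert.h>
#include <string.h>

static inline unsigned long __ffs(unsigned long word)
{
        unsigned long n = 0;

        while (!(word & 1)) {
                word >>= 1;
                n++;
        }
        return n;
}

static inline int sched_find_first_bit(unsigned long *b)
{
        unsigned long b0 = b[0], b1 = b[1], b2 = b[2];
        unsigned long offset = 128;

        if (b0 | b1) {
                b2 = (b0 ? b0 : b1);
                offset = (b0 ? 0 : 64);
        }
        return __ffs(b2) + offset;
}

static int naive_first_bit(unsigned long *b)
{
        int i;

        for (i = 0; i < 192; i++)
                if (b[i / 64] & (1UL << (i % 64)))
                        return i;
        return -1;
}

int main(void)
{
        unsigned long b[3];
        int bit;

        for (bit = 0; bit < 192; bit++) {
                memset(b, 0, sizeof b);
                b[bit / 64] |= 1UL << (bit % 64);
                assert(sched_find_first_bit(b) == naive_first_bit(b));
        }
        return 0;
}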