arca-24 [Re: new arca-23 released]

Andrea Arcangeli (andrea@e-mind.com)
Fri, 20 Nov 1998 01:45:02 +0100 (CET)


On Thu, 19 Nov 1998, Andrea Arcangeli wrote:

>My tree includes:
>
>o Fixed some minutes ago a critical bug in the asm of entry.S. This
> was at least the cause of the UP stall. entry.S was using a
> corrupted current-task-struct pointer.

I've put out arca-24 against 2.1.129. It includes the smart/right fix
for the UP fork child case and continues to improve the entry.S x86 asm.

Other differences are:

o Try to be SMP safe when removing the itimer in do_exit() (by Linus).

o Check whether the timer is already pending before adding it; if it is
  already pending, do nothing.
  I think this is the cleaner and more efficient way to fix the
  getitimer race, and it makes the timer code more robust too: doing
  add_timer(timer); add_timer(timer); will no longer cause any problem
  (see the short example after the patch below).

I include the patch here so that people can complain. Note that this diff
also includes my latest schedule_timeout() and a few other things.

Index: kernel/sched.c
===================================================================
RCS file: /var/cvs/linux/kernel/sched.c,v
retrieving revision 1.1.1.1
retrieving revision 1.1.1.1.2.6
diff -u -r1.1.1.1 -r1.1.1.1.2.6
--- sched.c 1998/11/19 23:01:12 1.1.1.1
+++ sched.c 1998/11/20 00:35:23 1.1.1.1.2.6
@@ -7,6 +7,8 @@
* 1996-12-23 Modified by Dave Grothe to fix bugs in semaphores and
* make semaphores SMP safe
* 1997-01-28 Modified by Finn Arne Gangstad to make timers scale better.
+ * 1998-11-19 Implemented schedule_timeout() and related stuff
+ * by Andrea Arcangeli
*/

/*
@@ -33,6 +35,7 @@
#include <asm/pgtable.h>
#include <asm/mmu_context.h>
#include <asm/spinlock.h>
+#include <asm/processor.h> /* for get_wchan() */

#include <linux/timex.h>

@@ -83,7 +86,7 @@

extern void mem_use(void);

-unsigned long volatile jiffies=0;
+unsigned long volatile jiffies=JIFFIES_OFFSET;

/*
* Init task must be ok at boot for the ix86 as we will check its signals
@@ -135,8 +138,13 @@
}
#endif
#endif
- if (p->policy != SCHED_OTHER || p->counter > current->counter + 3)
- current->need_resched = 1;
+ /*
+ * If the current process is the idle one, we must reschedule ASAP.
+ * -arca
+ */
+ if (!current->pid || p->counter > current->counter ||
+ p->policy != SCHED_OTHER)
+ current->need_resched = 1;
}

/*
@@ -321,11 +329,15 @@
struct timer_list *vec[TVR_SIZE];
};

-static struct timer_vec tv5 = { 0 };
-static struct timer_vec tv4 = { 0 };
-static struct timer_vec tv3 = { 0 };
-static struct timer_vec tv2 = { 0 };
-static struct timer_vec_root tv1 = { 0 };
+static struct timer_vec tv5 = { (((JIFFIES_OFFSET - 1) >>
+ (TVR_BITS + TVN_BITS * 3)) + 1) & TVN_MASK };
+static struct timer_vec tv4 = { (((JIFFIES_OFFSET - 1) >>
+ (TVR_BITS + TVN_BITS * 2)) + 1) & TVN_MASK };
+static struct timer_vec tv3 = { (((JIFFIES_OFFSET - 1) >>
+ (TVR_BITS + TVN_BITS)) + 1) & TVN_MASK };
+static struct timer_vec tv2 = { (((JIFFIES_OFFSET - 1) >>
+ TVR_BITS) + 1) & TVN_MASK };
+static struct timer_vec_root tv1 = { JIFFIES_OFFSET & TVR_MASK };

static struct timer_vec * const tvecs[] = {
(struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5
@@ -333,7 +345,7 @@

#define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0]))

-static unsigned long timer_jiffies = 0;
+static unsigned long timer_jiffies = JIFFIES_OFFSET;

static inline void insert_timer(struct timer_list *timer,
struct timer_list **vec, int idx)
@@ -380,15 +392,6 @@

spinlock_t timerlist_lock = SPIN_LOCK_UNLOCKED;

-void add_timer(struct timer_list *timer)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&timerlist_lock, flags);
- internal_add_timer(timer);
- spin_unlock_irqrestore(&timerlist_lock, flags);
-}
-
static inline int detach_timer(struct timer_list *timer)
{
struct timer_list *prev = timer->prev;
@@ -402,6 +405,16 @@
return 0;
}

+void add_timer(struct timer_list *timer)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&timerlist_lock, flags);
+ if (!timer_pending(timer))
+ internal_add_timer(timer);
+ spin_unlock_irqrestore(&timerlist_lock, flags);
+}
+
void mod_timer(struct timer_list *timer, unsigned long expires)
{
unsigned long flags;
@@ -420,8 +433,8 @@

spin_lock_irqsave(&timerlist_lock, flags);
ret = detach_timer(timer);
- timer->next = timer->prev = 0;
spin_unlock_irqrestore(&timerlist_lock, flags);
+ timer->next = timer->prev = 0;
return ret;
}

@@ -442,17 +455,18 @@
struct timer_list timer;
unsigned long expire;

- /*
- * PARANOID.
- */
- if (current->state == TASK_UNINTERRUPTIBLE)
+ switch (current->state)
{
- printk(KERN_WARNING "schedule_timeout: task not interrutible "
- "from %p\n", __builtin_return_address(0));
+ case TASK_INTERRUPTIBLE:
+ case TASK_RUNNING:
+ break;
+ default:
/*
* We don't want to interrupt a not interruptible task
* risking to cause corruption. Better a deadlock ;-).
*/
+ printk(KERN_ERR "schedule_timeout: task state %ld, from %p!\n",
+ current->state, __builtin_return_address(0));
timeout = MAX_SCHEDULE_TIMEOUT;
}

@@ -599,10 +613,12 @@

#ifdef __SMP__
next->has_cpu = 1;
- next->processor = this_cpu;
#endif

if (prev != next) {
+#ifdef __SMP__
+ next->processor = this_cpu;
+#endif
kstat.context_swtch++;
get_mmu_context(next);
switch_to(prev,next);
@@ -1604,7 +1620,7 @@
asmlinkage int sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp)
{
struct timespec t;
- unsigned long expire;
+ long expire;

if(copy_from_user(&t, rqtp, sizeof(struct timespec)))
return -EFAULT;
@@ -1626,14 +1642,20 @@
return 0;
}

- expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec);
+ expire = (long) (timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec));
+ /*
+ * Handle a too high timeout for the scheduler after the
+ * struct timespec to jiffies conversion. -arca
+ */
+ if (expire < 0)
+ expire = MAX_SCHEDULE_TIMEOUT;

current->state = TASK_INTERRUPTIBLE;
expire = schedule_timeout(expire);

if (expire) {
if (rmtp) {
- jiffies_to_timespec(expire, &t);
+ jiffies_to_timespec((unsigned long) expire, &t);
if (copy_to_user(rmtp, &t, sizeof(struct timespec)))
return -EFAULT;
}
@@ -1655,16 +1677,18 @@
else
printk(" ");
#if (BITS_PER_LONG == 32)
- if (p == current)
- printk(" current ");
- else
- printk(" %08lX ", thread_saved_pc(&p->tss));
-#else
+#define current_string " current "
+#define wchan_string " %08lX "
+#else /* 64bit archs */
+#define current_string " current task "
+#define wchan_string " %016lx "
+#endif
if (p == current)
- printk(" current task ");
+ printk(current_string);
else
- printk(" %016lx ", thread_saved_pc(&p->tss));
-#endif
+ printk(wchan_string, get_wchan(p));
+#undef current_string
+#undef wchan_string
{
unsigned long * n = (unsigned long *) (p+1);
while (!*n)
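
As a quick illustration of the new add_timer() semantics above (a minimal
sketch with a hypothetical callback, not code taken from the patch): the
second add_timer() call now finds the timer already pending and simply
returns, so the timer fires once instead of corrupting the timer lists.

#include <linux/timer.h>
#include <linux/sched.h>                /* jiffies, HZ */
#include <linux/kernel.h>               /* printk() */

static void my_handler(unsigned long data)      /* hypothetical callback */
{
        printk(KERN_DEBUG "timer fired, data = %lu\n", data);
}

static struct timer_list my_timer;

static void arm_once(void)
{
        init_timer(&my_timer);
        my_timer.expires = jiffies + HZ;        /* one second from now */
        my_timer.function = my_handler;
        my_timer.data = 0;

        add_timer(&my_timer);
        add_timer(&my_timer);   /* harmless now: the timer is already pending */
}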

o I also did a further cleanup of the aic7xxx timer case to save a few
  bytes of memory. Honestly I don't know whether it will work, but it
  compiles fine ;-). I include this patch here too (a short illustration
  of the pointer trick it uses follows the patch):

Index: drivers/scsi/aic7xxx.c
===================================================================
RCS file: /var/cvs/linux/drivers/scsi/aic7xxx.c,v
retrieving revision 1.1.1.1
retrieving revision 1.1.1.1.2.2
diff -u -r1.1.1.1 -r1.1.1.1.2.2
--- aic7xxx.c 1998/11/19 23:02:40 1.1.1.1
+++ aic7xxx.c 1998/11/19 23:33:43 1.1.1.1.2.2
@@ -1107,6 +1107,7 @@
*/

struct timer_list dev_timer[MAX_TARGETS];
+ int dev_timer_arg[MAX_TARGETS];

/*
* The next 64....
@@ -4136,42 +4137,35 @@
* then run waiting queue to start commands.
***************************************************************************/
static void
-aic7xxx_timer(struct aic7xxx_host *p)
+aic7xxx_timer(int * arg)
{
- int i, j;
+ int i = *arg, j;
unsigned long cpu_flags = 0;
struct aic7xxx_scb *scb;
+ struct aic7xxx_host *p = ((struct aic7xxx_host *)((char *)(arg)-(unsigned long)(&((struct aic7xxx_host *)0)->dev_timer_arg[*arg])));

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,1,95)
DRIVER_LOCK
#else
spin_lock_irqsave(&io_request_lock, cpu_flags);
#endif
- for(i=0; i<MAX_TARGETS; i++)
+ p->dev_temp_queue_depth[i] = p->dev_max_queue_depth[i];
+ j = 0;
+ while ( ((scb = scbq_remove_head(&p->delayed_scbs[i])) != NULL) &&
+ (j++ < p->scb_data->numscbs) )
{
- if ( timer_pending(&p->dev_timer[i]) &&
- time_before_eq(p->dev_timer[i].expires, jiffies) )
- {
- del_timer(&p->dev_timer[i]);
- p->dev_temp_queue_depth[i] = p->dev_max_queue_depth[i];
- j = 0;
- while ( ((scb = scbq_remove_head(&p->delayed_scbs[i])) != NULL) &&
- (j++ < p->scb_data->numscbs) )
- {
- scbq_insert_tail(&p->waiting_scbs, scb);
- }
- if (j == p->scb_data->numscbs)
- {
- printk(INFO_LEAD "timer: Yikes, loop in delayed_scbs list.\n",
- p->host_no, 0, i, -1);
- scbq_init(&p->delayed_scbs[i]);
- scbq_init(&p->waiting_scbs);
- /*
- * Well, things are screwed now, wait for a reset to clean the junk
- * out.
- */
- }
- }
+ scbq_insert_tail(&p->waiting_scbs, scb);
+ }
+ if (j == p->scb_data->numscbs)
+ {
+ printk(INFO_LEAD "timer: Yikes, loop in delayed_scbs list.\n",
+ p->host_no, 0, i, -1);
+ scbq_init(&p->delayed_scbs[i]);
+ scbq_init(&p->waiting_scbs);
+ /*
+ * Well, things are screwed now, wait for a reset to clean the junk
+ * out.
+ */
}
aic7xxx_run_waiting_queues(p);
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,1,95)
@@ -7487,7 +7481,8 @@
p->dev_mid_level_queue_depth[i] = 3;
scbq_init(&p->delayed_scbs[i]);
init_timer(&p->dev_timer[i]);
- p->dev_timer[i].data = (unsigned long)p;
+ p->dev_timer_arg[i] = i;
+ p->dev_timer[i].data = (unsigned long)&p->dev_timer_arg[i];
p->dev_timer[i].function = (void *)aic7xxx_timer;
}
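
For clarity, here is a tiny self-contained illustration (the names are made
up; this is not driver code) of the pointer trick used in aic7xxx_timer()
above: timer->data points at dev_timer_arg[i] inside the host structure, and
the host pointer is recovered by subtracting that element's offset, which is
the same idea as the container_of() macro of later kernels.

#include <stddef.h>
#include <stdio.h>

#define MAX_TARGETS 16

struct host {                           /* stand-in for struct aic7xxx_host */
        long other_state;
        int dev_timer_arg[MAX_TARGETS];
};

/* Recover the enclosing structure from a pointer to dev_timer_arg[i]. */
static struct host *host_from_arg(int *arg)
{
        int i = *arg;                   /* the target number stored at init time */
        return (struct host *)((char *) arg
                - offsetof(struct host, dev_timer_arg) - i * sizeof(int));
}

int main(void)
{
        struct host h;
        h.dev_timer_arg[5] = 5;
        printf("%d\n", host_from_arg(&h.dev_timer_arg[5]) == &h);      /* prints 1 */
        return 0;
}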

o Don't run swap_tick() every jiffy. Reinserted the swapout_interval
  sysctl so that we don't poll too frequently; __get_free_pages() will
  wake up kswapd by hand if we are low on free pages (a rough sketch of
  the polling idea follows).
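
A rough sketch of the polling idea (illustrative only; this is my own
approximation of the mechanism, not the actual patch code): skip the
swapout work unless swapout_interval jiffies have passed since the last
run.

#include <linux/sched.h>                /* jiffies */

static unsigned long last_swap_tick;
extern int swapout_interval;            /* assumed sysctl-tunable value */

void swap_tick(void)
{
        if (jiffies - last_swap_tick < swapout_interval)
                return;                 /* polled too recently, nothing to do */
        last_swap_tick = jiffies;
        /* ... the usual periodic swapout work goes here ... */
}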

Let me know if there are problems (mainly for aic7xxx users...).

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/