Re: 2.6.29 regression: ATA bus errors on resume

From: Tejun Heo
Date: Wed Apr 01 2009 - 21:50:10 EST


Hello,

Sorry about the delay.

Niel Lambrechts wrote:
> I think I should be able to reproduce when actively using X with 2.6.29,
> and I have an external disk where I could backup to / boot from if the
> corruption became a problem.
>
> These issues are keeping me from 2.6.29 so I'll gladly help where I can,
> if you can please provide me the patches and the .config settings that
> may be required?

Attached is the debug patch. Please reproduce the problem with the
patch applied and post the full kernel log.

Thanks.

--
tejun
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 0183131..1597874 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -1274,7 +1274,7 @@ void ata_eh_about_to_do(struct ata_link *link, struct ata_device *dev,
{
struct ata_port *ap = link->ap;
struct ata_eh_info *ehi = &link->eh_info;
- struct ata_eh_context *ehc = &link->eh_context;
+ //struct ata_eh_context *ehc = &link->eh_context;
unsigned long flags;

spin_lock_irqsave(ap->lock, flags);
@@ -1284,7 +1284,7 @@ void ata_eh_about_to_do(struct ata_link *link, struct ata_device *dev,
/* About to take EH action, set RECOVERED. Ignore actions on
* slave links as master will do them again.
*/
- if (!(ehc->i.flags & ATA_EHI_QUIET) && link != ap->slave_link)
+ if (/*!(ehc->i.flags & ATA_EHI_QUIET) && */link != ap->slave_link)
ap->pflags |= ATA_PFLAG_RECOVERED;

spin_unlock_irqrestore(ap->lock, flags);
@@ -2017,8 +2017,13 @@ static void ata_eh_link_autopsy(struct ata_link *link)

/* determine whether the command is worth retrying */
if (!(qc->err_mask & AC_ERR_INVALID) &&
- ((qc->flags & ATA_QCFLAG_IO) || qc->err_mask != AC_ERR_DEV))
+ ((qc->flags & ATA_QCFLAG_IO) || qc->err_mask != AC_ERR_DEV)) {
+ ata_dev_printk(qc->dev, KERN_INFO,
+ "XXX setting retry on qc%d\n", tag);
qc->flags |= ATA_QCFLAG_RETRY;
+ } else
+ ata_dev_printk(qc->dev, KERN_INFO,
+ "XXX no retry for qc%d\n", tag);

/* accumulate error info */
ehc->i.dev = qc->dev;
@@ -2126,8 +2131,8 @@ static void ata_eh_link_report(struct ata_link *link)
char tries_buf[6];
int tag, nr_failed = 0;

- if (ehc->i.flags & ATA_EHI_QUIET)
- return;
+ /*if (ehc->i.flags & ATA_EHI_QUIET)
+ return;*/

desc = NULL;
if (ehc->i.desc[0] != '\0')
@@ -2147,8 +2152,8 @@ static void ata_eh_link_report(struct ata_link *link)
nr_failed++;
}

- if (!nr_failed && !ehc->i.err_mask)
- return;
+ /*if (!nr_failed && !ehc->i.err_mask)
+ return;*/

frozen = "";
if (ap->pflags & ATA_PFLAG_FROZEN)
@@ -3350,16 +3355,23 @@ void ata_eh_finish(struct ata_port *ap)
* generate sense data in this function,
* considering both err_mask and tf.
*/
- if (qc->flags & ATA_QCFLAG_RETRY)
+ if (qc->flags & ATA_QCFLAG_RETRY) {
+ ata_dev_printk(qc->dev, KERN_INFO, "XXX retrying qc%d, retries=%d\n",
+ tag, qc->scsicmd->retries);
ata_eh_qc_retry(qc);
- else
+ } else {
+ ata_dev_printk(qc->dev, KERN_INFO, "XXX terminating qc%d\n", tag);
ata_eh_qc_complete(qc);
+ }
} else {
if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
+ ata_dev_printk(qc->dev, KERN_INFO, "XXX terminating qc%d (SENSE), retries=%d\n",
+ tag, qc->scsicmd->retries);
ata_eh_qc_complete(qc);
} else {
/* feed zero TF to sense generation */
memset(&qc->result_tf, 0, sizeof(qc->result_tf));
+ ata_dev_printk(qc->dev, KERN_INFO, "XXX retrying qc%d (bogus SENSE)\n", tag);
ata_eh_qc_retry(qc);
}
}