Tim Blechmann wrote: > On 08/27/2009 10:38 AM, Tejun Heo wrote: >> Tim Blechmann wrote: >>>>>>> running 2.6.31-rc5 (7cb7beb31aa3d941833b6a6e553687422c31e4b6 to be >>>>>>> exact), sometimes some hard disks don't show up. >>>>>>> >>>>>>> after booting, my root hd (sda) is mounted to /, while two other hds >>>>>>> (sdb/sdc) are mounted as a user. sda is always present, but the other >>>>>>> two sometimes don't show up (i.e. they are not listed in /dev/disk/, nor >>>>>>> do they have a /dev/sdX link). with 2.6.29 and 2.6.30, all three disks >>>>>>> are reported correctly. >>>>>> Can you please attach boot logs of a successful and a failed boot? >>>>> i have two files attached: >>>>> - dmesg_good - all hds are available >>>>> - dmesg_bad - one hd is missing >>>> Can you please apply the attached patch and post the bad boot log? >>> attached you find boot logs for both a good and a bad boot >> Sorry about the long delay. I somehow marked the message read without >> actually reading it. >> >> I suspected the problem was with getting the wrong classification code >> or phantom device detection kicking in spuriously. Looks like the >> problem happens way before that. Can you please apply the attached >> patch and report the result? > > i applied your patch on top of the current linus/master branch and > currently (after rebooting 5 or 6 times) i cannot reproduce the problem > any more ... > however, there is a warning stack trace in the boot log from libata code > (bootlog attached) Oops, that was my bad. This should remove the useless warning. Thanks. -- tejun
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 072ba5e..876ede2 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -3770,6 +3770,7 @@ int sata_link_resume(struct ata_link *link, const unsigned long *params, scontrol = (scontrol & 0x0f0) | 0x300; + ata_link_printk(link, KERN_INFO, "XXX bringing up link\n"); if ((rc = sata_scr_write(link, SCR_CONTROL, scontrol))) return rc; @@ -3778,7 +3779,9 @@ int sata_link_resume(struct ata_link *link, const unsigned long *params, */ msleep(200); - if ((rc = sata_link_debounce(link, params, deadline))) + rc = sata_link_debounce(link, params, deadline); + ata_link_printk(link, KERN_INFO, "XXX debounced rc=%d\n", rc); + if (rc) return rc; /* clear SError, some PHYs require this even for SRST to work */ @@ -3904,8 +3907,10 @@ int sata_link_hardreset(struct ata_link *link, const unsigned long *timing, if (rc) goto out; /* if link is offline nothing more to do */ - if (ata_phys_link_offline(link)) + if (ata_phys_link_offline(link)) { + ata_link_printk(link, KERN_INFO, "XXX phys link offline\n"); goto out; + } /* Link is online. From this point, -ENODEV too is an error. 
*/ if (online) @@ -6060,7 +6065,7 @@ static void async_port_probe(void *data, async_cookie_t cookie) ehi->probe_mask |= ATA_ALL_DEVICES; ehi->action |= ATA_EH_RESET | ATA_EH_LPM; - ehi->flags |= ATA_EHI_NO_AUTOPSY | ATA_EHI_QUIET; + ehi->flags |= ATA_EHI_NO_AUTOPSY/* | ATA_EHI_QUIET*/; ap->pflags &= ~ATA_PFLAG_INITIALIZING; ap->pflags |= ATA_PFLAG_LOADING; diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index bbbb1fa..c718d12 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -1998,6 +1998,9 @@ unsigned int ata_sff_dev_classify(struct ata_device *dev, int present, if (r_err) *r_err = err; + ata_dev_printk(dev, KERN_INFO, "XXX CLASSIFY TF %02x/%02x:%02x:%02x:%02x\n", + tf.command, tf.feature, tf.lbal, tf.lbam, tf.lbah); + /* see if device passed diags: continue and warn later */ if (err == 0) /* diagnostic fail : do nothing _YET_ */ @@ -2006,11 +2009,14 @@ unsigned int ata_sff_dev_classify(struct ata_device *dev, int present, /* do nothing */ ; else if ((dev->devno == 0) && (err == 0x81)) /* do nothing */ ; - else + else { + ata_dev_printk(dev, KERN_INFO, "XXX diag nodev\n"); return ATA_DEV_NONE; + } /* determine if device is ATA or ATAPI */ class = ata_dev_classify(&tf); + ata_dev_printk(dev, KERN_INFO, "XXX ata_dev_classify=%d\n", class); if (class == ATA_DEV_UNKNOWN) { /* If the device failed diagnostic, it's likely to @@ -2019,13 +2025,18 @@ unsigned int ata_sff_dev_classify(struct ata_device *dev, int present, * device signature is invalid with diagnostic * failure. 
*/ - if (present && (dev->horkage & ATA_HORKAGE_DIAGNOSTIC)) + if (present && (dev->horkage & ATA_HORKAGE_DIAGNOSTIC)) { + ata_dev_printk(dev, KERN_INFO, "XXX UNK && present -> ATA\n"); class = ATA_DEV_ATA; - else + } else { class = ATA_DEV_NONE; + ata_dev_printk(dev, KERN_INFO, "XXX UNK && !present -> NONE\n"); + } } else if ((class == ATA_DEV_ATA) && - (ap->ops->sff_check_status(ap) == 0)) + (ap->ops->sff_check_status(ap) == 0)) { class = ATA_DEV_NONE; + ata_dev_printk(dev, KERN_INFO, "XXX stat==0 -> NONE\n"); + } return class; }