Bigger discard payloads

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



I posted the first version of this patch a few months ago. Inspired by
the recent discussions I decided to take another stab at supporting TRIM
payloads bigger than 512 bytes.

Unfortunately, the only drives I have readily available right now are
Intel ones, and they choke on payloads bigger than 512 bytes despite
advertising support for 4 KiB. So I can't really test this, and I'm
throwing the patch out there for people who want to play with it...

The patch advertises the maximum reported by the device, but in reality we
can't currently issue discards over 4 GiB due to a bio limitation.

-- 
Martin K. Petersen	Oracle Linux Engineering


diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 41fb691..f0130dc 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -152,7 +152,13 @@ static ssize_t queue_discard_granularity_show(struct request_queue *q, char *pag
 
+/*
+ * Show the queue's maximum discard size in bytes.  The limit is kept in
+ * 512-byte sectors; it is widened to 64 bits before the shift so values
+ * of 4 GiB and above are not truncated where unsigned long is 32 bits.
+ */
 static ssize_t queue_discard_max_show(struct request_queue *q, char *page)
 {
-	return queue_var_show(q->limits.max_discard_sectors << 9, page);
+	return sprintf(page, "%llu\n",
+		       (unsigned long long)q->limits.max_discard_sectors << 9);
 }
 
 static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *page)
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 66aa4be..5cabec1 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -2120,7 +2120,13 @@ static unsigned int ata_scsiop_inq_b0(struct ata_scsi_args *args, u8 *rbuf)
 	 * with the unmap bit set.
 	 */
 	if (ata_id_has_trim(args->id)) {
-		put_unaligned_be32(65535 * 512 / 8, &rbuf[20]);
+		unsigned int blocks;
+
+		/* Default to 1 if unspecified in word 105.  Cap at 1 page. */
+		blocks = clamp(ata_id_trim_range_blocks(args->id), 1U,
+			       (unsigned int)ATA_MAX_TRIM_BLOCKS);
+
+		put_unaligned_be32(65535 * 512 / 8 * blocks, &rbuf[20]);
 		put_unaligned_be32(1, &rbuf[28]);
 	}
 
@@ -3001,6 +3007,7 @@ static unsigned int ata_scsi_write_same_xlat(struct ata_queued_cmd *qc)
 	u64 block;
 	u32 n_block;
 	u32 size;
+	unsigned int range_blocks;
 	void *buf;
 
 	/* we may not issue DMA commands if no DMA mode is set */
@@ -3023,17 +3030,25 @@ static unsigned int ata_scsi_write_same_xlat(struct ata_queued_cmd *qc)
 		goto invalid_fld;
 
 	buf = page_address(sg_page(scsi_sglist(scmd)));
-	size = ata_set_lba_range_entries(buf, 512, block, n_block);
+	size = ata_set_lba_range_entries(buf, ATA_MAX_TRIM_BYTES, block,
+					 n_block);
+	range_blocks = size / ATA_SECT_SIZE;
 
 	tf->protocol = ATA_PROT_DMA;
 	tf->hob_feature = 0;
 	tf->feature = ATA_DSM_TRIM;
-	tf->hob_nsect = (size / 512) >> 8;
-	tf->nsect = size / 512;
+	tf->hob_nsect = range_blocks >> 8;
+	tf->nsect = range_blocks;
 	tf->command = ATA_CMD_DSM;
 	tf->flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE | ATA_TFLAG_LBA48 |
 		     ATA_TFLAG_WRITE;
 
+	/*
+	 * If we end up using more than the 512 byte payload passed by
+	 * WRITE SAME we must adjust the transfer length.
+	 */
+	if (range_blocks > 1)
+		scmd->request->extra_len = (range_blocks - 1) * ATA_SECT_SIZE;
 	ata_qc_set_pc_nbytes(qc);
 
 	return 0;
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 9564961..59c56bb 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -456,7 +456,7 @@ static int scsi_setup_discard_cmnd(struct scsi_device *sdp, struct request *rq)
 		nr_sectors >>= 3;
 	}
 
-	rq->timeout = SD_TIMEOUT;
+	rq->timeout = SD_WS_UNMAP_TIMEOUT;
 
 	memset(rq->cmd, 0, rq->cmd_len);
 
@@ -2090,7 +2090,7 @@ static void sd_read_block_limits(struct scsi_disk *sdkp)
 
 		if (lba_count)
 			q->limits.max_discard_sectors =
-				lba_count * sector_sz >> 9;
+				lba_count * (sector_sz >> 9);
 
 		granularity = get_unaligned_be32(&buffer[28]);
 
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 55488fa..872e574 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -20,6 +20,7 @@
 #define SD_TIMEOUT		(30 * HZ)
 #define SD_MOD_TIMEOUT		(75 * HZ)
 #define SD_FLUSH_TIMEOUT	(60 * HZ)
+#define SD_WS_UNMAP_TIMEOUT	(300 * HZ)
 
 /*
  * Number of allowed retries
diff --git a/include/linux/ata.h b/include/linux/ata.h
index 0c4929f..8d628f8 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -47,6 +47,8 @@ enum {
 	ATA_MAX_SECTORS		= 256,
 	ATA_MAX_SECTORS_LBA48	= 65535,/* TODO: 65536? */
 	ATA_MAX_SECTORS_TAPE	= 65535,
+	ATA_MAX_TRIM_BYTES	= PAGE_SIZE,
+	ATA_MAX_TRIM_BLOCKS	= ATA_MAX_TRIM_BYTES >> 9,
 
 	ATA_ID_WORDS		= 256,
 	ATA_ID_CONFIG		= 0,
@@ -88,6 +90,7 @@ enum {
 	ATA_ID_HW_CONFIG	= 93,
 	ATA_ID_SPG		= 98,
 	ATA_ID_LBA_CAPACITY_2	= 100,
+	ATA_ID_TRIM_RANGE_BLKS	= 105,
 	ATA_ID_SECTOR_SIZE	= 106,
 	ATA_ID_LOGICAL_SECTOR_SIZE	= 117,	/* and 118 */
 	ATA_ID_LAST_LUN		= 126,
@@ -861,6 +864,14 @@ static inline int ata_id_has_zero_after_trim(const u16 *id)
 	return 0;
 }
 
+/* Return the TRIM range block count from id word 105; 0 if TRIM is absent. */
+static inline unsigned int ata_id_trim_range_blocks(const u16 *id)
+{
+	if (!ata_id_has_trim(id))
+		return 0;
+	return id[ATA_ID_TRIM_RANGE_BLKS];
+}
+
 static inline int ata_id_current_chs_valid(const u16 *id)
 {
 	/* For ATA-1 devices, if the INITIALIZE DEVICE PARAMETERS command
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux