Re: [PATCH 1/5] block: rewrite blk_bvec_map_sg to avoid a nth_page call

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 4/15/19 11:33 PM, Christoph Hellwig wrote:
On Mon, Apr 15, 2019 at 02:07:31PM -0700, Guenter Roeck wrote:
On Mon, Apr 15, 2019 at 10:52:42PM +0200, Christoph Hellwig wrote:
On Mon, Apr 15, 2019 at 12:44:35PM -0700, Guenter Roeck wrote:
This patch causes crashes with various boot tests. Most sparc tests crash, as
well as several arm tests. Bisect results in both cases point to this patch.

That just means we trigger an existing bug more easily now.  I'll see
if I can help with the issues.

Code which previously worked reliably no longer does. I would be quite
hesitant to call this "trigger an existing bug more easily". "Regression"
seems to be a more appropriate term - even more so as it seems to cause
'init' crashes, at least on arm.

Well, we have these sgls in the wild already, it just is that they
are fairly rare.  For a related fix on a mainstream platform see
here for example:

	https://lore.kernel.org/patchwork/patch/1050367/

Below is a rework of the sparc32 iommu code that should avoid your
reported problem.  Please send any other reports to me as well.


The code below on top of next-20190415 results in

esp ffd398e4: esp0: regs[(ptrval):(ptrval)] irq[2]
esp ffd398e4: esp0: is a FAS100A, 40 MHz (ccf=0), SCSI ID 7
scsi host0: esp
scsi 0:0:0:0: Direct-Access     QEMU     QEMU HARDDISK    2.5+ PQ: 0 ANSI: 5
scsi target0:0:0: Beginning Domain Validation
scsi target0:0:0: Domain Validation Initial Inquiry Failed
scsi target0:0:0: Ending Domain Validation
scsi host0: scsi scan: INQUIRY result too short (5), using 36
scsi 0:0:2:0: Direct-Access                                    PQ: 0 ANSI: 0
scsi target0:0:2: Beginning Domain Validation
scsi target0:0:2: Domain Validation Initial Inquiry Failed
scsi target0:0:2: Ending Domain Validation
...
sd 0:0:2:0: Device offlined - not ready after error recovery
sd 0:0:2:0: rejecting I/O to offline device

and sometimes:

...
sd 0:0:0:4: [sde] Asking for cache data failed
sd 0:0:0:4: [sde] Assuming drive cache: write through
sd 0:0:0:4: rejecting I/O to offline device
sd 0:0:0:5: Device offlined - not ready after error recovery
Unable to handle kernel NULL pointer dereference
tsk->{mm,active_mm}->context = ffffffff
tsk->{mm,active_mm}->pgd = fc000000
              \|/ ____ \|/
              "@'/ ,. \`@"
              /_| \__/ |_\
                 \__U_/
kworker/u2:6(90): Oops [#1]
CPU: 0 PID: 90 Comm: kworker/u2:6 Not tainted 5.1.0-rc4-next-20190415-00001-g328cdb292761 #1
Workqueue: events_unbound async_run_entry_fn
PSR: 409010c5 PC: f02d2904 NPC: f02d2908 Y: 0000000c    Not tainted
PC: <__scsi_execute+0xc/0x18c>
%G: f5334608 00000000  00000000 00000002  00041200 00000008  f5386000 00000002
%O: f5334608 00000000  f5387c90 f50d3fb0  0000000b f5334a4c  f5387b98 f02d2a38
RPC: <__scsi_execute+0x140/0x18c>
%L: 00000000 00000000  f51b1800 00000001  00000002 00000000  f5386000 f05f9070
%I: 00000200 f5387ca0  00000002 00000000  00000000 00000000  f5387bf8 f02e92a4
Disabling lock debugging due to kernel taint
Caller[f02e92a4]: sd_revalidate_disk+0xe8/0x1f5c
Caller[f02eb418]: sd_probe+0x2b0/0x3f0
Caller[f02bbc98]: really_probe+0x1bc/0x2e8
Caller[f02bc10c]: __driver_attach_async_helper+0x48/0x58
Caller[f003f534]: async_run_entry_fn+0x38/0x124
Caller[f00373bc]: process_one_work+0x168/0x390
Caller[f0037728]: worker_thread+0x144/0x504
Caller[f003c90c]: kthread+0xd8/0x110
Caller[f00082f0]: ret_from_kernel_thread+0xc/0x38
Caller[00000000]:   (null)
Instruction DUMP:
 9de3bfa0
 b41ea001
 80a0001a
<d0062004>
 92603fff
 a4100018
 e207a06c
 e007a074
 94102008

Guenter

diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c
index e8d5d73ca40d..93c2fc440cb0 100644
--- a/arch/sparc/mm/iommu.c
+++ b/arch/sparc/mm/iommu.c
@@ -175,16 +175,38 @@ static void iommu_flush_iotlb(iopte_t *iopte, unsigned int niopte)
  	}
  }
-static u32 iommu_get_one(struct device *dev, struct page *page, int npages)
+static u32 __sbus_iommu_map_page(struct device *dev, struct page *page, unsigned offset,
+		unsigned len, bool need_flush)
  {
  	struct iommu_struct *iommu = dev->archdata.iommu;
+	phys_addr_t paddr = page_to_phys(page) + offset, p;
+	unsigned long pfn = __phys_to_pfn(paddr);
+	unsigned long off = (unsigned long)paddr & ~PAGE_MASK;
+	unsigned long npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
  	int ioptex;
  	iopte_t *iopte, *iopte0;
  	unsigned int busa, busa0;
  	int i;
+	/* XXX So what is maxphys for us and how do drivers know it? */
+	if (!len || len > 256 * 1024)
+		return DMA_MAPPING_ERROR;
+
+	/*
+	 * We expect unmapped highmem pages to be not in the cache.
+	 * XXX Is this a good assumption?
+	 * XXX What if someone else unmaps it here and races us?
+	 */
+	if (need_flush && !PageHighMem(page)) {
+		for (p = paddr & PAGE_MASK; p < paddr + len; p += PAGE_SIZE) {
+			unsigned long vaddr = (unsigned long)phys_to_virt(p);
+
+			flush_page_for_dma(vaddr);
+		}
+	}
+
  	/* page color = pfn of page */
-	ioptex = bit_map_string_get(&iommu->usemap, npages, page_to_pfn(page));
+	ioptex = bit_map_string_get(&iommu->usemap, npages, pfn);
  	if (ioptex < 0)
  		panic("iommu out");
  	busa0 = iommu->start + (ioptex << PAGE_SHIFT);
@@ -193,11 +215,11 @@ static u32 iommu_get_one(struct device *dev, struct page *page, int npages)
  	busa = busa0;
  	iopte = iopte0;
  	for (i = 0; i < npages; i++) {
-		iopte_val(*iopte) = MKIOPTE(page_to_pfn(page), IOPERM);
+		iopte_val(*iopte) = MKIOPTE(pfn, IOPERM);
  		iommu_invalidate_page(iommu->regs, busa);
  		busa += PAGE_SIZE;
  		iopte++;
-		page++;
+		pfn++;
  	}
 
 	iommu_flush_iotlb(iopte0, npages);
@@ -205,99 +227,62 @@ static u32 iommu_get_one(struct device *dev, struct page *page, int npages)
  	return busa0;
  }
-static dma_addr_t __sbus_iommu_map_page(struct device *dev, struct page *page,
-		unsigned long offset, size_t len)
-{
-	void *vaddr = page_address(page) + offset;
-	unsigned long off = (unsigned long)vaddr & ~PAGE_MASK;
-	unsigned long npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	
-	/* XXX So what is maxphys for us and how do drivers know it? */
-	if (!len || len > 256 * 1024)
-		return DMA_MAPPING_ERROR;
-	return iommu_get_one(dev, virt_to_page(vaddr), npages) + off;
-}
-
  static dma_addr_t sbus_iommu_map_page_gflush(struct device *dev,
  		struct page *page, unsigned long offset, size_t len,
  		enum dma_data_direction dir, unsigned long attrs)
  {
  	flush_page_for_dma(0);
-	return __sbus_iommu_map_page(dev, page, offset, len);
+	return __sbus_iommu_map_page(dev, page, offset, len, false);
  }
static dma_addr_t sbus_iommu_map_page_pflush(struct device *dev,
  		struct page *page, unsigned long offset, size_t len,
  		enum dma_data_direction dir, unsigned long attrs)
  {
-	void *vaddr = page_address(page) + offset;
-	unsigned long p = ((unsigned long)vaddr) & PAGE_MASK;
-
-	while (p < (unsigned long)vaddr + len) {
-		flush_page_for_dma(p);
-		p += PAGE_SIZE;
-	}
-
-	return __sbus_iommu_map_page(dev, page, offset, len);
+	return __sbus_iommu_map_page(dev, page, offset, len, true);
  }
-static int sbus_iommu_map_sg_gflush(struct device *dev, struct scatterlist *sgl,
-		int nents, enum dma_data_direction dir, unsigned long attrs)
+static int __sbus_iommu_map_sg(struct device *dev, struct scatterlist *sgl,
+		int nents, enum dma_data_direction dir, unsigned long attrs,
+		bool need_flush)
  {
  	struct scatterlist *sg;
-	int i, n;
-
-	flush_page_for_dma(0);
+	int i;
 
 	for_each_sg(sgl, sg, nents, i) {
-		n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT;
-		sg->dma_address = iommu_get_one(dev, sg_page(sg), n) + sg->offset;
+		sg->dma_address = __sbus_iommu_map_page(dev, sg_page(sg),
+				sg->offset, sg->length, need_flush);
+		if (sg->dma_address == DMA_MAPPING_ERROR)
+			return 0;
  		sg->dma_length = sg->length;
  	}
 
 	return nents;
  }
-static int sbus_iommu_map_sg_pflush(struct device *dev, struct scatterlist *sgl,
+static int sbus_iommu_map_sg_gflush(struct device *dev, struct scatterlist *sgl,
  		int nents, enum dma_data_direction dir, unsigned long attrs)
  {
-	unsigned long page, oldpage = 0;
-	struct scatterlist *sg;
-	int i, j, n;
-
-	for_each_sg(sgl, sg, nents, j) {
-		n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT;
-
-		/*
-		 * We expect unmapped highmem pages to be not in the cache.
-		 * XXX Is this a good assumption?
-		 * XXX What if someone else unmaps it here and races us?
-		 */
-		if ((page = (unsigned long) page_address(sg_page(sg))) != 0) {
-			for (i = 0; i < n; i++) {
-				if (page != oldpage) {	/* Already flushed? */
-					flush_page_for_dma(page);
-					oldpage = page;
-				}
-				page += PAGE_SIZE;
-			}
-		}
-
-		sg->dma_address = iommu_get_one(dev, sg_page(sg), n) + sg->offset;
-		sg->dma_length = sg->length;
-	}
+	flush_page_for_dma(0);
+	return __sbus_iommu_map_sg(dev, sgl, nents, dir, attrs, false);
+}
 
-	return nents;
+static int sbus_iommu_map_sg_pflush(struct device *dev, struct scatterlist *sgl,
+		int nents, enum dma_data_direction dir, unsigned long attrs)
+{
+	return __sbus_iommu_map_sg(dev, sgl, nents, dir, attrs, true);
  }
-static void iommu_release_one(struct device *dev, u32 busa, int npages)
+static void __sbus_iommu_unmap_page(struct device *dev, dma_addr_t dma_addr,
+		size_t len)
  {
  	struct iommu_struct *iommu = dev->archdata.iommu;
-	int ioptex;
-	int i;
+	unsigned busa, npages, ioptex, i;
 
+	busa = dma_addr & PAGE_MASK;
  	BUG_ON(busa < iommu->start);
  	ioptex = (busa - iommu->start) >> PAGE_SHIFT;
+	npages = ((dma_addr & ~PAGE_MASK) + len + PAGE_SIZE-1) >> PAGE_SHIFT;
  	for (i = 0; i < npages; i++) {
  		iopte_val(iommu->page_table[ioptex + i]) = 0;
  		iommu_invalidate_page(iommu->regs, busa);
@@ -309,22 +294,17 @@ static void iommu_release_one(struct device *dev, u32 busa, int npages)
  static void sbus_iommu_unmap_page(struct device *dev, dma_addr_t dma_addr,
  		size_t len, enum dma_data_direction dir, unsigned long attrs)
  {
-	unsigned long off = dma_addr & ~PAGE_MASK;
-	int npages;
-
-	npages = (off + len + PAGE_SIZE-1) >> PAGE_SHIFT;
-	iommu_release_one(dev, dma_addr & PAGE_MASK, npages);
+	__sbus_iommu_unmap_page(dev, dma_addr, len);
  }
static void sbus_iommu_unmap_sg(struct device *dev, struct scatterlist *sgl,
  		int nents, enum dma_data_direction dir, unsigned long attrs)
  {
  	struct scatterlist *sg;
-	int i, n;
+	int i;
 
 	for_each_sg(sgl, sg, nents, i) {
-		n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT;
-		iommu_release_one(dev, sg->dma_address & PAGE_MASK, n);
+		__sbus_iommu_unmap_page(dev, sg->dma_address, sg->length);
  		sg->dma_address = 0x21212121;
  	}
  }





[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux