T5/T6 can have different pack and pad boundary value. This patch sets packing boundary based on cache line size and PCI-E maximum payload size and sets smallest padding boundary value. Signed-off-by: Varun Prakash <varun@xxxxxxxxxxx> --- drivers/scsi/csiostor/csio_wr.c | 84 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 79 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/csiostor/csio_wr.c b/drivers/scsi/csiostor/csio_wr.c index faa357b..5022e82 100644 --- a/drivers/scsi/csiostor/csio_wr.c +++ b/drivers/scsi/csiostor/csio_wr.c @@ -39,6 +39,7 @@ #include <asm/page.h> #include <linux/cache.h> +#include "t4_values.h" #include "csio_hw.h" #include "csio_wr.h" #include "csio_mb.h" @@ -1309,8 +1310,11 @@ csio_wr_fixup_host_params(struct csio_hw *hw) struct csio_sge *sge = &wrm->sge; uint32_t clsz = L1_CACHE_BYTES; uint32_t s_hps = PAGE_SHIFT - 10; - uint32_t ingpad = 0; uint32_t stat_len = clsz > 64 ? 128 : 64; + u32 fl_align = clsz < 32 ? 32 : clsz; + u32 pack_align; + u32 ingpad, ingpack; + int pcie_cap; csio_wr_reg32(hw, HOSTPAGESIZEPF0_V(s_hps) | HOSTPAGESIZEPF1_V(s_hps) | HOSTPAGESIZEPF2_V(s_hps) | HOSTPAGESIZEPF3_V(s_hps) | @@ -1318,14 +1322,82 @@ csio_wr_fixup_host_params(struct csio_hw *hw) HOSTPAGESIZEPF6_V(s_hps) | HOSTPAGESIZEPF7_V(s_hps), SGE_HOST_PAGE_SIZE_A); - sge->csio_fl_align = clsz < 32 ? 32 : clsz; - ingpad = ilog2(sge->csio_fl_align) - 5; + /* T5 introduced the separation of the Free List Padding and + * Packing Boundaries. Thus, we can select a smaller Padding + * Boundary to avoid uselessly chewing up PCIe Link and Memory + * Bandwidth, and use a Packing Boundary which is large enough + * to avoid false sharing between CPUs, etc. + * + * For the PCI Link, the smaller the Padding Boundary the + * better. For the Memory Controller, a smaller Padding + * Boundary is better until we cross under the Memory Line + * Size (the minimum unit of transfer to/from Memory). If we + * have a Padding Boundary which is smaller than the Memory + * Line Size, that'll involve a Read-Modify-Write cycle on the + * Memory Controller which is never good. + */ + + /* We want the Packing Boundary to be based on the Cache Line + * Size in order to help avoid False Sharing performance + * issues between CPUs, etc. We also want the Packing + * Boundary to incorporate the PCI-E Maximum Payload Size. We + * get best performance when the Packing Boundary is a + * multiple of the Maximum Payload Size. + */ + pack_align = fl_align; + pcie_cap = pci_find_capability(hw->pdev, PCI_CAP_ID_EXP); + if (pcie_cap) { + u32 mps, mps_log; + u16 devctl; + + /* The PCIe Device Control Maximum Payload Size field + * [bits 7:5] encodes sizes as powers of 2 starting at + * 128 bytes. + */ + pci_read_config_word(hw->pdev, + pcie_cap + PCI_EXP_DEVCTL, + &devctl); + mps_log = ((devctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5) + 7; + mps = 1 << mps_log; + if (mps > pack_align) + pack_align = mps; + } + + /* T5/T6 have a special interpretation of the "0" + * value for the Packing Boundary. This corresponds to 16 + * bytes instead of the expected 32 bytes. + */ + if (pack_align <= 16) { + ingpack = INGPACKBOUNDARY_16B_X; + fl_align = 16; + } else if (pack_align == 32) { + ingpack = INGPACKBOUNDARY_64B_X; + fl_align = 64; + } else { + u32 pack_align_log = fls(pack_align) - 1; + + ingpack = pack_align_log - INGPACKBOUNDARY_SHIFT_X; + fl_align = pack_align; + } + + /* Use the smallest Ingress Padding which isn't smaller than + * the Memory Controller Read/Write Size. We'll take that as + * being 8 bytes since we don't know of any system with a + * wider Memory Controller Bus Width. + */ + if (csio_is_t5(hw->pdev->device & CSIO_HW_CHIP_MASK)) + ingpad = INGPADBOUNDARY_32B_X; + else + ingpad = T6_INGPADBOUNDARY_8B_X; csio_set_reg_field(hw, SGE_CONTROL_A, INGPADBOUNDARY_V(INGPADBOUNDARY_M) | EGRSTATUSPAGESIZE_F, INGPADBOUNDARY_V(ingpad) | EGRSTATUSPAGESIZE_V(stat_len != 64)); + csio_set_reg_field(hw, SGE_CONTROL2_A, + INGPACKBOUNDARY_V(INGPACKBOUNDARY_M), + INGPACKBOUNDARY_V(ingpack)); /* FL BUFFER SIZE#0 is Page size i,e already aligned to cache line */ csio_wr_reg32(hw, PAGE_SIZE, SGE_FL_BUFFER_SIZE0_A); @@ -1337,14 +1409,16 @@ csio_wr_fixup_host_params(struct csio_hw *hw) if (hw->flags & CSIO_HWF_USING_SOFT_PARAMS) { csio_wr_reg32(hw, (csio_rd_reg32(hw, SGE_FL_BUFFER_SIZE2_A) + - sge->csio_fl_align - 1) & ~(sge->csio_fl_align - 1), + fl_align - 1) & ~(fl_align - 1), SGE_FL_BUFFER_SIZE2_A); csio_wr_reg32(hw, (csio_rd_reg32(hw, SGE_FL_BUFFER_SIZE3_A) + - sge->csio_fl_align - 1) & ~(sge->csio_fl_align - 1), + fl_align - 1) & ~(fl_align - 1), SGE_FL_BUFFER_SIZE3_A); } + sge->csio_fl_align = fl_align; + csio_wr_reg32(hw, HPZ0_V(PAGE_SHIFT - 12), ULP_RX_TDDP_PSZ_A); /* default value of rx_dma_offset of the NIC driver */ -- 2.0.2