On Fri, Aug 16, 2024 at 10:18:43AM +0200, Christoph Hellwig wrote: > Fix the newly added discard support to only offer a FITRIM range that > spans the RT device in addition to the main device if the RT device > actually supports discard. Without this we'll incorrectly accept > a larger range than actually supported and confuse user space if the > RT device does not support discard. This can easily happen when the > main device is a SSD but the RT device is a hard driver. > > Move the code around a bit to keep the max_blocks and granularity > assignments together and explicitly reject that case where only the > RT device supports discard, as that does not fit the way the FITRIM ABI > works very well and is a very fringe use case. Is there more to this than generic/260 failing? And if not, does the following patch things up for you? --D From: Darrick J. Wong <djwong@xxxxxxxxxx> Subject: [PATCH] generic/260: fix for multi-device xfs with mixed discard support Fix this test so that it can handle XFS filesystems with a realtime volume when the data and rt devices do not both support discard. Signed-off-by: Darrick J. Wong <djwong@xxxxxxxxxx> --- common/rc | 12 ++++++++++- common/xfs | 65 +++++++++++++++++++++++++++++++++++++++++++++++++----------- 2 files changed, 64 insertions(+), 13 deletions(-) diff --git a/common/rc b/common/rc index b718030a59..426f2de43b 100644 --- a/common/rc +++ b/common/rc @@ -4207,6 +4207,16 @@ _require_batched_discard() fi } +_bdev_queue_property() +{ + local dev="$1" + local property="$2" + local default="$3" + + local fname="/sys/block/$(_short_dev "$dev")/queue/$property" + cat "$fname" 2>/dev/null || echo "$default" +} + # Given a mountpoint and the device associated with that mountpoint, return the # maximum start offset that the FITRIM command will accept, in units of 1024 # byte blocks. @@ -4214,7 +4224,7 @@ _discard_max_offset_kb() { case "$FSTYP" in xfs) - _xfs_discard_max_offset_kb "$1" + _xfs_discard_max_offset_kb "$@" ;; *) $DF_PROG -k | awk -v dev="$2" -v mnt="$1" '$1 == dev && $7 == mnt { print $3 }' diff --git a/common/xfs b/common/xfs index 9501adac4c..5ad60a3ddc 100644 --- a/common/xfs +++ b/common/xfs @@ -1882,7 +1882,13 @@ _require_xfs_scratch_atomicswap() # of 1024 byte blocks. _xfs_discard_max_offset_kb() { + local mount="$1" + local dev="$2" local statfs + local datadev_discard= + local rtdev_discard= + local dev_discard_max= + local rtdev_discard_max= # Use awk to read the statfs output for the XFS filesystem, compute # the two possible FITRIM offset maximums, and then use some horrid @@ -1895,31 +1901,66 @@ _xfs_discard_max_offset_kb() # 2: Realtime volume size in fsblocks. # 3: Max FITRIM offset if we can only trim the data volume # 4: Max FITRIM offset if we can trim the data and rt volumes - readarray -t statfs < <($XFS_IO_PROG -c 'statfs' "$1" | \ - awk '{g[$1] = $3} END {printf("%d\n%d\n%d\n%d\n%d\n", + # 5: Max FITRIM offset if we can only trim the rt volume + readarray -t statfs < <($XFS_IO_PROG -c 'statfs' "$mount" | \ + awk '{g[$1] = $3} END {printf("%d\n%d\n%d\n%d\n%d\n%d\n", g["geom.bsize"], g["geom.datablocks"], g["geom.rtblocks"], g["geom.bsize"] * g["geom.datablocks"] / 1024, - g["geom.bsize"] * (g["geom.datablocks"] + g["geom.rtblocks"]) / 1024);}') + g["geom.bsize"] * (g["geom.datablocks"] + g["geom.rtblocks"]) / 1024, + g["geom.bsize"] * g["geom.rtblocks"] / 1024);}') # If the kernel supports discarding the realtime volume, then it will # not reject a FITRIM for fsblock dblks+1, even if the len/minlen # arguments are absurd. if [ "${statfs[2]}" -gt 0 ]; then - if $FSTRIM_PROG -o "$((statfs[0] * statfs[1]))" \ + case "$dev" in + "$SCRATCH_DEV") + rtdev_discard_max="$(_bdev_queue_property "$SCRATCH_RTDEV" discard_max_bytes 0)" + ;; + "$TEST_DEV") + rtdev_discard_max="$(_bdev_queue_property "$TEST_RTDEV" discard_max_bytes 0)" + ;; + *) + echo "Unrecognized device $dev" >&2 + rtdev_discard_max=0 + ;; + esac + + if [ "$rtdev_discard_max" -gt 0 ] && + $FSTRIM_PROG -o "$((statfs[0] * statfs[1]))" \ -l "${statfs[0]}" \ - -m "$((statfs[0] * 2))" "$1" &>/dev/null; then - # The kernel supports discarding the rt volume, so - # print out the second answer from above. - echo "${statfs[4]}" - return + -m "$((statfs[0] * 2))" "$mount" &>/dev/null; then + # The kernel supports discarding the rt volume + rtdev_discard=2 fi fi - # The kernel does not support discarding the rt volume or there is no - # rt volume. Print out the first answer from above. - echo "${statfs[3]}" + # The kernel supports discarding the rt volume + dev_discard_max="$(_bdev_queue_property "$dev" discard_max_bytes 0)" + if [ "$dev_discard_max" -gt 0 ]; then + datadev_discard=1 + fi + + case "$datadev_discard$rtdev_discard" in + "12") + # Both devices support it + echo "${statfs[4]}" + ;; + "1") + # Only the data device supports it + echo "${statfs[3]}" + ;; + "2") + # Only the rt device supports it + echo "${statfs[5]}" + ;; + *) + # No support at all + echo 0 + ;; + esac } # check if mkfs and the kernel support nocrc (v4) file systems