* Remove double-sscanf to scan for %llu and 0x%llx, as that is not going to work--the %llu will consume the "0" in "0x" of your input, and a hex value can never be consumed. And just entering a hex number without leading 0x will either be scanned as a string and not match, or the leading decimal portion is scanned which is not correct. Thus remove the first %llu scan and leave only the %llx scan, removing the leading 0x since %llx can scan either. * Fix logic the check of the result of amdgpu_reserve_page_direct()--it is 0 on success, and non-zero on error. * Add bad_page_cnt_threshold to debugfs for reporting purposes only--it usually matches the size of EEPROM but may be different depending on module option. Small other improvements. * Improve kernel-doc for the sysfs interface. Cc: Alexander Deucher <Alexander.Deucher@xxxxxxx> Cc: John Clements <john.clements@xxxxxxx> Cc: Hawking Zhang <Hawking.Zhang@xxxxxxx> Signed-off-by: Luben Tuikov <luben.tuikov@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 67 ++++++++++++------------- 1 file changed, 32 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 0541196ae1ed..c4b94b444b90 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -114,7 +114,7 @@ static int amdgpu_reserve_page_direct(struct amdgpu_device *adev, uint64_t addre if (amdgpu_ras_check_bad_page(adev, address)) { dev_warn(adev->dev, - "RAS WARN: 0x%llx has been marked as bad page!\n", + "RAS WARN: 0x%llx has already been marked as bad page!\n", address); return 0; } @@ -228,11 +228,9 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, return -EINVAL; if (op != -1) { - if (op == 3) { - if (sscanf(str, "%*s %llu", &address) != 1) - if (sscanf(str, "%*s 0x%llx", &address) != 1) - return -EINVAL; + if (sscanf(str, "%*s %llx", &address) != 1) + return -EINVAL; data->op = op; data->inject.address = address; @@ -255,11 +253,9 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, data->op = op; if (op == 2) { - if (sscanf(str, "%*s %*s %*s %u %llu %llu", - &sub_block, &address, &value) != 3) - if (sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx", - &sub_block, &address, &value) != 3) - return -EINVAL; + if (sscanf(str, "%*s %*s %*s %x %llx %llx", + &sub_block, &address, &value) != 3) + return -EINVAL; data->head.sub_block_index = sub_block; data->inject.address = address; data->inject.value = value; @@ -278,7 +274,7 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, /** * DOC: AMDGPU RAS debugfs control interface * - * It accepts struct ras_debug_if who has two members. + * The control interface accepts struct ras_debug_if which has two members. * * First member: ras_debug_if::head or ras_debug_if::inject. * @@ -303,32 +299,33 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, * * How to use the interface? * - * Programs + * Program * * Copy the struct ras_debug_if in your codes and initialize it. * Write the struct to the control node. * - * Shells + * Shell * * .. code-block:: bash * - * echo op block [error [sub_block address value]] > .../ras/ras_ctrl + * echo "disable <block>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl + * echo "enable <block> <error>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl + * echo "inject <block> <error> <sub-block> <address> <value> > /sys/kernel/debug/dri/<N>/ras/ras_ctrl * - * Parameters: + * Where N, is the card which you want to affect. * - * op: disable, enable, inject - * disable: only block is needed - * enable: block and error are needed - * inject: error, address, value are needed - * block: umc, sdma, gfx, ......... + * "disable" requires only the block. + * "enable" requires the block and error type. + * "inject" requires the block, error type, address, and value. + * The block is one of: umc, sdma, gfx, etc. * see ras_block_string[] for details - * error: ue, ce - * ue: multi_uncorrectable - * ce: single_correctable - * sub_block: - * sub block index, pass 0 if there is no sub block + * The error type is one of: ue, ce, where, + * ue is multi-uncorrectable + * ce is single-correctable + * The sub-block is a the sub-block index, pass 0 if there is no sub-block. + * The address and value are hexadecimal numbers, leading 0x is optional. * - * here are some examples for bash commands: + * For instance, * * .. code-block:: bash * @@ -336,17 +333,17 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, * echo inject umc ce 0 0 0 > /sys/kernel/debug/dri/0/ras/ras_ctrl * echo disable umc > /sys/kernel/debug/dri/0/ras/ras_ctrl * - * How to check the result? + * How to check the result of the operation? * - * For disable/enable, please check ras features at + * To check disable/enable, see "ras" features at, * /sys/class/drm/card[0/1/2...]/device/ras/features * - * For inject, please check corresponding err count at - * /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count + * To check inject, see the corresponding error count at, + * /sys/class/drm/card[0/1/2...]/device/ras/[gfx|sdma|umc|...]_err_count * * .. note:: * Operations are only allowed on blocks which are supported. - * Please check ras mask at /sys/module/amdgpu/parameters/ras_mask + * Check the "ras" mask at /sys/module/amdgpu/parameters/ras_mask * to see which blocks support RAS on a particular asic. * */ @@ -367,11 +364,9 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user * if (ret) return -EINVAL; - if (data.op == 3) - { + if (data.op == 3) { ret = amdgpu_reserve_page_direct(adev, data.inject.address); - - if (ret) + if (!ret) return size; else return ret; @@ -1269,6 +1264,8 @@ static struct dentry *amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device * &amdgpu_ras_debugfs_ctrl_ops); debugfs_create_file("ras_eeprom_reset", S_IWUGO | S_IRUGO, dir, adev, &amdgpu_ras_debugfs_eeprom_ops); + debugfs_create_u32("bad_page_cnt_threshold", S_IRUGO, dir, + &con->bad_page_cnt_threshold); /* * After one uncorrectable error happens, usually GPU recovery will -- 2.31.0.97.g1424303384 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx