[AMD Official Use Only - Internal Distribution Only]
no. below function checks if block is valid or not.
I think you need check your code_checker. or you were checking on a very old codebase?
/* check if ras is supported on block, say, sdma, gfx */
static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev,
unsigned int block)
From: Dan Carpenter <dan.carpenter@xxxxxxxxxx>
Sent: Wednesday, May 6, 2020 5:17:34 PM
To: Zhou1, Tao <Tao.Zhou1@xxxxxxx>
Cc: Pan, Xinhui <Xinhui.Pan@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx <amd-gfx@xxxxxxxxxxxxxxxxxxxxx>
Subject: Re: [bug report] drm/amdgpu: add amdgpu_ras.c to support ras (v2)
Sent: Wednesday, May 6, 2020 5:17:34 PM
To: Zhou1, Tao <Tao.Zhou1@xxxxxxx>
Cc: Pan, Xinhui <Xinhui.Pan@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx <amd-gfx@xxxxxxxxxxxxxxxxxxxxx>
Subject: Re: [bug report] drm/amdgpu: add amdgpu_ras.c to support ras (v2)
On Wed, May 06, 2020 at 07:26:16AM +0000, Zhou1, Tao wrote:
> [AMD Public Use]
>
> Hi Dan:
>
> Please check the following piece of code in amdgpu_ras_debugfs_ctrl_parse_data:
>
> if (op != -1) {
> if (amdgpu_ras_find_block_id_by_name(block_name, &block_id))
> return -EINVAL;
>
> data->head.block = block_id;
>
> amdgpu_ras_find_block_id_by_name will return error directly if someone try to provide an invalid block_name intentionally via debugfs.
>
No. It's the line after that which are the problem.
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
147 static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
148 const char __user *buf, size_t size,
149 loff_t *pos, struct ras_debug_if *data)
150 {
151 ssize_t s = min_t(u64, 64, size);
152 char str[65];
153 char block_name[33];
154 char err[9] = "ue";
155 int op = -1;
156 int block_id;
157 uint32_t sub_block;
158 u64 address, value;
159
160 if (*pos)
161 return -EINVAL;
162 *pos = size;
163
164 memset(str, 0, sizeof(str));
165 memset(data, 0, sizeof(*data));
166
167 if (copy_from_user(str, buf, s))
168 return -EINVAL;
169
170 if (sscanf(str, "disable %32s", block_name) == 1)
171 op = 0;
172 else if (sscanf(str, "enable %32s %8s", block_name, err) == 2)
173 op = 1;
174 else if (sscanf(str, "inject %32s %8s", block_name, err) == 2)
175 op = 2;
176 else if (str[0] && str[1] && str[2] && str[3])
177 /* ascii string, but commands are not matched. */
Say we don't write an ascii string.
178 return -EINVAL;
179
180 if (op != -1) {
181 if (amdgpu_ras_find_block_id_by_name(block_name, &block_id))
182 return -EINVAL;
183
184 data->head.block = block_id;
185 /* only ue and ce errors are supported */
186 if (!memcmp("ue", err, 2))
187 data->head.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
188 else if (!memcmp("ce", err, 2))
189 data->head.type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE;
190 else
191 return -EINVAL;
192
193 data->op = op;
194
195 if (op == 2) {
196 if (sscanf(str, "%*s %*s %*s %u %llu %llu",
197 &sub_block, &address, &value) != 3)
198 if (sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx",
199 &sub_block, &address, &value) != 3)
200 return -EINVAL;
201 data->head.sub_block_index = sub_block;
202 data->inject.address = address;
203 data->inject.value = value;
204 }
205 } else {
206 if (size < sizeof(*data))
207 return -EINVAL;
208
209 if (copy_from_user(data, buf, sizeof(*data)))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
This lets us set the data->head.block to whatever we want. Premusably
there is a trusted app which knows how to write the correct values.
But if it has a bug that will cause a crash and we'll have to find a
way to disable it in the kernel for kernel lock down mode etc so either
way we'll need to do a bit of work.
210 return -EINVAL;
211 }
212
213 return 0;
214 }
regards,
dan carpenter
> [AMD Public Use]
>
> Hi Dan:
>
> Please check the following piece of code in amdgpu_ras_debugfs_ctrl_parse_data:
>
> if (op != -1) {
> if (amdgpu_ras_find_block_id_by_name(block_name, &block_id))
> return -EINVAL;
>
> data->head.block = block_id;
>
> amdgpu_ras_find_block_id_by_name will return error directly if someone try to provide an invalid block_name intentionally via debugfs.
>
No. It's the line after that which are the problem.
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
147 static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
148 const char __user *buf, size_t size,
149 loff_t *pos, struct ras_debug_if *data)
150 {
151 ssize_t s = min_t(u64, 64, size);
152 char str[65];
153 char block_name[33];
154 char err[9] = "ue";
155 int op = -1;
156 int block_id;
157 uint32_t sub_block;
158 u64 address, value;
159
160 if (*pos)
161 return -EINVAL;
162 *pos = size;
163
164 memset(str, 0, sizeof(str));
165 memset(data, 0, sizeof(*data));
166
167 if (copy_from_user(str, buf, s))
168 return -EINVAL;
169
170 if (sscanf(str, "disable %32s", block_name) == 1)
171 op = 0;
172 else if (sscanf(str, "enable %32s %8s", block_name, err) == 2)
173 op = 1;
174 else if (sscanf(str, "inject %32s %8s", block_name, err) == 2)
175 op = 2;
176 else if (str[0] && str[1] && str[2] && str[3])
177 /* ascii string, but commands are not matched. */
Say we don't write an ascii string.
178 return -EINVAL;
179
180 if (op != -1) {
181 if (amdgpu_ras_find_block_id_by_name(block_name, &block_id))
182 return -EINVAL;
183
184 data->head.block = block_id;
185 /* only ue and ce errors are supported */
186 if (!memcmp("ue", err, 2))
187 data->head.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
188 else if (!memcmp("ce", err, 2))
189 data->head.type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE;
190 else
191 return -EINVAL;
192
193 data->op = op;
194
195 if (op == 2) {
196 if (sscanf(str, "%*s %*s %*s %u %llu %llu",
197 &sub_block, &address, &value) != 3)
198 if (sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx",
199 &sub_block, &address, &value) != 3)
200 return -EINVAL;
201 data->head.sub_block_index = sub_block;
202 data->inject.address = address;
203 data->inject.value = value;
204 }
205 } else {
206 if (size < sizeof(*data))
207 return -EINVAL;
208
209 if (copy_from_user(data, buf, sizeof(*data)))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
This lets us set the data->head.block to whatever we want. Premusably
there is a trusted app which knows how to write the correct values.
But if it has a bug that will cause a crash and we'll have to find a
way to disable it in the kernel for kernel lock down mode etc so either
way we'll need to do a bit of work.
210 return -EINVAL;
211 }
212
213 return 0;
214 }
regards,
dan carpenter
_______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx