On 24.09.20 at 12:21, Guchun Chen wrote:
sysfs_update_group triggers the call trace below with kernel 3.10
on RHEL 7.9. The cause is a bug in sysfs_update_group on kernel 3.10:
it always fails on a named group, because the subsequent call into
internal_create_group unconditionally tries to create a new sysfs
directory, so the system complains about a duplicate creation.
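For illustration only (not part of the patch), here is a condensed sketch of the pre-patch flow; "feature_grp" and "badpage_grp" are stand-ins for the two groups built in the removed helpers further down, both of which carry .name = RAS_FS_NAME:

#include <linux/kobject.h>
#include <linux/sysfs.h>

/* Sketch of the pre-patch sequence on a 3.10 kernel. */
static int ras_sysfs_old_flow(struct kobject *kobj,
			      const struct attribute_group *feature_grp,
			      const struct attribute_group *badpage_grp)
{
	int r;

	/* First call creates the .../ras directory with the "features" file. */
	r = sysfs_create_group(kobj, feature_grp);
	if (r)
		return r;

	/*
	 * On kernel 3.10, sysfs_update_group() on a named group does not
	 * merge into the already existing directory; internal_create_group()
	 * tries to create .../ras a second time and sysfs_warn_dup() fires.
	 */
	return sysfs_update_group(kobj, badpage_grp);
}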
NAK, we should not have workarounds for older kernels in the upstream
code base. You somehow need to handle this in the DKMS package.
Or is that also valid as a standalone cleanup?
Regards,
Christian.
The patch merges the sysfs setup into a single sysfs_create_group call,
even though the set of nodes varies across configurations.
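As a rough sketch of the merged setup (placeholder names; the actual code is in the diff below), everything ends up in one attribute group handed to a single sysfs_create_group() call:

#include <linux/kobject.h>
#include <linux/sysfs.h>

/* Sketch only: one group named "ras" carrying both the normal and the
 * binary attribute.  "features_attr" and "badpages_attr" stand in for
 * the corresponding fields of struct amdgpu_ras. */
static int ras_sysfs_new_flow(struct kobject *kobj,
			      struct device_attribute *features_attr,
			      struct bin_attribute *badpages_attr,
			      bool add_bad_pages)
{
	struct attribute *attrs[] = { &features_attr->attr, NULL };
	struct bin_attribute *bin_attrs[] = { NULL, NULL };
	struct attribute_group group = {
		.name  = "ras",		/* RAS_FS_NAME */
		.attrs = attrs,
	};

	sysfs_attr_init(attrs[0]);

	if (add_bad_pages) {		/* amdgpu_bad_page_threshold != 0 */
		bin_attrs[0] = badpages_attr;
		group.bin_attrs = bin_attrs;
		sysfs_bin_attr_init(bin_attrs[0]);
	}

	/* One creation call, so no second named-group creation can collide. */
	return sysfs_create_group(kobj, &group);
}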
[ 6.531591] WARNING: CPU: 52 PID: 638 at fs/sysfs/dir.c:31 sysfs_warn_dup+0x64/0x80
[ 6.531592] sysfs: cannot create duplicate filename '/devices/pci0000:20/0000:20:03.1/0000:21:00.0/0000:22:00.0/0000:23:00.0/ras'
[ 6.531593] Modules linked in: amdgpu(OE+) amd_iommu_v2 amd_sched(OE) amdttm(OE) amdkcl(OE) drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ahci drm libahci igb libata crct10dif_pclmul crct10dif_common crc32c_intel ptp nvme atlantic pps_core dca drm_panel_orientation_quirks nvme_core i2c_algo_bit sdhci_acpi iosf_mbi sdhci mmc_core dm_mirror dm_region_hash dm_log dm_mod fuse
[ 6.531606] CPU: 52 PID: 638 Comm: systemd-udevd Tainted: G OE ------------ 3.10.0-1152.el7.x86_64 #1
[ 6.531609] Hardware name: Gigabyte Technology Co., Ltd. TRX40 AORUS MASTER/TRX40 AORUS MASTER, BIOS F5c 03/05/2020
[ 6.531610] Call Trace:
[ 6.531615] [<ffffffff9b18133a>] dump_stack+0x19/0x1b
[ 6.531618] [<ffffffff9aa9b228>] __warn+0xd8/0x100
[ 6.531619] [<ffffffff9aa9b2af>] warn_slowpath_fmt+0x5f/0x80
[ 6.531621] [<ffffffff9acd8e48>] ? kernfs_path+0x48/0x60
[ 6.531622] [<ffffffff9acdbb54>] sysfs_warn_dup+0x64/0x80
[ 6.531624] [<ffffffff9acdc6ba>] internal_create_group+0x23a/0x250
[ 6.531625] [<ffffffff9acdc706>] sysfs_update_group+0x16/0x20
[ 6.531660] [<ffffffffc067fb67>] amdgpu_ras_init+0x1e7/0x240 [amdgpu]
[ 6.531691] [<ffffffffc063dc7c>] amdgpu_device_init+0xf9c/0x1cb0 [amdgpu]
[ 6.531694] [<ffffffff9abe5608>] ? kmalloc_order+0x18/0x40
[ 6.531698] [<ffffffff9ac24326>] ? kmalloc_order_trace+0x26/0xa0
[ 6.531726] [<ffffffffc0642b1a>] amdgpu_driver_load_kms+0x5a/0x330 [amdgpu]
[ 6.531753] [<ffffffffc063a832>] amdgpu_pci_probe+0x172/0x280 [amdgpu]
[ 6.531757] [<ffffffff9add653a>] local_pci_probe+0x4a/0xb0
[ 6.531760] [<ffffffff9add7c89>] pci_device_probe+0x109/0x160
[ 6.531762] [<ffffffff9aebb0e5>] driver_probe_device+0xc5/0x3e0
[ 6.531764] [<ffffffff9aebb4e3>] __driver_attach+0x93/0xa0
[ 6.531765] [<ffffffff9aebb450>] ? __device_attach+0x50/0x50
[ 6.531766] [<ffffffff9aeb8c85>] bus_for_each_dev+0x75/0xc0
[ 6.531767] [<ffffffff9aebaa5e>] driver_attach+0x1e/0x20
[ 6.531769] [<ffffffff9aeba500>] bus_add_driver+0x200/0x2d0
[ 6.531770] [<ffffffff9aebbb74>] driver_register+0x64/0xf0
[ 6.531771] [<ffffffff9add74c5>] __pci_register_driver+0xa5/0xc0
[ 6.531774] [<ffffffffc0bd5000>] ? 0xffffffffc0bd4fff
[ 6.531806] [<ffffffffc0bd50a4>] amdgpu_init+0xa4/0xb0 [amdgpu]
[ 6.531809] [<ffffffff9aa0210a>] do_one_initcall+0xba/0x240
[ 6.531812] [<ffffffff9ab1e45a>] load_module+0x271a/0x2bb0
[ 6.531815] [<ffffffff9adb41c0>] ? ddebug_proc_write+0x100/0x100
[ 6.531817] [<ffffffff9ab1e9df>] SyS_init_module+0xef/0x140
[ 6.531821] [<ffffffff9b193f92>] system_call_fastpath+0x25/0x2a
[ 6.531822] ---[ end trace e2d035c822a91de6 ]---
Signed-off-by: Guchun Chen <guchun.chen@xxxxxxx>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 87 +++++++++----------------
1 file changed, 31 insertions(+), 56 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index e5ea14774c0c..6c57521b21fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1027,58 +1027,6 @@ static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev,
return scnprintf(buf, PAGE_SIZE, "feature mask: 0x%x\n", con->features);
}
-static void amdgpu_ras_sysfs_add_bad_page_node(struct amdgpu_device *adev)
-{
- struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
- struct attribute_group group;
- struct bin_attribute *bin_attrs[] = {
- &con->badpages_attr,
- NULL,
- };
-
- con->badpages_attr = (struct bin_attribute) {
- .attr = {
- .name = "gpu_vram_bad_pages",
- .mode = S_IRUGO,
- },
- .size = 0,
- .private = NULL,
- .read = amdgpu_ras_sysfs_badpages_read,
- };
-
- group.name = RAS_FS_NAME;
- group.bin_attrs = bin_attrs;
-
- sysfs_bin_attr_init(bin_attrs[0]);
-
- sysfs_update_group(&adev->dev->kobj, &group);
-}
-
-static int amdgpu_ras_sysfs_create_feature_node(struct amdgpu_device *adev)
-{
- struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
- struct attribute *attrs[] = {
- &con->features_attr.attr,
- NULL
- };
- struct attribute_group group = {
- .name = RAS_FS_NAME,
- .attrs = attrs,
- };
-
- con->features_attr = (struct device_attribute) {
- .attr = {
- .name = "features",
- .mode = S_IRUGO,
- },
- .show = amdgpu_ras_sysfs_features_read,
- };
-
- sysfs_attr_init(attrs[0]);
-
- return sysfs_create_group(&adev->dev->kobj, &group);
-}
-
static void amdgpu_ras_sysfs_remove_bad_page_node(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
@@ -1300,13 +1248,40 @@ static void amdgpu_ras_debugfs_remove_all(struct amdgpu_device *adev)
/* debugfs end */
/* ras fs */
-
+static BIN_ATTR(gpu_vram_bad_pages, S_IRUGO,
+ amdgpu_ras_sysfs_badpages_read, NULL, 0);
+static DEVICE_ATTR(features, S_IRUGO,
+ amdgpu_ras_sysfs_features_read, NULL);
static int amdgpu_ras_fs_init(struct amdgpu_device *adev)
{
- amdgpu_ras_sysfs_create_feature_node(adev);
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct attribute_group group = {
+ .name = RAS_FS_NAME,
+ };
+ struct attribute *attrs[] = {
+ &con->features_attr.attr,
+ NULL
+ };
+ struct bin_attribute *bin_attrs[] = {
+ NULL,
+ NULL,
+ };
- if (amdgpu_bad_page_threshold != 0)
- amdgpu_ras_sysfs_add_bad_page_node(adev);
+ /* add features entry */
+ con->features_attr = dev_attr_features;
+ group.attrs = attrs;
+ sysfs_attr_init(attrs[0]);
+
+ if (amdgpu_bad_page_threshold != 0) {
+ /* add bad_page_features entry */
+ bin_attr_gpu_vram_bad_pages.private = NULL;
+ con->badpages_attr = bin_attr_gpu_vram_bad_pages;
+ bin_attrs[0] = &con->badpages_attr;
+ group.bin_attrs = bin_attrs;
+ sysfs_bin_attr_init(bin_attrs[0]);
+ }
+
+ sysfs_create_group(&adev->dev->kobj, &group);
return 0;
}
_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx