Thanks for the comments. >-----Original Message----- >From: Dave Jiang <dave.jiang@xxxxxxxxx> >Sent: 04 November 2024 18:31 >To: Shiju Jose <shiju.jose@xxxxxxxxxx>; linux-edac@xxxxxxxxxxxxxxx; linux- >cxl@xxxxxxxxxxxxxxx; linux-acpi@xxxxxxxxxxxxxxx; linux-mm@xxxxxxxxx; linux- >kernel@xxxxxxxxxxxxxxx >Cc: bp@xxxxxxxxx; tony.luck@xxxxxxxxx; rafael@xxxxxxxxxx; lenb@xxxxxxxxxx; >mchehab@xxxxxxxxxx; dan.j.williams@xxxxxxxxx; dave@xxxxxxxxxxxx; Jonathan >Cameron <jonathan.cameron@xxxxxxxxxx>; gregkh@xxxxxxxxxxxxxxxxxxx; >sudeep.holla@xxxxxxx; jassisinghbrar@xxxxxxxxx; alison.schofield@xxxxxxxxx; >vishal.l.verma@xxxxxxxxx; ira.weiny@xxxxxxxxx; david@xxxxxxxxxx; >Vilas.Sridharan@xxxxxxx; leo.duran@xxxxxxx; Yazen.Ghannam@xxxxxxx; >rientjes@xxxxxxxxxx; jiaqiyan@xxxxxxxxxx; Jon.Grimm@xxxxxxx; >dave.hansen@xxxxxxxxxxxxxxx; naoya.horiguchi@xxxxxxx; >james.morse@xxxxxxx; jthoughton@xxxxxxxxxx; somasundaram.a@xxxxxxx; >erdemaktas@xxxxxxxxxx; pgonda@xxxxxxxxxx; duenwen@xxxxxxxxxx; >gthelen@xxxxxxxxxx; wschwartz@xxxxxxxxxxxxxxxxxxx; >dferguson@xxxxxxxxxxxxxxxxxxx; wbs@xxxxxxxxxxxxxxxxxxxxxx; >nifan.cxl@xxxxxxxxx; tanxiaofei <tanxiaofei@xxxxxxxxxx>; Zengtao (B) ><prime.zeng@xxxxxxxxxxxxx>; Roberto Sassu <roberto.sassu@xxxxxxxxxx>; >kangkang.shen@xxxxxxxxxxxxx; wanghuiqiang <wanghuiqiang@xxxxxxxxxx>; >Linuxarm <linuxarm@xxxxxxxxxx> >Subject: Re: [PATCH v15 08/15] cxl/memfeature: Add CXL memory device ECS >control feature > > > >On 11/1/24 2:17 AM, shiju.jose@xxxxxxxxxx wrote: >> From: Shiju Jose <shiju.jose@xxxxxxxxxx> >> >> CXL spec 3.1 section 8.2.9.9.11.2 describes the DDR5 ECS (Error Check >> Scrub) control feature. >> The Error Check Scrub (ECS) is a feature defined in JEDEC DDR5 SDRAM >> Specification (JESD79-5) and allows the DRAM to internally read, >> correct single-bit errors, and write back corrected data bits to the >> DRAM array while providing transparency to error counts. >> >> The ECS control allows the requester to change the log entry type, the >> ECS threshold count (provided the request falls within the limits >> specified in >> DDR5 mode registers), switch between codeword mode and row count mode, >> and reset the ECS counter. >> >> Register with EDAC device driver, which retrieves the ECS attribute >> descriptors from the EDAC ECS and exposes the ECS control attributes >> to userspace via sysfs. For example, the ECS control for the memory >> media FRU0 in CXL mem0 device is located at >> /sys/bus/edac/devices/cxl_mem0/ecs_fru0/ >> >> Signed-off-by: Shiju Jose <shiju.jose@xxxxxxxxxx> >> --- >> drivers/cxl/core/memfeature.c | 342 >> +++++++++++++++++++++++++++++++++- >> 1 file changed, 339 insertions(+), 3 deletions(-) >> >> diff --git a/drivers/cxl/core/memfeature.c >> b/drivers/cxl/core/memfeature.c index 41298acc01de..e641396a32f5 >> 100644 >> --- a/drivers/cxl/core/memfeature.c >> +++ b/drivers/cxl/core/memfeature.c >> @@ -17,7 +17,7 @@ >> #include <cxl.h> >> #include <cxlmem.h> >> [...] >> +static int cxl_mem_ecs_set_attrs(struct device *dev, >> + struct cxl_ecs_context *cxl_ecs_ctx, >> + int fru_id, struct cxl_ecs_params *params, >> + u8 param_type) >> +{ >> + struct cxl_memdev *cxlmd = cxl_ecs_ctx->cxlmd; >> + struct cxl_dev_state *cxlds = cxlmd->cxlds; >> + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); >> + struct cxl_ecs_fru_rd_attrs *fru_rd_attrs; >> + struct cxl_ecs_fru_wr_attrs *fru_wr_attrs; >> + size_t rd_data_size, wr_data_size; >> + u16 num_media_frus, count; >> + size_t data_size; >> + int ret; >> + >> + num_media_frus = cxl_ecs_ctx->num_media_frus; >> + rd_data_size = cxl_ecs_ctx->get_feat_size; >> + wr_data_size = cxl_ecs_ctx->set_feat_size; >> + struct cxl_ecs_rd_attrs *rd_attrs __free(kfree) = >> + kmalloc(rd_data_size, GFP_KERNEL); >> + if (!rd_attrs) >> + return -ENOMEM; >> + >> + data_size = cxl_get_feature(mds, cxl_ecs_uuid, >> + CXL_GET_FEAT_SEL_CURRENT_VALUE, >> + rd_attrs, rd_data_size); >> + if (!data_size) >> + return -EIO; >> + >> + struct cxl_ecs_wr_attrs *wr_attrs __free(kfree) = >> + kmalloc(wr_data_size, GFP_KERNEL); >> + if (!wr_attrs) >> + return -ENOMEM; >> + >> + /* >> + * Fill writable attributes from the current attributes read >> + * for all the media FRUs. >> + */ >> + fru_rd_attrs = rd_attrs->fru_attrs; >> + fru_wr_attrs = wr_attrs->fru_attrs; >> + wr_attrs->ecs_log_cap = rd_attrs->ecs_log_cap; >> + for (count = 0; count < num_media_frus; count++) >> + fru_wr_attrs[count].ecs_config = fru_rd_attrs[count].ecs_config; >> + >> + /* Fill attribute to be set for the media FRU */ >> + switch (param_type) { >> + case CXL_ECS_PARAM_LOG_ENTRY_TYPE: >> + if (params->log_entry_type != ECS_LOG_ENTRY_TYPE_DRAM >&& >> + params->log_entry_type != >ECS_LOG_ENTRY_TYPE_MEM_MEDIA_FRU) { >> + dev_err(dev, >> + "Invalid CXL ECS scrub log entry type(%d) to >set\n", >> + params->log_entry_type); >> + dev_err(dev, >> + "Log Entry Type 0: per DRAM 1: per Memory >Media FRU\n"); >> + return -EINVAL; >> + } >> + wr_attrs->ecs_log_cap = >FIELD_PREP(CXL_ECS_LOG_ENTRY_TYPE_MASK, >> + params->log_entry_type); >> + break; >> + case CXL_ECS_PARAM_THRESHOLD: >> + fru_wr_attrs[fru_id].ecs_config &= >~CXL_ECS_THRESHOLD_COUNT_MASK; >> + switch (params->threshold) { >> + case 256: > >Why not just use the enums instead? Fixed. >> + fru_wr_attrs[fru_id].ecs_config |= >FIELD_PREP(CXL_ECS_THRESHOLD_COUNT_MASK, >> + >ECS_THRESHOLD_256); >> + break; >> + case 1024: >> + fru_wr_attrs[fru_id].ecs_config |= >FIELD_PREP(CXL_ECS_THRESHOLD_COUNT_MASK, >> + >ECS_THRESHOLD_1024); >> + break; >> + case 4096: >> + fru_wr_attrs[fru_id].ecs_config |= >FIELD_PREP(CXL_ECS_THRESHOLD_COUNT_MASK, >> + >ECS_THRESHOLD_4096); >> + break; >> + default: >> + dev_err(dev, >> + "Invalid CXL ECS scrub threshold count(%d) to >set\n", >> + params->threshold); >> + dev_err(dev, >> + "Supported scrub threshold counts: %u, %u, >%u\n", >> + ecs_supp_threshold[ECS_THRESHOLD_256], >> + ecs_supp_threshold[ECS_THRESHOLD_1024], >> + ecs_supp_threshold[ECS_THRESHOLD_4096]); >> + return -EINVAL; >> + } >> + break; >> + case CXL_ECS_PARAM_MODE: >> + if (params->count_mode != ECS_MODE_COUNTS_ROWS && >> + params->count_mode != ECS_MODE_COUNTS_CODEWORDS) >{ >> + dev_err(dev, >> + "Invalid CXL ECS scrub mode(%d) to set\n", >> + params->count_mode); >> + dev_err(dev, >> + "Supported ECS Modes: 0: ECS counts rows with >errors," >> + " 1: ECS counts codewords with errors\n"); >> + return -EINVAL; >> + } >> + fru_wr_attrs[fru_id].ecs_config &= >~CXL_ECS_COUNT_MODE_MASK; >> + fru_wr_attrs[fru_id].ecs_config |= >FIELD_PREP(CXL_ECS_COUNT_MODE_MASK, >> + params- >>count_mode); >> + break; >> + case CXL_ECS_PARAM_RESET_COUNTER: >> + if (params->reset_counter != 1) > >Compare with magic number? Fixed. > >> + return -EINVAL; [...] >> >> @@ -344,10 +643,10 @@ int cxl_mem_ras_features_init(struct cxl_memdev >*cxlmd, struct cxl_region *cxlr) >> rc = cxl_get_supported_feature_entry(mds, >&cxl_patrol_scrub_uuid, >> &feat_entry); >> if (rc < 0) >> - return rc; >> + goto feat_scrub_done; >> >> if (!(feat_entry.attr_flags & >CXL_FEAT_ENTRY_FLAG_CHANGABLE)) >> - return -EOPNOTSUPP; >> + goto feat_scrub_done; >> } >> >> cxl_ps_ctx = devm_kzalloc(&cxlmd->dev, sizeof(*cxl_ps_ctx), >> GFP_KERNEL); @@ -378,6 +677,43 @@ int cxl_mem_ras_features_init(struct >cxl_memdev *cxlmd, struct cxl_region *cxlr) >> ras_features[num_ras_features].ctx = cxl_ps_ctx; >> num_ras_features++; >> >> +feat_scrub_done: >> + if (!cxlr) { >> + rc = cxl_get_supported_feature_entry(mds, &cxl_ecs_uuid, >> + &feat_entry); >> + if (rc < 0) >> + goto feat_ecs_done; >> + >> + if (!(feat_entry.attr_flags & >CXL_FEAT_ENTRY_FLAG_CHANGABLE)) >> + goto feat_ecs_done; >> + num_media_frus = (feat_entry.get_feat_size - sizeof(struct >cxl_ecs_rd_attrs)) / >> + sizeof(struct cxl_ecs_fru_rd_attrs); >> + if (!num_media_frus) >> + goto feat_ecs_done; >> + >> + cxl_ecs_ctx = devm_kzalloc(&cxlmd->dev, sizeof(*cxl_ecs_ctx), >> + GFP_KERNEL); >> + if (!cxl_ecs_ctx) >> + goto feat_ecs_done; >> + *cxl_ecs_ctx = (struct cxl_ecs_context) { >> + .get_feat_size = feat_entry.get_feat_size, >> + .set_feat_size = feat_entry.set_feat_size, >> + .get_version = feat_entry.get_feat_ver, >> + .set_version = feat_entry.set_feat_ver, >> + .set_effects = feat_entry.set_effects, >> + .num_media_frus = num_media_frus, >> + .cxlmd = cxlmd, >> + }; >> + >> + ras_features[num_ras_features].ft_type = RAS_FEAT_ECS; >> + ras_features[num_ras_features].ecs_ops = &cxl_ecs_ops; >> + ras_features[num_ras_features].ctx = cxl_ecs_ctx; >> + ras_features[num_ras_features].ecs_info.num_media_frus = >> + > num_media_frus; >> + num_ras_features++; >> + } > >The function is getting awfully large. Maybe a helper function? Sure . Will add helper function. > >DJ > >> + >> +feat_ecs_done: >> return edac_dev_register(&cxlmd->dev, cxl_dev_name, NULL, >> num_ras_features, ras_features); } > Thanks, Shiju