> -----Original Message----- > From: Andrey Grodzovsky <andrey.grodzovsky@xxxxxxx> > Sent: 2019年8月22日 4:02 > To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx > Cc: Deucher, Alexander <Alexander.Deucher@xxxxxxx>; Pan, Xinhui > <Xinhui.Pan@xxxxxxx>; Zhang, Hawking <Hawking.Zhang@xxxxxxx>; > Tuikov, Luben <Luben.Tuikov@xxxxxxx>; Lazar, Lijo <Lijo.Lazar@xxxxxxx>; > Quan, Evan <Evan.Quan@xxxxxxx>; Panariti, David > <David.Panariti@xxxxxxx>; Russell, Kent <Kent.Russell@xxxxxxx>; Zhou1, > Tao <Tao.Zhou1@xxxxxxx>; Grodzovsky, Andrey > <Andrey.Grodzovsky@xxxxxxx> > Subject: [PATCH v4 1/4] drm/amdgpu: Add RAS EEPROM table. > > Add RAS EEPROM table manager to enable RAS errors to be stored upon > appearance and retrieved on driver load. > > v2: Fix some prints. > > v3: > Fix checksum calculation. > Make table record and header structs packed to do correct byte value sum. > Fix record crossing EEPROM page boundary. > > v4: > Fix byte sum val calculation for record - look at sizeof(record). > Fix some style comments. 
> > Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 3 + > drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 482 > +++++++++++++++++++++++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h | 90 +++++ > 4 files changed, 576 insertions(+), 1 deletion(-) create mode 100644 > drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c > create mode 100644 > drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h > > diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile > b/drivers/gpu/drm/amd/amdgpu/Makefile > index 28d76bd..f016cf1 100644 > --- a/drivers/gpu/drm/amd/amdgpu/Makefile > +++ b/drivers/gpu/drm/amd/amdgpu/Makefile > @@ -54,7 +54,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ > amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o > amdgpu_atomfirmware.o \ > amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o > amdgpu_ids.o \ > amdgpu_gmc.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o > amdgpu_vm_cpu.o \ > - amdgpu_vm_sdma.o amdgpu_discovery.o > + amdgpu_vm_sdma.o amdgpu_pmu.o amdgpu_discovery.o > amdgpu_ras_eeprom.o > > amdgpu-$(CONFIG_PERF_EVENTS) += amdgpu_pmu.o > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > index 2765f2d..8d5bcd8 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > @@ -29,6 +29,7 @@ > #include "amdgpu.h" > #include "amdgpu_psp.h" > #include "ta_ras_if.h" > +#include "amdgpu_ras_eeprom.h" > > enum amdgpu_ras_block { > AMDGPU_RAS_BLOCK__UMC = 0, > @@ -333,6 +334,8 @@ struct amdgpu_ras { > struct mutex recovery_lock; > > uint32_t flags; > + > + struct amdgpu_ras_eeprom_control eeprom_control; > }; > > struct ras_fs_data { > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c > new file mode 100644 > index 0000000..bf07515 > --- /dev/null > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c > @@ -0,0 +1,482 
@@ > +/* > + * Copyright 2019 Advanced Micro Devices, Inc. > + * > + * Permission is hereby granted, free of charge, to any person > +obtaining a > + * copy of this software and associated documentation files (the > +"Software"), > + * to deal in the Software without restriction, including without > +limitation > + * the rights to use, copy, modify, merge, publish, distribute, > +sublicense, > + * and/or sell copies of the Software, and to permit persons to whom > +the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be > +included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > +EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > +MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO > EVENT > +SHALL > + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, > +DAMAGES OR > + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR > +OTHERWISE, > + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR > THE USE > +OR > + * OTHER DEALINGS IN THE SOFTWARE. > + * > + */ > + > +#include "amdgpu_ras_eeprom.h" > +#include "amdgpu.h" > +#include "amdgpu_ras.h" > +#include <linux/bits.h> > + > +#define EEPROM_I2C_TARGET_ADDR 0xA0 > + > +#define EEPROM_TABLE_HEADER_SIZE 20 > +#define EEPROM_TABLE_RECORD_SIZE 24 [Tao] should we replace fixed value with sizeof for the two macros? 
> +#define EEPROM_ADDRESS_SIZE 0x2 > + > +/* Table hdr is 'AMDR' */ > +#define EEPROM_TABLE_HDR_VAL 0x414d4452 #define > EEPROM_TABLE_VER > +0x00010000 > + > +/* Assume 2 Mbit size */ > +#define EEPROM_SIZE_BYTES 256000 > +#define EEPROM_PAGE__SIZE_BYTES 256 > +#define EEPROM_HDR_START 0 > +#define EEPROM_RECORD_START (EEPROM_HDR_START + > +EEPROM_TABLE_HEADER_SIZE) #define EEPROM_MAX_RECORD_NUM > +((EEPROM_SIZE_BYTES - EEPROM_TABLE_HEADER_SIZE) / > +EEPROM_TABLE_RECORD_SIZE) #define EEPROM_ADDR_MSB_MASK > GENMASK(17, 8) > + > +#define to_amdgpu_device(x) (container_of(x, struct amdgpu_ras, > +eeprom_control))->adev > + > +static void __encode_table_header_to_buff(struct > amdgpu_ras_eeprom_table_header *hdr, > + unsigned char *buff) > +{ > + uint32_t *pp = (uint32_t *) buff; > + > + pp[0] = cpu_to_le32(hdr->header); > + pp[1] = cpu_to_le32(hdr->version); > + pp[2] = cpu_to_le32(hdr->first_rec_offset); > + pp[3] = cpu_to_le32(hdr->tbl_size); > + pp[4] = cpu_to_le32(hdr->checksum); > +} > + > +static void __decode_table_header_from_buff(struct > amdgpu_ras_eeprom_table_header *hdr, > + unsigned char *buff) > +{ > + uint32_t *pp = (uint32_t *)buff; > + > + hdr->header = le32_to_cpu(pp[0]); > + hdr->version = le32_to_cpu(pp[1]); > + hdr->first_rec_offset = le32_to_cpu(pp[2]); > + hdr->tbl_size = le32_to_cpu(pp[3]); > + hdr->checksum = le32_to_cpu(pp[4]); > +} > + > +static int __update_table_header(struct amdgpu_ras_eeprom_control > *control, > + unsigned char *buff) > +{ > + int ret = 0; > + struct i2c_msg msg = { > + .addr = EEPROM_I2C_TARGET_ADDR, > + .flags = 0, > + .len = EEPROM_ADDRESS_SIZE + > EEPROM_TABLE_HEADER_SIZE, > + .buf = buff, > + }; > + > + > + *(uint16_t *)buff = EEPROM_HDR_START; > + __encode_table_header_to_buff(&control->tbl_hdr, buff + > +EEPROM_ADDRESS_SIZE); > + > + ret = i2c_transfer(&control->eeprom_accessor, &msg, 1); > + if (ret < 1) > + DRM_ERROR("Failed to write EEPROM table header, ret:%d", > ret); > + > + return ret; > +} > + > +static 
uint32_t __calc_hdr_byte_sum(struct amdgpu_ras_eeprom_control > +*control); > + > +int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control) > { > + int ret = 0; > + struct amdgpu_device *adev = to_amdgpu_device(control); > + unsigned char buff[EEPROM_ADDRESS_SIZE + > EEPROM_TABLE_HEADER_SIZE] = { 0 }; > + struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr; > + struct i2c_msg msg = { > + .addr = EEPROM_I2C_TARGET_ADDR, > + .flags = I2C_M_RD, > + .len = EEPROM_ADDRESS_SIZE + > EEPROM_TABLE_HEADER_SIZE, > + .buf = buff, > + }; > + > + mutex_init(&control->tbl_mutex); > + > + switch (adev->asic_type) { > + case CHIP_VEGA20: > + /*TODO Add MI-60 */ > + break; > + > + default: > + return 0; > + } > + > + if (ret) { > + DRM_ERROR("Failed to init I2C controller, ret:%d", ret); > + return ret; > + } > + > + /* Read/Create table header from EEPROM address 0 */ > + ret = i2c_transfer(&control->eeprom_accessor, &msg, 1); > + if (ret < 1) { > + DRM_ERROR("Failed to read EEPROM table header, ret:%d", > ret); > + return ret; > + } > + > + __decode_table_header_from_buff(hdr, &buff[2]); > + > + if (hdr->header == EEPROM_TABLE_HDR_VAL) { > + control->num_recs = (hdr->tbl_size - > EEPROM_TABLE_HEADER_SIZE) / > + EEPROM_TABLE_RECORD_SIZE; > + DRM_DEBUG_DRIVER("Found existing EEPROM table with %d > records", > + control->num_recs); > + > + } else { > + DRM_INFO("Creating new EEPROM table"); > + > + hdr->header = EEPROM_TABLE_HDR_VAL; > + hdr->version = EEPROM_TABLE_VER; > + hdr->first_rec_offset = EEPROM_RECORD_START; > + hdr->tbl_size = EEPROM_TABLE_HEADER_SIZE; > + > + adev->psp.ras.ras->eeprom_control.tbl_byte_sum = > + __calc_hdr_byte_sum(&adev->psp.ras.ras- > >eeprom_control); > + ret = __update_table_header(control, buff); > + } > + > + /* Start inserting records from here */ > + adev->psp.ras.ras->eeprom_control.next_addr = > EEPROM_RECORD_START; > + > + return ret == 1 ? 
0 : -EIO; > +} > + > +void amdgpu_ras_eeprom_fini(struct amdgpu_ras_eeprom_control > *control) > +{ > + struct amdgpu_device *adev = to_amdgpu_device(control); > + > + switch (adev->asic_type) { > + case CHIP_VEGA20: > + /*TODO Add MI-60 */ > + break; > + > + default: > + return; > + } > +} > + > +static void __encode_table_record_to_buff(struct > amdgpu_ras_eeprom_control *control, > + struct eeprom_table_record *record, > + unsigned char *buff) > +{ > + __le64 tmp = 0; > + int i = 0; > + > + /* Next are all record fields according to EEPROM page spec in LE > foramt */ > + buff[i++] = record->err_type; > + > + buff[i++] = record->bank; > + > + tmp = cpu_to_le64(record->ts); > + memcpy(buff + i, &tmp, 8); > + i += 8; [Tao] I think sizeof(record->ts) is better > + > + tmp = cpu_to_le64((record->offset & 0xffffffffffff)); [Tao] ((0x1ULL << 48) - 1) is more readable than & 0xffffffffffff, or a macro can be defined, but either way is OK. > + memcpy(buff + i, &tmp, 6); > + i += 6; > + > + buff[i++] = record->mem_channel; > + buff[i++] = record->mcumc_id; > + > + tmp = cpu_to_le64((record->retired_page & 0xffffffffffff)); > + memcpy(buff + i, &tmp, 6); > +} > + > +static void __decode_table_record_from_buff(struct > amdgpu_ras_eeprom_control *control, > + struct eeprom_table_record > *record, > + unsigned char *buff) > +{ > + __le64 tmp = 0; > + int i = 0; > + > + /* Next are all record fields according to EEPROM page spec in LE > foramt */ > + record->err_type = buff[i++]; > + > + record->bank = buff[i++]; > + > + memcpy(&tmp, buff + i, 8); > + record->ts = le64_to_cpu(tmp); > + i += 8; > + > + memcpy(&tmp, buff + i, 6); > + record->offset = (le64_to_cpu(tmp) & 0xffffffffffff); > + i += 6; > + > + buff[i++] = record->mem_channel; > + buff[i++] = record->mcumc_id; > + > + memcpy(&tmp, buff + i, 6); > + record->retired_page = (le64_to_cpu(tmp) & 0xffffffffffff); } > + > +/* > + * When reaching end of EEPROM memory jump back to 0 record address > + * When next record access 
will go beyond EEPROM page boundary modify > +bits A17/A8 > + * in I2C selector to go to next page > + */ > +static uint32_t __correct_eeprom_dest_address(uint32_t curr_address) { > + uint32_t next_address = curr_address + > EEPROM_TABLE_RECORD_SIZE; > + > + /* When all EEPROM memory used jump back to 0 address */ > + if (next_address > EEPROM_SIZE_BYTES) { > + DRM_INFO("Reached end of EEPROM memory, jumping to 0 > " > + "and overriding old record"); > + return EEPROM_RECORD_START; > + } > + > + /* > + * To check if we overflow page boundary compare next address > with > + * current and see if bits 17/8 of the EEPROM address will change > + * If they do start from the next 256b page > + * > + * https://www.st.com/resource/en/datasheet/m24m02-dr.pdf sec. > 5.1.2 > + */ > + if ((curr_address & EEPROM_ADDR_MSB_MASK) != (next_address & > EEPROM_ADDR_MSB_MASK)) { > + DRM_DEBUG_DRIVER("Reached end of EEPROM memory > page, jumpimng to next: %lx", > + (next_address & > EEPROM_ADDR_MSB_MASK)); > + > + return (next_address & EEPROM_ADDR_MSB_MASK); > + } > + > + return curr_address; > +} > + > + > +static uint32_t __calc_hdr_byte_sum(struct amdgpu_ras_eeprom_control > +*control) { > + int i; > + uint32_t tbl_sum = 0; > + > + /* Header checksum, skip checksum field in the calculation */ > + for (i = 0; i < sizeof(control->tbl_hdr) - sizeof(control- > >tbl_hdr.checksum); i++) > + tbl_sum += *(((unsigned char *)&control->tbl_hdr) + i); > + > + return tbl_sum; > +} > + > +static uint32_t __calc_recs_byte_sum(struct eeprom_table_record *records, > + int num) > +{ > + int i, j; > + uint32_t tbl_sum = 0; > + > + /* Records checksum */ > + for (i = 0; i < num; i++) { > + struct eeprom_table_record *record = &records[i]; > + > + for (j = 0; j < sizeof(*record); j++) { > + tbl_sum += *(((unsigned char *)record) + j); > + } > + } > + > + return tbl_sum; > +} > + > +static inline uint32_t __calc_tbl_byte_sum(struct > amdgpu_ras_eeprom_control *control, > + struct eeprom_table_record 
*records, int > num) { > + return __calc_hdr_byte_sum(control) + > __calc_recs_byte_sum(records, > +num); } > + > +/* Checksum = 256 -((sum of all table entries) mod 256) */ static void > +__update_tbl_checksum(struct amdgpu_ras_eeprom_control *control, > + struct eeprom_table_record *records, int > num, > + uint32_t old_hdr_byte_sum) > +{ > + /* > + * This will update the table sum with new records. > + * > + * TODO: What happens when the EEPROM table is to be wrapped > around > + * and old records from start will get overridden. > + */ > + > + /* need to recalculate updated header byte sum */ > + control->tbl_byte_sum -= old_hdr_byte_sum; > + control->tbl_byte_sum += __calc_tbl_byte_sum(control, records, > num); > + > + control->tbl_hdr.checksum = 256 - (control->tbl_byte_sum % 256); } [Tao] we can change 256 to EEPROM_PAGE__SIZE_BYTES > + > +/* table sum mod 256 + checksum must equals 256 */ static bool > +__validate_tbl_checksum(struct amdgpu_ras_eeprom_control *control, > + struct eeprom_table_record *records, int num) { > + control->tbl_byte_sum = __calc_tbl_byte_sum(control, records, num); > + > + if (control->tbl_hdr.checksum + (control->tbl_byte_sum % 256) != 256) > { > + DRM_WARN("Checksum mismatch, checksum: %u ", control- > >tbl_hdr.checksum); > + return false; > + } > + > + return true; > +} > + > +int amdgpu_ras_eeprom_process_recods(struct > amdgpu_ras_eeprom_control *control, > + struct eeprom_table_record > *records, > + bool write, > + int num) > +{ > + int i, ret = 0; > + struct i2c_msg *msgs; > + unsigned char *buffs; > + struct amdgpu_device *adev = to_amdgpu_device(control); > + > + if (adev->asic_type != CHIP_VEGA20) > + return 0; > + > + buffs = kcalloc(num, EEPROM_ADDRESS_SIZE + > EEPROM_TABLE_RECORD_SIZE, > + GFP_KERNEL); > + if (!buffs) > + return -ENOMEM; > + > + mutex_lock(&control->tbl_mutex); > + > + msgs = kcalloc(num, sizeof(*msgs), GFP_KERNEL); > + if (!msgs) { > + ret = -ENOMEM; > + goto free_buff; > + } > + > + /* In case of 
overflow just start from beginning to not lose newest > records */ > + if (write && (control->next_addr + EEPROM_TABLE_RECORD_SIZE * > num > EEPROM_SIZE_BYTES)) > + control->next_addr = EEPROM_RECORD_START; > + > + > + /* > + * TODO Currently makes EEPROM writes for each record, this > creates > + * internal fragmentation. Optimized the code to do full page write of > + * 256b > + */ > + for (i = 0; i < num; i++) { > + unsigned char *buff = &buffs[i*(EEPROM_ADDRESS_SIZE + > EEPROM_TABLE_RECORD_SIZE)]; > + struct eeprom_table_record *record = &records[i]; > + struct i2c_msg *msg = &msgs[i]; > + > + control->next_addr = > +__correct_eeprom_dest_address(control->next_addr); > + > + /* > + * Update bits 16,17 of EEPROM address in I2C address by > setting them > + * to bits 1,2 of Device address byte > + */ > + msg->addr = EEPROM_I2C_TARGET_ADDR | > + ((control->next_addr & > EEPROM_ADDR_MSB_MASK) >> 15); > + msg->flags = write ? 0 : I2C_M_RD; > + msg->len = EEPROM_ADDRESS_SIZE + > EEPROM_TABLE_RECORD_SIZE; > + msg->buf = buff; > + > + /* Insert the EEPROM dest addess, bits 0-15 */ > + buff[0] = ((control->next_addr >> 8) & 0xff); > + buff[1] = (control->next_addr & 0xff); > + > + /* EEPROM table content is stored in LE format */ > + if (write) > + __encode_table_record_to_buff(control, record, buff > + > +EEPROM_ADDRESS_SIZE); > + > + /* > + * The destination EEPROM address might need to be > corrected to account > + * for page or entire memory wrapping > + */ > + control->next_addr += EEPROM_TABLE_RECORD_SIZE; > + } > + > + ret = i2c_transfer(&control->eeprom_accessor, msgs, num); > + if (ret < 1) { > + DRM_ERROR("Failed to process EEPROM table records, > ret:%d", ret); > + > + /* TODO Restore prev next EEPROM address ? 
*/ > + goto free_msgs; > + } > + > + > + if (!write) { > + for (i = 0; i < num; i++) { > + unsigned char *buff = > &buffs[i*(EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)]; [Tao] space is needed before and after "*" > + struct eeprom_table_record *record = &records[i]; [Tao] add a space after "*" > + > + __decode_table_record_from_buff(control, record, > buff + EEPROM_ADDRESS_SIZE); > + } > + } > + > + if (write) { > + uint32_t old_hdr_byte_sum = __calc_hdr_byte_sum(control); > + > + /* > + * Update table header with size and CRC and account for > table > + * wrap around where the assumption is that we treat it as > empty > + * table > + * > + * TODO - Check the assumption is correct > + */ > + control->num_recs += num; > + control->num_recs %= EEPROM_MAX_RECORD_NUM; > + control->tbl_hdr.tbl_size += EEPROM_TABLE_RECORD_SIZE * > num; > + if (control->tbl_hdr.tbl_size > EEPROM_SIZE_BYTES) > + control->tbl_hdr.tbl_size = > EEPROM_TABLE_HEADER_SIZE + > + control->num_recs * EEPROM_TABLE_RECORD_SIZE; > + > + __update_tbl_checksum(control, records, num, > old_hdr_byte_sum); > + > + __update_table_header(control, buffs); > + } else if (!__validate_tbl_checksum(control, records, num)) { > + DRM_WARN("EEPROM Table checksum mismatch!"); > + /* TODO Uncomment when EEPROM read/write is relliable > */ > + /* ret = -EIO; */ > + } > + > +free_msgs: > + kfree(msgs); > + > +free_buff: > + kfree(buffs); > + > + mutex_unlock(&control->tbl_mutex); > + > + return ret == num ? 
0 : -EIO; > +} > + > +void amdgpu_ras_eeprom_test(struct amdgpu_ras_eeprom_control > *control) > +{ > + int i; > + struct eeprom_table_record *recs = kcalloc(1, sizeof(*recs), > +GFP_KERNEL); > + > + if (!recs) > + return; > + > + for (i = 0; i < 1 ; i++) { > + recs[i].address = 0xdeadbeef; > + recs[i].retired_page = i; > + } > + > + if (!amdgpu_ras_eeprom_process_recods(control, recs, true, 1)) { > + > + memset(recs, 0, sizeof(*recs) * 1); > + > + control->next_addr = EEPROM_RECORD_START; > + > + if (!amdgpu_ras_eeprom_process_recods(control, recs, false, > 1)) { > + for (i = 0; i < 1; i++) > + DRM_INFO("rec.address :0x%llx, > rec.retired_page :%llu", > + recs[i].address, recs[i].retired_page); > + } else > + DRM_ERROR("Failed in reading from table"); > + > + } else > + DRM_ERROR("Failed in writing to table"); } > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h > new file mode 100644 > index 0000000..41f3fcb > --- /dev/null > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h > @@ -0,0 +1,90 @@ > +/* > + * Copyright 2019 Advanced Micro Devices, Inc. > + * > + * Permission is hereby granted, free of charge, to any person > +obtaining a > + * copy of this software and associated documentation files (the > +"Software"), > + * to deal in the Software without restriction, including without > +limitation > + * the rights to use, copy, modify, merge, publish, distribute, > +sublicense, > + * and/or sell copies of the Software, and to permit persons to whom > +the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be > +included in > + * all copies or substantial portions of the Software. 
> + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > +EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > +MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO > EVENT > +SHALL > + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, > +DAMAGES OR > + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR > +OTHERWISE, > + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR > THE USE > +OR > + * OTHER DEALINGS IN THE SOFTWARE. > + * > + */ > + > +#ifndef _AMDGPU_RAS_EEPROM_H > +#define _AMDGPU_RAS_EEPROM_H > + > +#include <linux/i2c.h> > + > +struct amdgpu_device; > + > +enum amdgpu_ras_eeprom_err_type{ > + AMDGPU_RAS_EEPROM_ERR_PLACE_HOLDER, > + AMDGPU_RAS_EEPROM_ERR_RECOVERABLE, > + AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE > +}; > + > +struct amdgpu_ras_eeprom_table_header { > + uint32_t header; > + uint32_t version; > + uint32_t first_rec_offset; > + uint32_t tbl_size; > + uint32_t checksum; > +}__attribute__((__packed__)); > + > +struct amdgpu_ras_eeprom_control { > + struct amdgpu_ras_eeprom_table_header tbl_hdr; > + struct i2c_adapter eeprom_accessor; > + uint32_t next_addr; > + unsigned int num_recs; > + struct mutex tbl_mutex; > + bool bus_locked; > + uint32_t tbl_byte_sum; > +}; > + > +/* > + * Represents single table record. Packed to be easily serialized into > +byte > + * stream. 
> + */ > +struct eeprom_table_record { > + > + union { > + uint64_t address; > + uint64_t offset; > + }; > + > + uint64_t retired_page; > + uint64_t ts; > + > + enum amdgpu_ras_eeprom_err_type err_type; > + > + union { > + unsigned char bank; > + unsigned char cu; > + }; > + > + unsigned char mem_channel; > + unsigned char mcumc_id; > +}__attribute__((__packed__)); > + > +int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control); > +void amdgpu_ras_eeprom_fini(struct amdgpu_ras_eeprom_control > *control); > + > +int amdgpu_ras_eeprom_process_recods(struct > amdgpu_ras_eeprom_control *control, > + struct eeprom_table_record > *records, > + bool write, > + int num); > + > +void amdgpu_ras_eeprom_test(struct amdgpu_ras_eeprom_control > *control); > + > +#endif // _AMDGPU_RAS_EEPROM_H > -- > 2.7.4 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx