From: Stephen Horvath <s.horvath@xxxxxxxxxxxxxx>

JESD400 specifies that an error log can be written to anywhere in the
end user programmable eeprom section, following a specific format.

This adds some code to find and read this error log.

This is also completely untested on actual hardware implementations,
only tested by reading some manually constructed files.

Signed-off-by: Stephen Horvath <s.horvath@xxxxxxxxxxxxxx>
---
 eeprom/decode-dimms | 113 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 113 insertions(+)

diff --git a/eeprom/decode-dimms b/eeprom/decode-dimms
index a3b613bc869bbd1d8183958c42d05c3b3e3653ca..a6a16696b126b16b7a8e62b15120f99789d9b762 100755
--- a/eeprom/decode-dimms
+++ b/eeprom/decode-dimms
@@ -2656,6 +2656,116 @@ sub decode_ddr5_mfg_data($)
 	}
 }
 
+# Parameter: EEPROM bytes 0-1023 (using 640-1023)
+# Searches the End User Programmable area for 24-byte error log entries
+# and prints each one found; prints nothing when there is none.
+sub decode_ddr5_error_data($)
+{
+	my $bytes = shift;
+
+	# Zero or more error logs may appear anywhere in any End User Programmable blocks of the SPD,
+	# including over SPD Block boundaries. They may be found by searching for a four byte anchor string.
+
+	my $errors = [];
+
+	# Scan up to and including byte 1023, or less if the dump is shorter
+	my $size = scalar @{$bytes} < 1024 ? scalar @{$bytes} : 1024;
+
+	# A log entry is 24 bytes, so it must start at or before $size - 24
+	for (my $ii = 640; $ii + 24 <= $size; $ii++) {
+		# Anchor bytes, in decimal: 95 70 76 95
+		if (join(' ', @{$bytes}[$ii .. $ii + 3]) eq "95 70 76 95") {
+			push @{$errors}, [@{$bytes}[$ii .. $ii + 23]];
+			# Continue after this entry (the loop itself adds 1)
+			$ii += 23;
+		}
+	}
+
+	if (@{$errors} == 0) {
+		# No error logs found
+		return;
+	}
+
+	prints("Error Log");
+
+	printl("Error Log Count", scalar @{$errors});
+
+	for (my $ii = 0; $ii < scalar @{$errors}; $ii++) {
+		my $error = $errors->[$ii];
+
+		# error type (one flag per bit of byte 4)
+		printl_cond($error->[4] & (1 << 0), "Error $ii Type", "DRAM Uncorrectable Error");
+		printl_cond($error->[4] & (1 << 1), "Error $ii Type", "DRAM Correctable Error");
+		printl_cond($error->[4] & (1 << 2), "Error $ii Type", "DRAM ECS Error");
+		printl_cond($error->[4] & (1 << 3), "Error $ii Type", "hPPR Was Required");
+		printl_cond($error->[4] & (1 << 4), "Error $ii Type", "hPPR Resource Error");
+
+		# error location
+		printl("Error $ii Location CPU", ($error->[5] >> 3) & 0x07);
+		printl("Error $ii Location CPUMC", (($error->[5] & 3) << 2) | ($error->[6] >> 6));
+		printl("Error $ii Location DIMM", ($error->[6] >> 4) & 0x01);
+
+		# these are active low
+		printl_cond(~$error->[6] & (1 << 3), "Error $ii Location Rank", "0 (sub-channel A)");
+		printl_cond(~$error->[6] & (1 << 2), "Error $ii Location Rank", "1 (sub-channel A)");
+		printl_cond(~$error->[6] & (1 << 1), "Error $ii Location Rank", "0 (sub-channel B)");
+		printl_cond(~$error->[6] & (1 << 0), "Error $ii Location Rank", "1 (sub-channel B)");
+
+		printl("Error $ii Location Parity", ($error->[7] >> 6) & 0x01);
+		if (($error->[7] >> 5) & 1) {
+			# chip identifier?
+			printl("Error $ii Location Chip", ($error->[7] >> 2) & 0x07);
+		} else {
+			# row address?
+			printl("Error $ii Location Bank Group", (($error->[7] & 0x03) << 1) | (($error->[8] & 0x80) >> 7));
+			printl("Error $ii Location Bank Address", ($error->[8] >> 5) & 0x03);
+			printl("Error $ii Location Row Address", (($error->[8] & 0x1f) << 12) | ($error->[9] << 4) | ($error->[10] >> 4));
+			printl("Error $ii Location Column Address", (($error->[10] & 0x0f) << 7) | (($error->[11] & 0xf0) >> 1));
+		}
+
+		# also active low; one flag per device, spread over bytes 11-13
+		printl_cond(~$error->[11] & (1 << 0), "Error $ii Location Device", "DQS6A");
+		printl_cond(~$error->[11] & (1 << 1), "Error $ii Location Device", "DQS7A");
+		printl_cond(~$error->[11] & (1 << 2), "Error $ii Location Device", "DQS8A");
+		printl_cond(~$error->[11] & (1 << 3), "Error $ii Location Device", "DQS9A");
+
+		printl_cond(~$error->[12] & (1 << 0), "Error $ii Location Device", "DQS8B");
+		printl_cond(~$error->[12] & (1 << 1), "Error $ii Location Device", "DQS9B");
+		printl_cond(~$error->[12] & (1 << 2), "Error $ii Location Device", "DQS0A");
+		printl_cond(~$error->[12] & (1 << 3), "Error $ii Location Device", "DQS1A");
+		printl_cond(~$error->[12] & (1 << 4), "Error $ii Location Device", "DQS2A");
+		printl_cond(~$error->[12] & (1 << 5), "Error $ii Location Device", "DQS3A");
+		printl_cond(~$error->[12] & (1 << 6), "Error $ii Location Device", "DQS4A");
+		printl_cond(~$error->[12] & (1 << 7), "Error $ii Location Device", "DQS5A");
+
+		# byte 13 holds the remaining sub-channel B devices
+		printl_cond(~$error->[13] & (1 << 0), "Error $ii Location Device", "DQS0B");
+		printl_cond(~$error->[13] & (1 << 1), "Error $ii Location Device", "DQS1B");
+		printl_cond(~$error->[13] & (1 << 2), "Error $ii Location Device", "DQS2B");
+		printl_cond(~$error->[13] & (1 << 3), "Error $ii Location Device", "DQS3B");
+		printl_cond(~$error->[13] & (1 << 4), "Error $ii Location Device", "DQS4B");
+		printl_cond(~$error->[13] & (1 << 5), "Error $ii Location Device", "DQS5B");
+		printl_cond(~$error->[13] & (1 << 6), "Error $ii Location Device", "DQS6B");
+		printl_cond(~$error->[13] & (1 << 7), "Error $ii Location Device", "DQS7B");
+
+		# timestamp
+		my $year = ($error->[14] >> 2) + 2020;
+		my $month = (($error->[14] & 0x03) << 2) | ($error->[15] >> 6);
+		my $day = ($error->[15] & 0x3e) >> 1;
+		my $hour = (($error->[15] & 0x01) << 4) | ($error->[16] >> 4);
+		my $minute = (($error->[16] & 0x0f) << 2) | ($error->[17] >> 6);
+		my $second = $error->[17] & 0x3f;
+		printl("Error $ii Timestamp", sprintf("%04d-%02d-%02d %02d:%02d:%02d",
+			$year, $month, $day, $hour, $minute, $second));
+
+		# DRAM refresh settings
+		# TODO
+
+		# measured temperature
+		# TODO
+	}
+}
+
 # Parameter: EEPROM bytes 0-127 (using 64-98)
 sub decode_manufacturing_information($)
 {
@@ -3215,6 +3325,9 @@ for $current (0 .. $#dimm) {
 				# Decode DDR5-specific manufacturing data in bytes
 				# 512-639
 				decode_ddr5_mfg_data(\@bytes);
+				# Decode DDR5-specific error log
+				# 640-1023 (max)
+				decode_ddr5_error_data(\@bytes);
 			}
 		} else {
 			# Decode next 35 bytes (64-98, common to most
-- 
2.45.2