| #!/usr/bin/env python3 |
| |
| import sys |
| import ppc |
| import re |
| |
| # Mnemonic PHB_ESR - Address Offset 0x0C80 - phbErrorStatusRegister |
| phb_esr_bits = [ |
| (0, "ETU/RSB Request Address Error"), |
| (1, "Fundamental A Request Address Error"), |
| (2, "Fundamental A Request Size/Alignment Error"), |
| (3, "Fundamental A PCI CFG Addr/Size Error"), |
| (4, "Fundamental A IODA Table Access Error"), |
| (5, "Fundamental A Internal Registers Parity Error"), |
| (6, "PHB Error Registers Request Address Error"), |
| (7, "PHB Error Registers Request Size/Alignment Error"), |
| (8, "Fundamental B Request Address Error"), |
| (9, "Fundamental B Request Size/Alignment Error"), |
| (10, "Fundamental B Internal Registers Parity Error"), |
| (11, "Internal Bus Logic Bad PCIE Macro Request Address"), |
| (12, "Debug Request Address Error"), |
| (13, "Debug Request Size/Alignment Error"), |
| (14, "Debug Internal Registers Parity Error"), |
| (15, "Internal Bus Logic State Machine One-Hot Error"), |
| (16, "UV Page Request Address Error"), |
| (17, "UV Page Request Size/Alignment Error"), |
| (18, "UV Page Internal Registers Parity Error"), |
| (20, "RXE_ARB OR Error Status"), |
| (21, "RXE_MRG OR Error Status"), |
| (22, "RXE_TCE OR Error Status"), |
| (23, "TXE OR Error Status"), |
| (24, "pcie_etu_regb_err_inf"), |
| (25, "pcie_etu_regb_err_erc"), |
| (26, "pcie_etu_regb_err_fat"), |
| (27, "bus_regs_req_wr_data_p_e"), |
| (28, "SCOM HV Indirect Access Error"), |
| (29, "SCOM UV Indirect Access Error"), |
| (30, "SCOM Internal Registers Parity Error"), |
| (31, "SCOM Satellite Finite State Machine Error"), |
| ] |
| |
| # Mnemonic TXE_ESR - Address Offset 0x0D00 - txeFirstErrorStatus |
| txe_esr_bits = [ |
| (0, "AIB Command Invalid"), |
| (2, "AIB Address Decode Error"), |
| (3, "AIB Size Invalid"), |
| (4, "AIB Cmd Ctrls Parity Error"), |
| (5, "AIB Data Ctrls Parity Error"), |
| (8, "AIB Alignment Error"), |
| (9, "AIB Cmd Bus Parity Error"), |
| (10, "AIB Data Bus UE ECC Error"), |
| (11, "AIB Data Ctrls Sequence Error"), |
| (12, "AIB Data Bus CE ECC Error"), |
| (13, "TCE Rd Response DAT_ERR Indication"), |
| (14, "AIB Command Credits Error"), |
| (15, "AIB Data Credits Error"), |
| (16, "BLIF Controls Parity Error"), |
| (17, "CFG Write Error CA or UR response"), |
| (18, "BLIF Forward Progress Timeout"), |
| (19, "MMIO RD Pending Error"), |
| (20, "MMIO WR Pending Error"), |
| (21, "MMIO CFG Pending Error"), |
| (22, "MMIO Write DAT_ERR Indication"), |
| (23, "CI Store Data Fifo Error"), |
| (24, "CFG Enable Error, RRB"), |
| (25, "CFG Size Error"), |
| (26, "CFG Bus Address Error"), |
| (27, "CFG Link Down Error"), |
| (28, "PAPR TXE Injection Error Triggered"), |
| (29, "CFG Write Request Timeout"), |
| (30, "PAPR TXE Injection Error Triggered"), |
| (36, "CI Trigger Buffer ECC Correctable Error"), |
| (37, "CI Trigger Buffer ECC Uncorrectable Error"), |
| (38, "CI Trigger Buffer Stage Data Parity Error"), |
| (40, "MMIO BAR Table (MBT) Parity Error"), |
| (42, "MMIO Domain Table (MDT) ECC Correctable Error"), |
| (43, "MMIO Domain Table (MDT) ECC Uncorrectable Error"), |
| (44, "MMIO Domain Table (MDT) Stage Parity Error"), |
| (45, "MMIO Domain Table (MDT) Stage Valid Error"), |
| (46, "AIB Data Special Uncorrectable Error (SUE)"), |
| (47, "MMIO Domain Table (MDT)"), |
| (48, "P2P Store Data Fifo Error"), |
| (49, "EPAT Table Parity Error"), |
| (50, "MMIO Cmd Parity Error"), |
| (51, "BLIF1 Reg Parity Error"), |
| (52, "P2P1 Reg Parity Error"), |
| (53, "P2P WR Pending Error"), |
| (54, "CRW Onehot Error"), |
| (55, "CRW Pending Error"), |
| (56, "RRB Parity Error"), |
| (57, "RRB Size/Alignment Error"), |
| (58, "s_bad_addr_e_q"), |
| (59, "s_req_size_align_e_q"), |
| ] |
| |
| # Mnemonic RXE_ARB_ESR - Address Offset 0x0D80 - phbRxeArbErrorStatus |
| rxe_arb_bits = [ |
| (0, "BLIF Inbound CA Completion Error"), |
| (1, "BLIF Inbound UR Completion Error"), |
| (2, "MSI Size Error"), |
| (3, "MSI Address Alignment Error"), |
| (5, "BLIF Inbound Header ECC Correctable (CE)"), |
| (6, "BLIF Inbound Header ECC Uncorrectable (UE)"), |
| (7, "ARB Stage Valid Error"), |
| (8, "TCE Tag Release Unused"), |
| (9, "TCE Tag Used, Not Free"), |
| (10, "ARB MMIO Buffer Overflow"), |
| (11, "ARB MMIO Buffer Underflow"), |
| (12, "ARB MMIO Internal Parity Error"), |
| (13, "ARB DMA Buffer Overflow"), |
| (14, "ARB DMA Buffer Underflow"), |
| (15, "ARB DMA Internal Parity Error"), |
| (16, "BLIF Header Control Bits Parity Error"), |
| (17, "BLIF Data Control Bits Parity Error"), |
| (18, "BLIF Unsupported Request (UR) Error"), |
| (19, "BLIF Completion Timeout Error"), |
| (20, "SEID Table ECC Correctable (CE)"), |
| (21, "SEID Table ECC Uncorrectable (UE)"), |
| (22, "NBW Size Error"), |
| (23, "DEC IODA Table Fatal Error"), |
| (24, "TLP Poisoned Error"), |
| (25, "MIST ECC Correctable Error"), |
| (26, "IODA TVT Entry Invalid"), |
| (27, "MSI PE# Mismatch"), |
| (28, "IODA TVT Address"), |
| (29, "TVT ECC Correctable Error"), |
| (30, "TVT ECC Uncorrectable Error"), |
| (31, "MIST ECC Uncorrectable Error"), |
| (32, "PELT-V BAR Disabled Error"), |
| (33, "IODA Table Parity Error"), |
| (34, "PCT Timeout"), |
| (35, "PCT Unexpected Completion"), |
| (36, "PCT Parity Error"), |
| (37, "DEC Stage Valid Error"), |
| (38, "DEC Stage Parity Error"), |
| (39, "PAPR Inbound Injection Error Triggered"), |
| (40, "DMA/MSI: RTE PE Number"), |
| (41, "RTT BAR Disabled Error"), |
| (42, "RTC Internal Parity Error"), |
| (43, "RTC Queue Overflow"), |
| (44, "RTC Queue Underflow"), |
| (45, "RTC Stage Valid Error"), |
| (46, "RTC RCAM Bad State Error"), |
| (47, "RTC RCAM Multiple Hit Error"), |
| (48, "RRB Parity Error"), |
| (49, "RRB request Size / Alignment Error"), |
| (50, "s_bad_addr_e_q"), |
| (51, "s_req_size_align_e_q"), |
| (54, "Discontiguous DMA Write Fragmentation"), |
| (55, "LIST Table Parity Error"), |
| (56, "LKP PEST Data Queue Error"), |
| (57, "PCIE Fatal Error Message Received"), |
| (58, "PCIE Nonfatal Error Message Received"), |
| (59, "PCIE Correctable Error Message Received"), |
| ] |
| |
| #Mnemonic RXE_MRG_ESR - Address Offset 0x0E00, phbRxeMrgErrorStatus |
| rxe_mrg_bits = [ |
| (8, "MRG TMB Allocation Error"), |
| (9, "MRG TMB Response Invalid"), |
| (10, "MRG TMB Response Ready Error"), |
| (11, "MRG MMIO Queue Overflow Error"), |
| (12, "MRG MMIO Queue Underflow Error"), |
| (13, "MRG MMIO Internal Parity Error"), |
| (14, "MRG DMA Queue Overflow Error"), |
| (15, "MRG DMA Queue Underflow Error"), |
| (16, "MRG DMA Internal Parity Error"), |
| (17, "MRG Migration Register Table"), |
| (18, "MRG Migration Register Table"), |
| (20, "s_bad_addr_e_q"), |
| (21, "s_req_size_align_e_q"), |
| (22, "RRB Parity Error"), |
| (23, "RRB request Size / Alignment Error"), |
| (24, "DSP AIB TX Timeout Error"), |
| (25, "Reserved (vA4.1)"), |
| (26, "DSP AIB TX CMD Credit Parity Error"), |
| (28, "DSP AIB TX DAT Credit Parity Error"), |
| (30, "DSP Command Credit Overflow Error"), |
| (31, "DSP Command Credit Underflow Error"), |
| (32, "DSP Command Credit Parity Error"), |
| (33, "DSP Data Credit Overflow Error"), |
| (34, "DSP Data Credit Underflow Error"), |
| (35, "DSP Data Credit Parity Error"), |
| (36, "DSP Completion State Machine One-Hot Error"), |
| (37, "DSP Write Thread State Machine One-Hot Error"), |
| (38, "DSP DMA Secure Address Error (vA4.2)"), |
| (39, "DSP MSI Interrupt Notification Secure Address"), |
| (40, "DSP TREQ ECC Correctable Error"), |
| (41, "DSP TREQ ECC Uncorrectable Error"), |
| (42, "DSP MMIO Queue Overflow Error"), |
| (43, "DSP MMIO Queue Underflow Error"), |
| (44, "DSP MMIO Internal Parity Error"), |
| (45, "DSP DMA Queue Overflow Error"), |
| (46, "DSP DMA Queue Underflow Error"), |
| (47, "DSP DMA Internal Parity Error"), |
| (48, "DSP Read Thread State Machine One-Hot Error"), |
| (49, "DSP Table State Machine One-Hot Error"), |
| (50, "DSP NBW State Machine One-Hot Error"), |
| (51, "DSP TSM PEST BAR Disabled Error"), |
| (56, "IPD ECC Correctable Error"), |
| (57, "IPD ECC Uncorrectable Error"), |
| (58, "ICPLD ECC Correctable Error"), |
| (59, "ICPLD ECC Uncorrectable Error"), |
| (60, "NBWD ECC Correctable Error"), |
| (61, "NBWD ECC Uncorrectable Error"), |
| (63, "pb_etu_ai_rx_raise_fence"), |
| ] |
| |
| |
| # Mnemonic RXE_TCE_ESR - Address Offset 0x0E80 - phbRxeTceErrorStatus |
| rxe_tce_bits = [ |
| (0, "TCE CMP Internal Parity Error"), |
| (1, "TCE Request Page Access Error"), |
| (2, "TCE Response Page Access Error"), |
| (3, "TCE CMP Queue Overflow"), |
| (4, "TCE CMP Queue Underflow"), |
| (5, "TCE Secure Address Error"), |
| (6, "TCE Cache Bad State Error"), |
| (7, "TCE Cache Multi-Way Hit Error"), |
| (8, "TCE Request Timeout Error"), |
| (9, "TCE TCR ECC Correctable Error"), |
| (10, "TCE TCR ECC Uncorrectable Error"), |
| (11, "TCE TDR ECC Correctable Error"), |
| (12, "TCE TDR ECC Uncorrectable Error"), |
| (13, "TCE Unexpected Response Error"), |
| (14, "RRB Parity Error"), |
| (15, "RRB request Size / Alignment Error"), |
| (16, "TCE RES Internal Parity Error"), |
| (17, "s_bad_addr_e_q"), |
| (18, "s_req_size_align_e_q"), |
| (19, "TCE RES Queue Overflow"), |
| (20, "TCE RES Queue Underflow"), |
| (21, "TCE Response Data Parity Error"), |
| (22, "TCE TCLB CAM Bad State Error"), |
| (23, "TCE TCLB CAM Multi-Hit Error"), |
| (24, "TCE Kill Internal Parity Error"), |
| (25, "TCE THASH Array ECC Correctable Error"), |
| (26, "TCE THASH Array ECC Uncorrectable Error"), |
| (27, "TCE TCLB TDAT ECC Correctable Error"), |
| (28, "TCE TCLB TDAT ECC Uncorrectable Error"), |
| (29, "TCE Kill State Machine One-Hot Error"), |
| (30, "TCE Kill Queue Overflow"), |
| (31, "TCE Kill Queue Underflow"), |
| (32, "TCE Request Secure Address Register"), |
| (33, "TCE Response Secure Address Register"), |
| ] |
| |
| |
| #Mnemonic PBL_ESR - Address Offset 0x1900 - phbPblErrorStatus |
| pbl_esr_bits = [ |
| (0, "pb_err_p_fe_tlif_rx_par_e Parity error detected on TLIF Receive interface."), |
| (1, "pb_err_p_fe_tlif_tx_par_e Parity error detected on TLIF Transmit interface."), |
| (2, "pb_err_p_fe_blif_out_par_e"), |
| (3, "pb_err_p_fe_blif_in_par_e"), |
| (4, "pb_err_p_fe_int_par_e"), |
| (5, "pb_err_p_fe_toc_cred_e"), |
| (6, "pb_err_p_fe_ocf_par_e"), |
| (7, "pb_err_p_fe_ocf_prot_e"), |
| (12, "pb_err_p_fe_pct_erq_overflow_e"), |
| (13, "pb_err_p_fe_pct_erq_underflow_e"), |
| (14, "pb_err_p_fe_pct_onp_tags_rls_unused_e"), |
| (15, "pb_err_p_fe_pct_onp_tags_used_notfree_e"), |
| (16, "pb_err_p_fe_pct_onp_tags_used_unexp_e"), |
| (17, "pb_err_p_fe_bct_onp_tags_rls_unused_e"), |
| (18, "pb_err_p_fe_bct_onp_tags_used_notfree_e"), |
| (19, "pb_err_p_fe_ib_bct_rd_inv"), |
| (20, "pb_err_p_fe_ob_buffer_overflow_e"), |
| (21, "pb_err_p_fe_ob_buffer_underflow_e"), |
| (22, "pb_err_p_fe_ib_buffer_overflow_e"), |
| (23, "pb_err_p_fe_ib_buffer_underflow_e"), |
| (24, "pb_err_p_fe_ib_d_ecc_ue"), |
| (25, "pb_err_p_fe_ib_h_ecc_ue"), |
| (26, "pb_err_p_fe_ob_d_ecc_ue"), |
| (27, "pb_err_p_fe_ob_h_ecc_ue"), |
| (28, "pb_err_p_fe_ocf_ecc_ue"), |
| (32, "pb_err_p_fe_tx_pst_discard_e"), |
| (33, "pb_err_p_inf_tx_npst_discard_e"), |
| (34, "pb_err_p_fe_nbw_tlp_e"), |
| (36, "pb_err_p_fe_pci_rcv_cpl_ca_e"), |
| (37, "pb_err_p_fe_pci_rcv_cpl_crs_e"), |
| (38, "pb_err_p_fe_pci_rcv_cpl_rsvd_e"), |
| (39, "pb_err_p_fe_pci_rcv_cpl_ur_e"), |
| (40, "pb_err_p_fe_pci_rcv_ecrc_e"), |
| (41, "pb_err_p_fe_pci_rcv_malf_tlp_e"), |
| (42, "pb_err_p_fe_pci_rcv_overflow_e"), |
| (43, "pb_err_p_fe_pci_rcv_poisoned_tlp_e"), |
| (44, "pb_err_p_fe_pci_rcv_unexp_cpl_e"), |
| (45, "pb_err_p_fe_pci_rcv_unsup_req_e"), |
| (46, "pb_err_p_fe_pci_sig_cpl_abort_e"), |
| (47, "pb_err_p_fe_pci_sig_cpl_timeout_e"), |
| (48, "pb_err_p_fe_pci_sig_poisoned_tlp_e"), |
| (52, "pb_err_p_inf_out_trans_to_pst_e"), |
| (53, "pb_err_p_inf_out_trans_to_npst_e"), |
| (54, "pb_err_p_inf_out_trans_to_cpl_e"), |
| (56, "pb_err_p_inf_ib_d_ecc_ce"), |
| (57, "pb_err_p_inf_ib_h_ecc_ce"), |
| (58, "pb_err_p_inf_ob_d_ecc_ce"), |
| (59, "pb_err_p_inf_ob_h_ecc_ce"), |
| (60, "pb_err_p_inf_ocf_ecc_ce"), |
| (62, "PBL Bad Register Address Error"), |
| (63, "PBL Register Parity Error"), |
| ] |
| |
| # Mnemonic REGB_ESR - Address Offset 0x1C00 - phbRegbErrorStatus |
| regb_esr_bits = [ |
| (0, "REGB Internal Register Parity Error"), |
| (1, "PBL Internal Register Parity Error"), |
| (2, "Invalid Address Decode Error"), |
| (3, "Register Access Invalid Address+Size Error"), |
| (5, "Register State Machine or Other Internal Error"), |
| (6, "PCI CFG Core Registers Parity Error"), |
| (7, "Register access to CFG core while in reset error."), |
| (8, "PCIE Link Down"), |
| (9, "PCIE Link Up"), |
| (10, "PCIE Link Auto Bandwidth Event Status"), |
| (11, "PCIE Link BW Management Event Status"), |
| (25, "PBL Error Trap: INF Error"), |
| (26, "PBL Error Trap: ERC Error"), |
| (27, "PBL Error Trap: FAT Error"), |
| (28, "tldlpo_dl_mon_rxreceivererror(0)"), |
| (29, "tldlpo_dl_mon_rxreceivererror(1)"), |
| (30, "tldlpo_dl_mon_rxreceivererror(2)"), |
| (32, "DL_EC08_BADDLLP"), |
| (33, "DL_EC08_BADTLP"), |
| (34, "DL_EC08_DLLPE"), |
| (35, "DL_EC08_RECEIVERERROR"), |
| (36, "DL_EC08_ REPLAYROLLOVER"), |
| (37, "DL_EC08_REPLAYTIMEOUT"), |
| (39, "DL_INTERNALERROR"), |
| (40, "DL_LB_ERROR"), |
| (41, "DL_RX_MALFORMED"), |
| (42, "DL_RX_NULLIFY"), |
| (43, "DL_RX_OVERFLOW"), |
| (44, "DL_TX_CORRERROR"), |
| (45, "DL_TX_UNCORRERROR"), |
| (46, "TL_EC08_FCPE"), |
| (48, "Replay ECC Correctable Error (CE)"), |
| (49, "Replay ECC UnCorrectable Error (UE)"), |
| (50, "Bad DLLP Error Count Saturated"), |
| (51, "Bad TLP Error Count Saturated"), |
| (52, "Receiver Error Count Saturated"), |
| (53, "DLLPE Error Count Saturated"), |
| (58, "pbl_ptl_dl_al_rx_initcredit_p_e"), |
| (59, "pbl_ptl_dl_al_rx_updatecredit_p_e"), |
| (60, "PTL Core DLIF Protocol Error"), |
| (61, "PTL Core TLIF Protocol Error"), |
| (62, "PTL Core Internal Parity Error"), |
| ] |
| |
| # FIXME: use the long desc |
| nfir_bits = [ |
| (0, "bar_pe"), # One of the BARs or BAR Mask Register parity error. |
| (1, "nonbar_pe"), # Any non-BAR parity error. |
| (2, "PB_to_PEC_ce"), # ECC correctable error off of outbound SMP interconnect. |
| (3, "PB_to_PEC_ue"), # ECC uncorrectable error off of outbound SMP interconnect. |
| (4, "PB_to_PEC_sue"), # ECC special uncorrectable error off of outbound SMP interconnect |
| (5, "ary_ecc_ce"), # ECC correctable error on an internal array. |
| (6, "ary_ecc_ue"), # ECC uncorrectable error on an internal array. |
| (7, "ary_ecc_sue"), # ECC special uncorrectable error on an internal array. |
| (8, "register_array_pe"), # Parity error on an internal register file. |
| (9, "pb_interface_pe"), # Parity error on the PB interface (address/aTag/tTag/rTAG). |
| (10, "pb_data_hang_errors"), # Any SMP interconnect data hang poll error (only checked for CI stores). |
| (11, "pb_hang_errors"), # Any SMP interconnect command hang error (domestic address range). |
| (12, "rd_are_errors"), # SMP interconnect address error (ARE) detected by a DMA read. |
| (13, "nonrd_are_errors"), # SMP interconnect address error detected by a DMA write or an interrupt engine. |
| (14, "pci_hang_error"), # PBCQ detected that the PCI load, store, EOI, or DMA read response did not make forward progress. |
| (15, "pci_clock_error"), # PBCQ has detected that the PCI clock has stopped. |
| (16, "PFIR_freeze"), # This is the freeze signal from the PFIR freeze output. |
| (17, "hw_errors"), # Any miscellaneous hardware error. |
| (18, "UnsolicitiedPBData"), # The PEC received data with an rTAG matching a queue that was not expecting data or too much data was received. |
| (19, "UnExpectedCResp"), # PEC received an unexpected combined response. |
| (20, "InvalidCResp"), # PEC received an invalid combined response. |
| (21, "PBUnsupportedSize"), # PEC received a CI load/store that hits a BAR but is an unsupported size or address alignment. |
| ] |
| |
| pfir_bits = [ |
| (0, "register_pe"), # PBAIB register parity error. |
| (1, "hardware_error"), # Hardware error. |
| (2, "AIB_intf_error"), # AIB interface error. |
| (3, "ETU_Reset_error"), # ETU reset error. |
| (4, "PEC_scom_error"), # Common PEC SCOM error. |
| (5, "scomfir_error0"), # SCOM Error bit 0 |
| (6, "scomfir_error1"), # SCOM Error bit 1 |
| ] |
| |
| class PHBError: |
| reg_bits = { |
| "NEST FIR": nfir_bits, |
| "PCI FIR": pfir_bits, |
| "phbErrorStatus": phb_esr_bits, |
| "phbTxeErrorStatus": txe_esr_bits, |
| "phbRxeArbErrorStatus": rxe_arb_bits, |
| "phbRxeMrgErrorStatus": rxe_mrg_bits, |
| "phbRxeTceErrorStatus": rxe_tce_bits, |
| "phbRegbErrorStatus": regb_esr_bits, |
| "phbPblErrorStatus": pbl_esr_bits, |
| } |
| |
| def __str__(self): |
| s = "" |
| for k, v in self.regs.items(): |
| s += "{:30s} - {:#018x} - {}\n".format(k, v, ppc.setbits(v)) |
| return s |
| |
| def __init__(self, timestamp = 0): |
| self.timestamp = timestamp |
| self.pest = [] |
| self.regs = {} |
| |
| # NB: Value is a str, FIXME: Work out how to use python's type annotations |
| def set_reg(self, reg, value): |
| reg = reg.replace(" ", "") |
| if not self.regs.get(reg): |
| self.regs[reg] = value |
| return True |
| return False |
| |
| def get_reg(self, reg): |
| reg = reg.replace(" ", "") |
| v = self.regs.get(reg) |
| if v: |
| return v |
| return 0 |
| |
| # NB: pest entries should be inserted in sort order, but it might be a good |
| # idea to explicitly sort them by PE number |
| def set_pest(self, pe, pesta, pestb): |
| self.pest.append((pe, pesta, pestb)) |
| |
| def get_pest(self, pe_number): |
| for pe, a, b in self.pest: |
| if pe == pe_number: |
| return (a, b) |
| return None |
| |
| def header(self): |
| return self.timestamp |
| |
| # TODO: move the formatting out of here and into the main loop |
| def show_errs(self): |
| out = "" |
| for reg_name,reg_bits in self.reg_bits.items(): |
| reg_value = self.get_reg(reg_name) |
| parts = reg_name.split("Error"); |
| if len(parts) > 1: |
| first_name = "{:s}FirstError{:s}".format(parts[0], parts[1]) |
| first_value = self.get_reg(first_name) |
| |
| # skiboot spells it wrong, so check Frst too |
| if first_value == 0: |
| frst_name = "{:s}FrstError{:s}".format(parts[0], parts[1]) |
| first_value = self.get_reg(frst_name) |
| else: |
| first_value = 0 |
| |
| if reg_value == 0: |
| continue |
| out += "{} = {:016x}:\n".format(reg_name, reg_value); |
| |
| for bit in reg_bits: |
| if ppc.ppcbit(bit[0]) & reg_value: |
| bang = "!" if (ppc.ppcbit(bit[0]) & reg_value & first_value) == ppc.ppcbit(bit[0]) else "" |
| out += "{:s}\t{:2d} - {}\n".format(bang, bit[0], bit[1]) |
| out += "\n" |
| |
| if len(self.pest) == 0: |
| return out |
| |
| out += "PEST entries:\n" |
| for pe, pesta, pestb in self.pest: |
| out += "\tPEST[{:03x}] = {:016x} {:016x}\n".format(pe, pesta, pestb) |
| |
| return out |
| |
| |
| |
| def parse_opal_log(log_text): |
| # Patterns to match: |
| # |
| # [ 938.249526636,3] PHB#0030[8:0]: NEST FIR WOF=0000800000000000 |
| # [ 938.250657886,3] PHB#0030[8:0]: slotStatus = 00402000 |
| # [ 938.254305278,3] PHB#0030[8:0]: PEST[511] = 3740002a01000000 0000000000000000 |
| # |
| phblog_re = re.compile("" + |
| "^\[\s*[\d.,]+] " + # skiboot log header |
| "(PHB#....\[.:.]):" + # PHB name |
| "\s+" + # whitespace between the PHB and register name |
| "([^:=]+)" + # register name, NB: this might have some trailing WS |
| "=\s*" + # the '=' seperating name and value, along with the whitespace |
| "([a-fA-F\d ]+)") # register value(s) |
| |
| # this alone isn't really sufficent. There's a few cases that can cause a register |
| # dump to be generated (e.g. when the link is retrained we do a reg dump) |
| new_log_marker = re.compile("" + |
| "^\[ [\d.,]+] " + |
| "(PHB#....\[.:.]): " + |
| "PHB Freeze/Fence detected !") |
| |
| # Store the current register set for each PHB. Keep in mind that we can have register |
| # dumps from different PHBs being interleaved in the register log. |
| current = {} |
| |
| # list discovered error logs |
| error_logs = [] |
| |
| # Match things and split them on a per-PHB basis. We can get multiple PHB error logs |
| # printed interleaved in the skiboot log if there are multiple PHBs frozen. |
| for l in log_text.split("\n"): |
| m = new_log_marker.match(l) |
| if not m: |
| m = phblog_re.match(l) |
| if not m: |
| continue |
| |
| match = m.groups() |
| phb = match[0] |
| |
| # new log marker, save the current log and create a new one to store register values in |
| log = current.get(phb) |
| if not log: |
| current[phb] = PHBError(l); |
| elif len(match) == 1: |
| error_logs.append(current[phb]) |
| current[phb] = PHBError(l) # create a new log object |
| log = current[phb] |
| |
| if len(match) > 1: |
| if match[1].find("PEST") >= 0: # PEST entry |
| # NB: unlike .match() .search() scans the whole string |
| m = re.search("PEST\[([\da-fA-F]+)] = ([\da-fA-F]+) ([\da-fA-F]+)", l) |
| pe, pesta, pestb = [int(i, 16) for i in m.groups()] |
| current[phb].set_pest(pe, pesta, pestb) |
| else: # Normal register |
| name = match[1].strip() |
| value = int(match[2].strip(), 16) |
| |
| ok = current[phb].set_reg(name, value) |
| |
| # If we have duplicate registers then we're in a new log context |
| # so stash the current one and init a new one. |
| if not ok: |
| error_logs.append(current[phb]) |
| current[phb] = PHBError(l) |
| current[phb].set_reg(name, value) |
| |
| # save all the logs we're still processing |
| for k,v in current.items(): |
| error_logs.append(v) |
| |
| return error_logs |
| |
| |
| ''' |
| Mar 25 10:01:49 localhost kernel: PHB4 PHB#48 Diag-data (Version: 1) |
| Mar 25 10:01:49 localhost kernel: brdgCtl: 00000002 |
| Mar 25 10:01:49 localhost kernel: RootSts: 00010020 00402000 a1030008 00100107 00002000 |
| Mar 25 10:01:49 localhost kernel: RootErrSts: 00000000 00000000 00000001 |
| Mar 25 10:01:49 localhost kernel: PhbSts: 0000001c00000000 0000001c00000000 |
| Mar 25 10:01:49 localhost kernel: Lem: 0000000100280000 0000000000000000 0000000100000000 |
| Mar 25 10:01:49 localhost kernel: PhbErr: 0000088000000000 0000008000000000 2148000098000240 a008400000000000 |
| Mar 25 10:01:49 localhost kernel: RxeArbErr: 4000200000000000 0000200000000000 02409fde30000000 0000000000000000 |
| Mar 25 10:01:49 localhost kernel: PblErr: 0000000001000000 0000000001000000 0000000000000000 0000000000000000 |
| Mar 25 10:01:49 localhost kernel: PcieDlp: 0000000000000000 0000000000000000 ffff000000000000 |
| Mar 25 10:01:49 localhost kernel: RegbErr: 0000004a10000800 0000000810000000 8800003c00000000 0000000007011000 |
| Mar 25 10:01:49 localhost kernel: PE[1fd] A/B: a440002a05000000 8000000000000000 |
| ''' |
| |
| def parse_kernel_log(log_text): |
| reg8 = "([0-9a-fA-F]{8})" |
| reg16 = "([0-9a-fA-F]{16})" |
| |
| # TODO: pick up the AER stuff the kernel logs too? |
| # NB: The register names used for set_reg are the skiboot register names, not the kernel. |
| # TODO: check these for completeness / accuracy. I might have missed something |
| register_patterns = [ |
| (re.compile("brdgCtl: {}" .format(reg8)), "brdgCtl"), |
| (re.compile("RootSts: {} {} {} {} {}".format(reg8, reg8, reg8, reg8, reg8)), |
| 'deviceStatus', 'slotStatus', 'linkStatus', 'devCmdStatus', 'devSecStatus'), |
| (re.compile("RootErrSts: {} {} {}" .format(reg8, reg8, reg8)), |
| 'rootErrorStatus', 'uncorrErrorStatus', 'corrErrorStatus'), |
| (re.compile("PhbSts: {} {}" .format(reg16, reg16)), "phbPlssr", "phbCsr"), |
| (re.compile("nFir: {} {} {}" .format(reg16, reg16, reg16)), "nFir", "nFirMask", "nFirWOF"), |
| (re.compile("Lem: {} {} {}" .format(reg16, reg16, reg16)), "lemFir", "lemErrorMask", "lemWOF"), |
| (re.compile("PhbErr: {} {} {} {}" .format(reg16, reg16, reg16, reg16)), |
| "phbErrorStatus", "phbFirstErrorStatus", "phbErrorLog0", "phbErrorLog1"), |
| (re.compile("PhbTxeErr: {} {} {} {}" .format(reg16, reg16, reg16, reg16)), |
| "phbPhbTxeErrorStatus", "phbPhbTxeFirstErrorStatus", "phbPhbTxeErrorLog0", "phbTxeErrorLog1"), |
| (re.compile("RxeArbErr: {} {} {} {}" .format(reg16, reg16, reg16, reg16)), |
| "phbRxeArbErrorStatus", "phbRxeArbFirstErrorStatus", "phbRxeArbErrorLog0", "phbRxeArbErrorLog1"), |
| (re.compile("RxeMrgErr: {} {} {} {}" .format(reg16, reg16, reg16, reg16)), |
| "phbRxeMrgErrorStatus", "phbRxeMrgFirstErrorStatus", "phbRxeMrgErrorLog0", "phbRxeMrgErrorLog1"), |
| (re.compile("RxeTceErr: {} {} {} {}" .format(reg16, reg16, reg16, reg16)), |
| "phbRxeTceErrorStatus", "phbRxeTceFirstErrorStatus", "phbRxeTceErrorLog0", "phbRxeTceErrorLog1"), |
| (re.compile("PblErr: {} {} {} {}" .format(reg16, reg16, reg16, reg16)), |
| "phbPblErrorStatus", "phbPblFirstErrorStatus", "phbPblErrorLog0", "phbPblErrorLog1"), |
| (re.compile("PcieDlp: {} {} {}" .format(reg16, reg16, reg16)), |
| "phbPcieDlpErrorLog1", "phbPcieDlpErrorLog2", "phbPcieDlpErrorStatus"), |
| (re.compile("RegbErr: {} {} {} {}" .format(reg16, reg16, reg16, reg16)), |
| "phbRegbErrorStatus", "phbRegbFirstErrorStatus", "phbRegbErrorLog0", "phbRegbErrorLog1"), |
| ] |
| |
| header_pattern = re.compile("PHB4 PHB#[0-9]+ Diag-data") # match header |
| pe_pattern = re.compile("PE\[{}\] A/B: {} {}".format("([ 0-9a-fA-F]{3})", reg16, reg16)) # the PE number is three hex digits |
| |
| logs = [] |
| log = PHBError(""); |
| |
| # pretty nasty but since interpreting the kernel logs requires context I |
| # don't have any better ideas |
| for l in log_text.split("\n"): |
| m = header_pattern.search(l) |
| if m: # start a new log |
| logs.append(log) |
| log = PHBError(l) |
| continue |
| |
| for p,*names in register_patterns: |
| m = p.search(l) |
| if not m: |
| continue |
| for name, val in zip(names, m.groups()): |
| log.set_reg(name, int(val, 16)) |
| break |
| |
| m = pe_pattern.search(l) |
| if m: |
| pe = int(m.groups()[0], 16) |
| pesta = int(m.groups()[1], 16) |
| pestb = int(m.groups()[2], 16) |
| log.set_pest(pe, pesta, pestb) |
| |
| logs.append(log) |
| |
| return logs |
| |
| def main(argv): |
| if len(argv) < 2: |
| print("Usage: {} <log file>".format(argv[0])); |
| return |
| |
| try: |
| log_text = open(argv[1]).read(); |
| except Exception as err: |
| print(err) |
| sys.exit(1) |
| |
| logs = parse_opal_log(log_text); |
| logs.extend(parse_kernel_log(log_text)) |
| |
| for err in logs: |
| print("==== PHB Register dump found ====") |
| print("") |
| print(err.header()) |
| print("") |
| print(err.show_errs()) |
| |
| if __name__ == "__main__": |
| main(sys.argv) |