| #!/usr/bin/env python3 |
| # |
| # pylint: disable=C0301,C0114,R0903,R0912,R0913,R0914,R0915,W0511 |
| # SPDX-License-Identifier: GPL-2.0-or-later |
| # |
| # Copyright (C) 2024-2025 Mauro Carvalho Chehab <mchehab+huawei@kernel.org> |
| |
| # TODO: current implementation has dummy defaults. |
| # |
| # For a better implementation, a QMP addition/call is needed to |
| # retrieve some data for ARM Processor Error injection: |
| # |
| # - ARM registers: power_state, mpidr. |
| |
| """ |
| Generate an ARM processor error CPER, compatible with |
| UEFI 2.9A Errata. |
| |
| Injecting such errors can be done using: |
| |
| $ ./scripts/ghes_inject.py arm |
| Error injected. |
| |
| Produces a simple CPER register, as detected on a Linux guest: |
| |
| [Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 1 |
| [Hardware Error]: event severity: recoverable |
| [Hardware Error]: Error 0, type: recoverable |
| [Hardware Error]: section_type: ARM processor error |
| [Hardware Error]: MIDR: 0x0000000000000000 |
| [Hardware Error]: running state: 0x0 |
| [Hardware Error]: Power State Coordination Interface state: 0 |
| [Hardware Error]: Error info structure 0: |
| [Hardware Error]: num errors: 2 |
| [Hardware Error]: error_type: 0x02: cache error |
| [Hardware Error]: error_info: 0x000000000091000f |
| [Hardware Error]: transaction type: Data Access |
| [Hardware Error]: cache error, operation type: Data write |
| [Hardware Error]: cache level: 2 |
| [Hardware Error]: processor context not corrupted |
| [Firmware Warn]: GHES: Unhandled processor error type 0x02: cache error |
| |
| The ARM Processor Error message can be customized via command line |
| parameters. For instance: |
| |
| $ ./scripts/ghes_inject.py arm --mpidr 0x444 --running --affinity 1 \ |
| --error-info 12345678 --vendor 0x13,123,4,5,1 --ctx-array 0,1,2,3,4,5 \ |
| -t cache tlb bus micro-arch tlb,micro-arch |
| Error injected. |
| |
| Injects this error, as detected on a Linux guest: |
| |
| [Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 1 |
| [Hardware Error]: event severity: recoverable |
| [Hardware Error]: Error 0, type: recoverable |
| [Hardware Error]: section_type: ARM processor error |
| [Hardware Error]: MIDR: 0x0000000000000000 |
| [Hardware Error]: Multiprocessor Affinity Register (MPIDR): 0x0000000000000000 |
| [Hardware Error]: error affinity level: 0 |
| [Hardware Error]: running state: 0x1 |
| [Hardware Error]: Power State Coordination Interface state: 0 |
| [Hardware Error]: Error info structure 0: |
| [Hardware Error]: num errors: 2 |
| [Hardware Error]: error_type: 0x02: cache error |
| [Hardware Error]: error_info: 0x0000000000bc614e |
| [Hardware Error]: cache level: 2 |
| [Hardware Error]: processor context not corrupted |
| [Hardware Error]: Error info structure 1: |
| [Hardware Error]: num errors: 2 |
| [Hardware Error]: error_type: 0x04: TLB error |
| [Hardware Error]: error_info: 0x000000000054007f |
| [Hardware Error]: transaction type: Instruction |
| [Hardware Error]: TLB error, operation type: Instruction fetch |
| [Hardware Error]: TLB level: 1 |
| [Hardware Error]: processor context not corrupted |
| [Hardware Error]: the error has not been corrected |
| [Hardware Error]: PC is imprecise |
| [Hardware Error]: Error info structure 2: |
| [Hardware Error]: num errors: 2 |
| [Hardware Error]: error_type: 0x08: bus error |
| [Hardware Error]: error_info: 0x00000080d6460fff |
| [Hardware Error]: transaction type: Generic |
| [Hardware Error]: bus error, operation type: Generic read (type of instruction or data request cannot be determined) |
| [Hardware Error]: affinity level at which the bus error occurred: 1 |
| [Hardware Error]: processor context corrupted |
| [Hardware Error]: the error has been corrected |
| [Hardware Error]: PC is imprecise |
| [Hardware Error]: Program execution can be restarted reliably at the PC associated with the error. |
| [Hardware Error]: participation type: Local processor observed |
| [Hardware Error]: request timed out |
| [Hardware Error]: address space: External Memory Access |
| [Hardware Error]: memory access attributes:0x20 |
| [Hardware Error]: access mode: secure |
| [Hardware Error]: Error info structure 3: |
| [Hardware Error]: num errors: 2 |
| [Hardware Error]: error_type: 0x10: micro-architectural error |
| [Hardware Error]: error_info: 0x0000000078da03ff |
| [Hardware Error]: Error info structure 4: |
| [Hardware Error]: num errors: 2 |
| [Hardware Error]: error_type: 0x14: TLB error|micro-architectural error |
| [Hardware Error]: Context info structure 0: |
| [Hardware Error]: register context type: AArch64 EL1 context registers |
| [Hardware Error]: 00000000: 00000000 00000000 |
| [Hardware Error]: Vendor specific error info has 5 bytes: |
| [Hardware Error]: 00000000: 13 7b 04 05 01 .{... |
| [Firmware Warn]: GHES: Unhandled processor error type 0x02: cache error |
| [Firmware Warn]: GHES: Unhandled processor error type 0x04: TLB error |
| [Firmware Warn]: GHES: Unhandled processor error type 0x08: bus error |
| [Firmware Warn]: GHES: Unhandled processor error type 0x10: micro-architectural error |
| [Firmware Warn]: GHES: Unhandled processor error type 0x14: TLB error|micro-architectural error |
| """ |
| |
| import argparse |
| import re |
| |
| from qmp_helper import qmp, util, cper_guid |
| |
| |
| class ArmProcessorEinj: |
| """ |
| Implements ARM Processor Error injection via GHES |
| """ |
| |
| DESC = """ |
| Generates an ARM processor error CPER, compatible with |
| UEFI 2.9A Errata. |
| """ |
| |
| ACPI_GHES_ARM_CPER_LENGTH = 40 |
| ACPI_GHES_ARM_CPER_PEI_LENGTH = 32 |
| |
| # Context types |
| CONTEXT_AARCH32_EL1 = 1 |
| CONTEXT_AARCH64_EL1 = 5 |
| CONTEXT_MISC_REG = 8 |
| |
| def __init__(self, subparsers): |
| """Initialize the error injection class and add subparser""" |
| |
| # Valid choice values |
| self.arm_valid_bits = { |
| "mpidr": util.bit(0), |
| "affinity": util.bit(1), |
| "running": util.bit(2), |
| "vendor": util.bit(3), |
| } |
| |
| self.pei_flags = { |
| "first": util.bit(0), |
| "last": util.bit(1), |
| "propagated": util.bit(2), |
| "overflow": util.bit(3), |
| } |
| |
| self.pei_error_types = { |
| "cache": util.bit(1), |
| "tlb": util.bit(2), |
| "bus": util.bit(3), |
| "micro-arch": util.bit(4), |
| } |
| |
| self.pei_valid_bits = { |
| "multiple-error": util.bit(0), |
| "flags": util.bit(1), |
| "error-info": util.bit(2), |
| "virt-addr": util.bit(3), |
| "phy-addr": util.bit(4), |
| } |
| |
| self.data = bytearray() |
| |
| parser = subparsers.add_parser("arm", description=self.DESC) |
| |
| arm_valid_bits = ",".join(self.arm_valid_bits.keys()) |
| flags = ",".join(self.pei_flags.keys()) |
| error_types = ",".join(self.pei_error_types.keys()) |
| pei_valid_bits = ",".join(self.pei_valid_bits.keys()) |
| |
| # UEFI N.16 ARM Validation bits |
| g_arm = parser.add_argument_group("ARM processor") |
| g_arm.add_argument("--arm", "--arm-valid", |
| help=f"ARM valid bits: {arm_valid_bits}") |
| g_arm.add_argument("-a", "--affinity", "--level", "--affinity-level", |
| type=lambda x: int(x, 0), |
| help="Affinity level (when multiple levels apply)") |
| g_arm.add_argument("-l", "--mpidr", type=lambda x: int(x, 0), |
| help="Multiprocessor Affinity Register") |
| g_arm.add_argument("-i", "--midr", type=lambda x: int(x, 0), |
| help="Main ID Register") |
| g_arm.add_argument("-r", "--running", |
| action=argparse.BooleanOptionalAction, |
| default=None, |
| help="Indicates if the processor is running or not") |
| g_arm.add_argument("--psci", "--psci-state", |
| type=lambda x: int(x, 0), |
| help="Power State Coordination Interface - PSCI state") |
| |
| # TODO: Add vendor-specific support |
| |
| # UEFI N.17 bitmaps (type and flags) |
| g_pei = parser.add_argument_group("ARM Processor Error Info (PEI)") |
| g_pei.add_argument("-t", "--type", nargs="+", |
| help=f"one or more error types: {error_types}") |
| g_pei.add_argument("-f", "--flags", nargs="*", |
| help=f"zero or more error flags: {flags}") |
| g_pei.add_argument("-V", "--pei-valid", "--error-valid", nargs="*", |
| help=f"zero or more PEI valid bits: {pei_valid_bits}") |
| |
| # UEFI N.17 Integer values |
| g_pei.add_argument("-m", "--multiple-error", nargs="+", |
| help="Number of errors: 0: Single error, 1: Multiple errors, 2-65535: Error count if known") |
| g_pei.add_argument("-e", "--error-info", nargs="+", |
| help="Error information (UEFI 2.10 tables N.18 to N.20)") |
| g_pei.add_argument("-p", "--physical-address", nargs="+", |
| help="Physical address") |
| g_pei.add_argument("-v", "--virtual-address", nargs="+", |
| help="Virtual address") |
| |
| # UEFI N.21 Context |
| g_ctx = parser.add_argument_group("Processor Context") |
| g_ctx.add_argument("--ctx-type", "--context-type", nargs="*", |
| help="Type of the context (0=ARM32 GPR, 5=ARM64 EL1, other values supported)") |
| g_ctx.add_argument("--ctx-size", "--context-size", nargs="*", |
| help="Minimal size of the context") |
| g_ctx.add_argument("--ctx-array", "--context-array", nargs="*", |
| help="Comma-separated arrays for each context") |
| |
| # Vendor-specific data |
| g_vendor = parser.add_argument_group("Vendor-specific data") |
| g_vendor.add_argument("--vendor", "--vendor-specific", nargs="+", |
| help="Vendor-specific byte arrays of data") |
| |
| # Add arguments for Generic Error Data |
| qmp.argparse(parser) |
| |
| parser.set_defaults(func=self.send_cper) |
| |
| def send_cper(self, args): |
| """Parse subcommand arguments and send a CPER via QMP""" |
| |
| qmp_cmd = qmp(args.host, args.port, args.debug) |
| |
| # Handle Generic Error Data arguments if any |
| qmp_cmd.set_args(args) |
| |
| is_cpu_type = re.compile(r"^([\w+]+\-)?arm\-cpu$") |
| cpus = qmp_cmd.search_qom("/machine/unattached/device", |
| "type", is_cpu_type) |
| |
| cper = {} |
| pei = {} |
| ctx = {} |
| vendor = {} |
| |
| arg = vars(args) |
| |
| # Handle global parameters |
| if args.arm: |
| arm_valid_init = False |
| cper["valid"] = util.get_choice(name="valid", |
| value=args.arm, |
| choices=self.arm_valid_bits, |
| suffixes=["-error", "-err"]) |
| else: |
| cper["valid"] = 0 |
| arm_valid_init = True |
| |
| if "running" in arg: |
| if args.running: |
| cper["running-state"] = util.bit(0) |
| else: |
| cper["running-state"] = 0 |
| else: |
| cper["running-state"] = 0 |
| |
| if arm_valid_init: |
| if args.affinity: |
| cper["valid"] |= self.arm_valid_bits["affinity"] |
| |
| if args.mpidr: |
| cper["valid"] |= self.arm_valid_bits["mpidr"] |
| |
| if "running-state" in cper: |
| cper["valid"] |= self.arm_valid_bits["running"] |
| |
| if args.psci: |
| cper["valid"] |= self.arm_valid_bits["running"] |
| |
| # Handle PEI |
| if not args.type: |
| args.type = ["cache-error"] |
| |
| util.get_mult_choices( |
| pei, |
| name="valid", |
| values=args.pei_valid, |
| choices=self.pei_valid_bits, |
| suffixes=["-valid", "--addr"], |
| ) |
| util.get_mult_choices( |
| pei, |
| name="type", |
| values=args.type, |
| choices=self.pei_error_types, |
| suffixes=["-error", "-err"], |
| ) |
| util.get_mult_choices( |
| pei, |
| name="flags", |
| values=args.flags, |
| choices=self.pei_flags, |
| suffixes=["-error", "-cap"], |
| ) |
| util.get_mult_int(pei, "error-info", args.error_info) |
| util.get_mult_int(pei, "multiple-error", args.multiple_error) |
| util.get_mult_int(pei, "phy-addr", args.physical_address) |
| util.get_mult_int(pei, "virt-addr", args.virtual_address) |
| |
| # Handle context |
| util.get_mult_int(ctx, "type", args.ctx_type, allow_zero=True) |
| util.get_mult_int(ctx, "minimal-size", args.ctx_size, allow_zero=True) |
| util.get_mult_array(ctx, "register", args.ctx_array, allow_zero=True) |
| |
| util.get_mult_array(vendor, "bytes", args.vendor, max_val=255) |
| |
| # Store PEI |
| pei_data = bytearray() |
| default_flags = self.pei_flags["first"] |
| default_flags |= self.pei_flags["last"] |
| |
| error_info_num = 0 |
| |
| for i, p in pei.items(): # pylint: disable=W0612 |
| error_info_num += 1 |
| |
| # UEFI 2.10 doesn't define how to encode error information |
| # when multiple types are raised. So, provide a default only |
| # if a single type is there |
| if "error-info" not in p: |
| if p["type"] == util.bit(1): |
| p["error-info"] = 0x0091000F |
| if p["type"] == util.bit(2): |
| p["error-info"] = 0x0054007F |
| if p["type"] == util.bit(3): |
| p["error-info"] = 0x80D6460FFF |
| if p["type"] == util.bit(4): |
| p["error-info"] = 0x78DA03FF |
| |
| if "valid" not in p: |
| p["valid"] = 0 |
| if "multiple-error" in p: |
| p["valid"] |= self.pei_valid_bits["multiple-error"] |
| |
| if "flags" in p: |
| p["valid"] |= self.pei_valid_bits["flags"] |
| |
| if "error-info" in p: |
| p["valid"] |= self.pei_valid_bits["error-info"] |
| |
| if "phy-addr" in p: |
| p["valid"] |= self.pei_valid_bits["phy-addr"] |
| |
| if "virt-addr" in p: |
| p["valid"] |= self.pei_valid_bits["virt-addr"] |
| |
| # Version |
| util.data_add(pei_data, 0, 1) |
| |
| util.data_add(pei_data, |
| self.ACPI_GHES_ARM_CPER_PEI_LENGTH, 1) |
| |
| util.data_add(pei_data, p["valid"], 2) |
| util.data_add(pei_data, p["type"], 1) |
| util.data_add(pei_data, p.get("multiple-error", 1), 2) |
| util.data_add(pei_data, p.get("flags", default_flags), 1) |
| util.data_add(pei_data, p.get("error-info", 0), 8) |
| util.data_add(pei_data, p.get("virt-addr", 0xDEADBEEF), 8) |
| util.data_add(pei_data, p.get("phy-addr", 0xABBA0BAD), 8) |
| |
| # Store Context |
| ctx_data = bytearray() |
| context_info_num = 0 |
| |
| if ctx: |
| ret = qmp_cmd.send_cmd("query-target", may_open=True) |
| |
| default_ctx = self.CONTEXT_MISC_REG |
| |
| if "arch" in ret: |
| if ret["arch"] == "aarch64": |
| default_ctx = self.CONTEXT_AARCH64_EL1 |
| elif ret["arch"] == "arm": |
| default_ctx = self.CONTEXT_AARCH32_EL1 |
| |
| for k in sorted(ctx.keys()): |
| context_info_num += 1 |
| |
| if "type" not in ctx[k]: |
| ctx[k]["type"] = default_ctx |
| |
| if "register" not in ctx[k]: |
| ctx[k]["register"] = [] |
| |
| reg_size = len(ctx[k]["register"]) |
| size = 0 |
| |
| if "minimal-size" in ctx: |
| size = ctx[k]["minimal-size"] |
| |
| size = max(size, reg_size) |
| |
| size = (size + 1) % 0xFFFE |
| |
| # Version |
| util.data_add(ctx_data, 0, 2) |
| |
| util.data_add(ctx_data, ctx[k]["type"], 2) |
| |
| util.data_add(ctx_data, 8 * size, 4) |
| |
| for r in ctx[k]["register"]: |
| util.data_add(ctx_data, r, 8) |
| |
| for i in range(reg_size, size): # pylint: disable=W0612 |
| util.data_add(ctx_data, 0, 8) |
| |
| # Vendor-specific bytes are not grouped |
| vendor_data = bytearray() |
| if vendor: |
| for k in sorted(vendor.keys()): |
| for b in vendor[k]["bytes"]: |
| util.data_add(vendor_data, b, 1) |
| |
| # Encode ARM Processor Error |
| data = bytearray() |
| |
| util.data_add(data, cper["valid"], 4) |
| |
| util.data_add(data, error_info_num, 2) |
| util.data_add(data, context_info_num, 2) |
| |
| # Calculate the length of the CPER data |
| cper_length = self.ACPI_GHES_ARM_CPER_LENGTH |
| cper_length += len(pei_data) |
| cper_length += len(vendor_data) |
| cper_length += len(ctx_data) |
| util.data_add(data, cper_length, 4) |
| |
| util.data_add(data, arg.get("affinity-level", 0), 1) |
| |
| # Reserved |
| util.data_add(data, 0, 3) |
| |
| if "midr-el1" not in arg: |
| if cpus: |
| cmd_arg = { |
| 'path': cpus[0], |
| 'property': "midr" |
| } |
| ret = qmp_cmd.send_cmd("qom-get", cmd_arg, may_open=True) |
| if isinstance(ret, int): |
| arg["midr-el1"] = ret |
| |
| util.data_add(data, arg.get("mpidr-el1", 0), 8) |
| util.data_add(data, arg.get("midr-el1", 0), 8) |
| util.data_add(data, cper["running-state"], 4) |
| util.data_add(data, arg.get("psci-state", 0), 4) |
| |
| # Add PEI |
| data.extend(pei_data) |
| data.extend(ctx_data) |
| data.extend(vendor_data) |
| |
| self.data = data |
| |
| qmp_cmd.send_cper(cper_guid.CPER_PROC_ARM, self.data) |