// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
/*
* skiboot C entry point
*
* Copyright 2013-2019 IBM Corp.
*/
#include <skiboot.h>
#include <slw.h>
#include <psi.h>
#include <chiptod.h>
#include <nx.h>
#include <cpu.h>
#include <processor.h>
#include <xscom.h>
#include <opal.h>
#include <opal-msg.h>
#include <elf.h>
#include <elf-abi.h>
#include <io.h>
#include <cec.h>
#include <device.h>
#include <pci.h>
#include <lpc.h>
#include <i2c.h>
#include <chip.h>
#include <interrupts.h>
#include <mem_region.h>
#include <trace.h>
#include <console.h>
#include <fsi-master.h>
#include <centaur.h>
#include <ocmb.h>
#include <libfdt/libfdt.h>
#include <timer.h>
#include <ipmi.h>
#include <sensor.h>
#include <xive.h>
#include <nvram.h>
#include <vas.h>
#include <libstb/secureboot.h>
#include <libstb/trustedboot.h>
#include <phys-map.h>
#include <imc.h>
#include <dts.h>
#include <dio-p9.h>
#include <sbe-p9.h>
#include <debug_descriptor.h>
#include <occ.h>
#include <opal-dump.h>
#include <xscom-p9-regs.h>
#include <xscom-p10-regs.h>
enum proc_gen proc_gen;
unsigned int pcie_max_link_speed;
bool pci_tracing;
bool verbose_eeh;
extern const char version[];
static uint64_t kernel_entry;
static size_t kernel_size;
static bool kernel_32bit;
/* We back up the previous vectors here before copying our own */
static uint8_t old_vectors[EXCEPTION_VECTORS_END];
#ifdef DEBUG
#define DEBUG_STR "-debug"
#else
#define DEBUG_STR ""
#endif
#ifdef SKIBOOT_GCOV
void skiboot_gcov_done(void);
#endif
struct debug_descriptor debug_descriptor = {
.eye_catcher = "OPALdbug",
.version = CPU_TO_BE32(DEBUG_DESC_VERSION),
.state_flags = 0,
.memcons_phys = 0, /* cpu_to_be64(&memcons) can't be a constant initializer */
.trace_mask = 0, /* All traces disabled by default */
/* console log level:
* high 4 bits in memory, low 4 bits driver (e.g. uart). */
#ifdef DEBUG
.console_log_levels = (PR_TRACE << 4) | PR_DEBUG,
#else
.console_log_levels = (PR_DEBUG << 4) | PR_NOTICE,
#endif
};
static void checksum_romem(void);
static bool try_load_elf64_le(struct elf_hdr *header)
{
struct elf64le_hdr *kh = (struct elf64le_hdr *)header;
uint64_t load_base = (uint64_t)kh;
struct elf64le_phdr *ph;
unsigned int i;
printf("INIT: 64-bit LE kernel discovered\n");
/* Look for a loadable program header that has our entry in it
*
* Note that we execute the kernel in-place, we don't actually
* obey the load information in the headers. This is expected
* to work for the Linux kernel because it's a fairly dumb ELF
* but it will not work for arbitrary ELF binaries.
*/
ph = (struct elf64le_phdr *)(load_base + le64_to_cpu(kh->e_phoff));
for (i = 0; i < le16_to_cpu(kh->e_phnum); i++, ph++) {
if (le32_to_cpu(ph->p_type) != ELF_PTYPE_LOAD)
continue;
if (le64_to_cpu(ph->p_vaddr) > le64_to_cpu(kh->e_entry) ||
(le64_to_cpu(ph->p_vaddr) + le64_to_cpu(ph->p_memsz)) <
le64_to_cpu(kh->e_entry))
continue;
/* Get our entry */
kernel_entry = le64_to_cpu(kh->e_entry) -
le64_to_cpu(ph->p_vaddr) + le64_to_cpu(ph->p_offset);
break;
}
if (!kernel_entry) {
prerror("INIT: Failed to find kernel entry !\n");
return false;
}
kernel_entry += load_base;
kernel_32bit = false;
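/* Estimate the image size: this assumes the section headers sit at the end of the file. */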
kernel_size = le64_to_cpu(kh->e_shoff) +
((uint32_t)le16_to_cpu(kh->e_shentsize) *
(uint32_t)le16_to_cpu(kh->e_shnum));
prlog(PR_DEBUG, "INIT: 64-bit kernel entry at 0x%llx, size 0x%lx\n",
kernel_entry, kernel_size);
return true;
}
static bool try_load_elf64(struct elf_hdr *header)
{
struct elf64be_hdr *kh = (struct elf64be_hdr *)header;
struct elf64le_hdr *khle = (struct elf64le_hdr *)header;
uint64_t load_base = (uint64_t)kh;
struct elf64be_phdr *ph;
struct elf64be_shdr *sh;
unsigned int i;
/* Check it's a ppc64 LE ELF */
if (khle->ei_ident == ELF_IDENT &&
khle->ei_data == ELF_DATA_LSB &&
le16_to_cpu(khle->e_machine) == ELF_MACH_PPC64) {
return try_load_elf64_le(header);
}
/* Check it's a ppc64 ELF */
if (kh->ei_ident != ELF_IDENT ||
kh->ei_data != ELF_DATA_MSB ||
be16_to_cpu(kh->e_machine) != ELF_MACH_PPC64) {
prerror("INIT: Kernel doesn't look like an ppc64 ELF\n");
return false;
}
/* Look for a loadable program header that has our entry in it
*
* Note that we execute the kernel in-place, we don't actually
* obey the load information in the headers. This is expected
* to work for the Linux kernel because it's a fairly dumb ELF
* but it will not work for arbitrary ELF binaries.
*/
ph = (struct elf64be_phdr *)(load_base + be64_to_cpu(kh->e_phoff));
for (i = 0; i < be16_to_cpu(kh->e_phnum); i++, ph++) {
if (be32_to_cpu(ph->p_type) != ELF_PTYPE_LOAD)
continue;
if (be64_to_cpu(ph->p_vaddr) > be64_to_cpu(kh->e_entry) ||
(be64_to_cpu(ph->p_vaddr) + be64_to_cpu(ph->p_memsz)) <
be64_to_cpu(kh->e_entry))
continue;
/* Get our entry */
kernel_entry = be64_to_cpu(kh->e_entry) -
be64_to_cpu(ph->p_vaddr) + be64_to_cpu(ph->p_offset);
break;
}
if (!kernel_entry) {
prerror("INIT: Failed to find kernel entry !\n");
return false;
}
/* For the normal big-endian ELF ABI, the kernel entry points
* to a function descriptor in the data section. Linux instead
* has it point directly to code. Test whether it is pointing
* into an executable section or not to figure this out. Default
* to assuming it obeys the ABI.
*/
sh = (struct elf64be_shdr *)(load_base + be64_to_cpu(kh->e_shoff));
for (i = 0; i < be16_to_cpu(kh->e_shnum); i++, sh++) {
if (be64_to_cpu(sh->sh_addr) <= be64_to_cpu(kh->e_entry) &&
(be64_to_cpu(sh->sh_addr) + be64_to_cpu(sh->sh_size)) >
be64_to_cpu(kh->e_entry))
break;
}
if (i == be16_to_cpu(kh->e_shnum) ||
!(be64_to_cpu(sh->sh_flags) & ELF_SFLAGS_X)) {
kernel_entry = *(uint64_t *)(kernel_entry + load_base);
kernel_entry = kernel_entry -
be64_to_cpu(ph->p_vaddr) + be64_to_cpu(ph->p_offset);
}
kernel_entry += load_base;
kernel_32bit = false;
kernel_size = be64_to_cpu(kh->e_shoff) +
((uint32_t)be16_to_cpu(kh->e_shentsize) *
(uint32_t)be16_to_cpu(kh->e_shnum));
printf("INIT: 64-bit kernel entry at 0x%llx, size 0x%lx\n",
kernel_entry, kernel_size);
return true;
}
static bool try_load_elf32_le(struct elf_hdr *header)
{
struct elf32le_hdr *kh = (struct elf32le_hdr *)header;
uint64_t load_base = (uint64_t)kh;
struct elf32le_phdr *ph;
unsigned int i;
printf("INIT: 32-bit LE kernel discovered\n");
/* Look for a loadable program header that has our entry in it
*
* Note that we execute the kernel in-place, we don't actually
* obey the load information in the headers. This is expected
* to work for the Linux kernel because it's a fairly dumb ELF
* but it will not work for arbitrary ELF binaries.
*/
ph = (struct elf32le_phdr *)(load_base + le32_to_cpu(kh->e_phoff));
for (i = 0; i < le16_to_cpu(kh->e_phnum); i++, ph++) {
if (le32_to_cpu(ph->p_type) != ELF_PTYPE_LOAD)
continue;
if (le32_to_cpu(ph->p_vaddr) > le32_to_cpu(kh->e_entry) ||
(le32_to_cpu(ph->p_vaddr) + le32_to_cpu(ph->p_memsz)) <
le32_to_cpu(kh->e_entry))
continue;
/* Get our entry */
kernel_entry = le32_to_cpu(kh->e_entry) -
le32_to_cpu(ph->p_vaddr) + le32_to_cpu(ph->p_offset);
break;
}
if (!kernel_entry) {
prerror("INIT: Failed to find kernel entry !\n");
return false;
}
kernel_entry += load_base;
kernel_32bit = true;
printf("INIT: 32-bit kernel entry at 0x%llx\n", kernel_entry);
return true;
}
static bool try_load_elf32(struct elf_hdr *header)
{
struct elf32be_hdr *kh = (struct elf32be_hdr *)header;
struct elf32le_hdr *khle = (struct elf32le_hdr *)header;
uint64_t load_base = (uint64_t)kh;
struct elf32be_phdr *ph;
unsigned int i;
/* Check it's a ppc32 LE ELF */
if (khle->ei_ident == ELF_IDENT &&
khle->ei_data == ELF_DATA_LSB &&
le16_to_cpu(khle->e_machine) == ELF_MACH_PPC32) {
return try_load_elf32_le(header);
}
/* Check it's a ppc32 ELF */
if (kh->ei_ident != ELF_IDENT ||
kh->ei_data != ELF_DATA_MSB ||
be16_to_cpu(kh->e_machine) != ELF_MACH_PPC32) {
prerror("INIT: Kernel doesn't look like an ppc32 ELF\n");
return false;
}
/* Look for a loadable program header that has our entry in it
*
* Note that we execute the kernel in-place, we don't actually
* obey the load information in the headers. This is expected
* to work for the Linux kernel because it's a fairly dumb ELF
* but it will not work for arbitrary ELF binaries.
*/
ph = (struct elf32be_phdr *)(load_base + be32_to_cpu(kh->e_phoff));
for (i = 0; i < be16_to_cpu(kh->e_phnum); i++, ph++) {
if (be32_to_cpu(ph->p_type) != ELF_PTYPE_LOAD)
continue;
if (be32_to_cpu(ph->p_vaddr) > be32_to_cpu(kh->e_entry) ||
(be32_to_cpu(ph->p_vaddr) + be32_to_cpu(ph->p_memsz)) <
be32_to_cpu(kh->e_entry))
continue;
/* Get our entry */
kernel_entry = be32_to_cpu(kh->e_entry) -
be32_to_cpu(ph->p_vaddr) + be32_to_cpu(ph->p_offset);
break;
}
if (!kernel_entry) {
prerror("INIT: Failed to find kernel entry !\n");
return false;
}
kernel_entry += load_base;
kernel_32bit = true;
printf("INIT: 32-bit kernel entry at 0x%llx\n", kernel_entry);
return true;
}
extern char __builtin_kernel_start[];
extern char __builtin_kernel_end[];
extern uint64_t boot_offset;
static size_t initramfs_size;
bool start_preload_kernel(void)
{
int loaded;
/* Try to load an external kernel payload through the platform hooks */
kernel_size = KERNEL_LOAD_SIZE;
loaded = start_preload_resource(RESOURCE_ID_KERNEL,
RESOURCE_SUBID_NONE,
KERNEL_LOAD_BASE,
&kernel_size);
if (loaded != OPAL_SUCCESS) {
printf("INIT: platform start load kernel failed\n");
kernel_size = 0;
return false;
}
initramfs_size = INITRAMFS_LOAD_SIZE;
loaded = start_preload_resource(RESOURCE_ID_INITRAMFS,
RESOURCE_SUBID_NONE,
INITRAMFS_LOAD_BASE, &initramfs_size);
if (loaded != OPAL_SUCCESS) {
printf("INIT: platform start load initramfs failed\n");
initramfs_size = 0;
return false;
}
return true;
}
static bool load_kernel(void)
{
void *stb_container = NULL;
struct elf_hdr *kh;
int loaded;
prlog(PR_NOTICE, "INIT: Waiting for kernel...\n");
loaded = wait_for_resource_loaded(RESOURCE_ID_KERNEL,
RESOURCE_SUBID_NONE);
if (loaded != OPAL_SUCCESS) {
printf("INIT: platform wait for kernel load failed\n");
kernel_size = 0;
}
/* Try embedded kernel payload */
if (!kernel_size) {
kernel_size = __builtin_kernel_end - __builtin_kernel_start;
if (kernel_size) {
/* Move the built-in kernel up */
uint64_t builtin_base =
((uint64_t)__builtin_kernel_start) -
SKIBOOT_BASE + boot_offset;
printf("Using built-in kernel\n");
memmove(KERNEL_LOAD_BASE, (void*)builtin_base,
kernel_size);
}
}
if (dt_has_node_property(dt_chosen, "kernel-base-address", NULL)) {
kernel_entry = dt_prop_get_u64(dt_chosen,
"kernel-base-address");
prlog(PR_DEBUG, "INIT: Kernel image at 0x%llx\n", kernel_entry);
kh = (struct elf_hdr *)kernel_entry;
/*
* If the kernel is at 0, restore it as it was overwritten
* by our vectors.
*/
if (kernel_entry < EXCEPTION_VECTORS_END) {
cpu_set_sreset_enable(false);
memcpy_null(NULL, old_vectors, EXCEPTION_VECTORS_END);
sync_icache();
} else {
/* Hack for STB in Mambo, assume at least 4kb in mem */
if (!kernel_size)
kernel_size = SECURE_BOOT_HEADERS_SIZE;
if (stb_is_container((void*)kernel_entry, kernel_size)) {
stb_container = (void*)kernel_entry;
kh = (struct elf_hdr *) (kernel_entry + SECURE_BOOT_HEADERS_SIZE);
} else
kh = (struct elf_hdr *) (kernel_entry);
}
} else {
if (!kernel_size) {
printf("INIT: Assuming kernel at %p\n",
KERNEL_LOAD_BASE);
/* Hack for STB in Mambo, assume at least 4kb in mem */
kernel_size = SECURE_BOOT_HEADERS_SIZE;
kernel_entry = (uint64_t)KERNEL_LOAD_BASE;
}
if (stb_is_container(KERNEL_LOAD_BASE, kernel_size)) {
stb_container = KERNEL_LOAD_BASE;
kh = (struct elf_hdr *) (KERNEL_LOAD_BASE + SECURE_BOOT_HEADERS_SIZE);
} else
kh = (struct elf_hdr *) (KERNEL_LOAD_BASE);
}
prlog(PR_DEBUG,
"INIT: Kernel loaded, size: %zu bytes (0 = unknown preload)\n",
kernel_size);
if (kh->ei_ident != ELF_IDENT) {
prerror("INIT: ELF header not found. Assuming raw binary.\n");
return true;
}
if (kh->ei_class == ELF_CLASS_64) {
if (!try_load_elf64(kh))
return false;
} else if (kh->ei_class == ELF_CLASS_32) {
if (!try_load_elf32(kh))
return false;
} else {
prerror("INIT: Neither ELF32 not ELF64 ?\n");
return false;
}
if (chip_quirk(QUIRK_MAMBO_CALLOUTS)) {
secureboot_verify(RESOURCE_ID_KERNEL,
stb_container,
SECURE_BOOT_HEADERS_SIZE + kernel_size);
trustedboot_measure(RESOURCE_ID_KERNEL,
stb_container,
SECURE_BOOT_HEADERS_SIZE + kernel_size);
}
return true;
}
static void load_initramfs(void)
{
uint64_t *initramfs_start;
void *stb_container = NULL;
int loaded;
loaded = wait_for_resource_loaded(RESOURCE_ID_INITRAMFS,
RESOURCE_SUBID_NONE);
if (loaded != OPAL_SUCCESS || !initramfs_size)
return;
if (stb_is_container(INITRAMFS_LOAD_BASE, initramfs_size)) {
stb_container = INITRAMFS_LOAD_BASE;
initramfs_start = INITRAMFS_LOAD_BASE + SECURE_BOOT_HEADERS_SIZE;
} else {
initramfs_start = INITRAMFS_LOAD_BASE;
}
dt_check_del_prop(dt_chosen, "linux,initrd-start");
dt_check_del_prop(dt_chosen, "linux,initrd-end");
printf("INIT: Initramfs loaded, size: %zu bytes\n", initramfs_size);
dt_add_property_u64(dt_chosen, "linux,initrd-start",
(uint64_t)initramfs_start);
dt_add_property_u64(dt_chosen, "linux,initrd-end",
(uint64_t)initramfs_start + initramfs_size);
if (chip_quirk(QUIRK_MAMBO_CALLOUTS)) {
secureboot_verify(RESOURCE_ID_INITRAMFS,
stb_container,
SECURE_BOOT_HEADERS_SIZE + initramfs_size);
trustedboot_measure(RESOURCE_ID_INITRAMFS,
stb_container,
SECURE_BOOT_HEADERS_SIZE + initramfs_size);
}
}
static void cpu_disable_ME_RI_one(void *param __unused)
{
disable_machine_check();
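/* mtmsrd with L=1 updates only MSR[EE] and MSR[RI]; writing 0 clears RI (EE is expected to be off already in skiboot). */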
mtmsrd(0, 1);
}
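/* Clear MSR[ME] and MSR[RI] on all CPUs: queue a job on each other CPU, run the same routine locally, then wait for every queued job to complete. */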
static int64_t cpu_disable_ME_RI_all(void)
{
struct cpu_thread *cpu;
struct cpu_job **jobs;
jobs = zalloc(sizeof(struct cpu_job *) * (cpu_max_pir + 1));
assert(jobs);
for_each_available_cpu(cpu) {
if (cpu == this_cpu())
continue;
jobs[cpu->pir] = cpu_queue_job(cpu, "cpu_disable_ME_RI",
cpu_disable_ME_RI_one, NULL);
}
/* this cpu */
cpu_disable_ME_RI_one(NULL);
for_each_available_cpu(cpu) {
if (jobs[cpu->pir])
cpu_wait_job(jobs[cpu->pir], true);
}
free(jobs);
return OPAL_SUCCESS;
}
static void *fdt;
void __noreturn load_and_boot_kernel(bool is_reboot)
{
const struct dt_property *memprop;
const char *cmdline, *stdoutp;
uint64_t mem_top;
memprop = dt_find_property(dt_root, DT_PRIVATE "maxmem");
if (memprop)
mem_top = (u64)dt_property_get_cell(memprop, 0) << 32
| dt_property_get_cell(memprop, 1);
else /* XXX HB hack, might want to calc it */
mem_top = 0x40000000;
op_display(OP_LOG, OP_MOD_INIT, 0x000A);
/* Load kernel LID */
if (!load_kernel()) {
op_display(OP_FATAL, OP_MOD_INIT, 1);
abort();
}
load_initramfs();
trustedboot_exit_boot_services();
ipmi_set_fw_progress_sensor(IPMI_FW_OS_BOOT);
if (!is_reboot) {
/* We wait for the nvram read to complete here so we can
* grab stuff from there such as the kernel arguments
*/
nvram_wait_for_load();
if (!occ_sensors_init())
dts_sensor_create_nodes(sensor_node);
opal_mpipl_init();
} else {
/* fdt will be rebuilt */
free(fdt);
fdt = NULL;
nvram_reinit();
occ_pstates_init();
}
/* Use nvram bootargs over device tree */
cmdline = nvram_query_safe("bootargs");
if (cmdline) {
dt_check_del_prop(dt_chosen, "bootargs");
dt_add_property_string(dt_chosen, "bootargs", cmdline);
prlog(PR_DEBUG, "INIT: Command line from NVRAM: %s\n",
cmdline);
}
op_display(OP_LOG, OP_MOD_INIT, 0x000B);
add_fast_reboot_dt_entries();
if (platform.finalise_dt)
platform.finalise_dt(is_reboot);
/* Create the device tree blob to boot OS. */
fdt = create_dtb(dt_root, false);
if (!fdt) {
op_display(OP_FATAL, OP_MOD_INIT, 2);
abort();
}
op_display(OP_LOG, OP_MOD_INIT, 0x000C);
mem_dump_free();
/* Dump the selected console */
stdoutp = dt_prop_get_def(dt_chosen, "linux,stdout-path", NULL);
prlog(PR_DEBUG, "INIT: stdout-path: %s\n", stdoutp ? stdoutp : "");
fdt_set_boot_cpuid_phys(fdt, this_cpu()->pir);
/* Check there is something there before we branch to it */
if (*(uint32_t *)kernel_entry == 0) {
prlog(PR_EMERG, "FATAL: Kernel is zeros, can't execute!\n");
assert(0);
}
if (platform.exit)
platform.exit();
/* Take processors out of nap */
cpu_set_sreset_enable(false);
cpu_set_ipi_enable(false);
printf("INIT: Starting kernel at 0x%llx, fdt at %p %u bytes\n",
kernel_entry, fdt, fdt_totalsize(fdt));
/* Disable machine checks on all */
cpu_disable_ME_RI_all();
patch_traps(false);
cpu_set_hile_mode(false); /* Clear HILE on all CPUs */
checksum_romem();
debug_descriptor.state_flags |= OPAL_BOOT_COMPLETE;
cpu_give_self_os();
if (kernel_32bit)
start_kernel32(kernel_entry, fdt, mem_top);
start_kernel(kernel_entry, fdt, mem_top);
}
static void storage_keys_fixup(void)
{
struct dt_node *cpus, *n;
cpus = dt_find_by_path(dt_root, "/cpus");
assert(cpus);
if (proc_gen == proc_gen_unknown)
return;
dt_for_each_child(cpus, n) {
/* There may be cache nodes in /cpus. */
if (!dt_has_node_property(n, "device_type", "cpu") ||
dt_has_node_property(n, "ibm,processor-storage-keys", NULL))
continue;
/*
* skiboot supports p8 & p9, both of which support the IAMR, and
* both of which support 32 keys. So advertise 32 keys for data
* accesses and 32 for instruction accesses.
*/
dt_add_property_cells(n, "ibm,processor-storage-keys", 32, 32);
}
}
static void dt_fixups(void)
{
struct dt_node *n;
struct dt_node *primary_lpc = NULL;
/* lpc node missing #address/size cells. Also pick one as
* primary for now (TBD: How to convey that from HB)
*/
dt_for_each_compatible(dt_root, n, "ibm,power8-lpc") {
if (!primary_lpc || dt_has_node_property(n, "primary", NULL))
primary_lpc = n;
if (dt_has_node_property(n, "#address-cells", NULL))
break;
dt_add_property_cells(n, "#address-cells", 2);
dt_add_property_cells(n, "#size-cells", 1);
dt_add_property_strings(n, "status", "ok");
}
/* Missing "primary" property in LPC bus */
if (primary_lpc && !dt_has_node_property(primary_lpc, "primary", NULL))
dt_add_property(primary_lpc, "primary", NULL, 0);
/* Missing "scom-controller" */
dt_for_each_compatible(dt_root, n, "ibm,xscom") {
if (!dt_has_node_property(n, "scom-controller", NULL))
dt_add_property(n, "scom-controller", NULL, 0);
}
storage_keys_fixup();
}
static void add_arch_vector(void)
{
/**
* vec5 = a PVR-list : Number-of-option-vectors :
* option-vectors[Number-of-option-vectors + 1]
*/
uint8_t vec5[] = {0x05, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00};
if (dt_has_node_property(dt_chosen, "ibm,architecture-vec-5", NULL))
return;
dt_add_property(dt_chosen, "ibm,architecture-vec-5",
vec5, sizeof(vec5));
}
static void dt_init_misc(void)
{
/* Check if there's a /chosen node, if not, add one */
dt_chosen = dt_find_by_path(dt_root, "/chosen");
if (!dt_chosen)
dt_chosen = dt_new(dt_root, "chosen");
assert(dt_chosen);
/* Add IBM architecture vectors if needed */
add_arch_vector();
/* Add the "OPAL virtual ICS*/
add_ics_node();
/* Additional fixups. TODO: Move into platform */
dt_fixups();
}
static u8 console_get_level(const char *s)
{
if (strcmp(s, "emerg") == 0)
return PR_EMERG;
if (strcmp(s, "alert") == 0)
return PR_ALERT;
if (strcmp(s, "crit") == 0)
return PR_CRIT;
if (strcmp(s, "err") == 0)
return PR_ERR;
if (strcmp(s, "warning") == 0)
return PR_WARNING;
if (strcmp(s, "notice") == 0)
return PR_NOTICE;
if (strcmp(s, "printf") == 0)
return PR_PRINTF;
if (strcmp(s, "info") == 0)
return PR_INFO;
if (strcmp(s, "debug") == 0)
return PR_DEBUG;
if (strcmp(s, "trace") == 0)
return PR_TRACE;
if (strcmp(s, "insane") == 0)
return PR_INSANE;
/* Assume it's a number instead */
return atoi(s);
}
static void console_log_level(void)
{
const char *s;
u8 level;
/* console log level:
* high 4 bits in memory, low 4 bits driver (e.g. uart). */
s = nvram_query_safe("log-level-driver");
if (s) {
level = console_get_level(s);
debug_descriptor.console_log_levels =
(debug_descriptor.console_log_levels & 0xf0 ) |
(level & 0x0f);
prlog(PR_NOTICE, "console: Setting driver log level to %i\n",
level & 0x0f);
}
s = nvram_query_safe("log-level-memory");
if (s) {
level = console_get_level(s);
debug_descriptor.console_log_levels =
(debug_descriptor.console_log_levels & 0x0f ) |
((level & 0x0f) << 4);
prlog(PR_NOTICE, "console: Setting memory log level to %i\n",
level & 0x0f);
}
}
typedef void (*ctorcall_t)(void);
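/* Call every constructor the linker collected between __ctors_start and __ctors_end. */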
static void __nomcount do_ctors(void)
{
extern ctorcall_t __ctors_start[], __ctors_end[];
ctorcall_t *call;
for (call = __ctors_start; call < __ctors_end; call++)
(*call)();
}
#ifdef ELF_ABI_v2
static void setup_branch_null_catcher(void)
{
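/* Add a trap_table entry for address 0 so a branch to NULL traps and is reported as "branch to NULL". */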
asm volatile( \
".section .rodata" "\n\t" \
"3: .string \"branch to NULL\"" "\n\t" \
".previous" "\n\t" \
".section .trap_table,\"aw\"" "\n\t" \
".llong 0" "\n\t" \
".llong 3b" "\n\t" \
".previous" "\n\t" \
);
}
#else
static void branch_null(void)
{
assert(0);
}
static void setup_branch_null_catcher(void)
{
void (*bn)(void) = branch_null;
/*
* FIXME: This copies the function descriptor (16 bytes) for
* ABI v1 (i.e. big endian). This will be broken if we ever
* move to ABI v2 (i.e. little endian).
*/
memcpy_null((void *)0, bn, 16);
}
#endif
void copy_sreset_vector(void)
{
uint32_t *src, *dst;
/* Copy the reset code over the entry point. */
src = &reset_patch_start;
dst = (uint32_t *)0x100;
while(src < &reset_patch_end)
*(dst++) = *(src++);
sync_icache();
}
void copy_sreset_vector_fast_reboot(void)
{
uint32_t *src, *dst;
/* Copy the reset code over the entry point. */
src = &reset_fast_reboot_patch_start;
dst = (uint32_t *)0x100;
while(src < &reset_fast_reboot_patch_end)
*(dst++) = *(src++);
sync_icache();
}
void copy_exception_vectors(void)
{
/* Copy from 0x100 to EXCEPTION_VECTORS_END, avoid below 0x100 as
* this is the boot flag used by CPUs still potentially entering
* skiboot.
*/
void *skiboot_constant_addr exception_vectors_start_addr = (void *)(SKIBOOT_BASE + 0x100);
void *skiboot_constant_addr dst = (void *)0x100;
memcpy(dst, exception_vectors_start_addr,
EXCEPTION_VECTORS_END - 0x100);
sync_icache();
}
/*
* When skiboot owns the exception vectors, patch in 'trap' for assert fails.
* Otherwise use assert_fail()
*/
void patch_traps(bool enable)
{
struct trap_table_entry *tte;
for (tte = __trap_table_start; tte < __trap_table_end; tte++) {
uint32_t *insn;
insn = (uint32_t *)tte->address;
if (enable) {
*insn = PPC_INST_TRAP;
} else {
*insn = PPC_INST_NOP;
}
}
sync_icache();
}
static void per_thread_sanity_checks(void)
{
struct cpu_thread *cpu = this_cpu();
/**
* @fwts-label NonZeroHRMOR
* @fwts-advice The contents of the hypervisor real mode offset register
* (HRMOR) are bitwise OR'd with the address of any hypervisor real mode
* (i.e. skiboot) memory access. Skiboot does not support operating
* with a non-zero HRMOR and setting it will break some things (e.g.
* XSCOMs) in hard-to-debug ways.
*/
assert(mfspr(SPR_HRMOR) == 0);
/**
* @fwts-label UnknownSecondary
* @fwts-advice The boot CPU attempted to call in a secondary thread
* without initialising the corresponding cpu_thread structure. This may
* happen if the HDAT or devicetree reports too few threads or cores for
* this processor.
*/
assert(cpu->state != cpu_state_no_cpu);
}
void pci_nvram_init(void)
{
const char *nvram_speed;
verbose_eeh = nvram_query_eq_safe("pci-eeh-verbose", "true");
if (verbose_eeh)
prlog(PR_INFO, "PHB: Verbose EEH enabled\n");
pcie_max_link_speed = 0;
nvram_speed = nvram_query_dangerous("pcie-max-link-speed");
if (nvram_speed) {
pcie_max_link_speed = atoi(nvram_speed);
prlog(PR_NOTICE, "PHB: NVRAM set max link speed to GEN%i\n",
pcie_max_link_speed);
}
pci_tracing = nvram_query_eq_safe("pci-tracing", "true");
}
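/* Fletcher-style checksum over 32-bit words (the region length is assumed to be a multiple of 4 bytes). */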
static uint32_t mem_csum(void *_p, void *_e)
{
size_t len = _e - _p;
uint32_t *p = _p;
uint32_t v1 = 0, v2 = 0;
uint32_t csum;
unsigned int i;
for (i = 0; i < len; i += 4) {
uint32_t v = *p++;
v1 += v;
v2 += v1;
}
csum = v1 ^ v2;
return csum;
}
static uint32_t romem_csum;
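/* Checksum the read-only firmware regions (boot head, text/romem and the built-in kernel) so verify_romem() can detect corruption later; skipped under QUIRK_SLOW_SIM. */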
static void checksum_romem(void)
{
uint32_t csum;
romem_csum = 0;
if (chip_quirk(QUIRK_SLOW_SIM))
return;
csum = mem_csum(_start, _head_end);
romem_csum ^= csum;
csum = mem_csum(_stext, _romem_end);
romem_csum ^= csum;
csum = mem_csum(__builtin_kernel_start, __builtin_kernel_end);
romem_csum ^= csum;
}
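/* Recompute the checksum and compare with the stored value; on mismatch, keep the previous value and report failure. */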
bool verify_romem(void)
{
uint32_t old = romem_csum;
checksum_romem();
if (old != romem_csum) {
romem_csum = old;
prlog(PR_NOTICE, "OPAL checksums did not match\n");
return false;
}
return true;
}
static void mask_pc_system_xstop(void)
{
struct cpu_thread *cpu;
uint32_t chip_id, core_id;
int rc;
if (proc_gen != proc_gen_p10)
return;
if (chip_quirk(QUIRK_MAMBO_CALLOUTS) || chip_quirk(QUIRK_AWAN))
return;
/*
* On P10 Mask PC system checkstop (bit 28). This is needed
* for HW570622. We keep processor recovery disabled via
* HID[5] and mask the checkstop that it can cause. CME does
* the recovery handling for us.
*/
for_each_cpu(cpu) {
chip_id = cpu->chip_id;
core_id = pir_to_core_id(cpu->pir);
rc = xscom_write(chip_id,
XSCOM_ADDR_P10_EC(core_id, P10_CORE_FIRMASK_OR),
PPC_BIT(28));
if (rc)
prerror("Error setting FIR MASK rc:%d on PIR:%x\n",
rc, cpu->pir);
}
}
bool lpar_per_core = false;
static void probe_lpar_per_core(void)
{
struct cpu_thread *cpu = this_cpu();
uint32_t chip_id = pir_to_chip_id(cpu->pir);
uint32_t core_id = pir_to_core_id(cpu->pir);
uint64_t addr;
uint64_t core_thread_state;
int rc;
if (chip_quirk(QUIRK_MAMBO_CALLOUTS) || chip_quirk(QUIRK_AWAN))
return;
if (proc_gen == proc_gen_p9)
addr = XSCOM_ADDR_P9_EC(core_id, P9_CORE_THREAD_STATE);
else if (proc_gen == proc_gen_p10)
addr = XSCOM_ADDR_P10_EC(core_id, P10_EC_CORE_THREAD_STATE);
else
return;
rc = xscom_read(chip_id, addr, &core_thread_state);
if (rc) {
prerror("Error reading CORE_THREAD_STATE rc:%d on PIR:%x\n",
rc, cpu->pir);
return;
}
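/* Bit 62 of the core thread state register flags LPAR-per-core mode. */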
if (core_thread_state & PPC_BIT(62)) {
lpar_per_core = true;
prlog(PR_WARNING, "LPAR-per-core mode detected. KVM may not be usable.\n");
}
}
/* Called from head.S, thus no prototype. */
void __noreturn __nomcount main_cpu_entry(const void *fdt);
void __noreturn __nomcount main_cpu_entry(const void *fdt)
{
/*
* WARNING: At this point, the timebases have
* *not* been synchronized yet. Do not use any timebase
* related functions for timeouts etc... unless you can cope
* with the speed being some random core clock divider and
* the value jumping backward when the synchronization actually
* happens (in chiptod_init() below).
*
* Also the current cpu_thread() struct is not initialized
* either, so we need to clear it out first thing (without
* putting any other useful info in there just yet), otherwise
* printf and locks are going to play funny games with "con_suspend".
*/
pre_init_boot_cpu();
/*
* Point to our mem console
*/
debug_descriptor.memcons_phys = cpu_to_be64((uint64_t)&memcons);
/*
* Before first printk, ensure console buffer is clear or
* reading tools might think it has wrapped
*/
clear_console();
/* Backup previous vectors as this could contain a kernel
* image.
*/
memcpy_null(old_vectors, NULL, EXCEPTION_VECTORS_END);
/*
* Some boot firmwares enter OPAL with MSR[ME]=1, as they presumably
* handle machine checks until we take over. As we overwrite the
* previous exception vectors with our own handlers, disable MSR[ME].
* This could be done atomically by patching in a branch then patching
* it out last, but that's a lot of effort.
*/
disable_machine_check();
/* Copy all vectors down to 0 */
copy_exception_vectors();
/* Enable trap based asserts */
patch_traps(true);
/*
* Enable MSR[ME] bit so we can take MCEs. We don't currently
* recover, but we print some useful information.
*/
enable_machine_check();
mtmsrd(MSR_RI, 1);
/* Setup a NULL catcher to catch accidental NULL ptr calls */
setup_branch_null_catcher();
/* Call library constructors */
do_ctors();
prlog(PR_NOTICE, "OPAL %s%s starting...\n", version, DEBUG_STR);
prlog(PR_DEBUG, "initial console log level: memory %d, driver %d\n",
(debug_descriptor.console_log_levels >> 4),
(debug_descriptor.console_log_levels & 0x0f));
prlog(PR_TRACE, "OPAL is Powered By Linked-List Technology.\n");
#ifdef SKIBOOT_GCOV
skiboot_gcov_done();
#endif
/* Initialize boot cpu's cpu_thread struct */
init_boot_cpu();
/* Now locks can be used */
init_locks();
/* Create the OPAL call table early on, entries can be overridden
* later on (FSP console code for example)
*/
opal_table_init();
/* Init the physical map table so we can start mapping things */
phys_map_init(mfspr(SPR_PVR));
/*
* If we are coming in with a flat device-tree, we expand it
* now. Else look for HDAT and create a device-tree from them
*
* Hack alert: When entering via the OPAL entry point, fdt
* is set to -1, we record that and pass it to parse_hdat
*/
dt_root = dt_new_root("");
if (fdt == (void *)-1ul) {
if (parse_hdat(true) < 0)
abort();
} else if (fdt == NULL) {
if (parse_hdat(false) < 0)
abort();
} else {
dt_expand(fdt);
}
dt_add_cpufeatures(dt_root);
/* Now that we have a full devicetree, verify that we aren't on fire. */
per_thread_sanity_checks();
/*
* From there, we follow a fairly strict initialization order.
*
* First we need to build up our chip data structures and initialize
* XSCOM which will be needed for a number of subsequent things.
*
* We want XSCOM available as early as the platform probe in case the
* probe requires some HW accesses.
*
* We also initialize the FSI master at that point in case we need
* to access chips via that path early on.
*/
init_chips();
xscom_init();
mfsi_init();
/*
* The direct controls facility provides some control over CPUs
* using SCOMs.
*/
direct_controls_init();
/*
* Put various bits & pieces in device-tree that might not
* already be there such as the /chosen node if not there yet,
* the ICS node, etc... This can potentially use XSCOM
*/
dt_init_misc();
/*
* Initialize LPC (P8 and beyond) so we can get to the UART, BMC and
* other system controllers. This is done before probe_platform
* so that the platform probing code can access an external
* BMC if needed.
*/
lpc_init();
/*
* This should be done before mem_region_init, so the stack
* region length can be set according to the maximum PIR.
*/
init_cpu_max_pir();
/*
* Now, we init our memory map from the device-tree, and immediately
* reserve areas which we know might contain data coming from
* HostBoot. We need to do these things before we start doing
* allocations outside of our heap, such as chip local allocs,
* otherwise we might clobber those data.
*/
mem_region_init();
/*
* Reserve memory required to capture OPAL dump. This should be done
* immediately after mem_region_init to avoid any clash with local
* memory allocation.
*/
opal_mpipl_reserve_mem();
/* Reserve HOMER and OCC area */
homer_init();
/* Initialize the rest of the cpu thread structs */
init_all_cpus();
if (proc_gen == proc_gen_p9 || proc_gen == proc_gen_p10)
cpu_set_ipi_enable(true);
/* Once all CPUs are up, apply this workaround */
mask_pc_system_xstop();
/* P9/10 may be in LPAR-per-core mode, which is incompatible with KVM */
probe_lpar_per_core();
/* Add the /opal node to the device-tree */
add_opal_node();
/*
* We probe the platform now. This means the platform probe gets
* the opportunity to reserve additional areas of memory if needed.
*
* Note: Timebases still not synchronized.
*/
probe_platform();
/* Allocate our split trace buffers now. Depends on add_opal_node() */
init_trace_buffers();
/* On P8, get the ICPs and make sure they are in a sane state */
init_interrupts();
if (proc_gen == proc_gen_p8)
cpu_set_ipi_enable(true);
/* On P9 and P10, initialize XIVE */
if (proc_gen == proc_gen_p9)
init_xive();
else if (proc_gen == proc_gen_p10)
xive2_init();
/* Grab centaurs from device-tree if present (only on FSP-less) */
centaur_init();
/* initialize ocmb scom-controller */
ocmb_init();
/* Initialize PSI (depends on probe_platform being called) */
psi_init();
/* Initialize/enable LPC interrupts. This must be done after the
* PSI interface has been initialized since it serves as an interrupt
* source for LPC interrupts.
*/
lpc_init_interrupts();
/* Call in secondary CPUs */
cpu_bringup();
/* We can now overwrite the 0x100 vector as we are no longer being
* entered there.
*/
copy_sreset_vector();
/* We can now do NAP mode */
cpu_set_sreset_enable(true);
/*
* Synchronize time bases. Prior to chiptod_init() the timebase
* is free-running at a frequency based on the core clock rather
* than being synchronised to the ChipTOD network. This means
* that the timestamps in early boot might be a little off compared
* to wall clock time.
*/
chiptod_init();
/* Initialize P9 DIO */
p9_dio_init();
/*
* The SBE uses the TB value for its scheduling timer, hence
* initialize it after chiptod_init().
*/
p9_sbe_init();
/* Initialize i2c */
p8_i2c_init();
/* Register routine to dispatch and read sensors */
sensor_init();
/*
* Initialize OPAL messaging before platform.init, as we may be
* asked to queue an OCC load OPAL message when host services
* gets a load OCC request from the FSP.
*/
opal_init_msg();
/*
* We have initialized the basic HW, we can now call into the
* platform to perform subsequent inits, such as establishing
* communication with the FSP or starting IPMI.
*/
if (platform.init)
platform.init();
/* Read in NVRAM and set it up */
nvram_init();
/* Set the console level */
console_log_level();
/* Secure/Trusted Boot init. We look for /ibm,secureboot in DT */
secureboot_init();
trustedboot_init();
/* Secure variables init, handled by platform */
if (platform.secvar_init && is_fw_secureboot())
platform.secvar_init();
/*
* BMC platforms load version information from flash after
* secure/trustedboot init.
*/
if (platform.bmc)
flash_fw_version_preload();
/* preload the IMC catalog dtb */
imc_catalog_preload();
/* Install the OPAL Console handlers */
init_opal_console();
/*
* Some platforms set a flag to wait for SBE validation to be
* performed by the BMC. If this occurs it leaves the SBE in a
* bad state and the system will reboot at this point.
*/
if (platform.seeprom_update)
platform.seeprom_update();
/* Init SLW related stuff, including fastsleep */
slw_init();
op_display(OP_LOG, OP_MOD_INIT, 0x0002);
/*
* On some POWER9 BMC systems, we need to initialise the OCC
* before the NPU to facilitate NVLink/OpenCAPI presence
* detection, so we set it up as early as possible. On FSP
* systems, Hostboot starts booting the OCC later, so we delay
* OCC initialisation as late as possible to give it the
* maximum time to boot up.
*/
if (platform.bmc)
occ_pstates_init();
pci_nvram_init();
preload_capp_ucode();
start_preload_kernel();
/* Catalog decompression routine */
imc_decompress_catalog();
/* Probe all HWPROBE hardware we have code linked for */
probe_hardware();
/* Initialize PCI */
pci_init_slots();
/* Add OPAL timer related properties */
late_init_timers();
/* Setup ibm,firmware-versions if able */
if (platform.bmc) {
flash_dt_add_fw_version();
ipmi_dt_add_bmc_info();
}
ipmi_set_fw_progress_sensor(IPMI_FW_PCI_INIT);
/*
* These last few things must be done as late as possible
* because they rely on various other things having been setup,
* for example, add_opal_interrupts() will add all the interrupt
* sources that are going to the firmware. We can't add a new one
* after that call. Similarly, the mem_region calls will construct
* the reserve maps in the DT so we shouldn't affect the memory
* regions after that.
*/
/* Create the LPC bus interrupt-map on P9 */
lpc_finalize_interrupts();
/* Add the list of interrupts going to OPAL */
add_opal_interrupts();
/* Init In-Memory Collection related stuff (load the IMC dtb into memory) */
imc_init();
/* Disable protected execution facility in BML */
cpu_disable_pef();
/* export the trace buffers */
trace_add_dt_props();
/* Now release parts of memory nodes we haven't used ourselves... */
mem_region_release_unused();
/* ... and add remaining reservations to the DT */
mem_region_add_dt_reserved();
/*
* Update /ibm,secureboot/ibm,cvc/memory-region to point to
* /reserved-memory/secure-crypt-algo-code instead of
* /ibm,hostboot/reserved-memory/secure-crypt-algo-code.
*/
cvc_update_reserved_memory_phandle();
prd_register_reserved_memory();
load_and_boot_kernel(false);
}
void __noreturn __secondary_cpu_entry(void)
{
struct cpu_thread *cpu = this_cpu();
/* Secondary CPU called in */
cpu_callin(cpu);
enable_machine_check();
mtmsrd(MSR_RI, 1);
/* Some XIVE setup */
if (proc_gen == proc_gen_p9)
xive_cpu_callin(cpu);
else if (proc_gen == proc_gen_p10)
xive2_cpu_callin(cpu);
/* Wait for work to do */
while(true) {
if (cpu_check_jobs(cpu))
cpu_process_jobs();
else
cpu_idle_job();
}
}
/* Called from head.S, thus no prototype. */
void __noreturn __nomcount secondary_cpu_entry(void);
void __noreturn __nomcount secondary_cpu_entry(void)
{
struct cpu_thread *cpu = this_cpu();
per_thread_sanity_checks();
prlog(PR_DEBUG, "INIT: CPU PIR 0x%04x called in\n", cpu->pir);
__secondary_cpu_entry();
}