blob: 67c24b532d8df79a7044f53f0ac49a7a9ac7c471 [file] [log] [blame]
// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
/* Copyright 2017-2019 IBM Corp. */
#include <skiboot.h>
#include <device.h>
#include <console.h>
#include <chip.h>
#include <ipmi.h>
#include <psi.h>
#include <npu-regs.h>
#include <xscom.h>
#include <xscom-p9-regs.h>
#include <timebase.h>
#include <pci.h>
#include <pci-slot.h>
#include <phb4.h>
#include <npu2.h>
#include <occ.h>
#include <i2c.h>
#include <secvar.h>
#include "astbmc.h"
#include "ast.h"
/*
 * Board variant, selected by witherspoon_probe() from the highest
 * "ibm,npu-group-id" found in the device tree. Being a zero-initialised
 * static, it reads as UNKNOWN until the probe has run.
 */
static enum {
	WITHERSPOON_TYPE_UNKNOWN = 0,
	WITHERSPOON_TYPE_SEQUOIA = 1,	/* highest NPU group id is 2 */
	WITHERSPOON_TYPE_REDBUD = 2,	/* highest NPU group id is 1 */
} witherspoon_type;
/*
 * HACK: Hostboot does not export correct data for the system VPD EEPROM
 * on this system, so the node is patched up here: when the EEPROM node
 * exists, its "compatible" and "label" properties are replaced.
 */
static void vpd_dt_fixup(void)
{
	static const char eeprom_path[] =
		"/xscom@603fc00000000/i2cm@a2000/i2c-bus@0/eeprom@50";
	struct dt_node *eeprom;

	eeprom = dt_find_by_path(dt_root, eeprom_path);
	if (!eeprom)
		return;

	dt_check_del_prop(eeprom, "compatible");
	dt_add_property_string(eeprom, "compatible", "atmel,24c512");

	dt_check_del_prop(eeprom, "label");
	dt_add_property_string(eeprom, "label", "system-vpd");
}
/*
 * Make sure every "ibm,xscom" node has an "i2c-bus@4" child under its
 * "i2cm@a1000" I2C master node. An xscom node without that master is
 * reported with an error and skipped; a master that already has the bus
 * node is left alone.
 */
static void witherspoon_create_ocapi_i2c_bus(void)
{
	struct dt_node *xscom;

	prlog(PR_DEBUG, "OCAPI: Adding I2C bus device node for OCAPI reset\n");

	dt_for_each_compatible(dt_root, xscom, "ibm,xscom") {
		struct dt_node *master, *bus;

		master = dt_find_by_name(xscom, "i2cm@a1000");
		if (!master) {
			prlog(PR_ERR, "OCAPI: Failed to add I2C bus device node\n");
			continue;
		}

		/* Nothing to add when the port is already described */
		if (dt_find_by_name(master, "i2c-bus@4"))
			continue;

		bus = dt_new_addr(master, "i2c-bus", 4);
		dt_add_property_cells(bus, "reg", 4);
		/* 0x61a80 == 400000 */
		dt_add_property_cells(bus, "bus-frequency", 0x61a80);
		dt_add_property_strings(bus, "compatible",
					"ibm,opal-i2c", "ibm,power8-i2c-port",
					"ibm,power9-i2c-port");
	}
}
/*
 * Platform probe callback.
 *
 * Returns false when the root node is not compatible with
 * "ibm,witherspoon". Otherwise runs the early platform setup, applies
 * the device tree fixups, works out the board variant from the highest
 * "ibm,npu-group-id" among the "ibm,npu-link" nodes, and returns true.
 */
static bool witherspoon_probe(void)
{
	struct dt_node *link;
	int top_group = 0;

	if (!dt_node_is_compatible(dt_root, "ibm,witherspoon"))
		return false;

	/* Common astbmc early initialisation */
	astbmc_early_init();

	/* The UART is handed to OPAL as its console (Linux hvc) */
	uart_set_console_policy(UART_CONSOLE_OPAL);

	vpd_dt_fixup();
	witherspoon_create_ocapi_i2c_bus();

	dt_for_each_compatible(dt_root, link, "ibm,npu-link") {
		int group = dt_prop_get_u32(link, "ibm,npu-group-id");

		if (group > top_group)
			top_group = group;
	}

	/* Highest group id 1 means Redbud, 2 means Sequoia */
	if (top_group == 1) {
		witherspoon_type = WITHERSPOON_TYPE_REDBUD;
	} else if (top_group == 2) {
		witherspoon_type = WITHERSPOON_TYPE_SEQUOIA;
	} else {
		witherspoon_type = WITHERSPOON_TYPE_UNKNOWN;
		prlog(PR_NOTICE, "PLAT: Unknown Witherspoon variant detected\n");
	}

	return true;
}
static void phb4_activate_shared_slot_witherspoon(struct proc_chip *chip)
{
uint64_t val;
/*
* Shared slot activation is done by raising a GPIO line on the
* chip with the secondary slot. It will somehow activate the
* sideband signals between the slots.
* Need to wait 100us for stability.
*/
xscom_read(chip->id, P9_GPIO_DATA_OUT_ENABLE, &val);
val |= PPC_BIT(2);
xscom_write(chip->id, P9_GPIO_DATA_OUT_ENABLE, val);
xscom_read(chip->id, P9_GPIO_DATA_OUT, &val);
val |= PPC_BIT(2);
xscom_write(chip->id, P9_GPIO_DATA_OUT, val);
time_wait_us(100);
prlog(PR_INFO, "Shared PCI slot activated\n");
}
/*
 * Detect a x16 card on the shared slot and, if one is found, do the
 * extra configuration it needs.
 *
 * The shared slot ("Slot 2" in the documentation) is connected to PEC2
 * phb index 3 on both chips. From skiboot it looks like two x8 slots,
 * each with its own presence bit:
 *
 *   slot0 presence   slot1 presence
 *        0                0          => no card
 *        1                0          => x8 or less card detected
 *        1                1          => x16 card detected
 *        0                1          => invalid combination
 *
 * Only the '1 1' combination triggers any action.
 *
 * There is no way to tell a shared-slot-compatible card (such as a
 * Mellanox CX5) from a 'normal' x16 PCI card, so the shared slot is
 * activated in both cases, as it doesn't seem to hurt. With a normal
 * x16 card the link won't train on the second slot (unrelated to the
 * activation) and the procedure times out, which lengthens boot. The
 * recommendation is therefore not to use a normal x16 card in the
 * shared slot of a witherspoon. A x8 or smaller adapter should behave
 * like in any other physical slot.
 */
static void witherspoon_shared_slot_fixup(void)
{
	struct proc_chip *first_chip, *second_chip;
	struct pci_slot *primary, *secondary;
	uint8_t primary_present = 0, secondary_present = 0;

	/* Exactly two chips are expected */
	first_chip = next_chip(NULL);
	second_chip = next_chip(first_chip);
	if (!second_chip || next_chip(second_chip)) {
		prlog(PR_WARNING,
		      "PLAT: Can't find second chip, "
		      "skipping PCIe shared slot detection\n");
		return;
	}

	/* PHB3 of each chip drives one half of the shared slot */
	primary = pci_slot_find(phb4_get_opal_id(first_chip->id, 3));
	secondary = pci_slot_find(phb4_get_opal_id(second_chip->id, 3));
	if (!primary || !secondary)
		return;

	if (primary->ops.get_presence_state)
		primary->ops.get_presence_state(primary, &primary_present);
	if (secondary->ops.get_presence_state)
		secondary->ops.get_presence_state(secondary,
						  &secondary_present);

	if (primary_present != 1 || secondary_present != 1)
		return;

	phb4_activate_shared_slot_witherspoon(second_chip);
	primary->peer_slot = secondary;
	secondary->peer_slot = primary;
}
/*
 * Device-walk callback: report whether @dev is one of the listed
 * Mellanox ConnectX adapters.
 *
 * Returns 1 when the vendor ID is 0x15b3 (Mellanox) and the device ID
 * is in the table below, 0 otherwise. @phb and @userdata are unused.
 */
static int check_mlx_cards(struct phb *phb __unused, struct pci_device *dev,
			   void *userdata __unused)
{
	static const uint16_t connectx_ids[] = {
		0x1017, /* ConnectX-5 */
		0x1019, /* ConnectX-5 Ex */
		0x101b, /* ConnectX-6 */
		0x101d, /* ConnectX-6 Dx */
		0x101f, /* ConnectX-6 Lx */
		0x1021, /* ConnectX-7 */
	};
	int idx;

	if (PCI_VENDOR_ID(dev->vdid) != 0x15b3) /* not Mellanox */
		return 0;

	for (idx = 0; idx < ARRAY_SIZE(connectx_ids); idx++) {
		if (connectx_ids[idx] == PCI_DEVICE_ID(dev->vdid))
			return 1;
	}

	return 0;
}
/*
 * Called once PCI probing is complete.
 *
 * If a Mellanox card is found on the shared slot, the DMA engines are
 * reallocated between the stacks in PEC2, as this is required to get
 * good GPU direct performance.
 */
static void witherspoon_pci_probe_complete(void)
{
	struct phb *phb;

	for_each_phb(phb) {
		struct phb4 *p4;

		/* Virtual PHBs are of no interest here */
		if (phb->phb_type != phb_type_pcie_v4)
			continue;

		/* Only the first PHB on PEC2 (index 3) is considered */
		p4 = phb_to_phb4(phb);
		if (p4->index != 3)
			continue;

		if (pci_walk_dev(phb, NULL, check_mlx_cards, NULL))
			phb4_pec2_dma_engine_realloc(p4);
	}
}
/*
 * Assign a brick index and device type to the NPU device whose
 * link_index equals @link_index.
 *
 * Only the first matching device is updated. If no device matches, an
 * error is logged and nothing is changed.
 */
static void set_link_details(struct npu2 *npu, uint32_t link_index,
			     uint32_t brick_index, enum npu2_dev_type type)
{
	int idx;

	for (idx = 0; idx < npu->total_devices; idx++) {
		struct npu2_dev *candidate = &npu->devices[idx];

		if (candidate->link_index != link_index)
			continue;

		candidate->brick_index = brick_index;
		candidate->type = type;
		return;
	}

	prlog(PR_ERR, "PLAT: Could not find NPU link index %d\n",
	      link_index);
}
/*
 * Platform hook deciding, for each link of @npu, whether it is used as
 * NVLink or OpenCAPI.
 *
 * On anything but Redbud every device is simply marked as NVLink. On
 * Redbud the NPU must expose exactly 6 devices (asserted). The function
 * then looks up the I2C port named after the chip and the engine/port in
 * platform.ocapi, asks the OCC which of GPU slots 0 and 1 are present,
 * and reads one presence byte over I2C: bit 0 set means GPU#0 is
 * OpenCAPI, bit 1 set means GPU#1 is OpenCAPI, a clear bit means NVLink.
 *
 * Device types are left untouched when the I2C port cannot be found,
 * when neither GPU slot is present, or when an I2C transfer fails (the
 * latter is logged with its return code).
 */
static void witherspoon_npu2_device_detect(struct npu2 *npu)
{
struct proc_chip *chip;
uint8_t state;
uint64_t i2c_port_id = 0;
/*
 * Sized for "p8_" + 8 hex digits + "_e" + engine + "p" + port + NUL,
 * which fits exactly when engine and port are single digits
 * (witherspoon_ocapi uses 1 and 4).
 */
char port_name[17];
struct dt_node *dn;
int rc;
bool gpu0_present, gpu1_present;
if (witherspoon_type != WITHERSPOON_TYPE_REDBUD) {
prlog(PR_DEBUG, "PLAT: Setting all NPU links to NVLink, OpenCAPI only supported on Redbud\n");
for (int i = 0; i < npu->total_devices; i++) {
npu->devices[i].type = NPU2_DEV_TYPE_NVLINK;
}
return;
}
assert(npu->total_devices == 6);
chip = get_chip(npu->chip_id);
/* Find I2C port */
snprintf(port_name, sizeof(port_name), "p8_%08x_e%dp%d",
chip->id, platform.ocapi->i2c_engine,
platform.ocapi->i2c_port);
dt_for_each_compatible(dt_root, dn, "ibm,power9-i2c-port") {
if (streq(port_name, dt_prop_get(dn, "ibm,port-name"))) {
i2c_port_id = dt_prop_get_u32(dn, "ibm,opal-id");
break;
}
}
/* An id of 0 doubles as the "not found" marker */
if (!i2c_port_id) {
prlog(PR_ERR, "PLAT: Could not find NPU presence I2C port\n");
return;
}
gpu0_present = occ_get_gpu_presence(chip, 0);
if (gpu0_present) {
prlog(PR_DEBUG, "PLAT: Chip %d GPU#0 slot present\n", chip->id);
}
gpu1_present = occ_get_gpu_presence(chip, 1);
if (gpu1_present) {
prlog(PR_DEBUG, "PLAT: Chip %d GPU#1 slot present\n", chip->id);
}
/*
 * The following I2C ops generate errors if no device is
 * present on any SXM2 slot. Since it's useless, let's skip it
 */
if (!gpu0_present && !gpu1_present)
return;
/*
 * Set pins to input.
 * NOTE(review): the literal arguments (3, 1, ..., 1, 120) are presumably
 * register offset, offset size, buffer length and timeout; confirm
 * against the i2c_request_send() prototype.
 */
state = 0xff;
rc = i2c_request_send(i2c_port_id,
platform.ocapi->i2c_presence_addr, SMBUS_WRITE, 3,
1, &state, 1, 120);
if (rc)
goto i2c_failed;
/* Read the presence value */
state = 0x00;
rc = i2c_request_send(i2c_port_id,
platform.ocapi->i2c_presence_addr, SMBUS_READ, 0,
1, &state, 1, 120);
if (rc)
goto i2c_failed;
/* Bit 0 of the presence byte describes GPU#0 */
if (gpu0_present) {
if (state & (1 << 0)) {
prlog(PR_DEBUG, "PLAT: Chip %d GPU#0 is OpenCAPI\n",
chip->id);
/*
 * On witherspoon, bricks 2 and 3 are connected to
 * the lanes matching links 0 and 1 in OpenCAPI mode.
 */
set_link_details(npu, 1, 3, NPU2_DEV_TYPE_OPENCAPI);
/* We currently don't support using the second link */
set_link_details(npu, 0, 2, NPU2_DEV_TYPE_UNKNOWN);
} else {
prlog(PR_DEBUG, "PLAT: Chip %d GPU#0 is NVLink\n",
chip->id);
set_link_details(npu, 0, 0, NPU2_DEV_TYPE_NVLINK);
set_link_details(npu, 1, 1, NPU2_DEV_TYPE_NVLINK);
set_link_details(npu, 2, 2, NPU2_DEV_TYPE_NVLINK);
}
}
/* Bit 1 of the presence byte describes GPU#1 */
if (gpu1_present) {
if (state & (1 << 1)) {
prlog(PR_DEBUG, "PLAT: Chip %d GPU#1 is OpenCAPI\n",
chip->id);
set_link_details(npu, 4, 4, NPU2_DEV_TYPE_OPENCAPI);
/* We currently don't support using the second link */
set_link_details(npu, 5, 5, NPU2_DEV_TYPE_UNKNOWN);
} else {
prlog(PR_DEBUG, "PLAT: Chip %d GPU#1 is NVLink\n",
chip->id);
set_link_details(npu, 3, 3, NPU2_DEV_TYPE_NVLINK);
set_link_details(npu, 4, 4, NPU2_DEV_TYPE_NVLINK);
set_link_details(npu, 5, 5, NPU2_DEV_TYPE_NVLINK);
}
}
return;
/* Shared exit for both I2C transfers */
i2c_failed:
prlog(PR_ERR, "PLAT: NPU device type detection failed, rc=%d\n", rc);
return;
}
/*
 * Map a (chip, brick) pair to its OpenCAPI slot label.
 *
 * Only bricks 3 and 4 have a label: "OPENCAPI-GPU0"/"OPENCAPI-GPU1" on
 * chip 0, "OPENCAPI-GPU3"/"OPENCAPI-GPU4" on any other chip. NULL is
 * returned for every other brick.
 */
static const char *witherspoon_ocapi_slot_label(uint32_t chip_id,
						uint32_t brick_index)
{
	const bool on_chip0 = (chip_id == 0);

	switch (brick_index) {
	case 3:
		return on_chip0 ? "OPENCAPI-GPU0" : "OPENCAPI-GPU3";
	case 4:
		return on_chip0 ? "OPENCAPI-GPU1" : "OPENCAPI-GPU4";
	default:
		return NULL;
	}
}
/*
 * OpenCAPI platform data, published through the .ocapi field of the
 * platform descriptor. witherspoon_npu2_device_detect() reads
 * i2c_engine, i2c_port and i2c_presence_addr from it.
 */
static const struct platform_ocapi witherspoon_ocapi = {
/* Engine and port used to build the I2C port name "p8_<chip>_e1p4" */
.i2c_engine = 1,
.i2c_port = 4,
.odl_phy_swap = false,
.i2c_reset_addr = 0x20,
/*
 * Witherspoon uses SXM2 connectors, carrying 2 OCAPI links
 * over a single connector - hence each pair of bricks shares
 * the same pin for resets. We currently only support using
 * bricks 3 and 4, among other reasons because we can't handle
 * a reset on one link causing the other link to reset as
 * well.
 */
.i2c_reset_brick2 = 1 << 0,
.i2c_reset_brick3 = 1 << 0,
.i2c_reset_brick4 = 1 << 1,
.i2c_reset_brick5 = 1 << 1,
/* Same I2C address as the reset device above */
.i2c_presence_addr = 0x20,
/* unused, we do this in custom presence detect */
.i2c_presence_brick2 = 0,
.i2c_presence_brick3 = 0,
.i2c_presence_brick4 = 0,
.i2c_presence_brick5 = 0,
.ocapi_slot_label = witherspoon_ocapi_slot_label,
};
/*
 * Parse a slot label of the form "GPU<n>" and return <n>.
 *
 * @slot: NUL-terminated slot label, may be NULL.
 *
 * Returns the decimal number following the "GPU" prefix, or -1 when
 * @slot is NULL, does not start with "GPU", has no digits after the
 * prefix, contains anything other than decimal digits after the prefix
 * (including signs and whitespace), or the number does not fit in a
 * 32-bit int.
 */
static int gpu_slot_to_num(const char *slot)
{
	static const char prefix[] = "GPU";
	const size_t prefix_len = sizeof(prefix) - 1;
	const char *p;
	int num = 0;

	if (!slot)
		return -1;

	/* strncmp() stops at the NUL, so short labels are never over-read */
	if (strncmp(slot, prefix, prefix_len))
		return -1;

	p = slot + prefix_len;
	if (!*p)
		return -1;

	for (; *p; p++) {
		int digit;

		if (*p < '0' || *p > '9')
			return -1;
		digit = *p - '0';

		/* Reject the label rather than overflow the result */
		if (num > (0x7fffffff - digit) / 10)
			return -1;
		num = num * 10 + digit;
	}

	return num;
}
/*
 * Publish the NVLink wiring of one NPU in the device tree.
 *
 * @npuphb: the NPU2 NVLink PHB to describe.
 *
 * The GPUs behind the NPU are identified by their "GPUn" slot labels and
 * sorted by n into g[]; the NVLink bridge nodes are sorted by brick
 * index into n[]. Each GPU node found then gets an "ibm,nvlink-peers"
 * property whose cells are phandles taken from g[] and n[] (0 for a
 * missing node), following the board-specific wiring tables below.
 *
 * Nothing is done when the board variant is unknown, when no usable
 * "GPUn" label is found, or when the labels span more GPUs than one NPU
 * can have.
 */
static void npu2_phb_nvlink_dt(struct phb *npuphb)
{
	struct dt_node *g[3] = { NULL }; /* Current maximum 3 GPUs per 1 NPU */
	struct dt_node *n[6] = { NULL };
	int max_gpus, i, gpuid, first, last;
	struct npu2 *npu2_phb = phb_to_npu2_nvlink(npuphb);
	struct pci_device *npd;

	switch (witherspoon_type) {
	case WITHERSPOON_TYPE_REDBUD:
		max_gpus = 4;
		break;
	case WITHERSPOON_TYPE_SEQUOIA:
		max_gpus = 6;
		break;
	default:
		/* witherspoon_probe() already reported missing support */
		return;
	}

	/* Find the indexes of GPUs connected to this NPU */
	for (i = 0, first = max_gpus, last = 0; i < npu2_phb->total_devices;
	     ++i) {
		gpuid = gpu_slot_to_num(npu2_phb->devices[i].nvlink.slot_label);
		if (gpuid < 0)
			continue;
		if (gpuid > last)
			last = gpuid;
		if (gpuid < first)
			first = gpuid;
	}

	/*
	 * Either no "GPUx" slots found or they are not consecutive, abort.
	 * The span is checked against the size of g[] (which is smaller
	 * than max_gpus) so that every "gpuid - first" below is a valid
	 * index.
	 */
	if (!last || last + 1 - first > (int)ARRAY_SIZE(g))
		return;

	/* Collect GPU device nodes, sorted by an index from "GPUn" */
	for (i = 0; i < npu2_phb->total_devices; ++i) {
		gpuid = gpu_slot_to_num(npu2_phb->devices[i].nvlink.slot_label);

		/*
		 * A link without a "GPUn" label has no slot in g[], and a
		 * link with no PCI device attached (NULL nvlink.pd) has no
		 * node to record.
		 */
		if (gpuid >= 0 && npu2_phb->devices[i].nvlink.pd)
			g[gpuid - first] = npu2_phb->devices[i].nvlink.pd->dn;

		/* Collect NVLink bridge nodes too, for their phandles */
		list_for_each(&npuphb->devices, npd, link) {
			if (npd->bdfn == npu2_phb->devices[i].bdfn) {
				assert(npu2_phb->devices[i].brick_index <
				       ARRAY_SIZE(n));
				n[npu2_phb->devices[i].brick_index] = npd->dn;
			}
		}
	}

	/*
	 * Store interconnect phandles in the device tree.
	 * The mapping is from Witherspoon_Design_Workbook_v1.7_19June2018.pdf,
	 * pages 39 (Sequoia), 40 (Redbud):
	 *   Figure 16: NVLink wiring diagram for planar with 6 GPUs
	 *   Figure 17: NVLink wiring diagram for planar with 4 GPUs
	 */
#define PEERPH(g) ((g)?(g)->phandle:0)
	switch (witherspoon_type) {
	case WITHERSPOON_TYPE_REDBUD:
		if (g[0])
			dt_add_property_cells(g[0], "ibm,nvlink-peers",
					      PEERPH(g[1]), PEERPH(n[0]),
					      PEERPH(g[1]), PEERPH(n[1]),
					      PEERPH(g[1]), PEERPH(n[2]));
		if (g[1])
			dt_add_property_cells(g[1], "ibm,nvlink-peers",
					      PEERPH(g[0]), PEERPH(n[3]),
					      PEERPH(g[0]), PEERPH(n[4]),
					      PEERPH(g[0]), PEERPH(n[5]));
		break;
	case WITHERSPOON_TYPE_SEQUOIA:
		if (g[0])
			dt_add_property_cells(g[0], "ibm,nvlink-peers",
					      PEERPH(g[1]), PEERPH(n[0]),
					      PEERPH(g[2]), PEERPH(g[2]),
					      PEERPH(g[1]), PEERPH(n[1]));
		if (g[1])
			dt_add_property_cells(g[1], "ibm,nvlink-peers",
					      PEERPH(g[0]), PEERPH(n[2]),
					      PEERPH(g[2]), PEERPH(g[2]),
					      PEERPH(g[0]), PEERPH(n[3]));
		if (g[2])
			dt_add_property_cells(g[2], "ibm,nvlink-peers",
					      PEERPH(g[1]), PEERPH(g[0]),
					      PEERPH(g[1]), PEERPH(n[4]),
					      PEERPH(g[0]), PEERPH(n[5]));
		break;
	default:
		break;
	}
}
/*
 * Final device tree adjustments before the tree is handed over.
 *
 * Does nothing on a reboot. Otherwise it adds the NVLink peer
 * information for every NPU2 PHB and disables the GPU I2C port on
 * chips where no GPU is detected.
 */
static void witherspoon_finalise_dt(bool is_reboot)
{
	struct dt_node *node;
	struct proc_chip *chip;

	if (is_reboot)
		return;

	dt_for_each_compatible(dt_root, node, "ibm,power9-npu-pciex") {
		u32 phb_id = dt_prop_get_cell(node, "ibm,opal-phbid", 1);
		struct phb *npu_phb = pci_get_phb(phb_id);

		if (npu_phb && npu_phb->phb_type == phb_type_npu_v2)
			npu2_phb_nvlink_dt(npu_phb);
	}

	/*
	 * The I2C bus used to talk to the GPUs has a 750K pullup, which
	 * is way too big. With no GPU connected to the chip, every I2C
	 * transaction fails with an Arb loss error because SCL/SDA do
	 * not return to the idle state fast enough. Disabling the port
	 * squashes those errors.
	 */
	for (chip = next_chip(NULL); chip; chip = next_chip(chip)) {
		bool any_gpu = false;
		int slot;

		node = dt_find_by_path(chip->devnode, "i2cm@a1000/i2c-bus@4");
		if (!node)
			continue;

		/* All three slots are always queried */
		for (slot = 0; slot < 3; slot++)
			any_gpu |= occ_get_gpu_presence(chip, slot);
		if (any_gpu)
			continue;

		dt_check_del_prop(node, "status");
		dt_add_property_string(node, "status", "disabled");
	}
}
/*
 * Platform secvar_init callback: hands secboot_tpm_driver and
 * edk2_compatible_v1 to secvar_main() and returns its status unchanged.
 */
static int witherspoon_secvar_init(void)
{
	int status;

	status = secvar_main(secboot_tpm_driver, edk2_compatible_v1);

	return status;
}
/*
 * Platform descriptor for Witherspoon (the only descriptor declared
 * here). The witherspoon_* callbacks and the OpenCAPI data are defined
 * in this file; the remaining entries are helpers referenced from
 * elsewhere (astbmc, ipmi, flash, device tree slot handling).
 */
DECLARE_PLATFORM(witherspoon) = {
.name = "Witherspoon",
.probe = witherspoon_probe,
.init = astbmc_init,
.pre_pci_fixup = witherspoon_shared_slot_fixup,
.pci_probe_complete = witherspoon_pci_probe_complete,
.start_preload_resource = flash_start_preload_resource,
.resource_loaded = flash_resource_loaded,
.bmc = &bmc_plat_ast2500_openbmc,
.cec_power_down = astbmc_ipmi_power_down,
.cec_reboot = astbmc_ipmi_reboot,
.elog_commit = ipmi_elog_commit,
.finalise_dt = witherspoon_finalise_dt,
.exit = astbmc_exit,
.terminate = ipmi_terminate,
.pci_get_slot_info = dt_slot_get_slot_info,
.ocapi = &witherspoon_ocapi,
.npu2_device_detect = witherspoon_npu2_device_detect,
.op_display = op_display_lpc,
.secvar_init = witherspoon_secvar_init,
};