| // SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later |
| /* Copyright 2017-2019 IBM Corp. */ |
| |
| #include <skiboot.h> |
| #include <device.h> |
| #include <console.h> |
| #include <chip.h> |
| #include <ipmi.h> |
| #include <psi.h> |
| #include <npu-regs.h> |
| #include <xscom.h> |
| #include <xscom-p9-regs.h> |
| #include <timebase.h> |
| #include <pci.h> |
| #include <pci-slot.h> |
| #include <phb4.h> |
| #include <npu2.h> |
| #include <occ.h> |
| #include <i2c.h> |
| #include <secvar.h> |
| |
| #include "astbmc.h" |
| #include "ast.h" |
| |
| static enum { |
| WITHERSPOON_TYPE_UNKNOWN, |
| WITHERSPOON_TYPE_SEQUOIA, |
| WITHERSPOON_TYPE_REDBUD |
| } witherspoon_type; |
| |
| /* |
| * HACK: Hostboot doesn't export the correct data for the system VPD EEPROM |
| * for this system. So we need to work around it here. |
| */ |
| static void vpd_dt_fixup(void) |
| { |
| struct dt_node *n = dt_find_by_path(dt_root, |
| "/xscom@603fc00000000/i2cm@a2000/i2c-bus@0/eeprom@50"); |
| |
| if (n) { |
| dt_check_del_prop(n, "compatible"); |
| dt_add_property_string(n, "compatible", "atmel,24c512"); |
| |
| dt_check_del_prop(n, "label"); |
| dt_add_property_string(n, "label", "system-vpd"); |
| } |
| } |
| |
| static void witherspoon_create_ocapi_i2c_bus(void) |
| { |
| struct dt_node *xscom, *i2cm, *i2c_bus; |
| prlog(PR_DEBUG, "OCAPI: Adding I2C bus device node for OCAPI reset\n"); |
| dt_for_each_compatible(dt_root, xscom, "ibm,xscom") { |
| i2cm = dt_find_by_name(xscom, "i2cm@a1000"); |
| if (!i2cm) { |
| prlog(PR_ERR, "OCAPI: Failed to add I2C bus device node\n"); |
| continue; |
| } |
| |
| if (dt_find_by_name(i2cm, "i2c-bus@4")) |
| continue; |
| |
| i2c_bus = dt_new_addr(i2cm, "i2c-bus", 4); |
| dt_add_property_cells(i2c_bus, "reg", 4); |
| dt_add_property_cells(i2c_bus, "bus-frequency", 0x61a80); |
| dt_add_property_strings(i2c_bus, "compatible", |
| "ibm,opal-i2c", "ibm,power8-i2c-port", |
| "ibm,power9-i2c-port"); |
| } |
| } |
| |
| static bool witherspoon_probe(void) |
| { |
| struct dt_node *np; |
| int highest_gpu_group_id = 0; |
| int gpu_group_id; |
| |
| if (!dt_node_is_compatible(dt_root, "ibm,witherspoon")) |
| return false; |
| |
| /* Lot of common early inits here */ |
| astbmc_early_init(); |
| |
| /* Setup UART for use by OPAL (Linux hvc) */ |
| uart_set_console_policy(UART_CONSOLE_OPAL); |
| |
| vpd_dt_fixup(); |
| |
| witherspoon_create_ocapi_i2c_bus(); |
| |
| dt_for_each_compatible(dt_root, np, "ibm,npu-link") { |
| gpu_group_id = dt_prop_get_u32(np, "ibm,npu-group-id"); |
| if (gpu_group_id > highest_gpu_group_id) |
| highest_gpu_group_id = gpu_group_id; |
| }; |
| |
| switch (highest_gpu_group_id) { |
| case 1: |
| witherspoon_type = WITHERSPOON_TYPE_REDBUD; |
| break; |
| case 2: |
| witherspoon_type = WITHERSPOON_TYPE_SEQUOIA; |
| break; |
| default: |
| witherspoon_type = WITHERSPOON_TYPE_UNKNOWN; |
| prlog(PR_NOTICE, "PLAT: Unknown Witherspoon variant detected\n"); |
| } |
| |
| return true; |
| } |
| |
| static void phb4_activate_shared_slot_witherspoon(struct proc_chip *chip) |
| { |
| uint64_t val; |
| |
| /* |
| * Shared slot activation is done by raising a GPIO line on the |
| * chip with the secondary slot. It will somehow activate the |
| * sideband signals between the slots. |
| * Need to wait 100us for stability. |
| */ |
| xscom_read(chip->id, P9_GPIO_DATA_OUT_ENABLE, &val); |
| val |= PPC_BIT(2); |
| xscom_write(chip->id, P9_GPIO_DATA_OUT_ENABLE, val); |
| |
| xscom_read(chip->id, P9_GPIO_DATA_OUT, &val); |
| val |= PPC_BIT(2); |
| xscom_write(chip->id, P9_GPIO_DATA_OUT, val); |
| time_wait_us(100); |
| prlog(PR_INFO, "Shared PCI slot activated\n"); |
| } |
| |
| static void witherspoon_shared_slot_fixup(void) |
| { |
| struct pci_slot *slot0, *slot1; |
| struct proc_chip *chip0, *chip1; |
| uint8_t p0 = 0, p1 = 0; |
| |
| /* |
| * Detect if a x16 card is present on the shared slot and |
| * do some extra configuration if it is. |
| * |
| * The shared slot, a.k.a "Slot 2" in the documentation, is |
| * connected to PEC2 phb index 3 on both chips. From skiboot, |
| * it looks like two x8 slots, each with its own presence bit. |
| * |
| * Here is the matrix of possibilities for the presence bits: |
| * |
| * slot0 presence slot1 presence |
| * 0 0 => no card |
| * 1 0 => x8 or less card detected |
| * 1 1 => x16 card detected |
| * 0 1 => invalid combination |
| * |
| * We only act if a x16 card is detected ('1 1' combination above). |
| * |
| * One issue is that we don't really know if it is a |
| * shared-slot-compatible card (such as Mellanox CX5) or |
| * a 'normal' x16 PCI card. We activate the shared slot in both cases, |
| * as it doesn't seem to hurt. |
| * |
| * If the card is a normal x16 PCI card, the link won't train on the |
| * second slot (nothing to do with the shared slot activation), the |
| * procedure will timeout, thus adding some delay to the boot time. |
| * Therefore the recommendation is that we shouldn't use a normal |
| * x16 card on the shared slot of a witherspoon. |
| * |
| * Plugging a x8 or less adapter on the shared slot should work |
| * like any other physical slot. |
| */ |
| chip0 = next_chip(NULL); |
| chip1 = next_chip(chip0); |
| if (!chip1 || next_chip(chip1)) { |
| prlog(PR_WARNING, |
| "PLAT: Can't find second chip, " |
| "skipping PCIe shared slot detection\n"); |
| return; |
| } |
| |
| /* the shared slot is connected to PHB3 on both chips */ |
| slot0 = pci_slot_find(phb4_get_opal_id(chip0->id, 3)); |
| slot1 = pci_slot_find(phb4_get_opal_id(chip1->id, 3)); |
| if (slot0 && slot1) { |
| if (slot0->ops.get_presence_state) |
| slot0->ops.get_presence_state(slot0, &p0); |
| if (slot1->ops.get_presence_state) |
| slot1->ops.get_presence_state(slot1, &p1); |
| if (p0 == 1 && p1 == 1) { |
| phb4_activate_shared_slot_witherspoon(chip1); |
| slot0->peer_slot = slot1; |
| slot1->peer_slot = slot0; |
| } |
| } |
| } |
| |
| static int check_mlx_cards(struct phb *phb __unused, struct pci_device *dev, |
| void *userdata __unused) |
| { |
| uint16_t mlx_cards[] = { |
| 0x1017, /* ConnectX-5 */ |
| 0x1019, /* ConnectX-5 Ex */ |
| 0x101b, /* ConnectX-6 */ |
| 0x101d, /* ConnectX-6 Dx */ |
| 0x101f, /* ConnectX-6 Lx */ |
| 0x1021, /* ConnectX-7 */ |
| }; |
| |
| if (PCI_VENDOR_ID(dev->vdid) == 0x15b3) { /* Mellanox */ |
| for (int i = 0; i < ARRAY_SIZE(mlx_cards); i++) { |
| if (mlx_cards[i] == PCI_DEVICE_ID(dev->vdid)) |
| return 1; |
| } |
| } |
| return 0; |
| } |
| |
| static void witherspoon_pci_probe_complete(void) |
| { |
| struct pci_device *dev; |
| struct phb *phb; |
| struct phb4 *p; |
| |
| /* |
| * Reallocate dma engines between stacks in PEC2 if a Mellanox |
| * card is found on the shared slot, as it is required to get |
| * good GPU direct performance. |
| */ |
| for_each_phb(phb) { |
| /* skip the virtual PHBs */ |
| if (phb->phb_type != phb_type_pcie_v4) |
| continue; |
| p = phb_to_phb4(phb); |
| /* Keep only the first PHB on PEC2 */ |
| if (p->index != 3) |
| continue; |
| dev = pci_walk_dev(phb, NULL, check_mlx_cards, NULL); |
| if (dev) |
| phb4_pec2_dma_engine_realloc(p); |
| } |
| } |
| |
| static void set_link_details(struct npu2 *npu, uint32_t link_index, |
| uint32_t brick_index, enum npu2_dev_type type) |
| { |
| struct npu2_dev *dev = NULL; |
| for (int i = 0; i < npu->total_devices; i++) { |
| if (npu->devices[i].link_index == link_index) { |
| dev = &npu->devices[i]; |
| break; |
| } |
| } |
| if (!dev) { |
| prlog(PR_ERR, "PLAT: Could not find NPU link index %d\n", |
| link_index); |
| return; |
| } |
| dev->brick_index = brick_index; |
| dev->type = type; |
| } |
| |
| static void witherspoon_npu2_device_detect(struct npu2 *npu) |
| { |
| struct proc_chip *chip; |
| uint8_t state; |
| uint64_t i2c_port_id = 0; |
| char port_name[17]; |
| struct dt_node *dn; |
| int rc; |
| |
| bool gpu0_present, gpu1_present; |
| |
| if (witherspoon_type != WITHERSPOON_TYPE_REDBUD) { |
| prlog(PR_DEBUG, "PLAT: Setting all NPU links to NVLink, OpenCAPI only supported on Redbud\n"); |
| for (int i = 0; i < npu->total_devices; i++) { |
| npu->devices[i].type = NPU2_DEV_TYPE_NVLINK; |
| } |
| return; |
| } |
| assert(npu->total_devices == 6); |
| |
| chip = get_chip(npu->chip_id); |
| |
| /* Find I2C port */ |
| snprintf(port_name, sizeof(port_name), "p8_%08x_e%dp%d", |
| chip->id, platform.ocapi->i2c_engine, |
| platform.ocapi->i2c_port); |
| dt_for_each_compatible(dt_root, dn, "ibm,power9-i2c-port") { |
| if (streq(port_name, dt_prop_get(dn, "ibm,port-name"))) { |
| i2c_port_id = dt_prop_get_u32(dn, "ibm,opal-id"); |
| break; |
| } |
| } |
| |
| if (!i2c_port_id) { |
| prlog(PR_ERR, "PLAT: Could not find NPU presence I2C port\n"); |
| return; |
| } |
| |
| gpu0_present = occ_get_gpu_presence(chip, 0); |
| if (gpu0_present) { |
| prlog(PR_DEBUG, "PLAT: Chip %d GPU#0 slot present\n", chip->id); |
| } |
| |
| gpu1_present = occ_get_gpu_presence(chip, 1); |
| if (gpu1_present) { |
| prlog(PR_DEBUG, "PLAT: Chip %d GPU#1 slot present\n", chip->id); |
| } |
| |
| /* |
| * The following I2C ops generate errors if no device is |
| * present on any SXM2 slot. Since it's useless, let's skip it |
| */ |
| if (!gpu0_present && !gpu1_present) |
| return; |
| |
| /* Set pins to input */ |
| state = 0xff; |
| rc = i2c_request_send(i2c_port_id, |
| platform.ocapi->i2c_presence_addr, SMBUS_WRITE, 3, |
| 1, &state, 1, 120); |
| if (rc) |
| goto i2c_failed; |
| |
| /* Read the presence value */ |
| state = 0x00; |
| rc = i2c_request_send(i2c_port_id, |
| platform.ocapi->i2c_presence_addr, SMBUS_READ, 0, |
| 1, &state, 1, 120); |
| if (rc) |
| goto i2c_failed; |
| |
| if (gpu0_present) { |
| if (state & (1 << 0)) { |
| prlog(PR_DEBUG, "PLAT: Chip %d GPU#0 is OpenCAPI\n", |
| chip->id); |
| /* |
| * On witherspoon, bricks 2 and 3 are connected to |
| * the lanes matching links 0 and 1 in OpenCAPI mode. |
| */ |
| set_link_details(npu, 1, 3, NPU2_DEV_TYPE_OPENCAPI); |
| /* We current don't support using the second link */ |
| set_link_details(npu, 0, 2, NPU2_DEV_TYPE_UNKNOWN); |
| } else { |
| prlog(PR_DEBUG, "PLAT: Chip %d GPU#0 is NVLink\n", |
| chip->id); |
| set_link_details(npu, 0, 0, NPU2_DEV_TYPE_NVLINK); |
| set_link_details(npu, 1, 1, NPU2_DEV_TYPE_NVLINK); |
| set_link_details(npu, 2, 2, NPU2_DEV_TYPE_NVLINK); |
| } |
| } |
| |
| if (gpu1_present) { |
| if (state & (1 << 1)) { |
| prlog(PR_DEBUG, "PLAT: Chip %d GPU#1 is OpenCAPI\n", |
| chip->id); |
| set_link_details(npu, 4, 4, NPU2_DEV_TYPE_OPENCAPI); |
| /* We current don't support using the second link */ |
| set_link_details(npu, 5, 5, NPU2_DEV_TYPE_UNKNOWN); |
| } else { |
| prlog(PR_DEBUG, "PLAT: Chip %d GPU#1 is NVLink\n", |
| chip->id); |
| set_link_details(npu, 3, 3, NPU2_DEV_TYPE_NVLINK); |
| set_link_details(npu, 4, 4, NPU2_DEV_TYPE_NVLINK); |
| set_link_details(npu, 5, 5, NPU2_DEV_TYPE_NVLINK); |
| } |
| } |
| |
| return; |
| |
| i2c_failed: |
| prlog(PR_ERR, "PLAT: NPU device type detection failed, rc=%d\n", rc); |
| return; |
| } |
| |
| static const char *witherspoon_ocapi_slot_label(uint32_t chip_id, |
| uint32_t brick_index) |
| { |
| const char *name = NULL; |
| |
| if (chip_id == 0) { |
| if (brick_index == 3) |
| name = "OPENCAPI-GPU0"; |
| else if (brick_index == 4) |
| name = "OPENCAPI-GPU1"; |
| } else { |
| if (brick_index == 3) |
| name = "OPENCAPI-GPU3"; |
| else if (brick_index == 4) |
| name = "OPENCAPI-GPU4"; |
| } |
| return name; |
| } |
| |
| static const struct platform_ocapi witherspoon_ocapi = { |
| .i2c_engine = 1, |
| .i2c_port = 4, |
| .odl_phy_swap = false, |
| .i2c_reset_addr = 0x20, |
| /* |
| * Witherspoon uses SXM2 connectors, carrying 2 OCAPI links |
| * over a single connector - hence each pair of bricks shares |
| * the same pin for resets. We currently only support using |
| * bricks 3 and 4, among other reasons because we can't handle |
| * a reset on one link causing the other link to reset as |
| * well. |
| */ |
| .i2c_reset_brick2 = 1 << 0, |
| .i2c_reset_brick3 = 1 << 0, |
| .i2c_reset_brick4 = 1 << 1, |
| .i2c_reset_brick5 = 1 << 1, |
| .i2c_presence_addr = 0x20, |
| /* unused, we do this in custom presence detect */ |
| .i2c_presence_brick2 = 0, |
| .i2c_presence_brick3 = 0, |
| .i2c_presence_brick4 = 0, |
| .i2c_presence_brick5 = 0, |
| .ocapi_slot_label = witherspoon_ocapi_slot_label, |
| }; |
| |
| static int gpu_slot_to_num(const char *slot) |
| { |
| char *p = NULL; |
| int ret; |
| |
| if (!slot) |
| return -1; |
| |
| if (memcmp(slot, "GPU", 3)) |
| return -1; |
| |
| ret = strtol(slot + 3, &p, 10); |
| if (*p || p == slot + 3) |
| return -1; |
| |
| return ret; |
| } |
| |
| static void npu2_phb_nvlink_dt(struct phb *npuphb) |
| { |
| struct dt_node *g[3] = { NULL }; /* Current maximum 3 GPUs per 1 NPU */ |
| struct dt_node *n[6] = { NULL }; |
| int max_gpus, i, gpuid, first, last; |
| struct npu2 *npu2_phb = phb_to_npu2_nvlink(npuphb); |
| struct pci_device *npd; |
| |
| switch (witherspoon_type) { |
| case WITHERSPOON_TYPE_REDBUD: |
| max_gpus = 4; |
| break; |
| case WITHERSPOON_TYPE_SEQUOIA: |
| max_gpus = 6; |
| break; |
| default: |
| /* witherspoon_probe() already reported missing support */ |
| return; |
| } |
| |
| /* Find the indexes of GPUs connected to this NPU */ |
| for (i = 0, first = max_gpus, last = 0; i < npu2_phb->total_devices; |
| ++i) { |
| gpuid = gpu_slot_to_num(npu2_phb->devices[i].nvlink.slot_label); |
| if (gpuid < 0) |
| continue; |
| if (gpuid > last) |
| last = gpuid; |
| if (gpuid < first) |
| first = gpuid; |
| } |
| |
| /* Either no "GPUx" slots found or they are not consecutive, abort */ |
| if (!last || last + 1 - first > max_gpus) |
| return; |
| |
| /* Collect GPU device nodes, sorted by an index from "GPUn" */ |
| for (i = 0; i < npu2_phb->total_devices; ++i) { |
| gpuid = gpu_slot_to_num(npu2_phb->devices[i].nvlink.slot_label); |
| g[gpuid - first] = npu2_phb->devices[i].nvlink.pd->dn; |
| |
| /* Collect NVLink bridge nodes too, for their phandles */ |
| list_for_each(&npuphb->devices, npd, link) { |
| if (npd->bdfn == npu2_phb->devices[i].bdfn) { |
| assert(npu2_phb->devices[i].brick_index < |
| ARRAY_SIZE(n)); |
| n[npu2_phb->devices[i].brick_index] = npd->dn; |
| } |
| } |
| } |
| |
| /* |
| * Store interconnect phandles in the device tree. |
| * The mapping is from Witherspoon_Design_Workbook_v1.7_19June2018.pdf, |
| * pages 39 (Sequoia), 40 (Redbud): |
| * Figure 16: NVLink wiring diagram for planar with 6 GPUs |
| * Figure 17: NVLink wiring diagram for planar with 4 GPUs |
| */ |
| #define PEERPH(g) ((g)?(g)->phandle:0) |
| switch (witherspoon_type) { |
| case WITHERSPOON_TYPE_REDBUD: |
| if (g[0]) |
| dt_add_property_cells(g[0], "ibm,nvlink-peers", |
| PEERPH(g[1]), PEERPH(n[0]), |
| PEERPH(g[1]), PEERPH(n[1]), |
| PEERPH(g[1]), PEERPH(n[2])); |
| if (g[1]) |
| dt_add_property_cells(g[1], "ibm,nvlink-peers", |
| PEERPH(g[0]), PEERPH(n[3]), |
| PEERPH(g[0]), PEERPH(n[4]), |
| PEERPH(g[0]), PEERPH(n[5])); |
| break; |
| case WITHERSPOON_TYPE_SEQUOIA: |
| if (g[0]) |
| dt_add_property_cells(g[0], "ibm,nvlink-peers", |
| PEERPH(g[1]), PEERPH(n[0]), |
| PEERPH(g[2]), PEERPH(g[2]), |
| PEERPH(g[1]), PEERPH(n[1])); |
| if (g[1]) |
| dt_add_property_cells(g[1], "ibm,nvlink-peers", |
| PEERPH(g[0]), PEERPH(n[2]), |
| PEERPH(g[2]), PEERPH(g[2]), |
| PEERPH(g[0]), PEERPH(n[3])); |
| if (g[2]) |
| dt_add_property_cells(g[2], "ibm,nvlink-peers", |
| PEERPH(g[1]), PEERPH(g[0]), |
| PEERPH(g[1]), PEERPH(n[4]), |
| PEERPH(g[0]), PEERPH(n[5])); |
| break; |
| default: |
| break; |
| } |
| } |
| |
| static void witherspoon_finalise_dt(bool is_reboot) |
| { |
| struct dt_node *np; |
| struct proc_chip *c; |
| |
| if (is_reboot) |
| return; |
| |
| dt_for_each_compatible(dt_root, np, "ibm,power9-npu-pciex") { |
| u32 opal_id = dt_prop_get_cell(np, "ibm,opal-phbid", 1); |
| struct phb *npphb = pci_get_phb(opal_id); |
| |
| if (!npphb) |
| continue; |
| if (npphb->phb_type != phb_type_npu_v2) |
| continue; |
| npu2_phb_nvlink_dt(npphb); |
| } |
| |
| /* |
| * The I2C bus on used to talk to the GPUs has a 750K pullup |
| * which is way too big. If there's no GPUs connected to the |
| * chip all I2C transactions fail with an Arb loss error since |
| * SCL/SDA don't return to the idle state fast enough. Disable |
| * the port to squash the errors. |
| */ |
| for (c = next_chip(NULL); c; c = next_chip(c)) { |
| bool detected = false; |
| int i; |
| |
| np = dt_find_by_path(c->devnode, "i2cm@a1000/i2c-bus@4"); |
| if (!np) |
| continue; |
| |
| for (i = 0; i < 3; i++) |
| detected |= occ_get_gpu_presence(c, i); |
| |
| if (!detected) { |
| dt_check_del_prop(np, "status"); |
| dt_add_property_string(np, "status", "disabled"); |
| } |
| } |
| } |
| |
| static int witherspoon_secvar_init(void) |
| { |
| return secvar_main(secboot_tpm_driver, edk2_compatible_v1); |
| } |
| |
| /* The only difference between these is the PCI slot handling */ |
| |
| DECLARE_PLATFORM(witherspoon) = { |
| .name = "Witherspoon", |
| .probe = witherspoon_probe, |
| .init = astbmc_init, |
| .pre_pci_fixup = witherspoon_shared_slot_fixup, |
| .pci_probe_complete = witherspoon_pci_probe_complete, |
| .start_preload_resource = flash_start_preload_resource, |
| .resource_loaded = flash_resource_loaded, |
| .bmc = &bmc_plat_ast2500_openbmc, |
| .cec_power_down = astbmc_ipmi_power_down, |
| .cec_reboot = astbmc_ipmi_reboot, |
| .elog_commit = ipmi_elog_commit, |
| .finalise_dt = witherspoon_finalise_dt, |
| .exit = astbmc_exit, |
| .terminate = ipmi_terminate, |
| |
| .pci_get_slot_info = dt_slot_get_slot_info, |
| .ocapi = &witherspoon_ocapi, |
| .npu2_device_detect = witherspoon_npu2_device_detect, |
| .op_display = op_display_lpc, |
| .secvar_init = witherspoon_secvar_init, |
| }; |