blob: 76b2a3487b5d6f21528e9c301341eb27bc8fec1d [file] [log] [blame]
Alexey Kardashevskiy9fc34ad2014-06-10 15:39:23 +10001/*
2 * QEMU sPAPR PCI host for VFIO
3 *
4 * Copyright (c) 2011-2014 Alexey Kardashevskiy, IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License,
9 * or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, see <http://www.gnu.org/licenses/>.
18 */
19
Peter Maydell0d755902016-01-26 18:16:58 +000020#include "qemu/osdep.h"
Zhenzhong Duan54876d22023-11-02 15:12:22 +080021#include <sys/ioctl.h>
Markus Armbrustera9c94272016-06-22 19:11:19 +020022#include <linux/vfio.h>
Alexey Kardashevskiy9fc34ad2014-06-10 15:39:23 +100023#include "hw/ppc/spapr.h"
24#include "hw/pci-host/spapr.h"
Gavin Shan6319b1d2015-07-02 16:23:28 +100025#include "hw/pci/msix.h"
Markus Armbrusteredf5ca52022-12-22 11:03:28 +010026#include "hw/pci/pci_device.h"
Zhenzhong Duan54876d22023-11-02 15:12:22 +080027#include "hw/vfio/vfio-common.h"
David Gibson72700d72016-02-29 17:19:50 +110028#include "qemu/error-report.h"
Cédric Le Goater4278df92023-11-21 15:03:55 +010029#include CONFIG_DEVICES /* CONFIG_VFIO_PCI */
David Gibson72700d72016-02-29 17:19:50 +110030
Zhenzhong Duan54876d22023-11-02 15:12:22 +080031/*
32 * Interfaces for IBM EEH (Enhanced Error Handling)
33 */
Cédric Le Goater4278df92023-11-21 15:03:55 +010034#ifdef CONFIG_VFIO_PCI
Zhenzhong Duan54876d22023-11-02 15:12:22 +080035static bool vfio_eeh_container_ok(VFIOContainer *container)
36{
37 /*
38 * As of 2016-03-04 (linux-4.5) the host kernel EEH/VFIO
39 * implementation is broken if there are multiple groups in a
40 * container. The hardware works in units of Partitionable
41 * Endpoints (== IOMMU groups) and the EEH operations naively
42 * iterate across all groups in the container, without any logic
43 * to make sure the groups have their state synchronized. For
44 * certain operations (ENABLE) that might be ok, until an error
45 * occurs, but for others (GET_STATE) it's clearly broken.
46 */
47
48 /*
49 * XXX Once fixed kernels exist, test for them here
50 */
51
52 if (QLIST_EMPTY(&container->group_list)) {
53 return false;
54 }
55
56 if (QLIST_NEXT(QLIST_FIRST(&container->group_list), container_next)) {
57 return false;
58 }
59
60 return true;
61}
62
63static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op)
64{
65 struct vfio_eeh_pe_op pe_op = {
66 .argsz = sizeof(pe_op),
67 .op = op,
68 };
69 int ret;
70
71 if (!vfio_eeh_container_ok(container)) {
72 error_report("vfio/eeh: EEH_PE_OP 0x%x: "
73 "kernel requires a container with exactly one group", op);
74 return -EPERM;
75 }
76
77 ret = ioctl(container->fd, VFIO_EEH_PE_OP, &pe_op);
78 if (ret < 0) {
79 error_report("vfio/eeh: EEH_PE_OP 0x%x failed: %m", op);
80 return -errno;
81 }
82
83 return ret;
84}
85
86static VFIOContainer *vfio_eeh_as_container(AddressSpace *as)
87{
88 VFIOAddressSpace *space = vfio_get_address_space(as);
Eric Augere5597062023-11-02 15:12:32 +080089 VFIOContainerBase *bcontainer = NULL;
Zhenzhong Duan54876d22023-11-02 15:12:22 +080090
91 if (QLIST_EMPTY(&space->containers)) {
92 /* No containers to act on */
93 goto out;
94 }
95
Eric Augere5597062023-11-02 15:12:32 +080096 bcontainer = QLIST_FIRST(&space->containers);
Zhenzhong Duan54876d22023-11-02 15:12:22 +080097
Eric Augere5597062023-11-02 15:12:32 +080098 if (QLIST_NEXT(bcontainer, next)) {
Zhenzhong Duan54876d22023-11-02 15:12:22 +080099 /*
100 * We don't yet have logic to synchronize EEH state across
101 * multiple containers
102 */
Eric Augere5597062023-11-02 15:12:32 +0800103 bcontainer = NULL;
Zhenzhong Duan54876d22023-11-02 15:12:22 +0800104 goto out;
105 }
106
107out:
108 vfio_put_address_space(space);
Eric Augere5597062023-11-02 15:12:32 +0800109 return container_of(bcontainer, VFIOContainer, bcontainer);
Zhenzhong Duan54876d22023-11-02 15:12:22 +0800110}
111
112static bool vfio_eeh_as_ok(AddressSpace *as)
113{
114 VFIOContainer *container = vfio_eeh_as_container(as);
115
116 return (container != NULL) && vfio_eeh_container_ok(container);
117}
118
119static int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
120{
121 VFIOContainer *container = vfio_eeh_as_container(as);
122
123 if (!container) {
124 return -ENODEV;
125 }
126 return vfio_eeh_container_op(container, op);
127}
128
David Gibsonce2918c2019-03-06 15:35:37 +1100129bool spapr_phb_eeh_available(SpaprPhbState *sphb)
David Gibsonc1fa0172016-02-29 17:19:42 +1100130{
131 return vfio_eeh_as_ok(&sphb->iommu_as);
132}
133
David Gibsonce2918c2019-03-06 15:35:37 +1100134static void spapr_phb_vfio_eeh_reenable(SpaprPhbState *sphb)
Gavin Shanaef87d12015-07-02 16:23:27 +1000135{
David Gibson76a9e9f2016-02-29 14:00:34 +1100136 vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_ENABLE);
Gavin Shanaef87d12015-07-02 16:23:27 +1000137}
138
David Gibsonfbb4e982016-02-29 17:45:05 +1100139void spapr_phb_vfio_reset(DeviceState *qdev)
Alexey Kardashevskiy9fc34ad2014-06-10 15:39:23 +1000140{
Gavin Shanaef87d12015-07-02 16:23:27 +1000141 /*
142 * The PE might be in frozen state. To reenable the EEH
143 * functionality on it will clean the frozen state, which
144 * ensures that the contained PCI devices will work properly
145 * after reboot.
146 */
David Gibson76a9e9f2016-02-29 14:00:34 +1100147 spapr_phb_vfio_eeh_reenable(SPAPR_PCI_HOST_BRIDGE(qdev));
Alexey Kardashevskiy9fc34ad2014-06-10 15:39:23 +1000148}
149
Mahesh Salgaonkarac9ef662021-05-21 13:35:51 +0530150static void spapr_eeh_pci_find_device(PCIBus *bus, PCIDevice *pdev,
151 void *opaque)
152{
153 bool *found = opaque;
154
155 if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
156 *found = true;
157 }
158}
159
David Gibsonce2918c2019-03-06 15:35:37 +1100160int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb,
David Gibsonfbb4e982016-02-29 17:45:05 +1100161 unsigned int addr, int option)
Gavin Shan2aad88f2015-02-20 15:58:53 +1100162{
David Gibson76a9e9f2016-02-29 14:00:34 +1100163 uint32_t op;
Gavin Shan2aad88f2015-02-20 15:58:53 +1100164 int ret;
165
166 switch (option) {
167 case RTAS_EEH_DISABLE:
David Gibson76a9e9f2016-02-29 14:00:34 +1100168 op = VFIO_EEH_PE_DISABLE;
Gavin Shan2aad88f2015-02-20 15:58:53 +1100169 break;
170 case RTAS_EEH_ENABLE: {
171 PCIHostState *phb;
Mahesh Salgaonkarac9ef662021-05-21 13:35:51 +0530172 bool found = false;
Gavin Shan2aad88f2015-02-20 15:58:53 +1100173
174 /*
Mahesh Salgaonkarac9ef662021-05-21 13:35:51 +0530175 * The EEH functionality is enabled per sphb level instead of
176 * per PCI device. We have already identified this specific sphb
177 * based on buid passed as argument to ibm,set-eeh-option rtas
178 * call. Now we just need to check the validity of the PCI
179 * pass-through devices (vfio-pci) under this sphb bus.
180 * We have already validated that all the devices under this sphb
Michael Tokareve6a19a62023-07-14 14:18:16 +0300181 * are from same iommu group (within same PE) before coming here.
Mahesh Salgaonkarac9ef662021-05-21 13:35:51 +0530182 *
183 * Prior to linux commit 98ba956f6a389 ("powerpc/pseries/eeh:
184 * Rework device EEH PE determination") kernel would call
185 * eeh-set-option for each device in the PE using the device's
186 * config_address as the argument rather than the PE address.
187 * Hence if we check validity of supplied config_addr whether
188 * it matches to this PHB will cause issues with older kernel
189 * versions v5.9 and older. If we return an error from
190 * eeh-set-option when the argument isn't a valid PE address
191 * then older kernels (v5.9 and older) will interpret that as
192 * EEH not being supported.
Gavin Shan2aad88f2015-02-20 15:58:53 +1100193 */
194 phb = PCI_HOST_BRIDGE(sphb);
Mahesh Salgaonkarac9ef662021-05-21 13:35:51 +0530195 pci_for_each_device(phb->bus, (addr >> 16) & 0xFF,
196 spapr_eeh_pci_find_device, &found);
197
198 if (!found) {
Gavin Shan2aad88f2015-02-20 15:58:53 +1100199 return RTAS_OUT_PARAM_ERROR;
200 }
201
David Gibson76a9e9f2016-02-29 14:00:34 +1100202 op = VFIO_EEH_PE_ENABLE;
Gavin Shan2aad88f2015-02-20 15:58:53 +1100203 break;
204 }
205 case RTAS_EEH_THAW_IO:
David Gibson76a9e9f2016-02-29 14:00:34 +1100206 op = VFIO_EEH_PE_UNFREEZE_IO;
Gavin Shan2aad88f2015-02-20 15:58:53 +1100207 break;
208 case RTAS_EEH_THAW_DMA:
David Gibson76a9e9f2016-02-29 14:00:34 +1100209 op = VFIO_EEH_PE_UNFREEZE_DMA;
Gavin Shan2aad88f2015-02-20 15:58:53 +1100210 break;
211 default:
212 return RTAS_OUT_PARAM_ERROR;
213 }
214
David Gibson76a9e9f2016-02-29 14:00:34 +1100215 ret = vfio_eeh_as_op(&sphb->iommu_as, op);
Gavin Shan2aad88f2015-02-20 15:58:53 +1100216 if (ret < 0) {
217 return RTAS_OUT_HW_ERROR;
218 }
219
220 return RTAS_OUT_SUCCESS;
221}
222
David Gibsonce2918c2019-03-06 15:35:37 +1100223int spapr_phb_vfio_eeh_get_state(SpaprPhbState *sphb, int *state)
Gavin Shan2aad88f2015-02-20 15:58:53 +1100224{
Gavin Shan2aad88f2015-02-20 15:58:53 +1100225 int ret;
226
David Gibson76a9e9f2016-02-29 14:00:34 +1100227 ret = vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_GET_STATE);
Gavin Shan2aad88f2015-02-20 15:58:53 +1100228 if (ret < 0) {
229 return RTAS_OUT_PARAM_ERROR;
230 }
231
232 *state = ret;
233 return RTAS_OUT_SUCCESS;
234}
235
Gavin Shan6319b1d2015-07-02 16:23:28 +1000236static void spapr_phb_vfio_eeh_clear_dev_msix(PCIBus *bus,
237 PCIDevice *pdev,
238 void *opaque)
239{
240 /* Check if the device is VFIO PCI device */
241 if (!object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
242 return;
243 }
244
245 /*
246 * The MSIx table will be cleaned out by reset. We need
247 * disable it so that it can be reenabled properly. Also,
248 * the cached MSIx table should be cleared as it's not
249 * reflecting the contents in hardware.
250 */
251 if (msix_enabled(pdev)) {
252 uint16_t flags;
253
254 flags = pci_host_config_read_common(pdev,
255 pdev->msix_cap + PCI_MSIX_FLAGS,
256 pci_config_size(pdev), 2);
257 flags &= ~PCI_MSIX_FLAGS_ENABLE;
258 pci_host_config_write_common(pdev,
259 pdev->msix_cap + PCI_MSIX_FLAGS,
260 pci_config_size(pdev), flags, 2);
261 }
262
263 msix_reset(pdev);
264}
265
266static void spapr_phb_vfio_eeh_clear_bus_msix(PCIBus *bus, void *opaque)
267{
Peter Xu2914fc62021-10-28 12:31:26 +0800268 pci_for_each_device_under_bus(bus, spapr_phb_vfio_eeh_clear_dev_msix,
269 NULL);
Gavin Shan6319b1d2015-07-02 16:23:28 +1000270}
271
David Gibsonce2918c2019-03-06 15:35:37 +1100272static void spapr_phb_vfio_eeh_pre_reset(SpaprPhbState *sphb)
Gavin Shan6319b1d2015-07-02 16:23:28 +1000273{
274 PCIHostState *phb = PCI_HOST_BRIDGE(sphb);
275
276 pci_for_each_bus(phb->bus, spapr_phb_vfio_eeh_clear_bus_msix, NULL);
277}
278
David Gibsonce2918c2019-03-06 15:35:37 +1100279int spapr_phb_vfio_eeh_reset(SpaprPhbState *sphb, int option)
Gavin Shan2aad88f2015-02-20 15:58:53 +1100280{
David Gibson76a9e9f2016-02-29 14:00:34 +1100281 uint32_t op;
Gavin Shan2aad88f2015-02-20 15:58:53 +1100282 int ret;
283
284 switch (option) {
285 case RTAS_SLOT_RESET_DEACTIVATE:
David Gibson76a9e9f2016-02-29 14:00:34 +1100286 op = VFIO_EEH_PE_RESET_DEACTIVATE;
Gavin Shan2aad88f2015-02-20 15:58:53 +1100287 break;
288 case RTAS_SLOT_RESET_HOT:
Gavin Shan6319b1d2015-07-02 16:23:28 +1000289 spapr_phb_vfio_eeh_pre_reset(sphb);
David Gibson76a9e9f2016-02-29 14:00:34 +1100290 op = VFIO_EEH_PE_RESET_HOT;
Gavin Shan2aad88f2015-02-20 15:58:53 +1100291 break;
292 case RTAS_SLOT_RESET_FUNDAMENTAL:
Gavin Shan6319b1d2015-07-02 16:23:28 +1000293 spapr_phb_vfio_eeh_pre_reset(sphb);
David Gibson76a9e9f2016-02-29 14:00:34 +1100294 op = VFIO_EEH_PE_RESET_FUNDAMENTAL;
Gavin Shan2aad88f2015-02-20 15:58:53 +1100295 break;
296 default:
297 return RTAS_OUT_PARAM_ERROR;
298 }
299
David Gibson76a9e9f2016-02-29 14:00:34 +1100300 ret = vfio_eeh_as_op(&sphb->iommu_as, op);
Gavin Shan2aad88f2015-02-20 15:58:53 +1100301 if (ret < 0) {
302 return RTAS_OUT_HW_ERROR;
303 }
304
305 return RTAS_OUT_SUCCESS;
306}
307
David Gibsonce2918c2019-03-06 15:35:37 +1100308int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb)
Gavin Shan2aad88f2015-02-20 15:58:53 +1100309{
Gavin Shan2aad88f2015-02-20 15:58:53 +1100310 int ret;
311
David Gibson76a9e9f2016-02-29 14:00:34 +1100312 ret = vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_CONFIGURE);
Gavin Shan2aad88f2015-02-20 15:58:53 +1100313 if (ret < 0) {
314 return RTAS_OUT_PARAM_ERROR;
315 }
316
317 return RTAS_OUT_SUCCESS;
318}
Cédric Le Goater4278df92023-11-21 15:03:55 +0100319
320#else
321
322bool spapr_phb_eeh_available(SpaprPhbState *sphb)
323{
324 return false;
325}
326
327void spapr_phb_vfio_reset(DeviceState *qdev)
328{
329}
330
331int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb,
332 unsigned int addr, int option)
333{
334 return RTAS_OUT_NOT_SUPPORTED;
335}
336
337int spapr_phb_vfio_eeh_get_state(SpaprPhbState *sphb, int *state)
338{
339 return RTAS_OUT_NOT_SUPPORTED;
340}
341
342int spapr_phb_vfio_eeh_reset(SpaprPhbState *sphb, int option)
343{
344 return RTAS_OUT_NOT_SUPPORTED;
345}
346
347int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb)
348{
349 return RTAS_OUT_NOT_SUPPORTED;
350}
351
352#endif /* CONFIG_VFIO_PCI */