/*
 * virtio-iommu device
 *
 * Copyright (c) 2020 Red Hat, Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 *
 */

#include "qemu/osdep.h"
#include "qemu/log.h"
#include "qemu/iov.h"
#include "qemu/range.h"
#include "qemu/reserved-region.h"
#include "exec/target_page.h"
#include "hw/qdev-properties.h"
#include "hw/virtio/virtio.h"
#include "sysemu/kvm.h"
#include "sysemu/reset.h"
#include "sysemu/sysemu.h"
#include "qemu/units.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "trace.h"

#include "standard-headers/linux/virtio_ids.h"

#include "hw/virtio/virtio-bus.h"
#include "hw/virtio/virtio-iommu.h"
#include "hw/pci/pci_bus.h"
#include "hw/pci/pci.h"

/* Max sizes */
#define VIOMMU_DEFAULT_QUEUE_SIZE 256
#define VIOMMU_PROBE_SIZE 512

typedef struct VirtIOIOMMUDomain {
    uint32_t id;
    bool bypass;
    GTree *mappings;
    QLIST_HEAD(, VirtIOIOMMUEndpoint) endpoint_list;
} VirtIOIOMMUDomain;

typedef struct VirtIOIOMMUEndpoint {
    uint32_t id;
    VirtIOIOMMUDomain *domain;
    IOMMUMemoryRegion *iommu_mr;
    QLIST_ENTRY(VirtIOIOMMUEndpoint) next;
} VirtIOIOMMUEndpoint;

typedef struct VirtIOIOMMUInterval {
    uint64_t low;
    uint64_t high;
} VirtIOIOMMUInterval;

typedef struct VirtIOIOMMUMapping {
    uint64_t phys_addr;
    uint32_t flags;
} VirtIOIOMMUMapping;

struct hiod_key {
    PCIBus *bus;
    uint8_t devfn;
};

static inline uint16_t virtio_iommu_get_bdf(IOMMUDevice *dev)
{
    return PCI_BUILD_BDF(pci_bus_num(dev->bus), dev->devfn);
}

static bool virtio_iommu_device_bypassed(IOMMUDevice *sdev)
{
    uint32_t sid;
    bool bypassed;
    VirtIOIOMMU *s = sdev->viommu;
    VirtIOIOMMUEndpoint *ep;

    sid = virtio_iommu_get_bdf(sdev);

    qemu_rec_mutex_lock(&s->mutex);
    /* need to check bypass before system reset */
    if (!s->endpoints) {
        bypassed = s->config.bypass;
        goto unlock;
    }

    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));
    if (!ep || !ep->domain) {
        bypassed = s->config.bypass;
    } else {
        bypassed = ep->domain->bypass;
    }

unlock:
    qemu_rec_mutex_unlock(&s->mutex);
    return bypassed;
}

/* Return whether the device is using IOMMU translation. */
static bool virtio_iommu_switch_address_space(IOMMUDevice *sdev)
{
    bool use_remapping;

    assert(sdev);

    use_remapping = !virtio_iommu_device_bypassed(sdev);

    trace_virtio_iommu_switch_address_space(pci_bus_num(sdev->bus),
                                            PCI_SLOT(sdev->devfn),
                                            PCI_FUNC(sdev->devfn),
                                            use_remapping);

    /* Turn off the current MR first, then turn on the other */
    if (use_remapping) {
        memory_region_set_enabled(&sdev->bypass_mr, false);
        memory_region_set_enabled(MEMORY_REGION(&sdev->iommu_mr), true);
    } else {
        memory_region_set_enabled(MEMORY_REGION(&sdev->iommu_mr), false);
        memory_region_set_enabled(&sdev->bypass_mr, true);
    }

    return use_remapping;
}

static void virtio_iommu_switch_address_space_all(VirtIOIOMMU *s)
{
    GHashTableIter iter;
    IOMMUPciBus *iommu_pci_bus;
    int i;

    g_hash_table_iter_init(&iter, s->as_by_busptr);
    while (g_hash_table_iter_next(&iter, NULL, (void **)&iommu_pci_bus)) {
        for (i = 0; i < PCI_DEVFN_MAX; i++) {
            if (!iommu_pci_bus->pbdev[i]) {
                continue;
            }
            virtio_iommu_switch_address_space(iommu_pci_bus->pbdev[i]);
        }
    }
}

/**
 * The bus number is used for lookup when SID based operations occur.
 * In that case we lazily populate the IOMMUPciBus array from the bus hash
 * table. At the time the IOMMUPciBus is created (iommu_find_add_as), the bus
 * numbers may not all be initialized yet.
 */
static IOMMUPciBus *iommu_find_iommu_pcibus(VirtIOIOMMU *s, uint8_t bus_num)
{
    IOMMUPciBus *iommu_pci_bus = s->iommu_pcibus_by_bus_num[bus_num];

    if (!iommu_pci_bus) {
        GHashTableIter iter;

        g_hash_table_iter_init(&iter, s->as_by_busptr);
        while (g_hash_table_iter_next(&iter, NULL, (void **)&iommu_pci_bus)) {
            if (pci_bus_num(iommu_pci_bus->bus) == bus_num) {
                s->iommu_pcibus_by_bus_num[bus_num] = iommu_pci_bus;
                return iommu_pci_bus;
            }
        }
        return NULL;
    }
    return iommu_pci_bus;
}

static IOMMUMemoryRegion *virtio_iommu_mr(VirtIOIOMMU *s, uint32_t sid)
{
    uint8_t bus_n, devfn;
    IOMMUPciBus *iommu_pci_bus;
    IOMMUDevice *dev;

    bus_n = PCI_BUS_NUM(sid);
    iommu_pci_bus = iommu_find_iommu_pcibus(s, bus_n);
    if (iommu_pci_bus) {
        devfn = sid & (PCI_DEVFN_MAX - 1);
        dev = iommu_pci_bus->pbdev[devfn];
        if (dev) {
            return &dev->iommu_mr;
        }
    }
    return NULL;
}

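/*
 * Mappings are stored in a GTree keyed by VirtIOIOMMUInterval and ordered
 * with interval_cmp() below: two intervals compare equal as soon as they
 * overlap. A lookup with a degenerate [addr, addr + 1] key therefore
 * returns a mapping overlapping @addr, which is what the translate and
 * unmap paths rely on.
 */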
static gint interval_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
{
    VirtIOIOMMUInterval *inta = (VirtIOIOMMUInterval *)a;
    VirtIOIOMMUInterval *intb = (VirtIOIOMMUInterval *)b;

    if (inta->high < intb->low) {
        return -1;
    } else if (intb->high < inta->low) {
        return 1;
    } else {
        return 0;
    }
}

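/*
 * IOMMU notifiers describe a region with a base address and an address
 * mask, so they can only express naturally aligned, power-of-two sized
 * ranges. The helper below splits an arbitrary [virt_start, virt_end]
 * range into such chunks, e.g. [0x1000, 0x4fff] is notified as
 * 4K@0x1000, 8K@0x2000 and 4K@0x4000.
 */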
static void virtio_iommu_notify_map_unmap(IOMMUMemoryRegion *mr,
                                          IOMMUTLBEvent *event,
                                          hwaddr virt_start, hwaddr virt_end)
{
    uint64_t delta = virt_end - virt_start;

    event->entry.iova = virt_start;
    event->entry.addr_mask = delta;

    if (delta == UINT64_MAX) {
        memory_region_notify_iommu(mr, 0, *event);
    }

    while (virt_start != virt_end + 1) {
        uint64_t mask = dma_aligned_pow2_mask(virt_start, virt_end, 64);

        event->entry.addr_mask = mask;
        event->entry.iova = virt_start;
        memory_region_notify_iommu(mr, 0, *event);
        virt_start += mask + 1;
        if (event->entry.perm != IOMMU_NONE) {
            event->entry.translated_addr += mask + 1;
        }
    }
}

static void virtio_iommu_notify_map(IOMMUMemoryRegion *mr, hwaddr virt_start,
                                    hwaddr virt_end, hwaddr paddr,
                                    uint32_t flags)
{
    IOMMUTLBEvent event;
    IOMMUAccessFlags perm = IOMMU_ACCESS_FLAG(flags & VIRTIO_IOMMU_MAP_F_READ,
                                              flags & VIRTIO_IOMMU_MAP_F_WRITE);

    if (!(mr->iommu_notify_flags & IOMMU_NOTIFIER_MAP) ||
        (flags & VIRTIO_IOMMU_MAP_F_MMIO) || !perm) {
        return;
    }

    trace_virtio_iommu_notify_map(mr->parent_obj.name, virt_start, virt_end,
                                  paddr, perm);

    event.type = IOMMU_NOTIFIER_MAP;
    event.entry.target_as = &address_space_memory;
    event.entry.perm = perm;
    event.entry.translated_addr = paddr;

    virtio_iommu_notify_map_unmap(mr, &event, virt_start, virt_end);
}

static void virtio_iommu_notify_unmap(IOMMUMemoryRegion *mr, hwaddr virt_start,
                                      hwaddr virt_end)
{
    IOMMUTLBEvent event;

    if (!(mr->iommu_notify_flags & IOMMU_NOTIFIER_UNMAP)) {
        return;
    }

    trace_virtio_iommu_notify_unmap(mr->parent_obj.name, virt_start, virt_end);

    event.type = IOMMU_NOTIFIER_UNMAP;
    event.entry.target_as = &address_space_memory;
    event.entry.perm = IOMMU_NONE;
    event.entry.translated_addr = 0;

    virtio_iommu_notify_map_unmap(mr, &event, virt_start, virt_end);
}

static gboolean virtio_iommu_notify_unmap_cb(gpointer key, gpointer value,
                                             gpointer data)
{
    VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
    IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;

    virtio_iommu_notify_unmap(mr, interval->low, interval->high);

    return false;
}

static gboolean virtio_iommu_notify_map_cb(gpointer key, gpointer value,
                                           gpointer data)
{
    VirtIOIOMMUMapping *mapping = (VirtIOIOMMUMapping *) value;
    VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
    IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;

    virtio_iommu_notify_map(mr, interval->low, interval->high,
                            mapping->phys_addr, mapping->flags);

    return false;
}

static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep)
{
    VirtIOIOMMUDomain *domain = ep->domain;
    IOMMUDevice *sdev = container_of(ep->iommu_mr, IOMMUDevice, iommu_mr);

    if (!ep->domain) {
        return;
    }
    trace_virtio_iommu_detach_endpoint_from_domain(domain->id, ep->id);
    g_tree_foreach(domain->mappings, virtio_iommu_notify_unmap_cb,
                   ep->iommu_mr);
    QLIST_REMOVE(ep, next);
    ep->domain = NULL;
    virtio_iommu_switch_address_space(sdev);
}

static VirtIOIOMMUEndpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
                                                      uint32_t ep_id)
{
    VirtIOIOMMUEndpoint *ep;
    IOMMUMemoryRegion *mr;

    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(ep_id));
    if (ep) {
        return ep;
    }
    mr = virtio_iommu_mr(s, ep_id);
    if (!mr) {
        return NULL;
    }
    ep = g_malloc0(sizeof(*ep));
    ep->id = ep_id;
    ep->iommu_mr = mr;
    trace_virtio_iommu_get_endpoint(ep_id);
    g_tree_insert(s->endpoints, GUINT_TO_POINTER(ep_id), ep);
    return ep;
}

static void virtio_iommu_put_endpoint(gpointer data)
{
    VirtIOIOMMUEndpoint *ep = (VirtIOIOMMUEndpoint *)data;

    if (ep->domain) {
        virtio_iommu_detach_endpoint_from_domain(ep);
    }

    trace_virtio_iommu_put_endpoint(ep->id);
    g_free(ep);
}

static VirtIOIOMMUDomain *virtio_iommu_get_domain(VirtIOIOMMU *s,
                                                  uint32_t domain_id,
                                                  bool bypass)
{
    VirtIOIOMMUDomain *domain;

    domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id));
    if (domain) {
        if (domain->bypass != bypass) {
            return NULL;
        }
        return domain;
    }
    domain = g_malloc0(sizeof(*domain));
    domain->id = domain_id;
    domain->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp,
                                       NULL, (GDestroyNotify)g_free,
                                       (GDestroyNotify)g_free);
    domain->bypass = bypass;
    g_tree_insert(s->domains, GUINT_TO_POINTER(domain_id), domain);
    QLIST_INIT(&domain->endpoint_list);
    trace_virtio_iommu_get_domain(domain_id);
    return domain;
}

static void virtio_iommu_put_domain(gpointer data)
{
    VirtIOIOMMUDomain *domain = (VirtIOIOMMUDomain *)data;
    VirtIOIOMMUEndpoint *iter, *tmp;

    QLIST_FOREACH_SAFE(iter, &domain->endpoint_list, next, tmp) {
        virtio_iommu_detach_endpoint_from_domain(iter);
    }
    g_tree_destroy(domain->mappings);
    trace_virtio_iommu_put_domain(domain->id);
    g_free(domain);
}

static void add_prop_resv_regions(IOMMUDevice *sdev)
{
    VirtIOIOMMU *s = sdev->viommu;
    int i;

    for (i = 0; i < s->nr_prop_resv_regions; i++) {
        ReservedRegion *reg = g_new0(ReservedRegion, 1);

        *reg = s->prop_resv_regions[i];
        sdev->resv_regions = resv_region_list_insert(sdev->resv_regions, reg);
    }
}

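/*
 * Address space layout per endpoint (sketch):
 *
 *   sdev->root (container)
 *     +-- MEMORY_REGION(&sdev->iommu_mr)   translated accesses
 *     +-- sdev->bypass_mr                  alias of system memory
 *
 * Exactly one of the two subregions is enabled at any time;
 * virtio_iommu_switch_address_space() flips between them depending on
 * whether the endpoint is bypassed.
 */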
static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque,
                                              int devfn)
{
    VirtIOIOMMU *s = opaque;
    IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus);
    static uint32_t mr_index;
    IOMMUDevice *sdev;

    if (!sbus) {
        sbus = g_malloc0(sizeof(IOMMUPciBus) +
                         sizeof(IOMMUDevice *) * PCI_DEVFN_MAX);
        sbus->bus = bus;
        g_hash_table_insert(s->as_by_busptr, bus, sbus);
    }

    sdev = sbus->pbdev[devfn];
    if (!sdev) {
        char *name = g_strdup_printf("%s-%d-%d",
                                     TYPE_VIRTIO_IOMMU_MEMORY_REGION,
                                     mr_index++, devfn);
        sdev = sbus->pbdev[devfn] = g_new0(IOMMUDevice, 1);

        sdev->viommu = s;
        sdev->bus = bus;
        sdev->devfn = devfn;

        trace_virtio_iommu_init_iommu_mr(name);

        memory_region_init(&sdev->root, OBJECT(s), name, UINT64_MAX);
        address_space_init(&sdev->as, &sdev->root, TYPE_VIRTIO_IOMMU);
        add_prop_resv_regions(sdev);

        /*
         * Build the IOMMU-disabled container with an alias to the
         * shared MR. Aliasing to a shared memory region helps the
         * memory API detect identical FlatViews, so devices can share
         * the same FlatView when in bypass mode (either because the
         * virtio-iommu driver is not configured or because "iommu=pt"
         * is used). This greatly reduces the total number of FlatViews
         * in the system and hence makes the VM run faster.
         */
        memory_region_init_alias(&sdev->bypass_mr, OBJECT(s),
                                 "system", get_system_memory(), 0,
                                 memory_region_size(get_system_memory()));

        memory_region_init_iommu(&sdev->iommu_mr, sizeof(sdev->iommu_mr),
                                 TYPE_VIRTIO_IOMMU_MEMORY_REGION,
                                 OBJECT(s), name,
                                 UINT64_MAX);

        /*
         * Hook both containers under the root container; we switch
         * between the iommu and bypass MRs by enabling/disabling the
         * corresponding subregion.
         */
        memory_region_add_subregion_overlap(&sdev->root, 0,
                                            MEMORY_REGION(&sdev->iommu_mr),
                                            0);
        memory_region_add_subregion_overlap(&sdev->root, 0,
                                            &sdev->bypass_mr, 0);

        virtio_iommu_switch_address_space(sdev);
        g_free(name);
    }
    return &sdev->as;
}

static gboolean hiod_equal(gconstpointer v1, gconstpointer v2)
{
    const struct hiod_key *key1 = v1;
    const struct hiod_key *key2 = v2;

    return (key1->bus == key2->bus) && (key1->devfn == key2->devfn);
}

static guint hiod_hash(gconstpointer v)
{
    const struct hiod_key *key = v;
    guint value = (guint)(uintptr_t)key->bus;

    return (guint)(value << 8 | key->devfn);
}

static void hiod_destroy(gpointer v)
{
    object_unref(v);
}

static HostIOMMUDevice *
get_host_iommu_device(VirtIOIOMMU *viommu, PCIBus *bus, int devfn)
{
    struct hiod_key key = {
        .bus = bus,
        .devfn = devfn,
    };

    return g_hash_table_lookup(viommu->host_iommu_devices, &key);
}

/**
 * rebuild_resv_regions: rebuild the reserved-region list, merging the
 * host reserved ranges with the reserved regions set through properties
 */
static int rebuild_resv_regions(IOMMUDevice *sdev)
{
    GList *l;
    int i = 0;

    /* free the existing list and rebuild it from scratch */
    g_list_free_full(sdev->resv_regions, g_free);
    sdev->resv_regions = NULL;

    /* First add host reserved regions if any, all tagged as RESERVED */
    for (l = sdev->host_resv_ranges; l; l = l->next) {
        ReservedRegion *reg = g_new0(ReservedRegion, 1);
        Range *r = (Range *)l->data;

        reg->type = VIRTIO_IOMMU_RESV_MEM_T_RESERVED;
        range_set_bounds(&reg->range, range_lob(r), range_upb(r));
        sdev->resv_regions = resv_region_list_insert(sdev->resv_regions, reg);
        trace_virtio_iommu_host_resv_regions(sdev->iommu_mr.parent_obj.name, i,
                                             range_lob(&reg->range),
                                             range_upb(&reg->range));
        i++;
    }
    /*
     * Then add the higher priority reserved regions set by the machine
     * through properties
     */
    add_prop_resv_regions(sdev);
    return 0;
}

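/*
 * The host exposes its *usable* IOVA ranges; everything outside them must
 * be treated as reserved. range_inverse_array() computes that complement
 * over [0, UINT64_MAX]. For instance, usable ranges [0, 0xfedfffff] and
 * [0xfef00000, UINT64_MAX] (leaving out a typical x86 MSI window) would
 * yield a single reserved range [0xfee00000, 0xfeefffff].
 */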
static int virtio_iommu_set_host_iova_ranges(VirtIOIOMMU *s, PCIBus *bus,
                                             int devfn, GList *iova_ranges,
                                             Error **errp)
{
    IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus);
    IOMMUDevice *sdev;
    int ret = -EINVAL;

    if (!sbus) {
        error_setg(errp, "%s: no IOMMUPciBus found!", __func__);
        return ret;
    }

    sdev = sbus->pbdev[devfn];
    if (!sdev) {
        error_setg(errp, "%s: no IOMMUDevice found!", __func__);
        return ret;
    }

    if (sdev->host_resv_ranges) {
        error_setg(errp, "%s virtio-iommu does not support aliased BDF",
                   __func__);
        return ret;
    }

    range_inverse_array(iova_ranges,
                        &sdev->host_resv_ranges,
                        0, UINT64_MAX);
    rebuild_resv_regions(sdev);

    return 0;
}

static void virtio_iommu_unset_host_iova_ranges(VirtIOIOMMU *s, PCIBus *bus,
                                                int devfn)
{
    IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus);
    IOMMUDevice *sdev;

    if (!sbus) {
        return;
    }

    sdev = sbus->pbdev[devfn];
    if (!sdev) {
        return;
    }

    g_list_free_full(g_steal_pointer(&sdev->host_resv_ranges), g_free);
    g_list_free_full(sdev->resv_regions, g_free);
    sdev->host_resv_ranges = NULL;
    sdev->resv_regions = NULL;
    add_prop_resv_regions(sdev);
}

static bool check_page_size_mask(VirtIOIOMMU *viommu, uint64_t new_mask,
                                 Error **errp)
{
    uint64_t cur_mask = viommu->config.page_size_mask;

    if ((cur_mask & new_mask) == 0) {
        error_setg(errp, "virtio-iommu reports a page size mask 0x%"PRIx64
                   " incompatible with currently supported mask 0x%"PRIx64,
                   new_mask, cur_mask);
        return false;
    }
    /*
     * Once the granule is frozen we can't change the mask anymore. If by
     * chance the hotplugged device supports the same granule, we can still
     * accept it.
     */
    if (viommu->granule_frozen) {
        int cur_granule = ctz64(cur_mask);

        if (!(BIT_ULL(cur_granule) & new_mask)) {
            error_setg(errp,
                       "virtio-iommu does not support frozen granule 0x%llx",
                       BIT_ULL(cur_granule));
            return false;
        }
    }
    return true;
}

static bool virtio_iommu_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
                                          HostIOMMUDevice *hiod, Error **errp)
{
    ERRP_GUARD();
    VirtIOIOMMU *viommu = opaque;
    HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_GET_CLASS(hiod);
    struct hiod_key *new_key;
    GList *host_iova_ranges = NULL;

    assert(hiod);

    if (get_host_iommu_device(viommu, bus, devfn)) {
        error_setg(errp, "Host IOMMU device already exists");
        return false;
    }

    if (hiodc->get_iova_ranges) {
        int ret;
        host_iova_ranges = hiodc->get_iova_ranges(hiod);
        if (!host_iova_ranges) {
            return true; /* some old kernels may not support that capability */
        }
        ret = virtio_iommu_set_host_iova_ranges(viommu, hiod->aliased_bus,
                                                hiod->aliased_devfn,
                                                host_iova_ranges, errp);
        if (ret) {
            goto error;
        }
    }
    if (hiodc->get_page_size_mask) {
        uint64_t new_mask = hiodc->get_page_size_mask(hiod);

        if (check_page_size_mask(viommu, new_mask, errp)) {
            /*
             * The default mask depends on the "granule" property. For example,
             * with 4k granule, it is -(4 * KiB). When an assigned device has
             * page size restrictions due to the hardware IOMMU configuration,
             * apply this restriction to the mask.
             */
            trace_virtio_iommu_update_page_size_mask(hiod->name,
                                                     viommu->config.page_size_mask,
                                                     new_mask);
            if (!viommu->granule_frozen) {
                viommu->config.page_size_mask &= new_mask;
            }
        } else {
            error_prepend(errp, "%s: ", hiod->name);
            goto error;
        }
    }

    new_key = g_malloc(sizeof(*new_key));
    new_key->bus = bus;
    new_key->devfn = devfn;

    object_ref(hiod);
    g_hash_table_insert(viommu->host_iommu_devices, new_key, hiod);
    g_list_free_full(host_iova_ranges, g_free);

    return true;
error:
    g_list_free_full(host_iova_ranges, g_free);
    return false;
}

static void
virtio_iommu_unset_iommu_device(PCIBus *bus, void *opaque, int devfn)
{
    VirtIOIOMMU *viommu = opaque;
    HostIOMMUDevice *hiod;
    struct hiod_key key = {
        .bus = bus,
        .devfn = devfn,
    };

    hiod = g_hash_table_lookup(viommu->host_iommu_devices, &key);
    if (!hiod) {
        return;
    }
    virtio_iommu_unset_host_iova_ranges(viommu, hiod->aliased_bus,
                                        hiod->aliased_devfn);

    g_hash_table_remove(viommu->host_iommu_devices, &key);
}

static const PCIIOMMUOps virtio_iommu_ops = {
    .get_address_space = virtio_iommu_find_add_as,
    .set_iommu_device = virtio_iommu_set_iommu_device,
    .unset_iommu_device = virtio_iommu_unset_iommu_device,
};

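/*
 * ATTACH request: bind an endpoint to a domain. If the endpoint is
 * already attached elsewhere it is detached first, and the previous
 * domain is destroyed once its endpoint list becomes empty. A domain is
 * created on first use; its bypass flag is fixed at creation time, so a
 * later ATTACH with a conflicting VIRTIO_IOMMU_ATTACH_F_BYPASS flag is
 * rejected with VIRTIO_IOMMU_S_INVAL.
 */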
static int virtio_iommu_attach(VirtIOIOMMU *s,
                               struct virtio_iommu_req_attach *req)
{
    uint32_t domain_id = le32_to_cpu(req->domain);
    uint32_t ep_id = le32_to_cpu(req->endpoint);
    uint32_t flags = le32_to_cpu(req->flags);
    VirtIOIOMMUDomain *domain;
    VirtIOIOMMUEndpoint *ep;
    IOMMUDevice *sdev;

    trace_virtio_iommu_attach(domain_id, ep_id);

    if (flags & ~VIRTIO_IOMMU_ATTACH_F_BYPASS) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    ep = virtio_iommu_get_endpoint(s, ep_id);
    if (!ep) {
        return VIRTIO_IOMMU_S_NOENT;
    }

    if (ep->domain) {
        VirtIOIOMMUDomain *previous_domain = ep->domain;
        /*
         * The endpoint is already attached to a domain,
         * detach it first.
         */
        virtio_iommu_detach_endpoint_from_domain(ep);
        if (QLIST_EMPTY(&previous_domain->endpoint_list)) {
            g_tree_remove(s->domains, GUINT_TO_POINTER(previous_domain->id));
        }
    }

    domain = virtio_iommu_get_domain(s, domain_id,
                                     flags & VIRTIO_IOMMU_ATTACH_F_BYPASS);
    if (!domain) {
        /* Incompatible bypass flag */
        return VIRTIO_IOMMU_S_INVAL;
    }
    QLIST_INSERT_HEAD(&domain->endpoint_list, ep, next);

    ep->domain = domain;
    sdev = container_of(ep->iommu_mr, IOMMUDevice, iommu_mr);
    virtio_iommu_switch_address_space(sdev);

    /* Replay domain mappings on the associated memory region */
    g_tree_foreach(domain->mappings, virtio_iommu_notify_map_cb,
                   ep->iommu_mr);

    return VIRTIO_IOMMU_S_OK;
}

static int virtio_iommu_detach(VirtIOIOMMU *s,
                               struct virtio_iommu_req_detach *req)
{
    uint32_t domain_id = le32_to_cpu(req->domain);
    uint32_t ep_id = le32_to_cpu(req->endpoint);
    VirtIOIOMMUDomain *domain;
    VirtIOIOMMUEndpoint *ep;

    trace_virtio_iommu_detach(domain_id, ep_id);

    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(ep_id));
    if (!ep) {
        return VIRTIO_IOMMU_S_NOENT;
    }

    domain = ep->domain;

    if (!domain || domain->id != domain_id) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    virtio_iommu_detach_endpoint_from_domain(ep);

    if (QLIST_EMPTY(&domain->endpoint_list)) {
        g_tree_remove(s->domains, GUINT_TO_POINTER(domain->id));
    }
    g_tree_remove(s->endpoints, GUINT_TO_POINTER(ep_id));
    return VIRTIO_IOMMU_S_OK;
}

static int virtio_iommu_map(VirtIOIOMMU *s,
                            struct virtio_iommu_req_map *req)
{
    uint32_t domain_id = le32_to_cpu(req->domain);
    uint64_t phys_start = le64_to_cpu(req->phys_start);
    uint64_t virt_start = le64_to_cpu(req->virt_start);
    uint64_t virt_end = le64_to_cpu(req->virt_end);
    uint32_t flags = le32_to_cpu(req->flags);
    VirtIOIOMMUDomain *domain;
    VirtIOIOMMUInterval *interval;
    VirtIOIOMMUMapping *mapping;
    VirtIOIOMMUEndpoint *ep;

    if (flags & ~VIRTIO_IOMMU_MAP_F_MASK) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id));
    if (!domain) {
        return VIRTIO_IOMMU_S_NOENT;
    }

    if (domain->bypass) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    interval = g_malloc0(sizeof(*interval));

    interval->low = virt_start;
    interval->high = virt_end;

    mapping = g_tree_lookup(domain->mappings, (gpointer)interval);
    if (mapping) {
        g_free(interval);
        return VIRTIO_IOMMU_S_INVAL;
    }

    trace_virtio_iommu_map(domain_id, virt_start, virt_end, phys_start, flags);

    mapping = g_malloc0(sizeof(*mapping));
    mapping->phys_addr = phys_start;
    mapping->flags = flags;

    g_tree_insert(domain->mappings, interval, mapping);

    QLIST_FOREACH(ep, &domain->endpoint_list, next) {
        virtio_iommu_notify_map(ep->iommu_mr, virt_start, virt_end, phys_start,
                                flags);
    }

    return VIRTIO_IOMMU_S_OK;
}

static int virtio_iommu_unmap(VirtIOIOMMU *s,
                              struct virtio_iommu_req_unmap *req)
{
    uint32_t domain_id = le32_to_cpu(req->domain);
    uint64_t virt_start = le64_to_cpu(req->virt_start);
    uint64_t virt_end = le64_to_cpu(req->virt_end);
    VirtIOIOMMUMapping *iter_val;
    VirtIOIOMMUInterval interval, *iter_key;
    VirtIOIOMMUDomain *domain;
    VirtIOIOMMUEndpoint *ep;
    int ret = VIRTIO_IOMMU_S_OK;

    trace_virtio_iommu_unmap(domain_id, virt_start, virt_end);

    domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id));
    if (!domain) {
        return VIRTIO_IOMMU_S_NOENT;
    }

    if (domain->bypass) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    interval.low = virt_start;
    interval.high = virt_end;

    while (g_tree_lookup_extended(domain->mappings, &interval,
                                  (void **)&iter_key, (void **)&iter_val)) {
        uint64_t current_low = iter_key->low;
        uint64_t current_high = iter_key->high;

        if (interval.low <= current_low && interval.high >= current_high) {
            QLIST_FOREACH(ep, &domain->endpoint_list, next) {
                virtio_iommu_notify_unmap(ep->iommu_mr, current_low,
                                          current_high);
            }
            g_tree_remove(domain->mappings, iter_key);
            trace_virtio_iommu_unmap_done(domain_id, current_low, current_high);
        } else {
            ret = VIRTIO_IOMMU_S_RANGE;
            break;
        }
    }
    return ret;
}

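/*
 * T_PROBE replies are a sequence of properties, each starting with a
 * virtio_iommu_probe_property head (le16 type, le16 length). The only
 * property implemented here is RESV_MEM, one entry per reserved region
 * of the endpoint (MSI windows and host reserved ranges).
 */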
static ssize_t virtio_iommu_fill_resv_mem_prop(IOMMUDevice *sdev, uint32_t ep,
                                               uint8_t *buf, size_t free)
{
    struct virtio_iommu_probe_resv_mem prop = {};
    size_t size = sizeof(prop), length = size - sizeof(prop.head), total;
    GList *l;

    total = size * g_list_length(sdev->resv_regions);
    if (total > free) {
        return -ENOSPC;
    }

    for (l = sdev->resv_regions; l; l = l->next) {
        ReservedRegion *reg = l->data;
        unsigned subtype = reg->type;
        Range *range = &reg->range;

        assert(subtype == VIRTIO_IOMMU_RESV_MEM_T_RESERVED ||
               subtype == VIRTIO_IOMMU_RESV_MEM_T_MSI);
        prop.head.type = cpu_to_le16(VIRTIO_IOMMU_PROBE_T_RESV_MEM);
        prop.head.length = cpu_to_le16(length);
        prop.subtype = subtype;
        prop.start = cpu_to_le64(range_lob(range));
        prop.end = cpu_to_le64(range_upb(range));

        memcpy(buf, &prop, size);

        trace_virtio_iommu_fill_resv_property(ep, prop.subtype,
                                              prop.start, prop.end);
        buf += size;
    }
    return total;
}

/**
 * virtio_iommu_probe - Fill the probe request buffer with
 * the properties the device is able to return
 */
static int virtio_iommu_probe(VirtIOIOMMU *s,
                              struct virtio_iommu_req_probe *req,
                              uint8_t *buf)
{
    uint32_t ep_id = le32_to_cpu(req->endpoint);
    IOMMUMemoryRegion *iommu_mr = virtio_iommu_mr(s, ep_id);
    size_t free = VIOMMU_PROBE_SIZE;
    IOMMUDevice *sdev;
    ssize_t count;

    if (!iommu_mr) {
        return VIRTIO_IOMMU_S_NOENT;
    }

    sdev = container_of(iommu_mr, IOMMUDevice, iommu_mr);

    count = virtio_iommu_fill_resv_mem_prop(sdev, ep_id, buf, free);
    if (count < 0) {
        return VIRTIO_IOMMU_S_INVAL;
    }
    buf += count;
    free -= count;

    return VIRTIO_IOMMU_S_OK;
}

static int virtio_iommu_iov_to_req(struct iovec *iov,
                                   unsigned int iov_cnt,
                                   void *req, size_t payload_sz)
{
    size_t sz = iov_to_buf(iov, iov_cnt, 0, req, payload_sz);

    if (unlikely(sz != payload_sz)) {
        return VIRTIO_IOMMU_S_INVAL;
    }
    return 0;
}

#define virtio_iommu_handle_req(__req)                                  \
static int virtio_iommu_handle_ ## __req(VirtIOIOMMU *s,                \
                                         struct iovec *iov,             \
                                         unsigned int iov_cnt)          \
{                                                                       \
    struct virtio_iommu_req_ ## __req req;                              \
    int ret = virtio_iommu_iov_to_req(iov, iov_cnt, &req,               \
                    sizeof(req) - sizeof(struct virtio_iommu_req_tail));\
                                                                        \
    return ret ? ret : virtio_iommu_ ## __req(s, &req);                 \
}

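/*
 * For instance, virtio_iommu_handle_req(attach) expands to:
 *
 *   static int virtio_iommu_handle_attach(VirtIOIOMMU *s,
 *                                         struct iovec *iov,
 *                                         unsigned int iov_cnt)
 *   {
 *       struct virtio_iommu_req_attach req;
 *       int ret = virtio_iommu_iov_to_req(iov, iov_cnt, &req,
 *                     sizeof(req) - sizeof(struct virtio_iommu_req_tail));
 *
 *       return ret ? ret : virtio_iommu_attach(s, &req);
 *   }
 *
 * i.e. the payload (head included, tail excluded) is copied out of the
 * request descriptor before being handed to the typed handler.
 */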
virtio_iommu_handle_req(attach)
virtio_iommu_handle_req(detach)
virtio_iommu_handle_req(map)
virtio_iommu_handle_req(unmap)

static int virtio_iommu_handle_probe(VirtIOIOMMU *s,
                                     struct iovec *iov,
                                     unsigned int iov_cnt,
                                     uint8_t *buf)
{
    struct virtio_iommu_req_probe req;
    int ret = virtio_iommu_iov_to_req(iov, iov_cnt, &req, sizeof(req));

    return ret ? ret : virtio_iommu_probe(s, &req, buf);
}

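/*
 * Request layout on the request virtqueue: the driver-readable (out)
 * buffers carry a virtio_iommu_req_head followed by the type-specific
 * payload; the device-writable (in) buffers receive the reply, which is
 * just a virtio_iommu_req_tail status, except for T_PROBE where
 * probe_size property bytes precede the tail.
 */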
static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);
    struct virtio_iommu_req_head head;
    struct virtio_iommu_req_tail tail = {};
    VirtQueueElement *elem;
    unsigned int iov_cnt;
    struct iovec *iov;
    void *buf = NULL;
    size_t sz;

    for (;;) {
        size_t output_size = sizeof(tail);

        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            return;
        }

        if (iov_size(elem->in_sg, elem->in_num) < sizeof(tail) ||
            iov_size(elem->out_sg, elem->out_num) < sizeof(head)) {
            virtio_error(vdev, "virtio-iommu bad head/tail size");
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }

        iov_cnt = elem->out_num;
        iov = elem->out_sg;
        sz = iov_to_buf(iov, iov_cnt, 0, &head, sizeof(head));
        if (unlikely(sz != sizeof(head))) {
            qemu_log_mask(LOG_GUEST_ERROR,
                          "%s: read %zu bytes from command head "
                          "but expected %zu\n", __func__, sz, sizeof(head));
            tail.status = VIRTIO_IOMMU_S_DEVERR;
            goto out;
        }
        qemu_rec_mutex_lock(&s->mutex);
        switch (head.type) {
        case VIRTIO_IOMMU_T_ATTACH:
            tail.status = virtio_iommu_handle_attach(s, iov, iov_cnt);
            break;
        case VIRTIO_IOMMU_T_DETACH:
            tail.status = virtio_iommu_handle_detach(s, iov, iov_cnt);
            break;
        case VIRTIO_IOMMU_T_MAP:
            tail.status = virtio_iommu_handle_map(s, iov, iov_cnt);
            break;
        case VIRTIO_IOMMU_T_UNMAP:
            tail.status = virtio_iommu_handle_unmap(s, iov, iov_cnt);
            break;
        case VIRTIO_IOMMU_T_PROBE:
        {
            struct virtio_iommu_req_tail *ptail;

            output_size = s->config.probe_size + sizeof(tail);
            buf = g_malloc0(output_size);

            ptail = buf + s->config.probe_size;
            ptail->status = virtio_iommu_handle_probe(s, iov, iov_cnt, buf);
            break;
        }
        default:
            tail.status = VIRTIO_IOMMU_S_UNSUPP;
        }
        qemu_rec_mutex_unlock(&s->mutex);

out:
        sz = iov_from_buf(elem->in_sg, elem->in_num, 0,
                          buf ? buf : &tail, output_size);
        if (unlikely(sz != output_size)) {
            qemu_log_mask(LOG_GUEST_ERROR,
                          "%s: wrote %zu bytes to command response "
                          "but response size is %zu\n",
                          __func__, sz, output_size);
            tail.status = VIRTIO_IOMMU_S_DEVERR;
            /*
             * We checked that sizeof(tail) fits into elem->in_sg at the
             * beginning of the loop.
             */
            output_size = sizeof(tail);
            g_free(buf);
            buf = NULL;
            sz = iov_from_buf(elem->in_sg,
                              elem->in_num,
                              0,
                              &tail,
                              output_size);
        }
        assert(sz == output_size);

        virtqueue_push(vq, elem, sz);
        virtio_notify(vdev, vq);
        g_free(elem);
        g_free(buf);
        buf = NULL;
    }
}

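/*
 * Report a translation fault to the driver through the event virtqueue:
 * the virtio_iommu_fault record (reason, flags, endpoint, faulting
 * address) is copied into a buffer the driver has posted beforehand. If
 * no buffer is available, the event is simply dropped.
 */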
static void virtio_iommu_report_fault(VirtIOIOMMU *viommu, uint8_t reason,
                                      int flags, uint32_t endpoint,
                                      uint64_t address)
{
    VirtIODevice *vdev = &viommu->parent_obj;
    VirtQueue *vq = viommu->event_vq;
    struct virtio_iommu_fault fault;
    VirtQueueElement *elem;
    size_t sz;

    memset(&fault, 0, sizeof(fault));
    fault.reason = reason;
    fault.flags = cpu_to_le32(flags);
    fault.endpoint = cpu_to_le32(endpoint);
    fault.address = cpu_to_le64(address);

    elem = virtqueue_pop(vq, sizeof(VirtQueueElement));

    if (!elem) {
        error_report_once(
            "no buffer available in event queue to report event");
        return;
    }

    if (iov_size(elem->in_sg, elem->in_num) < sizeof(fault)) {
        virtio_error(vdev, "error buffer of wrong size");
        virtqueue_detach_element(vq, elem, 0);
        g_free(elem);
        return;
    }

    sz = iov_from_buf(elem->in_sg, elem->in_num, 0,
                      &fault, sizeof(fault));
    assert(sz == sizeof(fault));

    trace_virtio_iommu_report_fault(reason, flags, endpoint, address);
    virtqueue_push(vq, elem, sz);
    virtio_notify(vdev, vq);
    g_free(elem);
}

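/*
 * Translation path, called on each IOMMU access:
 * 1. unknown endpoint: fault, or pass through if global bypass is on;
 * 2. address within a reserved region: MSI windows are passed through,
 *    other reserved regions fault;
 * 3. endpoint not attached to a domain: fault, or pass through if
 *    global bypass is on;
 * 4. endpoint attached to a bypass domain: pass through;
 * 5. otherwise, look up the mapping and check its permissions.
 */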
static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr,
                                            IOMMUAccessFlags flag,
                                            int iommu_idx)
{
    IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
    VirtIOIOMMUInterval interval, *mapping_key;
    VirtIOIOMMUMapping *mapping_value;
    VirtIOIOMMU *s = sdev->viommu;
    bool read_fault, write_fault;
    VirtIOIOMMUEndpoint *ep;
    uint32_t sid, flags;
    bool bypass_allowed;
    int granule;
    bool found;
    GList *l;

    interval.low = addr;
    interval.high = addr + 1;
    granule = ctz64(s->config.page_size_mask);

    IOMMUTLBEntry entry = {
        .target_as = &address_space_memory,
        .iova = addr,
        .translated_addr = addr,
        .addr_mask = BIT_ULL(granule) - 1,
        .perm = IOMMU_NONE,
    };

    bypass_allowed = s->config.bypass;

    sid = virtio_iommu_get_bdf(sdev);

    trace_virtio_iommu_translate(mr->parent_obj.name, sid, addr, flag);
    qemu_rec_mutex_lock(&s->mutex);

    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));

    if (bypass_allowed) {
        assert(ep && ep->domain && !ep->domain->bypass);
    }

    if (!ep) {
        if (!bypass_allowed) {
            error_report_once("%s sid=%d is not known!!", __func__, sid);
            virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_UNKNOWN,
                                      VIRTIO_IOMMU_FAULT_F_ADDRESS,
                                      sid, addr);
        } else {
            entry.perm = flag;
        }
        goto unlock;
    }

    for (l = sdev->resv_regions; l; l = l->next) {
        ReservedRegion *reg = l->data;

        if (range_contains(&reg->range, addr)) {
            switch (reg->type) {
            case VIRTIO_IOMMU_RESV_MEM_T_MSI:
                entry.perm = flag;
                break;
            case VIRTIO_IOMMU_RESV_MEM_T_RESERVED:
            default:
                virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING,
                                          VIRTIO_IOMMU_FAULT_F_ADDRESS,
                                          sid, addr);
                break;
            }
            goto unlock;
        }
    }

    if (!ep->domain) {
        if (!bypass_allowed) {
            error_report_once("%s %02x:%02x.%01x not attached to any domain",
                              __func__, PCI_BUS_NUM(sid),
                              PCI_SLOT(sid), PCI_FUNC(sid));
            virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_DOMAIN,
                                      VIRTIO_IOMMU_FAULT_F_ADDRESS,
                                      sid, addr);
        } else {
            entry.perm = flag;
        }
        goto unlock;
    } else if (ep->domain->bypass) {
        entry.perm = flag;
        goto unlock;
    }

    found = g_tree_lookup_extended(ep->domain->mappings, (gpointer)(&interval),
                                   (void **)&mapping_key,
                                   (void **)&mapping_value);
    if (!found) {
        error_report_once("%s no mapping for 0x%"PRIx64" for sid=%d",
                          __func__, addr, sid);
        virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING,
                                  VIRTIO_IOMMU_FAULT_F_ADDRESS,
                                  sid, addr);
        goto unlock;
    }

    read_fault = (flag & IOMMU_RO) &&
                    !(mapping_value->flags & VIRTIO_IOMMU_MAP_F_READ);
    write_fault = (flag & IOMMU_WO) &&
                    !(mapping_value->flags & VIRTIO_IOMMU_MAP_F_WRITE);

    flags = read_fault ? VIRTIO_IOMMU_FAULT_F_READ : 0;
    flags |= write_fault ? VIRTIO_IOMMU_FAULT_F_WRITE : 0;
    if (flags) {
        error_report_once("%s permission error on 0x%"PRIx64"(%d): allowed=%d",
                          __func__, addr, flag, mapping_value->flags);
        flags |= VIRTIO_IOMMU_FAULT_F_ADDRESS;
        virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING,
                                  flags | VIRTIO_IOMMU_FAULT_F_ADDRESS,
                                  sid, addr);
        goto unlock;
    }
    entry.translated_addr = addr - mapping_key->low + mapping_value->phys_addr;
    entry.perm = flag;
    trace_virtio_iommu_translate_out(addr, entry.translated_addr, sid);

unlock:
    qemu_rec_mutex_unlock(&s->mutex);
    return entry;
}

static void virtio_iommu_get_config(VirtIODevice *vdev, uint8_t *config_data)
{
    VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);
    struct virtio_iommu_config *dev_config = &dev->config;
    struct virtio_iommu_config *out_config = (void *)config_data;

    out_config->page_size_mask = cpu_to_le64(dev_config->page_size_mask);
    out_config->input_range.start = cpu_to_le64(dev_config->input_range.start);
    out_config->input_range.end = cpu_to_le64(dev_config->input_range.end);
    out_config->domain_range.start = cpu_to_le32(dev_config->domain_range.start);
    out_config->domain_range.end = cpu_to_le32(dev_config->domain_range.end);
    out_config->probe_size = cpu_to_le32(dev_config->probe_size);
    out_config->bypass = dev_config->bypass;

    trace_virtio_iommu_get_config(dev_config->page_size_mask,
                                  dev_config->input_range.start,
                                  dev_config->input_range.end,
                                  dev_config->domain_range.start,
                                  dev_config->domain_range.end,
                                  dev_config->probe_size,
                                  dev_config->bypass);
}

static void virtio_iommu_set_config(VirtIODevice *vdev,
                                    const uint8_t *config_data)
{
    VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);
    struct virtio_iommu_config *dev_config = &dev->config;
    const struct virtio_iommu_config *in_config = (void *)config_data;

    if (in_config->bypass != dev_config->bypass) {
        if (!virtio_vdev_has_feature(vdev, VIRTIO_IOMMU_F_BYPASS_CONFIG)) {
            virtio_error(vdev, "cannot set config.bypass");
            return;
        } else if (in_config->bypass != 0 && in_config->bypass != 1) {
            virtio_error(vdev, "invalid config.bypass value '%u'",
                         in_config->bypass);
            return;
        }
        dev_config->bypass = in_config->bypass;
        virtio_iommu_switch_address_space_all(dev);
    }

    trace_virtio_iommu_set_config(in_config->bypass);
}

static uint64_t virtio_iommu_get_features(VirtIODevice *vdev, uint64_t f,
                                          Error **errp)
{
    VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);

    f |= dev->features;
    trace_virtio_iommu_get_features(f);
    return f;
}

static gint int_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
{
    guint ua = GPOINTER_TO_UINT(a);
    guint ub = GPOINTER_TO_UINT(b);
    return (ua > ub) - (ua < ub);
}

static gboolean virtio_iommu_remap(gpointer key, gpointer value, gpointer data)
{
    VirtIOIOMMUMapping *mapping = (VirtIOIOMMUMapping *) value;
    VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
    IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;

    trace_virtio_iommu_remap(mr->parent_obj.name, interval->low, interval->high,
                             mapping->phys_addr);
    virtio_iommu_notify_map(mr, interval->low, interval->high,
                            mapping->phys_addr, mapping->flags);
    return false;
}

static void virtio_iommu_replay(IOMMUMemoryRegion *mr, IOMMUNotifier *n)
{
    IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
    VirtIOIOMMU *s = sdev->viommu;
    uint32_t sid;
    VirtIOIOMMUEndpoint *ep;

    sid = virtio_iommu_get_bdf(sdev);

    qemu_rec_mutex_lock(&s->mutex);

    if (!s->endpoints) {
        goto unlock;
    }

    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));
    if (!ep || !ep->domain) {
        goto unlock;
    }

    g_tree_foreach(ep->domain->mappings, virtio_iommu_remap, mr);

unlock:
    qemu_rec_mutex_unlock(&s->mutex);
}

static int virtio_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu_mr,
                                            IOMMUNotifierFlag old,
                                            IOMMUNotifierFlag new,
                                            Error **errp)
{
    if (new & IOMMU_NOTIFIER_DEVIOTLB_UNMAP) {
        error_setg(errp, "Virtio-iommu does not support dev-iotlb yet");
        return -EINVAL;
    }

    if (old == IOMMU_NOTIFIER_NONE) {
        trace_virtio_iommu_notify_flag_add(iommu_mr->parent_obj.name);
    } else if (new == IOMMU_NOTIFIER_NONE) {
        trace_virtio_iommu_notify_flag_del(iommu_mr->parent_obj.name);
    }
    return 0;
}

static void virtio_iommu_system_reset(void *opaque)
{
    VirtIOIOMMU *s = opaque;

    trace_virtio_iommu_system_reset();

    memset(s->iommu_pcibus_by_bus_num, 0, sizeof(s->iommu_pcibus_by_bus_num));

    /*
     * config.bypass is sticky across device reset, but should be restored on
     * system reset
     */
    s->config.bypass = s->boot_bypass;
    virtio_iommu_switch_address_space_all(s);
}

static void virtio_iommu_freeze_granule(Notifier *notifier, void *data)
{
    VirtIOIOMMU *s = container_of(notifier, VirtIOIOMMU, machine_done);
    int granule;

    s->granule_frozen = true;
    granule = ctz64(s->config.page_size_mask);
    trace_virtio_iommu_freeze_granule(BIT_ULL(granule));
}

static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIOIOMMU *s = VIRTIO_IOMMU(dev);

    virtio_init(vdev, VIRTIO_ID_IOMMU, sizeof(struct virtio_iommu_config));

    s->req_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE,
                                 virtio_iommu_handle_command);
    s->event_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE, NULL);

    /*
     * config.bypass is needed to get the initial address space early,
     * e.g. in vfio realize
     */
    s->config.bypass = s->boot_bypass;
    if (s->aw_bits < 32 || s->aw_bits > 64) {
        error_setg(errp, "aw-bits must be within [32,64]");
        return;
    }
    s->config.input_range.end =
        s->aw_bits == 64 ? UINT64_MAX : BIT_ULL(s->aw_bits) - 1;

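    /*
     * page_size_mask advertises the supported page sizes, one bit per
     * size. The negation trick yields a mask with all bits above the
     * granule set: e.g. -(4 * KiB) == 0xfffffffffffff000, i.e. a 4K
     * granule with every larger power-of-two size allowed.
     */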
1433 switch (s->granule_mode) {
1434 case GRANULE_MODE_4K:
1435 s->config.page_size_mask = -(4 * KiB);
1436 break;
1437 case GRANULE_MODE_8K:
1438 s->config.page_size_mask = -(8 * KiB);
1439 break;
1440 case GRANULE_MODE_16K:
1441 s->config.page_size_mask = -(16 * KiB);
1442 break;
1443 case GRANULE_MODE_64K:
1444 s->config.page_size_mask = -(64 * KiB);
1445 break;
1446 case GRANULE_MODE_HOST:
1447 s->config.page_size_mask = qemu_real_host_page_mask();
1448 break;
1449 default:
1450 error_setg(errp, "Unsupported granule mode");
1451 }
Eric Auger6b77ae02021-11-27 08:29:09 +01001452 s->config.domain_range.end = UINT32_MAX;
Eric Auger1733eeb2020-07-03 16:59:42 +01001453 s->config.probe_size = VIOMMU_PROBE_SIZE;
Eric Auger22c37a12020-02-14 14:27:36 +01001454
1455 virtio_add_feature(&s->features, VIRTIO_RING_F_EVENT_IDX);
1456 virtio_add_feature(&s->features, VIRTIO_RING_F_INDIRECT_DESC);
1457 virtio_add_feature(&s->features, VIRTIO_F_VERSION_1);
1458 virtio_add_feature(&s->features, VIRTIO_IOMMU_F_INPUT_RANGE);
1459 virtio_add_feature(&s->features, VIRTIO_IOMMU_F_DOMAIN_RANGE);
1460 virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MAP_UNMAP);
Eric Auger22c37a12020-02-14 14:27:36 +01001461 virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MMIO);
Eric Auger1733eeb2020-07-03 16:59:42 +01001462 virtio_add_feature(&s->features, VIRTIO_IOMMU_F_PROBE);
Jean-Philippe Brucker448179e2022-02-14 12:43:54 +00001463 virtio_add_feature(&s->features, VIRTIO_IOMMU_F_BYPASS_CONFIG);
Eric Auger22c37a12020-02-14 14:27:36 +01001464
Zhenzhong Duan08f20302022-06-13 14:10:09 +08001465 qemu_rec_mutex_init(&s->mutex);
Eric Augercfb42182020-02-14 14:27:38 +01001466
1467 s->as_by_busptr = g_hash_table_new_full(NULL, NULL, NULL, g_free);
1468
Eric Auger817ef102024-06-14 11:52:52 +02001469 s->host_iommu_devices = g_hash_table_new_full(hiod_hash, hiod_equal,
1470 g_free, hiod_destroy);
1471
Eric Augercfb42182020-02-14 14:27:38 +01001472 if (s->primary_bus) {
Yi Liuba7d12e2023-10-17 18:14:04 +02001473 pci_setup_iommu(s->primary_bus, &virtio_iommu_ops, s);
Eric Augercfb42182020-02-14 14:27:38 +01001474 } else {
1475 error_setg(errp, "VIRTIO-IOMMU is not attached to any PCI bus!");
1476 }
Jean-Philippe Brucker448179e2022-02-14 12:43:54 +00001477
Eric Auger94df5b22023-07-05 18:51:17 +02001478 s->machine_done.notify = virtio_iommu_freeze_granule;
1479 qemu_add_machine_init_done_notifier(&s->machine_done);
1480
Jean-Philippe Brucker448179e2022-02-14 12:43:54 +00001481 qemu_register_reset(virtio_iommu_system_reset, s);
Eric Auger22c37a12020-02-14 14:27:36 +01001482}
1483
Markus Armbrusterb69c3c22020-05-05 17:29:24 +02001484static void virtio_iommu_device_unrealize(DeviceState *dev)
Eric Auger22c37a12020-02-14 14:27:36 +01001485{
1486 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
Eric Augercfb42182020-02-14 14:27:38 +01001487 VirtIOIOMMU *s = VIRTIO_IOMMU(dev);
1488
Jean-Philippe Brucker448179e2022-02-14 12:43:54 +00001489 qemu_unregister_reset(virtio_iommu_system_reset, s);
Eric Auger94df5b22023-07-05 18:51:17 +02001490 qemu_remove_machine_init_done_notifier(&s->machine_done);
Jean-Philippe Brucker448179e2022-02-14 12:43:54 +00001491
Pan Nengyuande38ed32020-03-28 08:57:05 +08001492 g_hash_table_destroy(s->as_by_busptr);
Eric Auger59bf9802020-09-08 21:33:08 +02001493 if (s->domains) {
1494 g_tree_destroy(s->domains);
1495 }
1496 if (s->endpoints) {
1497 g_tree_destroy(s->endpoints);
1498 }
Eric Auger22c37a12020-02-14 14:27:36 +01001499
Zhenzhong Duan08f20302022-06-13 14:10:09 +08001500 qemu_rec_mutex_destroy(&s->mutex);
1501
Pan Nengyuande38ed32020-03-28 08:57:05 +08001502 virtio_delete_queue(s->req_vq);
1503 virtio_delete_queue(s->event_vq);
Eric Auger22c37a12020-02-14 14:27:36 +01001504 virtio_cleanup(vdev);
1505}
1506
1507static void virtio_iommu_device_reset(VirtIODevice *vdev)
1508{
Eric Augercfb42182020-02-14 14:27:38 +01001509 VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);
1510
Eric Auger22c37a12020-02-14 14:27:36 +01001511 trace_virtio_iommu_device_reset();
Eric Augercfb42182020-02-14 14:27:38 +01001512
1513 if (s->domains) {
1514 g_tree_destroy(s->domains);
1515 }
1516 if (s->endpoints) {
1517 g_tree_destroy(s->endpoints);
1518 }
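    /*
     * Recreate empty trees keyed by the 32-bit domain/endpoint IDs
     * (stored via GUINT_TO_POINTER); the put_ destroy notifiers
     * release attached resources when entries are dropped.
     */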
1519 s->domains = g_tree_new_full((GCompareDataFunc)int_cmp,
1520 NULL, NULL, virtio_iommu_put_domain);
1521 s->endpoints = g_tree_new_full((GCompareDataFunc)int_cmp,
1522 NULL, NULL, virtio_iommu_put_endpoint);
Eric Auger22c37a12020-02-14 14:27:36 +01001523}
1524
1525static void virtio_iommu_set_status(VirtIODevice *vdev, uint8_t status)
1526{
1527 trace_virtio_iommu_device_status(status);
1528}
1529
1530static void virtio_iommu_instance_init(Object *obj)
1531{
1532}
1533
Eric Augerbd0ab872020-02-14 14:27:42 +01001534#define VMSTATE_INTERVAL \
1535{ \
1536 .name = "interval", \
1537 .version_id = 1, \
1538 .minimum_version_id = 1, \
Richard Hendersonca02a172023-12-21 14:16:41 +11001539 .fields = (const VMStateField[]) { \
Eric Augerbd0ab872020-02-14 14:27:42 +01001540 VMSTATE_UINT64(low, VirtIOIOMMUInterval), \
1541 VMSTATE_UINT64(high, VirtIOIOMMUInterval), \
1542 VMSTATE_END_OF_LIST() \
1543 } \
1544}
1545
1546#define VMSTATE_MAPPING \
1547{ \
1548 .name = "mapping", \
1549 .version_id = 1, \
1550 .minimum_version_id = 1, \
Richard Hendersonca02a172023-12-21 14:16:41 +11001551 .fields = (const VMStateField[]) { \
Eric Augerbd0ab872020-02-14 14:27:42 +01001552 VMSTATE_UINT64(phys_addr, VirtIOIOMMUMapping),\
1553 VMSTATE_UINT32(flags, VirtIOIOMMUMapping), \
1554 VMSTATE_END_OF_LIST() \
1555 }, \
1556}
1557
1558static const VMStateDescription vmstate_interval_mapping[2] = {
1559 VMSTATE_MAPPING, /* value */
1560 VMSTATE_INTERVAL /* key */
1561};
1562
1563static int domain_preload(void *opaque)
1564{
1565 VirtIOIOMMUDomain *domain = opaque;
1566
1567 domain->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp,
1568 NULL, g_free, g_free);
1569 return 0;
1570}
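/*
 * A lookup sketch: interval_cmp(), defined earlier in this file,
 * reports overlapping ranges as equal, so probing with a degenerate
 * [iova, iova] interval finds the mapping that covers iova:
 *
 *   VirtIOIOMMUInterval key = { .low = iova, .high = iova };
 *   VirtIOIOMMUMapping *m = g_tree_lookup(domain->mappings, &key);
 */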
1571
1572static const VMStateDescription vmstate_endpoint = {
1573 .name = "endpoint",
1574 .version_id = 1,
1575 .minimum_version_id = 1,
Richard Hendersonca02a172023-12-21 14:16:41 +11001576 .fields = (const VMStateField[]) {
Eric Augerbd0ab872020-02-14 14:27:42 +01001577 VMSTATE_UINT32(id, VirtIOIOMMUEndpoint),
1578 VMSTATE_END_OF_LIST()
1579 }
1580};
1581
1582static const VMStateDescription vmstate_domain = {
1583 .name = "domain",
Jean-Philippe Bruckerd9c96f22022-02-14 12:43:55 +00001584 .version_id = 2,
1585 .minimum_version_id = 2,
Eric Augerbd0ab872020-02-14 14:27:42 +01001586 .pre_load = domain_preload,
Richard Hendersonca02a172023-12-21 14:16:41 +11001587 .fields = (const VMStateField[]) {
Eric Augerbd0ab872020-02-14 14:27:42 +01001588 VMSTATE_UINT32(id, VirtIOIOMMUDomain),
1589 VMSTATE_GTREE_V(mappings, VirtIOIOMMUDomain, 1,
1590 vmstate_interval_mapping,
1591 VirtIOIOMMUInterval, VirtIOIOMMUMapping),
1592 VMSTATE_QLIST_V(endpoint_list, VirtIOIOMMUDomain, 1,
1593 vmstate_endpoint, VirtIOIOMMUEndpoint, next),
Jean-Philippe Bruckerd9c96f22022-02-14 12:43:55 +00001594 VMSTATE_BOOL_V(bypass, VirtIOIOMMUDomain, 2),
Eric Augerbd0ab872020-02-14 14:27:42 +01001595 VMSTATE_END_OF_LIST()
1596 }
1597};
1598
1599static gboolean reconstruct_endpoints(gpointer key, gpointer value,
1600 gpointer data)
1601{
1602 VirtIOIOMMU *s = (VirtIOIOMMU *)data;
1603 VirtIOIOMMUDomain *d = (VirtIOIOMMUDomain *)value;
1604 VirtIOIOMMUEndpoint *iter;
Jean-Philippe Brucker31aa3232020-10-30 19:05:02 +01001605 IOMMUMemoryRegion *mr;
Eric Augerbd0ab872020-02-14 14:27:42 +01001606
1607 QLIST_FOREACH(iter, &d->endpoint_list, next) {
Jean-Philippe Brucker31aa3232020-10-30 19:05:02 +01001608 mr = virtio_iommu_mr(s, iter->id);
1609 assert(mr);
1610
Eric Augerbd0ab872020-02-14 14:27:42 +01001611 iter->domain = d;
Jean-Philippe Brucker31aa3232020-10-30 19:05:02 +01001612 iter->iommu_mr = mr;
Eric Augerbd0ab872020-02-14 14:27:42 +01001613 g_tree_insert(s->endpoints, GUINT_TO_POINTER(iter->id), iter);
1614 }
1615 return false; /* continue the domain traversal */
1616}
1617
1618static int iommu_post_load(void *opaque, int version_id)
1619{
1620 VirtIOIOMMU *s = opaque;
1621
1622 g_tree_foreach(s->domains, reconstruct_endpoints, s);
Zhenzhong Duand3555662022-06-24 17:37:40 +08001623
1624 /*
1625 * Memory regions are dynamically turned on/off depending on
1626 * 'config.bypass' and the type of the attached domain, if any.
1627 * After migration we need to make sure the memory regions are
1628 * still correct.
1629 */
1630 virtio_iommu_switch_address_space_all(s);
Eric Augerbd0ab872020-02-14 14:27:42 +01001631 return 0;
1632}
1633
1634static const VMStateDescription vmstate_virtio_iommu_device = {
1635 .name = "virtio-iommu-device",
Jean-Philippe Brucker448179e2022-02-14 12:43:54 +00001636 .minimum_version_id = 2,
1637 .version_id = 2,
Eric Augerbd0ab872020-02-14 14:27:42 +01001638 .post_load = iommu_post_load,
Richard Hendersonca02a172023-12-21 14:16:41 +11001639 .fields = (const VMStateField[]) {
Jean-Philippe Bruckerd9c96f22022-02-14 12:43:55 +00001640 VMSTATE_GTREE_DIRECT_KEY_V(domains, VirtIOIOMMU, 2,
Eric Augerbd0ab872020-02-14 14:27:42 +01001641 &vmstate_domain, VirtIOIOMMUDomain),
Jean-Philippe Brucker448179e2022-02-14 12:43:54 +00001642 VMSTATE_UINT8_V(config.bypass, VirtIOIOMMU, 2),
Eric Augerbd0ab872020-02-14 14:27:42 +01001643 VMSTATE_END_OF_LIST()
1644 },
1645};
1646
Eric Auger22c37a12020-02-14 14:27:36 +01001647static const VMStateDescription vmstate_virtio_iommu = {
1648 .name = "virtio-iommu",
Jean-Philippe Brucker448179e2022-02-14 12:43:54 +00001649 .minimum_version_id = 2,
Eric Augerbd0ab872020-02-14 14:27:42 +01001650 .priority = MIG_PRI_IOMMU,
Jean-Philippe Brucker448179e2022-02-14 12:43:54 +00001651 .version_id = 2,
Richard Hendersonca02a172023-12-21 14:16:41 +11001652 .fields = (const VMStateField[]) {
Eric Auger22c37a12020-02-14 14:27:36 +01001653 VMSTATE_VIRTIO_DEVICE,
1654 VMSTATE_END_OF_LIST()
1655 },
1656};
1657
1658static Property virtio_iommu_properties[] = {
Philippe Mathieu-Daudéc45e7612023-01-17 20:30:14 +01001659 DEFINE_PROP_LINK("primary-bus", VirtIOIOMMU, primary_bus,
1660 TYPE_PCI_BUS, PCIBus *),
Jean-Philippe Brucker448179e2022-02-14 12:43:54 +00001661 DEFINE_PROP_BOOL("boot-bypass", VirtIOIOMMU, boot_bypass, true),
Eric Auger294ac5f2024-03-07 14:43:03 +01001662 DEFINE_PROP_GRANULE_MODE("granule", VirtIOIOMMU, granule_mode,
Eric Auger9dd5e802024-03-07 14:43:04 +01001663 GRANULE_MODE_HOST),
Eric Auger01e7e492024-03-07 14:43:07 +01001664 DEFINE_PROP_UINT8("aw-bits", VirtIOIOMMU, aw_bits, 64),
Eric Auger22c37a12020-02-14 14:27:36 +01001665 DEFINE_PROP_END_OF_LIST(),
1666};
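/*
 * A usage sketch, assuming the usual virtio-iommu-pci proxy device
 * exposes these properties on the command line:
 *
 *   qemu-system-aarch64 -M virt \
 *       -device virtio-iommu-pci,granule=4k,aw-bits=48,boot-bypass=on
 *
 * "granule" takes the GranuleMode values (4k/8k/16k/64k/host) and
 * "boot-bypass" controls config.bypass before the guest driver loads.
 */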
1667
1668static void virtio_iommu_class_init(ObjectClass *klass, void *data)
1669{
1670 DeviceClass *dc = DEVICE_CLASS(klass);
1671 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
1672
1673 device_class_set_props(dc, virtio_iommu_properties);
1674 dc->vmsd = &vmstate_virtio_iommu;
1675
1676 set_bit(DEVICE_CATEGORY_MISC, dc->categories);
1677 vdc->realize = virtio_iommu_device_realize;
1678 vdc->unrealize = virtio_iommu_device_unrealize;
1679 vdc->reset = virtio_iommu_device_reset;
1680 vdc->get_config = virtio_iommu_get_config;
Jean-Philippe Brucker448179e2022-02-14 12:43:54 +00001681 vdc->set_config = virtio_iommu_set_config;
Eric Auger22c37a12020-02-14 14:27:36 +01001682 vdc->get_features = virtio_iommu_get_features;
1683 vdc->set_status = virtio_iommu_set_status;
1684 vdc->vmsd = &vmstate_virtio_iommu_device;
1685}
1686
Eric Augercfb42182020-02-14 14:27:38 +01001687static void virtio_iommu_memory_region_class_init(ObjectClass *klass,
1688 void *data)
1689{
1690 IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);
1691
1692 imrc->translate = virtio_iommu_translate;
Bharat Bhushan308e5e12020-10-30 19:05:05 +01001693 imrc->replay = virtio_iommu_replay;
Bharat Bhushan6978bfa2020-10-30 19:05:06 +01001694 imrc->notify_flag_changed = virtio_iommu_notify_flag_changed;
Eric Augercfb42182020-02-14 14:27:38 +01001695}
1696
Eric Auger22c37a12020-02-14 14:27:36 +01001697static const TypeInfo virtio_iommu_info = {
1698 .name = TYPE_VIRTIO_IOMMU,
1699 .parent = TYPE_VIRTIO_DEVICE,
1700 .instance_size = sizeof(VirtIOIOMMU),
1701 .instance_init = virtio_iommu_instance_init,
1702 .class_init = virtio_iommu_class_init,
1703};
1704
Eric Augercfb42182020-02-14 14:27:38 +01001705static const TypeInfo virtio_iommu_memory_region_info = {
1706 .parent = TYPE_IOMMU_MEMORY_REGION,
1707 .name = TYPE_VIRTIO_IOMMU_MEMORY_REGION,
1708 .class_init = virtio_iommu_memory_region_class_init,
1709};
1710
Eric Auger22c37a12020-02-14 14:27:36 +01001711static void virtio_register_types(void)
1712{
1713 type_register_static(&virtio_iommu_info);
Eric Augercfb42182020-02-14 14:27:38 +01001714 type_register_static(&virtio_iommu_memory_region_info);
Eric Auger22c37a12020-02-14 14:27:36 +01001715}
1716
1717type_init(virtio_register_types)