/*
 * generic functions used by VFIO devices
 *
 * Copyright Red Hat, Inc. 2012
 *
 * Authors:
 *  Alex Williamson <alex.williamson@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Based on qemu-kvm device-assignment:
 *  Adapted for KVM by Qumranet.
 *  Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com)
 *  Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com)
 *  Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com)
 *  Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com)
 *  Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com)
 */

#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/vfio.h>

#include "hw/vfio/vfio-common.h"
#include "hw/vfio/vfio.h"
#include "exec/address-spaces.h"
#include "exec/memory.h"
#include "hw/hw.h"
#include "qemu/error-report.h"
#include "sysemu/kvm.h"
#include "trace.h"

struct vfio_group_head vfio_group_list =
    QLIST_HEAD_INITIALIZER(vfio_group_list);
struct vfio_as_head vfio_address_spaces =
    QLIST_HEAD_INITIALIZER(vfio_address_spaces);

#ifdef CONFIG_KVM
/*
 * We have a single VFIO pseudo device per KVM VM.  Once created it lives
 * for the life of the VM.  Closing the file descriptor only drops our
 * reference to it and the device's reference to kvm.  Therefore once
 * initialized, this file descriptor is only released on QEMU exit and
 * we'll re-use it should another vfio device be attached before then.
 */
static int vfio_kvm_device_fd = -1;
#endif

/*
 * Common VFIO interrupt disable
 */
void vfio_disable_irqindex(VFIODevice *vbasedev, int index)
{
    struct vfio_irq_set irq_set = {
        .argsz = sizeof(irq_set),
        .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER,
        .index = index,
        .start = 0,
        .count = 0,
    };

    ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);
}

void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index)
{
    struct vfio_irq_set irq_set = {
        .argsz = sizeof(irq_set),
        .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK,
        .index = index,
        .start = 0,
        .count = 1,
    };

    ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);
}

void vfio_mask_single_irqindex(VFIODevice *vbasedev, int index)
{
    struct vfio_irq_set irq_set = {
        .argsz = sizeof(irq_set),
        .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK,
        .index = index,
        .start = 0,
        .count = 1,
    };

    ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);
}

/*
 * IO Port/MMIO - Beware of the endians, VFIO is always little endian
 */
void vfio_region_write(void *opaque, hwaddr addr,
                       uint64_t data, unsigned size)
{
    VFIORegion *region = opaque;
    VFIODevice *vbasedev = region->vbasedev;
    union {
        uint8_t byte;
        uint16_t word;
        uint32_t dword;
        uint64_t qword;
    } buf;

    switch (size) {
    case 1:
        buf.byte = data;
        break;
    case 2:
        buf.word = cpu_to_le16(data);
        break;
    case 4:
        buf.dword = cpu_to_le32(data);
        break;
    default:
        hw_error("vfio: unsupported write size, %d bytes", size);
        break;
    }

    if (pwrite(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) {
        error_report("%s(%s:region%d+0x%"HWADDR_PRIx", 0x%"PRIx64
                     ",%d) failed: %m",
                     __func__, vbasedev->name, region->nr,
                     addr, data, size);
    }

    trace_vfio_region_write(vbasedev->name, region->nr, addr, data, size);

    /*
     * A read or write to a BAR always signals an INTx EOI.  This will
     * do nothing if not pending (including not in INTx mode).  We assume
     * that a BAR access is in response to an interrupt and that BAR
     * accesses will service the interrupt.  Unfortunately, we don't know
     * which access will service the interrupt, so we're potentially
     * getting quite a few host interrupts per guest interrupt.
     */
    vbasedev->ops->vfio_eoi(vbasedev);
}

uint64_t vfio_region_read(void *opaque,
                          hwaddr addr, unsigned size)
{
    VFIORegion *region = opaque;
    VFIODevice *vbasedev = region->vbasedev;
    union {
        uint8_t byte;
        uint16_t word;
        uint32_t dword;
        uint64_t qword;
    } buf;
    uint64_t data = 0;

    if (pread(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) {
        error_report("%s(%s:region%d+0x%"HWADDR_PRIx", %d) failed: %m",
                     __func__, vbasedev->name, region->nr,
                     addr, size);
        return (uint64_t)-1;
    }
    switch (size) {
    case 1:
        data = buf.byte;
        break;
    case 2:
        data = le16_to_cpu(buf.word);
        break;
    case 4:
        data = le32_to_cpu(buf.dword);
        break;
    default:
        hw_error("vfio: unsupported read size, %d bytes", size);
        break;
    }

    trace_vfio_region_read(vbasedev->name, region->nr, addr, size, data);

    /* Same as write above */
    vbasedev->ops->vfio_eoi(vbasedev);

    return data;
}

const MemoryRegionOps vfio_region_ops = {
    .read = vfio_region_read,
    .write = vfio_region_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};

/*
 * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86
 */
static int vfio_dma_unmap(VFIOContainer *container,
                          hwaddr iova, ram_addr_t size)
{
    struct vfio_iommu_type1_dma_unmap unmap = {
        .argsz = sizeof(unmap),
        .flags = 0,
        .iova = iova,
        .size = size,
    };

    if (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
        error_report("VFIO_UNMAP_DMA: %d", -errno);
        return -errno;
    }

    return 0;
}

static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
                        ram_addr_t size, void *vaddr, bool readonly)
{
    struct vfio_iommu_type1_dma_map map = {
        .argsz = sizeof(map),
        .flags = VFIO_DMA_MAP_FLAG_READ,
        .vaddr = (__u64)(uintptr_t)vaddr,
        .iova = iova,
        .size = size,
    };

    if (!readonly) {
        map.flags |= VFIO_DMA_MAP_FLAG_WRITE;
    }

    /*
     * Try the mapping, if it fails with EBUSY, unmap the region and try
     * again.  This shouldn't be necessary, but we sometimes see it in
     * the VGA ROM space.
     */
    if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 ||
        (errno == EBUSY && vfio_dma_unmap(container, iova, size) == 0 &&
         ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) {
        return 0;
    }

    error_report("VFIO_MAP_DMA: %d", -errno);
    return -errno;
}

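/*
 * Return true for sections that should not be DMA mapped: anything that is
 * neither RAM nor an IOMMU region, plus mappings in the upper half of the
 * 64-bit address space (see the comment inside).
 */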
static bool vfio_listener_skipped_section(MemoryRegionSection *section)
{
    return (!memory_region_is_ram(section->mr) &&
            !memory_region_is_iommu(section->mr)) ||
           /*
            * Sizing an enabled 64-bit BAR can cause spurious mappings to
            * addresses in the upper part of the 64-bit address space.  These
            * are never accessed by the CPU and beyond the address width of
            * some IOMMU hardware.  TODO: VFIO should tell us the IOMMU width.
            */
           section->offset_within_address_space & (1ULL << 63);
}

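/*
 * Guest IOMMU notifier: mirror a guest IOMMU (un)mapping into the host
 * IOMMU by translating the target address to a host virtual address and
 * calling vfio_dma_map()/vfio_dma_unmap() on the container.
 */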
static void vfio_iommu_map_notify(Notifier *n, void *data)
{
    VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
    VFIOContainer *container = giommu->container;
    IOMMUTLBEntry *iotlb = data;
    MemoryRegion *mr;
    hwaddr xlat;
    hwaddr len = iotlb->addr_mask + 1;
    void *vaddr;
    int ret;

    trace_vfio_iommu_map_notify(iotlb->iova,
                                iotlb->iova + iotlb->addr_mask);

    /*
     * The IOMMU TLB entry we have just covers translation through
     * this IOMMU to its immediate target.  We need to translate
     * it the rest of the way through to memory.
     */
    rcu_read_lock();
    mr = address_space_translate(&address_space_memory,
                                 iotlb->translated_addr,
                                 &xlat, &len, iotlb->perm & IOMMU_WO);
    if (!memory_region_is_ram(mr)) {
        error_report("iommu map to non memory area %"HWADDR_PRIx"",
                     xlat);
        goto out;
    }
    /*
     * Translation truncates length to the IOMMU page size,
     * check that it did not truncate too much.
     */
    if (len & iotlb->addr_mask) {
        error_report("iommu has granularity incompatible with target AS");
        goto out;
    }

    if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
        vaddr = memory_region_get_ram_ptr(mr) + xlat;
        ret = vfio_dma_map(container, iotlb->iova,
                           iotlb->addr_mask + 1, vaddr,
                           !(iotlb->perm & IOMMU_WO) || mr->readonly);
        if (ret) {
            error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
                         "0x%"HWADDR_PRIx", %p) = %d (%m)",
                         container, iotlb->iova,
                         iotlb->addr_mask + 1, vaddr, ret);
        }
    } else {
        ret = vfio_dma_unmap(container, iotlb->iova, iotlb->addr_mask + 1);
        if (ret) {
            error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
                         "0x%"HWADDR_PRIx") = %d (%m)",
                         container, iotlb->iova,
                         iotlb->addr_mask + 1, ret);
        }
    }
out:
    rcu_read_unlock();
}

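/*
 * Smallest IOVA page size supported by the container's host IOMMU,
 * used as the replay granularity for guest IOMMU mappings.
 */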
static hwaddr vfio_container_granularity(VFIOContainer *container)
{
    return (hwaddr)1 << ctz64(container->iova_pgsizes);
}

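/*
 * MemoryListener callback for a section added to the container's address
 * space: RAM sections are DMA mapped directly, IOMMU sections get a
 * notifier registered (and replayed) so guest mappings are propagated.
 */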
static void vfio_listener_region_add(MemoryListener *listener,
                                     MemoryRegionSection *section)
{
    VFIOContainer *container = container_of(listener, VFIOContainer, listener);
    hwaddr iova, end;
    Int128 llend;
    void *vaddr;
    int ret;

    if (vfio_listener_skipped_section(section)) {
        trace_vfio_listener_region_add_skip(
                section->offset_within_address_space,
                section->offset_within_address_space +
                int128_get64(int128_sub(section->size, int128_one())));
        return;
    }

    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
        error_report("%s received unaligned region", __func__);
        return;
    }

    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
    llend = int128_make64(section->offset_within_address_space);
    llend = int128_add(llend, section->size);
    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));

    if (int128_ge(int128_make64(iova), llend)) {
        return;
    }
    end = int128_get64(llend);

    if ((iova < container->min_iova) || ((end - 1) > container->max_iova)) {
        error_report("vfio: IOMMU container %p can't map guest IOVA region"
                     " 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx,
                     container, iova, end - 1);
        ret = -EFAULT;
        goto fail;
    }

    memory_region_ref(section->mr);

    if (memory_region_is_iommu(section->mr)) {
        VFIOGuestIOMMU *giommu;

        trace_vfio_listener_region_add_iommu(iova, end - 1);
        /*
         * FIXME: We should do some checking to see if the
         * capabilities of the host VFIO IOMMU are adequate to model
         * the guest IOMMU
         *
         * FIXME: For VFIO iommu types which have KVM acceleration to
         * avoid bouncing all map/unmaps through qemu this way, this
         * would be the right place to wire that up (tell the KVM
         * device emulation the VFIO iommu handles to use).
         */
        giommu = g_malloc0(sizeof(*giommu));
        giommu->iommu = section->mr;
        giommu->container = container;
        giommu->n.notify = vfio_iommu_map_notify;
        QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);

        memory_region_register_iommu_notifier(giommu->iommu, &giommu->n);
        memory_region_iommu_replay(giommu->iommu, &giommu->n,
                                   vfio_container_granularity(container),
                                   false);

        return;
    }

    /* Here we assume that memory_region_is_ram(section->mr)==true */

    vaddr = memory_region_get_ram_ptr(section->mr) +
            section->offset_within_region +
            (iova - section->offset_within_address_space);

    trace_vfio_listener_region_add_ram(iova, end - 1, vaddr);

    ret = vfio_dma_map(container, iova, end - iova, vaddr, section->readonly);
    if (ret) {
        error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
                     "0x%"HWADDR_PRIx", %p) = %d (%m)",
                     container, iova, end - iova, vaddr, ret);
        goto fail;
    }

    return;

fail:
    /*
     * On the initfn path, store the first error in the container so we
     * can gracefully fail.  Runtime, there's not much we can do other
     * than throw a hardware error.
     */
    if (!container->initialized) {
        if (!container->error) {
            container->error = ret;
        }
    } else {
        hw_error("vfio: DMA mapping failed, unable to continue");
    }
}

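/*
 * MemoryListener callback for a section removed from the container's
 * address space: drop any guest IOMMU notifier and unmap the range.
 */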
static void vfio_listener_region_del(MemoryListener *listener,
                                     MemoryRegionSection *section)
{
    VFIOContainer *container = container_of(listener, VFIOContainer, listener);
    hwaddr iova, end;
    int ret;

    if (vfio_listener_skipped_section(section)) {
        trace_vfio_listener_region_del_skip(
                section->offset_within_address_space,
                section->offset_within_address_space +
                int128_get64(int128_sub(section->size, int128_one())));
        return;
    }

    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
        error_report("%s received unaligned region", __func__);
        return;
    }

    if (memory_region_is_iommu(section->mr)) {
        VFIOGuestIOMMU *giommu;

        QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) {
            if (giommu->iommu == section->mr) {
                memory_region_unregister_iommu_notifier(&giommu->n);
                QLIST_REMOVE(giommu, giommu_next);
                g_free(giommu);
                break;
            }
        }

        /*
         * FIXME: We assume the one big unmap below is adequate to
         * remove any individual page mappings in the IOMMU which
         * might have been copied into VFIO.  This works for a page table
         * based IOMMU where a big unmap flattens a large range of IO-PTEs.
         * That may not be true for all IOMMU types.
         */
    }

    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
    end = (section->offset_within_address_space + int128_get64(section->size)) &
          TARGET_PAGE_MASK;

    if (iova >= end) {
        return;
    }

    trace_vfio_listener_region_del(iova, end - 1);

    ret = vfio_dma_unmap(container, iova, end - iova);
    memory_region_unref(section->mr);
    if (ret) {
        error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
                     "0x%"HWADDR_PRIx") = %d (%m)",
                     container, iova, end - iova, ret);
    }
}

static const MemoryListener vfio_memory_listener = {
    .region_add = vfio_listener_region_add,
    .region_del = vfio_listener_region_del,
};

static void vfio_listener_release(VFIOContainer *container)
{
    memory_listener_unregister(&container->listener);
}

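/*
 * mmap() a device region into QEMU for direct access, if the kernel
 * advertises mmap support for it, and expose it as a RAM sub-region of
 * @mem; otherwise add an empty sub-region so cleanup stays uniform.
 */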
int vfio_mmap_region(Object *obj, VFIORegion *region,
                     MemoryRegion *mem, MemoryRegion *submem,
                     void **map, size_t size, off_t offset,
                     const char *name)
{
    int ret = 0;
    VFIODevice *vbasedev = region->vbasedev;

    if (!vbasedev->no_mmap && size && region->flags &
        VFIO_REGION_INFO_FLAG_MMAP) {
        int prot = 0;

        if (region->flags & VFIO_REGION_INFO_FLAG_READ) {
            prot |= PROT_READ;
        }

        if (region->flags & VFIO_REGION_INFO_FLAG_WRITE) {
            prot |= PROT_WRITE;
        }

        *map = mmap(NULL, size, prot, MAP_SHARED,
                    vbasedev->fd,
                    region->fd_offset + offset);
        if (*map == MAP_FAILED) {
            *map = NULL;
            ret = -errno;
            goto empty_region;
        }

        memory_region_init_ram_ptr(submem, obj, name, size, *map);
        memory_region_set_skip_dump(submem);
    } else {
empty_region:
        /* Create a zero sized sub-region to make cleanup easy. */
        memory_region_init(submem, obj, name, 0);
    }

    memory_region_add_subregion(mem, offset, submem);

    return ret;
}

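/*
 * System reset handler: first let every device compute whether it needs a
 * reset, then perform the (possibly multi-device) hot resets.
 */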
void vfio_reset_handler(void *opaque)
{
    VFIOGroup *group;
    VFIODevice *vbasedev;

    QLIST_FOREACH(group, &vfio_group_list, next) {
        QLIST_FOREACH(vbasedev, &group->device_list, next) {
            vbasedev->ops->vfio_compute_needs_reset(vbasedev);
        }
    }

    QLIST_FOREACH(group, &vfio_group_list, next) {
        QLIST_FOREACH(vbasedev, &group->device_list, next) {
            if (vbasedev->needs_reset) {
                vbasedev->ops->vfio_hot_reset_multi(vbasedev);
            }
        }
    }
}

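/*
 * Register the group with the KVM VFIO pseudo device (created on first
 * use) so the kernel knows which VFIO groups are attached to this VM.
 */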
static void vfio_kvm_device_add_group(VFIOGroup *group)
{
#ifdef CONFIG_KVM
    struct kvm_device_attr attr = {
        .group = KVM_DEV_VFIO_GROUP,
        .attr = KVM_DEV_VFIO_GROUP_ADD,
        .addr = (uint64_t)(unsigned long)&group->fd,
    };

    if (!kvm_enabled()) {
        return;
    }

    if (vfio_kvm_device_fd < 0) {
        struct kvm_create_device cd = {
            .type = KVM_DEV_TYPE_VFIO,
        };

        if (kvm_vm_ioctl(kvm_state, KVM_CREATE_DEVICE, &cd)) {
            error_report("Failed to create KVM VFIO device: %m");
            return;
        }

        vfio_kvm_device_fd = cd.fd;
    }

    if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) {
        error_report("Failed to add group %d to KVM VFIO device: %m",
                     group->groupid);
    }
#endif
}

static void vfio_kvm_device_del_group(VFIOGroup *group)
{
#ifdef CONFIG_KVM
    struct kvm_device_attr attr = {
        .group = KVM_DEV_VFIO_GROUP,
        .attr = KVM_DEV_VFIO_GROUP_DEL,
        .addr = (uint64_t)(unsigned long)&group->fd,
    };

    if (vfio_kvm_device_fd < 0) {
        return;
    }

    if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) {
        error_report("Failed to remove group %d from KVM VFIO device: %m",
                     group->groupid);
    }
#endif
}

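/* Look up the VFIOAddressSpace for @as, creating it on first use. */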
static VFIOAddressSpace *vfio_get_address_space(AddressSpace *as)
{
    VFIOAddressSpace *space;

    QLIST_FOREACH(space, &vfio_address_spaces, list) {
        if (space->as == as) {
            return space;
        }
    }

    /* No suitable VFIOAddressSpace, create a new one */
    space = g_malloc0(sizeof(*space));
    space->as = as;
    QLIST_INIT(&space->containers);

    QLIST_INSERT_HEAD(&vfio_address_spaces, space, list);

    return space;
}

static void vfio_put_address_space(VFIOAddressSpace *space)
{
    if (QLIST_EMPTY(&space->containers)) {
        QLIST_REMOVE(space, list);
        g_free(space);
    }
}

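/*
 * Attach @group to a container in @as: reuse an existing container if the
 * kernel accepts the group, otherwise open a new container, pick a
 * supported IOMMU type and register the memory listener for it.
 */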
static int vfio_connect_container(VFIOGroup *group, AddressSpace *as)
{
    VFIOContainer *container;
    int ret, fd;
    VFIOAddressSpace *space;

    space = vfio_get_address_space(as);

    QLIST_FOREACH(container, &space->containers, next) {
        if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
            group->container = container;
            QLIST_INSERT_HEAD(&container->group_list, group, container_next);
            return 0;
        }
    }

    fd = qemu_open("/dev/vfio/vfio", O_RDWR);
    if (fd < 0) {
        error_report("vfio: failed to open /dev/vfio/vfio: %m");
        ret = -errno;
        goto put_space_exit;
    }

    ret = ioctl(fd, VFIO_GET_API_VERSION);
    if (ret != VFIO_API_VERSION) {
        error_report("vfio: supported vfio version: %d, "
                     "reported version: %d", VFIO_API_VERSION, ret);
        ret = -EINVAL;
        goto close_fd_exit;
    }

    container = g_malloc0(sizeof(*container));
    container->space = space;
    container->fd = fd;
    if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU) ||
        ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1v2_IOMMU)) {
        bool v2 = !!ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1v2_IOMMU);
        struct vfio_iommu_type1_info info;

        ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd);
        if (ret) {
            error_report("vfio: failed to set group container: %m");
            ret = -errno;
            goto free_container_exit;
        }

        ret = ioctl(fd, VFIO_SET_IOMMU,
                    v2 ? VFIO_TYPE1v2_IOMMU : VFIO_TYPE1_IOMMU);
        if (ret) {
            error_report("vfio: failed to set iommu for container: %m");
            ret = -errno;
            goto free_container_exit;
        }

        /*
         * FIXME: This assumes that a Type1 IOMMU can map any 64-bit
         * IOVA whatsoever.  That's not actually true, but the current
         * kernel interface doesn't tell us what it can map, and the
         * existing Type1 IOMMUs generally support any IOVA we're
         * going to actually try in practice.
         */
        container->min_iova = 0;
        container->max_iova = (hwaddr)-1;

        /* Assume just 4K IOVA page size */
        container->iova_pgsizes = 0x1000;
        info.argsz = sizeof(info);
        ret = ioctl(fd, VFIO_IOMMU_GET_INFO, &info);
        /* Ignore errors */
        if ((ret == 0) && (info.flags & VFIO_IOMMU_INFO_PGSIZES)) {
            container->iova_pgsizes = info.iova_pgsizes;
        }
    } else if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_IOMMU)) {
        struct vfio_iommu_spapr_tce_info info;

        ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd);
        if (ret) {
            error_report("vfio: failed to set group container: %m");
            ret = -errno;
            goto free_container_exit;
        }
        ret = ioctl(fd, VFIO_SET_IOMMU, VFIO_SPAPR_TCE_IOMMU);
        if (ret) {
            error_report("vfio: failed to set iommu for container: %m");
            ret = -errno;
            goto free_container_exit;
        }

        /*
         * The host kernel code implementing VFIO_IOMMU_DISABLE is called
         * when container fd is closed so we do not call it explicitly
         * in this file.
         */
        ret = ioctl(fd, VFIO_IOMMU_ENABLE);
        if (ret) {
            error_report("vfio: failed to enable container: %m");
            ret = -errno;
            goto free_container_exit;
        }

        /*
         * This only considers the host IOMMU's 32-bit window.  At
         * some point we need to add support for the optional 64-bit
         * window and dynamic windows
         */
        info.argsz = sizeof(info);
        ret = ioctl(fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info);
        if (ret) {
            error_report("vfio: VFIO_IOMMU_SPAPR_TCE_GET_INFO failed: %m");
            ret = -errno;
            goto free_container_exit;
        }
        container->min_iova = info.dma32_window_start;
        container->max_iova = container->min_iova + info.dma32_window_size - 1;

        /* Assume just 4K IOVA pages for now */
        container->iova_pgsizes = 0x1000;
    } else {
        error_report("vfio: No available IOMMU models");
        ret = -EINVAL;
        goto free_container_exit;
    }

    container->listener = vfio_memory_listener;

    memory_listener_register(&container->listener, container->space->as);

    if (container->error) {
        ret = container->error;
        error_report("vfio: memory listener initialization failed for container");
        goto listener_release_exit;
    }

    container->initialized = true;

    QLIST_INIT(&container->group_list);
    QLIST_INSERT_HEAD(&space->containers, container, next);

    group->container = container;
    QLIST_INSERT_HEAD(&container->group_list, group, container_next);

    return 0;
listener_release_exit:
    vfio_listener_release(container);

free_container_exit:
    g_free(container);

close_fd_exit:
    close(fd);

put_space_exit:
    vfio_put_address_space(space);

    return ret;
}

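/*
 * Detach @group from its container; the container itself is torn down
 * once its last group is removed.
 */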
static void vfio_disconnect_container(VFIOGroup *group)
{
    VFIOContainer *container = group->container;

    if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, &container->fd)) {
        error_report("vfio: error disconnecting group %d from container",
                     group->groupid);
    }

    QLIST_REMOVE(group, container_next);
    group->container = NULL;

    if (QLIST_EMPTY(&container->group_list)) {
        VFIOAddressSpace *space = container->space;
        VFIOGuestIOMMU *giommu, *tmp;

        vfio_listener_release(container);
        QLIST_REMOVE(container, next);

        QLIST_FOREACH_SAFE(giommu, &container->giommu_list, giommu_next, tmp) {
            memory_region_unregister_iommu_notifier(&giommu->n);
            QLIST_REMOVE(giommu, giommu_next);
            g_free(giommu);
        }

        trace_vfio_disconnect_container(container->fd);
        close(container->fd);
        g_free(container);

        vfio_put_address_space(space);
    }
}

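/*
 * Return the VFIOGroup for @groupid, opening /dev/vfio/<groupid> and
 * connecting it to a container in @as if this is the first user.
 */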
VFIOGroup *vfio_get_group(int groupid, AddressSpace *as)
{
    VFIOGroup *group;
    char path[32];
    struct vfio_group_status status = { .argsz = sizeof(status) };

    QLIST_FOREACH(group, &vfio_group_list, next) {
        if (group->groupid == groupid) {
            /* Found it.  Now is it already in the right context? */
            if (group->container->space->as == as) {
                return group;
            } else {
                error_report("vfio: group %d used in multiple address spaces",
                             group->groupid);
                return NULL;
            }
        }
    }

    group = g_malloc0(sizeof(*group));

    snprintf(path, sizeof(path), "/dev/vfio/%d", groupid);
    group->fd = qemu_open(path, O_RDWR);
    if (group->fd < 0) {
        error_report("vfio: error opening %s: %m", path);
        goto free_group_exit;
    }

    if (ioctl(group->fd, VFIO_GROUP_GET_STATUS, &status)) {
        error_report("vfio: error getting group status: %m");
        goto close_fd_exit;
    }

    if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE)) {
        error_report("vfio: error, group %d is not viable, please ensure "
                     "all devices within the iommu_group are bound to their "
                     "vfio bus driver.", groupid);
        goto close_fd_exit;
    }

    group->groupid = groupid;
    QLIST_INIT(&group->device_list);

    if (vfio_connect_container(group, as)) {
        error_report("vfio: failed to setup container for group %d", groupid);
        goto close_fd_exit;
    }

    if (QLIST_EMPTY(&vfio_group_list)) {
        qemu_register_reset(vfio_reset_handler, NULL);
    }

    QLIST_INSERT_HEAD(&vfio_group_list, group, next);

    vfio_kvm_device_add_group(group);

    return group;

close_fd_exit:
    close(group->fd);

free_group_exit:
    g_free(group);

    return NULL;
}

void vfio_put_group(VFIOGroup *group)
{
    if (!group || !QLIST_EMPTY(&group->device_list)) {
        return;
    }

    vfio_kvm_device_del_group(group);
    vfio_disconnect_container(group);
    QLIST_REMOVE(group, next);
    trace_vfio_put_group(group->fd);
    close(group->fd);
    g_free(group);

    if (QLIST_EMPTY(&vfio_group_list)) {
        qemu_unregister_reset(vfio_reset_handler, NULL);
    }
}

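/*
 * Get a device fd from the group and fill in the common VFIODevice
 * fields (fd, region and irq counts, flags).
 */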
int vfio_get_device(VFIOGroup *group, const char *name,
                    VFIODevice *vbasedev)
{
    struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) };
    int ret, fd;

    fd = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name);
    if (fd < 0) {
        error_report("vfio: error getting device %s from group %d: %m",
                     name, group->groupid);
        error_printf("Verify all devices in group %d are bound to vfio-<bus> "
                     "or pci-stub and not already in use\n", group->groupid);
        return fd;
    }

    ret = ioctl(fd, VFIO_DEVICE_GET_INFO, &dev_info);
    if (ret) {
        error_report("vfio: error getting device info: %m");
        close(fd);
        return ret;
    }

    vbasedev->fd = fd;
    vbasedev->group = group;
    QLIST_INSERT_HEAD(&group->device_list, vbasedev, next);

    vbasedev->num_irqs = dev_info.num_irqs;
    vbasedev->num_regions = dev_info.num_regions;
    vbasedev->flags = dev_info.flags;

    trace_vfio_get_device(name, dev_info.flags, dev_info.num_regions,
                          dev_info.num_irqs);

    vbasedev->reset_works = !!(dev_info.flags & VFIO_DEVICE_FLAGS_RESET);
    return 0;
}

void vfio_put_base_device(VFIODevice *vbasedev)
{
    if (!vbasedev->group) {
        return;
    }
    QLIST_REMOVE(vbasedev, next);
    vbasedev->group = NULL;
    trace_vfio_put_base_device(vbasedev->fd);
    close(vbasedev->fd);
}

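/*
 * Issue an ioctl on the container backing @groupid, taking and releasing
 * a group reference around the call.
 */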
static int vfio_container_do_ioctl(AddressSpace *as, int32_t groupid,
                                   int req, void *param)
{
    VFIOGroup *group;
    VFIOContainer *container;
    int ret = -1;

    group = vfio_get_group(groupid, as);
    if (!group) {
        error_report("vfio: group %d not registered", groupid);
        return ret;
    }

    container = group->container;
    if (group->container) {
        ret = ioctl(container->fd, req, param);
        if (ret < 0) {
            error_report("vfio: failed to ioctl %d to container: ret=%d, %s",
                         _IOC_NR(req) - VFIO_BASE, ret, strerror(errno));
        }
    }

    vfio_put_group(group);

    return ret;
}

int vfio_container_ioctl(AddressSpace *as, int32_t groupid,
                         int req, void *param)
{
    /* We allow only certain ioctls to the container */
    switch (req) {
    case VFIO_CHECK_EXTENSION:
    case VFIO_IOMMU_SPAPR_TCE_GET_INFO:
    case VFIO_EEH_PE_OP:
        break;
    default:
        /* Return an error on unknown requests */
        error_report("vfio: unsupported ioctl %X", req);
        return -1;
    }

    return vfio_container_do_ioctl(as, groupid, req, param);
}