| /* |
| * vhost-user |
| * |
| * Copyright (c) 2013 Virtual Open Systems Sarl. |
| * |
| * This work is licensed under the terms of the GNU GPL, version 2 or later. |
| * See the COPYING file in the top-level directory. |
| * |
| */ |
| |
| #include "qemu/osdep.h" |
| #include "hw/virtio/vhost.h" |
| #include "hw/virtio/vhost-backend.h" |
| #include "hw/virtio/virtio-net.h" |
| #include "sysemu/char.h" |
| #include "sysemu/kvm.h" |
| #include "qemu/error-report.h" |
| #include "qemu/sockets.h" |
| #include "exec/ram_addr.h" |
| #include "migration/migration.h" |
| |
| #include <sys/ioctl.h> |
| #include <sys/socket.h> |
| #include <sys/un.h> |
| #include <linux/vhost.h> |
| |
/* Capacity of the region table (and of the fd arrays) carried by a message. */
#define VHOST_MEMORY_MAX_NREGIONS       8
/* Bit number, in the device feature word, announcing protocol features. */
#define VHOST_USER_F_PROTOCOL_FEATURES  30

/*
 * Bit numbers of the vhost-user protocol features this file knows about.
 * VHOST_USER_PROTOCOL_F_MAX is one past the highest known bit.
 */
enum VhostUserProtocolFeature {
    VHOST_USER_PROTOCOL_F_MQ        = 0,
    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
    VHOST_USER_PROTOCOL_F_RARP      = 2,

    VHOST_USER_PROTOCOL_F_MAX
};

/* One bit set for every protocol feature enumerated above. */
#define VHOST_USER_PROTOCOL_FEATURE_MASK \
    ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
| |
/*
 * Request codes placed in VhostUserMsg.request.  The numeric values are
 * spelled out explicitly because they are exchanged with the backend.
 * VHOST_USER_MAX is one past the last defined request.
 */
typedef enum VhostUserRequest {
    VHOST_USER_NONE                  = 0,
    VHOST_USER_GET_FEATURES          = 1,
    VHOST_USER_SET_FEATURES          = 2,
    VHOST_USER_SET_OWNER             = 3,
    VHOST_USER_RESET_OWNER           = 4,
    VHOST_USER_SET_MEM_TABLE         = 5,
    VHOST_USER_SET_LOG_BASE          = 6,
    VHOST_USER_SET_LOG_FD            = 7,
    VHOST_USER_SET_VRING_NUM         = 8,
    VHOST_USER_SET_VRING_ADDR        = 9,
    VHOST_USER_SET_VRING_BASE        = 10,
    VHOST_USER_GET_VRING_BASE        = 11,
    VHOST_USER_SET_VRING_KICK        = 12,
    VHOST_USER_SET_VRING_CALL        = 13,
    VHOST_USER_SET_VRING_ERR         = 14,
    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
    VHOST_USER_GET_QUEUE_NUM         = 17,
    VHOST_USER_SET_VRING_ENABLE      = 18,
    VHOST_USER_SEND_RARP             = 19,
    VHOST_USER_MAX
} VhostUserRequest;
| |
/* One entry of the memory table sent with VHOST_USER_SET_MEM_TABLE. */
typedef struct VhostUserMemoryRegion {
    uint64_t guest_phys_addr;   /* copied from the vhost region */
    uint64_t memory_size;       /* copied from the vhost region */
    uint64_t userspace_addr;    /* copied from the vhost region */
    uint64_t mmap_offset;       /* userspace_addr minus RAM block host base */
} VhostUserMemoryRegion;
| |
| typedef struct VhostUserMemory { |
| uint32_t nregions; |
| uint32_t padding; |
| VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; |
| } VhostUserMemory; |
| |
/* Payload of VHOST_USER_SET_LOG_BASE: size and offset of the log mapping. */
typedef struct VhostUserLog {
    uint64_t mmap_size;
    uint64_t mmap_offset;
} VhostUserLog;
| |
| typedef struct VhostUserMsg { |
| VhostUserRequest request; |
| |
| #define VHOST_USER_VERSION_MASK (0x3) |
| #define VHOST_USER_REPLY_MASK (0x1<<2) |
| uint32_t flags; |
| uint32_t size; /* the following payload size */ |
| union { |
| #define VHOST_USER_VRING_IDX_MASK (0xff) |
| #define VHOST_USER_VRING_NOFD_MASK (0x1<<8) |
| uint64_t u64; |
| struct vhost_vring_state state; |
| struct vhost_vring_addr addr; |
| VhostUserMemory memory; |
| VhostUserLog log; |
| } payload; |
| } QEMU_PACKED VhostUserMsg; |
| |
| static VhostUserMsg m __attribute__ ((unused)); |
| #define VHOST_USER_HDR_SIZE (sizeof(m.request) \ |
| + sizeof(m.flags) \ |
| + sizeof(m.size)) |
| |
| #define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE) |
| |
| /* The version of the protocol we support */ |
| #define VHOST_USER_VERSION (0x1) |
| |
/* True only when KVM is in use and KVM reports eventfd support. */
static bool ioeventfd_enabled(void)
{
    if (!kvm_enabled()) {
        return false;
    }

    return kvm_eventfds_enabled();
}
| |
| static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) |
| { |
| CharDriverState *chr = dev->opaque; |
| uint8_t *p = (uint8_t *) msg; |
| int r, size = VHOST_USER_HDR_SIZE; |
| |
| r = qemu_chr_fe_read_all(chr, p, size); |
| if (r != size) { |
| error_report("Failed to read msg header. Read %d instead of %d." |
| " Original request %d.", r, size, msg->request); |
| goto fail; |
| } |
| |
| /* validate received flags */ |
| if (msg->flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) { |
| error_report("Failed to read msg header." |
| " Flags 0x%x instead of 0x%x.", msg->flags, |
| VHOST_USER_REPLY_MASK | VHOST_USER_VERSION); |
| goto fail; |
| } |
| |
| /* validate message size is sane */ |
| if (msg->size > VHOST_USER_PAYLOAD_SIZE) { |
| error_report("Failed to read msg header." |
| " Size %d exceeds the maximum %zu.", msg->size, |
| VHOST_USER_PAYLOAD_SIZE); |
| goto fail; |
| } |
| |
| if (msg->size) { |
| p += VHOST_USER_HDR_SIZE; |
| size = msg->size; |
| r = qemu_chr_fe_read_all(chr, p, size); |
| if (r != size) { |
| error_report("Failed to read msg payload." |
| " Read %d instead of %d.", r, msg->size); |
| goto fail; |
| } |
| } |
| |
| return 0; |
| |
| fail: |
| return -1; |
| } |
| |
| static bool vhost_user_one_time_request(VhostUserRequest request) |
| { |
| switch (request) { |
| case VHOST_USER_SET_OWNER: |
| case VHOST_USER_RESET_OWNER: |
| case VHOST_USER_SET_MEM_TABLE: |
| case VHOST_USER_GET_QUEUE_NUM: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| /* most non-init callers ignore the error */ |
| static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, |
| int *fds, int fd_num) |
| { |
| CharDriverState *chr = dev->opaque; |
| int size = VHOST_USER_HDR_SIZE + msg->size; |
| |
| /* |
| * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE, |
| * we just need send it once in the first time. For later such |
| * request, we just ignore it. |
| */ |
| if (vhost_user_one_time_request(msg->request) && dev->vq_index != 0) { |
| return 0; |
| } |
| |
| if (fd_num) { |
| qemu_chr_fe_set_msgfds(chr, fds, fd_num); |
| } |
| |
| return qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size) == size ? |
| 0 : -1; |
| } |
| |
| static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, |
| struct vhost_log *log) |
| { |
| int fds[VHOST_MEMORY_MAX_NREGIONS]; |
| size_t fd_num = 0; |
| bool shmfd = virtio_has_feature(dev->protocol_features, |
| VHOST_USER_PROTOCOL_F_LOG_SHMFD); |
| VhostUserMsg msg = { |
| .request = VHOST_USER_SET_LOG_BASE, |
| .flags = VHOST_USER_VERSION, |
| .payload.log.mmap_size = log->size * sizeof(*(log->log)), |
| .payload.log.mmap_offset = 0, |
| .size = sizeof(msg.payload.log), |
| }; |
| |
| if (shmfd && log->fd != -1) { |
| fds[fd_num++] = log->fd; |
| } |
| |
| vhost_user_write(dev, &msg, fds, fd_num); |
| |
| if (shmfd) { |
| msg.size = 0; |
| if (vhost_user_read(dev, &msg) < 0) { |
| return 0; |
| } |
| |
| if (msg.request != VHOST_USER_SET_LOG_BASE) { |
| error_report("Received unexpected msg type. " |
| "Expected %d received %d", |
| VHOST_USER_SET_LOG_BASE, msg.request); |
| return -1; |
| } |
| } |
| |
| return 0; |
| } |
| |
| static int vhost_user_set_mem_table(struct vhost_dev *dev, |
| struct vhost_memory *mem) |
| { |
| int fds[VHOST_MEMORY_MAX_NREGIONS]; |
| int i, fd; |
| size_t fd_num = 0; |
| VhostUserMsg msg = { |
| .request = VHOST_USER_SET_MEM_TABLE, |
| .flags = VHOST_USER_VERSION, |
| }; |
| |
| for (i = 0; i < dev->mem->nregions; ++i) { |
| struct vhost_memory_region *reg = dev->mem->regions + i; |
| ram_addr_t ram_addr; |
| |
| assert((uintptr_t)reg->userspace_addr == reg->userspace_addr); |
| qemu_ram_addr_from_host((void *)(uintptr_t)reg->userspace_addr, |
| &ram_addr); |
| fd = qemu_get_ram_fd(ram_addr); |
| if (fd > 0) { |
| msg.payload.memory.regions[fd_num].userspace_addr = reg->userspace_addr; |
| msg.payload.memory.regions[fd_num].memory_size = reg->memory_size; |
| msg.payload.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr; |
| msg.payload.memory.regions[fd_num].mmap_offset = reg->userspace_addr - |
| (uintptr_t) qemu_get_ram_block_host_ptr(ram_addr); |
| assert(fd_num < VHOST_MEMORY_MAX_NREGIONS); |
| fds[fd_num++] = fd; |
| } |
| } |
| |
| msg.payload.memory.nregions = fd_num; |
| |
| if (!fd_num) { |
| error_report("Failed initializing vhost-user memory map, " |
| "consider using -object memory-backend-file share=on"); |
| return -1; |
| } |
| |
| msg.size = sizeof(msg.payload.memory.nregions); |
| msg.size += sizeof(msg.payload.memory.padding); |
| msg.size += fd_num * sizeof(VhostUserMemoryRegion); |
| |
| vhost_user_write(dev, &msg, fds, fd_num); |
| |
| return 0; |
| } |
| |
| static int vhost_user_set_vring_addr(struct vhost_dev *dev, |
| struct vhost_vring_addr *addr) |
| { |
| VhostUserMsg msg = { |
| .request = VHOST_USER_SET_VRING_ADDR, |
| .flags = VHOST_USER_VERSION, |
| .payload.addr = *addr, |
| .size = sizeof(msg.payload.addr), |
| }; |
| |
| vhost_user_write(dev, &msg, NULL, 0); |
| |
| return 0; |
| } |
| |
/*
 * Setting the vring endianness is not implemented for vhost-user: the call
 * reports an error and fails with -1 without sending anything.
 */
static int vhost_user_set_vring_endian(struct vhost_dev *dev,
                                       struct vhost_vring_state *ring)
{
    const int unsupported = -1;

    error_report("vhost-user trying to send unhandled ioctl");
    return unsupported;
}
| |
| static int vhost_set_vring(struct vhost_dev *dev, |
| unsigned long int request, |
| struct vhost_vring_state *ring) |
| { |
| VhostUserMsg msg = { |
| .request = request, |
| .flags = VHOST_USER_VERSION, |
| .payload.state = *ring, |
| .size = sizeof(msg.payload.state), |
| }; |
| |
| vhost_user_write(dev, &msg, NULL, 0); |
| |
| return 0; |
| } |
| |
| static int vhost_user_set_vring_num(struct vhost_dev *dev, |
| struct vhost_vring_state *ring) |
| { |
| return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring); |
| } |
| |
| static int vhost_user_set_vring_base(struct vhost_dev *dev, |
| struct vhost_vring_state *ring) |
| { |
| return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring); |
| } |
| |
| static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable) |
| { |
| int i; |
| |
| if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) { |
| return -1; |
| } |
| |
| for (i = 0; i < dev->nvqs; ++i) { |
| struct vhost_vring_state state = { |
| .index = dev->vq_index + i, |
| .num = enable, |
| }; |
| |
| vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state); |
| } |
| |
| return 0; |
| } |
| |
| static int vhost_user_get_vring_base(struct vhost_dev *dev, |
| struct vhost_vring_state *ring) |
| { |
| VhostUserMsg msg = { |
| .request = VHOST_USER_GET_VRING_BASE, |
| .flags = VHOST_USER_VERSION, |
| .payload.state = *ring, |
| .size = sizeof(msg.payload.state), |
| }; |
| |
| vhost_user_write(dev, &msg, NULL, 0); |
| |
| if (vhost_user_read(dev, &msg) < 0) { |
| return 0; |
| } |
| |
| if (msg.request != VHOST_USER_GET_VRING_BASE) { |
| error_report("Received unexpected msg type. Expected %d received %d", |
| VHOST_USER_GET_VRING_BASE, msg.request); |
| return -1; |
| } |
| |
| if (msg.size != sizeof(msg.payload.state)) { |
| error_report("Received bad msg size."); |
| return -1; |
| } |
| |
| *ring = msg.payload.state; |
| |
| return 0; |
| } |
| |
| static int vhost_set_vring_file(struct vhost_dev *dev, |
| VhostUserRequest request, |
| struct vhost_vring_file *file) |
| { |
| int fds[VHOST_MEMORY_MAX_NREGIONS]; |
| size_t fd_num = 0; |
| VhostUserMsg msg = { |
| .request = request, |
| .flags = VHOST_USER_VERSION, |
| .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK, |
| .size = sizeof(msg.payload.u64), |
| }; |
| |
| if (ioeventfd_enabled() && file->fd > 0) { |
| fds[fd_num++] = file->fd; |
| } else { |
| msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK; |
| } |
| |
| vhost_user_write(dev, &msg, fds, fd_num); |
| |
| return 0; |
| } |
| |
| static int vhost_user_set_vring_kick(struct vhost_dev *dev, |
| struct vhost_vring_file *file) |
| { |
| return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file); |
| } |
| |
| static int vhost_user_set_vring_call(struct vhost_dev *dev, |
| struct vhost_vring_file *file) |
| { |
| return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file); |
| } |
| |
| static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64) |
| { |
| VhostUserMsg msg = { |
| .request = request, |
| .flags = VHOST_USER_VERSION, |
| .payload.u64 = u64, |
| .size = sizeof(msg.payload.u64), |
| }; |
| |
| vhost_user_write(dev, &msg, NULL, 0); |
| |
| return 0; |
| } |
| |
| static int vhost_user_set_features(struct vhost_dev *dev, |
| uint64_t features) |
| { |
| return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features); |
| } |
| |
| static int vhost_user_set_protocol_features(struct vhost_dev *dev, |
| uint64_t features) |
| { |
| return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features); |
| } |
| |
| static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64) |
| { |
| VhostUserMsg msg = { |
| .request = request, |
| .flags = VHOST_USER_VERSION, |
| }; |
| |
| if (vhost_user_one_time_request(request) && dev->vq_index != 0) { |
| return 0; |
| } |
| |
| vhost_user_write(dev, &msg, NULL, 0); |
| |
| if (vhost_user_read(dev, &msg) < 0) { |
| return 0; |
| } |
| |
| if (msg.request != request) { |
| error_report("Received unexpected msg type. Expected %d received %d", |
| request, msg.request); |
| return -1; |
| } |
| |
| if (msg.size != sizeof(msg.payload.u64)) { |
| error_report("Received bad msg size."); |
| return -1; |
| } |
| |
| *u64 = msg.payload.u64; |
| |
| return 0; |
| } |
| |
| static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features) |
| { |
| return vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features); |
| } |
| |
| static int vhost_user_set_owner(struct vhost_dev *dev) |
| { |
| VhostUserMsg msg = { |
| .request = VHOST_USER_SET_OWNER, |
| .flags = VHOST_USER_VERSION, |
| }; |
| |
| vhost_user_write(dev, &msg, NULL, 0); |
| |
| return 0; |
| } |
| |
| static int vhost_user_reset_device(struct vhost_dev *dev) |
| { |
| VhostUserMsg msg = { |
| .request = VHOST_USER_RESET_OWNER, |
| .flags = VHOST_USER_VERSION, |
| }; |
| |
| vhost_user_write(dev, &msg, NULL, 0); |
| |
| return 0; |
| } |
| |
| static int vhost_user_init(struct vhost_dev *dev, void *opaque) |
| { |
| uint64_t features; |
| int err; |
| |
| assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); |
| |
| dev->opaque = opaque; |
| |
| err = vhost_user_get_features(dev, &features); |
| if (err < 0) { |
| return err; |
| } |
| |
| if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) { |
| dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES; |
| |
| err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES, |
| &features); |
| if (err < 0) { |
| return err; |
| } |
| |
| dev->protocol_features = features & VHOST_USER_PROTOCOL_FEATURE_MASK; |
| err = vhost_user_set_protocol_features(dev, dev->protocol_features); |
| if (err < 0) { |
| return err; |
| } |
| |
| /* query the max queues we support if backend supports Multiple Queue */ |
| if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) { |
| err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM, |
| &dev->max_queues); |
| if (err < 0) { |
| return err; |
| } |
| } |
| } |
| |
| if (dev->migration_blocker == NULL && |
| !virtio_has_feature(dev->protocol_features, |
| VHOST_USER_PROTOCOL_F_LOG_SHMFD)) { |
| error_setg(&dev->migration_blocker, |
| "Migration disabled: vhost-user backend lacks " |
| "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature."); |
| } |
| |
| return 0; |
| } |
| |
| static int vhost_user_cleanup(struct vhost_dev *dev) |
| { |
| assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); |
| |
| dev->opaque = 0; |
| |
| return 0; |
| } |
| |
| static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx) |
| { |
| assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); |
| |
| return idx; |
| } |
| |
| static int vhost_user_memslots_limit(struct vhost_dev *dev) |
| { |
| return VHOST_MEMORY_MAX_NREGIONS; |
| } |
| |
| static bool vhost_user_requires_shm_log(struct vhost_dev *dev) |
| { |
| assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); |
| |
| return virtio_has_feature(dev->protocol_features, |
| VHOST_USER_PROTOCOL_F_LOG_SHMFD); |
| } |
| |
| static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr) |
| { |
| VhostUserMsg msg = { 0 }; |
| int err; |
| |
| assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); |
| |
| /* If guest supports GUEST_ANNOUNCE do nothing */ |
| if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) { |
| return 0; |
| } |
| |
| /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */ |
| if (virtio_has_feature(dev->protocol_features, |
| VHOST_USER_PROTOCOL_F_RARP)) { |
| msg.request = VHOST_USER_SEND_RARP; |
| msg.flags = VHOST_USER_VERSION; |
| memcpy((char *)&msg.payload.u64, mac_addr, 6); |
| msg.size = sizeof(msg.payload.u64); |
| |
| err = vhost_user_write(dev, &msg, NULL, 0); |
| return err; |
| } |
| return -1; |
| } |
| |
| static bool vhost_user_can_merge(struct vhost_dev *dev, |
| uint64_t start1, uint64_t size1, |
| uint64_t start2, uint64_t size2) |
| { |
| ram_addr_t ram_addr; |
| int mfd, rfd; |
| MemoryRegion *mr; |
| |
| mr = qemu_ram_addr_from_host((void *)(uintptr_t)start1, &ram_addr); |
| assert(mr); |
| mfd = qemu_get_ram_fd(ram_addr); |
| |
| mr = qemu_ram_addr_from_host((void *)(uintptr_t)start2, &ram_addr); |
| assert(mr); |
| rfd = qemu_get_ram_fd(ram_addr); |
| |
| return mfd == rfd; |
| } |
| |
| const VhostOps user_ops = { |
| .backend_type = VHOST_BACKEND_TYPE_USER, |
| .vhost_backend_init = vhost_user_init, |
| .vhost_backend_cleanup = vhost_user_cleanup, |
| .vhost_backend_memslots_limit = vhost_user_memslots_limit, |
| .vhost_set_log_base = vhost_user_set_log_base, |
| .vhost_set_mem_table = vhost_user_set_mem_table, |
| .vhost_set_vring_addr = vhost_user_set_vring_addr, |
| .vhost_set_vring_endian = vhost_user_set_vring_endian, |
| .vhost_set_vring_num = vhost_user_set_vring_num, |
| .vhost_set_vring_base = vhost_user_set_vring_base, |
| .vhost_get_vring_base = vhost_user_get_vring_base, |
| .vhost_set_vring_kick = vhost_user_set_vring_kick, |
| .vhost_set_vring_call = vhost_user_set_vring_call, |
| .vhost_set_features = vhost_user_set_features, |
| .vhost_get_features = vhost_user_get_features, |
| .vhost_set_owner = vhost_user_set_owner, |
| .vhost_reset_device = vhost_user_reset_device, |
| .vhost_get_vq_index = vhost_user_get_vq_index, |
| .vhost_set_vring_enable = vhost_user_set_vring_enable, |
| .vhost_requires_shm_log = vhost_user_requires_shm_log, |
| .vhost_migration_done = vhost_user_migration_done, |
| .vhost_backend_can_merge = vhost_user_can_merge, |
| }; |