/* hw/virtio/vhost-user.c - qemu - Git at Google */

 /*
  * vhost-user
  *
  * Copyright (c) 2013 Virtual Open Systems Sarl.
  *
  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  * See the COPYING file in the top-level directory.
  *
  */

 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "hw/virtio/vhost.h"
 #include "hw/virtio/vhost-backend.h"
 #include "hw/virtio/virtio-net.h"
 #include "sysemu/char.h"
 #include "sysemu/kvm.h"
 #include "qemu/error-report.h"
 #include "qemu/sockets.h"
 #include "exec/ram_addr.h"
 #include "migration/migration.h"

 #include <sys/ioctl.h>
 #include <sys/socket.h>
 #include <sys/un.h>
 #include <linux/vhost.h>

 /* Capacity of the regions[] array in VhostUserMemory and of the fd arrays. */
 #define VHOST_MEMORY_MAX_NREGIONS    8
 /* Feature bit tested before the protocol-features exchange is attempted. */
 #define VHOST_USER_F_PROTOCOL_FEATURES 30

 /* Bit numbers inside the protocol-features word kept in the device. */
 enum VhostUserProtocolFeature {
     /* When set, vhost_user_init() queries VHOST_USER_GET_QUEUE_NUM. */
     VHOST_USER_PROTOCOL_F_MQ = 0,
     /* When set, the log fd is passed and a SET_LOG_BASE reply is read. */
     VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
     /* When set, vhost_user_migration_done() sends VHOST_USER_SEND_RARP. */
     VHOST_USER_PROTOCOL_F_RARP = 2,

     /* Number of bits defined above; used to build the mask below. */
     VHOST_USER_PROTOCOL_F_MAX
 };

 /* Mask with one bit set for every protocol feature defined above. */
 #define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)

 /*
  * Request codes stored in VhostUserMsg.request.  The numeric values are
  * fixed explicitly, so entries must not be renumbered.
  */
 typedef enum VhostUserRequest {
     VHOST_USER_NONE = 0,
     VHOST_USER_GET_FEATURES = 1,
     VHOST_USER_SET_FEATURES = 2,
     VHOST_USER_SET_OWNER = 3,
     VHOST_USER_RESET_OWNER = 4,
     VHOST_USER_SET_MEM_TABLE = 5,
     VHOST_USER_SET_LOG_BASE = 6,
     VHOST_USER_SET_LOG_FD = 7,
     VHOST_USER_SET_VRING_NUM = 8,
     VHOST_USER_SET_VRING_ADDR = 9,
     VHOST_USER_SET_VRING_BASE = 10,
     VHOST_USER_GET_VRING_BASE = 11,
     VHOST_USER_SET_VRING_KICK = 12,
     VHOST_USER_SET_VRING_CALL = 13,
     VHOST_USER_SET_VRING_ERR = 14,
     VHOST_USER_GET_PROTOCOL_FEATURES = 15,
     VHOST_USER_SET_PROTOCOL_FEATURES = 16,
     VHOST_USER_GET_QUEUE_NUM = 17,
     VHOST_USER_SET_VRING_ENABLE = 18,
     VHOST_USER_SEND_RARP = 19,
     /* One past the last request code defined above. */
     VHOST_USER_MAX
 } VhostUserRequest;

 /* One entry of the memory table sent with VHOST_USER_SET_MEM_TABLE. */
 typedef struct VhostUserMemoryRegion {
     uint64_t guest_phys_addr;
     uint64_t memory_size;
     uint64_t userspace_addr;
     /* Filled as userspace_addr minus the RAM block's host pointer. */
     uint64_t mmap_offset;
 } VhostUserMemoryRegion;

 /* Payload of VHOST_USER_SET_MEM_TABLE: a count followed by the regions. */
 typedef struct VhostUserMemory {
     /* Number of entries of regions[] that are in use. */
     uint32_t nregions;
     uint32_t padding;
     VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
 } VhostUserMemory;

 /* Payload of VHOST_USER_SET_LOG_BASE. */
 typedef struct VhostUserLog {
     /* Set by the sender to log->size * sizeof(*(log->log)). */
     uint64_t mmap_size;
     /* Always sent as 0 by vhost_user_set_log_base(). */
     uint64_t mmap_offset;
 } VhostUserLog;

 /*
  * A complete message: three header fields (request, flags, size) followed
  * by a payload union.  Only the header plus 'size' payload bytes are
  * written to or read from the character device.
  */
 typedef struct VhostUserMsg {
     VhostUserRequest request;

 /* Bits of 'flags': low two bits are the version, bit 2 marks a reply. */
 #define VHOST_USER_VERSION_MASK     (0x3)
 #define VHOST_USER_REPLY_MASK       (0x1<<2)
     uint32_t flags;
     uint32_t size; /* the following payload size */
     union {
 /* Bits of 'u64' for vring file requests: vring index and "no fd" marker. */
 #define VHOST_USER_VRING_IDX_MASK   (0xff)
 #define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
         uint64_t u64;
         struct vhost_vring_state state;
         struct vhost_vring_addr addr;
         VhostUserMemory memory;
         VhostUserLog log;
     } payload;
 } QEMU_PACKED VhostUserMsg;

 /* Dummy object used only as a sizeof() operand by the macros below. */
 static VhostUserMsg m __attribute__ ((unused));
 /* Combined size of the request, flags and size header fields. */
 #define VHOST_USER_HDR_SIZE (sizeof(m.request) \
                             + sizeof(m.flags) \
                             + sizeof(m.size))

 /* Largest payload a VhostUserMsg can hold (whole message minus header). */
 #define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE)

 /* The version of the protocol we support */
 #define VHOST_USER_VERSION    (0x1)

 /* Return true only when kvm_enabled() and kvm_eventfds_enabled() both hold. */
 static bool ioeventfd_enabled(void)
 {
     if (!kvm_enabled()) {
         return false;
     }

     return kvm_eventfds_enabled();
 }

 /*
  * Read one reply from the character device stored in dev->opaque into *msg.
  *
  * The header is read first; its flags must equal
  * VHOST_USER_REPLY_MASK | VHOST_USER_VERSION and its size must not exceed
  * VHOST_USER_PAYLOAD_SIZE.  If the size is non-zero, that many payload bytes
  * are then read directly after the header.
  *
  * Returns 0 on success and -1 (after error_report()) on any failure.
  */
 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
 {
     CharDriverState *chr = dev->opaque;
     uint8_t *p = (uint8_t *) msg;
     int r, size = VHOST_USER_HDR_SIZE;

     r = qemu_chr_fe_read_all(chr, p, size);
     if (r != size) {
         error_report("Failed to read msg header. Read %d instead of %d."
                      " Original request %d.", r, size, msg->request);
         return -1;
     }

     /* validate received flags */
     if (msg->flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
         error_report("Failed to read msg header."
                 " Flags 0x%x instead of 0x%x.", msg->flags,
                 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
         return -1;
     }

     /*
      * validate message size is sane; size is unsigned, so print it with %u
      * to keep an oversized value from showing up as a negative number
      */
     if (msg->size > VHOST_USER_PAYLOAD_SIZE) {
         error_report("Failed to read msg header."
                 " Size %u exceeds the maximum %zu.", msg->size,
                 VHOST_USER_PAYLOAD_SIZE);
         return -1;
     }

     if (msg->size) {
         p += VHOST_USER_HDR_SIZE;
         size = msg->size;
         r = qemu_chr_fe_read_all(chr, p, size);
         if (r != size) {
             error_report("Failed to read msg payload."
                          " Read %d instead of %u.", r, msg->size);
             return -1;
         }
     }

     return 0;
 }

 /*
  * Return true for the requests that vhost_user_write() and
  * vhost_user_get_u64() skip when dev->vq_index is non-zero.
  */
 static bool vhost_user_one_time_request(VhostUserRequest request)
 {
     return request == VHOST_USER_SET_OWNER ||
            request == VHOST_USER_RESET_OWNER ||
            request == VHOST_USER_SET_MEM_TABLE ||
            request == VHOST_USER_GET_QUEUE_NUM;
 }

 /*
  * Write the header and msg->size payload bytes of *msg to the character
  * device in dev->opaque, attaching fd_num descriptors from fds when
  * fd_num is non-zero.
  *
  * Returns 0 when the whole message was written (or the request was
  * skipped, see below) and -1 otherwise.
  * most non-init callers ignore the error
  */
 static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
                             int *fds, int fd_num)
 {
     CharDriverState *chr = dev->opaque;
     int total = VHOST_USER_HDR_SIZE + msg->size;
     int written;

     /*
      * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE,
      * we just need send it once in the first time. For later such
      * request, we just ignore it.
      */
     if (dev->vq_index != 0 && vhost_user_one_time_request(msg->request)) {
         return 0;
     }

     if (fd_num != 0) {
         qemu_chr_fe_set_msgfds(chr, fds, fd_num);
     }

     written = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, total);
     if (written != total) {
         return -1;
     }

     return 0;
 }

 /*
  * Send VHOST_USER_SET_LOG_BASE describing the log's size.  The 'base'
  * argument is not used here.
  *
  * When the LOG_SHMFD protocol feature is set, log->fd is attached (unless
  * it is -1) and a reply is read back.  A failed read still returns 0; a
  * reply with a different request code returns -1.
  */
 static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
                                    struct vhost_log *log)
 {
     int fds[VHOST_MEMORY_MAX_NREGIONS];
     size_t nfds = 0;
     bool use_shmfd = virtio_has_feature(dev->protocol_features,
                                         VHOST_USER_PROTOCOL_F_LOG_SHMFD);
     VhostUserMsg msg = {
         .request = VHOST_USER_SET_LOG_BASE,
         .flags = VHOST_USER_VERSION,
         .size = sizeof(msg.payload.log),
         .payload.log.mmap_size = log->size * sizeof(*(log->log)),
         .payload.log.mmap_offset = 0,
     };

     if (use_shmfd && log->fd != -1) {
         fds[nfds] = log->fd;
         nfds++;
     }

     vhost_user_write(dev, &msg, fds, nfds);

     /* Without the shared-fd feature no reply is read. */
     if (!use_shmfd) {
         return 0;
     }

     msg.size = 0;
     if (vhost_user_read(dev, &msg) < 0) {
         return 0;
     }

     if (msg.request == VHOST_USER_SET_LOG_BASE) {
         return 0;
     }

     error_report("Received unexpected msg type. "
                  "Expected %d received %d",
                  VHOST_USER_SET_LOG_BASE, msg.request);
     return -1;
 }

 /*
  * Send VHOST_USER_SET_MEM_TABLE built from dev->mem (the 'mem' argument
  * is not consulted).
  *
  * Every region whose backing RAM has a file descriptor greater than 0 is
  * added to the payload and its descriptor is attached to the message.
  * Returns -1 (after error_report()) when no region qualifies, 0 otherwise;
  * the result of the write itself is not propagated.
  */
 static int vhost_user_set_mem_table(struct vhost_dev *dev,
                                     struct vhost_memory *mem)
 {
     int fds[VHOST_MEMORY_MAX_NREGIONS];
     int i, fd;
     size_t fd_num = 0;
     VhostUserMsg msg = {
         .request = VHOST_USER_SET_MEM_TABLE,
         .flags = VHOST_USER_VERSION,
     };

     for (i = 0; i < dev->mem->nregions; ++i) {
         struct vhost_memory_region *reg = dev->mem->regions + i;
         ram_addr_t ram_addr;

         assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
         qemu_ram_addr_from_host((void *)(uintptr_t)reg->userspace_addr,
                                 &ram_addr);
         fd = qemu_get_ram_fd(ram_addr);
         if (fd > 0) {
             /*
              * Check the slot index before storing into regions[] and
              * fds[], so an overflow is caught before any out-of-bounds
              * write happens.
              */
             assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
             msg.payload.memory.regions[fd_num].userspace_addr = reg->userspace_addr;
             msg.payload.memory.regions[fd_num].memory_size  = reg->memory_size;
             msg.payload.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr;
             msg.payload.memory.regions[fd_num].mmap_offset = reg->userspace_addr -
                 (uintptr_t) qemu_get_ram_block_host_ptr(ram_addr);
             fds[fd_num++] = fd;
         }
     }

     msg.payload.memory.nregions = fd_num;

     if (!fd_num) {
         error_report("Failed initializing vhost-user memory map, "
                      "consider using -object memory-backend-file share=on");
         return -1;
     }

     /* Only the count, the padding and the used regions are sent. */
     msg.size = sizeof(msg.payload.memory.nregions);
     msg.size += sizeof(msg.payload.memory.padding);
     msg.size += fd_num * sizeof(VhostUserMemoryRegion);

     vhost_user_write(dev, &msg, fds, fd_num);

     return 0;
 }

 /*
  * Send VHOST_USER_SET_VRING_ADDR with a copy of *addr as payload.
  * Always returns 0; the write result is not propagated.
  */
 static int vhost_user_set_vring_addr(struct vhost_dev *dev,
                                      struct vhost_vring_addr *addr)
 {
     VhostUserMsg msg = {
         .request = VHOST_USER_SET_VRING_ADDR,
         .flags = VHOST_USER_VERSION,
         .size = sizeof(msg.payload.addr),
         .payload.addr = *addr,
     };

     (void) vhost_user_write(dev, &msg, NULL, 0);

     return 0;
 }

 /*
  * No message is sent for this operation: an error is reported and -1 is
  * returned unconditionally.  Neither argument is used.
  */
 static int vhost_user_set_vring_endian(struct vhost_dev *dev,
                                        struct vhost_vring_state *ring)
 {
     const int unsupported = -1;

     error_report("vhost-user trying to send unhandled ioctl");

     return unsupported;
 }

 /*
  * Send 'request' with a copy of *ring as the vring-state payload.
  * Always returns 0; the write result is not propagated.
  */
 static int vhost_set_vring(struct vhost_dev *dev,
                            unsigned long int request,
                            struct vhost_vring_state *ring)
 {
     VhostUserMsg msg = {
         .request = request,
         .flags = VHOST_USER_VERSION,
         .size = sizeof(msg.payload.state),
         .payload.state = *ring,
     };

     (void) vhost_user_write(dev, &msg, NULL, 0);

     return 0;
 }

 /* Forward *ring to the backend as a VHOST_USER_SET_VRING_NUM request. */
 static int vhost_user_set_vring_num(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
 {
     int ret = vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring);

     return ret;
 }

 /* Forward *ring to the backend as a VHOST_USER_SET_VRING_BASE request. */
 static int vhost_user_set_vring_base(struct vhost_dev *dev,
                                      struct vhost_vring_state *ring)
 {
     int ret = vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring);

     return ret;
 }

 /*
  * Send VHOST_USER_SET_VRING_ENABLE, carrying 'enable' in the num field,
  * for each of the dev->nvqs vrings starting at dev->vq_index.
  *
  * Returns -1 without sending anything when dev->features lacks
  * VHOST_USER_F_PROTOCOL_FEATURES, otherwise 0.
  */
 static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
 {
     int offset = 0;

     if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) {
         return -1;
     }

     while (offset < dev->nvqs) {
         struct vhost_vring_state state = {
             .index = dev->vq_index + offset,
             .num   = enable,
         };

         vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state);
         offset++;
     }

     return 0;
 }

 /*
  * Send VHOST_USER_GET_VRING_BASE with *ring as payload and store the
  * replied vring state back into *ring.
  *
  * Returns 0 on success and also when the reply cannot be read (in which
  * case *ring is left unchanged); returns -1 when the reply carries a
  * different request code or a payload of the wrong size.
  */
 static int vhost_user_get_vring_base(struct vhost_dev *dev,
                                      struct vhost_vring_state *ring)
 {
     int status = -1;
     VhostUserMsg msg = {
         .request = VHOST_USER_GET_VRING_BASE,
         .flags = VHOST_USER_VERSION,
         .size = sizeof(msg.payload.state),
         .payload.state = *ring,
     };

     vhost_user_write(dev, &msg, NULL, 0);

     if (vhost_user_read(dev, &msg) < 0) {
         return 0;
     }

     if (msg.request != VHOST_USER_GET_VRING_BASE) {
         error_report("Received unexpected msg type. Expected %d received %d",
                      VHOST_USER_GET_VRING_BASE, msg.request);
     } else if (msg.size != sizeof(msg.payload.state)) {
         error_report("Received bad msg size.");
     } else {
         *ring = msg.payload.state;
         status = 0;
     }

     return status;
 }

 /*
  * Send 'request' for the vring file->index (masked with
  * VHOST_USER_VRING_IDX_MASK).
  *
  * file->fd is attached when ioeventfd_enabled() is true and the fd is
  * greater than 0; otherwise VHOST_USER_VRING_NOFD_MASK is set in the
  * payload.  Always returns 0; the write result is not propagated.
  */
 static int vhost_set_vring_file(struct vhost_dev *dev,
                                 VhostUserRequest request,
                                 struct vhost_vring_file *file)
 {
     int fds[VHOST_MEMORY_MAX_NREGIONS];
     size_t nfds = 0;
     VhostUserMsg msg = {
         .request = request,
         .flags = VHOST_USER_VERSION,
         .size = sizeof(msg.payload.u64),
         .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK,
     };
     bool pass_fd = ioeventfd_enabled() && file->fd > 0;

     if (pass_fd) {
         fds[nfds] = file->fd;
         nfds++;
     } else {
         msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
     }

     vhost_user_write(dev, &msg, fds, nfds);

     return 0;
 }

 /* Forward *file to the backend as a VHOST_USER_SET_VRING_KICK request. */
 static int vhost_user_set_vring_kick(struct vhost_dev *dev,
                                      struct vhost_vring_file *file)
 {
     int ret = vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file);

     return ret;
 }

 /* Forward *file to the backend as a VHOST_USER_SET_VRING_CALL request. */
 static int vhost_user_set_vring_call(struct vhost_dev *dev,
                                      struct vhost_vring_file *file)
 {
     int ret = vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file);

     return ret;
 }

 /*
  * Send 'request' with the 64-bit value u64 as its payload.
  * Always returns 0; the write result is not propagated.
  */
 static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64)
 {
     VhostUserMsg msg = {
         .request = request,
         .flags = VHOST_USER_VERSION,
         .size = sizeof(msg.payload.u64),
         .payload.u64 = u64,
     };

     (void) vhost_user_write(dev, &msg, NULL, 0);

     return 0;
 }

 /* Send 'features' to the backend with VHOST_USER_SET_FEATURES. */
 static int vhost_user_set_features(struct vhost_dev *dev,
                                    uint64_t features)
 {
     int ret = vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features);

     return ret;
 }

 /* Send 'features' to the backend with VHOST_USER_SET_PROTOCOL_FEATURES. */
 static int vhost_user_set_protocol_features(struct vhost_dev *dev,
                                             uint64_t features)
 {
     int ret;

     ret = vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features);

     return ret;
 }

 /*
  * Send the payload-less 'request' and store the 64-bit reply in *u64.
  *
  * Returns 0 on success.  Also returns 0, leaving *u64 untouched, when
  * the request is a one-time request and dev->vq_index is non-zero.
  * Returns -1 when the request cannot be written, the reply cannot be
  * read, or the reply has an unexpected request code or payload size.
  */
 static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64)
 {
     VhostUserMsg msg = {
         .request = request,
         .flags = VHOST_USER_VERSION,
     };

     if (vhost_user_one_time_request(request) && dev->vq_index != 0) {
         return 0;
     }

     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
         return -1;
     }

     /*
      * Without a reply there is no value to hand back; reporting success
      * here would let the caller consume an unset *u64.
      */
     if (vhost_user_read(dev, &msg) < 0) {
         return -1;
     }

     if (msg.request != request) {
         error_report("Received unexpected msg type. Expected %d received %d",
                      request, msg.request);
         return -1;
     }

     if (msg.size != sizeof(msg.payload.u64)) {
         error_report("Received bad msg size.");
         return -1;
     }

     *u64 = msg.payload.u64;

     return 0;
 }

 /* Query the backend with VHOST_USER_GET_FEATURES, storing into *features. */
 static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features)
 {
     int ret = vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features);

     return ret;
 }

 /*
  * Send a payload-less VHOST_USER_SET_OWNER message.
  * Always returns 0; the write result is not propagated.
  */
 static int vhost_user_set_owner(struct vhost_dev *dev)
 {
     VhostUserMsg msg = {
         .flags = VHOST_USER_VERSION,
         .request = VHOST_USER_SET_OWNER,
     };

     (void) vhost_user_write(dev, &msg, NULL, 0);

     return 0;
 }

 /*
  * Send a payload-less VHOST_USER_RESET_OWNER message.
  * Always returns 0; the write result is not propagated.
  */
 static int vhost_user_reset_device(struct vhost_dev *dev)
 {
     VhostUserMsg msg = {
         .flags = VHOST_USER_VERSION,
         .request = VHOST_USER_RESET_OWNER,
     };

     (void) vhost_user_write(dev, &msg, NULL, 0);

     return 0;
 }

 /*
  * Backend init hook: store 'opaque' in dev->opaque and negotiate features.
  *
  * The backend's feature word is fetched first.  If it advertises
  * VHOST_USER_F_PROTOCOL_FEATURES, the protocol features are fetched,
  * masked with VHOST_USER_PROTOCOL_FEATURE_MASK, sent back, and (with the
  * MQ bit set) the queue count is read into dev->max_queues.  A migration
  * blocker is installed when none exists yet and LOG_SHMFD is missing.
  *
  * Returns 0 on success or the negative value of the first failing step.
  */
 static int vhost_user_init(struct vhost_dev *dev, void *opaque)
 {
     /*
      * Start from "no features": vhost_user_get_u64() has return-0 paths
      * that do not store a value, and 'features' is read right after.
      */
     uint64_t features = 0;
     int err;

     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

     dev->opaque = opaque;

     err = vhost_user_get_features(dev, &features);
     if (err < 0) {
         return err;
     }

     if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) {
         dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;

         err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES,
                                  &features);
         if (err < 0) {
             return err;
         }

         dev->protocol_features = features & VHOST_USER_PROTOCOL_FEATURE_MASK;
         err = vhost_user_set_protocol_features(dev, dev->protocol_features);
         if (err < 0) {
             return err;
         }

         /* query the max queues we support if backend supports Multiple Queue */
         if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) {
             err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM,
                                      &dev->max_queues);
             if (err < 0) {
                 return err;
             }
         }
     }

     if (dev->migration_blocker == NULL &&
         !virtio_has_feature(dev->protocol_features,
                             VHOST_USER_PROTOCOL_F_LOG_SHMFD)) {
         error_setg(&dev->migration_blocker,
                    "Migration disabled: vhost-user backend lacks "
                    "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature.");
     }

     return 0;
 }

 /* Backend cleanup hook: clear dev->opaque and return 0. */
 static int vhost_user_cleanup(struct vhost_dev *dev)
 {
     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

     /* Forget the character device that the init hook stored here. */
     dev->opaque = NULL;

     return 0;
 }

 /*
  * Return idx unchanged after asserting that it lies in the range
  * [dev->vq_index, dev->vq_index + dev->nvqs).
  */
 static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx)
 {
     assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);

     return idx;
 }

 /* Report VHOST_MEMORY_MAX_NREGIONS as the limit; 'dev' is not consulted. */
 static int vhost_user_memslots_limit(struct vhost_dev *dev)
 {
     const int limit = VHOST_MEMORY_MAX_NREGIONS;

     return limit;
 }

 /* Return whether dev->protocol_features has the LOG_SHMFD bit set. */
 static bool vhost_user_requires_shm_log(struct vhost_dev *dev)
 {
     bool has_shmfd;

     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

     has_shmfd = virtio_has_feature(dev->protocol_features,
                                    VHOST_USER_PROTOCOL_F_LOG_SHMFD);

     return has_shmfd;
 }

 /*
  * Migration-done hook.
  *
  * Returns 0 without sending anything when the acked features include
  * VIRTIO_NET_F_GUEST_ANNOUNCE.  Otherwise, if the RARP protocol feature
  * is set, sends VHOST_USER_SEND_RARP with the first 6 bytes of mac_addr
  * copied into the u64 payload and returns the write result.  Returns -1
  * when neither applies.
  */
 static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr)
 {
     VhostUserMsg msg = { 0 };

     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

     /* If guest supports GUEST_ANNOUNCE do nothing */
     if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) {
         return 0;
     }

     if (!virtio_has_feature(dev->protocol_features,
                             VHOST_USER_PROTOCOL_F_RARP)) {
         return -1;
     }

     /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */
     msg.request = VHOST_USER_SEND_RARP;
     msg.flags = VHOST_USER_VERSION;
     memcpy((char *)&msg.payload.u64, mac_addr, 6);
     msg.size = sizeof(msg.payload.u64);

     return vhost_user_write(dev, &msg, NULL, 0);
 }

 /*
  * Return true when the RAM behind host addresses start1 and start2 yields
  * the same value from qemu_get_ram_fd().  size1 and size2 are not
  * consulted.  Both addresses must resolve to a memory region (asserted).
  */
 static bool vhost_user_can_merge(struct vhost_dev *dev,
                                  uint64_t start1, uint64_t size1,
                                  uint64_t start2, uint64_t size2)
 {
     MemoryRegion *mr;
     ram_addr_t ram_addr;
     int first_fd, second_fd;

     mr = qemu_ram_addr_from_host((void *)(uintptr_t)start1, &ram_addr);
     assert(mr);
     first_fd = qemu_get_ram_fd(ram_addr);

     mr = qemu_ram_addr_from_host((void *)(uintptr_t)start2, &ram_addr);
     assert(mr);
     second_fd = qemu_get_ram_fd(ram_addr);

     if (first_fd != second_fd) {
         return false;
     }

     return true;
 }

 const VhostOps user_ops = {
         .backend_type = VHOST_BACKEND_TYPE_USER,
         .vhost_backend_init = vhost_user_init,
         .vhost_backend_cleanup = vhost_user_cleanup,
         .vhost_backend_memslots_limit = vhost_user_memslots_limit,
         .vhost_set_log_base = vhost_user_set_log_base,
         .vhost_set_mem_table = vhost_user_set_mem_table,
         .vhost_set_vring_addr = vhost_user_set_vring_addr,
         .vhost_set_vring_endian = vhost_user_set_vring_endian,
         .vhost_set_vring_num = vhost_user_set_vring_num,
         .vhost_set_vring_base = vhost_user_set_vring_base,
         .vhost_get_vring_base = vhost_user_get_vring_base,
         .vhost_set_vring_kick = vhost_user_set_vring_kick,
         .vhost_set_vring_call = vhost_user_set_vring_call,
         .vhost_set_features = vhost_user_set_features,
         .vhost_get_features = vhost_user_get_features,
         .vhost_set_owner = vhost_user_set_owner,
         .vhost_reset_device = vhost_user_reset_device,
         .vhost_get_vq_index = vhost_user_get_vq_index,
         .vhost_set_vring_enable = vhost_user_set_vring_enable,
         .vhost_requires_shm_log = vhost_user_requires_shm_log,
         .vhost_migration_done = vhost_user_migration_done,
         .vhost_backend_can_merge = vhost_user_can_merge,
 };
	/*
	 * vhost-user
	 *
	 * Copyright (c) 2013 Virtual Open Systems Sarl.
	 *
	 * This work is licensed under the terms of the GNU GPL, version 2 or later.
	 * See the COPYING file in the top-level directory.
	 *
	 */

	#include "qemu/osdep.h"
	#include "qapi/error.h"
	#include "hw/virtio/vhost.h"
	#include "hw/virtio/vhost-backend.h"
	#include "hw/virtio/virtio-net.h"
	#include "sysemu/char.h"
	#include "sysemu/kvm.h"
	#include "qemu/error-report.h"
	#include "qemu/sockets.h"
	#include "exec/ram_addr.h"
	#include "migration/migration.h"

	#include <sys/ioctl.h>
	#include <sys/socket.h>
	#include <sys/un.h>
	#include <linux/vhost.h>

	/* Capacity of the regions[] array in VhostUserMemory and of the fd arrays. */
	#define VHOST_MEMORY_MAX_NREGIONS 8
	/* Feature bit tested before the protocol-features exchange is attempted. */
	#define VHOST_USER_F_PROTOCOL_FEATURES 30

	/* Bit numbers inside the protocol-features word kept in the device. */
	enum VhostUserProtocolFeature {
	VHOST_USER_PROTOCOL_F_MQ = 0,
	VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
	VHOST_USER_PROTOCOL_F_RARP = 2,

	/* Number of bits defined above; used to build the mask below. */
	VHOST_USER_PROTOCOL_F_MAX
	};

	/* Mask with one bit set for every protocol feature defined above. */
	#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)

	/*
	 * Request codes stored in VhostUserMsg.request.  The numeric values are
	 * fixed explicitly, so entries must not be renumbered.
	 */
	typedef enum VhostUserRequest {
	VHOST_USER_NONE = 0,
	VHOST_USER_GET_FEATURES = 1,
	VHOST_USER_SET_FEATURES = 2,
	VHOST_USER_SET_OWNER = 3,
	VHOST_USER_RESET_OWNER = 4,
	VHOST_USER_SET_MEM_TABLE = 5,
	VHOST_USER_SET_LOG_BASE = 6,
	VHOST_USER_SET_LOG_FD = 7,
	VHOST_USER_SET_VRING_NUM = 8,
	VHOST_USER_SET_VRING_ADDR = 9,
	VHOST_USER_SET_VRING_BASE = 10,
	VHOST_USER_GET_VRING_BASE = 11,
	VHOST_USER_SET_VRING_KICK = 12,
	VHOST_USER_SET_VRING_CALL = 13,
	VHOST_USER_SET_VRING_ERR = 14,
	VHOST_USER_GET_PROTOCOL_FEATURES = 15,
	VHOST_USER_SET_PROTOCOL_FEATURES = 16,
	VHOST_USER_GET_QUEUE_NUM = 17,
	VHOST_USER_SET_VRING_ENABLE = 18,
	VHOST_USER_SEND_RARP = 19,
	/* One past the last request code defined above. */
	VHOST_USER_MAX
	} VhostUserRequest;

	/* One entry of the memory table sent with VHOST_USER_SET_MEM_TABLE. */
	typedef struct VhostUserMemoryRegion {
	uint64_t guest_phys_addr;
	uint64_t memory_size;
	uint64_t userspace_addr;
	/* Filled as userspace_addr minus the RAM block's host pointer. */
	uint64_t mmap_offset;
	} VhostUserMemoryRegion;

	/* Payload of VHOST_USER_SET_MEM_TABLE: a count followed by the regions. */
	typedef struct VhostUserMemory {
	/* Number of entries of regions[] that are in use. */
	uint32_t nregions;
	uint32_t padding;
	VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
	} VhostUserMemory;

	/* Payload of VHOST_USER_SET_LOG_BASE. */
	typedef struct VhostUserLog {
	/* Set by the sender to log->size * sizeof(*(log->log)). */
	uint64_t mmap_size;
	/* Always sent as 0 by vhost_user_set_log_base(). */
	uint64_t mmap_offset;
	} VhostUserLog;

	/*
	 * A complete message: three header fields (request, flags, size) followed
	 * by a payload union.  Only the header plus 'size' payload bytes are
	 * written to or read from the character device.
	 */
	typedef struct VhostUserMsg {
	VhostUserRequest request;

	/* Bits of 'flags': low two bits are the version, bit 2 marks a reply. */
	#define VHOST_USER_VERSION_MASK (0x3)
	#define VHOST_USER_REPLY_MASK (0x1<<2)
	uint32_t flags;
	uint32_t size; /* the following payload size */
	union {
	/* Bits of 'u64' for vring file requests: vring index and "no fd" marker. */
	#define VHOST_USER_VRING_IDX_MASK (0xff)
	#define VHOST_USER_VRING_NOFD_MASK (0x1<<8)
	uint64_t u64;
	struct vhost_vring_state state;
	struct vhost_vring_addr addr;
	VhostUserMemory memory;
	VhostUserLog log;
	} payload;
	} QEMU_PACKED VhostUserMsg;

	/* Dummy object used only as a sizeof() operand by the macros below. */
	static VhostUserMsg m __attribute__ ((unused));
	/* Combined size of the request, flags and size header fields. */
	#define VHOST_USER_HDR_SIZE (sizeof(m.request) \
	+ sizeof(m.flags) \
	+ sizeof(m.size))

	/* Largest payload a VhostUserMsg can hold (whole message minus header). */
	#define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE)

	/* The version of the protocol we support */
	#define VHOST_USER_VERSION (0x1)

	/* Return true only when kvm_enabled() and kvm_eventfds_enabled() both hold. */
	static bool ioeventfd_enabled(void)
	{
	    if (!kvm_enabled()) {
	        return false;
	    }

	    return kvm_eventfds_enabled();
	}

	/*
	 * Read one reply from the character device stored in dev->opaque into *msg.
	 *
	 * The header is read first; its flags must equal
	 * VHOST_USER_REPLY_MASK | VHOST_USER_VERSION and its size must not exceed
	 * VHOST_USER_PAYLOAD_SIZE.  If the size is non-zero, that many payload bytes
	 * are then read directly after the header.
	 *
	 * Returns 0 on success and -1 (after error_report()) on any failure.
	 */
	static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
	{
	    CharDriverState *chr = dev->opaque;
	    uint8_t *p = (uint8_t *) msg;
	    int r, size = VHOST_USER_HDR_SIZE;

	    r = qemu_chr_fe_read_all(chr, p, size);
	    if (r != size) {
	        error_report("Failed to read msg header. Read %d instead of %d."
	                     " Original request %d.", r, size, msg->request);
	        return -1;
	    }

	    /* validate received flags */
	    if (msg->flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
	        error_report("Failed to read msg header."
	                " Flags 0x%x instead of 0x%x.", msg->flags,
	                VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
	        return -1;
	    }

	    /*
	     * validate message size is sane; size is unsigned, so print it with %u
	     * to keep an oversized value from showing up as a negative number
	     */
	    if (msg->size > VHOST_USER_PAYLOAD_SIZE) {
	        error_report("Failed to read msg header."
	                " Size %u exceeds the maximum %zu.", msg->size,
	                VHOST_USER_PAYLOAD_SIZE);
	        return -1;
	    }

	    if (msg->size) {
	        p += VHOST_USER_HDR_SIZE;
	        size = msg->size;
	        r = qemu_chr_fe_read_all(chr, p, size);
	        if (r != size) {
	            error_report("Failed to read msg payload."
	                         " Read %d instead of %u.", r, msg->size);
	            return -1;
	        }
	    }

	    return 0;
	}

	/*
	 * Return true for the requests that vhost_user_write() and
	 * vhost_user_get_u64() skip when dev->vq_index is non-zero.
	 */
	static bool vhost_user_one_time_request(VhostUserRequest request)
	{
	    return request == VHOST_USER_SET_OWNER ||
	           request == VHOST_USER_RESET_OWNER ||
	           request == VHOST_USER_SET_MEM_TABLE ||
	           request == VHOST_USER_GET_QUEUE_NUM;
	}

	/*
	 * Write the header and msg->size payload bytes of *msg to the character
	 * device in dev->opaque, attaching fd_num descriptors from fds when
	 * fd_num is non-zero.
	 *
	 * Returns 0 when the whole message was written (or the request was
	 * skipped, see below) and -1 otherwise.
	 * most non-init callers ignore the error
	 */
	static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
	                            int *fds, int fd_num)
	{
	    CharDriverState *chr = dev->opaque;
	    int size = VHOST_USER_HDR_SIZE + msg->size;

	    /*
	     * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE,
	     * we just need send it once in the first time. For later such
	     * request, we just ignore it.
	     */
	    if (vhost_user_one_time_request(msg->request) && dev->vq_index != 0) {
	        return 0;
	    }

	    if (fd_num) {
	        qemu_chr_fe_set_msgfds(chr, fds, fd_num);
	    }

	    return qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size) == size ?
	            0 : -1;
	}

	/*
	 * Send VHOST_USER_SET_LOG_BASE describing the log's size.  The 'base'
	 * argument is not used here.
	 *
	 * When the LOG_SHMFD protocol feature is set, log->fd is attached (unless
	 * it is -1) and a reply is read back.  A failed read still returns 0; a
	 * reply with a different request code returns -1.
	 */
	static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
	                                   struct vhost_log *log)
	{
	    int fds[VHOST_MEMORY_MAX_NREGIONS];
	    size_t nfds = 0;
	    bool use_shmfd = virtio_has_feature(dev->protocol_features,
	                                        VHOST_USER_PROTOCOL_F_LOG_SHMFD);
	    VhostUserMsg msg = {
	        .request = VHOST_USER_SET_LOG_BASE,
	        .flags = VHOST_USER_VERSION,
	        .size = sizeof(msg.payload.log),
	        .payload.log.mmap_size = log->size * sizeof(*(log->log)),
	        .payload.log.mmap_offset = 0,
	    };

	    if (use_shmfd && log->fd != -1) {
	        fds[nfds] = log->fd;
	        nfds++;
	    }

	    vhost_user_write(dev, &msg, fds, nfds);

	    /* Without the shared-fd feature no reply is read. */
	    if (!use_shmfd) {
	        return 0;
	    }

	    msg.size = 0;
	    if (vhost_user_read(dev, &msg) < 0) {
	        return 0;
	    }

	    if (msg.request == VHOST_USER_SET_LOG_BASE) {
	        return 0;
	    }

	    error_report("Received unexpected msg type. "
	                 "Expected %d received %d",
	                 VHOST_USER_SET_LOG_BASE, msg.request);
	    return -1;
	}

	/*
	 * Send VHOST_USER_SET_MEM_TABLE built from dev->mem (the 'mem' argument
	 * is not consulted).
	 *
	 * Every region whose backing RAM has a file descriptor greater than 0 is
	 * added to the payload and its descriptor is attached to the message.
	 * Returns -1 (after error_report()) when no region qualifies, 0 otherwise;
	 * the result of the write itself is not propagated.
	 */
	static int vhost_user_set_mem_table(struct vhost_dev *dev,
	                                    struct vhost_memory *mem)
	{
	    int fds[VHOST_MEMORY_MAX_NREGIONS];
	    int i, fd;
	    size_t fd_num = 0;
	    VhostUserMsg msg = {
	        .request = VHOST_USER_SET_MEM_TABLE,
	        .flags = VHOST_USER_VERSION,
	    };

	    for (i = 0; i < dev->mem->nregions; ++i) {
	        struct vhost_memory_region *reg = dev->mem->regions + i;
	        ram_addr_t ram_addr;

	        assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
	        qemu_ram_addr_from_host((void *)(uintptr_t)reg->userspace_addr,
	                                &ram_addr);
	        fd = qemu_get_ram_fd(ram_addr);
	        if (fd > 0) {
	            /*
	             * Check the slot index before storing into regions[] and
	             * fds[], so an overflow is caught before any out-of-bounds
	             * write happens.
	             */
	            assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
	            msg.payload.memory.regions[fd_num].userspace_addr = reg->userspace_addr;
	            msg.payload.memory.regions[fd_num].memory_size = reg->memory_size;
	            msg.payload.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr;
	            msg.payload.memory.regions[fd_num].mmap_offset = reg->userspace_addr -
	                (uintptr_t) qemu_get_ram_block_host_ptr(ram_addr);
	            fds[fd_num++] = fd;
	        }
	    }

	    msg.payload.memory.nregions = fd_num;

	    if (!fd_num) {
	        error_report("Failed initializing vhost-user memory map, "
	                     "consider using -object memory-backend-file share=on");
	        return -1;
	    }

	    /* Only the count, the padding and the used regions are sent. */
	    msg.size = sizeof(msg.payload.memory.nregions);
	    msg.size += sizeof(msg.payload.memory.padding);
	    msg.size += fd_num * sizeof(VhostUserMemoryRegion);

	    vhost_user_write(dev, &msg, fds, fd_num);

	    return 0;
	}

	/*
	 * Send VHOST_USER_SET_VRING_ADDR with a copy of *addr as payload.
	 * Always returns 0; the write result is not propagated.
	 */
	static int vhost_user_set_vring_addr(struct vhost_dev *dev,
	                                     struct vhost_vring_addr *addr)
	{
	    VhostUserMsg msg = {
	        .request = VHOST_USER_SET_VRING_ADDR,
	        .flags = VHOST_USER_VERSION,
	        .size = sizeof(msg.payload.addr),
	        .payload.addr = *addr,
	    };

	    (void) vhost_user_write(dev, &msg, NULL, 0);

	    return 0;
	}

	/*
	 * No message is sent for this operation: an error is reported and -1 is
	 * returned unconditionally.  Neither argument is used.
	 */
	static int vhost_user_set_vring_endian(struct vhost_dev *dev,
	                                       struct vhost_vring_state *ring)
	{
	    const int unsupported = -1;

	    error_report("vhost-user trying to send unhandled ioctl");

	    return unsupported;
	}

	/*
	 * Send 'request' with a copy of *ring as the vring-state payload.
	 * Always returns 0; the write result is not propagated.
	 */
	static int vhost_set_vring(struct vhost_dev *dev,
	                           unsigned long int request,
	                           struct vhost_vring_state *ring)
	{
	    VhostUserMsg msg = {
	        .request = request,
	        .flags = VHOST_USER_VERSION,
	        .size = sizeof(msg.payload.state),
	        .payload.state = *ring,
	    };

	    (void) vhost_user_write(dev, &msg, NULL, 0);

	    return 0;
	}

	/* Forward *ring to the backend as a VHOST_USER_SET_VRING_NUM request. */
	static int vhost_user_set_vring_num(struct vhost_dev *dev,
	                                    struct vhost_vring_state *ring)
	{
	    int ret = vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring);

	    return ret;
	}

	/* Forward *ring to the backend as a VHOST_USER_SET_VRING_BASE request. */
	static int vhost_user_set_vring_base(struct vhost_dev *dev,
	                                     struct vhost_vring_state *ring)
	{
	    int ret = vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring);

	    return ret;
	}

	/*
	 * Send VHOST_USER_SET_VRING_ENABLE, carrying 'enable' in the num field,
	 * for each of the dev->nvqs vrings starting at dev->vq_index.
	 *
	 * Returns -1 without sending anything when dev->features lacks
	 * VHOST_USER_F_PROTOCOL_FEATURES, otherwise 0.
	 */
	static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
	{
	    int offset = 0;

	    if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) {
	        return -1;
	    }

	    while (offset < dev->nvqs) {
	        struct vhost_vring_state state = {
	            .index = dev->vq_index + offset,
	            .num = enable,
	        };

	        vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state);
	        offset++;
	    }

	    return 0;
	}

	/*
	 * Send VHOST_USER_GET_VRING_BASE with *ring as payload and store the
	 * replied vring state back into *ring.
	 *
	 * Returns 0 on success and also when the reply cannot be read (in which
	 * case *ring is left unchanged); returns -1 when the reply carries a
	 * different request code or a payload of the wrong size.
	 */
	static int vhost_user_get_vring_base(struct vhost_dev *dev,
	                                     struct vhost_vring_state *ring)
	{
	    int status = -1;
	    VhostUserMsg msg = {
	        .request = VHOST_USER_GET_VRING_BASE,
	        .flags = VHOST_USER_VERSION,
	        .size = sizeof(msg.payload.state),
	        .payload.state = *ring,
	    };

	    vhost_user_write(dev, &msg, NULL, 0);

	    if (vhost_user_read(dev, &msg) < 0) {
	        return 0;
	    }

	    if (msg.request != VHOST_USER_GET_VRING_BASE) {
	        error_report("Received unexpected msg type. Expected %d received %d",
	                     VHOST_USER_GET_VRING_BASE, msg.request);
	    } else if (msg.size != sizeof(msg.payload.state)) {
	        error_report("Received bad msg size.");
	    } else {
	        *ring = msg.payload.state;
	        status = 0;
	    }

	    return status;
	}

	/*
	 * Send 'request' for the vring file->index (masked with
	 * VHOST_USER_VRING_IDX_MASK).
	 *
	 * file->fd is attached when ioeventfd_enabled() is true and the fd is
	 * greater than 0; otherwise VHOST_USER_VRING_NOFD_MASK is set in the
	 * payload.  Always returns 0; the write result is not propagated.
	 */
	static int vhost_set_vring_file(struct vhost_dev *dev,
	                                VhostUserRequest request,
	                                struct vhost_vring_file *file)
	{
	    int fds[VHOST_MEMORY_MAX_NREGIONS];
	    size_t fd_num = 0;
	    VhostUserMsg msg = {
	        .request = request,
	        .flags = VHOST_USER_VERSION,
	        .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK,
	        .size = sizeof(msg.payload.u64),
	    };

	    if (ioeventfd_enabled() && file->fd > 0) {
	        fds[fd_num++] = file->fd;
	    } else {
	        msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
	    }

	    vhost_user_write(dev, &msg, fds, fd_num);

	    return 0;
	}

	/* Forward *file to the backend as a VHOST_USER_SET_VRING_KICK request. */
	static int vhost_user_set_vring_kick(struct vhost_dev *dev,
	                                     struct vhost_vring_file *file)
	{
	    int ret = vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file);

	    return ret;
	}

	/* Forward *file to the backend as a VHOST_USER_SET_VRING_CALL request. */
	static int vhost_user_set_vring_call(struct vhost_dev *dev,
	                                     struct vhost_vring_file *file)
	{
	    int ret = vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file);

	    return ret;
	}

	/*
	 * Send 'request' with the 64-bit value u64 as its payload.
	 * Always returns 0; the write result is not propagated.
	 */
	static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64)
	{
	    VhostUserMsg msg = {
	        .request = request,
	        .flags = VHOST_USER_VERSION,
	        .size = sizeof(msg.payload.u64),
	        .payload.u64 = u64,
	    };

	    (void) vhost_user_write(dev, &msg, NULL, 0);

	    return 0;
	}

	/* Send 'features' to the backend with VHOST_USER_SET_FEATURES. */
	static int vhost_user_set_features(struct vhost_dev *dev,
	                                   uint64_t features)
	{
	    int ret = vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features);

	    return ret;
	}

	/* Send 'features' to the backend with VHOST_USER_SET_PROTOCOL_FEATURES. */
	static int vhost_user_set_protocol_features(struct vhost_dev *dev,
	                                            uint64_t features)
	{
	    int ret;

	    ret = vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features);

	    return ret;
	}

	static int vhost_user_get_u64(struct vhost_dev dev, int request, uint64_t u64)
	{
	VhostUserMsg msg = {
	.request = request,
	.flags = VHOST_USER_VERSION,
	};

	if (vhost_user_one_time_request(request) && dev->vq_index != 0) {
	return 0;
	}

	vhost_user_write(dev, &msg, NULL, 0);

	if (vhost_user_read(dev, &msg) < 0) {
	return 0;
	}

	if (msg.request != request) {
	error_report("Received unexpected msg type. Expected %d received %d",
	request, msg.request);
	return -1;
	}

	if (msg.size != sizeof(msg.payload.u64)) {
	error_report("Received bad msg size.");
	return -1;
	}

	*u64 = msg.payload.u64;

	return 0;
	}

	/* Query the backend with VHOST_USER_GET_FEATURES, storing into *features. */
	static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features)
	{
	    return vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features);
	}

	/*
	 * Send a payload-less VHOST_USER_SET_OWNER message.
	 * Always returns 0; the write result is not propagated.
	 */
	static int vhost_user_set_owner(struct vhost_dev *dev)
	{
	    VhostUserMsg msg = {
	        .flags = VHOST_USER_VERSION,
	        .request = VHOST_USER_SET_OWNER,
	    };

	    (void) vhost_user_write(dev, &msg, NULL, 0);

	    return 0;
	}

	/*
	 * Send a payload-less VHOST_USER_RESET_OWNER message.
	 * Always returns 0; the write result is not propagated.
	 */
	static int vhost_user_reset_device(struct vhost_dev *dev)
	{
	    VhostUserMsg msg = {
	        .flags = VHOST_USER_VERSION,
	        .request = VHOST_USER_RESET_OWNER,
	    };

	    (void) vhost_user_write(dev, &msg, NULL, 0);

	    return 0;
	}

	static int vhost_user_init(struct vhost_dev dev, void opaque)
	{
	uint64_t features;
	int err;

	assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

	dev->opaque = opaque;

	err = vhost_user_get_features(dev, &features);
	if (err < 0) {
	return err;
	}

	if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) {
	dev->backend_features \|= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;

	err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES,
	&features);
	if (err < 0) {
	return err;
	}

	dev->protocol_features = features & VHOST_USER_PROTOCOL_FEATURE_MASK;
	err = vhost_user_set_protocol_features(dev, dev->protocol_features);
	if (err < 0) {
	return err;
	}

	/* query the max queues we support if backend supports Multiple Queue */
	if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) {
	err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM,
	&dev->max_queues);
	if (err < 0) {
	return err;
	}
	}
	}

	if (dev->migration_blocker == NULL &&
	!virtio_has_feature(dev->protocol_features,
	VHOST_USER_PROTOCOL_F_LOG_SHMFD)) {
	error_setg(&dev->migration_blocker,
	"Migration disabled: vhost-user backend lacks "
	"VHOST_USER_PROTOCOL_F_LOG_SHMFD feature.");
	}

	return 0;
	}

	/* Backend cleanup hook: clear dev->opaque and return 0. */
	static int vhost_user_cleanup(struct vhost_dev *dev)
	{
	    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

	    /* Forget the character device that the init hook stored here. */
	    dev->opaque = NULL;

	    return 0;
	}

	/*
	 * Return idx unchanged after asserting that it lies in the range
	 * [dev->vq_index, dev->vq_index + dev->nvqs).
	 */
	static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx)
	{
	assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);

	return idx;
	}

	/* Report VHOST_MEMORY_MAX_NREGIONS as the limit; 'dev' is not consulted. */
	static int vhost_user_memslots_limit(struct vhost_dev *dev)
	{
	    const int limit = VHOST_MEMORY_MAX_NREGIONS;

	    return limit;
	}

	/* Return whether dev->protocol_features has the LOG_SHMFD bit set. */
	static bool vhost_user_requires_shm_log(struct vhost_dev *dev)
	{
	    bool has_shmfd;

	    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

	    has_shmfd = virtio_has_feature(dev->protocol_features,
	                                   VHOST_USER_PROTOCOL_F_LOG_SHMFD);

	    return has_shmfd;
	}

	static int vhost_user_migration_done(struct vhost_dev dev, char mac_addr)
	{
	VhostUserMsg msg = { 0 };
	int err;

	assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

	/* If guest supports GUEST_ANNOUNCE do nothing */
	if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) {
	return 0;
	}

	/* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */
	if (virtio_has_feature(dev->protocol_features,
	VHOST_USER_PROTOCOL_F_RARP)) {
	msg.request = VHOST_USER_SEND_RARP;
	msg.flags = VHOST_USER_VERSION;
	memcpy((char *)&msg.payload.u64, mac_addr, 6);
	msg.size = sizeof(msg.payload.u64);

	err = vhost_user_write(dev, &msg, NULL, 0);
	return err;
	}
	return -1;
	}

	/*
	 * Return true when the RAM behind host addresses start1 and start2 yields
	 * the same value from qemu_get_ram_fd().  size1 and size2 are not
	 * consulted.  Both addresses must resolve to a memory region (asserted).
	 */
	static bool vhost_user_can_merge(struct vhost_dev *dev,
	                                 uint64_t start1, uint64_t size1,
	                                 uint64_t start2, uint64_t size2)
	{
	    MemoryRegion *mr;
	    ram_addr_t ram_addr;
	    int first_fd, second_fd;

	    mr = qemu_ram_addr_from_host((void *)(uintptr_t)start1, &ram_addr);
	    assert(mr);
	    first_fd = qemu_get_ram_fd(ram_addr);

	    mr = qemu_ram_addr_from_host((void *)(uintptr_t)start2, &ram_addr);
	    assert(mr);
	    second_fd = qemu_get_ram_fd(ram_addr);

	    if (first_fd != second_fd) {
	        return false;
	    }

	    return true;
	}

	const VhostOps user_ops = {
	.backend_type = VHOST_BACKEND_TYPE_USER,
	.vhost_backend_init = vhost_user_init,
	.vhost_backend_cleanup = vhost_user_cleanup,
	.vhost_backend_memslots_limit = vhost_user_memslots_limit,
	.vhost_set_log_base = vhost_user_set_log_base,
	.vhost_set_mem_table = vhost_user_set_mem_table,
	.vhost_set_vring_addr = vhost_user_set_vring_addr,
	.vhost_set_vring_endian = vhost_user_set_vring_endian,
	.vhost_set_vring_num = vhost_user_set_vring_num,
	.vhost_set_vring_base = vhost_user_set_vring_base,
	.vhost_get_vring_base = vhost_user_get_vring_base,
	.vhost_set_vring_kick = vhost_user_set_vring_kick,
	.vhost_set_vring_call = vhost_user_set_vring_call,
	.vhost_set_features = vhost_user_set_features,
	.vhost_get_features = vhost_user_get_features,
	.vhost_set_owner = vhost_user_set_owner,
	.vhost_reset_device = vhost_user_reset_device,
	.vhost_get_vq_index = vhost_user_get_vq_index,
	.vhost_set_vring_enable = vhost_user_set_vring_enable,
	.vhost_requires_shm_log = vhost_user_requires_shm_log,
	.vhost_migration_done = vhost_user_migration_done,
	.vhost_backend_can_merge = vhost_user_can_merge,
	};