| .. |
| Copyright (c) 2022, Linaro Limited |
| Written by Alex Bennée |
| |
| Writing VirtIO backends for QEMU |
| ================================ |
| |
| This document attempts to outline the information a developer needs to |
| know to write device emulations in QEMU. It is specifically focused on |
| implementing VirtIO devices. For VirtIO the frontend is the driver |
| running on the guest. The backend is everything that QEMU needs to |
| do to handle the emulation of the VirtIO device. This can be done |
| entirely in QEMU, divided between QEMU and the kernel (vhost) or |
| handled by a separate process which is configured by QEMU |
| (vhost-user). |
| |
| VirtIO Transports |
| ----------------- |
| |
| VirtIO supports a number of different transports. While the details of |
| the configuration and operation of the device will generally be the |
| same, QEMU represents them as different devices depending on the |
| transport they use. For example -device virtio-foo represents the foo |
| device using mmio and -device virtio-foo-pci is the same class of |
| device using the PCI transport. |
| |
| Using the QEMU Object Model (QOM) |
| --------------------------------- |
| |
| Generally all devices in QEMU are subclasses of ``TYPE_DEVICE`` |
| however VirtIO devices should be based on ``TYPE_VIRTIO_DEVICE`` which |
| itself is derived from the base class. For example: |
| |
| .. code:: c |
| |
| static const TypeInfo virtio_blk_info = { |
| .name = TYPE_VIRTIO_BLK, |
| .parent = TYPE_VIRTIO_DEVICE, |
| .instance_size = sizeof(VirtIOBlock), |
| .instance_init = virtio_blk_instance_init, |
| .class_init = virtio_blk_class_init, |
| }; |
| |
| The author may decide to have a more expansive class hierarchy to |
| support multiple device types. For example the Virtio GPU device: |
| |
| .. code:: c |
| |
| static const TypeInfo virtio_gpu_base_info = { |
| .name = TYPE_VIRTIO_GPU_BASE, |
| .parent = TYPE_VIRTIO_DEVICE, |
| .instance_size = sizeof(VirtIOGPUBase), |
| .class_size = sizeof(VirtIOGPUBaseClass), |
| .class_init = virtio_gpu_base_class_init, |
| .abstract = true |
| }; |
| |
| static const TypeInfo vhost_user_gpu_info = { |
| .name = TYPE_VHOST_USER_GPU, |
| .parent = TYPE_VIRTIO_GPU_BASE, |
| .instance_size = sizeof(VhostUserGPU), |
| .instance_init = vhost_user_gpu_instance_init, |
| .instance_finalize = vhost_user_gpu_instance_finalize, |
| .class_init = vhost_user_gpu_class_init, |
| }; |
| |
| static const TypeInfo virtio_gpu_info = { |
| .name = TYPE_VIRTIO_GPU, |
| .parent = TYPE_VIRTIO_GPU_BASE, |
| .instance_size = sizeof(VirtIOGPU), |
| .class_size = sizeof(VirtIOGPUClass), |
| .class_init = virtio_gpu_class_init, |
| }; |
| |
| defines a base class for the VirtIO GPU and then specialises two |
| versions, one for the internal implementation and the other for the |
| vhost-user version. |
| |
| VirtIOPCIProxy |
| ^^^^^^^^^^^^^^ |
| |
| [AJB: the following is supposition and welcomes more informed |
| opinions] |
| |
| Probably due to legacy from the pre-QOM days PCI VirtIO devices don't |
| follow the normal hierarchy. Instead a standalone object is based |
| on the VirtIOPCIProxy class and the specific VirtIO instance is |
| manually instantiated: |
| |
| .. code:: c |
| |
| /* |
| * virtio-blk-pci: This extends VirtioPCIProxy. |
| */ |
| #define TYPE_VIRTIO_BLK_PCI "virtio-blk-pci-base" |
| DECLARE_INSTANCE_CHECKER(VirtIOBlkPCI, VIRTIO_BLK_PCI, |
| TYPE_VIRTIO_BLK_PCI) |
| |
| struct VirtIOBlkPCI { |
| VirtIOPCIProxy parent_obj; |
| VirtIOBlock vdev; |
| }; |
| |
| static Property virtio_blk_pci_properties[] = { |
| DEFINE_PROP_UINT32("class", VirtIOPCIProxy, class_code, 0), |
| DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags, |
| VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true), |
| DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, |
| DEV_NVECTORS_UNSPECIFIED), |
| DEFINE_PROP_END_OF_LIST(), |
| }; |
| |
| static void virtio_blk_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) |
| { |
| VirtIOBlkPCI *dev = VIRTIO_BLK_PCI(vpci_dev); |
| DeviceState *vdev = DEVICE(&dev->vdev); |
| |
| ... |
| |
| qdev_realize(vdev, BUS(&vpci_dev->bus), errp); |
| } |
| |
| static void virtio_blk_pci_class_init(ObjectClass *klass, void *data) |
| { |
| DeviceClass *dc = DEVICE_CLASS(klass); |
| VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); |
| PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass); |
| |
| set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); |
| device_class_set_props(dc, virtio_blk_pci_properties); |
| k->realize = virtio_blk_pci_realize; |
| pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET; |
| pcidev_k->device_id = PCI_DEVICE_ID_VIRTIO_BLOCK; |
| pcidev_k->revision = VIRTIO_PCI_ABI_VERSION; |
| pcidev_k->class_id = PCI_CLASS_STORAGE_SCSI; |
| } |
| |
| static void virtio_blk_pci_instance_init(Object *obj) |
| { |
| VirtIOBlkPCI *dev = VIRTIO_BLK_PCI(obj); |
| |
| virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), |
| TYPE_VIRTIO_BLK); |
| object_property_add_alias(obj, "bootindex", OBJECT(&dev->vdev), |
| "bootindex"); |
| } |
| |
| static const VirtioPCIDeviceTypeInfo virtio_blk_pci_info = { |
| .base_name = TYPE_VIRTIO_BLK_PCI, |
| .generic_name = "virtio-blk-pci", |
| .transitional_name = "virtio-blk-pci-transitional", |
| .non_transitional_name = "virtio-blk-pci-non-transitional", |
| .instance_size = sizeof(VirtIOBlkPCI), |
| .instance_init = virtio_blk_pci_instance_init, |
| .class_init = virtio_blk_pci_class_init, |
| }; |
| |
| Here you can see the instance_init has to manually instantiate the |
| underlying ``TYPE_VIRTIO_BLK`` object and link an alias for one of |
| its properties to the PCI device. |
| |
| |
| Back End Implementations |
| ------------------------ |
| |
| There are a number of places where the implementation of the backend |
| can be done: |
| |
| * in QEMU itself |
| * in the host kernel (a.k.a. vhost) |
| * in a separate process (a.k.a. vhost-user) |
| |
| vhost_ops vs TYPE_VHOST_USER_BACKEND |
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
| |
| There are two choices for how to implement vhost code. Most of the code |
| which has to work with either vhost or vhost-user uses |
| ``vhost_dev_init()`` to instantiate the appropriate backend. This |
| means including a ``struct vhost_dev`` in the main object structure. |
| |
| For vhost-user devices you also need to add code to track the |
| initialisation of the ``chardev`` device used for the control socket |
| between QEMU and the external vhost-user process. |
| |
| If you only need to implement a vhost-user backend the other option is |
| to use a QOM-ified version of vhost-user. |
| |
| .. code:: c |
| |
| static void |
| vhost_user_gpu_instance_init(Object *obj) |
| { |
| VhostUserGPU *g = VHOST_USER_GPU(obj); |
| |
| g->vhost = VHOST_USER_BACKEND(object_new(TYPE_VHOST_USER_BACKEND)); |
| object_property_add_alias(obj, "chardev", |
| OBJECT(g->vhost), "chardev"); |
| } |
| |
| static const TypeInfo vhost_user_gpu_info = { |
| .name = TYPE_VHOST_USER_GPU, |
| .parent = TYPE_VIRTIO_GPU_BASE, |
| .instance_size = sizeof(VhostUserGPU), |
| .instance_init = vhost_user_gpu_instance_init, |
| .instance_finalize = vhost_user_gpu_instance_finalize, |
| .class_init = vhost_user_gpu_class_init, |
| }; |
| |
| Using it this way entails adding a ``struct VhostUserBackend`` to your |
| core object structure and manually instantiating the backend. This |
| sub-structure tracks both the ``vhost_dev`` and ``CharDev`` types |
| needed for the connection. Instead of calling ``vhost_dev_init`` you |
| would call ``vhost_user_backend_dev_init`` which does what is needed |
| on your behalf. |