[virtio] Replace the virtio core and network device driver

The existing virtio network driver has been somewhat hacked together
over the past two decades by multiple contributors, and includes a
substantial amount of logic that is almost but not quite duplicated
between the "legacy" and "modern" code paths.

Rip out the existing driver and replace it with a completely new
driver written from scratch against the Virtual I/O Device (VIRTIO)
specification document, not derived from the Linux kernel driver.

Signed-off-by: Michael Brown <mcb30@ipxe.org>
diff --git a/src/drivers/bus/virtio-pci.c b/src/drivers/bus/virtio-pci.c
deleted file mode 100644
index 3fc93a9..0000000
--- a/src/drivers/bus/virtio-pci.c
+++ /dev/null
@@ -1,453 +0,0 @@
-/* virtio-pci.c - pci interface for virtio interface
- *
- * (c) Copyright 2008 Bull S.A.S.
- *
- *  Author: Laurent Vivier <Laurent.Vivier@bull.net>
- *
- * some parts from Linux Virtio PCI driver
- *
- *  Copyright IBM Corp. 2007
- *  Authors: Anthony Liguori  <aliguori@us.ibm.com>
- *
- */
-
-#include "errno.h"
-#include "byteswap.h"
-#include "etherboot.h"
-#include "ipxe/io.h"
-#include "ipxe/iomap.h"
-#include "ipxe/pci.h"
-#include "ipxe/dma.h"
-#include "ipxe/reboot.h"
-#include "ipxe/virtio-pci.h"
-#include "ipxe/virtio-ring.h"
-
-static int vp_alloc_vq(struct vring_virtqueue *vq, u16 num, size_t header_size)
-{
-    size_t ring_size = PAGE_MASK + vring_size(num);
-    size_t vdata_size = num * sizeof(void *);
-    size_t queue_size = ring_size + vdata_size + header_size;
-
-    vq->queue = dma_alloc(vq->dma, &vq->map, queue_size, queue_size);
-    if (!vq->queue) {
-        return -ENOMEM;
-    }
-
-    memset ( vq->queue, 0, queue_size );
-    vq->queue_size = queue_size;
-
-    /* vdata immediately follows the ring */
-    vq->vdata = (void **)(vq->queue + ring_size);
-
-    /* empty header immediately follows vdata */
-    vq->empty_header = (struct virtio_net_hdr_modern *)(vq->queue + ring_size + vdata_size);
-
-    return 0;
-}
-
-void vp_free_vq(struct vring_virtqueue *vq)
-{
-    if (vq->queue && vq->queue_size) {
-        dma_free(&vq->map, vq->queue, vq->queue_size);
-        vq->queue = NULL;
-        vq->vdata = NULL;
-        vq->queue_size = 0;
-    }
-}
-
-int vp_find_vq(unsigned int ioaddr, int queue_index,
-               struct vring_virtqueue *vq, struct dma_device *dma_dev,
-               size_t header_size)
-{
-   struct vring * vr = &vq->vring;
-   u16 num;
-   int rc;
-
-   /* select the queue */
-
-   outw(queue_index, ioaddr + VIRTIO_PCI_QUEUE_SEL);
-
-   /* check if the queue is available */
-
-   num = inw(ioaddr + VIRTIO_PCI_QUEUE_NUM);
-   if (!num) {
-           DBG("VIRTIO-PCI ERROR: queue size is 0\n");
-           return -1;
-   }
-
-   /* check if the queue is already active */
-
-   if (inl(ioaddr + VIRTIO_PCI_QUEUE_PFN)) {
-           DBG("VIRTIO-PCI ERROR: queue already active\n");
-           return -1;
-   }
-
-   vq->queue_index = queue_index;
-   vq->dma = dma_dev;
-
-   /* initialize the queue */
-   rc = vp_alloc_vq(vq, num, header_size);
-   if (rc) {
-           DBG("VIRTIO-PCI ERROR: failed to allocate queue memory\n");
-           return rc;
-   }
-   vring_init(vr, num, vq->queue);
-
-   /* activate the queue
-    *
-    * NOTE: vr->desc is initialized by vring_init()
-    */
-
-   outl(dma(&vq->map, vr->desc) >> PAGE_SHIFT, ioaddr + VIRTIO_PCI_QUEUE_PFN);
-
-   return num;
-}
-
-#define CFG_POS(vdev, field) \
-    (vdev->cfg_cap_pos + offsetof(struct virtio_pci_cfg_cap, field))
-
-static void prep_pci_cfg_cap(struct virtio_pci_modern_device *vdev,
-                             struct virtio_pci_region *region,
-                             size_t offset, u32 length)
-{
-    pci_write_config_byte(vdev->pci, CFG_POS(vdev, cap.bar), region->bar);
-    pci_write_config_dword(vdev->pci, CFG_POS(vdev, cap.length), length);
-    pci_write_config_dword(vdev->pci, CFG_POS(vdev, cap.offset),
-        (intptr_t)(region->base + offset));
-}
-
-void vpm_iowrite8(struct virtio_pci_modern_device *vdev,
-                  struct virtio_pci_region *region, u8 data, size_t offset)
-{
-    switch (region->flags & VIRTIO_PCI_REGION_TYPE_MASK) {
-    case VIRTIO_PCI_REGION_MEMORY:
-        writeb(data, region->base + offset);
-        break;
-    case VIRTIO_PCI_REGION_PORT:
-        outb(data, region->base + offset);
-        break;
-    case VIRTIO_PCI_REGION_PCI_CONFIG:
-        prep_pci_cfg_cap(vdev, region, offset, 1);
-        pci_write_config_byte(vdev->pci, CFG_POS(vdev, pci_cfg_data), data);
-        break;
-    default:
-        assert(0);
-        break;
-    }
-}
-
-void vpm_iowrite16(struct virtio_pci_modern_device *vdev,
-                   struct virtio_pci_region *region, u16 data, size_t offset)
-{
-    data = cpu_to_le16(data);
-    switch (region->flags & VIRTIO_PCI_REGION_TYPE_MASK) {
-    case VIRTIO_PCI_REGION_MEMORY:
-        writew(data, region->base + offset);
-        break;
-    case VIRTIO_PCI_REGION_PORT:
-        outw(data, region->base + offset);
-        break;
-    case VIRTIO_PCI_REGION_PCI_CONFIG:
-        prep_pci_cfg_cap(vdev, region, offset, 2);
-        pci_write_config_word(vdev->pci, CFG_POS(vdev, pci_cfg_data), data);
-        break;
-    default:
-        assert(0);
-        break;
-    }
-}
-
-void vpm_iowrite32(struct virtio_pci_modern_device *vdev,
-                   struct virtio_pci_region *region, u32 data, size_t offset)
-{
-    data = cpu_to_le32(data);
-    switch (region->flags & VIRTIO_PCI_REGION_TYPE_MASK) {
-    case VIRTIO_PCI_REGION_MEMORY:
-        writel(data, region->base + offset);
-        break;
-    case VIRTIO_PCI_REGION_PORT:
-        outl(data, region->base + offset);
-        break;
-    case VIRTIO_PCI_REGION_PCI_CONFIG:
-        prep_pci_cfg_cap(vdev, region, offset, 4);
-        pci_write_config_dword(vdev->pci, CFG_POS(vdev, pci_cfg_data), data);
-        break;
-    default:
-        assert(0);
-        break;
-    }
-}
-
-u8 vpm_ioread8(struct virtio_pci_modern_device *vdev,
-               struct virtio_pci_region *region, size_t offset)
-{
-    uint8_t data;
-    switch (region->flags & VIRTIO_PCI_REGION_TYPE_MASK) {
-    case VIRTIO_PCI_REGION_MEMORY:
-        data = readb(region->base + offset);
-        break;
-    case VIRTIO_PCI_REGION_PORT:
-        data = inb(region->base + offset);
-        break;
-    case VIRTIO_PCI_REGION_PCI_CONFIG:
-        prep_pci_cfg_cap(vdev, region, offset, 1);
-        pci_read_config_byte(vdev->pci, CFG_POS(vdev, pci_cfg_data), &data);
-        break;
-    default:
-        assert(0);
-        data = 0;
-        break;
-    }
-    return data;
-}
-
-u16 vpm_ioread16(struct virtio_pci_modern_device *vdev,
-                 struct virtio_pci_region *region, size_t offset)
-{
-    uint16_t data;
-    switch (region->flags & VIRTIO_PCI_REGION_TYPE_MASK) {
-    case VIRTIO_PCI_REGION_MEMORY:
-        data = readw(region->base + offset);
-        break;
-    case VIRTIO_PCI_REGION_PORT:
-        data = inw(region->base + offset);
-        break;
-    case VIRTIO_PCI_REGION_PCI_CONFIG:
-        prep_pci_cfg_cap(vdev, region, offset, 2);
-        pci_read_config_word(vdev->pci, CFG_POS(vdev, pci_cfg_data), &data);
-        break;
-    default:
-        assert(0);
-        data = 0;
-        break;
-    }
-    return le16_to_cpu(data);
-}
-
-u32 vpm_ioread32(struct virtio_pci_modern_device *vdev,
-                 struct virtio_pci_region *region, size_t offset)
-{
-    uint32_t data;
-    switch (region->flags & VIRTIO_PCI_REGION_TYPE_MASK) {
-    case VIRTIO_PCI_REGION_MEMORY:
-        data = readl(region->base + offset);
-        break;
-    case VIRTIO_PCI_REGION_PORT:
-        data = inl(region->base + offset);
-        break;
-    case VIRTIO_PCI_REGION_PCI_CONFIG:
-        prep_pci_cfg_cap(vdev, region, offset, 4);
-        pci_read_config_dword(vdev->pci, CFG_POS(vdev, pci_cfg_data), &data);
-        break;
-    default:
-        assert(0);
-        data = 0;
-        break;
-    }
-    return le32_to_cpu(data);
-}
-
-int virtio_pci_find_capability(struct pci_device *pci, uint8_t cfg_type)
-{
-    int pos;
-    uint8_t type, bar;
-
-    for (pos = pci_find_capability(pci, PCI_CAP_ID_VNDR);
-         pos > 0;
-         pos = pci_find_next_capability(pci, pos, PCI_CAP_ID_VNDR)) {
-
-        pci_read_config_byte(pci, pos + offsetof(struct virtio_pci_cap,
-            cfg_type), &type);
-        pci_read_config_byte(pci, pos + offsetof(struct virtio_pci_cap,
-            bar), &bar);
-
-        /* Ignore structures with reserved BAR values */
-        if (bar > 0x5) {
-            continue;
-        }
-
-        if (type == cfg_type) {
-            return pos;
-        }
-    }
-    return 0;
-}
-
-int virtio_pci_map_capability(struct pci_device *pci, int cap, size_t minlen,
-                              u32 align, u32 start, u32 size,
-                              struct virtio_pci_region *region)
-{
-    u8 bar;
-    u32 offset, length, base_raw;
-    unsigned long base;
-
-    pci_read_config_byte(pci, cap + offsetof(struct virtio_pci_cap, bar), &bar);
-    pci_read_config_dword(pci, cap + offsetof(struct virtio_pci_cap, offset),
-                          &offset);
-    pci_read_config_dword(pci, cap + offsetof(struct virtio_pci_cap, length),
-                          &length);
-
-    if (length <= start) {
-        DBG("VIRTIO-PCI bad capability len %d (>%d expected)\n", length, start);
-        return -EINVAL;
-    }
-    if (length - start < minlen) {
-        DBG("VIRTIO-PCI bad capability len %d (>=%zd expected)\n", length, minlen);
-        return -EINVAL;
-    }
-    length -= start;
-    if (start + offset < offset) {
-        DBG("VIRTIO-PCI map wrap-around %d+%d\n", start, offset);
-        return -EINVAL;
-    }
-    offset += start;
-    if (offset & (align - 1)) {
-        DBG("VIRTIO-PCI offset %d not aligned to %d\n", offset, align);
-        return -EINVAL;
-    }
-    if (length > size) {
-        length = size;
-    }
-
-    if (minlen + offset < minlen ||
-        minlen + offset > pci_bar_size(pci, PCI_BASE_ADDRESS(bar))) {
-        DBG("VIRTIO-PCI map virtio %zd@%d out of range on bar %i length %ld\n",
-            minlen, offset,
-            bar, pci_bar_size(pci, PCI_BASE_ADDRESS(bar)));
-        return -EINVAL;
-    }
-
-    region->base = NULL;
-    region->length = length;
-    region->bar = bar;
-
-    base = pci_bar_start(pci, PCI_BASE_ADDRESS(bar));
-    if (base) {
-        pci_read_config_dword(pci, PCI_BASE_ADDRESS(bar), &base_raw);
-
-        if (base_raw & PCI_BASE_ADDRESS_SPACE_IO) {
-            /* Region accessed using port I/O */
-            region->base = (void *)(base + offset);
-            region->flags = VIRTIO_PCI_REGION_PORT;
-        } else {
-            /* Region mapped into memory space */
-            region->base = pci_ioremap(pci, base + offset, length);
-            region->flags = VIRTIO_PCI_REGION_MEMORY;
-        }
-    }
-    if (!region->base) {
-        /* Region accessed via PCI config space window */
-	    region->base = (void *)(intptr_t)offset;
-        region->flags = VIRTIO_PCI_REGION_PCI_CONFIG;
-    }
-    return 0;
-}
-
-void virtio_pci_unmap_capability(struct virtio_pci_region *region)
-{
-    unsigned region_type = region->flags & VIRTIO_PCI_REGION_TYPE_MASK;
-    if (region_type == VIRTIO_PCI_REGION_MEMORY) {
-        iounmap(region->base);
-    }
-}
-
-void vpm_notify(struct virtio_pci_modern_device *vdev,
-                struct vring_virtqueue *vq)
-{
-    vpm_iowrite16(vdev, &vq->notification, (u16)vq->queue_index, 0);
-}
-
-int vpm_find_vqs(struct virtio_pci_modern_device *vdev,
-                 unsigned nvqs, struct vring_virtqueue *vqs,
-                 struct dma_device *dma_dev, size_t header_size)
-{
-    unsigned i;
-    struct vring_virtqueue *vq;
-    u16 size, off;
-    u32 notify_offset_multiplier;
-    int err;
-
-    if (nvqs > vpm_ioread16(vdev, &vdev->common, COMMON_OFFSET(num_queues))) {
-        return -ENOENT;
-    }
-
-    /* Read notify_off_multiplier from config space. */
-    pci_read_config_dword(vdev->pci,
-        vdev->notify_cap_pos + offsetof(struct virtio_pci_notify_cap,
-        notify_off_multiplier),
-        &notify_offset_multiplier);
-
-    for (i = 0; i < nvqs; i++) {
-        /* Select the queue we're interested in */
-        vpm_iowrite16(vdev, &vdev->common, (u16)i, COMMON_OFFSET(queue_select));
-
-        /* Check if queue is either not available or already active. */
-        size = vpm_ioread16(vdev, &vdev->common, COMMON_OFFSET(queue_size));
-        /* QEMU has a bug where queues don't revert to inactive on device
-         * reset. Skip checking the queue_enable field until it is fixed.
-         */
-        if (!size /*|| vpm_ioread16(vdev, &vdev->common.queue_enable)*/)
-            return -ENOENT;
-
-        if (size & (size - 1)) {
-            DBG("VIRTIO-PCI %p: bad queue size %d\n", vdev, size);
-            return -EINVAL;
-        }
-
-        if (size > MAX_QUEUE_NUM) {
-            /* iPXE networking tends to be not perf critical so there's no
-             * need to accept large queue sizes.
-             */
-            size = MAX_QUEUE_NUM;
-        }
-
-        vq = &vqs[i];
-        vq->queue_index = i;
-        vq->dma = dma_dev;
-
-        /* get offset of notification word for this vq */
-        off = vpm_ioread16(vdev, &vdev->common, COMMON_OFFSET(queue_notify_off));
-
-        err = vp_alloc_vq(vq, size, header_size);
-        if (err) {
-            DBG("VIRTIO-PCI %p: failed to allocate queue memory\n", vdev);
-            return err;
-        }
-        vring_init(&vq->vring, size, vq->queue);
-
-        /* activate the queue */
-        vpm_iowrite16(vdev, &vdev->common, size, COMMON_OFFSET(queue_size));
-
-        vpm_iowrite64(vdev, &vdev->common,
-                      dma(&vq->map, vq->vring.desc),
-                      COMMON_OFFSET(queue_desc_lo),
-                      COMMON_OFFSET(queue_desc_hi));
-        vpm_iowrite64(vdev, &vdev->common,
-                      dma(&vq->map, vq->vring.avail),
-                      COMMON_OFFSET(queue_avail_lo),
-                      COMMON_OFFSET(queue_avail_hi));
-        vpm_iowrite64(vdev, &vdev->common,
-                      dma(&vq->map, vq->vring.used),
-                      COMMON_OFFSET(queue_used_lo),
-                      COMMON_OFFSET(queue_used_hi));
-
-        err = virtio_pci_map_capability(vdev->pci,
-            vdev->notify_cap_pos, 2, 2,
-            off * notify_offset_multiplier, 2,
-            &vq->notification);
-        if (err) {
-            return err;
-        }
-    }
-
-    /* Select and activate all queues. Has to be done last: once we do
-     * this, there's no way to go back except reset.
-     */
-    for (i = 0; i < nvqs; i++) {
-        vq = &vqs[i];
-        vpm_iowrite16(vdev, &vdev->common, (u16)vq->queue_index,
-                      COMMON_OFFSET(queue_select));
-        vpm_iowrite16(vdev, &vdev->common, 1, COMMON_OFFSET(queue_enable));
-    }
-    return 0;
-}
diff --git a/src/drivers/bus/virtio-ring.c b/src/drivers/bus/virtio-ring.c
deleted file mode 100644
index e448c34..0000000
--- a/src/drivers/bus/virtio-ring.c
+++ /dev/null
@@ -1,143 +0,0 @@
-/* virtio-pci.c - virtio ring management
- *
- * (c) Copyright 2008 Bull S.A.S.
- *
- *  Author: Laurent Vivier <Laurent.Vivier@bull.net>
- *
- *  some parts from Linux Virtio Ring
- *
- *  Copyright Rusty Russell IBM Corporation 2007
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- *
- */
-
-FILE_LICENCE ( GPL2_OR_LATER );
-
-#include "etherboot.h"
-#include "ipxe/io.h"
-#include "ipxe/virtio-pci.h"
-#include "ipxe/virtio-ring.h"
-
-#define BUG() do { \
-   printf("BUG: failure at %s:%d/%s()!\n", \
-          __FILE__, __LINE__, __FUNCTION__); \
-   while(1); \
-} while (0)
-#define BUG_ON(condition) do { if (condition) BUG(); } while (0)
-
-/*
- * vring_free
- *
- * put at the begin of the free list the current desc[head]
- */
-
-void vring_detach(struct vring_virtqueue *vq, unsigned int head)
-{
-   struct vring *vr = &vq->vring;
-   unsigned int i;
-
-   /* find end of given descriptor */
-
-   i = head;
-   while (vr->desc[i].flags & VRING_DESC_F_NEXT)
-           i = vr->desc[i].next;
-
-   /* link it with free list and point to it */
-
-   vr->desc[i].next = vq->free_head;
-   wmb();
-   vq->free_head = head;
-}
-
-/*
- * vring_get_buf
- *
- * get a buffer from the used list
- *
- */
-
-void *vring_get_buf(struct vring_virtqueue *vq, unsigned int *len)
-{
-   struct vring *vr = &vq->vring;
-   struct vring_used_elem *elem;
-   u32 id;
-   void *opaque;
-
-   BUG_ON(!vring_more_used(vq));
-
-   elem = &vr->used->ring[vq->last_used_idx % vr->num];
-   wmb();
-   id = elem->id;
-   if (len != NULL)
-           *len = elem->len;
-
-   opaque = vq->vdata[id];
-
-   vring_detach(vq, id);
-
-   vq->last_used_idx++;
-
-   return opaque;
-}
-
-void vring_add_buf(struct vring_virtqueue *vq,
-		   struct vring_list list[],
-		   unsigned int out, unsigned int in,
-		   void *opaque, int num_added)
-{
-   struct vring *vr = &vq->vring;
-   int i, avail, head, prev;
-
-   BUG_ON(out + in == 0);
-
-   prev = 0;
-   head = vq->free_head;
-   for (i = head; out; i = vr->desc[i].next, out--) {
-
-           vr->desc[i].flags = VRING_DESC_F_NEXT;
-           vr->desc[i].addr = list->addr;
-           vr->desc[i].len = list->length;
-           prev = i;
-           list++;
-   }
-   for ( ; in; i = vr->desc[i].next, in--) {
-
-           vr->desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
-           vr->desc[i].addr = list->addr;
-           vr->desc[i].len = list->length;
-           prev = i;
-           list++;
-   }
-   vr->desc[prev].flags &= ~VRING_DESC_F_NEXT;
-
-   vq->free_head = i;
-
-   vq->vdata[head] = opaque;
-
-   avail = (vr->avail->idx + num_added) % vr->num;
-   vr->avail->ring[avail] = head;
-   wmb();
-}
-
-void vring_kick(struct virtio_pci_modern_device *vdev, unsigned int ioaddr,
-                struct vring_virtqueue *vq, int num_added)
-{
-   struct vring *vr = &vq->vring;
-
-   wmb();
-   vr->avail->idx += num_added;
-
-   mb();
-   if (!(vr->used->flags & VRING_USED_F_NO_NOTIFY)) {
-           if (vdev) {
-                   /* virtio 1.0 */
-                   vpm_notify(vdev, vq);
-           } else {
-                   /* legacy virtio */
-                   vp_notify(ioaddr, vq->queue_index);
-           }
-   }
-}
diff --git a/src/drivers/bus/virtio.c b/src/drivers/bus/virtio.c
new file mode 100644
index 0000000..b7d7570
--- /dev/null
+++ b/src/drivers/bus/virtio.c
@@ -0,0 +1,783 @@
+/*
+ * Copyright (C) 2026 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ *
+ * You can also choose to distribute this program under the terms of
+ * the Unmodified Binary Distribution Licence (as given in the file
+ * COPYING.UBDL), provided that you have satisfied its requirements.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
+FILE_SECBOOT ( PERMITTED );
+
+/** @file
+ *
+ * Virtual I/O device
+ *
+ */
+
+#include <string.h>
+#include <assert.h>
+#include <errno.h>
+#include <unistd.h>
+#include <ipxe/pci.h>
+#include <ipxe/virtio.h>
+
+/******************************************************************************
+ *
+ * Original ("legacy") device operations
+ *
+ ******************************************************************************
+ */
+
+/**
+ * Reset device
+ *
+ * @v virtio		Virtio device
+ * @ret rc		Return status code
+ */
+static int virtio_legacy_reset ( struct virtio_device *virtio ) {
+	uint8_t stat;
+	unsigned int i;
+
+	/* Reset device */
+	iowrite8 ( 0, virtio->common + VIRTIO_LEG_STAT );
+
+	/* Wait for reset to complete */
+	for ( i = 0 ; i < VIRTIO_RESET_MAX_WAIT_MS ; i++ ) {
+		stat = ioread8 ( virtio->common + VIRTIO_LEG_STAT );
+		if ( ! stat )
+			return 0;
+		mdelay ( 1 );
+	}
+
+	DBGC ( virtio, "VIRTIO %s could not reset device\n", virtio->name );
+	return -ETIMEDOUT;
+}
+
+/**
+ * Report driver status
+ *
+ * @v virtio		Virtio device
+ * @ret stat		Actual device status
+ */
+static unsigned int virtio_legacy_status ( struct virtio_device *virtio ) {
+
+	/* Report device status */
+	iowrite8 ( virtio->stat, virtio->common + VIRTIO_LEG_STAT );
+
+	/* Read back device status */
+	return ioread8 ( virtio->common + VIRTIO_LEG_STAT );
+}
+
+/**
+ * Get supported features
+ *
+ * @v virtio		Virtio device
+ */
+static void virtio_legacy_supported ( struct virtio_device *virtio ) {
+	struct virtio_features *supported = &virtio->supported;
+	unsigned int i;
+
+	/* Get device supported features */
+	supported->word[0] = ioread32 ( virtio->common + VIRTIO_LEG_FEAT );
+
+	/* Legacy devices have only a single 32-bit feature register */
+	for ( i = 1 ; i < VIRTIO_FEATURE_WORDS ; i++ )
+		supported->word[i] = 0;
+}
+
+/**
+ * Negotiate device features
+ *
+ * @v virtio		Virtio device
+ */
+static void virtio_legacy_negotiate ( struct virtio_device *virtio ) {
+	struct virtio_features *features = &virtio->features;
+	unsigned int i;
+
+	/* Set in-use features */
+	iowrite32 ( features->word[0], virtio->common + VIRTIO_LEG_USED );
+
+	/* Legacy devices have only a single 32-bit feature register */
+	for ( i = 1 ; i < VIRTIO_FEATURE_WORDS ; i++ )
+		assert ( features->word[i] == 0 );
+}
+
+/**
+ * Set queue size
+ *
+ * @v virtio		Virtio device
+ * @v queue		Virtio queue
+ * @v count		Requested size
+ */
+static void virtio_legacy_size ( struct virtio_device *virtio,
+				 struct virtio_queue *queue,
+				 unsigned int count ) {
+	size_t len;
+
+	/* Select queue */
+	iowrite16 ( queue->index, virtio->common + VIRTIO_LEG_SEL );
+
+	/* Get (fixed) queue size */
+	count = ioread16 ( virtio->common + VIRTIO_LEG_SIZE );
+
+	/* Calculate queue length */
+	len = virtio_desc_size ( count );
+	len = virtio_align ( len + virtio_sq_size ( count ) );
+	len = virtio_align ( len + virtio_cq_size ( count ) );
+
+	/* Record queue size */
+	queue->count = count;
+	queue->len = len;
+}
+
+/**
+ * Enable queue
+ *
+ * @v virtio		Virtio device
+ * @v queue		Virtio queue
+ */
+static void virtio_legacy_enable ( struct virtio_device *virtio,
+				   struct virtio_queue *queue ) {
+	unsigned int count = queue->count;
+	void *base = queue->desc;
+	size_t len;
+
+	/* Select queue */
+	iowrite16 ( queue->index, virtio->common + VIRTIO_LEG_SEL );
+
+	/* Lay out queue regions */
+	len = virtio_desc_size ( count );
+	queue->sq = ( base + len );
+	len = virtio_align ( len + virtio_sq_size ( count ) );
+	queue->cq = ( base + len );
+	len = virtio_align ( len + virtio_cq_size ( count ) );
+	assert ( len == queue->len );
+
+	/* Program queue base page address */
+	iowrite32 ( ( dma ( &queue->map, queue->desc ) / VIRTIO_PAGE ),
+		    virtio->common + VIRTIO_LEG_BASE );
+}
+
+/** Original ("legacy") device operations */
+static struct virtio_operations virtio_legacy_operations = {
+	.reset = virtio_legacy_reset,
+	.status = virtio_legacy_status,
+	.supported = virtio_legacy_supported,
+	.negotiate = virtio_legacy_negotiate,
+	.size = virtio_legacy_size,
+	.enable = virtio_legacy_enable,
+};
+
+/******************************************************************************
+ *
+ * PCI ("modern") device operations
+ *
+ ******************************************************************************
+ */
+
+/**
+ * Reset device
+ *
+ * @v virtio		Virtio device
+ * @ret rc		Return status code
+ */
+static int virtio_pci_reset ( struct virtio_device *virtio ) {
+	uint8_t stat;
+	unsigned int i;
+
+	/* Reset device */
+	iowrite8 ( 0, virtio->common + VIRTIO_PCI_STAT );
+
+	/* Wait for reset to complete */
+	for ( i = 0 ; i < VIRTIO_RESET_MAX_WAIT_MS ; i++ ) {
+		stat = ioread8 ( virtio->common + VIRTIO_PCI_STAT );
+		if ( ! stat )
+			return 0;
+		mdelay ( 1 );
+	}
+
+	DBGC ( virtio, "VIRTIO %s could not reset device\n", virtio->name );
+	return -ETIMEDOUT;
+}
+
+/**
+ * Report driver status
+ *
+ * @v virtio		Virtio device
+ * @ret stat		Actual device status
+ */
+static unsigned int virtio_pci_status ( struct virtio_device *virtio ) {
+
+	/* Report device status */
+	iowrite8 ( virtio->stat, virtio->common + VIRTIO_PCI_STAT );
+
+	/* Read back device status */
+	return ioread8 ( virtio->common + VIRTIO_PCI_STAT );
+}
+
+/**
+ * Get supported features
+ *
+ * @v virtio		Virtio device
+ */
+static void virtio_pci_supported ( struct virtio_device *virtio ) {
+	struct virtio_features *supported = &virtio->supported;
+	unsigned int i;
+
+	/* Get device supported features */
+	for ( i = 0 ; i < VIRTIO_FEATURE_WORDS ; i++ ) {
+		iowrite32 ( i, virtio->common + VIRTIO_PCI_FEAT_SEL );
+		supported->word[i] =
+			ioread32 ( virtio->common + VIRTIO_PCI_FEAT );
+	}
+}
+
+/**
+ * Negotiate device features
+ *
+ * @v virtio		Virtio device
+ */
+static void virtio_pci_negotiate ( struct virtio_device *virtio ) {
+	struct virtio_features *features = &virtio->features;
+	unsigned int i;
+
+	/* Set in-use features */
+	for ( i = 0 ; i < VIRTIO_FEATURE_WORDS ; i++ ) {
+		iowrite32 ( i, virtio->common + VIRTIO_PCI_USED_SEL );
+		iowrite32 ( features->word[i],
+			    virtio->common + VIRTIO_PCI_USED );
+	}
+}
+
+/**
+ * Set queue size
+ *
+ * @v virtio		Virtio device
+ * @v queue		Virtio queue
+ * @v count		Requested size
+ */
+static void virtio_pci_size ( struct virtio_device *virtio,
+			      struct virtio_queue *queue,
+			      unsigned int count ) {
+	unsigned int max;
+	size_t len;
+
+	/* Select queue */
+	iowrite16 ( queue->index, virtio->common + VIRTIO_PCI_SEL );
+
+	/* Set queue size */
+	max = ioread16 ( virtio->common + VIRTIO_PCI_SIZE );
+	if ( count > max )
+		count = max;
+	iowrite16 ( count, virtio->common + VIRTIO_PCI_SIZE );
+
+	/* Calculate queue length */
+	len = virtio_align ( virtio_desc_size ( count ) );
+	len = virtio_align ( len + virtio_sq_size ( count ) );
+	len = virtio_align ( len + virtio_cq_size ( count ) );
+
+	/* Record queue size */
+	queue->count = count;
+	queue->len = len;
+}
+
+/**
+ * Program queue address
+ *
+ * @v virtio		Virtio device
+ * @v queue		Virtio queue
+ * @v addr		Address
+ * @v offset		Register offset
+ */
+static void virtio_pci_address ( struct virtio_device *virtio,
+				 struct virtio_queue *queue,
+				 void *addr, unsigned int offset ) {
+	physaddr_t phys;
+
+	/* Program address */
+	phys = dma ( &queue->map, addr );
+	iowrite32 ( ( phys & 0xffffffffUL ), ( virtio->common + offset + 0 ) );
+	if ( sizeof ( physaddr_t ) > sizeof ( uint32_t ) ) {
+		iowrite32 ( ( ( ( uint64_t ) phys ) >> 32 ),
+			    ( virtio->common + offset + 4 ) );
+	} else {
+		iowrite32 ( 0, ( virtio->common + offset + 4 ) );
+	}
+}
+
+/**
+ * Enable queue
+ *
+ * @v virtio		Virtio device
+ * @v queue		Virtio queue
+ */
+static void virtio_pci_enable ( struct virtio_device *virtio,
+				struct virtio_queue *queue ) {
+	unsigned int count = queue->count;
+	void *base = queue->desc;
+	size_t len;
+
+	/* Select queue */
+	iowrite16 ( queue->index, virtio->common + VIRTIO_PCI_SEL );
+
+	/* Lay out queue regions */
+	len = virtio_align ( virtio_desc_size ( count ) );
+	queue->sq = ( base + len );
+	len = virtio_align ( len + virtio_sq_size ( count ) );
+	queue->cq = ( base + len );
+	len = virtio_align ( len + virtio_cq_size ( count ) );
+	assert ( len == queue->len );
+
+	/* Program queue addresses */
+	virtio_pci_address ( virtio, queue, queue->desc, VIRTIO_PCI_DESC );
+	virtio_pci_address ( virtio, queue, queue->sq, VIRTIO_PCI_SQ );
+	virtio_pci_address ( virtio, queue, queue->cq, VIRTIO_PCI_CQ );
+
+	/* Enable queue */
+	iowrite16 ( 1, virtio->common + VIRTIO_PCI_ENABLE );
+}
+
+/** PCI ("modern") device operations */
+static struct virtio_operations virtio_pci_operations = {
+	.reset = virtio_pci_reset,
+	.status = virtio_pci_status,
+	.supported = virtio_pci_supported,
+	.negotiate = virtio_pci_negotiate,
+	.size = virtio_pci_size,
+	.enable = virtio_pci_enable,
+};
+
+/**
+ * Find PCI capability
+ *
+ * @v virtio		Virtio device
+ * @v pci		PCI device
+ * @v type		Capability type
+ * @v cap		Virtio PCI capability to fill in
+ * @ret rc		Return status code
+ */
+static int virtio_pci_cap ( struct virtio_device *virtio,
+			    struct pci_device *pci, unsigned int type,
+			    struct virtio_pci_capability *cap ) {
+	unsigned int reg;
+	int pos;
+
+	/* Scan through vendor capabilities */
+	for ( pos = pci_find_capability ( pci, PCI_CAP_ID_VNDR ) ; pos > 0 ;
+	      pos = pci_find_next_capability ( pci, pos, PCI_CAP_ID_VNDR ) ) {
+
+		/* Check length */
+		pci_read_config_byte ( pci, ( pos + PCI_CAP_LEN ), &cap->len );
+		if ( cap->len < VIRTIO_PCI_CAP_END ) {
+			DBGC ( virtio, "VIRTIO %s capability +%#02x too short "
+			       "(%d bytes)\n", virtio->name, pos, cap->len );
+			continue;
+		}
+
+		/* Read values */
+		pci_read_config_byte ( pci, ( pos + VIRTIO_PCI_CAP_TYPE ),
+				       &cap->type );
+		pci_read_config_byte ( pci, ( pos + VIRTIO_PCI_CAP_BAR ),
+				       &cap->bar );
+		pci_read_config_dword ( pci, ( pos + VIRTIO_PCI_CAP_OFFSET ),
+					&cap->offset );
+
+		/* Check type */
+		if ( cap->type != type )
+			continue;
+		DBGC2 ( virtio, "VIRTIO %s capability type %d BAR%d+%#04x\n",
+			virtio->name, type, cap->bar, cap->offset );
+
+		/* Check BAR */
+		reg = PCI_BASE_ADDRESS ( cap->bar );
+		if ( reg > PCI_BASE_ADDRESS_5 )
+			continue;
+
+		/* Success */
+		cap->pos = pos;
+		return 0;
+	}
+
+	DBGC ( virtio, "VIRTIO %s has no usable capability type %d\n",
+	       virtio->name, type );
+	cap->pos = 0;
+	return -ENOENT;
+}
+
+/**
+ * Map PCI capability
+ *
+ * @v virtio		Virtio device
+ * @v pci		PCI device
+ * @v cap		Virtio PCI capability
+ * @ret io_addr		I/O address, or NULL on error
+ */
+static void * virtio_pci_map_cap ( struct virtio_device *virtio,
+				   struct pci_device *pci,
+				   struct virtio_pci_capability *cap ) {
+	unsigned long addr;
+	unsigned int reg;
+	int is_io_bar;
+	void *io_addr;
+
+	/* Get BAR start address and type */
+	reg = PCI_BASE_ADDRESS ( cap->bar );
+	addr = pci_bar_start ( pci, reg );
+	if ( ! addr ) {
+		DBGC ( virtio, "VIRTIO %s BAR%d is not usable\n",
+		       virtio->name, cap->bar );
+		return NULL;
+	}
+
+	/* Map memory or I/O BAR */
+	addr += cap->offset;
+	is_io_bar = pci_bar_is_io ( pci, reg );
+	io_addr = ( is_io_bar ? ( ( void * ) addr ) :
+		    pci_ioremap ( pci, addr, VIRTIO_PAGE ) );
+	if ( ! io_addr ) {
+		DBGC ( virtio, "VIRTIO %s could not map BAR%d+%#04x\n",
+		       virtio->name, cap->bar, cap->offset );
+		return NULL;
+	}
+
+	DBGC2 ( virtio, "VIRTIO %s mapped BAR%d+%#04x (%s %#08lx)\n",
+		virtio->name, cap->bar, cap->offset,
+		( is_io_bar ? "IO" : "MEM" ), addr );
+	return io_addr;
+}
+
+/**
+ * Map PCI device
+ *
+ * @v virtio		Virtio device
+ * @v pci		PCI device
+ * @ret rc		Return status code
+ */
+int virtio_pci_map ( struct virtio_device *virtio, struct pci_device *pci ) {
+	struct virtio_pci_capability common;
+	struct virtio_pci_capability notify;
+	struct virtio_pci_capability device;
+	unsigned int msix;
+	uint32_t mult;
+	uint16_t ctrl;
+	int rc;
+
+	/* Initialise device */
+	virtio->name = pci->dev.name;
+	virtio->dma = &pci->dma;
+
+	/* Fix up PCI device */
+	adjust_pci_device ( pci );
+
+	/* Check if MSI-X is enabled */
+	msix = pci_find_capability ( pci, PCI_CAP_ID_MSIX );
+	if ( msix ) {
+		pci_read_config_word ( pci, msix, &ctrl );
+		if ( ! ( ctrl & PCI_MSIX_CTRL_ENABLE ) )
+			msix = 0;
+	}
+
+	/* Locate virtio capabilities */
+	virtio_pci_cap ( virtio, pci, VIRTIO_PCI_CAP_TYPE_COMMON, &common );
+	virtio_pci_cap ( virtio, pci, VIRTIO_PCI_CAP_TYPE_NOTIFY, &notify );
+	virtio_pci_cap ( virtio, pci, VIRTIO_PCI_CAP_TYPE_DEVICE, &device );
+
+	/* Use modern interface if available */
+	if ( common.pos && notify.pos && device.pos &&
+	     ( notify.len >= VIRTIO_PCI_CAP_NOTIFY_END ) ) {
+
+		/* Use modern interface */
+		virtio->op = &virtio_pci_operations;
+		dma_set_mask_64bit ( virtio->dma );
+
+		/* Read notification doorbell multiplier */
+		pci_read_config_dword ( pci, ( notify.pos +
+					       VIRTIO_PCI_CAP_NOTIFY_MULT ),
+					&mult );
+		virtio->multiplier = mult;
+		DBGC ( virtio, "VIRTIO %s using modern interface (mult x%d)\n",
+		       virtio->name, virtio->multiplier );
+
+	} else {
+
+		/* Use legacy interface */
+		virtio->op = &virtio_legacy_operations;
+		common.bar = 0;
+		common.offset = 0;
+		notify.bar = 0;
+		notify.offset = VIRTIO_LEG_DB;
+		device.bar = 0;
+		device.offset = ( msix ? VIRTIO_LEG_DEV_MSIX :
+				  VIRTIO_LEG_DEV );
+		DBGC ( virtio, "VIRTIO %s using legacy interface (MSI-X "
+		       "%sabled)\n", virtio->name, ( msix ? "en" : "dis" ) );
+	}
+
+	/* Map registers */
+	virtio->common = virtio_pci_map_cap ( virtio, pci, &common );
+	if ( ! virtio->common ) {
+		rc = -ENODEV;
+		goto err_common;
+	}
+	virtio->notify = virtio_pci_map_cap ( virtio, pci, &notify );
+	if ( ! virtio->notify ) {
+		rc = -ENODEV;
+		goto err_notify;
+	}
+	virtio->device = virtio_pci_map_cap ( virtio, pci, &device );
+	if ( ! virtio->device ) {
+		rc = -ENODEV;
+		goto err_device;
+	}
+
+	return 0;
+
+	iounmap ( virtio->device );
+ err_device:
+	iounmap ( virtio->notify );
+ err_notify:
+	iounmap ( virtio->common );
+ err_common:
+	return rc;
+}
+
+/******************************************************************************
+ *
+ * Transport-independent operations
+ *
+ ******************************************************************************
+ */
+
+/**
+ * Reset device
+ *
+ * @v virtio		Virtio device
+ * @ret rc		Return status code
+ */
+int virtio_reset ( struct virtio_device *virtio ) {
+	int rc;
+
+	/* Clear driver status */
+	virtio->stat = 0;
+
+	/* Reset device */
+	if ( ( rc = virtio->op->reset ( virtio ) ) != 0 ) {
+		DBGC ( virtio, "VIRTIO %s could not reset: %s\n",
+		       virtio->name, strerror ( rc ) );
+		return rc;
+	}
+
+	return 0;
+}
+
+/**
+ * Report driver status
+ *
+ * @v virtio		Virtio device
+ * @v stat		Additional driver status bits
+ * @ret stat		Actual device status
+ */
+unsigned int virtio_status ( struct virtio_device *virtio,
+			     unsigned int stat ) {
+
+	/* Set new driver status bits */
+	virtio->stat |= stat;
+
+	/* Report driver status */
+	return virtio->op->status ( virtio );
+}
+
+/**
+ * Negotiate features
+ *
+ * @v virtio		Virtio device
+ * @v driver		Driver supported features
+ */
+static void virtio_negotiate ( struct virtio_device *virtio,
+			       const struct virtio_features *driver ) {
+	struct virtio_features *device = &virtio->supported;
+	struct virtio_features *features = &virtio->features;
+	unsigned int i;
+
+	/* Get device supported features */
+	virtio->op->supported ( virtio );
+
+	/* Negotiate mutually supported features */
+	for ( i = 0 ; i < VIRTIO_FEATURE_WORDS ; i++ )
+		features->word[i] = ( device->word[i] & driver->word[i] );
+	virtio->op->negotiate ( virtio );
+
+	/* Show features */
+	DBGC ( virtio, "VIRTIO %s features", virtio->name );
+	for ( i = 0 ; i < VIRTIO_FEATURE_WORDS ; i++ )
+		DBGC ( virtio, "%s%08x", ( i ? ":" : " " ), device->word[i] );
+	DBGC ( virtio, " /" );
+	for ( i = 0 ; i < VIRTIO_FEATURE_WORDS ; i++ )
+		DBGC ( virtio, "%s%08x", ( i ? ":" : " " ), features->word[i] );
+	DBGC ( virtio, "\n" );
+}
+
+/**
+ * Initialise device
+ *
+ * @v virtio		Virtio device
+ * @v driver		Driver supported features
+ * @ret rc		Return status code
+ */
+int virtio_init ( struct virtio_device *virtio,
+		  const struct virtio_features *driver ) {
+	unsigned int stat;
+	int rc;
+
+	/* Reset device */
+	if ( ( rc = virtio_reset ( virtio ) ) != 0 )
+		goto err_reset;
+
+	/* Acknowledge device existence */
+	virtio_status ( virtio, VIRTIO_STAT_ACKNOWLEDGE );
+
+	/* Report driver existence */
+	virtio_status ( virtio, VIRTIO_STAT_DRIVER );
+
+	/* Negotiate features */
+	virtio_negotiate ( virtio, driver );
+
+	/* Report feature negotiation completion, if applicable */
+	if ( virtio->features.word[1] & VIRTIO_FEAT1_MODERN ) {
+		stat = virtio_status ( virtio, VIRTIO_STAT_FEATURES_OK );
+		if ( ! ( stat & VIRTIO_STAT_FEATURES_OK ) ) {
+			DBGC ( virtio, "VIRTIO %s did not accept features\n",
+			       virtio->name );
+			rc = -ENOTSUP;
+			goto err_features;
+		}
+	}
+
+	return 0;
+
+ err_features:
+	virtio_reset ( virtio );
+ err_reset:
+	virtio_status ( virtio, VIRTIO_STAT_FAIL );
+	return rc;
+}
+
+/**
+ * Enable queue
+ *
+ * @v virtio		Virtio device
+ * @v queue		Virtio queue
+ * @v count		Requested queue size
+ * @ret rc		Return status code
+ */
+int virtio_enable ( struct virtio_device *virtio, struct virtio_queue *queue,
+		    unsigned int count ) {
+	unsigned int offset;
+	int rc;
+
+	/* Reset counters */
+	queue->prod = 0;
+	queue->cons = 0;
+
+	/* Determine queue size */
+	virtio->op->size ( virtio, queue, count );
+	if ( ( queue->count == 0 ) ||
+	     ( queue->count & ( queue->count - 1 ) ) ) {
+		DBGC ( virtio, "VIRTIO %s Q%d invalid size %d\n",
+		       virtio->name, queue->index, queue->count );
+		rc = -ENODEV;
+		goto err_count;
+	}
+	queue->mask = ( queue->count - 1 );
+
+	/* Allocate and initialise queue */
+	queue->desc = dma_alloc ( virtio->dma, &queue->map, queue->len,
+				  VIRTIO_PAGE );
+	if ( ! queue->desc ) {
+		rc = -ENOMEM;
+		goto err_alloc;
+	}
+	memset ( queue->desc, 0, queue->len );
+
+	/* Enable queue */
+	virtio->op->enable ( virtio, queue );
+	DBGC ( virtio, "VIRTIO %s Q%d %dx descriptors at [%#08lx,%#08lx)\n",
+	       virtio->name, queue->index, queue->count,
+	       virt_to_phys ( queue->desc ),
+	       ( virt_to_phys ( queue->desc ) +
+		 virtio_desc_size ( queue->count ) ) );
+	DBGC ( virtio, "VIRTIO %s Q%d %dx submissions at [%#08lx,%#08lx)\n",
+	       virtio->name, queue->index, queue->count,
+	       virt_to_phys ( queue->sq ),
+	       ( virt_to_phys ( queue->sq ) +
+		 virtio_sq_size ( queue->count ) ) );
+	DBGC ( virtio, "VIRTIO %s Q%d %dx completions at [%#08lx,%#08lx)\n",
+	       virtio->name, queue->index, queue->count,
+	       virt_to_phys ( queue->cq ),
+	       ( virt_to_phys ( queue->cq ) +
+		 virtio_cq_size ( queue->count ) ) );
+
+	/* Calculate doorbell register address */
+	offset = ( queue->index * virtio->multiplier );
+	queue->db = ( virtio->notify + offset );
+	DBGC ( virtio, "VIRTIO %s Q%d doorbell at +%#04x\n",
+	       virtio->name, queue->index, offset );
+
+	return 0;
+
+	dma_free ( &queue->map, queue->desc, queue->len );
+	queue->desc = NULL;
+ err_alloc:
+ err_count:
+	return rc;
+}
+
+/**
+ * Free queue
+ *
+ * @v virtio		Virtio device
+ * @v queue		Virtio queue
+ */
+void virtio_free ( struct virtio_device *virtio, struct virtio_queue *queue ) {
+
+	/* Free queue */
+	if ( queue->desc ) {
+		dma_free ( &queue->map, queue->desc, queue->len );
+		queue->desc = NULL;
+		DBGC ( virtio, "VIRTIO %s Q%d freed\n",
+		       virtio->name, queue->index );
+	}
+}
+
+/**
+ * Unmap device
+ *
+ * @v virtio		Virtio device
+ */
+void virtio_unmap ( struct virtio_device *virtio ) {
+
+	/* Unmap device-specific registers */
+	iounmap ( virtio->device );
+
+	/* Unmap notification doorbells */
+	iounmap ( virtio->notify );
+
+	/* Unmap common registers */
+	iounmap ( virtio->common );
+}
diff --git a/src/drivers/net/virtio-net.c b/src/drivers/net/virtio-net.c
index 32dad9a..bbaeb9c 100644
--- a/src/drivers/net/virtio-net.c
+++ b/src/drivers/net/virtio-net.c
@@ -1,707 +1,597 @@
 /*
- * (c) Copyright 2010 Stefan Hajnoczi <stefanha@gmail.com>
+ * Copyright (C) 2026 Michael Brown <mbrown@fensystems.co.uk>.
  *
- * based on the Etherboot virtio-net driver
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
  *
- *  (c) Copyright 2008 Bull S.A.S.
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
  *
- *  Author: Laurent Vivier <Laurent.Vivier@bull.net>
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
  *
- * some parts from Linux Virtio PCI driver
- *
- *  Copyright IBM Corp. 2007
- *  Authors: Anthony Liguori  <aliguori@us.ibm.com>
- *
- *  some parts from Linux Virtio Ring
- *
- *  Copyright Rusty Russell IBM Corporation 2007
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
+ * You can also choose to distribute this program under the terms of
+ * the Unmodified Binary Distribution Licence (as given in the file
+ * COPYING.UBDL), provided that you have satisfied its requirements.
  */
 
 FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
+FILE_SECBOOT ( PERMITTED );
 
-#include <errno.h>
-#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
 #include <unistd.h>
-#include <ipxe/list.h>
-#include <ipxe/iobuf.h>
+#include <errno.h>
+#include <byteswap.h>
 #include <ipxe/netdevice.h>
-#include <ipxe/pci.h>
-#include <ipxe/dma.h>
-#include <ipxe/if_ether.h>
 #include <ipxe/ethernet.h>
-#include <ipxe/virtio-pci.h>
-#include <ipxe/virtio-ring.h>
+#include <ipxe/if_ether.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/malloc.h>
+#include <ipxe/pci.h>
 #include "virtio-net.h"
 
-/*
- * Virtio network device driver
+/** @file
  *
- * Specification:
- * http://ozlabs.org/~rusty/virtio-spec/
+ * Virtual I/O network device
  *
- * The virtio network device is supported by Linux virtualization software
- * including QEMU/KVM and lguest.  This driver supports the virtio over PCI
- * transport; virtual machines have one virtio-net PCI adapter per NIC.
- *
- * Virtio-net is different from hardware NICs because virtio devices
- * communicate with the hypervisor via virtqueues, not traditional descriptor
- * rings.  Virtqueues are unordered queues, they support add_buf() and
- * get_buf() operations.  To transmit a packet, the driver has to add the
- * packet buffer onto the virtqueue.  To receive a packet, the driver must
- * first add an empty buffer to the virtqueue and then get the filled packet
- * buffer on completion.
- *
- * Virtqueues are an abstraction that is commonly implemented using the vring
- * descriptor ring layout.  The vring is the actual shared memory structure
- * that allows the virtual machine to communicate buffers with the hypervisor.
- * Because the vring layout is optimized for flexibility and performance rather
- * than space, it is heavy-weight and allocated like traditional descriptor
- * rings in the open() function of the driver and not in probe().
- *
- * There is no true interrupt enable/disable.  Virtqueues have callback
- * enable/disable flags but these are only hints.  The hypervisor may still
- * raise an interrupt.  Nevertheless, this driver disables callbacks in the
- * hopes of avoiding interrupts.
  */
 
-/* Driver types are declared here so virtio-net.h can be easily synced with its
- * Linux source.
+/** Supported features */
+const struct virtio_features virtio_net_features = {
+	.word = {
+		( VIRTIO_FEAT0_ANY_LAYOUT |
+		  VIRTIO_FEAT0_NET_MTU |
+		  VIRTIO_FEAT0_NET_MAC ),
+		( VIRTIO_FEAT1_MODERN ),
+	},
+};
+
+/******************************************************************************
+ *
+ * Device-specific registers
+ *
+ ******************************************************************************
  */
 
-/* Virtqueue indices */
-enum {
-	RX_INDEX = 0,
-	TX_INDEX,
-	QUEUE_NB
-};
-
-/** Max number of pending rx packets */
-#define NUM_RX_BUF 8
-
-struct virtnet_nic {
-	/** Base pio register address */
-	unsigned long ioaddr;
-
-	/** 0 for legacy, 1 for virtio 1.0 */
-	int virtio_version;
-
-	/** Virtio 1.0 device data */
-	struct virtio_pci_modern_device vdev;
-
-	/** RX/TX virtqueues */
-	struct vring_virtqueue *virtqueue;
-
-	/** RX packets handed to the NIC waiting to be filled in */
-	struct list_head rx_iobufs;
-
-	/** Pending rx packet count */
-	unsigned int rx_num_iobufs;
-
-	/** DMA device */
-	struct dma_device *dma;
-
-};
-
-/** Add an iobuf to a virtqueue
+/**
+ * Get MAC address
  *
  * @v netdev		Network device
- * @v vq_idx		Virtqueue index (RX_INDEX or TX_INDEX)
+ */
+static void virtio_net_mac ( struct net_device *netdev ) {
+	struct virtio_net *vnet = netdev->priv;
+	struct virtio_device *virtio = &vnet->virtio;
+	uint32_t has_mac;
+	unsigned int i;
+
+	/* Read MAC address from device registers */
+	for ( i = 0 ; i < ETH_ALEN ; i++ ) {
+		netdev->hw_addr[i] = ioread8 ( virtio->device +
+					       VIRTIO_NET_MAC + i );
+	}
+
+	/* Use random MAC address if undefined or invalid */
+	has_mac = ( virtio->features.word[0] & VIRTIO_FEAT0_NET_MAC );
+	if ( ! ( has_mac && is_valid_ether_addr ( netdev->hw_addr ) ) ) {
+		DBGC ( vnet, "VNET %s has %s MAC address\n",
+		       virtio->name, ( has_mac ? "invalid" : "no" ) );
+		eth_random_addr ( netdev->hw_addr );
+	}
+}
+
+/**
+ * Get MTU
+ *
+ * @v netdev		Network device
+ */
+static void virtio_net_mtu ( struct net_device *netdev ) {
+	struct virtio_net *vnet = netdev->priv;
+	struct virtio_device *virtio = &vnet->virtio;
+	uint32_t has_mtu;
+
+	/* Read MTU from device registers, if available */
+	has_mtu = ( virtio->features.word[0] & VIRTIO_FEAT0_NET_MTU );
+	if ( has_mtu ) {
+		netdev->mtu = ioread16 ( virtio->device + VIRTIO_NET_MTU );
+		netdev->max_pkt_len = ( netdev->mtu + ETH_HLEN );
+		DBGC ( vnet, "VNET %s has MTU %zd\n",
+		       virtio->name, netdev->mtu );
+	}
+}
+
+/******************************************************************************
+ *
+ * Queue management
+ *
+ ******************************************************************************
+ */
+
+/**
+ * Enable queue
+ *
+ * @v vnet		Virtio network device
+ * @v queue		Virtio network queue
+ * @ret rc		Return status code
+ */
+static int virtio_net_enable ( struct virtio_net *vnet,
+			       struct virtio_net_queue *queue ) {
+	struct virtio_device *virtio = &vnet->virtio;
+	struct virtio_desc *desc;
+	unsigned int fill;
+	unsigned int slot;
+	unsigned int index;
+	unsigned int write;
+	int rc;
+
+	/* Map packet header */
+	if ( ( rc = dma_map ( virtio->dma, &queue->map, &queue->hdr,
+			      sizeof ( queue->hdr ), queue->dma ) ) != 0 ) {
+		DBGC ( vnet, "VNET %s Q%d could not map header: %s\n",
+		       virtio->name, queue->queue.index, strerror ( rc ) );
+		goto err_map;
+	}
+
+	/* Enable queue */
+	if ( ( rc = virtio_enable ( virtio, &queue->queue,
+				    queue->count ) ) != 0 ) {
+		DBGC ( vnet, "VNET %s Q%d could not initialise: %s\n",
+		       virtio->name, queue->queue.index, strerror ( rc ) );
+		goto err_enable;
+	}
+
+	/* Calculate mask */
+	fill = queue->queue.count;
+	if ( fill > queue->max )
+		fill = queue->max;
+	queue->fill = fill;
+	queue->mask = ( fill - 1 );
+
+	/* Initialise descriptors and slot ring */
+	write = queue->write;
+	for ( slot = 0 ; slot < fill ; slot++ ) {
+		queue->slots[slot] = slot;
+		queue->iobufs[slot] = NULL;
+		index = ( slot * VIRTIO_NET_DESCS );
+		desc = &queue->queue.desc[index];
+		desc[0].addr = cpu_to_le64 ( dma ( &queue->map, &queue->hdr ));
+		desc[0].len = cpu_to_le32 ( vnet->hlen );
+		desc[0].flags = cpu_to_le16 ( VIRTIO_DESC_FL_NEXT | write );
+		desc[0].next = cpu_to_le16 ( index + 1 );
+		desc[1].flags = cpu_to_le16 ( write );
+	}
+
+	DBGC ( vnet, "VNET %s Q%d using %d/%d descriptors\n", virtio->name,
+	       queue->queue.index, queue->fill, queue->queue.count );
+	return 0;
+
+	/* There may be no way to disable individual queues: the
+	 * caller must reset the whole device to recover from a
+	 * failure.
+	 */
+ err_enable:
+	dma_unmap ( &queue->map, sizeof ( queue->hdr ) );
+ err_map:
+	return rc;
+}
+
+/**
+ * Submit I/O buffer to queue
+ *
+ * @v vnet		Virtio network device
+ * @v queue		Virtio network queue
  * @v iobuf		I/O buffer
- *
- * The virtqueue is kicked after the iobuf has been added.
+ * @v len		Submitted length
  */
-static void virtnet_enqueue_iob ( struct net_device *netdev,
-				  int vq_idx, struct io_buffer *iobuf ) {
-	struct virtnet_nic *virtnet = netdev->priv;
-	struct vring_virtqueue *vq = &virtnet->virtqueue[vq_idx];
-	struct virtio_net_hdr_modern *header = vq->empty_header;
-	unsigned int out = ( vq_idx == TX_INDEX ) ? 2 : 0;
-	unsigned int in = ( vq_idx == TX_INDEX ) ? 0 : 2;
-	size_t header_len = ( virtnet->virtio_version ?
-			      sizeof ( *header ) : sizeof ( header->legacy ) );
-	struct vring_list list[] = {
-		{
-			/* Share a single zeroed virtio net header between all
-			 * packets in a ring.  This works because this driver
-			 * does not use any advanced features so none of the
-			 * header fields get used.
-			 *
-			 * Some host implementations (notably Google Compute
-			 * Platform) are known to unconditionally write back
-			 * to header->flags for received packets.  Work around
-			 * this by using separate RX and TX headers.
-			 */
-			.addr = dma ( &vq->map, header ),
-			.length = header_len,
-		},
-		{
-			.addr = iob_dma ( iobuf ),
-			.length = iob_len ( iobuf ),
-		},
-	};
+static void virtio_net_submit ( struct virtio_net *vnet,
+				struct virtio_net_queue *queue,
+				struct io_buffer *iobuf, size_t len ) {
+	struct virtio_device *virtio = &vnet->virtio;
+	struct virtio_desc *desc;
+	unsigned int prod;
+	unsigned int slot;
+	unsigned int index;
 
-	DBGC2 ( virtnet, "VIRTIO-NET %p enqueuing iobuf %p on vq %d\n",
-		virtnet, iobuf, vq_idx );
+	/* Get next descriptor pair and consume slot */
+	prod = queue->queue.prod;
+	slot = queue->slots[ prod & queue->mask ];
+	index = ( slot * VIRTIO_NET_DESCS );
+	desc = &queue->queue.desc[index];
 
-	vring_add_buf ( vq, list, out, in, iobuf, 0 );
-	vring_kick ( virtnet->virtio_version ? &virtnet->vdev : NULL,
-		     virtnet->ioaddr, vq, 1 );
+	/* Populate descriptors */
+	desc[1].addr = cpu_to_le64 ( iob_dma ( iobuf ) );
+	desc[1].len = cpu_to_le32 ( len );
+	DBGC2 ( vnet, "VNET %s Q%d [%02x-%02x] is [%lx,%lx)\n",
+		virtio->name, queue->queue.index, index, ( index + 1 ),
+		virt_to_phys ( iobuf->data ),
+		( virt_to_phys ( iobuf->data ) + len ) );
+
+	/* Record I/O buffer */
+	assert ( queue->iobufs[slot] == NULL );
+	queue->iobufs[slot] = iobuf;
+
+	/* Submit descriptors */
+	virtio_submit ( &queue->queue, index );
 }
 
-/** Try to keep rx virtqueue filled with iobufs
+/**
+ * Complete I/O buffer
  *
- * @v netdev		Network device
+ * @v vnet		Virtio network device
+ * @v queue		Virtio network queue
+ * @v len		Length to fill in (or NULL to ignore)
+ * @ret iobuf		I/O buffer
  */
-static void virtnet_refill_rx_virtqueue ( struct net_device *netdev ) {
-	struct virtnet_nic *virtnet = netdev->priv;
-	size_t len = ( netdev->max_pkt_len + 4 /* VLAN */ );
-
-	while ( virtnet->rx_num_iobufs < NUM_RX_BUF ) {
-		struct io_buffer *iobuf;
-
-		/* Try to allocate a buffer, stop for now if out of memory */
-		iobuf = alloc_rx_iob ( len, virtnet->dma );
-		if ( ! iobuf )
-			break;
-
-		/* Keep track of iobuf so close() can free it */
-		list_add ( &iobuf->list, &virtnet->rx_iobufs );
-
-		/* Mark packet length until we know the actual size */
-		iob_put ( iobuf, len );
-
-		virtnet_enqueue_iob ( netdev, RX_INDEX, iobuf );
-		virtnet->rx_num_iobufs++;
-	}
-}
-
-/** Helper to free all virtqueue memory
- *
- * @v netdev		Network device
- */
-static void virtnet_free_virtqueues ( struct net_device *netdev ) {
-	struct virtnet_nic *virtnet = netdev->priv;
-	int i;
-
-	for ( i = 0; i < QUEUE_NB; i++ ) {
-		virtio_pci_unmap_capability ( &virtnet->virtqueue[i].notification );
-		vp_free_vq ( &virtnet->virtqueue[i] );
-	}
-
-	free ( virtnet->virtqueue );
-	virtnet->virtqueue = NULL;
-}
-
-/** Open network device, legacy virtio 0.9.5
- *
- * @v netdev	Network device
- * @ret rc	Return status code
- */
-static int virtnet_open_legacy ( struct net_device *netdev ) {
-	struct virtnet_nic *virtnet = netdev->priv;
-	unsigned long ioaddr = virtnet->ioaddr;
-	u32 features;
-	int i;
-
-	/* Reset for sanity */
-	vp_reset ( ioaddr );
-
-	/* Allocate virtqueues */
-	virtnet->virtqueue = zalloc ( QUEUE_NB *
-				      sizeof ( *virtnet->virtqueue ) );
-	if ( ! virtnet->virtqueue )
-		return -ENOMEM;
-
-	/* Initialize rx/tx virtqueues */
-	for ( i = 0; i < QUEUE_NB; i++ ) {
-		if ( vp_find_vq ( ioaddr, i, &virtnet->virtqueue[i], virtnet->dma,
-                                  sizeof ( struct virtio_net_hdr_modern ) ) == -1 ) {
-			DBGC ( virtnet, "VIRTIO-NET %p cannot register queue %d\n",
-			       virtnet, i );
-			virtnet_free_virtqueues ( netdev );
-			return -ENOENT;
-		}
-	}
-
-	/* Initialize rx packets */
-	INIT_LIST_HEAD ( &virtnet->rx_iobufs );
-	virtnet->rx_num_iobufs = 0;
-	virtnet_refill_rx_virtqueue ( netdev );
-
-	/* Disable interrupts before starting */
-	netdev_irq ( netdev, 0 );
-
-	/* Driver is ready */
-	features = vp_get_features ( ioaddr );
-	vp_set_features ( ioaddr, features & ( ( 1 << VIRTIO_NET_F_MAC ) |
-					       ( 1 << VIRTIO_NET_F_MTU ) ) );
-	vp_set_status ( ioaddr, VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK );
-	return 0;
-}
-
-/** Open network device, modern virtio 1.0
- *
- * @v netdev	Network device
- * @ret rc	Return status code
- */
-static int virtnet_open_modern ( struct net_device *netdev ) {
-	struct virtnet_nic *virtnet = netdev->priv;
-	u64 features;
-	u8 status;
-
-	/* Negotiate features */
-	features = vpm_get_features ( &virtnet->vdev );
-	if ( ! ( features & VIRTIO_F_VERSION_1 ) ) {
-		vpm_add_status ( &virtnet->vdev, VIRTIO_CONFIG_S_FAILED );
-		return -EINVAL;
-	}
-	vpm_set_features ( &virtnet->vdev, features & (
-		( 1ULL << VIRTIO_NET_F_MAC ) |
-		( 1ULL << VIRTIO_NET_F_MTU ) |
-		( 1ULL << VIRTIO_F_VERSION_1 ) |
-		( 1ULL << VIRTIO_F_ANY_LAYOUT ) |
-		( 1ULL << VIRTIO_F_IOMMU_PLATFORM ) ) );
-	vpm_add_status ( &virtnet->vdev, VIRTIO_CONFIG_S_FEATURES_OK );
-
-	status = vpm_get_status ( &virtnet->vdev );
-	if ( ! ( status & VIRTIO_CONFIG_S_FEATURES_OK ) ) {
-		DBGC ( virtnet, "VIRTIO-NET %p device didn't accept features\n",
-		       virtnet );
-		vpm_add_status ( &virtnet->vdev, VIRTIO_CONFIG_S_FAILED );
-		return -EINVAL;
-	}
-
-	/* Allocate virtqueues */
-	virtnet->virtqueue = zalloc ( QUEUE_NB *
-				      sizeof ( *virtnet->virtqueue ) );
-	if ( ! virtnet->virtqueue ) {
-		vpm_add_status ( &virtnet->vdev, VIRTIO_CONFIG_S_FAILED );
-		return -ENOMEM;
-	}
-
-	/* Initialize rx/tx virtqueues */
-	if ( vpm_find_vqs ( &virtnet->vdev, QUEUE_NB, virtnet->virtqueue,
-                            virtnet->dma, sizeof ( struct virtio_net_hdr_modern ) ) ) {
-		DBGC ( virtnet, "VIRTIO-NET %p cannot register queues\n",
-		       virtnet );
-		virtnet_free_virtqueues ( netdev );
-		vpm_add_status ( &virtnet->vdev, VIRTIO_CONFIG_S_FAILED );
-		return -ENOENT;
-	}
-
-	/* Disable interrupts before starting */
-	netdev_irq ( netdev, 0 );
-
-	vpm_add_status ( &virtnet->vdev, VIRTIO_CONFIG_S_DRIVER_OK );
-
-	/* Initialize rx packets */
-	INIT_LIST_HEAD ( &virtnet->rx_iobufs );
-	virtnet->rx_num_iobufs = 0;
-	virtnet_refill_rx_virtqueue ( netdev );
-	return 0;
-}
-
-/** Open network device
- *
- * @v netdev	Network device
- * @ret rc	Return status code
- */
-static int virtnet_open ( struct net_device *netdev ) {
-	struct virtnet_nic *virtnet = netdev->priv;
-
-	if ( virtnet->virtio_version ) {
-		return virtnet_open_modern ( netdev );
-	} else {
-		return virtnet_open_legacy ( netdev );
-	}
-}
-
-/** Close network device
- *
- * @v netdev	Network device
- */
-static void virtnet_close ( struct net_device *netdev ) {
-	struct virtnet_nic *virtnet = netdev->priv;
+static struct io_buffer * virtio_net_complete ( struct virtio_net *vnet,
+						struct virtio_net_queue *queue,
+						size_t *len ) {
+	struct virtio_device *virtio = &vnet->virtio;
 	struct io_buffer *iobuf;
-	struct io_buffer *next_iobuf;
+	unsigned int cons;
+	unsigned int slot;
+	unsigned int index;
 
-	if ( virtnet->virtio_version ) {
-		vpm_reset ( &virtnet->vdev );
-	} else {
-		vp_reset ( virtnet->ioaddr );
-	}
+	/* Complete descriptor pair and recycle slot */
+	cons = queue->queue.cons;
+	index = virtio_complete ( &queue->queue, len );
+	slot = ( index / VIRTIO_NET_DESCS );
+	queue->slots[ cons & queue->mask ] = slot;
 
-	/* Virtqueues can be freed now that NIC is reset */
-	virtnet_free_virtqueues ( netdev );
+	/* Complete I/O buffer */
+	iobuf = queue->iobufs[slot];
+	assert ( iobuf != NULL );
+	queue->iobufs[slot] = NULL;
+	DBGC2 ( vnet, "VNET %s Q%d [%02x-%02x] complete",
+		virtio->name, queue->queue.index, index, ( index + 1 ) );
+	if ( len )
+		DBGC2 ( vnet, " len %#zx", *len );
+	DBGC2 ( vnet, "\n" );
 
-	/* Free rx iobufs */
-	list_for_each_entry_safe ( iobuf, next_iobuf, &virtnet->rx_iobufs,
-				   list ) {
-		list_del ( &iobuf->list );
-		free_rx_iob ( iobuf );
-	}
-	virtnet->rx_num_iobufs = 0;
+	return iobuf;
 }
 
-/** Transmit packet
+/******************************************************************************
  *
- * @v netdev	Network device
- * @v iobuf	I/O buffer
- * @ret rc	Return status code
+ * Network device interface
+ *
+ ******************************************************************************
  */
-static int virtnet_transmit ( struct net_device *netdev,
-			      struct io_buffer *iobuf ) {
-	virtnet_enqueue_iob ( netdev, TX_INDEX, iobuf );
+
+/**
+ * Refill receive queue
+ *
+ * @v vnet		Virtio network device
+ */
+static void virtio_net_refill_rx ( struct virtio_net *vnet ) {
+	struct virtio_device *virtio = &vnet->virtio;
+	struct virtio_net_queue *queue = &vnet->rx;
+	struct io_buffer *iobuf;
+	size_t len = vnet->mfs;
+	unsigned int refilled = 0;
+
+	/* Refill queue */
+	while ( ( queue->queue.prod - queue->queue.cons ) < queue->fill ) {
+
+		/* Allocate I/O buffer */
+		iobuf = alloc_rx_iob ( len, virtio->dma );
+		if ( ! iobuf ) {
+			/* Wait for next refill */
+			break;
+		}
+
+		/* Submit I/O buffer */
+		virtio_net_submit ( vnet, queue, iobuf, len );
+		refilled++;
+	}
+
+	/* Notify queue, if applicable */
+	if ( refilled )
+		virtio_notify ( &queue->queue );
+}
+
+/**
+ * Open network device
+ *
+ * @v netdev		Network device
+ * @ret rc		Return status code
+ */
+static int virtio_net_open ( struct net_device *netdev ) {
+	struct virtio_net *vnet = netdev->priv;
+	struct virtio_device *virtio = &vnet->virtio;
+	union virtio_net_header hdr;
+	int rc;
+
+	/* (Re)initialise device */
+	if ( ( rc = virtio_init ( virtio, &virtio_net_features ) ) != 0 ) {
+		DBGC ( vnet, "VNET %s could not initialise: %s\n",
+		       virtio->name, strerror ( rc ) );
+		goto err_init;
+	}
+
+	/* Calculate header length */
+	vnet->hlen = ( virtio_is_legacy ( virtio ) ?
+		       sizeof ( hdr.legacy ) : sizeof ( hdr.modern ) );
+
+	/* Calculate maximum frame size */
+	vnet->mfs = ( ETH_HLEN + 4 /* possible VLAN */ + netdev->mtu );
+
+	/* Enable receive queue */
+	if ( ( rc = virtio_net_enable ( vnet, &vnet->rx ) ) != 0 ) {
+		DBGC ( vnet, "VNET %s could not enable RX: %s\n",
+		       virtio->name, strerror ( rc ) );
+		goto err_rx;
+	}
+
+	/* Enable transmit queue */
+	if ( ( rc = virtio_net_enable ( vnet, &vnet->tx ) ) != 0 ) {
+		DBGC ( vnet, "VNET %s could not enable TX: %s\n",
+		       virtio->name, strerror ( rc ) );
+		goto err_tx;
+	}
+
+	/* Report driver readiness */
+	virtio_status ( virtio, VIRTIO_STAT_DRIVER_OK );
+
+	/* Refill receive queue */
+	virtio_net_refill_rx ( vnet );
+
+	return 0;
+
+	dma_unmap ( &vnet->tx.map, sizeof ( vnet->tx.hdr ) );
+ err_tx:
+	dma_unmap ( &vnet->rx.map, sizeof ( vnet->rx.hdr ) );
+ err_rx:
+	/* There may be no way to disable individual queues: we must
+	 * reset the whole device instead and then free the queues.
+	 */
+	virtio_reset ( virtio );
+	virtio_free ( virtio, &vnet->rx.queue );
+	virtio_free ( virtio, &vnet->tx.queue );
+ err_init:
+	return rc;
+}
+
+/**
+ * Close network device
+ *
+ * @v netdev		Network device
+ */
+static void virtio_net_close ( struct net_device *netdev ) {
+	struct virtio_net *vnet = netdev->priv;
+	struct virtio_device *virtio = &vnet->virtio;
+	unsigned int i;
+
+	/* Reset device */
+	virtio_reset ( virtio );
+
+	/* Unmap headers (now that device is guaranteed idle) */
+	dma_unmap ( &vnet->rx.map, sizeof ( vnet->rx.hdr ) );
+	dma_unmap ( &vnet->tx.map, sizeof ( vnet->tx.hdr ) );
+
+	/* Free queues */
+	virtio_free ( virtio, &vnet->rx.queue );
+	virtio_free ( virtio, &vnet->tx.queue );
+
+	/* Discard any incomplete RX buffers (completed slots are NULL) */
+	for ( i = 0 ; i < VIRTIO_NET_RX_MAX ; i++ )
+		free_rx_iob ( vnet->rx.iobufs[i] );
+}
+
+/**
+ * Transmit packet
+ *
+ * @v netdev		Network device
+ * @v iobuf		I/O buffer
+ * @ret rc		Return status code
+ */
+static int virtio_net_transmit ( struct net_device *netdev,
+				 struct io_buffer *iobuf ) {
+	struct virtio_net *vnet = netdev->priv;
+	struct virtio_device *virtio = &vnet->virtio;
+	struct virtio_net_queue *queue = &vnet->tx;
+
+	/* Check for an available transmit descriptor */
+	if ( ( queue->queue.prod - queue->queue.cons ) >= queue->fill ) {
+		DBGC ( vnet, "VNET %s out of transmit descriptors\n",
+		       virtio->name );
+		return -ENOBUFS;
+	}
+
+	/* Submit I/O buffer */
+	virtio_net_submit ( vnet, queue, iobuf, iob_len ( iobuf ) );
+
+	/* Notify queue */
+	virtio_notify ( &queue->queue );
+
 	return 0;
 }
 
-/** Complete packet transmission
+/**
+ * Poll for completed packets
  *
- * @v netdev	Network device
+ * @v netdev		Network device
  */
-static void virtnet_process_tx_packets ( struct net_device *netdev ) {
-	struct virtnet_nic *virtnet = netdev->priv;
-	struct vring_virtqueue *tx_vq = &virtnet->virtqueue[TX_INDEX];
+static void virtio_net_poll_tx ( struct net_device *netdev ) {
+	struct virtio_net *vnet = netdev->priv;
+	struct virtio_net_queue *queue = &vnet->tx;
+	struct io_buffer *iobuf;
 
-	while ( vring_more_used ( tx_vq ) ) {
-		struct io_buffer *iobuf = vring_get_buf ( tx_vq, NULL );
+	/* Poll for completed descriptors */
+	while ( virtio_completions ( &queue->queue ) ) {
 
-		DBGC2 ( virtnet, "VIRTIO-NET %p tx complete iobuf %p\n",
-			virtnet, iobuf );
-
+		/* Complete I/O buffer */
+		iobuf = virtio_net_complete ( vnet, queue, NULL );
 		netdev_tx_complete ( netdev, iobuf );
 	}
 }
 
-/** Complete packet reception
+/**
+ * Poll for received packets
  *
- * @v netdev	Network device
+ * @v netdev		Network device
  */
-static void virtnet_process_rx_packets ( struct net_device *netdev ) {
-	struct virtnet_nic *virtnet = netdev->priv;
-	struct vring_virtqueue *rx_vq = &virtnet->virtqueue[RX_INDEX];
+static void virtio_net_poll_rx ( struct net_device *netdev ) {
+	struct virtio_net *vnet = netdev->priv;
+	struct virtio_net_queue *queue = &vnet->rx;
+	struct io_buffer *iobuf;
+	size_t len;
 
-	while ( vring_more_used ( rx_vq ) ) {
-		unsigned int len;
-		struct io_buffer *iobuf = vring_get_buf ( rx_vq, &len );
+	/* Poll for completed descriptors */
+	while ( virtio_completions ( &queue->queue ) > 0 ) {
 
-		/* Release ownership of iobuf */
-		list_del ( &iobuf->list );
-		virtnet->rx_num_iobufs--;
-
-		/* Update iobuf length */
-		iob_unput ( iobuf, iob_len ( iobuf ) );
-		iob_put ( iobuf, len - sizeof ( struct virtio_net_hdr ) );
-
-		DBGC2 ( virtnet, "VIRTIO-NET %p rx complete iobuf %p len %zd\n",
-			virtnet, iobuf, iob_len ( iobuf ) );
-
-		/* Pass completed packet to the network stack */
+		/* Complete I/O buffer */
+		iobuf = virtio_net_complete ( vnet, queue, &len );
+		iob_put ( iobuf, ( len - vnet->hlen ) );
 		netdev_rx ( netdev, iobuf );
 	}
-
-	virtnet_refill_rx_virtqueue ( netdev );
 }
 
-/** Poll for completed and received packets
+/**
+ * Poll for completed and received packets
  *
- * @v netdev	Network device
+ * @v netdev		Network device
  */
-static void virtnet_poll ( struct net_device *netdev ) {
-	struct virtnet_nic *virtnet = netdev->priv;
+static void virtio_net_poll ( struct net_device *netdev ) {
+	struct virtio_net *vnet = netdev->priv;
 
-	/* Acknowledge interrupt.  This is necessary for UNDI operation and
-	 * interrupts that are raised despite VRING_AVAIL_F_NO_INTERRUPT being
-	 * set (that flag is just a hint and the hypervisor does not have to
-	 * honor it).
-	 */
-	if ( virtnet->virtio_version ) {
-		vpm_get_isr ( &virtnet->vdev );
-	} else {
-		vp_get_isr ( virtnet->ioaddr );
-	}
+	/* Poll for completed packets */
+	virtio_net_poll_tx ( netdev );
 
-	virtnet_process_tx_packets ( netdev );
-	virtnet_process_rx_packets ( netdev );
+	/* Poll for received packets */
+	virtio_net_poll_rx ( netdev );
+
+	/* Refill receive queue */
+	virtio_net_refill_rx ( vnet );
 }
 
-/** Enable or disable interrupts
- *
- * @v netdev	Network device
- * @v enable	Interrupts should be enabled
- */
-static void virtnet_irq ( struct net_device *netdev, int enable ) {
-	struct virtnet_nic *virtnet = netdev->priv;
-	int i;
-
-	for ( i = 0; i < QUEUE_NB; i++ ) {
-		if ( enable )
-			vring_enable_cb ( &virtnet->virtqueue[i] );
-		else
-			vring_disable_cb ( &virtnet->virtqueue[i] );
-	}
-}
-
-/** virtio-net device operations */
-static struct net_device_operations virtnet_operations = {
-	.open = virtnet_open,
-	.close = virtnet_close,
-	.transmit = virtnet_transmit,
-	.poll = virtnet_poll,
-	.irq = virtnet_irq,
+/** Virtio network device operations */
+static struct net_device_operations virtio_net_operations = {
+	.open		= virtio_net_open,
+	.close		= virtio_net_close,
+	.transmit	= virtio_net_transmit,
+	.poll		= virtio_net_poll,
 };
 
-/**
- * Probe PCI device, legacy virtio 0.9.5
+/******************************************************************************
  *
- * @v pci	PCI device
- * @ret rc	Return status code
- */
-static int virtnet_probe_legacy ( struct pci_device *pci ) {
-	unsigned long ioaddr = pci->ioaddr;
-	struct net_device *netdev;
-	struct virtnet_nic *virtnet;
-	u32 features;
-	u16 mtu;
-	int rc;
-
-	/* Allocate and hook up net device */
-	netdev = alloc_etherdev ( sizeof ( *virtnet ) );
-	if ( ! netdev )
-		return -ENOMEM;
-	netdev_init ( netdev, &virtnet_operations );
-	virtnet = netdev->priv;
-	virtnet->ioaddr = ioaddr;
-	pci_set_drvdata ( pci, netdev );
-	netdev->dev = &pci->dev;
-
-	DBGC ( virtnet, "VIRTIO-NET %p busaddr=%s ioaddr=%#lx irq=%d\n",
-	       virtnet, pci->dev.name, ioaddr, pci->irq );
-
-	/* Enable PCI bus master and reset NIC */
-	adjust_pci_device ( pci );
-
-	/* Configure DMA */
-	virtnet->dma =  &pci->dma;
-	dma_set_mask_64bit ( virtnet->dma );
-	netdev->dma = virtnet->dma;
-
-	vp_reset ( ioaddr );
-
-	/* Load MAC address and MTU */
-	features = vp_get_features ( ioaddr );
-	if ( features & ( 1 << VIRTIO_NET_F_MAC ) ) {
-		vp_get ( ioaddr, offsetof ( struct virtio_net_config, mac ),
-			 netdev->hw_addr, ETH_ALEN );
-		DBGC ( virtnet, "VIRTIO-NET %p mac=%s\n", virtnet,
-		       eth_ntoa ( netdev->hw_addr ) );
-	}
-	if ( features & ( 1ULL << VIRTIO_NET_F_MTU ) ) {
-		vp_get ( ioaddr, offsetof ( struct virtio_net_config, mtu ),
-			 &mtu, sizeof ( mtu ) );
-		DBGC ( virtnet, "VIRTIO-NET %p mtu=%d\n", virtnet, mtu );
-		netdev->max_pkt_len = ( mtu + ETH_HLEN );
-		netdev->mtu = mtu;
-	}
-
-	/* Register network device */
-	if ( ( rc = register_netdev ( netdev ) ) != 0 )
-		goto err_register_netdev;
-
-	/* Mark link as up, control virtqueue is not used */
-	netdev_link_up ( netdev );
-
-	return 0;
-
-	unregister_netdev ( netdev );
-err_register_netdev:
-	vp_reset ( ioaddr );
-	netdev_nullify ( netdev );
-	netdev_put ( netdev );
-	return rc;
-}
-
-/**
- * Probe PCI device, modern virtio 1.0
+ * PCI interface
  *
- * @v pci	PCI device
- * @v found_dev	Set to non-zero if modern device was found (probe may still fail)
- * @ret rc	Return status code
+ ******************************************************************************
  */
-static int virtnet_probe_modern ( struct pci_device *pci, int *found_dev ) {
-	struct net_device *netdev;
-	struct virtnet_nic *virtnet;
-	u64 features;
-	u16 mtu;
-	int rc, common, isr, notify, config, device;
-
-	common = virtio_pci_find_capability ( pci, VIRTIO_PCI_CAP_COMMON_CFG );
-	if ( ! common ) {
-		DBG ( "Common virtio capability not found!\n" );
-		return -ENODEV;
-	}
-	*found_dev = 1;
-
-	isr = virtio_pci_find_capability ( pci, VIRTIO_PCI_CAP_ISR_CFG );
-	notify = virtio_pci_find_capability ( pci, VIRTIO_PCI_CAP_NOTIFY_CFG );
-	config = virtio_pci_find_capability ( pci, VIRTIO_PCI_CAP_PCI_CFG );
-	if ( ! isr || ! notify || ! config ) {
-		DBG ( "Missing virtio capabilities %i/%i/%i/%i\n",
-		      common, isr, notify, config );
-		return -EINVAL;
-	}
-	device = virtio_pci_find_capability ( pci, VIRTIO_PCI_CAP_DEVICE_CFG );
-
-	/* Allocate and hook up net device */
-	netdev = alloc_etherdev ( sizeof ( *virtnet ) );
-	if ( ! netdev )
-		return -ENOMEM;
-	netdev_init ( netdev, &virtnet_operations );
-	virtnet = netdev->priv;
-
-	pci_set_drvdata ( pci, netdev );
-	netdev->dev = &pci->dev;
-
-	DBGC ( virtnet, "VIRTIO-NET modern %p busaddr=%s irq=%d\n",
-	       virtnet, pci->dev.name, pci->irq );
-
-	virtnet->vdev.pci = pci;
-	rc = virtio_pci_map_capability ( pci, common,
-		sizeof ( struct virtio_pci_common_cfg ), 4,
-		0, sizeof ( struct virtio_pci_common_cfg ),
-		&virtnet->vdev.common );
-	if ( rc )
-		goto err_map_common;
-
-	rc = virtio_pci_map_capability ( pci, isr, sizeof ( u8 ), 1,
-		0, 1,
-		&virtnet->vdev.isr );
-	if ( rc )
-		goto err_map_isr;
-
-	virtnet->vdev.notify_cap_pos = notify;
-	virtnet->vdev.cfg_cap_pos = config;
-
-	/* Map the device capability */
-	if ( device ) {
-		rc = virtio_pci_map_capability ( pci, device,
-			0, 4, 0, sizeof ( struct virtio_net_config ),
-			&virtnet->vdev.device );
-		if ( rc )
-			goto err_map_device;
-	}
-
-	/* Enable the PCI device */
-	adjust_pci_device ( pci );
-
-	/* Configure DMA */
-	virtnet->dma =  &pci->dma;
-	dma_set_mask_64bit ( virtnet->dma );
-	netdev->dma = virtnet->dma;
-
-	/* Reset the device and set initial status bits */
-	vpm_reset ( &virtnet->vdev );
-	vpm_add_status ( &virtnet->vdev, VIRTIO_CONFIG_S_ACKNOWLEDGE );
-	vpm_add_status ( &virtnet->vdev, VIRTIO_CONFIG_S_DRIVER );
-
-	/* Load MAC address and MTU */
-	if ( device ) {
-		features = vpm_get_features ( &virtnet->vdev );
-		if ( features & ( 1ULL << VIRTIO_NET_F_MAC ) ) {
-			vpm_get ( &virtnet->vdev,
-				  offsetof ( struct virtio_net_config, mac ),
-				  netdev->hw_addr, ETH_ALEN );
-			DBGC ( virtnet, "VIRTIO-NET %p mac=%s\n", virtnet,
-			       eth_ntoa ( netdev->hw_addr ) );
-		}
-		if ( features & ( 1ULL << VIRTIO_NET_F_MTU ) ) {
-			vpm_get ( &virtnet->vdev,
-				  offsetof ( struct virtio_net_config, mtu ),
-				  &mtu, sizeof ( mtu ) );
-			DBGC ( virtnet, "VIRTIO-NET %p mtu=%d\n", virtnet,
-			       mtu );
-			netdev->max_pkt_len = ( mtu + ETH_HLEN );
-			netdev->mtu = mtu;
-		}
-	}
-
-	/* We need a valid MAC address */
-	if ( ! is_valid_ether_addr ( netdev->hw_addr ) ) {
-		rc = -EADDRNOTAVAIL;
-		goto err_mac_address;
-	}
-
-	/* Register network device */
-	if ( ( rc = register_netdev ( netdev ) ) != 0 )
-		goto err_register_netdev;
-
-	/* Mark link as up, control virtqueue is not used */
-	netdev_link_up ( netdev );
-
-	virtnet->virtio_version = 1;
-	return 0;
-
-	unregister_netdev ( netdev );
-err_register_netdev:
-err_mac_address:
-	vpm_reset ( &virtnet->vdev );
-	netdev_nullify ( netdev );
-	netdev_put ( netdev );
-	virtio_pci_unmap_capability ( &virtnet->vdev.device );
-err_map_device:
-	virtio_pci_unmap_capability ( &virtnet->vdev.isr );
-err_map_isr:
-	virtio_pci_unmap_capability ( &virtnet->vdev.common );
-err_map_common:
-	return rc;
-}
 
 /**
  * Probe PCI device
  *
- * @v pci	PCI device
- * @ret rc	Return status code
+ * @v pci		PCI device
+ * @ret rc		Return status code
  */
-static int virtnet_probe ( struct pci_device *pci ) {
-	int found_modern = 0;
-	int rc = virtnet_probe_modern ( pci, &found_modern );
-	if ( ! found_modern && pci->device < 0x1040 ) {
-		/* fall back to the legacy probe */
-		rc = virtnet_probe_legacy ( pci );
+static int virtio_net_probe ( struct pci_device *pci ) {
+	struct net_device *netdev;
+	struct virtio_net *vnet;
+	struct virtio_device *virtio;
+	int rc;
+
+	/* Allocate and initialise net device */
+	netdev = alloc_etherdev ( sizeof ( *vnet ) );
+	if ( ! netdev ) {
+		rc = -ENOMEM;
+		goto err_alloc;
 	}
+	netdev_init ( netdev, &virtio_net_operations );
+	vnet = netdev->priv;
+	pci_set_drvdata ( pci, netdev );
+	netdev->dev = &pci->dev;
+	netdev->dma = &pci->dma;
+	memset ( vnet, 0, sizeof ( *vnet ) );
+	virtio = &vnet->virtio;
+	virtio_net_queue_init ( &vnet->rx, vnet->rx_iobufs, vnet->rx_slots,
+				VIRTIO_NET_RX_INDEX, VIRTIO_NET_RX_COUNT,
+				VIRTIO_NET_RX_MAX, DMA_RX,
+				VIRTIO_DESC_FL_WRITE );
+	virtio_net_queue_init ( &vnet->tx, vnet->tx_iobufs, vnet->tx_slots,
+				VIRTIO_NET_TX_INDEX, VIRTIO_NET_TX_COUNT,
+				VIRTIO_NET_TX_MAX, DMA_TX, 0 );
+
+	/* Map PCI device */
+	if ( ( rc = virtio_pci_map ( virtio, pci ) ) != 0 ) {
+		DBGC ( vnet, "VNET %s could not map: %s\n",
+		       virtio->name, strerror ( rc ) );
+		goto err_pci_map;
+	}
+
+	/* Initialise device */
+	if ( ( rc = virtio_init ( virtio, &virtio_net_features ) ) != 0 ) {
+		DBGC ( vnet, "VNET %s could not initialise: %s\n",
+		       virtio->name, strerror ( rc ) );
+		goto err_init;
+	}
+
+	/* Get MAC address */
+	virtio_net_mac ( netdev );
+
+	/* Set MTU */
+	virtio_net_mtu ( netdev );
+
+	/* Register network device */
+	if ( ( rc = register_netdev ( netdev ) ) != 0 )
+		goto err_register;
+
+	/* Mark as link up, since we have no way to test link state changes */
+	netdev_link_up ( netdev );
+
+	return 0;
+
+	unregister_netdev ( netdev );
+ err_register:
+	virtio_reset ( virtio );
+ err_init:
+	virtio_unmap ( virtio );
+ err_pci_map:
+	netdev_nullify ( netdev );
+	netdev_put ( netdev );
+ err_alloc:
 	return rc;
 }
 
 /**
- * Remove device
+ * Remove PCI device
  *
- * @v pci	PCI device
+ * @v pci		PCI device
  */
-static void virtnet_remove ( struct pci_device *pci ) {
+static void virtio_net_remove ( struct pci_device *pci ) {
 	struct net_device *netdev = pci_get_drvdata ( pci );
-	struct virtnet_nic *virtnet = netdev->priv;
+	struct virtio_net *vnet = netdev->priv;
+	struct virtio_device *virtio = &vnet->virtio;
 
+	/* Unregister network device */
 	unregister_netdev ( netdev );
 
-	virtio_pci_unmap_capability ( &virtnet->vdev.device );
-	virtio_pci_unmap_capability ( &virtnet->vdev.isr );
-	virtio_pci_unmap_capability ( &virtnet->vdev.common );
+	/* Reset device */
+	virtio_reset ( virtio );
 
+	/* Free network device */
+	virtio_unmap ( virtio );
 	netdev_nullify ( netdev );
 	netdev_put ( netdev );
 }
 
-static struct pci_device_id virtnet_nics[] = {
-PCI_ROM(0x1af4, 0x1000, "virtio-net", "Virtio Network Interface", 0),
-PCI_ROM(0x1af4, 0x1041, "virtio-net", "Virtio Network Interface 1.0", 0),
+/** Virtio network PCI device IDs */
+static struct pci_device_id virtio_net_ids[] = {
+	PCI_ROM ( 0x1af4, 0x1000, "virtio-net", "Virtio (legacy)", 0 ),
+	PCI_ROM ( 0x1af4, 0x1041, "virtio-net", "Virtio (modern)", 0 ),
 };
 
-struct pci_driver virtnet_driver __pci_driver = {
-	.ids = virtnet_nics,
-	.id_count = ( sizeof ( virtnet_nics ) / sizeof ( virtnet_nics[0] ) ),
-	.probe = virtnet_probe,
-	.remove = virtnet_remove,
+/** Virtio network PCI driver */
+struct pci_driver virtio_net_driver __pci_driver = {
+	.ids = virtio_net_ids,
+	.id_count = ( sizeof ( virtio_net_ids ) /
+		      sizeof ( virtio_net_ids[0] ) ),
+	.probe = virtio_net_probe,
+	.remove = virtio_net_remove,
 };
diff --git a/src/drivers/net/virtio-net.h b/src/drivers/net/virtio-net.h
index ff58d3e..2a3423f 100644
--- a/src/drivers/net/virtio-net.h
+++ b/src/drivers/net/virtio-net.h
@@ -1,70 +1,137 @@
-#ifndef _VIRTIO_NET_H_
-# define _VIRTIO_NET_H_
+#ifndef _VIRTIO_NET_H
+#define _VIRTIO_NET_H
 
-/* The feature bitmap for virtio net */
-#define VIRTIO_NET_F_CSUM       0       /* Host handles pkts w/ partial csum */
-#define VIRTIO_NET_F_GUEST_CSUM 1       /* Guest handles pkts w/ partial csum */
-#define VIRTIO_NET_F_MTU        3       /* Initial MTU advice */
-#define VIRTIO_NET_F_MAC        5       /* Host has given MAC address. */
-#define VIRTIO_NET_F_GSO        6       /* Host handles pkts w/ any GSO type */
-#define VIRTIO_NET_F_GUEST_TSO4 7       /* Guest can handle TSOv4 in. */
-#define VIRTIO_NET_F_GUEST_TSO6 8       /* Guest can handle TSOv6 in. */
-#define VIRTIO_NET_F_GUEST_ECN  9       /* Guest can handle TSO[6] w/ ECN in. */
-#define VIRTIO_NET_F_GUEST_UFO  10      /* Guest can handle UFO in. */
-#define VIRTIO_NET_F_HOST_TSO4  11      /* Host can handle TSOv4 in. */
-#define VIRTIO_NET_F_HOST_TSO6  12      /* Host can handle TSOv6 in. */
-#define VIRTIO_NET_F_HOST_ECN   13      /* Host can handle TSO[6] w/ ECN in. */
-#define VIRTIO_NET_F_HOST_UFO   14      /* Host can handle UFO in. */
-#define VIRTIO_NET_F_MRG_RXBUF  15      /* Driver can merge receive buffers. */
-#define VIRTIO_NET_F_STATUS     16      /* Configuration status field is available. */
-#define VIRTIO_NET_F_CTRL_VQ    17      /* Control channel is available. */
-#define VIRTIO_NET_F_CTRL_RX    18      /* Control channel RX mode support. */
-#define VIRTIO_NET_F_CTRL_VLAN  19      /* Control channel VLAN filtering. */
-#define VIRTIO_NET_F_GUEST_ANNOUNCE 21  /* Driver can send gratuitous packets. */
+/** @file
+ *
+ * Virtual I/O network device
+ *
+ */
 
-struct virtio_net_config
-{
-   /* The config defining mac address (if VIRTIO_NET_F_MAC) */
-   u8 mac[6];
-   /* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */
-   u16 status;
-   /* Maximum number of each of transmit and receive queues;
-    * see VIRTIO_NET_F_MQ and VIRTIO_NET_CTRL_MQ.
-    * Legal values are between 1 and 0x8000
-    */
-   u16 max_virtqueue_pairs;
-   /* Default maximum transmit unit advice */
-   u16 mtu;
-} __attribute__((packed));
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
+FILE_SECBOOT ( PERMITTED );
 
-/* This is the first element of the scatter-gather list.  If you don't
- * specify GSO or CSUM features, you can simply ignore the header. */
+#include <ipxe/virtio.h>
 
-struct virtio_net_hdr
-{
-#define VIRTIO_NET_HDR_F_NEEDS_CSUM     1       // Use csum_start, csum_offset
-   uint8_t flags;
-#define VIRTIO_NET_HDR_GSO_NONE         0       // Not a GSO frame
-#define VIRTIO_NET_HDR_GSO_TCPV4        1       // GSO frame, IPv4 TCP (TSO)
-/* FIXME: Do we need this?  If they said they can handle ECN, do they care? */
-#define VIRTIO_NET_HDR_GSO_TCPV4_ECN    2       // GSO frame, IPv4 TCP w/ ECN
-#define VIRTIO_NET_HDR_GSO_UDP          3       // GSO frame, IPv4 UDP (UFO)
-#define VIRTIO_NET_HDR_GSO_TCPV6        4       // GSO frame, IPv6 TCP
-#define VIRTIO_NET_HDR_GSO_ECN          0x80    // TCP has ECN set
-   uint8_t gso_type;
-   uint16_t hdr_len;
-   uint16_t gso_size;
-   uint16_t csum_start;
-   uint16_t csum_offset;
+/** Device has a reported MTU */
+#define VIRTIO_FEAT0_NET_MTU 0x00000008
+
+/** Device has a MAC address */
+#define VIRTIO_FEAT0_NET_MAC 0x00000020
+
+/** MAC address register offset */
+#define VIRTIO_NET_MAC 0x00
+
+/** MTU register offset */
+#define VIRTIO_NET_MTU 0x0a
+
+/** A virtio network packet header */
+union virtio_net_header {
+	/** Legacy interface */
+	uint8_t legacy[10];
+	/** Modern (version 1.0) interface */
+	uint8_t modern[12];
+} __attribute__ (( packed ));
+
+/** Receive queue index */
+#define VIRTIO_NET_RX_INDEX 0
+
+/** Receive queue requested queue size */
+#define VIRTIO_NET_RX_COUNT 128
+
+/** Receive queue maximum fill level */
+#define VIRTIO_NET_RX_MAX 16
+
+/** Transmit queue index */
+#define VIRTIO_NET_TX_INDEX 1
+
+/** Transmit queue requested queue size */
+#define VIRTIO_NET_TX_COUNT 128
+
+/** Transmit queue maximum fill level */
+#define VIRTIO_NET_TX_MAX 32
+
+/** Number of descriptors per packet */
+#define VIRTIO_NET_DESCS 2
+
+/** A virtio network queue */
+struct virtio_net_queue {
+	/** Underlying virtio queue */
+	struct virtio_queue queue;
+	/** I/O buffer list */
+	struct io_buffer **iobufs;
+	/** Descriptor slot ring */
+	uint8_t *slots;
+	/** Effective fill level */
+	unsigned int fill;
+	/** Descriptor index ring mask */
+	unsigned int mask;
+
+	/** Shared packet header */
+	union virtio_net_header hdr;
+	/** DMA mapping for packet header */
+	struct dma_mapping map;
+
+	/** DMA direction for packet header */
+	uint8_t dma;
+	/** Buffer writability flag for packet header */
+	uint8_t write;
+	/** Requested queue size */
+	uint8_t count;
+	/** Maximum fill level */
+	uint8_t max;
 };
 
-/* Virtio 1.0 version of the first element of the scatter-gather list. */
-struct virtio_net_hdr_modern
-{
-   struct virtio_net_hdr legacy;
+/**
+ * Initialise virtio network queue
+ *
+ * @v queue		Virtio network queue
+ * @v iobufs		I/O buffer list
+ * @v slots		Descriptor slot ring
+ * @v index		Queue index
+ * @v count		Requested queue size
+ * @v max		Maximum fill level
+ * @v dma		DMA direction for packet header
+ * @v write		Writability flag for packet header
+ */
+static inline __attribute__ (( always_inline )) void
+virtio_net_queue_init ( struct virtio_net_queue *queue,
+			struct io_buffer **iobufs, uint8_t *slots,
+			unsigned int index, unsigned int count,
+			unsigned int max, unsigned int dma,
+			unsigned int write ) {
 
-   /* Used only if VIRTIO_NET_F_MRG_RXBUF: */
-   uint16_t num_buffers;
+	queue->queue.index = index;
+	queue->iobufs = iobufs;
+	queue->slots = slots;
+	queue->dma = dma;
+	queue->write = write;
+	queue->count = count;
+	queue->max = max;
+}
+
+/** A virtio network device */
+struct virtio_net {
+	/** Underlying virtio device */
+	struct virtio_device virtio;
+	/** Receive queue */
+	struct virtio_net_queue rx;
+	/** Transmit queue */
+	struct virtio_net_queue tx;
+
+	/** Virtio network header length */
+	size_t hlen;
+	/** Maximum frame size */
+	size_t mfs;
+
+	/** Receive descriptor slot ring */
+	uint8_t rx_slots[VIRTIO_NET_RX_MAX];
+	/** Receive I/O buffers */
+	struct io_buffer *rx_iobufs[VIRTIO_NET_RX_MAX];
+
+	/** Transmit descriptor slot ring */
+	uint8_t tx_slots[VIRTIO_NET_TX_MAX];
+	/** Transmit I/O buffers */
+	struct io_buffer *tx_iobufs[VIRTIO_NET_TX_MAX];
 };
 
-#endif /* _VIRTIO_NET_H_ */
+#endif /* _VIRTIO_NET_H */
diff --git a/src/include/ipxe/errfile.h b/src/include/ipxe/errfile.h
index a2e3ff8..048cbe6 100644
--- a/src/include/ipxe/errfile.h
+++ b/src/include/ipxe/errfile.h
@@ -212,7 +212,7 @@
 #define ERRFILE_eoib		     ( ERRFILE_DRIVER | 0x007c0000 )
 #define ERRFILE_golan		     ( ERRFILE_DRIVER | 0x007d0000 )
 #define ERRFILE_flexboot_nodnic	     ( ERRFILE_DRIVER | 0x007e0000 )
-#define ERRFILE_virtio_pci	     ( ERRFILE_DRIVER | 0x007f0000 )
+#define ERRFILE_virtio		     ( ERRFILE_DRIVER | 0x007f0000 )
 #define ERRFILE_pciea		     ( ERRFILE_DRIVER | 0x00c00000 )
 #define ERRFILE_axge		     ( ERRFILE_DRIVER | 0x00c10000 )
 #define ERRFILE_thunderx	     ( ERRFILE_DRIVER | 0x00c20000 )
diff --git a/src/include/ipxe/pci.h b/src/include/ipxe/pci.h
index f65c8c2..b16c769 100644
--- a/src/include/ipxe/pci.h
+++ b/src/include/ipxe/pci.h
@@ -102,6 +102,9 @@
 /** Next capability */
 #define PCI_CAP_NEXT		0x01
 
+/** Capability length */
+#define PCI_CAP_LEN		0x02
+
 /** Power management control and status */
 #define PCI_PM_CTRL		0x04
 #define PCI_PM_CTRL_STATE_MASK		0x0003	/**< Current power state */
diff --git a/src/include/ipxe/virtio-pci.h b/src/include/ipxe/virtio-pci.h
deleted file mode 100644
index 7abae26..0000000
--- a/src/include/ipxe/virtio-pci.h
+++ /dev/null
@@ -1,314 +0,0 @@
-#ifndef _VIRTIO_PCI_H_
-# define _VIRTIO_PCI_H_
-
-#include <ipxe/dma.h>
-
-/* A 32-bit r/o bitmask of the features supported by the host */
-#define VIRTIO_PCI_HOST_FEATURES        0
-
-/* A 32-bit r/w bitmask of features activated by the guest */
-#define VIRTIO_PCI_GUEST_FEATURES       4
-
-/* A 32-bit r/w PFN for the currently selected queue */
-#define VIRTIO_PCI_QUEUE_PFN            8
-
-/* A 16-bit r/o queue size for the currently selected queue */
-#define VIRTIO_PCI_QUEUE_NUM            12
-
-/* A 16-bit r/w queue selector */
-#define VIRTIO_PCI_QUEUE_SEL            14
-
-/* A 16-bit r/w queue notifier */
-#define VIRTIO_PCI_QUEUE_NOTIFY         16
-
-/* An 8-bit device status register.  */
-#define VIRTIO_PCI_STATUS               18
-
-/* An 8-bit r/o interrupt status register.  Reading the value will return the
- * current contents of the ISR and will also clear it.  This is effectively
- * a read-and-acknowledge. */
-#define VIRTIO_PCI_ISR                  19
-
-/* The bit of the ISR which indicates a device configuration change. */
-#define VIRTIO_PCI_ISR_CONFIG           0x2
-
-/* The remaining space is defined by each driver as the per-driver
- * configuration space */
-#define VIRTIO_PCI_CONFIG               20
-
-/* Virtio ABI version, this must match exactly */
-#define VIRTIO_PCI_ABI_VERSION          0
-
-/* PCI capability types: */
-#define VIRTIO_PCI_CAP_COMMON_CFG       1  /* Common configuration */
-#define VIRTIO_PCI_CAP_NOTIFY_CFG       2  /* Notifications */
-#define VIRTIO_PCI_CAP_ISR_CFG          3  /* ISR access */
-#define VIRTIO_PCI_CAP_DEVICE_CFG       4  /* Device specific configuration */
-#define VIRTIO_PCI_CAP_PCI_CFG          5  /* PCI configuration access */
-
-#define __u8       uint8_t
-#define __le16     uint16_t
-#define __le32     uint32_t
-#define __le64     uint64_t
-
-/* This is the PCI capability header: */
-struct virtio_pci_cap {
-    __u8 cap_vndr;    /* Generic PCI field: PCI_CAP_ID_VNDR */
-    __u8 cap_next;    /* Generic PCI field: next ptr. */
-    __u8 cap_len;     /* Generic PCI field: capability length */
-    __u8 cfg_type;    /* Identifies the structure. */
-    __u8 bar;         /* Where to find it. */
-    __u8 padding[3];  /* Pad to full dword. */
-    __le32 offset;    /* Offset within bar. */
-    __le32 length;    /* Length of the structure, in bytes. */
-};
-
-struct virtio_pci_notify_cap {
-    struct virtio_pci_cap cap;
-    __le32 notify_off_multiplier; /* Multiplier for queue_notify_off. */
-};
-
-struct virtio_pci_cfg_cap {
-    struct virtio_pci_cap cap;
-    __u8 pci_cfg_data[4]; /* Data for BAR access. */
-};
-
-/* Fields in VIRTIO_PCI_CAP_COMMON_CFG: */
-struct virtio_pci_common_cfg {
-    /* About the whole device. */
-    __le32 device_feature_select; /* read-write */
-    __le32 device_feature;        /* read-only */
-    __le32 guest_feature_select;  /* read-write */
-    __le32 guest_feature;         /* read-write */
-    __le16 msix_config;           /* read-write */
-    __le16 num_queues;            /* read-only */
-    __u8 device_status;           /* read-write */
-    __u8 config_generation;       /* read-only */
-
-    /* About a specific virtqueue. */
-    __le16 queue_select;          /* read-write */
-    __le16 queue_size;            /* read-write, power of 2. */
-    __le16 queue_msix_vector;     /* read-write */
-    __le16 queue_enable;          /* read-write */
-    __le16 queue_notify_off;      /* read-only */
-    __le32 queue_desc_lo;         /* read-write */
-    __le32 queue_desc_hi;         /* read-write */
-    __le32 queue_avail_lo;        /* read-write */
-    __le32 queue_avail_hi;        /* read-write */
-    __le32 queue_used_lo;         /* read-write */
-    __le32 queue_used_hi;         /* read-write */
-};
-
-/* Virtio 1.0 PCI region descriptor. We support memory mapped I/O, port I/O,
- * and PCI config space access via the cfg PCI capability as a fallback. */
-struct virtio_pci_region {
-    void *base;
-    size_t length;
-    u8 bar;
-
-/* How to interpret the base field */
-#define VIRTIO_PCI_REGION_TYPE_MASK  0x00000003
-/* The base field is a memory address */
-#define VIRTIO_PCI_REGION_MEMORY     0x00000001
-/* The base field is a port address */
-#define VIRTIO_PCI_REGION_PORT       0x00000002
-/* The base field is an offset within the PCI bar */
-#define VIRTIO_PCI_REGION_PCI_CONFIG 0x00000003
-    unsigned flags;
-};
-
-/* Virtio 1.0 device state */
-struct virtio_pci_modern_device {
-    struct pci_device *pci;
-
-    /* VIRTIO_PCI_CAP_PCI_CFG position */
-    int cfg_cap_pos;
-
-    /* VIRTIO_PCI_CAP_COMMON_CFG data */
-    struct virtio_pci_region common;
-
-    /* VIRTIO_PCI_CAP_DEVICE_CFG data */
-    struct virtio_pci_region device;
-
-    /* VIRTIO_PCI_CAP_ISR_CFG data */
-    struct virtio_pci_region isr;
-
-    /* VIRTIO_PCI_CAP_NOTIFY_CFG data */
-    int notify_cap_pos;
-};
-
-static inline u32 vp_get_features(unsigned int ioaddr)
-{
-   return inl(ioaddr + VIRTIO_PCI_HOST_FEATURES);
-}
-
-static inline void vp_set_features(unsigned int ioaddr, u32 features)
-{
-        outl(features, ioaddr + VIRTIO_PCI_GUEST_FEATURES);
-}
-
-static inline void vp_get(unsigned int ioaddr, unsigned offset,
-                     void *buf, unsigned len)
-{
-   u8 *ptr = buf;
-   unsigned i;
-
-   for (i = 0; i < len; i++)
-           ptr[i] = inb(ioaddr + VIRTIO_PCI_CONFIG + offset + i);
-}
-
-static inline u8 vp_get_status(unsigned int ioaddr)
-{
-   return inb(ioaddr + VIRTIO_PCI_STATUS);
-}
-
-static inline void vp_set_status(unsigned int ioaddr, u8 status)
-{
-   if (status == 0)        /* reset */
-           return;
-   outb(status, ioaddr + VIRTIO_PCI_STATUS);
-}
-
-static inline u8 vp_get_isr(unsigned int ioaddr)
-{
-   return inb(ioaddr + VIRTIO_PCI_ISR);
-}
-
-static inline void vp_reset(unsigned int ioaddr)
-{
-   outb(0, ioaddr + VIRTIO_PCI_STATUS);
-   (void)inb(ioaddr + VIRTIO_PCI_ISR);
-}
-
-static inline void vp_notify(unsigned int ioaddr, int queue_index)
-{
-   outw(queue_index, ioaddr + VIRTIO_PCI_QUEUE_NOTIFY);
-}
-
-static inline void vp_del_vq(unsigned int ioaddr, int queue_index)
-{
-   /* select the queue */
-
-   outw(queue_index, ioaddr + VIRTIO_PCI_QUEUE_SEL);
-
-   /* deactivate the queue */
-
-   outl(0, ioaddr + VIRTIO_PCI_QUEUE_PFN);
-}
-
-struct vring_virtqueue;
-
-void vp_free_vq(struct vring_virtqueue *vq);
-int vp_find_vq(unsigned int ioaddr, int queue_index,
-               struct vring_virtqueue *vq, struct dma_device *dma_dev,
-               size_t header_size);
-
-
-/* Virtio 1.0 I/O routines abstract away the three possible HW access
- * mechanisms - memory, port I/O, and PCI cfg space access. Also built-in
- * are endianness conversions - to LE on write and from LE on read. */
-
-void vpm_iowrite8(struct virtio_pci_modern_device *vdev,
-                  struct virtio_pci_region *region, u8 data, size_t offset);
-
-void vpm_iowrite16(struct virtio_pci_modern_device *vdev,
-                   struct virtio_pci_region *region, u16 data, size_t offset);
-
-void vpm_iowrite32(struct virtio_pci_modern_device *vdev,
-                   struct virtio_pci_region *region, u32 data, size_t offset);
-
-static inline void vpm_iowrite64(struct virtio_pci_modern_device *vdev,
-                                 struct virtio_pci_region *region,
-                                 u64 data, size_t offset_lo, size_t offset_hi)
-{
-    vpm_iowrite32(vdev, region, (u32)data, offset_lo);
-    vpm_iowrite32(vdev, region, data >> 32, offset_hi);
-}
-
-u8 vpm_ioread8(struct virtio_pci_modern_device *vdev,
-               struct virtio_pci_region *region, size_t offset);
-
-u16 vpm_ioread16(struct virtio_pci_modern_device *vdev,
-                 struct virtio_pci_region *region, size_t offset);
-
-u32 vpm_ioread32(struct virtio_pci_modern_device *vdev,
-                 struct virtio_pci_region *region, size_t offset);
-
-/* Virtio 1.0 device manipulation routines */
-
-#define COMMON_OFFSET(field) offsetof(struct virtio_pci_common_cfg, field)
-
-static inline void vpm_reset(struct virtio_pci_modern_device *vdev)
-{
-    vpm_iowrite8(vdev, &vdev->common, 0, COMMON_OFFSET(device_status));
-    while (vpm_ioread8(vdev, &vdev->common, COMMON_OFFSET(device_status)))
-        mdelay(1);
-}
-
-static inline u8 vpm_get_status(struct virtio_pci_modern_device *vdev)
-{
-    return vpm_ioread8(vdev, &vdev->common, COMMON_OFFSET(device_status));
-}
-
-static inline void vpm_add_status(struct virtio_pci_modern_device *vdev,
-                                  u8 status)
-{
-    u8 curr_status = vpm_ioread8(vdev, &vdev->common, COMMON_OFFSET(device_status));
-    vpm_iowrite8(vdev, &vdev->common,
-                 curr_status | status, COMMON_OFFSET(device_status));
-}
-
-static inline u64 vpm_get_features(struct virtio_pci_modern_device *vdev)
-{
-    u32 features_lo, features_hi;
-
-    vpm_iowrite32(vdev, &vdev->common, 0, COMMON_OFFSET(device_feature_select));
-    features_lo = vpm_ioread32(vdev, &vdev->common, COMMON_OFFSET(device_feature));
-    vpm_iowrite32(vdev, &vdev->common, 1, COMMON_OFFSET(device_feature_select));
-    features_hi = vpm_ioread32(vdev, &vdev->common, COMMON_OFFSET(device_feature));
-
-    return ((u64)features_hi << 32) | features_lo;
-}
-
-static inline void vpm_set_features(struct virtio_pci_modern_device *vdev,
-                                    u64 features)
-{
-    u32 features_lo = (u32)features;
-    u32 features_hi = features >> 32;
-
-    vpm_iowrite32(vdev, &vdev->common, 0, COMMON_OFFSET(guest_feature_select));
-    vpm_iowrite32(vdev, &vdev->common, features_lo, COMMON_OFFSET(guest_feature));
-    vpm_iowrite32(vdev, &vdev->common, 1, COMMON_OFFSET(guest_feature_select));
-    vpm_iowrite32(vdev, &vdev->common, features_hi, COMMON_OFFSET(guest_feature));
-}
-
-static inline void vpm_get(struct virtio_pci_modern_device *vdev,
-                           unsigned offset, void *buf, unsigned len)
-{
-    u8 *ptr = buf;
-    unsigned i;
-
-    for (i = 0; i < len; i++)
-        ptr[i] = vpm_ioread8(vdev, &vdev->device, offset + i);
-}
-
-static inline u8 vpm_get_isr(struct virtio_pci_modern_device *vdev)
-{
-    return vpm_ioread8(vdev, &vdev->isr, 0);
-}
-
-void vpm_notify(struct virtio_pci_modern_device *vdev,
-                struct vring_virtqueue *vq);
-
-int vpm_find_vqs(struct virtio_pci_modern_device *vdev,
-                 unsigned nvqs, struct vring_virtqueue *vqs,
-                 struct dma_device *dma_dev, size_t header_size);
-
-int virtio_pci_find_capability(struct pci_device *pci, uint8_t cfg_type);
-
-int virtio_pci_map_capability(struct pci_device *pci, int cap, size_t minlen,
-                              u32 align, u32 start, u32 size,
-                              struct virtio_pci_region *region);
-
-void virtio_pci_unmap_capability(struct virtio_pci_region *region);
-#endif /* _VIRTIO_PCI_H_ */
diff --git a/src/include/ipxe/virtio-ring.h b/src/include/ipxe/virtio-ring.h
deleted file mode 100644
index d082139..0000000
--- a/src/include/ipxe/virtio-ring.h
+++ /dev/null
@@ -1,155 +0,0 @@
-#ifndef _VIRTIO_RING_H_
-# define _VIRTIO_RING_H_
-
-#include <ipxe/virtio-pci.h>
-#include <ipxe/dma.h>
-
-/* Status byte for guest to report progress, and synchronize features. */
-/* We have seen device and processed generic fields (VIRTIO_CONFIG_F_VIRTIO) */
-#define VIRTIO_CONFIG_S_ACKNOWLEDGE     1
-/* We have found a driver for the device. */
-#define VIRTIO_CONFIG_S_DRIVER          2
-/* Driver has used its parts of the config, and is happy */
-#define VIRTIO_CONFIG_S_DRIVER_OK       4
-/* Driver has finished configuring features */
-#define VIRTIO_CONFIG_S_FEATURES_OK     8
-/* We've given up on this device. */
-#define VIRTIO_CONFIG_S_FAILED          0x80
-
-/* Virtio feature flags used to negotiate device and driver features. */
-/* Can the device handle any descriptor layout? */
-#define VIRTIO_F_ANY_LAYOUT             27
-/* v1.0 compliant. */
-#define VIRTIO_F_VERSION_1              32
-#define VIRTIO_F_IOMMU_PLATFORM         33
-
-#define MAX_QUEUE_NUM      (256)
-
-#define VRING_DESC_F_NEXT  1
-#define VRING_DESC_F_WRITE 2
-
-#define VRING_AVAIL_F_NO_INTERRUPT 1
-
-#define VRING_USED_F_NO_NOTIFY     1
-
-struct vring_desc
-{
-   u64 addr;
-   u32 len;
-   u16 flags;
-   u16 next;
-};
-
-struct vring_avail
-{
-   u16 flags;
-   u16 idx;
-   u16 ring[0];
-};
-
-struct vring_used_elem
-{
-   u32 id;
-   u32 len;
-};
-
-struct vring_used
-{
-   u16 flags;
-   u16 idx;
-   struct vring_used_elem ring[];
-};
-
-struct vring {
-   unsigned int num;
-   struct vring_desc *desc;
-   struct vring_avail *avail;
-   struct vring_used *used;
-};
-
-#define vring_size(num) \
-   (((((sizeof(struct vring_desc) * num) + \
-      (sizeof(struct vring_avail) + sizeof(u16) * num)) \
-         + PAGE_MASK) & ~PAGE_MASK) + \
-         (sizeof(struct vring_used) + sizeof(struct vring_used_elem) * num))
-
-struct vring_virtqueue {
-   unsigned char *queue;
-   size_t queue_size;
-   struct dma_mapping map;
-   struct dma_device *dma;
-   struct vring vring;
-   u16 free_head;
-   u16 last_used_idx;
-   void **vdata;
-   struct virtio_net_hdr_modern *empty_header;
-   /* PCI */
-   int queue_index;
-   struct virtio_pci_region notification;
-};
-
-struct vring_list {
-  physaddr_t addr;
-  unsigned int length;
-};
-
-static inline void vring_init(struct vring *vr,
-                         unsigned int num, unsigned char *queue)
-{
-   unsigned int i;
-   unsigned long pa;
-
-   vr->num = num;
-
-   /* physical address of desc must be page aligned */
-
-   pa = virt_to_phys(queue);
-   pa = (pa + PAGE_MASK) & ~PAGE_MASK;
-   vr->desc = phys_to_virt(pa);
-
-   vr->avail = (struct vring_avail *)&vr->desc[num];
-
-   /* physical address of used must be page aligned */
-
-   pa = virt_to_phys(&vr->avail->ring[num]);
-   pa = (pa + PAGE_MASK) & ~PAGE_MASK;
-   vr->used = phys_to_virt(pa);
-
-   for (i = 0; i < num - 1; i++)
-           vr->desc[i].next = i + 1;
-   vr->desc[i].next = 0;
-}
-
-static inline void vring_enable_cb(struct vring_virtqueue *vq)
-{
-   vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
-}
-
-static inline void vring_disable_cb(struct vring_virtqueue *vq)
-{
-   vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
-}
-
-
-/*
- * vring_more_used
- *
- * is there some used buffers ?
- *
- */
-
-static inline int vring_more_used(struct vring_virtqueue *vq)
-{
-   wmb();
-   return vq->last_used_idx != vq->vring.used->idx;
-}
-
-void vring_detach(struct vring_virtqueue *vq, unsigned int head);
-void *vring_get_buf(struct vring_virtqueue *vq, unsigned int *len);
-void vring_add_buf(struct vring_virtqueue *vq, struct vring_list list[],
-                   unsigned int out, unsigned int in,
-                   void *index, int num_added);
-void vring_kick(struct virtio_pci_modern_device *vdev, unsigned int ioaddr,
-                struct vring_virtqueue *vq, int num_added);
-
-#endif /* _VIRTIO_RING_H_ */
diff --git a/src/include/ipxe/virtio.h b/src/include/ipxe/virtio.h
new file mode 100644
index 0000000..81a2028
--- /dev/null
+++ b/src/include/ipxe/virtio.h
@@ -0,0 +1,476 @@
+#ifndef _IPXE_VIRTIO_H
+#define _IPXE_VIRTIO_H
+
+/** @file
+ *
+ * Virtual I/O device
+ *
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
+FILE_SECBOOT ( PERMITTED );
+
+#include <stdint.h>
+#include <byteswap.h>
+#include <ipxe/dma.h>
+#include <ipxe/pci.h>
+
+/** Virtio page alignment */
+#define VIRTIO_PAGE 4096
+
+/** Maximum time to wait for reset (in ms) */
+#define VIRTIO_RESET_MAX_WAIT_MS 100
+
+/**
+ * @defgroup virtio_legacy Original ("legacy") common device registers
+ * @{
+ */
+
+/** Legacy device supported features register */
+#define VIRTIO_LEG_FEAT 0x00
+
+/** Legacy negotiated in-use features register */
+#define VIRTIO_LEG_USED 0x04
+
+/** Legacy queue base address register */
+#define VIRTIO_LEG_BASE 0x08
+
+/** Legacy queue size register */
+#define VIRTIO_LEG_SIZE 0x0c
+
+/** Legacy queue select register */
+#define VIRTIO_LEG_SEL 0x0e
+
+/** Legacy queue doorbell notification register */
+#define VIRTIO_LEG_DB 0x10
+
+/** Legacy driver status register */
+#define VIRTIO_LEG_STAT 0x12
+#define VIRTIO_STAT_ACKNOWLEDGE	0x0001	/**< Guest has found device */
+#define VIRTIO_STAT_DRIVER	0x0002	/**< Guest driver exists */
+#define VIRTIO_STAT_DRIVER_OK	0x0004	/**< Guest driver is ready */
+#define VIRTIO_STAT_FEATURES_OK	0x0008	/**< Guest driver has set features */
+#define VIRTIO_STAT_FAIL	0x0080	/**< Guest driver has failed */
+
+/** Legacy device-specific registers */
+#define VIRTIO_LEG_DEV 0x14
+
+/** Legacy device-specific register (if MSI-X is enabled) */
+#define VIRTIO_LEG_DEV_MSIX 0x18
+
+/** @} */
+
+/**
+ * @defgroup virtio_pci_cap PCI capability registers
+ * @{
+ */
+
+/** Capability type */
+#define VIRTIO_PCI_CAP_TYPE 0x03
+#define VIRTIO_PCI_CAP_TYPE_COMMON 0x01	/**< Common registers */
+#define VIRTIO_PCI_CAP_TYPE_NOTIFY 0x02	/**< Notification doorbells */
+#define VIRTIO_PCI_CAP_TYPE_DEVICE 0x04	/**< Device-specific registers */
+
+/** Capability BAR index */
+#define VIRTIO_PCI_CAP_BAR 0x04
+
+/** Capability BAR offset */
+#define VIRTIO_PCI_CAP_OFFSET 0x08
+
+/** Capability minimum length */
+#define VIRTIO_PCI_CAP_END 0x10
+
+/** Notification doorbell capability multiplier offset */
+#define VIRTIO_PCI_CAP_NOTIFY_MULT 0x10
+
+/** Notification doorbell capability minimum length */
+#define VIRTIO_PCI_CAP_NOTIFY_END 0x14
+
+/** @} */
+
+/** A virtio PCI capability */
+struct virtio_pci_capability {
+	/** Capability type (VIRTIO_PCI_CAP_TYPE_*) */
+	uint8_t type;
+	/** Capability offset within PCI configuration space */
+	uint8_t pos;
+	/** Capability length */
+	uint8_t len;
+	/** BAR number */
+	uint8_t bar;
+	/** Offset within BAR */
+	uint32_t offset;
+};
+
+/**
+ * @defgroup virtio_pci_common PCI common device registers
+ * @{
+ */
+
+/** PCI device supported features select register */
+#define VIRTIO_PCI_FEAT_SEL 0x00
+
+/** PCI device supported features register */
+#define VIRTIO_PCI_FEAT 0x04
+
+/** PCI negotiated in-use features select register */
+#define VIRTIO_PCI_USED_SEL 0x08
+
+/** PCI negotiated in-use features register */
+#define VIRTIO_PCI_USED 0x0c
+
+/** PCI device status register */
+#define VIRTIO_PCI_STAT 0x14
+
+/** PCI configuration generation register */
+#define VIRTIO_PCI_GEN 0x15
+
+/** PCI queue select register */
+#define VIRTIO_PCI_SEL 0x16
+
+/** PCI queue size register */
+#define VIRTIO_PCI_SIZE 0x18
+
+/** PCI queue enable register */
+#define VIRTIO_PCI_ENABLE 0x1c
+
+/** PCI queue doorbell notification offset register */
+#define VIRTIO_PCI_DBOFF 0x1e
+
+/** PCI queue descriptor array base address register */
+#define VIRTIO_PCI_DESC 0x20
+
+/** PCI queue submission queue base address register */
+#define VIRTIO_PCI_SQ 0x28
+
+/** PCI queue completion queue base address register */
+#define VIRTIO_PCI_CQ 0x30
+
+/** @} */
+
+/** A virtio buffer descriptor (fields little-endian per the virtio spec) */
+struct virtio_desc {
+	/** Buffer address */
+	uint64_t addr;
+	/** Buffer length */
+	uint32_t len;
+	/** Flags (VIRTIO_DESC_FL_*) */
+	uint16_t flags;
+	/** Next descriptor index (valid if VIRTIO_DESC_FL_NEXT is set) */
+	uint16_t next;
+} __attribute__ (( packed ));
+
+/** Next descriptor index is valid */
+#define VIRTIO_DESC_FL_NEXT 0x0001
+
+/** Buffer is write-only */
+#define VIRTIO_DESC_FL_WRITE 0x0002
+
+/** A virtio submission queue entry */
+struct virtio_sqe {
+	/** Starting descriptor index (little-endian) */
+	uint16_t index;
+} __attribute__ (( packed ));
+
+/** A virtio submission ("available") queue */
+struct virtio_sq {
+	/** Flags (VIRTIO_SQ_FL_*) */
+	uint16_t flags;
+	/** Producer index (little-endian; published by virtio_notify()) */
+	uint16_t prod;
+	/** Queue entries */
+	struct virtio_sqe sqe[];
+} __attribute__ (( packed ));
+
+/** Do not generate interrupt */
+#define VIRTIO_SQ_FL_NO_INTERRUPT 0x0001
+
+/** A virtio completion queue entry */
+struct virtio_cqe {
+	/** Starting descriptor index (little-endian) */
+	uint32_t index;
+	/** Length written (little-endian) */
+	uint32_t len;
+} __attribute__ (( packed ));
+
+/** A virtio completion ("used") queue */
+struct virtio_cq {
+	/** Flags */
+	uint16_t flags;
+	/** Producer index (little-endian; read via virtio_completions()) */
+	uint16_t prod;
+	/** Queue entries */
+	struct virtio_cqe cqe[];
+} __attribute__ (( packed ));
+
+/** A virtio queue */
+struct virtio_queue {
+	/** Queue index */
+	unsigned int index;
+	/** Queue size (must be a power of two) */
+	unsigned int count;
+	/** Queue mask (used to wrap free-running ring indices) */
+	unsigned int mask;
+	/** Submission queue producer index (free-running; wrapped via mask) */
+	unsigned int prod;
+	/** Completion queue consumer index (free-running; wrapped via mask) */
+	unsigned int cons;
+	/** Total length of queue */
+	size_t len;
+	/** DMA mapping */
+	struct dma_mapping map;
+	/** Descriptor array (and start of DMA allocation) */
+	struct virtio_desc *desc;
+	/** Submission queue */
+	struct virtio_sq *sq;
+	/** Completion queue */
+	struct virtio_cq *cq;
+	/** Notification doorbell */
+	void *db;
+};
+
+/**
+ * Initialise virtio queue (records only the queue index)
+ *
+ * @v queue		Virtio queue
+ * @v index		Queue index
+ */
+static inline __attribute__ (( always_inline )) void
+virtio_queue_init ( struct virtio_queue *queue, unsigned int index ) {
+
+	queue->index = index;
+}
+
+/**
+ * Calculate aligned size (round up to the next VIRTIO_PAGE boundary)
+ *
+ * @v size		Unaligned size
+ * @ret size		Aligned size
+ */
+static inline __attribute__ (( always_inline )) size_t
+virtio_align ( size_t size ) {
+
+	return ( ( size + VIRTIO_PAGE - 1 ) & ~( VIRTIO_PAGE - 1 ) );
+}
+
+/**
+ * Calculate (unaligned) descriptor array size
+ *
+ * @v count		Queue size
+ * @ret size		Descriptor array size
+ */
+static inline __attribute__ (( always_inline )) size_t
+virtio_desc_size ( unsigned int count ) {
+	struct virtio_desc *desc;
+
+	return ( count * sizeof ( desc[0] ) );
+}
+
+/**
+ * Calculate (unaligned) submission queue size
+ *
+ * @v count		Queue size
+ * @ret size		Submission queue size
+ */
+static inline __attribute__ (( always_inline )) size_t
+virtio_sq_size ( unsigned int count ) {
+	struct virtio_sq *sq;
+
+	return ( sizeof ( *sq ) + ( count * sizeof ( sq->sqe[0] ) ) );
+}
+
+/**
+ * Calculate (unaligned) completion queue size
+ *
+ * @v count		Queue size
+ * @ret size		Completion queue size
+ */
+static inline __attribute__ (( always_inline )) size_t
+virtio_cq_size ( unsigned int count ) {
+	struct virtio_cq *cq;
+
+	return ( sizeof ( *cq ) + ( count * sizeof ( cq->cqe[0] ) ) );
+}
+
+/** Number of 32-bit feature words */
+#define VIRTIO_FEATURE_WORDS 2
+
+/** A virtio feature set */
+struct virtio_features {
+	/** Feature words (word[n] holds feature bits 32n to 32n+31) */
+	uint32_t word[VIRTIO_FEATURE_WORDS];
+};
+
+/** Arbitrary descriptor layouts may be used */
+#define VIRTIO_FEAT0_ANY_LAYOUT 0x08000000
+
+/** Virtio version 1.0 or above */
+#define VIRTIO_FEAT1_MODERN 0x00000001
+
+/** A virtio device */
+struct virtio_device {
+	/** Device name */
+	const char *name;
+	/** Device operations */
+	struct virtio_operations *op;
+	/** DMA device */
+	struct dma_device *dma;
+	/** Common registers */
+	void *common;
+	/** Doorbell notification registers */
+	void *notify;
+	/** Device-specific registers */
+	void *device;
+	/** Driver status (bitmask of VIRTIO_STAT_*) */
+	unsigned int stat;
+	/** Device supported features */
+	struct virtio_features supported;
+	/** Negotiated features */
+	struct virtio_features features;
+	/** Notification doorbell multiplier (from the notify capability) */
+	unsigned int multiplier;
+};
+
+/** Virtio device operations (abstract legacy vs. modern register access) */
+struct virtio_operations {
+	/**
+	 * Reset device
+	 *
+	 * @v virtio		Virtio device
+	 * @ret rc		Return status code
+	 */
+	int ( * reset ) ( struct virtio_device *virtio );
+	/**
+	 * Report driver status
+	 *
+	 * @v virtio		Virtio device
+	 * @ret stat		Actual device status
+	 */
+	unsigned int ( * status ) ( struct virtio_device *virtio );
+	/**
+	 * Get supported features
+	 *
+	 * @v virtio		Virtio device
+	 */
+	void ( * supported ) ( struct virtio_device *virtio );
+	/**
+	 * Set negotiated features
+	 *
+	 * @v virtio		Virtio device
+	 */
+	void ( * negotiate ) ( struct virtio_device *virtio );
+	/**
+	 * Set queue size
+	 *
+	 * @v virtio		Virtio device
+	 * @v queue		Virtio queue
+	 * @v count		Requested size
+	 */
+	void ( * size ) ( struct virtio_device *virtio,
+			  struct virtio_queue *queue, unsigned int count );
+	/**
+	 * Enable queue
+	 *
+	 * @v virtio		Virtio device
+	 * @v queue		Virtio queue
+	 */
+	void ( * enable ) ( struct virtio_device *virtio,
+			    struct virtio_queue *queue );
+};
+
+/**
+ * Submit descriptor(s) to queue (visible to device only after virtio_notify())
+ *
+ * @v queue		Virtio queue
+ * @v index		Starting descriptor index
+ */
+static inline __attribute__ (( always_inline )) void
+virtio_submit ( struct virtio_queue *queue, unsigned int index ) {
+	struct virtio_sqe *sqe;
+
+	/* Get next submission queue entry */
+	sqe = &queue->sq->sqe[ queue->prod++ & queue->mask ];
+
+	/* Populate submission queue entry */
+	sqe->index = cpu_to_le16 ( index );
+}
+
+/**
+ * Notify queue
+ *
+ * @v queue		Virtio queue
+ */
+static inline __attribute__ (( always_inline )) void
+virtio_notify ( struct virtio_queue *queue ) {
+
+	/* Publish producer index (barriers order ring writes, index, doorbell) */
+	wmb();
+	queue->sq->prod = cpu_to_le16 ( queue->prod );
+	wmb();
+
+	/* Ring doorbell */
+	iowrite16 ( queue->index, queue->db );
+}
+
+/**
+ * Check for completed descriptors
+ *
+ * @v queue		Virtio queue
+ * @ret completions	Number of pending completions
+ */
+static inline __attribute__ (( always_inline )) unsigned int
+virtio_completions ( struct virtio_queue *queue ) {
+	uint16_t completions;
+
+	/* Get completion count (16-bit arithmetic handles index wraparound) */
+	completions = ( le16_to_cpu ( queue->cq->prod ) - queue->cons );
+	return completions;
+}
+
+/**
+ * Complete descriptor(s) (consume next completion queue entry)
+ *
+ * @v queue		Virtio queue
+ * @v len		Length to fill in, or NULL
+ * @ret index		Starting descriptor index
+ */
+static inline __attribute__ (( always_inline )) unsigned int
+virtio_complete ( struct virtio_queue *queue, size_t *len ) {
+	struct virtio_cqe *cqe;
+
+	/* Get next completion queue entry */
+	cqe = &queue->cq->cqe[ queue->cons++ & queue->mask ];
+
+	/* Parse completion queue entry */
+	if ( len )
+		*len = le32_to_cpu ( cqe->len );
+	return le32_to_cpu ( cqe->index );
+}
+
+/**
+ * Check if device is using the legacy interface
+ *
+ * @v virtio		Virtio device
+ * @ret is_legacy	Device is using the legacy interface
+ */
+static inline __attribute__ (( always_inline )) int
+virtio_is_legacy ( struct virtio_device *virtio ) {
+
+	/* Legacy if VIRTIO_F_VERSION_1 (feature bit 32) was not negotiated */
+	return ( ! ( virtio->features.word[1] & VIRTIO_FEAT1_MODERN ) );
+}
+
+extern int virtio_pci_map ( struct virtio_device *virtio,
+			    struct pci_device *pci );
+extern int virtio_reset ( struct virtio_device *virtio );
+extern unsigned int virtio_status ( struct virtio_device *virtio,
+				    unsigned int stat );
+extern int virtio_init ( struct virtio_device *virtio,
+			 const struct virtio_features *driver );
+extern int virtio_enable ( struct virtio_device *virtio,
+			   struct virtio_queue *queue, unsigned int count );
+extern void virtio_free ( struct virtio_device *virtio,
+			  struct virtio_queue *queue );
+extern void virtio_unmap ( struct virtio_device *virtio );
+
+#endif /* _IPXE_VIRTIO_H */