[virtio] Replace the virtio core and network device driver The existing virtio network driver has been somewhat hacked together over the past two decades by multiple contributors, and includes a substantial amount of logic that is almost but not quite duplicated between the "legacy" and "modern" code paths. Rip out the existing driver and replace with a completely new driver written based on the Virtual I/O Device specification document, not derived from the Linux kernel driver. Signed-off-by: Michael Brown <mcb30@ipxe.org>
diff --git a/src/drivers/bus/virtio-pci.c b/src/drivers/bus/virtio-pci.c deleted file mode 100644 index 3fc93a9..0000000 --- a/src/drivers/bus/virtio-pci.c +++ /dev/null
@@ -1,453 +0,0 @@ -/* virtio-pci.c - pci interface for virtio interface - * - * (c) Copyright 2008 Bull S.A.S. - * - * Author: Laurent Vivier <Laurent.Vivier@bull.net> - * - * some parts from Linux Virtio PCI driver - * - * Copyright IBM Corp. 2007 - * Authors: Anthony Liguori <aliguori@us.ibm.com> - * - */ - -#include "errno.h" -#include "byteswap.h" -#include "etherboot.h" -#include "ipxe/io.h" -#include "ipxe/iomap.h" -#include "ipxe/pci.h" -#include "ipxe/dma.h" -#include "ipxe/reboot.h" -#include "ipxe/virtio-pci.h" -#include "ipxe/virtio-ring.h" - -static int vp_alloc_vq(struct vring_virtqueue *vq, u16 num, size_t header_size) -{ - size_t ring_size = PAGE_MASK + vring_size(num); - size_t vdata_size = num * sizeof(void *); - size_t queue_size = ring_size + vdata_size + header_size; - - vq->queue = dma_alloc(vq->dma, &vq->map, queue_size, queue_size); - if (!vq->queue) { - return -ENOMEM; - } - - memset ( vq->queue, 0, queue_size ); - vq->queue_size = queue_size; - - /* vdata immediately follows the ring */ - vq->vdata = (void **)(vq->queue + ring_size); - - /* empty header immediately follows vdata */ - vq->empty_header = (struct virtio_net_hdr_modern *)(vq->queue + ring_size + vdata_size); - - return 0; -} - -void vp_free_vq(struct vring_virtqueue *vq) -{ - if (vq->queue && vq->queue_size) { - dma_free(&vq->map, vq->queue, vq->queue_size); - vq->queue = NULL; - vq->vdata = NULL; - vq->queue_size = 0; - } -} - -int vp_find_vq(unsigned int ioaddr, int queue_index, - struct vring_virtqueue *vq, struct dma_device *dma_dev, - size_t header_size) -{ - struct vring * vr = &vq->vring; - u16 num; - int rc; - - /* select the queue */ - - outw(queue_index, ioaddr + VIRTIO_PCI_QUEUE_SEL); - - /* check if the queue is available */ - - num = inw(ioaddr + VIRTIO_PCI_QUEUE_NUM); - if (!num) { - DBG("VIRTIO-PCI ERROR: queue size is 0\n"); - return -1; - } - - /* check if the queue is already active */ - - if (inl(ioaddr + VIRTIO_PCI_QUEUE_PFN)) { - DBG("VIRTIO-PCI ERROR: 
queue already active\n"); - return -1; - } - - vq->queue_index = queue_index; - vq->dma = dma_dev; - - /* initialize the queue */ - rc = vp_alloc_vq(vq, num, header_size); - if (rc) { - DBG("VIRTIO-PCI ERROR: failed to allocate queue memory\n"); - return rc; - } - vring_init(vr, num, vq->queue); - - /* activate the queue - * - * NOTE: vr->desc is initialized by vring_init() - */ - - outl(dma(&vq->map, vr->desc) >> PAGE_SHIFT, ioaddr + VIRTIO_PCI_QUEUE_PFN); - - return num; -} - -#define CFG_POS(vdev, field) \ - (vdev->cfg_cap_pos + offsetof(struct virtio_pci_cfg_cap, field)) - -static void prep_pci_cfg_cap(struct virtio_pci_modern_device *vdev, - struct virtio_pci_region *region, - size_t offset, u32 length) -{ - pci_write_config_byte(vdev->pci, CFG_POS(vdev, cap.bar), region->bar); - pci_write_config_dword(vdev->pci, CFG_POS(vdev, cap.length), length); - pci_write_config_dword(vdev->pci, CFG_POS(vdev, cap.offset), - (intptr_t)(region->base + offset)); -} - -void vpm_iowrite8(struct virtio_pci_modern_device *vdev, - struct virtio_pci_region *region, u8 data, size_t offset) -{ - switch (region->flags & VIRTIO_PCI_REGION_TYPE_MASK) { - case VIRTIO_PCI_REGION_MEMORY: - writeb(data, region->base + offset); - break; - case VIRTIO_PCI_REGION_PORT: - outb(data, region->base + offset); - break; - case VIRTIO_PCI_REGION_PCI_CONFIG: - prep_pci_cfg_cap(vdev, region, offset, 1); - pci_write_config_byte(vdev->pci, CFG_POS(vdev, pci_cfg_data), data); - break; - default: - assert(0); - break; - } -} - -void vpm_iowrite16(struct virtio_pci_modern_device *vdev, - struct virtio_pci_region *region, u16 data, size_t offset) -{ - data = cpu_to_le16(data); - switch (region->flags & VIRTIO_PCI_REGION_TYPE_MASK) { - case VIRTIO_PCI_REGION_MEMORY: - writew(data, region->base + offset); - break; - case VIRTIO_PCI_REGION_PORT: - outw(data, region->base + offset); - break; - case VIRTIO_PCI_REGION_PCI_CONFIG: - prep_pci_cfg_cap(vdev, region, offset, 2); - pci_write_config_word(vdev->pci, 
CFG_POS(vdev, pci_cfg_data), data); - break; - default: - assert(0); - break; - } -} - -void vpm_iowrite32(struct virtio_pci_modern_device *vdev, - struct virtio_pci_region *region, u32 data, size_t offset) -{ - data = cpu_to_le32(data); - switch (region->flags & VIRTIO_PCI_REGION_TYPE_MASK) { - case VIRTIO_PCI_REGION_MEMORY: - writel(data, region->base + offset); - break; - case VIRTIO_PCI_REGION_PORT: - outl(data, region->base + offset); - break; - case VIRTIO_PCI_REGION_PCI_CONFIG: - prep_pci_cfg_cap(vdev, region, offset, 4); - pci_write_config_dword(vdev->pci, CFG_POS(vdev, pci_cfg_data), data); - break; - default: - assert(0); - break; - } -} - -u8 vpm_ioread8(struct virtio_pci_modern_device *vdev, - struct virtio_pci_region *region, size_t offset) -{ - uint8_t data; - switch (region->flags & VIRTIO_PCI_REGION_TYPE_MASK) { - case VIRTIO_PCI_REGION_MEMORY: - data = readb(region->base + offset); - break; - case VIRTIO_PCI_REGION_PORT: - data = inb(region->base + offset); - break; - case VIRTIO_PCI_REGION_PCI_CONFIG: - prep_pci_cfg_cap(vdev, region, offset, 1); - pci_read_config_byte(vdev->pci, CFG_POS(vdev, pci_cfg_data), &data); - break; - default: - assert(0); - data = 0; - break; - } - return data; -} - -u16 vpm_ioread16(struct virtio_pci_modern_device *vdev, - struct virtio_pci_region *region, size_t offset) -{ - uint16_t data; - switch (region->flags & VIRTIO_PCI_REGION_TYPE_MASK) { - case VIRTIO_PCI_REGION_MEMORY: - data = readw(region->base + offset); - break; - case VIRTIO_PCI_REGION_PORT: - data = inw(region->base + offset); - break; - case VIRTIO_PCI_REGION_PCI_CONFIG: - prep_pci_cfg_cap(vdev, region, offset, 2); - pci_read_config_word(vdev->pci, CFG_POS(vdev, pci_cfg_data), &data); - break; - default: - assert(0); - data = 0; - break; - } - return le16_to_cpu(data); -} - -u32 vpm_ioread32(struct virtio_pci_modern_device *vdev, - struct virtio_pci_region *region, size_t offset) -{ - uint32_t data; - switch (region->flags & VIRTIO_PCI_REGION_TYPE_MASK) 
{ - case VIRTIO_PCI_REGION_MEMORY: - data = readl(region->base + offset); - break; - case VIRTIO_PCI_REGION_PORT: - data = inl(region->base + offset); - break; - case VIRTIO_PCI_REGION_PCI_CONFIG: - prep_pci_cfg_cap(vdev, region, offset, 4); - pci_read_config_dword(vdev->pci, CFG_POS(vdev, pci_cfg_data), &data); - break; - default: - assert(0); - data = 0; - break; - } - return le32_to_cpu(data); -} - -int virtio_pci_find_capability(struct pci_device *pci, uint8_t cfg_type) -{ - int pos; - uint8_t type, bar; - - for (pos = pci_find_capability(pci, PCI_CAP_ID_VNDR); - pos > 0; - pos = pci_find_next_capability(pci, pos, PCI_CAP_ID_VNDR)) { - - pci_read_config_byte(pci, pos + offsetof(struct virtio_pci_cap, - cfg_type), &type); - pci_read_config_byte(pci, pos + offsetof(struct virtio_pci_cap, - bar), &bar); - - /* Ignore structures with reserved BAR values */ - if (bar > 0x5) { - continue; - } - - if (type == cfg_type) { - return pos; - } - } - return 0; -} - -int virtio_pci_map_capability(struct pci_device *pci, int cap, size_t minlen, - u32 align, u32 start, u32 size, - struct virtio_pci_region *region) -{ - u8 bar; - u32 offset, length, base_raw; - unsigned long base; - - pci_read_config_byte(pci, cap + offsetof(struct virtio_pci_cap, bar), &bar); - pci_read_config_dword(pci, cap + offsetof(struct virtio_pci_cap, offset), - &offset); - pci_read_config_dword(pci, cap + offsetof(struct virtio_pci_cap, length), - &length); - - if (length <= start) { - DBG("VIRTIO-PCI bad capability len %d (>%d expected)\n", length, start); - return -EINVAL; - } - if (length - start < minlen) { - DBG("VIRTIO-PCI bad capability len %d (>=%zd expected)\n", length, minlen); - return -EINVAL; - } - length -= start; - if (start + offset < offset) { - DBG("VIRTIO-PCI map wrap-around %d+%d\n", start, offset); - return -EINVAL; - } - offset += start; - if (offset & (align - 1)) { - DBG("VIRTIO-PCI offset %d not aligned to %d\n", offset, align); - return -EINVAL; - } - if (length > size) { - 
length = size; - } - - if (minlen + offset < minlen || - minlen + offset > pci_bar_size(pci, PCI_BASE_ADDRESS(bar))) { - DBG("VIRTIO-PCI map virtio %zd@%d out of range on bar %i length %ld\n", - minlen, offset, - bar, pci_bar_size(pci, PCI_BASE_ADDRESS(bar))); - return -EINVAL; - } - - region->base = NULL; - region->length = length; - region->bar = bar; - - base = pci_bar_start(pci, PCI_BASE_ADDRESS(bar)); - if (base) { - pci_read_config_dword(pci, PCI_BASE_ADDRESS(bar), &base_raw); - - if (base_raw & PCI_BASE_ADDRESS_SPACE_IO) { - /* Region accessed using port I/O */ - region->base = (void *)(base + offset); - region->flags = VIRTIO_PCI_REGION_PORT; - } else { - /* Region mapped into memory space */ - region->base = pci_ioremap(pci, base + offset, length); - region->flags = VIRTIO_PCI_REGION_MEMORY; - } - } - if (!region->base) { - /* Region accessed via PCI config space window */ - region->base = (void *)(intptr_t)offset; - region->flags = VIRTIO_PCI_REGION_PCI_CONFIG; - } - return 0; -} - -void virtio_pci_unmap_capability(struct virtio_pci_region *region) -{ - unsigned region_type = region->flags & VIRTIO_PCI_REGION_TYPE_MASK; - if (region_type == VIRTIO_PCI_REGION_MEMORY) { - iounmap(region->base); - } -} - -void vpm_notify(struct virtio_pci_modern_device *vdev, - struct vring_virtqueue *vq) -{ - vpm_iowrite16(vdev, &vq->notification, (u16)vq->queue_index, 0); -} - -int vpm_find_vqs(struct virtio_pci_modern_device *vdev, - unsigned nvqs, struct vring_virtqueue *vqs, - struct dma_device *dma_dev, size_t header_size) -{ - unsigned i; - struct vring_virtqueue *vq; - u16 size, off; - u32 notify_offset_multiplier; - int err; - - if (nvqs > vpm_ioread16(vdev, &vdev->common, COMMON_OFFSET(num_queues))) { - return -ENOENT; - } - - /* Read notify_off_multiplier from config space. 
*/ - pci_read_config_dword(vdev->pci, - vdev->notify_cap_pos + offsetof(struct virtio_pci_notify_cap, - notify_off_multiplier), - &notify_offset_multiplier); - - for (i = 0; i < nvqs; i++) { - /* Select the queue we're interested in */ - vpm_iowrite16(vdev, &vdev->common, (u16)i, COMMON_OFFSET(queue_select)); - - /* Check if queue is either not available or already active. */ - size = vpm_ioread16(vdev, &vdev->common, COMMON_OFFSET(queue_size)); - /* QEMU has a bug where queues don't revert to inactive on device - * reset. Skip checking the queue_enable field until it is fixed. - */ - if (!size /*|| vpm_ioread16(vdev, &vdev->common.queue_enable)*/) - return -ENOENT; - - if (size & (size - 1)) { - DBG("VIRTIO-PCI %p: bad queue size %d\n", vdev, size); - return -EINVAL; - } - - if (size > MAX_QUEUE_NUM) { - /* iPXE networking tends to be not perf critical so there's no - * need to accept large queue sizes. - */ - size = MAX_QUEUE_NUM; - } - - vq = &vqs[i]; - vq->queue_index = i; - vq->dma = dma_dev; - - /* get offset of notification word for this vq */ - off = vpm_ioread16(vdev, &vdev->common, COMMON_OFFSET(queue_notify_off)); - - err = vp_alloc_vq(vq, size, header_size); - if (err) { - DBG("VIRTIO-PCI %p: failed to allocate queue memory\n", vdev); - return err; - } - vring_init(&vq->vring, size, vq->queue); - - /* activate the queue */ - vpm_iowrite16(vdev, &vdev->common, size, COMMON_OFFSET(queue_size)); - - vpm_iowrite64(vdev, &vdev->common, - dma(&vq->map, vq->vring.desc), - COMMON_OFFSET(queue_desc_lo), - COMMON_OFFSET(queue_desc_hi)); - vpm_iowrite64(vdev, &vdev->common, - dma(&vq->map, vq->vring.avail), - COMMON_OFFSET(queue_avail_lo), - COMMON_OFFSET(queue_avail_hi)); - vpm_iowrite64(vdev, &vdev->common, - dma(&vq->map, vq->vring.used), - COMMON_OFFSET(queue_used_lo), - COMMON_OFFSET(queue_used_hi)); - - err = virtio_pci_map_capability(vdev->pci, - vdev->notify_cap_pos, 2, 2, - off * notify_offset_multiplier, 2, - &vq->notification); - if (err) { - return err; - 
} - } - - /* Select and activate all queues. Has to be done last: once we do - * this, there's no way to go back except reset. - */ - for (i = 0; i < nvqs; i++) { - vq = &vqs[i]; - vpm_iowrite16(vdev, &vdev->common, (u16)vq->queue_index, - COMMON_OFFSET(queue_select)); - vpm_iowrite16(vdev, &vdev->common, 1, COMMON_OFFSET(queue_enable)); - } - return 0; -}
diff --git a/src/drivers/bus/virtio-ring.c b/src/drivers/bus/virtio-ring.c deleted file mode 100644 index e448c34..0000000 --- a/src/drivers/bus/virtio-ring.c +++ /dev/null
@@ -1,143 +0,0 @@ -/* virtio-pci.c - virtio ring management - * - * (c) Copyright 2008 Bull S.A.S. - * - * Author: Laurent Vivier <Laurent.Vivier@bull.net> - * - * some parts from Linux Virtio Ring - * - * Copyright Rusty Russell IBM Corporation 2007 - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - * - * - */ - -FILE_LICENCE ( GPL2_OR_LATER ); - -#include "etherboot.h" -#include "ipxe/io.h" -#include "ipxe/virtio-pci.h" -#include "ipxe/virtio-ring.h" - -#define BUG() do { \ - printf("BUG: failure at %s:%d/%s()!\n", \ - __FILE__, __LINE__, __FUNCTION__); \ - while(1); \ -} while (0) -#define BUG_ON(condition) do { if (condition) BUG(); } while (0) - -/* - * vring_free - * - * put at the begin of the free list the current desc[head] - */ - -void vring_detach(struct vring_virtqueue *vq, unsigned int head) -{ - struct vring *vr = &vq->vring; - unsigned int i; - - /* find end of given descriptor */ - - i = head; - while (vr->desc[i].flags & VRING_DESC_F_NEXT) - i = vr->desc[i].next; - - /* link it with free list and point to it */ - - vr->desc[i].next = vq->free_head; - wmb(); - vq->free_head = head; -} - -/* - * vring_get_buf - * - * get a buffer from the used list - * - */ - -void *vring_get_buf(struct vring_virtqueue *vq, unsigned int *len) -{ - struct vring *vr = &vq->vring; - struct vring_used_elem *elem; - u32 id; - void *opaque; - - BUG_ON(!vring_more_used(vq)); - - elem = &vr->used->ring[vq->last_used_idx % vr->num]; - wmb(); - id = elem->id; - if (len != NULL) - *len = elem->len; - - opaque = vq->vdata[id]; - - vring_detach(vq, id); - - vq->last_used_idx++; - - return opaque; -} - -void vring_add_buf(struct vring_virtqueue *vq, - struct vring_list list[], - unsigned int out, unsigned int in, - void *opaque, int num_added) -{ - struct vring *vr = &vq->vring; - int i, avail, head, prev; - - BUG_ON(out + in == 0); - - prev = 0; - head = vq->free_head; - for (i = head; out; i = 
vr->desc[i].next, out--) { - - vr->desc[i].flags = VRING_DESC_F_NEXT; - vr->desc[i].addr = list->addr; - vr->desc[i].len = list->length; - prev = i; - list++; - } - for ( ; in; i = vr->desc[i].next, in--) { - - vr->desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; - vr->desc[i].addr = list->addr; - vr->desc[i].len = list->length; - prev = i; - list++; - } - vr->desc[prev].flags &= ~VRING_DESC_F_NEXT; - - vq->free_head = i; - - vq->vdata[head] = opaque; - - avail = (vr->avail->idx + num_added) % vr->num; - vr->avail->ring[avail] = head; - wmb(); -} - -void vring_kick(struct virtio_pci_modern_device *vdev, unsigned int ioaddr, - struct vring_virtqueue *vq, int num_added) -{ - struct vring *vr = &vq->vring; - - wmb(); - vr->avail->idx += num_added; - - mb(); - if (!(vr->used->flags & VRING_USED_F_NO_NOTIFY)) { - if (vdev) { - /* virtio 1.0 */ - vpm_notify(vdev, vq); - } else { - /* legacy virtio */ - vp_notify(ioaddr, vq->queue_index); - } - } -}
diff --git a/src/drivers/bus/virtio.c b/src/drivers/bus/virtio.c new file mode 100644 index 0000000..b7d7570 --- /dev/null +++ b/src/drivers/bus/virtio.c
@@ -0,0 +1,783 @@ +/* + * Copyright (C) 2026 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + * + * You can also choose to distribute this program under the terms of + * the Unmodified Binary Distribution Licence (as given in the file + * COPYING.UBDL), provided that you have satisfied its requirements. + */ + +FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL ); +FILE_SECBOOT ( PERMITTED ); + +/** @file + * + * Virtual I/O device + * + */ + +#include <string.h> +#include <assert.h> +#include <errno.h> +#include <unistd.h> +#include <ipxe/pci.h> +#include <ipxe/virtio.h> + +/****************************************************************************** + * + * Original ("legacy") device operations + * + ****************************************************************************** + */ + +/** + * Reset device + * + * @v virtio Virtio device + * @ret rc Return status code + */ +static int virtio_legacy_reset ( struct virtio_device *virtio ) { + uint8_t stat; + unsigned int i; + + /* Reset device */ + iowrite8 ( 0, virtio->common + VIRTIO_LEG_STAT ); + + /* Wait for reset to complete */ + for ( i = 0 ; i < VIRTIO_RESET_MAX_WAIT_MS ; i++ ) { + stat = ioread8 ( virtio->common + VIRTIO_LEG_STAT ); + if ( ! 
stat ) + return 0; + mdelay ( 1 ); + } + + DBGC ( virtio, "VIRTIO %s could not reset device\n", virtio->name ); + return -ETIMEDOUT; +} + +/** + * Report driver status + * + * @v virtio Virtio device + * @ret stat Actual device status + */ +static unsigned int virtio_legacy_status ( struct virtio_device *virtio ) { + + /* Report device status */ + iowrite8 ( virtio->stat, virtio->common + VIRTIO_LEG_STAT ); + + /* Read back device status */ + return ioread8 ( virtio->common + VIRTIO_LEG_STAT ); +} + +/** + * Get supported features + * + * @v virtio Virtio device + */ +static void virtio_legacy_supported ( struct virtio_device *virtio ) { + struct virtio_features *supported = &virtio->supported; + unsigned int i; + + /* Get device supported features */ + supported->word[0] = ioread32 ( virtio->common + VIRTIO_LEG_FEAT ); + + /* Legacy devices have only a single 32-bit feature register */ + for ( i = 1 ; i < VIRTIO_FEATURE_WORDS ; i++ ) + supported->word[i] = 0; +} + +/** + * Negotiate device features + * + * @v virtio Virtio device + */ +static void virtio_legacy_negotiate ( struct virtio_device *virtio ) { + struct virtio_features *features = &virtio->features; + unsigned int i; + + /* Set in-use features */ + iowrite32 ( features->word[0], virtio->common + VIRTIO_LEG_USED ); + + /* Legacy devices have only a single 32-bit feature register */ + for ( i = 1 ; i < VIRTIO_FEATURE_WORDS ; i++ ) + assert ( features->word[i] == 0 ); +} + +/** + * Set queue size + * + * @v virtio Virtio device + * @v queue Virtio queue + * @v count Requested size + */ +static void virtio_legacy_size ( struct virtio_device *virtio, + struct virtio_queue *queue, + unsigned int count ) { + size_t len; + + /* Select queue */ + iowrite16 ( queue->index, virtio->common + VIRTIO_LEG_SEL ); + + /* Get (fixed) queue size */ + count = ioread16 ( virtio->common + VIRTIO_LEG_SIZE ); + + /* Calculate queue length */ + len = virtio_desc_size ( count ); + len = virtio_align ( len + virtio_sq_size ( 
count ) ); + len = virtio_align ( len + virtio_cq_size ( count ) ); + + /* Record queue size */ + queue->count = count; + queue->len = len; +} + +/** + * Enable queue + * + * @v virtio Virtio device + * @v queue Virtio queue + */ +static void virtio_legacy_enable ( struct virtio_device *virtio, + struct virtio_queue *queue ) { + unsigned int count = queue->count; + void *base = queue->desc; + size_t len; + + /* Select queue */ + iowrite16 ( queue->index, virtio->common + VIRTIO_LEG_SEL ); + + /* Lay out queue regions */ + len = virtio_desc_size ( count ); + queue->sq = ( base + len ); + len = virtio_align ( len + virtio_sq_size ( count ) ); + queue->cq = ( base + len ); + len = virtio_align ( len + virtio_cq_size ( count ) ); + assert ( len == queue->len ); + + /* Program queue base page address */ + iowrite32 ( ( dma ( &queue->map, queue->desc ) / VIRTIO_PAGE ), + virtio->common + VIRTIO_LEG_BASE ); +} + +/** Original ("legacy") device operations */ +static struct virtio_operations virtio_legacy_operations = { + .reset = virtio_legacy_reset, + .status = virtio_legacy_status, + .supported = virtio_legacy_supported, + .negotiate = virtio_legacy_negotiate, + .size = virtio_legacy_size, + .enable = virtio_legacy_enable, +}; + +/****************************************************************************** + * + * PCI ("modern") device operations + * + ****************************************************************************** + */ + +/** + * Reset device + * + * @v virtio Virtio device + * @ret rc Return status code + */ +static int virtio_pci_reset ( struct virtio_device *virtio ) { + uint8_t stat; + unsigned int i; + + /* Reset device */ + iowrite8 ( 0, virtio->common + VIRTIO_PCI_STAT ); + + /* Wait for reset to complete */ + for ( i = 0 ; i < VIRTIO_RESET_MAX_WAIT_MS ; i++ ) { + stat = ioread8 ( virtio->common + VIRTIO_PCI_STAT ); + if ( ! 
stat ) + return 0; + mdelay ( 1 ); + } + + DBGC ( virtio, "VIRTIO %s could not reset device\n", virtio->name ); + return -ETIMEDOUT; +} + +/** + * Report driver status + * + * @v virtio Virtio device + * @ret stat Actual device status + */ +static unsigned int virtio_pci_status ( struct virtio_device *virtio ) { + + /* Report device status */ + iowrite8 ( virtio->stat, virtio->common + VIRTIO_PCI_STAT ); + + /* Read back device status */ + return ioread8 ( virtio->common + VIRTIO_PCI_STAT ); +} + +/** + * Get supported features + * + * @v virtio Virtio device + */ +static void virtio_pci_supported ( struct virtio_device *virtio ) { + struct virtio_features *supported = &virtio->supported; + unsigned int i; + + /* Get device supported features */ + for ( i = 0 ; i < VIRTIO_FEATURE_WORDS ; i++ ) { + iowrite32 ( i, virtio->common + VIRTIO_PCI_FEAT_SEL ); + supported->word[i] = + ioread32 ( virtio->common + VIRTIO_PCI_FEAT ); + } +} + +/** + * Negotiate device features + * + * @v virtio Virtio device + */ +static void virtio_pci_negotiate ( struct virtio_device *virtio ) { + struct virtio_features *features = &virtio->features; + unsigned int i; + + /* Set in-use features */ + for ( i = 0 ; i < VIRTIO_FEATURE_WORDS ; i++ ) { + iowrite32 ( i, virtio->common + VIRTIO_PCI_USED_SEL ); + iowrite32 ( features->word[i], + virtio->common + VIRTIO_PCI_USED ); + } +} + +/** + * Set queue size + * + * @v virtio Virtio device + * @v queue Virtio queue + * @v count Requested size + */ +static void virtio_pci_size ( struct virtio_device *virtio, + struct virtio_queue *queue, + unsigned int count ) { + unsigned int max; + size_t len; + + /* Select queue */ + iowrite16 ( queue->index, virtio->common + VIRTIO_PCI_SEL ); + + /* Set queue size */ + max = ioread16 ( virtio->common + VIRTIO_PCI_SIZE ); + if ( count > max ) + count = max; + iowrite16 ( count, virtio->common + VIRTIO_PCI_SIZE ); + + /* Calculate queue length */ + len = virtio_align ( virtio_desc_size ( count ) ); + len = 
virtio_align ( len + virtio_sq_size ( count ) ); + len = virtio_align ( len + virtio_cq_size ( count ) ); + + /* Record queue size */ + queue->count = count; + queue->len = len; +} + +/** + * Program queue address + * + * @v virtio Virtio device + * @v queue Virtio queue + * @v addr Address + * @v offset Register offset + */ +static void virtio_pci_address ( struct virtio_device *virtio, + struct virtio_queue *queue, + void *addr, unsigned int offset ) { + physaddr_t phys; + + /* Program address */ + phys = dma ( &queue->map, addr ); + iowrite32 ( ( phys & 0xffffffffUL ), ( virtio->common + offset + 0 ) ); + if ( sizeof ( physaddr_t ) > sizeof ( uint32_t ) ) { + iowrite32 ( ( ( ( uint64_t ) phys ) >> 32 ), + ( virtio->common + offset + 4 ) ); + } else { + iowrite32 ( 0, ( virtio->common + offset + 4 ) ); + } +} + +/** + * Enable queue + * + * @v virtio Virtio device + * @v queue Virtio queue + */ +static void virtio_pci_enable ( struct virtio_device *virtio, + struct virtio_queue *queue ) { + unsigned int count = queue->count; + void *base = queue->desc; + size_t len; + + /* Select queue */ + iowrite16 ( queue->index, virtio->common + VIRTIO_PCI_SEL ); + + /* Lay out queue regions */ + len = virtio_align ( virtio_desc_size ( count ) ); + queue->sq = ( base + len ); + len = virtio_align ( len + virtio_sq_size ( count ) ); + queue->cq = ( base + len ); + len = virtio_align ( len + virtio_cq_size ( count ) ); + assert ( len == queue->len ); + + /* Program queue addresses */ + virtio_pci_address ( virtio, queue, queue->desc, VIRTIO_PCI_DESC ); + virtio_pci_address ( virtio, queue, queue->sq, VIRTIO_PCI_SQ ); + virtio_pci_address ( virtio, queue, queue->cq, VIRTIO_PCI_CQ ); + + /* Enable queue */ + iowrite16 ( 1, virtio->common + VIRTIO_PCI_ENABLE ); +} + +/** PCI ("modern") device operations */ +static struct virtio_operations virtio_pci_operations = { + .reset = virtio_pci_reset, + .status = virtio_pci_status, + .supported = virtio_pci_supported, + .negotiate = 
virtio_pci_negotiate, + .size = virtio_pci_size, + .enable = virtio_pci_enable, +}; + +/** + * Find PCI capability + * + * @v virtio Virtio device + * @v pci PCI device + * @v type Capability type + * @v cap Virtio PCI capability to fill in + * @ret rc Return status code + */ +static int virtio_pci_cap ( struct virtio_device *virtio, + struct pci_device *pci, unsigned int type, + struct virtio_pci_capability *cap ) { + unsigned int reg; + int pos; + + /* Scan through vendor capabilities */ + for ( pos = pci_find_capability ( pci, PCI_CAP_ID_VNDR ) ; pos > 0 ; + pos = pci_find_next_capability ( pci, pos, PCI_CAP_ID_VNDR ) ) { + + /* Check length */ + pci_read_config_byte ( pci, ( pos + PCI_CAP_LEN ), &cap->len ); + if ( cap->len < VIRTIO_PCI_CAP_END ) { + DBGC ( virtio, "VIRTIO %s capability +%#02x too short " + "(%d bytes)\n", virtio->name, pos, cap->len ); + continue; + } + + /* Read values */ + pci_read_config_byte ( pci, ( pos + VIRTIO_PCI_CAP_TYPE ), + &cap->type ); + pci_read_config_byte ( pci, ( pos + VIRTIO_PCI_CAP_BAR ), + &cap->bar ); + pci_read_config_dword ( pci, ( pos + VIRTIO_PCI_CAP_OFFSET ), + &cap->offset ); + + /* Check type */ + if ( cap->type != type ) + continue; + DBGC2 ( virtio, "VIRTIO %s capability type %d BAR%d+%#04x\n", + virtio->name, type, cap->bar, cap->offset ); + + /* Check BAR */ + reg = PCI_BASE_ADDRESS ( cap->bar ); + if ( reg > PCI_BASE_ADDRESS_5 ) + continue; + + /* Success */ + cap->pos = pos; + return 0; + } + + DBGC ( virtio, "VIRTIO %s has no usable capability type %d\n", + virtio->name, type ); + cap->pos = 0; + return -ENOENT; +} + +/** + * Map PCI capability + * + * @v virtio Virtio device + * @v pci PCI device + * @v cap Virtio PCI capability + * @ret io_addr I/O address, or NULL on error + */ +static void * virtio_pci_map_cap ( struct virtio_device *virtio, + struct pci_device *pci, + struct virtio_pci_capability *cap ) { + unsigned long addr; + unsigned int reg; + int is_io_bar; + void *io_addr; + + /* Get BAR start 
address and type */ + reg = PCI_BASE_ADDRESS ( cap->bar ); + addr = pci_bar_start ( pci, reg ); + if ( ! addr ) { + DBGC ( virtio, "VIRTIO %s BAR%d is not usable\n", + virtio->name, cap->bar ); + return NULL; + } + + /* Map memory or I/O BAR */ + addr += cap->offset; + is_io_bar = pci_bar_is_io ( pci, reg ); + io_addr = ( is_io_bar ? ( ( void * ) addr ) : + pci_ioremap ( pci, addr, VIRTIO_PAGE ) ); + if ( ! io_addr ) { + DBGC ( virtio, "VIRTIO %s could not map BAR%d+%#04x\n", + virtio->name, cap->bar, cap->offset ); + return NULL; + } + + DBGC2 ( virtio, "VIRTIO %s mapped BAR%d+%#04x (%s %#08lx)\n", + virtio->name, cap->bar, cap->offset, + ( is_io_bar ? "IO" : "MEM" ), addr ); + return io_addr; +} + +/** + * Map PCI device + * + * @v virtio Virtio device + * @v pci PCI device + * @ret rc Return status code + */ +int virtio_pci_map ( struct virtio_device *virtio, struct pci_device *pci ) { + struct virtio_pci_capability common; + struct virtio_pci_capability notify; + struct virtio_pci_capability device; + unsigned int msix; + uint32_t mult; + uint16_t ctrl; + int rc; + + /* Initialise device */ + virtio->name = pci->dev.name; + virtio->dma = &pci->dma; + + /* Fix up PCI device */ + adjust_pci_device ( pci ); + + /* Check if MSI-X is enabled */ + msix = pci_find_capability ( pci, PCI_CAP_ID_MSIX ); + if ( msix ) { + pci_read_config_word ( pci, msix, &ctrl ); + if ( ! 
( ctrl & PCI_MSIX_CTRL_ENABLE ) ) + msix = 0; + } + + /* Locate virtio capabilities */ + virtio_pci_cap ( virtio, pci, VIRTIO_PCI_CAP_TYPE_COMMON, &common ); + virtio_pci_cap ( virtio, pci, VIRTIO_PCI_CAP_TYPE_NOTIFY, &notify ); + virtio_pci_cap ( virtio, pci, VIRTIO_PCI_CAP_TYPE_DEVICE, &device ); + + /* Use modern interface if available */ + if ( common.pos && notify.pos && device.pos && + ( notify.len >= VIRTIO_PCI_CAP_NOTIFY_END ) ) { + + /* Use modern interface */ + virtio->op = &virtio_pci_operations; + dma_set_mask_64bit ( virtio->dma ); + + /* Read notification doorbell multiplier */ + pci_read_config_dword ( pci, ( notify.pos + + VIRTIO_PCI_CAP_NOTIFY_MULT ), + &mult ); + virtio->multiplier = mult; + DBGC ( virtio, "VIRTIO %s using modern interface (mult x%d)\n", + virtio->name, virtio->multiplier ); + + } else { + + /* Use legacy interface */ + virtio->op = &virtio_legacy_operations; + common.bar = 0; + common.offset = 0; + notify.bar = 0; + notify.offset = VIRTIO_LEG_DB; + device.bar = 0; + device.offset = ( msix ? VIRTIO_LEG_DEV_MSIX : + VIRTIO_LEG_DEV ); + DBGC ( virtio, "VIRTIO %s using legacy interface (MSI-X " + "%sabled)\n", virtio->name, ( msix ? "en" : "dis" ) ); + } + + /* Map registers */ + virtio->common = virtio_pci_map_cap ( virtio, pci, &common ); + if ( ! virtio->common ) { + rc = -ENODEV; + goto err_common; + } + virtio->notify = virtio_pci_map_cap ( virtio, pci, &notify ); + if ( ! virtio->notify ) { + rc = -ENODEV; + goto err_notify; + } + virtio->device = virtio_pci_map_cap ( virtio, pci, &device ); + if ( ! 
virtio->device ) { + rc = -ENODEV; + goto err_device; + } + + return 0; + + iounmap ( virtio->device ); + err_device: + iounmap ( virtio->notify ); + err_notify: + iounmap ( virtio->common ); + err_common: + return rc; +} + +/****************************************************************************** + * + * Transport-independent operations + * + ****************************************************************************** + */ + +/** + * Reset device + * + * @v virtio Virtio device + * @ret rc Return status code + */ +int virtio_reset ( struct virtio_device *virtio ) { + int rc; + + /* Clear driver status */ + virtio->stat = 0; + + /* Reset device */ + if ( ( rc = virtio->op->reset ( virtio ) ) != 0 ) { + DBGC ( virtio, "VIRTIO %s could not reset: %s\n", + virtio->name, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Report driver status + * + * @v virtio Virtio device + * @v stat Additional driver status bits + * @ret stat Actual device status + */ +unsigned int virtio_status ( struct virtio_device *virtio, + unsigned int stat ) { + + /* Set new driver status bits */ + virtio->stat |= stat; + + /* Report driver status */ + return virtio->op->status ( virtio ); +} + +/** + * Negotiate features + * + * @v virtio Virtio device + * @v driver Driver supported features + */ +static void virtio_negotiate ( struct virtio_device *virtio, + const struct virtio_features *driver ) { + struct virtio_features *device = &virtio->supported; + struct virtio_features *features = &virtio->features; + unsigned int i; + + /* Get device supported features */ + virtio->op->supported ( virtio ); + + /* Negotiate mutually supported features */ + for ( i = 0 ; i < VIRTIO_FEATURE_WORDS ; i++ ) + features->word[i] = ( device->word[i] & driver->word[i] ); + virtio->op->negotiate ( virtio ); + + /* Show features */ + DBGC ( virtio, "VIRTIO %s features", virtio->name ); + for ( i = 0 ; i < VIRTIO_FEATURE_WORDS ; i++ ) + DBGC ( virtio, "%s%08x", ( i ? 
":" : " " ), device->word[i] ); + DBGC ( virtio, " /" ); + for ( i = 0 ; i < VIRTIO_FEATURE_WORDS ; i++ ) + DBGC ( virtio, "%s%08x", ( i ? ":" : " " ), features->word[i] ); + DBGC ( virtio, "\n" ); +} + +/** + * Initialise device + * + * @v virtio Virtio device + * @v driver Driver supported features + * @ret rc Return status code + */ +int virtio_init ( struct virtio_device *virtio, + const struct virtio_features *driver ) { + unsigned int stat; + int rc; + + /* Reset device */ + if ( ( rc = virtio_reset ( virtio ) ) != 0 ) + goto err_reset; + + /* Acknowledge device existence */ + virtio_status ( virtio, VIRTIO_STAT_ACKNOWLEDGE ); + + /* Report driver existence */ + virtio_status ( virtio, VIRTIO_STAT_DRIVER ); + + /* Negotiate features */ + virtio_negotiate ( virtio, driver ); + + /* Report feature negotiation completion, if applicable */ + if ( virtio->features.word[1] & VIRTIO_FEAT1_MODERN ) { + stat = virtio_status ( virtio, VIRTIO_STAT_FEATURES_OK ); + if ( ! ( stat & VIRTIO_STAT_FEATURES_OK ) ) { + DBGC ( virtio, "VIRTIO %s did not accept features\n", + virtio->name ); + rc = -ENOTSUP; + goto err_features; + } + } + + return 0; + + err_features: + virtio_reset ( virtio ); + err_reset: + virtio_status ( virtio, VIRTIO_STAT_FAIL ); + return rc; +} + +/** + * Enable queue + * + * @v virtio Virtio device + * @v queue Virtio queue + * @v count Requested queue size + * @ret rc Return status code + */ +int virtio_enable ( struct virtio_device *virtio, struct virtio_queue *queue, + unsigned int count ) { + unsigned int offset; + int rc; + + /* Reset counters */ + queue->prod = 0; + queue->cons = 0; + + /* Determine queue size */ + virtio->op->size ( virtio, queue, count ); + if ( ( queue->count == 0 ) || + ( queue->count & ( queue->count - 1 ) ) ) { + DBGC ( virtio, "VIRTIO %s Q%d invalid size %d\n", + virtio->name, queue->index, queue->count ); + rc = -ENODEV; + goto err_count; + } + queue->mask = ( queue->count - 1 ); + + /* Allocate and initialise queue */ + 
queue->desc = dma_alloc ( virtio->dma, &queue->map, queue->len, + VIRTIO_PAGE ); + if ( ! queue->desc ) { + rc = -ENOMEM; + goto err_alloc; + } + memset ( queue->desc, 0, queue->len ); + + /* Enable queue */ + virtio->op->enable ( virtio, queue ); + DBGC ( virtio, "VIRTIO %s Q%d %dx descriptors at [%#08lx,%#08lx)\n", + virtio->name, queue->index, queue->count, + virt_to_phys ( queue->desc ), + ( virt_to_phys ( queue->desc ) + + virtio_desc_size ( queue->count ) ) ); + DBGC ( virtio, "VIRTIO %s Q%d %dx submissions at [%#08lx,%#08lx)\n", + virtio->name, queue->index, queue->count, + virt_to_phys ( queue->sq ), + ( virt_to_phys ( queue->sq ) + + virtio_sq_size ( queue->count ) ) ); + DBGC ( virtio, "VIRTIO %s Q%d %dx completions at [%#08lx,%#08lx)\n", + virtio->name, queue->index, queue->count, + virt_to_phys ( queue->cq ), + ( virt_to_phys ( queue->cq ) + + virtio_cq_size ( queue->count ) ) ); + + /* Calculate doorbell register address */ + offset = ( queue->index * virtio->multiplier ); + queue->db = ( virtio->notify + offset ); + DBGC ( virtio, "VIRTIO %s Q%d doorbell at +%#04x\n", + virtio->name, queue->index, offset ); + + return 0; + + dma_free ( &queue->map, queue->desc, queue->len ); + queue->desc = NULL; + err_alloc: + err_count: + return rc; +} + +/** + * Free queue + * + * @v virtio Virtio device + * @v queue Virtio queue + */ +void virtio_free ( struct virtio_device *virtio, struct virtio_queue *queue ) { + + /* Free queue */ + if ( queue->desc ) { + dma_free ( &queue->map, queue->desc, queue->len ); + queue->desc = NULL; + DBGC ( virtio, "VIRTIO %s Q%d freed\n", + virtio->name, queue->index ); + } +} + +/** + * Unmap device + * + * @v virtio Virtio device + */ +void virtio_unmap ( struct virtio_device *virtio ) { + + /* Unmap device-specific registers */ + iounmap ( virtio->device ); + + /* Unmap notification doorbells */ + iounmap ( virtio->notify ); + + /* Unmap common registers */ + iounmap ( virtio->common ); +}
diff --git a/src/drivers/net/virtio-net.c b/src/drivers/net/virtio-net.c index 32dad9a..bbaeb9c 100644 --- a/src/drivers/net/virtio-net.c +++ b/src/drivers/net/virtio-net.c
@@ -1,707 +1,597 @@ /* - * (c) Copyright 2010 Stefan Hajnoczi <stefanha@gmail.com> + * Copyright (C) 2026 Michael Brown <mbrown@fensystems.co.uk>. * - * based on the Etherboot virtio-net driver + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. * - * (c) Copyright 2008 Bull S.A.S. + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. * - * Author: Laurent Vivier <Laurent.Vivier@bull.net> + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. * - * some parts from Linux Virtio PCI driver - * - * Copyright IBM Corp. 2007 - * Authors: Anthony Liguori <aliguori@us.ibm.com> - * - * some parts from Linux Virtio Ring - * - * Copyright Rusty Russell IBM Corporation 2007 - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. + * You can also choose to distribute this program under the terms of + * the Unmodified Binary Distribution Licence (as given in the file + * COPYING.UBDL), provided that you have satisfied its requirements. 
*/ FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL ); +FILE_SECBOOT ( PERMITTED ); -#include <errno.h> -#include <stdlib.h> +#include <stdint.h> +#include <string.h> #include <unistd.h> -#include <ipxe/list.h> -#include <ipxe/iobuf.h> +#include <errno.h> +#include <byteswap.h> #include <ipxe/netdevice.h> -#include <ipxe/pci.h> -#include <ipxe/dma.h> -#include <ipxe/if_ether.h> #include <ipxe/ethernet.h> -#include <ipxe/virtio-pci.h> -#include <ipxe/virtio-ring.h> +#include <ipxe/if_ether.h> +#include <ipxe/iobuf.h> +#include <ipxe/malloc.h> +#include <ipxe/pci.h> #include "virtio-net.h" -/* - * Virtio network device driver +/** @file * - * Specification: - * http://ozlabs.org/~rusty/virtio-spec/ + * Virtual I/O network device * - * The virtio network device is supported by Linux virtualization software - * including QEMU/KVM and lguest. This driver supports the virtio over PCI - * transport; virtual machines have one virtio-net PCI adapter per NIC. - * - * Virtio-net is different from hardware NICs because virtio devices - * communicate with the hypervisor via virtqueues, not traditional descriptor - * rings. Virtqueues are unordered queues, they support add_buf() and - * get_buf() operations. To transmit a packet, the driver has to add the - * packet buffer onto the virtqueue. To receive a packet, the driver must - * first add an empty buffer to the virtqueue and then get the filled packet - * buffer on completion. - * - * Virtqueues are an abstraction that is commonly implemented using the vring - * descriptor ring layout. The vring is the actual shared memory structure - * that allows the virtual machine to communicate buffers with the hypervisor. - * Because the vring layout is optimized for flexibility and performance rather - * than space, it is heavy-weight and allocated like traditional descriptor - * rings in the open() function of the driver and not in probe(). - * - * There is no true interrupt enable/disable. 
Virtqueues have callback - * enable/disable flags but these are only hints. The hypervisor may still - * raise an interrupt. Nevertheless, this driver disables callbacks in the - * hopes of avoiding interrupts. */ -/* Driver types are declared here so virtio-net.h can be easily synced with its - * Linux source. +/** Supported features */ +const struct virtio_features virtio_net_features = { + .word = { + ( VIRTIO_FEAT0_ANY_LAYOUT | + VIRTIO_FEAT0_NET_MTU | + VIRTIO_FEAT0_NET_MAC ), + ( VIRTIO_FEAT1_MODERN ), + }, +}; + +/****************************************************************************** + * + * Device-specific registers + * + ****************************************************************************** */ -/* Virtqueue indices */ -enum { - RX_INDEX = 0, - TX_INDEX, - QUEUE_NB -}; - -/** Max number of pending rx packets */ -#define NUM_RX_BUF 8 - -struct virtnet_nic { - /** Base pio register address */ - unsigned long ioaddr; - - /** 0 for legacy, 1 for virtio 1.0 */ - int virtio_version; - - /** Virtio 1.0 device data */ - struct virtio_pci_modern_device vdev; - - /** RX/TX virtqueues */ - struct vring_virtqueue *virtqueue; - - /** RX packets handed to the NIC waiting to be filled in */ - struct list_head rx_iobufs; - - /** Pending rx packet count */ - unsigned int rx_num_iobufs; - - /** DMA device */ - struct dma_device *dma; - -}; - -/** Add an iobuf to a virtqueue +/** + * Get MAC address * * @v netdev Network device - * @v vq_idx Virtqueue index (RX_INDEX or TX_INDEX) + */ +static void virtio_net_mac ( struct net_device *netdev ) { + struct virtio_net *vnet = netdev->priv; + struct virtio_device *virtio = &vnet->virtio; + uint32_t has_mac; + unsigned int i; + + /* Read MAC address from device registers */ + for ( i = 0 ; i < ETH_ALEN ; i++ ) { + netdev->hw_addr[i] = ioread8 ( virtio->device + + VIRTIO_NET_MAC + i ); + } + + /* Use random MAC address if undefined or invalid */ + has_mac = ( virtio->features.word[0] & VIRTIO_FEAT0_NET_MAC ); + if ( ! 
( has_mac && is_valid_ether_addr ( netdev->hw_addr ) ) ) { + DBGC ( vnet, "VNET %s has %s MAC address\n", + virtio->name, ( has_mac ? "invalid" : "no" ) ); + eth_random_addr ( netdev->hw_addr ); + } +} + +/** + * Get MTU + * + * @v netdev Network device + */ +static void virtio_net_mtu ( struct net_device *netdev ) { + struct virtio_net *vnet = netdev->priv; + struct virtio_device *virtio = &vnet->virtio; + uint32_t has_mtu; + + /* Read MTU from device registers, if available */ + has_mtu = ( virtio->features.word[0] & VIRTIO_FEAT0_NET_MTU ); + if ( has_mtu ) { + netdev->mtu = ioread16 ( virtio->device + VIRTIO_NET_MTU ); + netdev->max_pkt_len = ( netdev->mtu + ETH_HLEN ); + DBGC ( vnet, "VNET %s has MTU %zd\n", + virtio->name, netdev->mtu ); + } +} + +/****************************************************************************** + * + * Queue management + * + ****************************************************************************** + */ + +/** + * Enable queue + * + * @v vnet Virtio network device + * @v queue Virtio network queue + * @ret rc Return status code + */ +static int virtio_net_enable ( struct virtio_net *vnet, + struct virtio_net_queue *queue ) { + struct virtio_device *virtio = &vnet->virtio; + struct virtio_desc *desc; + unsigned int fill; + unsigned int slot; + unsigned int index; + unsigned int write; + int rc; + + /* Map packet header */ + if ( ( rc = dma_map ( virtio->dma, &queue->map, &queue->hdr, + sizeof ( queue->hdr ), queue->dma ) ) != 0 ) { + DBGC ( vnet, "VNET %s Q%d could not map header: %s\n", + virtio->name, queue->queue.index, strerror ( rc ) ); + goto err_map; + } + + /* Enable queue */ + if ( ( rc = virtio_enable ( virtio, &queue->queue, + queue->count ) ) != 0 ) { + DBGC ( vnet, "VNET %s Q%d could not initialise: %s\n", + virtio->name, queue->queue.index, strerror ( rc ) ); + goto err_enable; + } + + /* Calculate mask */ + fill = queue->queue.count; + if ( fill > queue->max ) + fill = queue->max; + queue->fill = fill; + 
queue->mask = ( fill - 1 ); + + /* Initialise descriptors and slot ring */ + write = queue->write; + for ( slot = 0 ; slot < fill ; slot++ ) { + queue->slots[slot] = slot; + queue->iobufs[slot] = NULL; + index = ( slot * VIRTIO_NET_DESCS ); + desc = &queue->queue.desc[index]; + desc[0].addr = cpu_to_le64 ( dma ( &queue->map, &queue->hdr )); + desc[0].len = cpu_to_le32 ( vnet->hlen ); + desc[0].flags = cpu_to_le16 ( VIRTIO_DESC_FL_NEXT | write ); + desc[0].next = cpu_to_le16 ( index + 1 ); + desc[1].flags = cpu_to_le16 ( write ); + } + + DBGC ( vnet, "VNET %s Q%d using %d/%d descriptors\n", virtio->name, + queue->queue.index, queue->fill, queue->queue.count ); + return 0; + + /* There may be no way to disable individual queues: the + * caller must reset the whole device to recover from a + * failure. + */ + err_enable: + dma_unmap ( &queue->map, sizeof ( queue->hdr ) ); + err_map: + return rc; +} + +/** + * Submit I/O buffer to queue + * + * @v vnet Virtio network device + * @v queue Virtio network queue * @v iobuf I/O buffer - * - * The virtqueue is kicked after the iobuf has been added. + * @v len Submitted length */ -static void virtnet_enqueue_iob ( struct net_device *netdev, - int vq_idx, struct io_buffer *iobuf ) { - struct virtnet_nic *virtnet = netdev->priv; - struct vring_virtqueue *vq = &virtnet->virtqueue[vq_idx]; - struct virtio_net_hdr_modern *header = vq->empty_header; - unsigned int out = ( vq_idx == TX_INDEX ) ? 2 : 0; - unsigned int in = ( vq_idx == TX_INDEX ) ? 0 : 2; - size_t header_len = ( virtnet->virtio_version ? - sizeof ( *header ) : sizeof ( header->legacy ) ); - struct vring_list list[] = { - { - /* Share a single zeroed virtio net header between all - * packets in a ring. This works because this driver - * does not use any advanced features so none of the - * header fields get used. - * - * Some host implementations (notably Google Compute - * Platform) are known to unconditionally write back - * to header->flags for received packets. 
Work around - * this by using separate RX and TX headers. - */ - .addr = dma ( &vq->map, header ), - .length = header_len, - }, - { - .addr = iob_dma ( iobuf ), - .length = iob_len ( iobuf ), - }, - }; +static void virtio_net_submit ( struct virtio_net *vnet, + struct virtio_net_queue *queue, + struct io_buffer *iobuf, size_t len ) { + struct virtio_device *virtio = &vnet->virtio; + struct virtio_desc *desc; + unsigned int prod; + unsigned int slot; + unsigned int index; - DBGC2 ( virtnet, "VIRTIO-NET %p enqueuing iobuf %p on vq %d\n", - virtnet, iobuf, vq_idx ); + /* Get next descriptor pair and consume slot */ + prod = queue->queue.prod; + slot = queue->slots[ prod & queue->mask ]; + index = ( slot * VIRTIO_NET_DESCS ); + desc = &queue->queue.desc[index]; - vring_add_buf ( vq, list, out, in, iobuf, 0 ); - vring_kick ( virtnet->virtio_version ? &virtnet->vdev : NULL, - virtnet->ioaddr, vq, 1 ); + /* Populate descriptors */ + desc[1].addr = cpu_to_le64 ( iob_dma ( iobuf ) ); + desc[1].len = cpu_to_le32 ( len ); + DBGC2 ( vnet, "VNET %s Q%d [%02x-%02x] is [%lx,%lx)\n", + virtio->name, queue->queue.index, index, ( index + 1 ), + virt_to_phys ( iobuf->data ), + ( virt_to_phys ( iobuf->data ) + len ) ); + + /* Record I/O buffer */ + assert ( queue->iobufs[slot] == NULL ); + queue->iobufs[slot] = iobuf; + + /* Submit descriptors */ + virtio_submit ( &queue->queue, index ); } -/** Try to keep rx virtqueue filled with iobufs +/** + * Complete I/O buffer * - * @v netdev Network device + * @v vnet Virtio network device + * @v queue Virtio network queue + * @v len Length to fill in (or NULL to ignore) + * @ret iobuf I/O buffer */ -static void virtnet_refill_rx_virtqueue ( struct net_device *netdev ) { - struct virtnet_nic *virtnet = netdev->priv; - size_t len = ( netdev->max_pkt_len + 4 /* VLAN */ ); - - while ( virtnet->rx_num_iobufs < NUM_RX_BUF ) { - struct io_buffer *iobuf; - - /* Try to allocate a buffer, stop for now if out of memory */ - iobuf = alloc_rx_iob ( len, 
virtnet->dma ); - if ( ! iobuf ) - break; - - /* Keep track of iobuf so close() can free it */ - list_add ( &iobuf->list, &virtnet->rx_iobufs ); - - /* Mark packet length until we know the actual size */ - iob_put ( iobuf, len ); - - virtnet_enqueue_iob ( netdev, RX_INDEX, iobuf ); - virtnet->rx_num_iobufs++; - } -} - -/** Helper to free all virtqueue memory - * - * @v netdev Network device - */ -static void virtnet_free_virtqueues ( struct net_device *netdev ) { - struct virtnet_nic *virtnet = netdev->priv; - int i; - - for ( i = 0; i < QUEUE_NB; i++ ) { - virtio_pci_unmap_capability ( &virtnet->virtqueue[i].notification ); - vp_free_vq ( &virtnet->virtqueue[i] ); - } - - free ( virtnet->virtqueue ); - virtnet->virtqueue = NULL; -} - -/** Open network device, legacy virtio 0.9.5 - * - * @v netdev Network device - * @ret rc Return status code - */ -static int virtnet_open_legacy ( struct net_device *netdev ) { - struct virtnet_nic *virtnet = netdev->priv; - unsigned long ioaddr = virtnet->ioaddr; - u32 features; - int i; - - /* Reset for sanity */ - vp_reset ( ioaddr ); - - /* Allocate virtqueues */ - virtnet->virtqueue = zalloc ( QUEUE_NB * - sizeof ( *virtnet->virtqueue ) ); - if ( ! 
virtnet->virtqueue ) - return -ENOMEM; - - /* Initialize rx/tx virtqueues */ - for ( i = 0; i < QUEUE_NB; i++ ) { - if ( vp_find_vq ( ioaddr, i, &virtnet->virtqueue[i], virtnet->dma, - sizeof ( struct virtio_net_hdr_modern ) ) == -1 ) { - DBGC ( virtnet, "VIRTIO-NET %p cannot register queue %d\n", - virtnet, i ); - virtnet_free_virtqueues ( netdev ); - return -ENOENT; - } - } - - /* Initialize rx packets */ - INIT_LIST_HEAD ( &virtnet->rx_iobufs ); - virtnet->rx_num_iobufs = 0; - virtnet_refill_rx_virtqueue ( netdev ); - - /* Disable interrupts before starting */ - netdev_irq ( netdev, 0 ); - - /* Driver is ready */ - features = vp_get_features ( ioaddr ); - vp_set_features ( ioaddr, features & ( ( 1 << VIRTIO_NET_F_MAC ) | - ( 1 << VIRTIO_NET_F_MTU ) ) ); - vp_set_status ( ioaddr, VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK ); - return 0; -} - -/** Open network device, modern virtio 1.0 - * - * @v netdev Network device - * @ret rc Return status code - */ -static int virtnet_open_modern ( struct net_device *netdev ) { - struct virtnet_nic *virtnet = netdev->priv; - u64 features; - u8 status; - - /* Negotiate features */ - features = vpm_get_features ( &virtnet->vdev ); - if ( ! ( features & VIRTIO_F_VERSION_1 ) ) { - vpm_add_status ( &virtnet->vdev, VIRTIO_CONFIG_S_FAILED ); - return -EINVAL; - } - vpm_set_features ( &virtnet->vdev, features & ( - ( 1ULL << VIRTIO_NET_F_MAC ) | - ( 1ULL << VIRTIO_NET_F_MTU ) | - ( 1ULL << VIRTIO_F_VERSION_1 ) | - ( 1ULL << VIRTIO_F_ANY_LAYOUT ) | - ( 1ULL << VIRTIO_F_IOMMU_PLATFORM ) ) ); - vpm_add_status ( &virtnet->vdev, VIRTIO_CONFIG_S_FEATURES_OK ); - - status = vpm_get_status ( &virtnet->vdev ); - if ( ! 
( status & VIRTIO_CONFIG_S_FEATURES_OK ) ) { - DBGC ( virtnet, "VIRTIO-NET %p device didn't accept features\n", - virtnet ); - vpm_add_status ( &virtnet->vdev, VIRTIO_CONFIG_S_FAILED ); - return -EINVAL; - } - - /* Allocate virtqueues */ - virtnet->virtqueue = zalloc ( QUEUE_NB * - sizeof ( *virtnet->virtqueue ) ); - if ( ! virtnet->virtqueue ) { - vpm_add_status ( &virtnet->vdev, VIRTIO_CONFIG_S_FAILED ); - return -ENOMEM; - } - - /* Initialize rx/tx virtqueues */ - if ( vpm_find_vqs ( &virtnet->vdev, QUEUE_NB, virtnet->virtqueue, - virtnet->dma, sizeof ( struct virtio_net_hdr_modern ) ) ) { - DBGC ( virtnet, "VIRTIO-NET %p cannot register queues\n", - virtnet ); - virtnet_free_virtqueues ( netdev ); - vpm_add_status ( &virtnet->vdev, VIRTIO_CONFIG_S_FAILED ); - return -ENOENT; - } - - /* Disable interrupts before starting */ - netdev_irq ( netdev, 0 ); - - vpm_add_status ( &virtnet->vdev, VIRTIO_CONFIG_S_DRIVER_OK ); - - /* Initialize rx packets */ - INIT_LIST_HEAD ( &virtnet->rx_iobufs ); - virtnet->rx_num_iobufs = 0; - virtnet_refill_rx_virtqueue ( netdev ); - return 0; -} - -/** Open network device - * - * @v netdev Network device - * @ret rc Return status code - */ -static int virtnet_open ( struct net_device *netdev ) { - struct virtnet_nic *virtnet = netdev->priv; - - if ( virtnet->virtio_version ) { - return virtnet_open_modern ( netdev ); - } else { - return virtnet_open_legacy ( netdev ); - } -} - -/** Close network device - * - * @v netdev Network device - */ -static void virtnet_close ( struct net_device *netdev ) { - struct virtnet_nic *virtnet = netdev->priv; +static struct io_buffer * virtio_net_complete ( struct virtio_net *vnet, + struct virtio_net_queue *queue, + size_t *len ) { + struct virtio_device *virtio = &vnet->virtio; struct io_buffer *iobuf; - struct io_buffer *next_iobuf; + unsigned int cons; + unsigned int slot; + unsigned int index; - if ( virtnet->virtio_version ) { - vpm_reset ( &virtnet->vdev ); - } else { - vp_reset ( 
virtnet->ioaddr ); - } + /* Complete descriptor pair and recycle slot */ + cons = queue->queue.cons; + index = virtio_complete ( &queue->queue, len ); + slot = ( index / VIRTIO_NET_DESCS ); + queue->slots[ cons & queue->mask ] = slot; - /* Virtqueues can be freed now that NIC is reset */ - virtnet_free_virtqueues ( netdev ); + /* Complete I/O buffer */ + iobuf = queue->iobufs[slot]; + assert ( iobuf != NULL ); + queue->iobufs[slot] = NULL; + DBGC2 ( vnet, "VNET %s Q%d [%02x-%02x] complete", + virtio->name, queue->queue.index, index, ( index + 1 ) ); + if ( len ) + DBGC2 ( vnet, " len %#zx", *len ); + DBGC2 ( vnet, "\n" ); - /* Free rx iobufs */ - list_for_each_entry_safe ( iobuf, next_iobuf, &virtnet->rx_iobufs, - list ) { - list_del ( &iobuf->list ); - free_rx_iob ( iobuf ); - } - virtnet->rx_num_iobufs = 0; + return iobuf; } -/** Transmit packet +/****************************************************************************** * - * @v netdev Network device - * @v iobuf I/O buffer - * @ret rc Return status code + * Network device interface + * + ****************************************************************************** */ -static int virtnet_transmit ( struct net_device *netdev, - struct io_buffer *iobuf ) { - virtnet_enqueue_iob ( netdev, TX_INDEX, iobuf ); + +/** + * Refill receive queue + * + * @v vnet Virtio network device + */ +static void virtio_net_refill_rx ( struct virtio_net *vnet ) { + struct virtio_device *virtio = &vnet->virtio; + struct virtio_net_queue *queue = &vnet->rx; + struct io_buffer *iobuf; + size_t len = vnet->mfs; + unsigned int refilled = 0; + + /* Refill queue */ + while ( ( queue->queue.prod - queue->queue.cons ) < queue->fill ) { + + /* Allocate I/O buffer */ + iobuf = alloc_rx_iob ( len, virtio->dma ); + if ( !
iobuf ) { + /* Wait for next refill */ + break; + } + + /* Submit I/O buffer */ + virtio_net_submit ( vnet, queue, iobuf, len ); + refilled++; + } + + /* Notify queue, if applicable */ + if ( refilled ) + virtio_notify ( &queue->queue ); +} + +/** + * Open network device + * + * @v netdev Network device + * @ret rc Return status code + */ +static int virtio_net_open ( struct net_device *netdev ) { + struct virtio_net *vnet = netdev->priv; + struct virtio_device *virtio = &vnet->virtio; + union virtio_net_header hdr; + int rc; + + /* (Re)initialise device */ + if ( ( rc = virtio_init ( virtio, &virtio_net_features ) ) != 0 ) { + DBGC ( vnet, "VNET %s could not initialise: %s\n", + virtio->name, strerror ( rc ) ); + goto err_init; + } + + /* Calculate header length */ + vnet->hlen = ( virtio_is_legacy ( virtio ) ? + sizeof ( hdr.legacy ) : sizeof ( hdr.modern ) ); + + /* Calculate maximum frame size */ + vnet->mfs = ( ETH_HLEN + 4 /* possible VLAN */ + netdev->mtu ); + + /* Enable receive queue */ + if ( ( rc = virtio_net_enable ( vnet, &vnet->rx ) ) != 0 ) { + DBGC ( vnet, "VNET %s could not enable RX: %s\n", + virtio->name, strerror ( rc ) ); + goto err_rx; + } + + /* Enable transmit queue */ + if ( ( rc = virtio_net_enable ( vnet, &vnet->tx ) ) != 0 ) { + DBGC ( vnet, "VNET %s could not enable TX: %s\n", + virtio->name, strerror ( rc ) ); + goto err_tx; + } + + /* Report driver readiness */ + virtio_status ( virtio, VIRTIO_STAT_DRIVER_OK ); + + /* Refill receive queue */ + virtio_net_refill_rx ( vnet ); + + return 0; + + dma_unmap ( &vnet->tx.map, sizeof ( vnet->tx.hdr ) ); + err_tx: + dma_unmap ( &vnet->rx.map, sizeof ( vnet->rx.hdr ) ); + err_rx: + /* There may be no way to disable individual queues: we must + * reset the whole device instead and then free the queues. 
+ */ + virtio_reset ( virtio ); + virtio_free ( virtio, &vnet->rx.queue ); + virtio_free ( virtio, &vnet->tx.queue ); + err_init: + return rc; +} + +/** + * Close network device + * + * @v netdev Network device + */ +static void virtio_net_close ( struct net_device *netdev ) { + struct virtio_net *vnet = netdev->priv; + struct virtio_device *virtio = &vnet->virtio; + unsigned int i; + + /* Reset device */ + virtio_reset ( virtio ); + + /* Unmap headers (now that device is guaranteed idle) */ + dma_unmap ( &vnet->rx.map, sizeof ( vnet->rx.hdr ) ); + dma_unmap ( &vnet->tx.map, sizeof ( vnet->tx.hdr ) ); + + /* Free queues */ + virtio_free ( virtio, &vnet->rx.queue ); + virtio_free ( virtio, &vnet->tx.queue ); + + /* Discard any incomplete RX buffers */ + for ( i = 0 ; i < VIRTIO_NET_RX_MAX ; i++ ) + free_rx_iob ( vnet->rx_iobufs[i] ); +} + +/** + * Transmit packet + * + * @v netdev Network device + * @v iobuf I/O buffer + * @ret rc Return status code + */ +static int virtio_net_transmit ( struct net_device *netdev, + struct io_buffer *iobuf ) { + struct virtio_net *vnet = netdev->priv; + struct virtio_device *virtio = &vnet->virtio; + struct virtio_net_queue *queue = &vnet->tx; + + /* Check for an available transmit descriptor */ + if ( ( queue->queue.prod - queue->queue.cons ) >= queue->fill ) { + DBGC ( vnet, "VNET %s out of transmit descriptors\n", + virtio->name ); + return -ENOBUFS; + } + + /* Submit I/O buffer */ + virtio_net_submit ( vnet, queue, iobuf, iob_len ( iobuf ) ); + + /* Notify queue */ + virtio_notify ( &queue->queue ); + return 0; } -/** Complete packet transmission +/** + * Poll for completed packets * - * @v netdev Network device + * @v netdev Network device */ -static void virtnet_process_tx_packets ( struct net_device *netdev ) { - struct virtnet_nic *virtnet = netdev->priv; - struct vring_virtqueue *tx_vq = &virtnet->virtqueue[TX_INDEX]; +static void virtio_net_poll_tx ( struct net_device *netdev ) { + struct virtio_net *vnet = netdev->priv; + 
struct virtio_net_queue *queue = &vnet->tx; + struct io_buffer *iobuf; - while ( vring_more_used ( tx_vq ) ) { - struct io_buffer *iobuf = vring_get_buf ( tx_vq, NULL ); + /* Poll for completed descriptors */ + while ( virtio_completions ( &queue->queue ) ) { - DBGC2 ( virtnet, "VIRTIO-NET %p tx complete iobuf %p\n", - virtnet, iobuf ); - + /* Complete I/O buffer */ + iobuf = virtio_net_complete ( vnet, queue, NULL ); netdev_tx_complete ( netdev, iobuf ); } } -/** Complete packet reception +/** + * Poll for received packets * - * @v netdev Network device + * @v netdev Network device */ -static void virtnet_process_rx_packets ( struct net_device *netdev ) { - struct virtnet_nic *virtnet = netdev->priv; - struct vring_virtqueue *rx_vq = &virtnet->virtqueue[RX_INDEX]; +static void virtio_net_poll_rx ( struct net_device *netdev ) { + struct virtio_net *vnet = netdev->priv; + struct virtio_net_queue *queue = &vnet->rx; + struct io_buffer *iobuf; + size_t len; - while ( vring_more_used ( rx_vq ) ) { - unsigned int len; - struct io_buffer *iobuf = vring_get_buf ( rx_vq, &len ); + /* Poll for completed descriptors */ + while ( virtio_completions ( &queue->queue ) > 0 ) { - /* Release ownership of iobuf */ - list_del ( &iobuf->list ); - virtnet->rx_num_iobufs--; - - /* Update iobuf length */ - iob_unput ( iobuf, iob_len ( iobuf ) ); - iob_put ( iobuf, len - sizeof ( struct virtio_net_hdr ) ); - - DBGC2 ( virtnet, "VIRTIO-NET %p rx complete iobuf %p len %zd\n", - virtnet, iobuf, iob_len ( iobuf ) ); - - /* Pass completed packet to the network stack */ + /* Complete I/O buffer */ + iobuf = virtio_net_complete ( vnet, queue, &len ); + iob_put ( iobuf, ( len - vnet->hlen ) ); netdev_rx ( netdev, iobuf ); } - - virtnet_refill_rx_virtqueue ( netdev ); } -/** Poll for completed and received packets +/** + * Poll for completed and received packets * - * @v netdev Network device + * @v netdev Network device */ -static void virtnet_poll ( struct net_device *netdev ) { - struct 
virtnet_nic *virtnet = netdev->priv; +static void virtio_net_poll ( struct net_device *netdev ) { + struct virtio_net *vnet = netdev->priv; - /* Acknowledge interrupt. This is necessary for UNDI operation and - * interrupts that are raised despite VRING_AVAIL_F_NO_INTERRUPT being - * set (that flag is just a hint and the hypervisor does not have to - * honor it). - */ - if ( virtnet->virtio_version ) { - vpm_get_isr ( &virtnet->vdev ); - } else { - vp_get_isr ( virtnet->ioaddr ); - } + /* Poll for completed packets */ + virtio_net_poll_tx ( netdev ); - virtnet_process_tx_packets ( netdev ); - virtnet_process_rx_packets ( netdev ); + /* Poll for received packets */ + virtio_net_poll_rx ( netdev ); + + /* Refill receive queue */ + virtio_net_refill_rx ( vnet ); } -/** Enable or disable interrupts - * - * @v netdev Network device - * @v enable Interrupts should be enabled - */ -static void virtnet_irq ( struct net_device *netdev, int enable ) { - struct virtnet_nic *virtnet = netdev->priv; - int i; - - for ( i = 0; i < QUEUE_NB; i++ ) { - if ( enable ) - vring_enable_cb ( &virtnet->virtqueue[i] ); - else - vring_disable_cb ( &virtnet->virtqueue[i] ); - } -} - -/** virtio-net device operations */ -static struct net_device_operations virtnet_operations = { - .open = virtnet_open, - .close = virtnet_close, - .transmit = virtnet_transmit, - .poll = virtnet_poll, - .irq = virtnet_irq, +/** Virtio network device operations */ +static struct net_device_operations virtio_net_operations = { + .open = virtio_net_open, + .close = virtio_net_close, + .transmit = virtio_net_transmit, + .poll = virtio_net_poll, }; -/** - * Probe PCI device, legacy virtio 0.9.5 +/****************************************************************************** * - * @v pci PCI device - * @ret rc Return status code - */ -static int virtnet_probe_legacy ( struct pci_device *pci ) { - unsigned long ioaddr = pci->ioaddr; - struct net_device *netdev; - struct virtnet_nic *virtnet; - u32 features; - u16 mtu; 
- int rc; - - /* Allocate and hook up net device */ - netdev = alloc_etherdev ( sizeof ( *virtnet ) ); - if ( ! netdev ) - return -ENOMEM; - netdev_init ( netdev, &virtnet_operations ); - virtnet = netdev->priv; - virtnet->ioaddr = ioaddr; - pci_set_drvdata ( pci, netdev ); - netdev->dev = &pci->dev; - - DBGC ( virtnet, "VIRTIO-NET %p busaddr=%s ioaddr=%#lx irq=%d\n", - virtnet, pci->dev.name, ioaddr, pci->irq ); - - /* Enable PCI bus master and reset NIC */ - adjust_pci_device ( pci ); - - /* Configure DMA */ - virtnet->dma = &pci->dma; - dma_set_mask_64bit ( virtnet->dma ); - netdev->dma = virtnet->dma; - - vp_reset ( ioaddr ); - - /* Load MAC address and MTU */ - features = vp_get_features ( ioaddr ); - if ( features & ( 1 << VIRTIO_NET_F_MAC ) ) { - vp_get ( ioaddr, offsetof ( struct virtio_net_config, mac ), - netdev->hw_addr, ETH_ALEN ); - DBGC ( virtnet, "VIRTIO-NET %p mac=%s\n", virtnet, - eth_ntoa ( netdev->hw_addr ) ); - } - if ( features & ( 1ULL << VIRTIO_NET_F_MTU ) ) { - vp_get ( ioaddr, offsetof ( struct virtio_net_config, mtu ), - &mtu, sizeof ( mtu ) ); - DBGC ( virtnet, "VIRTIO-NET %p mtu=%d\n", virtnet, mtu ); - netdev->max_pkt_len = ( mtu + ETH_HLEN ); - netdev->mtu = mtu; - } - - /* Register network device */ - if ( ( rc = register_netdev ( netdev ) ) != 0 ) - goto err_register_netdev; - - /* Mark link as up, control virtqueue is not used */ - netdev_link_up ( netdev ); - - return 0; - - unregister_netdev ( netdev ); -err_register_netdev: - vp_reset ( ioaddr ); - netdev_nullify ( netdev ); - netdev_put ( netdev ); - return rc; -} - -/** - * Probe PCI device, modern virtio 1.0 + * PCI interface * - * @v pci PCI device - * @v found_dev Set to non-zero if modern device was found (probe may still fail) - * @ret rc Return status code + ****************************************************************************** */ -static int virtnet_probe_modern ( struct pci_device *pci, int *found_dev ) { - struct net_device *netdev; - struct virtnet_nic 
*virtnet; - u64 features; - u16 mtu; - int rc, common, isr, notify, config, device; - - common = virtio_pci_find_capability ( pci, VIRTIO_PCI_CAP_COMMON_CFG ); - if ( ! common ) { - DBG ( "Common virtio capability not found!\n" ); - return -ENODEV; - } - *found_dev = 1; - - isr = virtio_pci_find_capability ( pci, VIRTIO_PCI_CAP_ISR_CFG ); - notify = virtio_pci_find_capability ( pci, VIRTIO_PCI_CAP_NOTIFY_CFG ); - config = virtio_pci_find_capability ( pci, VIRTIO_PCI_CAP_PCI_CFG ); - if ( ! isr || ! notify || ! config ) { - DBG ( "Missing virtio capabilities %i/%i/%i/%i\n", - common, isr, notify, config ); - return -EINVAL; - } - device = virtio_pci_find_capability ( pci, VIRTIO_PCI_CAP_DEVICE_CFG ); - - /* Allocate and hook up net device */ - netdev = alloc_etherdev ( sizeof ( *virtnet ) ); - if ( ! netdev ) - return -ENOMEM; - netdev_init ( netdev, &virtnet_operations ); - virtnet = netdev->priv; - - pci_set_drvdata ( pci, netdev ); - netdev->dev = &pci->dev; - - DBGC ( virtnet, "VIRTIO-NET modern %p busaddr=%s irq=%d\n", - virtnet, pci->dev.name, pci->irq ); - - virtnet->vdev.pci = pci; - rc = virtio_pci_map_capability ( pci, common, - sizeof ( struct virtio_pci_common_cfg ), 4, - 0, sizeof ( struct virtio_pci_common_cfg ), - &virtnet->vdev.common ); - if ( rc ) - goto err_map_common; - - rc = virtio_pci_map_capability ( pci, isr, sizeof ( u8 ), 1, - 0, 1, - &virtnet->vdev.isr ); - if ( rc ) - goto err_map_isr; - - virtnet->vdev.notify_cap_pos = notify; - virtnet->vdev.cfg_cap_pos = config; - - /* Map the device capability */ - if ( device ) { - rc = virtio_pci_map_capability ( pci, device, - 0, 4, 0, sizeof ( struct virtio_net_config ), - &virtnet->vdev.device ); - if ( rc ) - goto err_map_device; - } - - /* Enable the PCI device */ - adjust_pci_device ( pci ); - - /* Configure DMA */ - virtnet->dma = &pci->dma; - dma_set_mask_64bit ( virtnet->dma ); - netdev->dma = virtnet->dma; - - /* Reset the device and set initial status bits */ - vpm_reset ( &virtnet->vdev 
); - vpm_add_status ( &virtnet->vdev, VIRTIO_CONFIG_S_ACKNOWLEDGE ); - vpm_add_status ( &virtnet->vdev, VIRTIO_CONFIG_S_DRIVER ); - - /* Load MAC address and MTU */ - if ( device ) { - features = vpm_get_features ( &virtnet->vdev ); - if ( features & ( 1ULL << VIRTIO_NET_F_MAC ) ) { - vpm_get ( &virtnet->vdev, - offsetof ( struct virtio_net_config, mac ), - netdev->hw_addr, ETH_ALEN ); - DBGC ( virtnet, "VIRTIO-NET %p mac=%s\n", virtnet, - eth_ntoa ( netdev->hw_addr ) ); - } - if ( features & ( 1ULL << VIRTIO_NET_F_MTU ) ) { - vpm_get ( &virtnet->vdev, - offsetof ( struct virtio_net_config, mtu ), - &mtu, sizeof ( mtu ) ); - DBGC ( virtnet, "VIRTIO-NET %p mtu=%d\n", virtnet, - mtu ); - netdev->max_pkt_len = ( mtu + ETH_HLEN ); - netdev->mtu = mtu; - } - } - - /* We need a valid MAC address */ - if ( ! is_valid_ether_addr ( netdev->hw_addr ) ) { - rc = -EADDRNOTAVAIL; - goto err_mac_address; - } - - /* Register network device */ - if ( ( rc = register_netdev ( netdev ) ) != 0 ) - goto err_register_netdev; - - /* Mark link as up, control virtqueue is not used */ - netdev_link_up ( netdev ); - - virtnet->virtio_version = 1; - return 0; - - unregister_netdev ( netdev ); -err_register_netdev: -err_mac_address: - vpm_reset ( &virtnet->vdev ); - netdev_nullify ( netdev ); - netdev_put ( netdev ); - virtio_pci_unmap_capability ( &virtnet->vdev.device ); -err_map_device: - virtio_pci_unmap_capability ( &virtnet->vdev.isr ); -err_map_isr: - virtio_pci_unmap_capability ( &virtnet->vdev.common ); -err_map_common: - return rc; -} /** * Probe PCI device * - * @v pci PCI device - * @ret rc Return status code + * @v pci PCI device + * @ret rc Return status code */ -static int virtnet_probe ( struct pci_device *pci ) { - int found_modern = 0; - int rc = virtnet_probe_modern ( pci, &found_modern ); - if ( ! 
found_modern && pci->device < 0x1040 ) { - /* fall back to the legacy probe */ - rc = virtnet_probe_legacy ( pci ); +static int virtio_net_probe ( struct pci_device *pci ) { + struct net_device *netdev; + struct virtio_net *vnet; + struct virtio_device *virtio; + int rc; + + /* Allocate and initialise net device */ + netdev = alloc_etherdev ( sizeof ( *vnet ) ); + if ( ! netdev ) { + rc = -ENOMEM; + goto err_alloc; } + netdev_init ( netdev, &virtio_net_operations ); + vnet = netdev->priv; + pci_set_drvdata ( pci, netdev ); + netdev->dev = &pci->dev; + netdev->dma = &pci->dma; + memset ( vnet, 0, sizeof ( *vnet ) ); + virtio = &vnet->virtio; + virtio_net_queue_init ( &vnet->rx, vnet->rx_iobufs, vnet->rx_slots, + VIRTIO_NET_RX_INDEX, VIRTIO_NET_RX_COUNT, + VIRTIO_NET_RX_MAX, DMA_RX, + VIRTIO_DESC_FL_WRITE ); + virtio_net_queue_init ( &vnet->tx, vnet->tx_iobufs, vnet->tx_slots, + VIRTIO_NET_TX_INDEX, VIRTIO_NET_TX_COUNT, + VIRTIO_NET_TX_MAX, DMA_TX, 0 ); + + /* Map PCI device */ + if ( ( rc = virtio_pci_map ( virtio, pci ) ) != 0 ) { + DBGC ( vnet, "VNET %s could not map: %s\n", + virtio->name, strerror ( rc ) ); + goto err_pci_map; + } + + /* Initialise device */ + if ( ( rc = virtio_init ( virtio, &virtio_net_features ) ) != 0 ) { + DBGC ( vnet, "VNET %s could not initialise: %s\n", + virtio->name, strerror ( rc ) ); + goto err_init; + } + + /* Get MAC address */ + virtio_net_mac ( netdev ); + + /* Set MTU */ + virtio_net_mtu ( netdev ); + + /* Register network device */ + if ( ( rc = register_netdev ( netdev ) ) != 0 ) + goto err_register; + + /* Mark as link up, since we have no way to test link state changes */ + netdev_link_up ( netdev ); + + return 0; + + unregister_netdev ( netdev ); + err_register: + virtio_reset ( virtio ); + err_init: + virtio_unmap ( virtio ); + err_pci_map: + netdev_nullify ( netdev ); + netdev_put ( netdev ); + err_alloc: return rc; } /** - * Remove device + * Remove PCI device * - * @v pci PCI device + * @v pci PCI device */ -static 
void virtnet_remove ( struct pci_device *pci ) { +static void virtio_net_remove ( struct pci_device *pci ) { struct net_device *netdev = pci_get_drvdata ( pci ); - struct virtnet_nic *virtnet = netdev->priv; + struct virtio_net *vnet = netdev->priv; + struct virtio_device *virtio = &vnet->virtio; + /* Unregister network device */ unregister_netdev ( netdev ); - virtio_pci_unmap_capability ( &virtnet->vdev.device ); - virtio_pci_unmap_capability ( &virtnet->vdev.isr ); - virtio_pci_unmap_capability ( &virtnet->vdev.common ); + /* Reset device */ + virtio_reset ( virtio ); + /* Free network device */ + virtio_unmap ( virtio ); netdev_nullify ( netdev ); netdev_put ( netdev ); } -static struct pci_device_id virtnet_nics[] = { -PCI_ROM(0x1af4, 0x1000, "virtio-net", "Virtio Network Interface", 0), -PCI_ROM(0x1af4, 0x1041, "virtio-net", "Virtio Network Interface 1.0", 0), +/** Virtio network PCI device IDs */ +static struct pci_device_id virtio_net_ids[] = { + PCI_ROM ( 0x1af4, 0x1000, "virtio-net", "Virtio (legacy)", 0 ), + PCI_ROM ( 0x1af4, 0x1041, "virtio-net", "Virtio (modern)", 0 ), }; -struct pci_driver virtnet_driver __pci_driver = { - .ids = virtnet_nics, - .id_count = ( sizeof ( virtnet_nics ) / sizeof ( virtnet_nics[0] ) ), - .probe = virtnet_probe, - .remove = virtnet_remove, +/** Virtio network PCI driver */ +struct pci_driver virtio_net_driver __pci_driver = { + .ids = virtio_net_ids, + .id_count = ( sizeof ( virtio_net_ids ) / + sizeof ( virtio_net_ids[0] ) ), + .probe = virtio_net_probe, + .remove = virtio_net_remove, };
diff --git a/src/drivers/net/virtio-net.h b/src/drivers/net/virtio-net.h index ff58d3e..2a3423f 100644 --- a/src/drivers/net/virtio-net.h +++ b/src/drivers/net/virtio-net.h
@@ -1,70 +1,137 @@ -#ifndef _VIRTIO_NET_H_ -# define _VIRTIO_NET_H_ +#ifndef _VIRTIO_NET_H +#define _VIRTIO_NET_H -/* The feature bitmap for virtio net */ -#define VIRTIO_NET_F_CSUM 0 /* Host handles pkts w/ partial csum */ -#define VIRTIO_NET_F_GUEST_CSUM 1 /* Guest handles pkts w/ partial csum */ -#define VIRTIO_NET_F_MTU 3 /* Initial MTU advice */ -#define VIRTIO_NET_F_MAC 5 /* Host has given MAC address. */ -#define VIRTIO_NET_F_GSO 6 /* Host handles pkts w/ any GSO type */ -#define VIRTIO_NET_F_GUEST_TSO4 7 /* Guest can handle TSOv4 in. */ -#define VIRTIO_NET_F_GUEST_TSO6 8 /* Guest can handle TSOv6 in. */ -#define VIRTIO_NET_F_GUEST_ECN 9 /* Guest can handle TSO[6] w/ ECN in. */ -#define VIRTIO_NET_F_GUEST_UFO 10 /* Guest can handle UFO in. */ -#define VIRTIO_NET_F_HOST_TSO4 11 /* Host can handle TSOv4 in. */ -#define VIRTIO_NET_F_HOST_TSO6 12 /* Host can handle TSOv6 in. */ -#define VIRTIO_NET_F_HOST_ECN 13 /* Host can handle TSO[6] w/ ECN in. */ -#define VIRTIO_NET_F_HOST_UFO 14 /* Host can handle UFO in. */ -#define VIRTIO_NET_F_MRG_RXBUF 15 /* Driver can merge receive buffers. */ -#define VIRTIO_NET_F_STATUS 16 /* Configuration status field is available. */ -#define VIRTIO_NET_F_CTRL_VQ 17 /* Control channel is available. */ -#define VIRTIO_NET_F_CTRL_RX 18 /* Control channel RX mode support. */ -#define VIRTIO_NET_F_CTRL_VLAN 19 /* Control channel VLAN filtering. */ -#define VIRTIO_NET_F_GUEST_ANNOUNCE 21 /* Driver can send gratuitous packets. */ +/** @file + * + * Virtual I/O network device + * + */ -struct virtio_net_config -{ - /* The config defining mac address (if VIRTIO_NET_F_MAC) */ - u8 mac[6]; - /* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */ - u16 status; - /* Maximum number of each of transmit and receive queues; - * see VIRTIO_NET_F_MQ and VIRTIO_NET_CTRL_MQ. 
- * Legal values are between 1 and 0x8000 - */ - u16 max_virtqueue_pairs; - /* Default maximum transmit unit advice */ - u16 mtu; -} __attribute__((packed)); +FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL ); +FILE_SECBOOT ( PERMITTED ); -/* This is the first element of the scatter-gather list. If you don't - * specify GSO or CSUM features, you can simply ignore the header. */ +#include <ipxe/virtio.h> -struct virtio_net_hdr -{ -#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 // Use csum_start, csum_offset - uint8_t flags; -#define VIRTIO_NET_HDR_GSO_NONE 0 // Not a GSO frame -#define VIRTIO_NET_HDR_GSO_TCPV4 1 // GSO frame, IPv4 TCP (TSO) -/* FIXME: Do we need this? If they said they can handle ECN, do they care? */ -#define VIRTIO_NET_HDR_GSO_TCPV4_ECN 2 // GSO frame, IPv4 TCP w/ ECN -#define VIRTIO_NET_HDR_GSO_UDP 3 // GSO frame, IPv4 UDP (UFO) -#define VIRTIO_NET_HDR_GSO_TCPV6 4 // GSO frame, IPv6 TCP -#define VIRTIO_NET_HDR_GSO_ECN 0x80 // TCP has ECN set - uint8_t gso_type; - uint16_t hdr_len; - uint16_t gso_size; - uint16_t csum_start; - uint16_t csum_offset; +/** Device has a reported MTU */ +#define VIRTIO_FEAT0_NET_MTU 0x00000008 + +/** Device has a MAC address */ +#define VIRTIO_FEAT0_NET_MAC 0x00000020 + +/** MAC address register offset */ +#define VIRTIO_NET_MAC 0x00 + +/** MTU register offset */ +#define VIRTIO_NET_MTU 0x0a + +/** A virtio network packet header */ +union virtio_net_header { + /** Legacy interface */ + uint8_t legacy[10]; + /** Modern (version 1.0) interface */ + uint8_t modern[12]; +} __attribute__ (( packed )); + +/** Receive queue index */ +#define VIRTIO_NET_RX_INDEX 0 + +/** Receive queue requested queue size */ +#define VIRTIO_NET_RX_COUNT 128 + +/** Receive queue maximum fill level */ +#define VIRTIO_NET_RX_MAX 16 + +/** Transmit queue index */ +#define VIRTIO_NET_TX_INDEX 1 + +/** Transmit queue requested queue size */ +#define VIRTIO_NET_TX_COUNT 128 + +/** Transmit queue maximum fill level */ +#define VIRTIO_NET_TX_MAX 32 + +/** Number of 
descriptors per packet */ +#define VIRTIO_NET_DESCS 2 + +/** A virtio network queue */ +struct virtio_net_queue { + /** Underlying virtio queue */ + struct virtio_queue queue; + /** I/O buffer list */ + struct io_buffer **iobufs; + /** Descriptor slot ring */ + uint8_t *slots; + /** Effective fill level */ + unsigned int fill; + /** Descriptor index ring mask */ + unsigned int mask; + + /** Shared packet header */ + union virtio_net_header hdr; + /** DMA mapping for packet header */ + struct dma_mapping map; + + /** DMA direction for packet header */ + uint8_t dma; + /** Buffer writability flag for packet header */ + uint8_t write; + /** Requested queue size */ + uint8_t count; + /** Maximum fill level */ + uint8_t max; }; -/* Virtio 1.0 version of the first element of the scatter-gather list. */ -struct virtio_net_hdr_modern -{ - struct virtio_net_hdr legacy; +/** + * Initialise virtio network queue + * + * @v queue Virtio network queue + * @v index Queue index + * @v iobufs I/O buffer list + * @v slots Descriptor slot ring + * @v dma DMA direction for packet header + * @v write Writability flag for packet header + * @v count Requested queue size + * @v max Maximum fill level + */ +static inline __attribute__ (( always_inline )) void +virtio_net_queue_init ( struct virtio_net_queue *queue, + struct io_buffer **iobufs, uint8_t *slots, + unsigned int index, unsigned int count, + unsigned int max, unsigned int dma, + unsigned int write ) { - /* Used only if VIRTIO_NET_F_MRG_RXBUF: */ - uint16_t num_buffers; + queue->queue.index = index; + queue->iobufs = iobufs; + queue->slots = slots; + queue->dma = dma; + queue->write = write; + queue->count = count; + queue->max = max; +} + +/** A virtio network device */ +struct virtio_net { + /** Underlying virtio device */ + struct virtio_device virtio; + /** Receive queue */ + struct virtio_net_queue rx; + /** Transmit queue */ + struct virtio_net_queue tx; + + /** Virtio network header length */ + size_t hlen; + /** Maximum 
frame size */ + size_t mfs; + + /** Receive descriptor slot ring */ + uint8_t rx_slots[VIRTIO_NET_RX_MAX]; + /** Receive I/O buffers */ + struct io_buffer *rx_iobufs[VIRTIO_NET_RX_MAX]; + + /** Transmit descriptor slot ring */ + uint8_t tx_slots[VIRTIO_NET_TX_MAX]; + /** Transmit I/O buffers */ + struct io_buffer *tx_iobufs[VIRTIO_NET_TX_MAX]; }; -#endif /* _VIRTIO_NET_H_ */ +#endif /* _VIRTIO_NET_H */
diff --git a/src/include/ipxe/errfile.h b/src/include/ipxe/errfile.h index a2e3ff8..048cbe6 100644 --- a/src/include/ipxe/errfile.h +++ b/src/include/ipxe/errfile.h
@@ -212,7 +212,7 @@ #define ERRFILE_eoib ( ERRFILE_DRIVER | 0x007c0000 ) #define ERRFILE_golan ( ERRFILE_DRIVER | 0x007d0000 ) #define ERRFILE_flexboot_nodnic ( ERRFILE_DRIVER | 0x007e0000 ) -#define ERRFILE_virtio_pci ( ERRFILE_DRIVER | 0x007f0000 ) +#define ERRFILE_virtio ( ERRFILE_DRIVER | 0x007f0000 ) #define ERRFILE_pciea ( ERRFILE_DRIVER | 0x00c00000 ) #define ERRFILE_axge ( ERRFILE_DRIVER | 0x00c10000 ) #define ERRFILE_thunderx ( ERRFILE_DRIVER | 0x00c20000 )
diff --git a/src/include/ipxe/pci.h b/src/include/ipxe/pci.h index f65c8c2..b16c769 100644 --- a/src/include/ipxe/pci.h +++ b/src/include/ipxe/pci.h
@@ -102,6 +102,9 @@ /** Next capability */ #define PCI_CAP_NEXT 0x01 +/** Capability length */ +#define PCI_CAP_LEN 0x02 + /** Power management control and status */ #define PCI_PM_CTRL 0x04 #define PCI_PM_CTRL_STATE_MASK 0x0003 /**< Current power state */
diff --git a/src/include/ipxe/virtio-pci.h b/src/include/ipxe/virtio-pci.h deleted file mode 100644 index 7abae26..0000000 --- a/src/include/ipxe/virtio-pci.h +++ /dev/null
@@ -1,314 +0,0 @@ -#ifndef _VIRTIO_PCI_H_ -# define _VIRTIO_PCI_H_ - -#include <ipxe/dma.h> - -/* A 32-bit r/o bitmask of the features supported by the host */ -#define VIRTIO_PCI_HOST_FEATURES 0 - -/* A 32-bit r/w bitmask of features activated by the guest */ -#define VIRTIO_PCI_GUEST_FEATURES 4 - -/* A 32-bit r/w PFN for the currently selected queue */ -#define VIRTIO_PCI_QUEUE_PFN 8 - -/* A 16-bit r/o queue size for the currently selected queue */ -#define VIRTIO_PCI_QUEUE_NUM 12 - -/* A 16-bit r/w queue selector */ -#define VIRTIO_PCI_QUEUE_SEL 14 - -/* A 16-bit r/w queue notifier */ -#define VIRTIO_PCI_QUEUE_NOTIFY 16 - -/* An 8-bit device status register. */ -#define VIRTIO_PCI_STATUS 18 - -/* An 8-bit r/o interrupt status register. Reading the value will return the - * current contents of the ISR and will also clear it. This is effectively - * a read-and-acknowledge. */ -#define VIRTIO_PCI_ISR 19 - -/* The bit of the ISR which indicates a device configuration change. */ -#define VIRTIO_PCI_ISR_CONFIG 0x2 - -/* The remaining space is defined by each driver as the per-driver - * configuration space */ -#define VIRTIO_PCI_CONFIG 20 - -/* Virtio ABI version, this must match exactly */ -#define VIRTIO_PCI_ABI_VERSION 0 - -/* PCI capability types: */ -#define VIRTIO_PCI_CAP_COMMON_CFG 1 /* Common configuration */ -#define VIRTIO_PCI_CAP_NOTIFY_CFG 2 /* Notifications */ -#define VIRTIO_PCI_CAP_ISR_CFG 3 /* ISR access */ -#define VIRTIO_PCI_CAP_DEVICE_CFG 4 /* Device specific configuration */ -#define VIRTIO_PCI_CAP_PCI_CFG 5 /* PCI configuration access */ - -#define __u8 uint8_t -#define __le16 uint16_t -#define __le32 uint32_t -#define __le64 uint64_t - -/* This is the PCI capability header: */ -struct virtio_pci_cap { - __u8 cap_vndr; /* Generic PCI field: PCI_CAP_ID_VNDR */ - __u8 cap_next; /* Generic PCI field: next ptr. */ - __u8 cap_len; /* Generic PCI field: capability length */ - __u8 cfg_type; /* Identifies the structure. 
*/ - __u8 bar; /* Where to find it. */ - __u8 padding[3]; /* Pad to full dword. */ - __le32 offset; /* Offset within bar. */ - __le32 length; /* Length of the structure, in bytes. */ -}; - -struct virtio_pci_notify_cap { - struct virtio_pci_cap cap; - __le32 notify_off_multiplier; /* Multiplier for queue_notify_off. */ -}; - -struct virtio_pci_cfg_cap { - struct virtio_pci_cap cap; - __u8 pci_cfg_data[4]; /* Data for BAR access. */ -}; - -/* Fields in VIRTIO_PCI_CAP_COMMON_CFG: */ -struct virtio_pci_common_cfg { - /* About the whole device. */ - __le32 device_feature_select; /* read-write */ - __le32 device_feature; /* read-only */ - __le32 guest_feature_select; /* read-write */ - __le32 guest_feature; /* read-write */ - __le16 msix_config; /* read-write */ - __le16 num_queues; /* read-only */ - __u8 device_status; /* read-write */ - __u8 config_generation; /* read-only */ - - /* About a specific virtqueue. */ - __le16 queue_select; /* read-write */ - __le16 queue_size; /* read-write, power of 2. */ - __le16 queue_msix_vector; /* read-write */ - __le16 queue_enable; /* read-write */ - __le16 queue_notify_off; /* read-only */ - __le32 queue_desc_lo; /* read-write */ - __le32 queue_desc_hi; /* read-write */ - __le32 queue_avail_lo; /* read-write */ - __le32 queue_avail_hi; /* read-write */ - __le32 queue_used_lo; /* read-write */ - __le32 queue_used_hi; /* read-write */ -}; - -/* Virtio 1.0 PCI region descriptor. We support memory mapped I/O, port I/O, - * and PCI config space access via the cfg PCI capability as a fallback. 
*/ -struct virtio_pci_region { - void *base; - size_t length; - u8 bar; - -/* How to interpret the base field */ -#define VIRTIO_PCI_REGION_TYPE_MASK 0x00000003 -/* The base field is a memory address */ -#define VIRTIO_PCI_REGION_MEMORY 0x00000001 -/* The base field is a port address */ -#define VIRTIO_PCI_REGION_PORT 0x00000002 -/* The base field is an offset within the PCI bar */ -#define VIRTIO_PCI_REGION_PCI_CONFIG 0x00000003 - unsigned flags; -}; - -/* Virtio 1.0 device state */ -struct virtio_pci_modern_device { - struct pci_device *pci; - - /* VIRTIO_PCI_CAP_PCI_CFG position */ - int cfg_cap_pos; - - /* VIRTIO_PCI_CAP_COMMON_CFG data */ - struct virtio_pci_region common; - - /* VIRTIO_PCI_CAP_DEVICE_CFG data */ - struct virtio_pci_region device; - - /* VIRTIO_PCI_CAP_ISR_CFG data */ - struct virtio_pci_region isr; - - /* VIRTIO_PCI_CAP_NOTIFY_CFG data */ - int notify_cap_pos; -}; - -static inline u32 vp_get_features(unsigned int ioaddr) -{ - return inl(ioaddr + VIRTIO_PCI_HOST_FEATURES); -} - -static inline void vp_set_features(unsigned int ioaddr, u32 features) -{ - outl(features, ioaddr + VIRTIO_PCI_GUEST_FEATURES); -} - -static inline void vp_get(unsigned int ioaddr, unsigned offset, - void *buf, unsigned len) -{ - u8 *ptr = buf; - unsigned i; - - for (i = 0; i < len; i++) - ptr[i] = inb(ioaddr + VIRTIO_PCI_CONFIG + offset + i); -} - -static inline u8 vp_get_status(unsigned int ioaddr) -{ - return inb(ioaddr + VIRTIO_PCI_STATUS); -} - -static inline void vp_set_status(unsigned int ioaddr, u8 status) -{ - if (status == 0) /* reset */ - return; - outb(status, ioaddr + VIRTIO_PCI_STATUS); -} - -static inline u8 vp_get_isr(unsigned int ioaddr) -{ - return inb(ioaddr + VIRTIO_PCI_ISR); -} - -static inline void vp_reset(unsigned int ioaddr) -{ - outb(0, ioaddr + VIRTIO_PCI_STATUS); - (void)inb(ioaddr + VIRTIO_PCI_ISR); -} - -static inline void vp_notify(unsigned int ioaddr, int queue_index) -{ - outw(queue_index, ioaddr + VIRTIO_PCI_QUEUE_NOTIFY); -} - -static 
inline void vp_del_vq(unsigned int ioaddr, int queue_index) -{ - /* select the queue */ - - outw(queue_index, ioaddr + VIRTIO_PCI_QUEUE_SEL); - - /* deactivate the queue */ - - outl(0, ioaddr + VIRTIO_PCI_QUEUE_PFN); -} - -struct vring_virtqueue; - -void vp_free_vq(struct vring_virtqueue *vq); -int vp_find_vq(unsigned int ioaddr, int queue_index, - struct vring_virtqueue *vq, struct dma_device *dma_dev, - size_t header_size); - - -/* Virtio 1.0 I/O routines abstract away the three possible HW access - * mechanisms - memory, port I/O, and PCI cfg space access. Also built-in - * are endianness conversions - to LE on write and from LE on read. */ - -void vpm_iowrite8(struct virtio_pci_modern_device *vdev, - struct virtio_pci_region *region, u8 data, size_t offset); - -void vpm_iowrite16(struct virtio_pci_modern_device *vdev, - struct virtio_pci_region *region, u16 data, size_t offset); - -void vpm_iowrite32(struct virtio_pci_modern_device *vdev, - struct virtio_pci_region *region, u32 data, size_t offset); - -static inline void vpm_iowrite64(struct virtio_pci_modern_device *vdev, - struct virtio_pci_region *region, - u64 data, size_t offset_lo, size_t offset_hi) -{ - vpm_iowrite32(vdev, region, (u32)data, offset_lo); - vpm_iowrite32(vdev, region, data >> 32, offset_hi); -} - -u8 vpm_ioread8(struct virtio_pci_modern_device *vdev, - struct virtio_pci_region *region, size_t offset); - -u16 vpm_ioread16(struct virtio_pci_modern_device *vdev, - struct virtio_pci_region *region, size_t offset); - -u32 vpm_ioread32(struct virtio_pci_modern_device *vdev, - struct virtio_pci_region *region, size_t offset); - -/* Virtio 1.0 device manipulation routines */ - -#define COMMON_OFFSET(field) offsetof(struct virtio_pci_common_cfg, field) - -static inline void vpm_reset(struct virtio_pci_modern_device *vdev) -{ - vpm_iowrite8(vdev, &vdev->common, 0, COMMON_OFFSET(device_status)); - while (vpm_ioread8(vdev, &vdev->common, COMMON_OFFSET(device_status))) - mdelay(1); -} - -static inline 
u8 vpm_get_status(struct virtio_pci_modern_device *vdev) -{ - return vpm_ioread8(vdev, &vdev->common, COMMON_OFFSET(device_status)); -} - -static inline void vpm_add_status(struct virtio_pci_modern_device *vdev, - u8 status) -{ - u8 curr_status = vpm_ioread8(vdev, &vdev->common, COMMON_OFFSET(device_status)); - vpm_iowrite8(vdev, &vdev->common, - curr_status | status, COMMON_OFFSET(device_status)); -} - -static inline u64 vpm_get_features(struct virtio_pci_modern_device *vdev) -{ - u32 features_lo, features_hi; - - vpm_iowrite32(vdev, &vdev->common, 0, COMMON_OFFSET(device_feature_select)); - features_lo = vpm_ioread32(vdev, &vdev->common, COMMON_OFFSET(device_feature)); - vpm_iowrite32(vdev, &vdev->common, 1, COMMON_OFFSET(device_feature_select)); - features_hi = vpm_ioread32(vdev, &vdev->common, COMMON_OFFSET(device_feature)); - - return ((u64)features_hi << 32) | features_lo; -} - -static inline void vpm_set_features(struct virtio_pci_modern_device *vdev, - u64 features) -{ - u32 features_lo = (u32)features; - u32 features_hi = features >> 32; - - vpm_iowrite32(vdev, &vdev->common, 0, COMMON_OFFSET(guest_feature_select)); - vpm_iowrite32(vdev, &vdev->common, features_lo, COMMON_OFFSET(guest_feature)); - vpm_iowrite32(vdev, &vdev->common, 1, COMMON_OFFSET(guest_feature_select)); - vpm_iowrite32(vdev, &vdev->common, features_hi, COMMON_OFFSET(guest_feature)); -} - -static inline void vpm_get(struct virtio_pci_modern_device *vdev, - unsigned offset, void *buf, unsigned len) -{ - u8 *ptr = buf; - unsigned i; - - for (i = 0; i < len; i++) - ptr[i] = vpm_ioread8(vdev, &vdev->device, offset + i); -} - -static inline u8 vpm_get_isr(struct virtio_pci_modern_device *vdev) -{ - return vpm_ioread8(vdev, &vdev->isr, 0); -} - -void vpm_notify(struct virtio_pci_modern_device *vdev, - struct vring_virtqueue *vq); - -int vpm_find_vqs(struct virtio_pci_modern_device *vdev, - unsigned nvqs, struct vring_virtqueue *vqs, - struct dma_device *dma_dev, size_t header_size); - -int 
virtio_pci_find_capability(struct pci_device *pci, uint8_t cfg_type); - -int virtio_pci_map_capability(struct pci_device *pci, int cap, size_t minlen, - u32 align, u32 start, u32 size, - struct virtio_pci_region *region); - -void virtio_pci_unmap_capability(struct virtio_pci_region *region); -#endif /* _VIRTIO_PCI_H_ */
diff --git a/src/include/ipxe/virtio-ring.h b/src/include/ipxe/virtio-ring.h deleted file mode 100644 index d082139..0000000 --- a/src/include/ipxe/virtio-ring.h +++ /dev/null
@@ -1,155 +0,0 @@ -#ifndef _VIRTIO_RING_H_ -# define _VIRTIO_RING_H_ - -#include <ipxe/virtio-pci.h> -#include <ipxe/dma.h> - -/* Status byte for guest to report progress, and synchronize features. */ -/* We have seen device and processed generic fields (VIRTIO_CONFIG_F_VIRTIO) */ -#define VIRTIO_CONFIG_S_ACKNOWLEDGE 1 -/* We have found a driver for the device. */ -#define VIRTIO_CONFIG_S_DRIVER 2 -/* Driver has used its parts of the config, and is happy */ -#define VIRTIO_CONFIG_S_DRIVER_OK 4 -/* Driver has finished configuring features */ -#define VIRTIO_CONFIG_S_FEATURES_OK 8 -/* We've given up on this device. */ -#define VIRTIO_CONFIG_S_FAILED 0x80 - -/* Virtio feature flags used to negotiate device and driver features. */ -/* Can the device handle any descriptor layout? */ -#define VIRTIO_F_ANY_LAYOUT 27 -/* v1.0 compliant. */ -#define VIRTIO_F_VERSION_1 32 -#define VIRTIO_F_IOMMU_PLATFORM 33 - -#define MAX_QUEUE_NUM (256) - -#define VRING_DESC_F_NEXT 1 -#define VRING_DESC_F_WRITE 2 - -#define VRING_AVAIL_F_NO_INTERRUPT 1 - -#define VRING_USED_F_NO_NOTIFY 1 - -struct vring_desc -{ - u64 addr; - u32 len; - u16 flags; - u16 next; -}; - -struct vring_avail -{ - u16 flags; - u16 idx; - u16 ring[0]; -}; - -struct vring_used_elem -{ - u32 id; - u32 len; -}; - -struct vring_used -{ - u16 flags; - u16 idx; - struct vring_used_elem ring[]; -}; - -struct vring { - unsigned int num; - struct vring_desc *desc; - struct vring_avail *avail; - struct vring_used *used; -}; - -#define vring_size(num) \ - (((((sizeof(struct vring_desc) * num) + \ - (sizeof(struct vring_avail) + sizeof(u16) * num)) \ - + PAGE_MASK) & ~PAGE_MASK) + \ - (sizeof(struct vring_used) + sizeof(struct vring_used_elem) * num)) - -struct vring_virtqueue { - unsigned char *queue; - size_t queue_size; - struct dma_mapping map; - struct dma_device *dma; - struct vring vring; - u16 free_head; - u16 last_used_idx; - void **vdata; - struct virtio_net_hdr_modern *empty_header; - /* PCI */ - int queue_index; - 
struct virtio_pci_region notification; -}; - -struct vring_list { - physaddr_t addr; - unsigned int length; -}; - -static inline void vring_init(struct vring *vr, - unsigned int num, unsigned char *queue) -{ - unsigned int i; - unsigned long pa; - - vr->num = num; - - /* physical address of desc must be page aligned */ - - pa = virt_to_phys(queue); - pa = (pa + PAGE_MASK) & ~PAGE_MASK; - vr->desc = phys_to_virt(pa); - - vr->avail = (struct vring_avail *)&vr->desc[num]; - - /* physical address of used must be page aligned */ - - pa = virt_to_phys(&vr->avail->ring[num]); - pa = (pa + PAGE_MASK) & ~PAGE_MASK; - vr->used = phys_to_virt(pa); - - for (i = 0; i < num - 1; i++) - vr->desc[i].next = i + 1; - vr->desc[i].next = 0; -} - -static inline void vring_enable_cb(struct vring_virtqueue *vq) -{ - vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; -} - -static inline void vring_disable_cb(struct vring_virtqueue *vq) -{ - vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; -} - - -/* - * vring_more_used - * - * is there some used buffers ? - * - */ - -static inline int vring_more_used(struct vring_virtqueue *vq) -{ - wmb(); - return vq->last_used_idx != vq->vring.used->idx; -} - -void vring_detach(struct vring_virtqueue *vq, unsigned int head); -void *vring_get_buf(struct vring_virtqueue *vq, unsigned int *len); -void vring_add_buf(struct vring_virtqueue *vq, struct vring_list list[], - unsigned int out, unsigned int in, - void *index, int num_added); -void vring_kick(struct virtio_pci_modern_device *vdev, unsigned int ioaddr, - struct vring_virtqueue *vq, int num_added); - -#endif /* _VIRTIO_RING_H_ */
diff --git a/src/include/ipxe/virtio.h b/src/include/ipxe/virtio.h new file mode 100644 index 0000000..81a2028 --- /dev/null +++ b/src/include/ipxe/virtio.h
@@ -0,0 +1,476 @@ +#ifndef _IPXE_VIRTIO_H +#define _IPXE_VIRTIO_H + +/** @file + * + * Virtual I/O device + * + */ + +FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL ); +FILE_SECBOOT ( PERMITTED ); + +#include <stdint.h> +#include <byteswap.h> +#include <ipxe/dma.h> +#include <ipxe/pci.h> + +/** Virtio page alignment */ +#define VIRTIO_PAGE 4096 + +/** Maximum time to wait for reset (in ms) */ +#define VIRTIO_RESET_MAX_WAIT_MS 100 + +/** + * @defgroup virtio_legacy Original ("legacy") common device registers + * @{ + */ + +/** Legacy device supported features register */ +#define VIRTIO_LEG_FEAT 0x00 + +/** Legacy negotiated in-use features register */ +#define VIRTIO_LEG_USED 0x04 + +/** Legacy queue base address register */ +#define VIRTIO_LEG_BASE 0x08 + +/** Legacy queue size register */ +#define VIRTIO_LEG_SIZE 0x0c + +/** Legacy queue select register */ +#define VIRTIO_LEG_SEL 0x0e + +/** Legacy queue doorbell notification register */ +#define VIRTIO_LEG_DB 0x10 + +/** Legacy driver status register */ +#define VIRTIO_LEG_STAT 0x12 +#define VIRTIO_STAT_ACKNOWLEDGE 0x0001 /**< Guest has found device */ +#define VIRTIO_STAT_DRIVER 0x0002 /**< Guest driver exists */ +#define VIRTIO_STAT_DRIVER_OK 0x0004 /**< Guest driver is ready */ +#define VIRTIO_STAT_FEATURES_OK 0x0008 /**< Guest driver has set features */ +#define VIRTIO_STAT_FAIL 0x0080 /**< Guest driver has failed */ + +/** Legacy device-specific registers */ +#define VIRTIO_LEG_DEV 0x14 + +/** Legacy device-specific register (if MSI-X is enabled) */ +#define VIRTIO_LEG_DEV_MSIX 0x18 + +/** @} */ + +/** + * @defgroup virtio_pci_cap PCI capability registers + * @{ + */ + +/** Capability type */ +#define VIRTIO_PCI_CAP_TYPE 0x03 +#define VIRTIO_PCI_CAP_TYPE_COMMON 0x01 /**< Common registers */ +#define VIRTIO_PCI_CAP_TYPE_NOTIFY 0x02 /**< Notification doorbells */ +#define VIRTIO_PCI_CAP_TYPE_DEVICE 0x04 /**< Device-specific registers */ + +/** Capability BAR index */ +#define VIRTIO_PCI_CAP_BAR 0x04 + +/** Capability 
BAR offset */ +#define VIRTIO_PCI_CAP_OFFSET 0x08 + +/** Capability minimum length */ +#define VIRTIO_PCI_CAP_END 0x10 + +/** Notification doorbell capability multiplier offset */ +#define VIRTIO_PCI_CAP_NOTIFY_MULT 0x10 + +/** Notification doorbell capability minimum length */ +#define VIRTIO_PCI_CAP_NOTIFY_END 0x14 + +/** @} */ + +/** A virtio PCI capability */ +struct virtio_pci_capability { + /** Capability type */ + uint8_t type; + /** Capability offset */ + uint8_t pos; + /** Capability length */ + uint8_t len; + /** BAR number */ + uint8_t bar; + /** Offset within BAR */ + uint32_t offset; +}; + +/** + * @defgroup virtio_pci_common PCI common device registers + * @{ + */ + +/** PCI device supported features select register */ +#define VIRTIO_PCI_FEAT_SEL 0x00 + +/** PCI device supported features register */ +#define VIRTIO_PCI_FEAT 0x04 + +/** PCI negotiated in-use features select register */ +#define VIRTIO_PCI_USED_SEL 0x08 + +/** PCI negotiated in-use features register */ +#define VIRTIO_PCI_USED 0x0c + +/** PCI device status register */ +#define VIRTIO_PCI_STAT 0x14 + +/** PCI configuration generation register */ +#define VIRTIO_PCI_GEN 0x15 + +/** PCI queue select register */ +#define VIRTIO_PCI_SEL 0x16 + +/** PCI queue size register */ +#define VIRTIO_PCI_SIZE 0x18 + +/** PCI queue enable register */ +#define VIRTIO_PCI_ENABLE 0x1c + +/** PCI queue doorbell notification offset register */ +#define VIRTIO_PCI_DBOFF 0x1e + +/** PCI queue descriptor array base address register */ +#define VIRTIO_PCI_DESC 0x20 + +/** PCI queue submission queue base address register */ +#define VIRTIO_PCI_SQ 0x28 + +/** PCI queue completion queue base address register */ +#define VIRTIO_PCI_CQ 0x30 + +/** @} */ + +/** A virtio buffer descriptor */ +struct virtio_desc { + /** Buffer address */ + uint64_t addr; + /** Buffer length */ + uint32_t len; + /** Flags */ + uint16_t flags; + /** Next descriptor index */ + uint16_t next; +} __attribute__ (( packed )); + +/** Next 
descriptor index is valid */ +#define VIRTIO_DESC_FL_NEXT 0x0001 + +/** Buffer is write-only */ +#define VIRTIO_DESC_FL_WRITE 0x0002 + +/** A virtio submission queue entry */ +struct virtio_sqe { + /** Starting descriptor index */ + uint16_t index; +} __attribute__ (( packed )); + +/** A virtio submission ("available") queue */ +struct virtio_sq { + /** Flags */ + uint16_t flags; + /** Producer index */ + uint16_t prod; + /** Queue entries */ + struct virtio_sqe sqe[]; +} __attribute__ (( packed )); + +/** Do not generate interrupt */ +#define VIRTIO_SQ_FL_NO_INTERRUPT 0x0001 + +/** A virtio completion queue entry */ +struct virtio_cqe { + /** Starting descriptor index */ + uint32_t index; + /** Length written */ + uint32_t len; +} __attribute__ (( packed )); + +/** A virtio completion ("used") queue */ +struct virtio_cq { + /** Flags */ + uint16_t flags; + /** Producer index */ + uint16_t prod; + /** Queue entries */ + struct virtio_cqe cqe[]; +} __attribute__ (( packed )); + +/** A virtio queue */ +struct virtio_queue { + /** Queue index */ + unsigned int index; + /** Queue size (must be a power of two) */ + unsigned int count; + /** Queue mask */ + unsigned int mask; + /** Submission queue producer index */ + unsigned int prod; + /** Completion queue consumer index */ + unsigned int cons; + /** Total length of queue */ + size_t len; + /** DMA mapping */ + struct dma_mapping map; + /** Descriptor array (and start of DMA allocation) */ + struct virtio_desc *desc; + /** Submission queue */ + struct virtio_sq *sq; + /** Completion queue */ + struct virtio_cq *cq; + /** Notification doorbell */ + void *db; +}; + +/** + * Initialise virtio queue + * + * @v queue Virtio queue + * @v index Queue index + */ +static inline __attribute__ (( always_inline )) void +virtio_queue_init ( struct virtio_queue *queue, unsigned int index ) { + + queue->index = index; +} + +/** + * Calculate aligned size + * + * @v size Unaligned size + * @ret size Aligned size + */ +static inline 
__attribute__ (( always_inline )) size_t +virtio_align ( size_t size ) { + + return ( ( size + VIRTIO_PAGE - 1 ) & ~( VIRTIO_PAGE - 1 ) ); +} + +/** + * Calculate (unaligned) descriptor array size + * + * @v queue Virtio queue + * @v count Queue size + */ +static inline __attribute__ (( always_inline )) size_t +virtio_desc_size ( unsigned int count ) { + struct virtio_desc *desc; + + return ( count * sizeof ( desc[0] ) ); +} + +/** + * Calculate (unaligned) submission queue size + * + * @v queue Virtio queue + * @v count Queue size + */ +static inline __attribute__ (( always_inline )) size_t +virtio_sq_size ( unsigned int count ) { + struct virtio_sq *sq; + + return ( sizeof ( *sq ) + ( count * sizeof ( sq->sqe[0] ) ) ); +} + +/** + * Calculate (unaligned) completion queue size + * + * @v queue Virtio queue + * @v count Queue size + */ +static inline __attribute__ (( always_inline )) size_t +virtio_cq_size ( unsigned int count ) { + struct virtio_cq *cq; + + return ( sizeof ( *cq ) + ( count * sizeof ( cq->cqe[0] ) ) ); +} + +/** Number of 32-bit feature words */ +#define VIRTIO_FEATURE_WORDS 2 + +/** A virtio feature set */ +struct virtio_features { + /** Feature words */ + uint32_t word[VIRTIO_FEATURE_WORDS]; +}; + +/** Arbitrary descriptor layouts may be used */ +#define VIRTIO_FEAT0_ANY_LAYOUT 0x08000000 + +/** Virtio version 1.0 or above */ +#define VIRTIO_FEAT1_MODERN 0x00000001 + +/** A virtio device */ +struct virtio_device { + /** Device name */ + const char *name; + /** Device operations */ + struct virtio_operations *op; + /** DMA device */ + struct dma_device *dma; + /** Common registers */ + void *common; + /** Doorbell notification registers */ + void *notify; + /** Device-specific registers */ + void *device; + /** Driver status */ + unsigned int stat; + /** Device supported features */ + struct virtio_features supported; + /** Negotiated features */ + struct virtio_features features; + /** Notification doorbell multiplier */ + unsigned int 
multiplier; +}; + +/** Virtio device operations */ +struct virtio_operations { + /** + * Reset device + * + * @v virtio Virtio device + * @ret rc Return status code + */ + int ( * reset ) ( struct virtio_device *virtio ); + /** + * Report driver status + * + * @v virtio Virtio device + * @ret stat Actual device status + */ + unsigned int ( * status ) ( struct virtio_device *virtio ); + /** + * Get supported features + * + * @v virtio Virtio device + */ + void ( * supported ) ( struct virtio_device *virtio ); + /** + * Set negotiated features + * + * @v virtio Virtio device + */ + void ( * negotiate ) ( struct virtio_device *virtio ); + /** + * Set queue size + * + * @v virtio Virtio device + * @v queue Virtio queue + * @v count Requested size + */ + void ( * size ) ( struct virtio_device *virtio, + struct virtio_queue *queue, unsigned int count ); + /** + * Enable queue + * + * @v virtio Virtio device + * @v queue Virtio queue + */ + void ( * enable ) ( struct virtio_device *virtio, + struct virtio_queue *queue ); +}; + +/** + * Submit descriptor(s) to queue + * + * @v queue Virtio queue + * @v index Starting descriptor index + */ +static inline __attribute__ (( always_inline )) void +virtio_submit ( struct virtio_queue *queue, unsigned int index ) { + struct virtio_sqe *sqe; + + /* Get next submission queue entry */ + sqe = &queue->sq->sqe[ queue->prod++ & queue->mask ]; + + /* Populate submission queue entry */ + sqe->index = cpu_to_le16 ( index ); +} + +/** + * Notify queue + * + * @v queue Virtio queue + */ +static inline __attribute__ (( always_inline )) void +virtio_notify ( struct virtio_queue *queue ) { + + /* Write producer index */ + wmb(); + queue->sq->prod = cpu_to_le16 ( queue->prod ); + wmb(); + + /* Ring doorbell */ + iowrite16 ( queue->index, queue->db ); +} + +/** + * Check for completed descriptors + * + * @v queue Virtio queue + * @v completions Number of pending completions + */ +static inline __attribute__ (( always_inline )) unsigned int 
+virtio_completions ( struct virtio_queue *queue ) { + uint16_t completions; + + /* Get completion count */ + completions = ( le16_to_cpu ( queue->cq->prod ) - queue->cons ); + return completions; +} + +/** + * Complete descriptor(s) + * + * @v queue Virtio queue + * @v len Length to fill in, or NULL + * @ret index Starting descriptor index + */ +static inline __attribute__ (( always_inline )) unsigned int +virtio_complete ( struct virtio_queue *queue, size_t *len ) { + struct virtio_cqe *cqe; + + /* Get next completion queue entry */ + cqe = &queue->cq->cqe[ queue->cons++ & queue->mask ]; + + /* Parse completion queue entry */ + if ( len ) + *len = le32_to_cpu ( cqe->len ); + return le32_to_cpu ( cqe->index ); +} + +/** + * Check if device is using the legacy interface + * + * @v virtio Virtio device + * @ret is_legacy Device is using the legacy interface + */ +static inline __attribute__ (( always_inline )) int +virtio_is_legacy ( struct virtio_device *virtio ) { + + /* Check negotiation of version 1.0 or above */ + return ( ! ( virtio->features.word[1] & VIRTIO_FEAT1_MODERN ) ); +} + +extern int virtio_pci_map ( struct virtio_device *virtio, + struct pci_device *pci ); +extern int virtio_reset ( struct virtio_device *virtio ); +extern unsigned int virtio_status ( struct virtio_device *virtio, + unsigned int stat ); +extern int virtio_init ( struct virtio_device *virtio, + const struct virtio_features *driver ); +extern int virtio_enable ( struct virtio_device *virtio, + struct virtio_queue *queue, unsigned int count ); +extern void virtio_free ( struct virtio_device *virtio, + struct virtio_queue *queue ); +extern void virtio_unmap ( struct virtio_device *virtio ); + +#endif /* _IPXE_VIRTIO_H */