/*
* (c) Copyright 2010 Stefan Hajnoczi <stefanha@gmail.com>
*
* based on the Etherboot virtio-net driver
*
* (c) Copyright 2008 Bull S.A.S.
*
* Author: Laurent Vivier <Laurent.Vivier@bull.net>
*
* some parts from Linux Virtio PCI driver
*
* Copyright IBM Corp. 2007
* Authors: Anthony Liguori <aliguori@us.ibm.com>
*
* some parts from Linux Virtio Ring
*
* Copyright Rusty Russell IBM Corporation 2007
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/

FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );

#include <errno.h>
#include <stdlib.h>
#include <unistd.h>
#include <ipxe/list.h>
#include <ipxe/iobuf.h>
#include <ipxe/netdevice.h>
#include <ipxe/pci.h>
#include <ipxe/if_ether.h>
#include <ipxe/ethernet.h>
#include <ipxe/virtio-pci.h>
#include <ipxe/virtio-ring.h>
#include "virtio-net.h"
/*
* Virtio network device driver
*
* Specification:
* http://ozlabs.org/~rusty/virtio-spec/
*
* The virtio network device is supported by Linux virtualization software
* including QEMU/KVM and lguest. This driver supports the virtio over PCI
* transport; virtual machines have one virtio-net PCI adapter per NIC.
*
* Virtio-net is different from hardware NICs because virtio devices
* communicate with the hypervisor via virtqueues, not traditional descriptor
 * rings. Virtqueues are unordered queues that support add_buf() and
* get_buf() operations. To transmit a packet, the driver has to add the
* packet buffer onto the virtqueue. To receive a packet, the driver must
* first add an empty buffer to the virtqueue and then get the filled packet
* buffer on completion.
*
* Virtqueues are an abstraction that is commonly implemented using the vring
* descriptor ring layout. The vring is the actual shared memory structure
* that allows the virtual machine to communicate buffers with the hypervisor.
 * Because the vring layout is optimized for flexibility and performance rather
 * than space, it is heavy-weight and, like traditional descriptor rings, is
 * allocated in the driver's open() function rather than in probe().
*
* There is no true interrupt enable/disable. Virtqueues have callback
* enable/disable flags but these are only hints. The hypervisor may still
* raise an interrupt. Nevertheless, this driver disables callbacks in the
* hopes of avoiding interrupts.
*/
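/*
 * As a rough sketch of that cycle, using the same vring helpers this
 * driver calls below, receiving a packet amounts to posting an empty
 * buffer (out=0, in=2: the shared virtio net header plus the data
 * buffer), kicking the host, and later reaping the filled buffer:
 *
 *	vring_add_buf ( vq, list, 0, 2, iobuf, 0 );
 *	vring_kick ( vdev, ioaddr, vq, 1 );
 *	...
 *	while ( vring_more_used ( vq ) )
 *		iobuf = vring_get_buf ( vq, &len );
 */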
/* Driver types are declared here so virtio-net.h can be easily synced with its
* Linux source.
*/
/* Virtqueue indices */
enum {
RX_INDEX = 0,
TX_INDEX,
QUEUE_NB
};
/** Max number of pending rx packets */
#define NUM_RX_BUF 8
struct virtnet_nic {
	/** Base PIO register address */
unsigned long ioaddr;
/** 0 for legacy, 1 for virtio 1.0 */
int virtio_version;
/** Virtio 1.0 device data */
struct virtio_pci_modern_device vdev;
/** RX/TX virtqueues */
struct vring_virtqueue *virtqueue;
/** RX packets handed to the NIC waiting to be filled in */
struct list_head rx_iobufs;
/** Pending rx packet count */
unsigned int rx_num_iobufs;
/** Virtio net dummy packet headers */
struct virtio_net_hdr_modern empty_header[QUEUE_NB];
};
/** Add an iobuf to a virtqueue
*
* @v netdev Network device
* @v vq_idx Virtqueue index (RX_INDEX or TX_INDEX)
* @v iobuf I/O buffer
*
* The virtqueue is kicked after the iobuf has been added.
*/
static void virtnet_enqueue_iob ( struct net_device *netdev,
int vq_idx, struct io_buffer *iobuf ) {
struct virtnet_nic *virtnet = netdev->priv;
struct vring_virtqueue *vq = &virtnet->virtqueue[vq_idx];
struct virtio_net_hdr_modern *header = &virtnet->empty_header[vq_idx];
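	/* TX buffers are read by the device (driver-to-device, "out"
	 * descriptors); RX buffers are written by the device
	 * (device-to-driver, "in" descriptors).  Each request uses two
	 * descriptors: the virtio net header and the packet data. */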
unsigned int out = ( vq_idx == TX_INDEX ) ? 2 : 0;
unsigned int in = ( vq_idx == TX_INDEX ) ? 0 : 2;
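	/* The virtio 1.0 header has a trailing num_buffers field that the
	 * legacy header lacks, hence the two possible header lengths. */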
size_t header_len = ( virtnet->virtio_version ?
sizeof ( *header ) : sizeof ( header->legacy ) );
struct vring_list list[] = {
{
/* Share a single zeroed virtio net header between all
* packets in a ring. This works because this driver
* does not use any advanced features so none of the
* header fields get used.
*
* Some host implementations (notably Google Compute
			 * Engine) are known to unconditionally write back
* to header->flags for received packets. Work around
* this by using separate RX and TX headers.
*/
.addr = ( char* ) header,
.length = header_len,
},
{
.addr = ( char* ) iobuf->data,
.length = iob_len ( iobuf ),
},
};
DBGC2 ( virtnet, "VIRTIO-NET %p enqueuing iobuf %p on vq %d\n",
virtnet, iobuf, vq_idx );
vring_add_buf ( vq, list, out, in, iobuf, 0 );
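	/* Notify the host: modern devices are kicked via their mapped
	 * notify region, legacy devices via the PIO BAR. */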
vring_kick ( virtnet->virtio_version ? &virtnet->vdev : NULL,
virtnet->ioaddr, vq, 1 );
}
/** Try to keep rx virtqueue filled with iobufs
*
* @v netdev Network device
*/
static void virtnet_refill_rx_virtqueue ( struct net_device *netdev ) {
struct virtnet_nic *virtnet = netdev->priv;
size_t len = ( netdev->max_pkt_len + 4 /* VLAN */ );
while ( virtnet->rx_num_iobufs < NUM_RX_BUF ) {
struct io_buffer *iobuf;
/* Try to allocate a buffer, stop for now if out of memory */
iobuf = alloc_iob ( len );
if ( ! iobuf )
break;
/* Keep track of iobuf so close() can free it */
list_add ( &iobuf->list, &virtnet->rx_iobufs );
/* Mark packet length until we know the actual size */
iob_put ( iobuf, len );
virtnet_enqueue_iob ( netdev, RX_INDEX, iobuf );
virtnet->rx_num_iobufs++;
}
}
/** Helper to free all virtqueue memory
*
* @v netdev Network device
*/
static void virtnet_free_virtqueues ( struct net_device *netdev ) {
struct virtnet_nic *virtnet = netdev->priv;
int i;
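	/* Unmap each queue's notification region (mapped only for modern
	 * devices; a no-op for legacy) and release the ring memory */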
for ( i = 0; i < QUEUE_NB; i++ ) {
virtio_pci_unmap_capability ( &virtnet->virtqueue[i].notification );
vp_free_vq ( &virtnet->virtqueue[i] );
}
free ( virtnet->virtqueue );
virtnet->virtqueue = NULL;
}
/** Open network device, legacy virtio 0.9.5
*
* @v netdev Network device
* @ret rc Return status code
*/
static int virtnet_open_legacy ( struct net_device *netdev ) {
struct virtnet_nic *virtnet = netdev->priv;
unsigned long ioaddr = virtnet->ioaddr;
u32 features;
int i;
/* Reset for sanity */
vp_reset ( ioaddr );
/* Allocate virtqueues */
virtnet->virtqueue = zalloc ( QUEUE_NB *
sizeof ( *virtnet->virtqueue ) );
if ( ! virtnet->virtqueue )
return -ENOMEM;
/* Initialize rx/tx virtqueues */
for ( i = 0; i < QUEUE_NB; i++ ) {
if ( vp_find_vq ( ioaddr, i, &virtnet->virtqueue[i] ) == -1 ) {
DBGC ( virtnet, "VIRTIO-NET %p cannot register queue %d\n",
virtnet, i );
virtnet_free_virtqueues ( netdev );
return -ENOENT;
}
}
/* Initialize rx packets */
INIT_LIST_HEAD ( &virtnet->rx_iobufs );
virtnet->rx_num_iobufs = 0;
virtnet_refill_rx_virtqueue ( netdev );
/* Disable interrupts before starting */
netdev_irq ( netdev, 0 );
	/* Acknowledge the features we use and tell the device the driver is ready */
features = vp_get_features ( ioaddr );
vp_set_features ( ioaddr, features & ( ( 1 << VIRTIO_NET_F_MAC ) |
( 1 << VIRTIO_NET_F_MTU ) ) );
vp_set_status ( ioaddr, VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK );
return 0;
}
/** Open network device, modern virtio 1.0
*
* @v netdev Network device
* @ret rc Return status code
*/
static int virtnet_open_modern ( struct net_device *netdev ) {
struct virtnet_nic *virtnet = netdev->priv;
u64 features;
u8 status;
/* Negotiate features */
features = vpm_get_features ( &virtnet->vdev );
if ( ! ( features & VIRTIO_F_VERSION_1 ) ) {
vpm_add_status ( &virtnet->vdev, VIRTIO_CONFIG_S_FAILED );
return -EINVAL;
}
vpm_set_features ( &virtnet->vdev, features & (
( 1ULL << VIRTIO_NET_F_MAC ) |
( 1ULL << VIRTIO_NET_F_MTU ) |
( 1ULL << VIRTIO_F_VERSION_1 ) |
( 1ULL << VIRTIO_F_ANY_LAYOUT ) |
( 1ULL << VIRTIO_F_IOMMU_PLATFORM ) ) );
vpm_add_status ( &virtnet->vdev, VIRTIO_CONFIG_S_FEATURES_OK );
status = vpm_get_status ( &virtnet->vdev );
if ( ! ( status & VIRTIO_CONFIG_S_FEATURES_OK ) ) {
DBGC ( virtnet, "VIRTIO-NET %p device didn't accept features\n",
virtnet );
vpm_add_status ( &virtnet->vdev, VIRTIO_CONFIG_S_FAILED );
return -EINVAL;
}
/* Allocate virtqueues */
virtnet->virtqueue = zalloc ( QUEUE_NB *
sizeof ( *virtnet->virtqueue ) );
if ( ! virtnet->virtqueue ) {
vpm_add_status ( &virtnet->vdev, VIRTIO_CONFIG_S_FAILED );
return -ENOMEM;
}
/* Initialize rx/tx virtqueues */
if ( vpm_find_vqs ( &virtnet->vdev, QUEUE_NB, virtnet->virtqueue ) ) {
DBGC ( virtnet, "VIRTIO-NET %p cannot register queues\n",
virtnet );
virtnet_free_virtqueues ( netdev );
vpm_add_status ( &virtnet->vdev, VIRTIO_CONFIG_S_FAILED );
return -ENOENT;
}
/* Disable interrupts before starting */
netdev_irq ( netdev, 0 );
vpm_add_status ( &virtnet->vdev, VIRTIO_CONFIG_S_DRIVER_OK );
/* Initialize rx packets */
INIT_LIST_HEAD ( &virtnet->rx_iobufs );
virtnet->rx_num_iobufs = 0;
virtnet_refill_rx_virtqueue ( netdev );
return 0;
}
/** Open network device
*
* @v netdev Network device
* @ret rc Return status code
*/
static int virtnet_open ( struct net_device *netdev ) {
struct virtnet_nic *virtnet = netdev->priv;
if ( virtnet->virtio_version ) {
return virtnet_open_modern ( netdev );
} else {
return virtnet_open_legacy ( netdev );
}
}
/** Close network device
*
* @v netdev Network device
*/
static void virtnet_close ( struct net_device *netdev ) {
struct virtnet_nic *virtnet = netdev->priv;
struct io_buffer *iobuf;
struct io_buffer *next_iobuf;
if ( virtnet->virtio_version ) {
vpm_reset ( &virtnet->vdev );
} else {
vp_reset ( virtnet->ioaddr );
}
/* Virtqueues can be freed now that NIC is reset */
virtnet_free_virtqueues ( netdev );
/* Free rx iobufs */
list_for_each_entry_safe ( iobuf, next_iobuf, &virtnet->rx_iobufs, list ) {
free_iob ( iobuf );
}
INIT_LIST_HEAD ( &virtnet->rx_iobufs );
virtnet->rx_num_iobufs = 0;
}
/** Transmit packet
*
* @v netdev Network device
* @v iobuf I/O buffer
* @ret rc Return status code
*/
static int virtnet_transmit ( struct net_device *netdev,
struct io_buffer *iobuf ) {
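	/* Add the packet to the TX ring; completion is reported
	 * asynchronously by virtnet_process_tx_packets() */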
virtnet_enqueue_iob ( netdev, TX_INDEX, iobuf );
return 0;
}
/** Complete packet transmission
*
* @v netdev Network device
*/
static void virtnet_process_tx_packets ( struct net_device *netdev ) {
struct virtnet_nic *virtnet = netdev->priv;
struct vring_virtqueue *tx_vq = &virtnet->virtqueue[TX_INDEX];
while ( vring_more_used ( tx_vq ) ) {
struct io_buffer *iobuf = vring_get_buf ( tx_vq, NULL );
DBGC2 ( virtnet, "VIRTIO-NET %p tx complete iobuf %p\n",
virtnet, iobuf );
netdev_tx_complete ( netdev, iobuf );
}
}
/** Complete packet reception
*
* @v netdev Network device
*/
static void virtnet_process_rx_packets ( struct net_device *netdev ) {
struct virtnet_nic *virtnet = netdev->priv;
struct vring_virtqueue *rx_vq = &virtnet->virtqueue[RX_INDEX];
while ( vring_more_used ( rx_vq ) ) {
unsigned int len;
struct io_buffer *iobuf = vring_get_buf ( rx_vq, &len );
/* Release ownership of iobuf */
list_del ( &iobuf->list );
virtnet->rx_num_iobufs--;
/* Update iobuf length */
iob_unput ( iobuf, iob_len ( iobuf ) );
iob_put ( iobuf, len - sizeof ( struct virtio_net_hdr ) );
DBGC2 ( virtnet, "VIRTIO-NET %p rx complete iobuf %p len %zd\n",
virtnet, iobuf, iob_len ( iobuf ) );
/* Pass completed packet to the network stack */
netdev_rx ( netdev, iobuf );
}
virtnet_refill_rx_virtqueue ( netdev );
}
/** Poll for completed and received packets
*
* @v netdev Network device
*/
static void virtnet_poll ( struct net_device *netdev ) {
struct virtnet_nic *virtnet = netdev->priv;
/* Acknowledge interrupt. This is necessary for UNDI operation and
* interrupts that are raised despite VRING_AVAIL_F_NO_INTERRUPT being
* set (that flag is just a hint and the hypervisor does not have to
* honor it).
*/
if ( virtnet->virtio_version ) {
vpm_get_isr ( &virtnet->vdev );
} else {
vp_get_isr ( virtnet->ioaddr );
}
virtnet_process_tx_packets ( netdev );
virtnet_process_rx_packets ( netdev );
}
/** Enable or disable interrupts
*
* @v netdev Network device
* @v enable Interrupts should be enabled
*/
static void virtnet_irq ( struct net_device *netdev, int enable ) {
struct virtnet_nic *virtnet = netdev->priv;
int i;
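	/* Toggle the VRING_AVAIL_F_NO_INTERRUPT hint on each ring; as noted
	 * above, the hypervisor may still raise interrupts regardless */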
for ( i = 0; i < QUEUE_NB; i++ ) {
if ( enable )
vring_enable_cb ( &virtnet->virtqueue[i] );
else
vring_disable_cb ( &virtnet->virtqueue[i] );
}
}
/** virtio-net device operations */
static struct net_device_operations virtnet_operations = {
.open = virtnet_open,
.close = virtnet_close,
.transmit = virtnet_transmit,
.poll = virtnet_poll,
.irq = virtnet_irq,
};
/**
* Probe PCI device, legacy virtio 0.9.5
*
* @v pci PCI device
* @ret rc Return status code
*/
static int virtnet_probe_legacy ( struct pci_device *pci ) {
unsigned long ioaddr = pci->ioaddr;
struct net_device *netdev;
struct virtnet_nic *virtnet;
u32 features;
u16 mtu;
int rc;
/* Allocate and hook up net device */
netdev = alloc_etherdev ( sizeof ( *virtnet ) );
if ( ! netdev )
return -ENOMEM;
netdev_init ( netdev, &virtnet_operations );
virtnet = netdev->priv;
virtnet->ioaddr = ioaddr;
pci_set_drvdata ( pci, netdev );
netdev->dev = &pci->dev;
DBGC ( virtnet, "VIRTIO-NET %p busaddr=%s ioaddr=%#lx irq=%d\n",
virtnet, pci->dev.name, ioaddr, pci->irq );
/* Enable PCI bus master and reset NIC */
adjust_pci_device ( pci );
vp_reset ( ioaddr );
/* Load MAC address and MTU */
features = vp_get_features ( ioaddr );
if ( features & ( 1 << VIRTIO_NET_F_MAC ) ) {
vp_get ( ioaddr, offsetof ( struct virtio_net_config, mac ),
netdev->hw_addr, ETH_ALEN );
DBGC ( virtnet, "VIRTIO-NET %p mac=%s\n", virtnet,
eth_ntoa ( netdev->hw_addr ) );
}
if ( features & ( 1ULL << VIRTIO_NET_F_MTU ) ) {
vp_get ( ioaddr, offsetof ( struct virtio_net_config, mtu ),
&mtu, sizeof ( mtu ) );
DBGC ( virtnet, "VIRTIO-NET %p mtu=%d\n", virtnet, mtu );
netdev->max_pkt_len = ( mtu + ETH_HLEN );
netdev->mtu = mtu;
}
/* Register network device */
if ( ( rc = register_netdev ( netdev ) ) != 0 )
goto err_register_netdev;
/* Mark link as up, control virtqueue is not used */
netdev_link_up ( netdev );
return 0;
unregister_netdev ( netdev );
err_register_netdev:
vp_reset ( ioaddr );
netdev_nullify ( netdev );
netdev_put ( netdev );
return rc;
}
/**
* Probe PCI device, modern virtio 1.0
*
* @v pci PCI device
 * @v found_dev	Set to non-zero if a modern device was found (probe may still fail)
* @ret rc Return status code
*/
static int virtnet_probe_modern ( struct pci_device *pci, int *found_dev ) {
struct net_device *netdev;
struct virtnet_nic *virtnet;
u64 features;
u16 mtu;
int rc, common, isr, notify, config, device;
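	/* A modern device must expose the common, ISR, notify and PCI cfg
	 * capabilities; the device cfg capability is optional */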
common = virtio_pci_find_capability ( pci, VIRTIO_PCI_CAP_COMMON_CFG );
if ( ! common ) {
DBG ( "Common virtio capability not found!\n" );
return -ENODEV;
}
*found_dev = 1;
isr = virtio_pci_find_capability ( pci, VIRTIO_PCI_CAP_ISR_CFG );
notify = virtio_pci_find_capability ( pci, VIRTIO_PCI_CAP_NOTIFY_CFG );
config = virtio_pci_find_capability ( pci, VIRTIO_PCI_CAP_PCI_CFG );
if ( ! isr || ! notify || ! config ) {
DBG ( "Missing virtio capabilities %i/%i/%i/%i\n",
common, isr, notify, config );
return -EINVAL;
}
device = virtio_pci_find_capability ( pci, VIRTIO_PCI_CAP_DEVICE_CFG );
/* Allocate and hook up net device */
netdev = alloc_etherdev ( sizeof ( *virtnet ) );
if ( ! netdev )
return -ENOMEM;
netdev_init ( netdev, &virtnet_operations );
virtnet = netdev->priv;
pci_set_drvdata ( pci, netdev );
netdev->dev = &pci->dev;
DBGC ( virtnet, "VIRTIO-NET modern %p busaddr=%s irq=%d\n",
virtnet, pci->dev.name, pci->irq );
virtnet->vdev.pci = pci;
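	/* Map the common and ISR regions now; the notify and PCI cfg
	 * capability positions are only recorded here (the notify region
	 * is mapped per queue later, by vpm_find_vqs()) */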
rc = virtio_pci_map_capability ( pci, common,
sizeof ( struct virtio_pci_common_cfg ), 4,
0, sizeof ( struct virtio_pci_common_cfg ),
&virtnet->vdev.common );
if ( rc )
goto err_map_common;
rc = virtio_pci_map_capability ( pci, isr, sizeof ( u8 ), 1,
0, 1,
&virtnet->vdev.isr );
if ( rc )
goto err_map_isr;
virtnet->vdev.notify_cap_pos = notify;
virtnet->vdev.cfg_cap_pos = config;
/* Map the device capability */
if ( device ) {
rc = virtio_pci_map_capability ( pci, device,
0, 4, 0, sizeof ( struct virtio_net_config ),
&virtnet->vdev.device );
if ( rc )
goto err_map_device;
}
/* Enable the PCI device */
adjust_pci_device ( pci );
/* Reset the device and set initial status bits */
vpm_reset ( &virtnet->vdev );
vpm_add_status ( &virtnet->vdev, VIRTIO_CONFIG_S_ACKNOWLEDGE );
vpm_add_status ( &virtnet->vdev, VIRTIO_CONFIG_S_DRIVER );
/* Load MAC address and MTU */
if ( device ) {
features = vpm_get_features ( &virtnet->vdev );
if ( features & ( 1ULL << VIRTIO_NET_F_MAC ) ) {
vpm_get ( &virtnet->vdev,
offsetof ( struct virtio_net_config, mac ),
netdev->hw_addr, ETH_ALEN );
DBGC ( virtnet, "VIRTIO-NET %p mac=%s\n", virtnet,
eth_ntoa ( netdev->hw_addr ) );
}
if ( features & ( 1ULL << VIRTIO_NET_F_MTU ) ) {
vpm_get ( &virtnet->vdev,
offsetof ( struct virtio_net_config, mtu ),
&mtu, sizeof ( mtu ) );
DBGC ( virtnet, "VIRTIO-NET %p mtu=%d\n", virtnet,
mtu );
netdev->max_pkt_len = ( mtu + ETH_HLEN );
}
}
/* We need a valid MAC address */
if ( ! is_valid_ether_addr ( netdev->hw_addr ) ) {
rc = -EADDRNOTAVAIL;
goto err_mac_address;
}
/* Register network device */
if ( ( rc = register_netdev ( netdev ) ) != 0 )
goto err_register_netdev;
/* Mark link as up, control virtqueue is not used */
netdev_link_up ( netdev );
virtnet->virtio_version = 1;
return 0;
unregister_netdev ( netdev );
err_register_netdev:
err_mac_address:
vpm_reset ( &virtnet->vdev );
netdev_nullify ( netdev );
netdev_put ( netdev );
virtio_pci_unmap_capability ( &virtnet->vdev.device );
err_map_device:
virtio_pci_unmap_capability ( &virtnet->vdev.isr );
err_map_isr:
virtio_pci_unmap_capability ( &virtnet->vdev.common );
err_map_common:
return rc;
}
/**
* Probe PCI device
*
* @v pci PCI device
* @ret rc Return status code
*/
static int virtnet_probe ( struct pci_device *pci ) {
int found_modern = 0;
int rc = virtnet_probe_modern ( pci, &found_modern );
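	/* Device IDs below 0x1040 are transitional and can still be driven
	 * via the legacy interface; virtio 1.0-only devices (0x1040 and up)
	 * cannot, so only fall back for the former */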
if ( ! found_modern && pci->device < 0x1040 ) {
/* fall back to the legacy probe */
rc = virtnet_probe_legacy ( pci );
}
return rc;
}
/**
* Remove device
*
* @v pci PCI device
*/
static void virtnet_remove ( struct pci_device *pci ) {
struct net_device *netdev = pci_get_drvdata ( pci );
struct virtnet_nic *virtnet = netdev->priv;
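	/* Unmap the modern capability regions (no-ops for a legacy device,
	 * which never mapped them), then unregister and drop the netdev */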
virtio_pci_unmap_capability ( &virtnet->vdev.device );
virtio_pci_unmap_capability ( &virtnet->vdev.isr );
virtio_pci_unmap_capability ( &virtnet->vdev.common );
unregister_netdev ( netdev );
netdev_nullify ( netdev );
netdev_put ( netdev );
}
static struct pci_device_id virtnet_nics[] = {
PCI_ROM(0x1af4, 0x1000, "virtio-net", "Virtio Network Interface", 0),
PCI_ROM(0x1af4, 0x1041, "virtio-net", "Virtio Network Interface 1.0", 0),
};
struct pci_driver virtnet_driver __pci_driver = {
.ids = virtnet_nics,
.id_count = ( sizeof ( virtnet_nics ) / sizeof ( virtnet_nics[0] ) ),
.probe = virtnet_probe,
.remove = virtnet_remove,
};