src/interface/hyperv/vmbus.c - ipxe - Git at Google

 /*
  * Copyright (C) 2014 Michael Brown <mbrown@fensystems.co.uk>.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation; either version 2 of the
  * License, or (at your option) any later version.
  *
  * This program is distributed in the hope that it will be useful, but
  * WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  * 02110-1301, USA.
  *
  * You can also choose to distribute this program under the terms of
  * the Unmodified Binary Distribution Licence (as given in the file
  * COPYING.UBDL), provided that you have satisfied its requirements.
  */

 FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );

 /** @file
  *
  * Hyper-V virtual machine bus
  *
  */

 #include <stdint.h>
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
 #include <errno.h>
 #include <assert.h>
 #include <byteswap.h>
 #include <ipxe/nap.h>
 #include <ipxe/malloc.h>
 #include <ipxe/iobuf.h>
 #include <ipxe/bitops.h>
 #include <ipxe/hyperv.h>
 #include <ipxe/vmbus.h>

 /** VMBus initial GPADL ID
  *
  * This is an opaque value with no meaning.  The Linux kernel uses
  * 0xe1e10.
  */
 #define VMBUS_GPADL_MAGIC 0x18ae0000

 /** Current (i.e. most recently issued) GPADL ID */
 static unsigned int vmbus_gpadl = VMBUS_GPADL_MAGIC;

 /** Obsolete GPADL ID threshold
  *
  * When the Hyper-V connection is reset, any previous GPADLs are
  * automatically rendered obsolete.
  */
 unsigned int vmbus_obsolete_gpadl;

 /**
  * Post message
  *
  * @v hv		Hyper-V hypervisor
  * @v header		Message header
  * @v len		Length of message (including header)
  * @ret rc		Return status code
  */
 static int vmbus_post_message ( struct hv_hypervisor *hv,
 				const struct vmbus_message_header *header,
 				size_t len ) {
 	struct vmbus *vmbus = hv->vmbus;
 	int rc;

 	/* Post message */
 	if ( ( rc = hv_post_message ( hv, VMBUS_MESSAGE_ID, VMBUS_MESSAGE_TYPE,
 				      header, len ) ) != 0 ) {
 		DBGC ( vmbus, "VMBUS %p could not post message: %s\n",
 		       vmbus, strerror ( rc ) );
 		return rc;
 	}

 	return 0;
 }

 /**
  * Post empty message
  *
  * @v hv		Hyper-V hypervisor
  * @v type		Message type
  * @ret rc		Return status code
  */
 static int vmbus_post_empty_message ( struct hv_hypervisor *hv,
 				      unsigned int type ) {
 	struct vmbus_message_header header = { .type = cpu_to_le32 ( type ) };

 	return vmbus_post_message ( hv, &header, sizeof ( header ) );
 }

 /**
  * Wait for received message of any type
  *
  * @v hv		Hyper-V hypervisor
  * @ret rc		Return status code
  */
 static int vmbus_wait_for_any_message ( struct hv_hypervisor *hv ) {
 	struct vmbus *vmbus = hv->vmbus;
 	int rc;

 	/* Wait for message */
 	if ( ( rc = hv_wait_for_message ( hv, VMBUS_MESSAGE_SINT ) ) != 0 ) {
 		DBGC ( vmbus, "VMBUS %p failed waiting for message: %s\n",
 		       vmbus, strerror ( rc ) );
 		return rc;
 	}

 	/* Sanity check */
 	if ( hv->message->received.type != cpu_to_le32 ( VMBUS_MESSAGE_TYPE ) ){
 		DBGC ( vmbus, "VMBUS %p invalid message type %d\n",
 		       vmbus, le32_to_cpu ( hv->message->received.type ) );
 		return -EINVAL;
 	}

 	return 0;
 }

 /**
  * Wait for received message of a specified type, ignoring any others
  *
  * @v hv		Hyper-V hypervisor
  * @v type		Message type
  * @ret rc		Return status code
  */
 static int vmbus_wait_for_message ( struct hv_hypervisor *hv,
 				    unsigned int type ) {
 	struct vmbus *vmbus = hv->vmbus;
 	const struct vmbus_message_header *header = &vmbus->message->header;
 	int rc;

 	/* Loop until specified message arrives, or until an error occurs */
 	while ( 1 ) {

 		/* Wait for message */
 		if ( ( rc = vmbus_wait_for_any_message ( hv ) ) != 0 )
 			return rc;

 		/* Check for requested message type */
 		if ( header->type == cpu_to_le32 ( type ) )
 			return 0;

 		/* Ignore any other messages (e.g. due to additional
 		 * channels being offered at runtime).
 		 */
 		DBGC ( vmbus, "VMBUS %p ignoring message type %d (expecting "
 		       "%d)\n", vmbus, le32_to_cpu ( header->type ), type );
 	}
 }

 /**
  * Initiate contact
  *
  * @v hv		Hyper-V hypervisor
  * @v raw		VMBus protocol (raw) version
  * @ret rc		Return status code
  */
 static int vmbus_initiate_contact ( struct hv_hypervisor *hv,
 				    unsigned int raw ) {
 	struct vmbus *vmbus = hv->vmbus;
 	const struct vmbus_version_response *version = &vmbus->message->version;
 	struct vmbus_initiate_contact initiate;
 	int rc;

 	/* Construct message */
 	memset ( &initiate, 0, sizeof ( initiate ) );
 	initiate.header.type = cpu_to_le32 ( VMBUS_INITIATE_CONTACT );
 	initiate.version.raw = cpu_to_le32 ( raw );
 	initiate.intr = virt_to_phys ( vmbus->intr );
 	initiate.monitor_in = virt_to_phys ( vmbus->monitor_in );
 	initiate.monitor_out = virt_to_phys ( vmbus->monitor_out );

 	/* Post message */
 	if ( ( rc = vmbus_post_message ( hv, &initiate.header,
 					 sizeof ( initiate ) ) ) != 0 )
 		return rc;

 	/* Wait for response */
 	if ( ( rc = vmbus_wait_for_message ( hv, VMBUS_VERSION_RESPONSE ) ) !=0)
 		return rc;

 	/* Check response */
 	if ( ! version->supported ) {
 		DBGC ( vmbus, "VMBUS %p requested version not supported\n",
 		       vmbus );
 		return -ENOTSUP;
 	}

 	DBGC ( vmbus, "VMBUS %p initiated contact using version %d.%d\n",
 	       vmbus, le16_to_cpu ( initiate.version.major ),
 	       le16_to_cpu ( initiate.version.minor ) );
 	return 0;
 }

 /**
  * Terminate contact
  *
  * @v hv		Hyper-V hypervisor
  * @ret rc		Return status code
  */
 static int vmbus_unload ( struct hv_hypervisor *hv ) {
 	int rc;

 	/* Post message */
 	if ( ( rc = vmbus_post_empty_message ( hv, VMBUS_UNLOAD ) ) != 0 )
 		return rc;

 	/* Wait for response */
 	if ( ( rc = vmbus_wait_for_message ( hv, VMBUS_UNLOAD_RESPONSE ) ) != 0)
 		return rc;

 	return 0;
 }

 /**
  * Negotiate protocol version
  *
  * @v hv		Hyper-V hypervisor
  * @ret rc		Return status code
  */
 static int vmbus_negotiate_version ( struct hv_hypervisor *hv ) {
 	int rc;

 	/* We require the ability to disconnect from and reconnect to
 	 * VMBus; if we don't have this then there is no (viable) way
 	 * for a loaded operating system to continue to use any VMBus
 	 * devices.  (There is also a small but non-zero risk that the
 	 * host will continue to write to our interrupt and monitor
 	 * pages, since the VMBUS_UNLOAD message in earlier versions
 	 * is essentially a no-op.)
 	 *
 	 * This requires us to ensure that the host supports protocol
 	 * version 3.0 (VMBUS_VERSION_WIN8_1).  However, we can't
 	 * actually _use_ protocol version 3.0, since doing so causes
 	 * an iSCSI-booted Windows Server 2012 R2 VM to crash due to a
 	 * NULL pointer dereference in vmbus.sys.
 	 *
 	 * To work around this problem, we first ensure that we can
 	 * connect using protocol v3.0, then disconnect and reconnect
 	 * using the oldest known protocol.
 	 */

 	/* Initiate contact to check for required protocol support */
 	if ( ( rc = vmbus_initiate_contact ( hv, VMBUS_VERSION_WIN8_1 ) ) != 0 )
 		return rc;

 	/* Terminate contact */
 	if ( ( rc = vmbus_unload ( hv ) ) != 0 )
 		return rc;

 	/* Reinitiate contact using the oldest known protocol version */
 	if ( ( rc = vmbus_initiate_contact ( hv, VMBUS_VERSION_WS2008 ) ) != 0 )
 		return rc;

 	return 0;
 }

 /**
  * Establish GPA descriptor list
  *
  * @v vmdev		VMBus device
  * @v data		Data buffer
  * @v len		Length of data buffer
  * @ret gpadl		GPADL ID, or negative error
  */
 int vmbus_establish_gpadl ( struct vmbus_device *vmdev, userptr_t data,
 			    size_t len ) {
 	struct hv_hypervisor *hv = vmdev->hv;
 	struct vmbus *vmbus = hv->vmbus;
 	physaddr_t addr = user_to_phys ( data, 0 );
 	unsigned int pfn_count = hv_pfn_count ( addr, len );
 	struct {
 		struct vmbus_gpadl_header gpadlhdr;
 		struct vmbus_gpa_range range;
 		uint64_t pfn[pfn_count];
 	} __attribute__ (( packed )) gpadlhdr;
 	const struct vmbus_gpadl_created *created = &vmbus->message->created;
 	unsigned int gpadl;
 	unsigned int i;
 	int rc;

 	/* Allocate GPADL ID */
 	gpadl = ++vmbus_gpadl;

 	/* Construct message */
 	memset ( &gpadlhdr, 0, sizeof ( gpadlhdr ) );
 	gpadlhdr.gpadlhdr.header.type = cpu_to_le32 ( VMBUS_GPADL_HEADER );
 	gpadlhdr.gpadlhdr.channel = cpu_to_le32 ( vmdev->channel );
 	gpadlhdr.gpadlhdr.gpadl = cpu_to_le32 ( gpadl );
 	gpadlhdr.gpadlhdr.range_len =
 		cpu_to_le16 ( ( sizeof ( gpadlhdr.range ) +
 				sizeof ( gpadlhdr.pfn ) ) );
 	gpadlhdr.gpadlhdr.range_count = cpu_to_le16 ( 1 );
 	gpadlhdr.range.len = cpu_to_le32 ( len );
 	gpadlhdr.range.offset = cpu_to_le32 ( addr & ( PAGE_SIZE - 1 ) );
 	for ( i = 0 ; i < pfn_count ; i++ )
 		gpadlhdr.pfn[i] = ( ( addr / PAGE_SIZE ) + i );

 	/* Post message */
 	if ( ( rc = vmbus_post_message ( hv, &gpadlhdr.gpadlhdr.header,
 					 sizeof ( gpadlhdr ) ) ) != 0 )
 		return rc;

 	/* Wait for response */
 	if ( ( rc = vmbus_wait_for_message ( hv, VMBUS_GPADL_CREATED ) ) != 0 )
 		return rc;

 	/* Check response */
 	if ( created->channel != cpu_to_le32 ( vmdev->channel ) ) {
 		DBGC ( vmdev, "VMBUS %s unexpected GPADL channel %d\n",
 		       vmdev->dev.name, le32_to_cpu ( created->channel ) );
 		return -EPROTO;
 	}
 	if ( created->gpadl != cpu_to_le32 ( gpadl ) ) {
 		DBGC ( vmdev, "VMBUS %s unexpected GPADL ID %#08x\n",
 		       vmdev->dev.name, le32_to_cpu ( created->gpadl ) );
 		return -EPROTO;
 	}
 	if ( created->status != 0 ) {
 		DBGC ( vmdev, "VMBUS %s GPADL creation failed: %#08x\n",
 		       vmdev->dev.name, le32_to_cpu ( created->status ) );
 		return -EPROTO;
 	}

 	DBGC ( vmdev, "VMBUS %s GPADL %#08x is [%08lx,%08lx)\n",
 	       vmdev->dev.name, gpadl, addr, ( addr + len ) );
 	return gpadl;
 }

 /**
  * Tear down GPA descriptor list
  *
  * @v vmdev		VMBus device
  * @v gpadl		GPADL ID
  * @ret rc		Return status code
  */
 int vmbus_gpadl_teardown ( struct vmbus_device *vmdev, unsigned int gpadl ) {
 	struct hv_hypervisor *hv = vmdev->hv;
 	struct vmbus *vmbus = hv->vmbus;
 	struct vmbus_gpadl_teardown teardown;
 	const struct vmbus_gpadl_torndown *torndown = &vmbus->message->torndown;
 	int rc;

 	/* If GPADL is obsolete (i.e. was created before the most
 	 * recent Hyper-V reset), then we will never receive a
 	 * response to the teardown message.  Since the GPADL is
 	 * already destroyed as far as the hypervisor is concerned, no
 	 * further action is required.
 	 */
 	if ( vmbus_gpadl_is_obsolete ( gpadl ) )
 		return 0;

 	/* Construct message */
 	memset ( &teardown, 0, sizeof ( teardown ) );
 	teardown.header.type = cpu_to_le32 ( VMBUS_GPADL_TEARDOWN );
 	teardown.channel = cpu_to_le32 ( vmdev->channel );
 	teardown.gpadl = cpu_to_le32 ( gpadl );

 	/* Post message */
 	if ( ( rc = vmbus_post_message ( hv, &teardown.header,
 					 sizeof ( teardown ) ) ) != 0 )
 		return rc;

 	/* Wait for response */
 	if ( ( rc = vmbus_wait_for_message ( hv, VMBUS_GPADL_TORNDOWN ) ) != 0 )
 		return rc;

 	/* Check response */
 	if ( torndown->gpadl != cpu_to_le32 ( gpadl ) ) {
 		DBGC ( vmdev, "VMBUS %s unexpected GPADL ID %#08x\n",
 		       vmdev->dev.name, le32_to_cpu ( torndown->gpadl ) );
 		return -EPROTO;
 	}

 	return 0;
 }

 /**
  * Open VMBus channel
  *
  * @v vmdev		VMBus device
  * @v op		Channel operations
  * @v out_len		Outbound ring buffer length
  * @v in_len		Inbound ring buffer length
  * @v mtu		Maximum expected data packet length (including headers)
  * @ret rc		Return status code
  *
  * Both outbound and inbound ring buffer lengths must be a power of
  * two and a multiple of PAGE_SIZE.  The requirement to be a power of
  * two is a policy decision taken to simplify the ring buffer indexing
  * logic.
  */
 int vmbus_open ( struct vmbus_device *vmdev,
 		 struct vmbus_channel_operations *op,
 		 size_t out_len, size_t in_len, size_t mtu ) {
 	struct hv_hypervisor *hv = vmdev->hv;
 	struct vmbus *vmbus = hv->vmbus;
 	struct vmbus_open_channel open;
 	const struct vmbus_open_channel_result *opened =
 		&vmbus->message->opened;
 	size_t len;
 	void *ring;
 	void *packet;
 	int gpadl;
 	uint32_t open_id;
 	int rc;

 	/* Sanity checks */
 	assert ( ( out_len % PAGE_SIZE ) == 0 );
 	assert ( ( out_len & ( out_len - 1 ) ) == 0 );
 	assert ( ( in_len % PAGE_SIZE ) == 0 );
 	assert ( ( in_len & ( in_len - 1 ) ) == 0 );
 	assert ( mtu >= ( sizeof ( struct vmbus_packet_header ) +
 			  sizeof ( struct vmbus_packet_footer ) ) );

 	/* Allocate packet buffer */
 	packet = malloc ( mtu );
 	if ( ! packet ) {
 		rc = -ENOMEM;
 		goto err_alloc_packet;
 	}

 	/* Allocate ring buffer */
 	len = ( sizeof ( *vmdev->out ) + out_len +
 		sizeof ( *vmdev->in ) + in_len );
 	assert ( ( len % PAGE_SIZE ) == 0 );
 	ring = malloc_phys ( len, PAGE_SIZE );
 	if ( ! ring ) {
 		rc = -ENOMEM;
 		goto err_alloc_ring;
 	}
 	memset ( ring, 0, len );

 	/* Establish GPADL for ring buffer */
 	gpadl = vmbus_establish_gpadl ( vmdev, virt_to_user ( ring ), len );
 	if ( gpadl < 0 ) {
 		rc = gpadl;
 		goto err_establish;
 	}

 	/* Construct message */
 	memset ( &open, 0, sizeof ( open ) );
 	open.header.type = cpu_to_le32 ( VMBUS_OPEN_CHANNEL );
 	open.channel = cpu_to_le32 ( vmdev->channel );
 	open_id = random();
 	open.id = open_id; /* Opaque random value: endianness irrelevant */
 	open.gpadl = cpu_to_le32 ( gpadl );
 	open.out_pages = ( ( sizeof ( *vmdev->out ) / PAGE_SIZE ) +
 			   ( out_len / PAGE_SIZE ) );

 	/* Post message */
 	if ( ( rc = vmbus_post_message ( hv, &open.header,
 					 sizeof ( open ) ) ) != 0 )
 		goto err_post_message;

 	/* Wait for response */
 	if ( ( rc = vmbus_wait_for_message ( hv,
 					     VMBUS_OPEN_CHANNEL_RESULT ) ) != 0)
 		goto err_wait_for_message;

 	/* Check response */
 	if ( opened->channel != cpu_to_le32 ( vmdev->channel ) ) {
 		DBGC ( vmdev, "VMBUS %s unexpected opened channel %#08x\n",
 		       vmdev->dev.name, le32_to_cpu ( opened->channel ) );
 		rc = -EPROTO;
 		goto err_check_response;
 	}
 	if ( opened->id != open_id /* Non-endian */ ) {
 		DBGC ( vmdev, "VMBUS %s unexpected open ID %#08x\n",
 		       vmdev->dev.name, le32_to_cpu ( opened->id ) );
 		rc = -EPROTO;
 		goto err_check_response;
 	}
 	if ( opened->status != 0 ) {
 		DBGC ( vmdev, "VMBUS %s open failed: %#08x\n",
 		       vmdev->dev.name, le32_to_cpu ( opened->status ) );
 		rc = -EPROTO;
 		goto err_check_response;
 	}

 	/* Store channel parameters */
 	vmdev->out_len = out_len;
 	vmdev->in_len = in_len;
 	vmdev->out = ring;
 	vmdev->in = ( ring + sizeof ( *vmdev->out ) + out_len );
 	vmdev->gpadl = gpadl;
 	vmdev->op = op;
 	vmdev->mtu = mtu;
 	vmdev->packet = packet;

 	DBGC ( vmdev, "VMBUS %s channel GPADL %#08x ring "
 		"[%#08lx,%#08lx,%#08lx)\n", vmdev->dev.name, vmdev->gpadl,
 		virt_to_phys ( vmdev->out ), virt_to_phys ( vmdev->in ),
 		( virt_to_phys ( vmdev->out ) + len ) );
 	return 0;

  err_check_response:
  err_wait_for_message:
  err_post_message:
 	vmbus_gpadl_teardown ( vmdev, vmdev->gpadl );
  err_establish:
 	free_phys ( ring, len );
  err_alloc_ring:
 	free ( packet );
  err_alloc_packet:
 	return rc;
 }

 /**
  * Close VMBus channel
  *
  * @v vmdev		VMBus device
  */
 void vmbus_close ( struct vmbus_device *vmdev ) {
 	struct hv_hypervisor *hv = vmdev->hv;
 	struct vmbus_close_channel close;
 	size_t len;
 	int rc;

 	/* Construct message */
 	memset ( &close, 0, sizeof ( close ) );
 	close.header.type = cpu_to_le32 ( VMBUS_CLOSE_CHANNEL );
 	close.channel = cpu_to_le32 ( vmdev->channel );

 	/* Post message */
 	if ( ( rc = vmbus_post_message ( hv, &close.header,
 					 sizeof ( close ) ) ) != 0 ) {
 		DBGC ( vmdev, "VMBUS %s failed to close: %s\n",
 		       vmdev->dev.name, strerror ( rc ) );
 		/* Continue to attempt to tear down GPADL, so that our
 		 * memory is no longer accessible by the remote VM.
 		 */
 	}

 	/* Tear down GPADL */
 	if ( ( rc = vmbus_gpadl_teardown ( vmdev, vmdev->gpadl ) ) != 0 ) {
 		DBGC ( vmdev, "VMBUS %s failed to tear down channel GPADL: "
 		       "%s\n", vmdev->dev.name, strerror ( rc ) );
 		/* We can't prevent the remote VM from continuing to
 		 * access this memory, so leak it.
 		 */
 		return;
 	}

 	/* Free ring buffer */
 	len = ( sizeof ( *vmdev->out ) + vmdev->out_len +
 		sizeof ( *vmdev->in ) + vmdev->in_len );
 	free_phys ( vmdev->out, len );
 	vmdev->out = NULL;
 	vmdev->in = NULL;

 	/* Free packet buffer */
 	free ( vmdev->packet );
 	vmdev->packet = NULL;

 	DBGC ( vmdev, "VMBUS %s closed\n", vmdev->dev.name );
 }

 /**
  * Signal channel via monitor page
  *
  * @v vmdev		VMBus device
  */
 static void vmbus_signal_monitor ( struct vmbus_device *vmdev ) {
 	struct hv_hypervisor *hv = vmdev->hv;
 	struct vmbus *vmbus = hv->vmbus;
 	struct hv_monitor_trigger *trigger;
 	unsigned int group;
 	unsigned int bit;

 	/* Set bit in monitor trigger group */
 	group = ( vmdev->monitor / ( 8 * sizeof ( trigger->pending ) ));
 	bit = ( vmdev->monitor % ( 8 * sizeof ( trigger->pending ) ) );
 	trigger = &vmbus->monitor_out->trigger[group];
 	set_bit ( bit, trigger );
 }

 /**
  * Signal channel via hypervisor event
  *
  * @v vmdev		VMBus device
  */
 static void vmbus_signal_event ( struct vmbus_device *vmdev ) {
 	struct hv_hypervisor *hv = vmdev->hv;
 	int rc;

 	/* Signal hypervisor event */
 	if ( ( rc = hv_signal_event ( hv, VMBUS_EVENT_ID, 0 ) ) != 0 ) {
 		DBGC ( vmdev, "VMBUS %s could not signal event: %s\n",
 		       vmdev->dev.name, strerror ( rc ) );
 		return;
 	}
 }

 /**
  * Fill outbound ring buffer
  *
  * @v vmdev		VMBus device
  * @v prod		Producer index
  * @v data		Data
  * @v len		Length
  * @ret prod		New producer index
  *
  * The caller must ensure that there is sufficient space in the ring
  * buffer.
  */
 static size_t vmbus_produce ( struct vmbus_device *vmdev, size_t prod,
 			      const void *data, size_t len ) {
 	size_t first;
 	size_t second;

 	/* Determine fragment lengths */
 	first = ( vmdev->out_len - prod );
 	if ( first > len )
 		first = len;
 	second = ( len - first );

 	/* Copy fragment(s) */
 	memcpy ( &vmdev->out->data[prod], data, first );
 	if ( second )
 		memcpy ( &vmdev->out->data[0], ( data + first ), second );

 	return ( ( prod + len ) & ( vmdev->out_len - 1 ) );
 }

 /**
  * Consume inbound ring buffer
  *
  * @v vmdev		VMBus device
  * @v cons		Consumer index
  * @v data		Data buffer, or NULL
  * @v len		Length to consume
  * @ret cons		New consumer index
  */
 static size_t vmbus_consume ( struct vmbus_device *vmdev, size_t cons,
 			      void *data, size_t len ) {
 	size_t first;
 	size_t second;

 	/* Determine fragment lengths */
 	first = ( vmdev->in_len - cons );
 	if ( first > len )
 		first = len;
 	second = ( len - first );

 	/* Copy fragment(s) */
 	memcpy ( data, &vmdev->in->data[cons], first );
 	if ( second )
 		memcpy ( ( data + first ), &vmdev->in->data[0], second );

 	return ( ( cons + len ) & ( vmdev->in_len - 1 ) );
 }

 /**
  * Send packet via ring buffer
  *
  * @v vmdev		VMBus device
  * @v header		Packet header
  * @v data		Data
  * @v len		Length of data
  * @ret rc		Return status code
  *
  * Send a packet via the outbound ring buffer.  All fields in the
  * packet header must be filled in, with the exception of the total
  * packet length.
  */
 static int vmbus_send ( struct vmbus_device *vmdev,
 			struct vmbus_packet_header *header,
 			const void *data, size_t len ) {
 	struct hv_hypervisor *hv = vmdev->hv;
 	struct vmbus *vmbus = hv->vmbus;
 	static uint8_t padding[ 8 - 1 ];
 	struct vmbus_packet_footer footer;
 	size_t header_len;
 	size_t pad_len;
 	size_t footer_len;
 	size_t ring_len;
 	size_t cons;
 	size_t prod;
 	size_t old_prod;
 	size_t fill;

 	/* Sanity check */
 	assert ( vmdev->out != NULL );

 	/* Calculate lengths */
 	header_len = ( le16_to_cpu ( header->hdr_qlen ) * 8 );
 	pad_len = ( ( -len ) & ( 8 - 1 ) );
 	footer_len = sizeof ( footer );
 	ring_len = ( header_len + len + pad_len + footer_len );

 	/* Check that we have enough room in the outbound ring buffer */
 	cons = le32_to_cpu ( vmdev->out->cons );
 	prod = le32_to_cpu ( vmdev->out->prod );
 	old_prod = prod;
 	fill = ( ( prod - cons ) & ( vmdev->out_len - 1 ) );
 	if ( ( fill + ring_len ) >= vmdev->out_len ) {
 		DBGC ( vmdev, "VMBUS %s ring buffer full\n", vmdev->dev.name );
 		return -ENOBUFS;
 	}

 	/* Complete header */
 	header->qlen = cpu_to_le16 ( ( ring_len - footer_len ) / 8 );

 	/* Construct footer */
 	footer.reserved = 0;
 	footer.prod = vmdev->out->prod;

 	/* Copy packet to buffer */
 	DBGC2 ( vmdev, "VMBUS %s sending:\n", vmdev->dev.name );
 	DBGC2_HDA ( vmdev, prod, header, header_len );
 	prod = vmbus_produce ( vmdev, prod, header, header_len );
 	DBGC2_HDA ( vmdev, prod, data, len );
 	prod = vmbus_produce ( vmdev, prod, data, len );
 	prod = vmbus_produce ( vmdev, prod, padding, pad_len );
 	DBGC2_HDA ( vmdev, prod, &footer, sizeof ( footer ) );
 	prod = vmbus_produce ( vmdev, prod, &footer, sizeof ( footer ) );
 	assert ( ( ( prod - old_prod ) & ( vmdev->out_len - 1 ) ) == ring_len );

 	/* Update producer index */
 	wmb();
 	vmdev->out->prod = cpu_to_le32 ( prod );

 	/* Return if we do not need to signal the host.  This follows
 	 * the logic of hv_need_to_signal() in the Linux driver.
 	 */
 	mb();
 	if ( vmdev->out->intr_mask )
 		return 0;
 	rmb();
 	cons = le32_to_cpu ( vmdev->out->cons );
 	if ( cons != old_prod )
 		return 0;

 	/* Set channel bit in interrupt page */
 	set_bit ( vmdev->channel, vmbus->intr->out );

 	/* Signal the host */
 	vmdev->signal ( vmdev );

 	return 0;
 }

 /**
  * Send control packet via ring buffer
  *
  * @v vmdev		VMBus device
  * @v xid		Transaction ID (or zero to not request completion)
  * @v data		Data
  * @v len		Length of data
  * @ret rc		Return status code
  *
  * Send data using a VMBUS_DATA_INBAND packet.
  */
 int vmbus_send_control ( struct vmbus_device *vmdev, uint64_t xid,
 			 const void *data, size_t len ) {
 	struct vmbus_packet_header *header = vmdev->packet;

 	/* Construct header in packet buffer */
 	assert ( header != NULL );
 	header->type = cpu_to_le16 ( VMBUS_DATA_INBAND );
 	header->hdr_qlen = cpu_to_le16 ( sizeof ( *header ) / 8 );
 	header->flags = ( xid ?
 			  cpu_to_le16 ( VMBUS_COMPLETION_REQUESTED ) : 0 );
 	header->xid = xid; /* Non-endian */

 	return vmbus_send ( vmdev, header, data, len );
 }

 /**
  * Send data packet via ring buffer
  *
  * @v vmdev		VMBus device
  * @v xid		Transaction ID
  * @v data		Data
  * @v len		Length of data
  * @v iobuf		I/O buffer
  * @ret rc		Return status code
  *
  * Send data using a VMBUS_DATA_GPA_DIRECT packet.  The caller is
  * responsible for ensuring that the I/O buffer remains untouched
  * until the corresponding completion has been received.
  */
 int vmbus_send_data ( struct vmbus_device *vmdev, uint64_t xid,
 		      const void *data, size_t len, struct io_buffer *iobuf ) {
 	physaddr_t addr = virt_to_phys ( iobuf->data );
 	unsigned int pfn_count = hv_pfn_count ( addr, iob_len ( iobuf ) );
 	struct {
 		struct vmbus_gpa_direct_header gpa;
 		struct vmbus_gpa_range range;
 		uint64_t pfn[pfn_count];
 	} __attribute__ (( packed )) *header = vmdev->packet;
 	unsigned int i;

 	/* Sanity check */
 	assert ( header != NULL );
 	assert ( sizeof ( *header ) <= vmdev->mtu );

 	/* Construct header in packet buffer */
 	header->gpa.header.type = cpu_to_le16 ( VMBUS_DATA_GPA_DIRECT );
 	header->gpa.header.hdr_qlen = cpu_to_le16 ( sizeof ( *header ) / 8 );
 	header->gpa.header.flags = cpu_to_le16 ( VMBUS_COMPLETION_REQUESTED );
 	header->gpa.header.xid = xid; /* Non-endian */
 	header->gpa.range_count = 1;
 	header->range.len = cpu_to_le32 ( iob_len ( iobuf ) );
 	header->range.offset = cpu_to_le32 ( addr & ( PAGE_SIZE - 1 ) );
 	for ( i = 0 ; i < pfn_count ; i++ )
 		header->pfn[i] = ( ( addr / PAGE_SIZE ) + i );

 	return vmbus_send ( vmdev, &header->gpa.header, data, len );
 }

 /**
  * Send completion packet via ring buffer
  *
  * @v vmdev		VMBus device
  * @v xid		Transaction ID
  * @v data		Data
  * @v len		Length of data
  * @ret rc		Return status code
  *
  * Send data using a VMBUS_COMPLETION packet.
  */
 int vmbus_send_completion ( struct vmbus_device *vmdev, uint64_t xid,
 			    const void *data, size_t len ) {
 	struct vmbus_packet_header *header = vmdev->packet;

 	/* Construct header in packet buffer */
 	assert ( header != NULL );
 	header->type = cpu_to_le16 ( VMBUS_COMPLETION );
 	header->hdr_qlen = cpu_to_le16 ( sizeof ( *header ) / 8 );
 	header->flags = 0;
 	header->xid = xid; /* Non-endian */

 	return vmbus_send ( vmdev, header, data, len );
 }

 /**
  * Send cancellation packet via ring buffer
  *
  * @v vmdev		VMBus device
  * @v xid		Transaction ID
  * @ret rc		Return status code
  *
  * Send data using a VMBUS_CANCELLATION packet.
  */
 int vmbus_send_cancellation ( struct vmbus_device *vmdev, uint64_t xid ) {
 	struct vmbus_packet_header *header = vmdev->packet;

 	/* Construct header in packet buffer */
 	assert ( header != NULL );
 	header->type = cpu_to_le16 ( VMBUS_CANCELLATION );
 	header->hdr_qlen = cpu_to_le16 ( sizeof ( *header ) / 8 );
 	header->flags = 0;
 	header->xid = xid; /* Non-endian */

 	return vmbus_send ( vmdev, header, NULL, 0 );
 }

 /**
  * Get transfer page set from pageset ID
  *
  * @v vmdev		VMBus device
  * @v pageset		Page set ID (in protocol byte order)
  * @ret pages		Page set, or NULL if not found
  */
 static struct vmbus_xfer_pages * vmbus_xfer_pages ( struct vmbus_device *vmdev,
 						    uint16_t pageset ) {
 	struct vmbus_xfer_pages *pages;

 	/* Locate page set */
 	list_for_each_entry ( pages, &vmdev->pages, list ) {
 		if ( pages->pageset == pageset )
 			return pages;
 	}

 	DBGC ( vmdev, "VMBUS %s unrecognised page set ID %#04x\n",
 	       vmdev->dev.name, le16_to_cpu ( pageset ) );
 	return NULL;
 }

 /**
  * Construct I/O buffer list from transfer pages
  *
  * @v vmdev		VMBus device
  * @v header		Transfer page header
  * @v list		I/O buffer list to populate
  * @ret rc		Return status code
  */
 static int vmbus_xfer_page_iobufs ( struct vmbus_device *vmdev,
 				    struct vmbus_packet_header *header,
 				    struct list_head *list ) {
 	struct vmbus_xfer_page_header *page_header =
 		container_of ( header, struct vmbus_xfer_page_header, header );
 	struct vmbus_xfer_pages *pages;
 	struct io_buffer *iobuf;
 	struct io_buffer *tmp;
 	size_t len;
 	size_t offset;
 	unsigned int range_count;
 	unsigned int i;
 	int rc;

 	/* Sanity check */
 	assert ( header->type == cpu_to_le16 ( VMBUS_DATA_XFER_PAGES ) );

 	/* Locate page set */
 	pages = vmbus_xfer_pages ( vmdev, page_header->pageset );
 	if ( ! pages ) {
 		rc = -ENOENT;
 		goto err_pages;
 	}

 	/* Allocate and populate I/O buffers */
 	range_count = le32_to_cpu ( page_header->range_count );
 	for ( i = 0 ; i < range_count ; i++ ) {

 		/* Parse header */
 		len = le32_to_cpu ( page_header->range[i].len );
 		offset = le32_to_cpu ( page_header->range[i].offset );

 		/* Allocate I/O buffer */
 		iobuf = alloc_iob ( len );
 		if ( ! iobuf ) {
 			DBGC ( vmdev, "VMBUS %s could not allocate %zd-byte "
 			       "I/O buffer\n", vmdev->dev.name, len );
 			rc = -ENOMEM;
 			goto err_alloc;
 		}

 		/* Add I/O buffer to list */
 		list_add ( &iobuf->list, list );

 		/* Populate I/O buffer */
 		if ( ( rc = pages->op->copy ( pages, iob_put ( iobuf, len ),
 					      offset, len ) ) != 0 ) {
 			DBGC ( vmdev, "VMBUS %s could not populate I/O buffer "
 			       "range [%zd,%zd): %s\n",
 			       vmdev->dev.name, offset, len, strerror ( rc ) );
 			goto err_copy;
 		}
 	}

 	return 0;

  err_copy:
  err_alloc:
 	list_for_each_entry_safe ( iobuf, tmp, list, list ) {
 		list_del ( &iobuf->list );
 		free_iob ( iobuf );
 	}
  err_pages:
 	return rc;
 }

 /**
  * Poll ring buffer
  *
  * @v vmdev		VMBus device
  * @ret rc		Return status code
  */
 int vmbus_poll ( struct vmbus_device *vmdev ) {
 	struct vmbus_packet_header *header = vmdev->packet;
 	struct list_head list;
 	void *data;
 	size_t header_len;
 	size_t len;
 	size_t footer_len;
 	size_t ring_len;
 	size_t cons;
 	size_t old_cons;
 	uint64_t xid;
 	int rc;

 	/* Sanity checks */
 	assert ( vmdev->packet != NULL );
 	assert ( vmdev->in != NULL );

 	/* Return immediately if buffer is empty */
 	if ( ! vmbus_has_data ( vmdev ) )
 		return 0;
 	cons = le32_to_cpu ( vmdev->in->cons );
 	old_cons = cons;

 	/* Consume (start of) header */
 	cons = vmbus_consume ( vmdev, cons, header, sizeof ( *header ) );

 	/* Parse and sanity check header */
 	header_len = ( le16_to_cpu ( header->hdr_qlen ) * 8 );
 	if ( header_len < sizeof ( *header ) ) {
 		DBGC ( vmdev, "VMBUS %s received underlength header (%zd "
 		       "bytes)\n", vmdev->dev.name, header_len );
 		return -EINVAL;
 	}
 	len = ( ( le16_to_cpu ( header->qlen ) * 8 ) - header_len );
 	footer_len = sizeof ( struct vmbus_packet_footer );
 	ring_len = ( header_len + len + footer_len );
 	if ( ring_len > vmdev->mtu ) {
 		DBGC ( vmdev, "VMBUS %s received overlength packet (%zd "
 		       "bytes)\n", vmdev->dev.name, ring_len );
 		return -ERANGE;
 	}
 	xid = le64_to_cpu ( header->xid );

 	/* Consume remainder of packet */
 	cons = vmbus_consume ( vmdev, cons,
 			       ( ( ( void * ) header ) + sizeof ( *header ) ),
 			       ( ring_len - sizeof ( *header ) ) );
 	DBGC2 ( vmdev, "VMBUS %s received:\n", vmdev->dev.name );
 	DBGC2_HDA ( vmdev, old_cons, header, ring_len );
 	assert ( ( ( cons - old_cons ) & ( vmdev->in_len - 1 ) ) == ring_len );

 	/* Allocate I/O buffers, if applicable */
 	INIT_LIST_HEAD ( &list );
 	if ( header->type == cpu_to_le16 ( VMBUS_DATA_XFER_PAGES ) ) {
 		if ( ( rc = vmbus_xfer_page_iobufs ( vmdev, header,
 						     &list ) ) != 0 )
 			return rc;
 	}

 	/* Update producer index */
 	rmb();
 	vmdev->in->cons = cpu_to_le32 ( cons );

 	/* Handle packet */
 	data = ( ( ( void * ) header ) + header_len );
 	switch ( header->type ) {

 	case cpu_to_le16 ( VMBUS_DATA_INBAND ) :
 		if ( ( rc = vmdev->op->recv_control ( vmdev, xid, data,
 						      len ) ) != 0 ) {
 			DBGC ( vmdev, "VMBUS %s could not handle control "
 			       "packet: %s\n",
 			       vmdev->dev.name, strerror ( rc ) );
 			return rc;
 		}
 		break;

 	case cpu_to_le16 ( VMBUS_DATA_XFER_PAGES ) :
 		if ( ( rc = vmdev->op->recv_data ( vmdev, xid, data, len,
 						   &list ) ) != 0 ) {
 			DBGC ( vmdev, "VMBUS %s could not handle data packet: "
 			       "%s\n", vmdev->dev.name, strerror ( rc ) );
 			return rc;
 		}
 		break;

 	case cpu_to_le16 ( VMBUS_COMPLETION ) :
 		if ( ( rc = vmdev->op->recv_completion ( vmdev, xid, data,
 							 len ) ) != 0 ) {
 			DBGC ( vmdev, "VMBUS %s could not handle completion: "
 			       "%s\n", vmdev->dev.name, strerror ( rc ) );
 			return rc;
 		}
 		break;

 	case cpu_to_le16 ( VMBUS_CANCELLATION ) :
 		if ( ( rc = vmdev->op->recv_cancellation ( vmdev, xid ) ) != 0){
 			DBGC ( vmdev, "VMBUS %s could not handle cancellation: "
 			       "%s\n", vmdev->dev.name, strerror ( rc ) );
 			return rc;
 		}
 		break;

 	default:
 		DBGC ( vmdev, "VMBUS %s unknown packet type %d\n",
 		       vmdev->dev.name, le16_to_cpu ( header->type ) );
 		return -ENOTSUP;
 	}

 	return 0;
 }

 /**
  * Dump channel status (for debugging)
  *
  * @v vmdev		VMBus device
  */
 void vmbus_dump_channel ( struct vmbus_device *vmdev ) {
 	size_t out_prod = le32_to_cpu ( vmdev->out->prod );
 	size_t out_cons = le32_to_cpu ( vmdev->out->cons );
 	size_t in_prod = le32_to_cpu ( vmdev->in->prod );
 	size_t in_cons = le32_to_cpu ( vmdev->in->cons );
 	size_t in_len;
 	size_t first;
 	size_t second;

 	/* Dump ring status */
 	DBGC ( vmdev, "VMBUS %s out %03zx:%03zx%s in %03zx:%03zx%s\n",
 	       vmdev->dev.name, out_prod, out_cons,
 	       ( vmdev->out->intr_mask ? "(m)" : "" ), in_prod, in_cons,
 	       ( vmdev->in->intr_mask ? "(m)" : "" ) );

 	/* Dump inbound ring contents, if any */
 	if ( in_prod != in_cons ) {
 		in_len = ( ( in_prod - in_cons ) &
 			   ( vmdev->in_len - 1 ) );
 		first = ( vmdev->in_len - in_cons );
 		if ( first > in_len )
 			first = in_len;
 		second = ( in_len - first );
 		DBGC_HDA ( vmdev, in_cons, &vmdev->in->data[in_cons], first );
 		DBGC_HDA ( vmdev, 0, &vmdev->in->data[0], second );
 	}
 }

 /**
  * Find driver for VMBus device
  *
  * @v vmdev		VMBus device
  * @ret driver		Driver, or NULL
  */
 static struct vmbus_driver * vmbus_find_driver ( const union uuid *type ) {
 	struct vmbus_driver *vmdrv;

 	for_each_table_entry ( vmdrv, VMBUS_DRIVERS ) {
 		if ( memcmp ( &vmdrv->type, type, sizeof ( *type ) ) == 0 )
 			return vmdrv;
 	}
 	return NULL;
 }

 /**
  * Probe channels
  *
  * @v hv		Hyper-V hypervisor
  * @v parent		Parent device
  * @ret rc		Return status code
  */
 static int vmbus_probe_channels ( struct hv_hypervisor *hv,
 				  struct device *parent ) {
 	struct vmbus *vmbus = hv->vmbus;
 	const struct vmbus_message_header *header = &vmbus->message->header;
 	const struct vmbus_offer_channel *offer = &vmbus->message->offer;
 	const union uuid *type;
 	union uuid instance;
 	struct vmbus_driver *driver;
 	struct vmbus_device *vmdev;
 	struct vmbus_device *tmp;
 	unsigned int channel;
 	int rc;

 	/* Post message */
 	if ( ( rc = vmbus_post_empty_message ( hv, VMBUS_REQUEST_OFFERS ) ) !=0)
 		goto err_post_message;

 	/* Collect responses */
 	while ( 1 ) {

 		/* Wait for response */
 		if ( ( rc = vmbus_wait_for_any_message ( hv ) ) != 0 )
 			goto err_wait_for_any_message;

 		/* Handle response */
 		if ( header->type == cpu_to_le32 ( VMBUS_OFFER_CHANNEL ) ) {

 			/* Parse offer */
 			type = &offer->type;
 			channel = le32_to_cpu ( offer->channel );
 			DBGC2 ( vmbus, "VMBUS %p offer %d type %s",
 				vmbus, channel, uuid_ntoa ( type ) );
 			if ( offer->monitored )
 				DBGC2 ( vmbus, " monitor %d", offer->monitor );
 			DBGC2 ( vmbus, "\n" );

 			/* Look for a driver */
 			driver = vmbus_find_driver ( type );
 			if ( ! driver ) {
 				DBGC2 ( vmbus, "VMBUS %p has no driver for "
 					"type %s\n", vmbus, uuid_ntoa ( type ));
 				/* Not a fatal error */
 				continue;
 			}

 			/* Allocate and initialise device */
 			vmdev = zalloc ( sizeof ( *vmdev ) );
 			if ( ! vmdev ) {
 				rc = -ENOMEM;
 				goto err_alloc_vmdev;
 			}
 			memcpy ( &instance, &offer->instance,
 				 sizeof ( instance ) );
 			uuid_mangle ( &instance );
 			snprintf ( vmdev->dev.name, sizeof ( vmdev->dev.name ),
 				   "{%s}", uuid_ntoa ( &instance ) );
 			vmdev->dev.desc.bus_type = BUS_TYPE_HV;
 			INIT_LIST_HEAD ( &vmdev->dev.children );
 			list_add_tail ( &vmdev->dev.siblings,
 					&parent->children );
 			vmdev->dev.parent = parent;
 			vmdev->hv = hv;
 			memcpy ( &vmdev->instance, &offer->instance,
 				 sizeof ( vmdev->instance ) );
 			vmdev->channel = channel;
 			vmdev->monitor = offer->monitor;
 			vmdev->signal = ( offer->monitored ?
 					  vmbus_signal_monitor :
 					  vmbus_signal_event );
 			INIT_LIST_HEAD ( &vmdev->pages );
 			vmdev->driver = driver;
 			vmdev->dev.driver_name = driver->name;
 			DBGC ( vmdev, "VMBUS %s has driver \"%s\"\n",
 			       vmdev->dev.name, vmdev->driver->name );

 		} else if ( header->type ==
 			    cpu_to_le32 ( VMBUS_ALL_OFFERS_DELIVERED ) ) {

 			/* End of offer list */
 			break;

 		} else {
 			DBGC ( vmbus, "VMBUS %p unexpected offer response type "
 			       "%d\n", vmbus, le32_to_cpu ( header->type ) );
 			rc = -EPROTO;
 			goto err_unexpected_offer;
 		}
 	}

 	/* Probe all devices.  We do this only after completing
 	 * enumeration since devices will need to send and receive
 	 * VMBus messages.
 	 */
 	list_for_each_entry ( vmdev, &parent->children, dev.siblings ) {
 		if ( ( rc = vmdev->driver->probe ( vmdev ) ) != 0 ) {
 			DBGC ( vmdev, "VMBUS %s could not probe: %s\n",
 			       vmdev->dev.name, strerror ( rc ) );
 			goto err_probe;
 		}
 	}

 	return 0;

  err_probe:
 	/* Remove driver from each device that was already probed */
 	list_for_each_entry_continue_reverse ( vmdev, &parent->children,
 					       dev.siblings ) {
 		vmdev->driver->remove ( vmdev );
 	}
  err_unexpected_offer:
  err_alloc_vmdev:
  err_wait_for_any_message:
 	/* Free any devices allocated (but potentially not yet probed) */
 	list_for_each_entry_safe ( vmdev, tmp, &parent->children,
 				   dev.siblings ) {
 		list_del ( &vmdev->dev.siblings );
 		free ( vmdev );
 	}
  err_post_message:
 	return rc;
 }


 /**
  * Reset channels
  *
  * @v hv		Hyper-V hypervisor
  * @v parent		Parent device
  * @ret rc		Return status code
  */
 static int vmbus_reset_channels ( struct hv_hypervisor *hv,
 				  struct device *parent ) {
 	struct vmbus *vmbus = hv->vmbus;
 	const struct vmbus_message_header *header = &vmbus->message->header;
 	const struct vmbus_offer_channel *offer = &vmbus->message->offer;
 	const union uuid *type;
 	struct vmbus_device *vmdev;
 	unsigned int channel;
 	int rc;

 	/* Post message */
 	if ( ( rc = vmbus_post_empty_message ( hv, VMBUS_REQUEST_OFFERS ) ) !=0)
 		return rc;

 	/* Collect responses */
 	while ( 1 ) {

 		/* Wait for response */
 		if ( ( rc = vmbus_wait_for_any_message ( hv ) ) != 0 )
 			return rc;

 		/* Handle response */
 		if ( header->type == cpu_to_le32 ( VMBUS_OFFER_CHANNEL ) ) {

 			/* Parse offer */
 			type = &offer->type;
 			channel = le32_to_cpu ( offer->channel );
 			DBGC2 ( vmbus, "VMBUS %p offer %d type %s",
 				vmbus, channel, uuid_ntoa ( type ) );
 			if ( offer->monitored )
 				DBGC2 ( vmbus, " monitor %d", offer->monitor );
 			DBGC2 ( vmbus, "\n" );

 			/* Do nothing with the offer; we already have all
 			 * of the relevant state from the initial probe.
 			 */

 		} else if ( header->type ==
 			    cpu_to_le32 ( VMBUS_ALL_OFFERS_DELIVERED ) ) {

 			/* End of offer list */
 			break;

 		} else {
 			DBGC ( vmbus, "VMBUS %p unexpected offer response type "
 			       "%d\n", vmbus, le32_to_cpu ( header->type ) );
 			return -EPROTO;
 		}
 	}

 	/* Reset all devices */
 	list_for_each_entry ( vmdev, &parent->children, dev.siblings ) {
 		if ( ( rc = vmdev->driver->reset ( vmdev ) ) != 0 ) {
 			DBGC ( vmdev, "VMBUS %s could not reset: %s\n",
 			       vmdev->dev.name, strerror ( rc ) );
 			/* Continue attempting to reset other devices */
 			continue;
 		}
 	}

 	return 0;
 }

 /**
  * Remove channels
  *
  * @v hv		Hyper-V hypervisor
  * @v parent		Parent device
  */
 static void vmbus_remove_channels ( struct hv_hypervisor *hv __unused,
 				    struct device *parent ) {
 	struct vmbus_device *vmdev;
 	struct vmbus_device *tmp;

 	/* Remove devices */
 	list_for_each_entry_safe ( vmdev, tmp, &parent->children,
 				   dev.siblings ) {
 		vmdev->driver->remove ( vmdev );
 		assert ( list_empty ( &vmdev->dev.children ) );
 		assert ( vmdev->out == NULL );
 		assert ( vmdev->in == NULL );
 		assert ( vmdev->packet == NULL );
 		assert ( list_empty ( &vmdev->pages ) );
 		list_del ( &vmdev->dev.siblings );
 		free ( vmdev );
 	}
 }

 /**
  * Probe Hyper-V virtual machine bus
  *
  * @v hv		Hyper-V hypervisor
  * @v parent		Parent device
  * @ret rc		Return status code
  */
 int vmbus_probe ( struct hv_hypervisor *hv, struct device *parent ) {
 	struct vmbus *vmbus;
 	int rc;

 	/* Allocate and initialise structure */
 	vmbus = zalloc ( sizeof ( *vmbus ) );
 	if ( ! vmbus ) {
 		rc = -ENOMEM;
 		goto err_alloc;
 	}
 	hv->vmbus = vmbus;

 	/* Initialise message buffer pointer
 	 *
 	 * We use a pointer to the fixed-size Hyper-V received message
 	 * buffer.  This allows us to access fields within received
 	 * messages without first checking the message size: any
 	 * fields beyond the end of the message will read as zero.
 	 */
 	vmbus->message = ( ( void * ) hv->message->received.data );
 	assert ( sizeof ( *vmbus->message ) <=
 		 sizeof ( hv->message->received.data ) );

 	/* Allocate interrupt and monitor pages */
 	if ( ( rc = hv_alloc_pages ( hv, &vmbus->intr, &vmbus->monitor_in,
 				     &vmbus->monitor_out, NULL ) ) != 0 )
 		goto err_alloc_pages;

 	/* Enable message interrupt */
 	hv_enable_sint ( hv, VMBUS_MESSAGE_SINT );

 	/* Negotiate protocol version */
 	if ( ( rc = vmbus_negotiate_version ( hv ) ) != 0 )
 		goto err_negotiate_version;

 	/* Enumerate channels */
 	if ( ( rc = vmbus_probe_channels ( hv, parent ) ) != 0 )
 		goto err_probe_channels;

 	return 0;

 	vmbus_remove_channels ( hv, parent );
  err_probe_channels:
 	vmbus_unload ( hv );
  err_negotiate_version:
 	hv_disable_sint ( hv, VMBUS_MESSAGE_SINT );
 	hv_free_pages ( hv, vmbus->intr, vmbus->monitor_in, vmbus->monitor_out,
 			NULL );
  err_alloc_pages:
 	free ( vmbus );
  err_alloc:
 	return rc;
 }

 /**
  * Reset Hyper-V virtual machine bus
  *
  * @v hv		Hyper-V hypervisor
  * @v parent		Parent device
  * @ret rc		Return status code
  */
 int vmbus_reset ( struct hv_hypervisor *hv, struct device *parent ) {
 	struct vmbus *vmbus = hv->vmbus;
 	int rc;

 	/* Mark all existent GPADLs as obsolete */
 	vmbus_obsolete_gpadl = vmbus_gpadl;

 	/* Clear interrupt and monitor pages */
 	memset ( vmbus->intr, 0, PAGE_SIZE );
 	memset ( vmbus->monitor_in, 0, PAGE_SIZE );
 	memset ( vmbus->monitor_out, 0, PAGE_SIZE );

 	/* Enable message interrupt */
 	hv_enable_sint ( hv, VMBUS_MESSAGE_SINT );

 	/* Renegotiate protocol version */
 	if ( ( rc = vmbus_negotiate_version ( hv ) ) != 0 )
 		return rc;

 	/* Reenumerate channels */
 	if ( ( rc = vmbus_reset_channels ( hv, parent ) ) != 0 )
 		return rc;

 	return 0;
 }

 /**
  * Remove Hyper-V virtual machine bus
  *
  * @v hv		Hyper-V hypervisor
  * @v parent		Parent device
  */
 void vmbus_remove ( struct hv_hypervisor *hv, struct device *parent ) {
 	struct vmbus *vmbus = hv->vmbus;

 	vmbus_remove_channels ( hv, parent );
 	vmbus_unload ( hv );
 	hv_disable_sint ( hv, VMBUS_MESSAGE_SINT );
 	hv_free_pages ( hv, vmbus->intr, vmbus->monitor_in, vmbus->monitor_out,
 			NULL );
 	free ( vmbus );
 }