/*
* Copyright (c) 2020 Nutanix Inc. All rights reserved.
*
* Authors: Thanos Makatos <thanos@nutanix.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Nutanix nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*
*/

#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stddef.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>

#include "common.h"
#include "migration.h"
#include "private.h"
#include "migration_priv.h"
bool
MOCK_DEFINE(vfio_migr_state_transition_is_valid)(uint32_t from, uint32_t to)
{
return migr_states[from].state & (1 << to);
}
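
/*
 * A device typically reserves at least this much space at the start of the
 * migration region for the registers and places the migration data after it.
 * A minimal sketch, assuming the public
 * vfu_setup_device_migration_callbacks() API (callbacks defined elsewhere by
 * the device):
 *
 *   uint64_t data_offset = vfu_get_migr_register_area_size();
 *   int ret = vfu_setup_device_migration_callbacks(vfu_ctx, &callbacks,
 *                                                  data_offset);
 *
 * init_migration() below rejects any data_offset smaller than this size.
 */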
EXPORT size_t
vfu_get_migr_register_area_size(void)
{
return ROUND_UP(sizeof(struct vfio_user_migration_info),
sysconf(_SC_PAGE_SIZE));
}
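
/*
 * A minimal sketch of the callback set init_migration() expects; every
 * my_*() function is a hypothetical device-specific implementation, and
 * VFU_MIGR_CALLBACKS_VERS is assumed from libvfio-user.h:
 *
 *   const vfu_migration_callbacks_t callbacks = {
 *       .version = VFU_MIGR_CALLBACKS_VERS,
 *       .transition = &my_transition,
 *       .get_pending_bytes = &my_get_pending_bytes,
 *       .prepare_data = &my_prepare_data,
 *       .read_data = &my_read_data,
 *       .write_data = &my_write_data,
 *       .data_written = &my_data_written,
 *   };
 *
 * transition, get_pending_bytes, prepare_data, read_data and write_data are
 * mandatory: init_migration() fails with EINVAL if any of them is NULL.
 */
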
/*
* TODO no need to dynamically allocate memory, we can keep struct migration
* in vfu_ctx_t.
*/
struct migration *
init_migration(const vfu_migration_callbacks_t * callbacks,
uint64_t data_offset, int *err)
{
struct migration *migr;
if (data_offset < vfu_get_migr_register_area_size()) {
*err = EINVAL;
return NULL;
}
migr = calloc(1, sizeof(*migr));
if (migr == NULL) {
*err = ENOMEM;
return NULL;
}
    /*
     * FIXME: incorrect; if the client doesn't give a pgsize value, it means
     * "no migration support", so handle this.
     * FIXME: must be available even if migration callbacks aren't used.
     */
migr->pgsize = sysconf(_SC_PAGESIZE);
/* FIXME this should be done in vfu_ctx_realize */
migr->info.device_state = VFIO_DEVICE_STATE_V1_RUNNING;
migr->data_offset = data_offset;
migr->callbacks = *callbacks;
if (migr->callbacks.transition == NULL ||
migr->callbacks.get_pending_bytes == NULL ||
migr->callbacks.prepare_data == NULL ||
migr->callbacks.read_data == NULL ||
migr->callbacks.write_data == NULL) {
free(migr);
*err = EINVAL;
return NULL;
}
return migr;
}

void
MOCK_DEFINE(migr_state_transition)(struct migration *migr,
enum migr_iter_state state)
{
assert(migr != NULL);
    /* FIXME validate this state transition */
migr->iter.state = state;
}

vfu_migr_state_t
MOCK_DEFINE(migr_state_vfio_to_vfu)(uint32_t device_state)
{
switch (device_state) {
case VFIO_DEVICE_STATE_V1_STOP:
return VFU_MIGR_STATE_STOP;
case VFIO_DEVICE_STATE_V1_RUNNING:
return VFU_MIGR_STATE_RUNNING;
case VFIO_DEVICE_STATE_V1_SAVING:
        /*
         * FIXME How should the device operate during the stop-and-copy
         * phase? Should we only allow the migration data to be read from
         * the migration region, and fail access to any other region? This
         * might be a good question to send to LKML.
         */
return VFU_MIGR_STATE_STOP_AND_COPY;
case VFIO_DEVICE_STATE_V1_RUNNING | VFIO_DEVICE_STATE_V1_SAVING:
return VFU_MIGR_STATE_PRE_COPY;
case VFIO_DEVICE_STATE_V1_RESUMING:
return VFU_MIGR_STATE_RESUME;
}
    return -1; /* unreachable when device_state has been validated */
}

/**
* Returns 0 on success, -1 on error setting errno.
*/
int
MOCK_DEFINE(state_trans_notify)(vfu_ctx_t *vfu_ctx,
int (*fn)(vfu_ctx_t *, vfu_migr_state_t),
uint32_t vfio_device_state)
{
/*
* We've already checked that device_state is valid by calling
* vfio_migr_state_transition_is_valid.
*/
return fn(vfu_ctx, migr_state_vfio_to_vfu(vfio_device_state));
}

/**
* Returns 0 on success, -1 on failure setting errno.
*/
ssize_t
MOCK_DEFINE(migr_trans_to_valid_state)(vfu_ctx_t *vfu_ctx, struct migration *migr,
uint32_t device_state, bool notify)
{
if (notify) {
int ret;
assert(!vfu_ctx->in_cb);
vfu_ctx->in_cb = CB_MIGR_STATE;
ret = state_trans_notify(vfu_ctx, migr->callbacks.transition,
device_state);
vfu_ctx->in_cb = CB_NONE;
if (ret != 0) {
return ret;
}
}
migr->info.device_state = device_state;
migr_state_transition(migr, VFIO_USER_MIGR_ITER_STATE_INITIAL);
return 0;
}

/**
* Returns 0 on success, -1 on failure setting errno.
*/
ssize_t
MOCK_DEFINE(handle_device_state)(vfu_ctx_t *vfu_ctx, struct migration *migr,
uint32_t device_state, bool notify)
{
assert(migr != NULL);
if (!vfio_migr_state_transition_is_valid(migr->info.device_state,
device_state)) {
return ERROR_INT(EINVAL);
}
return migr_trans_to_valid_state(vfu_ctx, migr, device_state, notify);
}
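
/*
 * The save-side iteration implied by the handlers below follows the v1
 * sequence described in <linux/vfio.h> (informational sketch of the
 * client's reads):
 *
 *   1. read pending_bytes; if it is 0, the iteration has finished
 *   2. read data_offset (the first read triggers callbacks.prepare_data)
 *   3. read data_size
 *   4. read data_size bytes of data from (region + data_offset)
 *   5. repeat from step 1
 */
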
/**
* Returns 0 on success, -1 on error setting errno.
*/
static ssize_t
handle_pending_bytes(vfu_ctx_t *vfu_ctx, struct migration *migr,
uint64_t *pending_bytes, bool is_write)
{
assert(migr != NULL);
assert(pending_bytes != NULL);
if (is_write) {
return ERROR_INT(EINVAL);
}
if (migr->iter.state == VFIO_USER_MIGR_ITER_STATE_FINISHED) {
*pending_bytes = 0;
return 0;
}
switch (migr->iter.state) {
case VFIO_USER_MIGR_ITER_STATE_INITIAL:
case VFIO_USER_MIGR_ITER_STATE_DATA_PREPARED:
/*
* FIXME what happens if data haven't been consumed in the previous
* iteration? Check https://www.spinics.net/lists/kvm/msg228608.html.
*/
        *pending_bytes = migr->iter.pending_bytes =
            migr->callbacks.get_pending_bytes(vfu_ctx);
if (*pending_bytes == 0) {
migr_state_transition(migr, VFIO_USER_MIGR_ITER_STATE_FINISHED);
} else {
migr_state_transition(migr, VFIO_USER_MIGR_ITER_STATE_STARTED);
}
break;
case VFIO_USER_MIGR_ITER_STATE_STARTED:
        /*
         * FIXME We might be wrong returning a cached value; check
         * https://www.spinics.net/lists/kvm/msg228608.html
         */
*pending_bytes = migr->iter.pending_bytes;
break;
default:
return ERROR_INT(EINVAL);
}
return 0;
}

/*
 * FIXME reading or writing migration registers with the wrong device state
 * or out of sequence is undefined; it should not result in EINVAL but should
 * simply be ignored. However, failing the access makes it easier to catch
 * development errors, so make this behavior conditional.
 */

/**
 * Returns 0 on success, -1 on error setting errno.
 */
static ssize_t
handle_data_offset_when_saving(vfu_ctx_t *vfu_ctx, struct migration *migr,
bool is_write)
{
int ret = 0;
assert(migr != NULL);
if (is_write) {
vfu_log(vfu_ctx, LOG_ERR, "data_offset is RO when saving");
return ERROR_INT(EINVAL);
}
switch (migr->iter.state) {
case VFIO_USER_MIGR_ITER_STATE_STARTED:
ret = migr->callbacks.prepare_data(vfu_ctx, &migr->iter.offset,
&migr->iter.size);
if (ret != 0) {
return ret;
}
        /*
         * FIXME the client must first read data_offset and then data_size.
         * The way we've implemented it now, if data_size is read before
         * data_offset we transition to state
         * VFIO_USER_MIGR_ITER_STATE_DATA_PREPARED without calling
         * callbacks.prepare_data, which is wrong. Maybe we need separate
         * states for data_offset and data_size.
         */
migr_state_transition(migr, VFIO_USER_MIGR_ITER_STATE_DATA_PREPARED);
break;
case VFIO_USER_MIGR_ITER_STATE_DATA_PREPARED:
/*
* data_offset is invariant during a save iteration.
*/
break;
default:
vfu_log(vfu_ctx, LOG_ERR,
"reading data_offset out of sequence is undefined");
return ERROR_INT(EINVAL);
}
return 0;
}

/**
* Returns 0 on success, -1 on error setting errno.
*/
static ssize_t
handle_data_offset(vfu_ctx_t *vfu_ctx, struct migration *migr,
uint64_t *offset, bool is_write)
{
int ret;
assert(migr != NULL);
assert(offset != NULL);
switch (migr->info.device_state) {
case VFIO_DEVICE_STATE_V1_SAVING:
case VFIO_DEVICE_STATE_V1_RUNNING | VFIO_DEVICE_STATE_V1_SAVING:
ret = handle_data_offset_when_saving(vfu_ctx, migr, is_write);
if (ret == 0 && !is_write) {
*offset = migr->iter.offset + migr->data_offset;
}
return ret;
case VFIO_DEVICE_STATE_V1_RESUMING:
if (is_write) {
            /* TODO writing to read-only registers should simply be ignored */
vfu_log(vfu_ctx, LOG_ERR, "bad write to migration data_offset");
return ERROR_INT(EINVAL);
}
ret = migr->callbacks.prepare_data(vfu_ctx, offset, NULL);
if (ret != 0) {
return ret;
}
*offset += migr->data_offset;
return 0;
}
/* TODO improve error message */
vfu_log(vfu_ctx, LOG_ERR,
"bad access to migration data_offset in state %s",
migr_states[migr->info.device_state].name);
return ERROR_INT(EINVAL);
}

/**
* Returns 0 on success, -1 on failure setting errno.
*/
static ssize_t
handle_data_size_when_saving(vfu_ctx_t *vfu_ctx, struct migration *migr,
bool is_write)
{
assert(migr != NULL);
if (is_write) {
/* TODO improve error message */
vfu_log(vfu_ctx, LOG_ERR, "data_size is RO when saving");
return ERROR_INT(EINVAL);
}
if (migr->iter.state != VFIO_USER_MIGR_ITER_STATE_STARTED &&
migr->iter.state != VFIO_USER_MIGR_ITER_STATE_DATA_PREPARED) {
        vfu_log(vfu_ctx, LOG_ERR,
                "reading data_size out of sequence is undefined");
return ERROR_INT(EINVAL);
}
return 0;
}
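
/*
 * When resuming, the direction is reversed: the client writes migration
 * data to (region + data_offset) and then writes the number of bytes
 * written to data_size, which is forwarded to the device through the
 * data_written callback below.
 */
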
/**
* Returns 0 on success, -1 on error setting errno.
*/
static ssize_t
handle_data_size_when_resuming(vfu_ctx_t *vfu_ctx, struct migration *migr,
uint64_t size, bool is_write)
{
assert(migr != NULL);
if (is_write) {
return migr->callbacks.data_written(vfu_ctx, size);
}
return 0;
}

/**
* Returns 0 on success, -1 on failure setting errno.
*/
static ssize_t
handle_data_size(vfu_ctx_t *vfu_ctx, struct migration *migr,
uint64_t *size, bool is_write)
{
int ret;
assert(vfu_ctx != NULL);
assert(size != NULL);
    switch (migr->info.device_state) {
case VFIO_DEVICE_STATE_V1_SAVING:
case VFIO_DEVICE_STATE_V1_RUNNING | VFIO_DEVICE_STATE_V1_SAVING:
ret = handle_data_size_when_saving(vfu_ctx, migr, is_write);
if (ret == 0 && !is_write) {
*size = migr->iter.size;
}
return ret;
case VFIO_DEVICE_STATE_V1_RESUMING:
return handle_data_size_when_resuming(vfu_ctx, migr, *size, is_write);
}
/* TODO improve error message */
vfu_log(vfu_ctx, LOG_ERR, "bad access to data_size");
return ERROR_INT(EINVAL);
}

/**
* Returns 0 on success, -1 on failure setting errno.
*/
ssize_t
MOCK_DEFINE(migration_region_access_registers)(vfu_ctx_t *vfu_ctx, char *buf,
size_t count, loff_t pos,
bool is_write)
{
struct migration *migr = vfu_ctx->migration;
int ret;
uint32_t *device_state, old_device_state;
assert(migr != NULL);
switch (pos) {
case offsetof(struct vfio_user_migration_info, device_state):
if (count != sizeof(migr->info.device_state)) {
vfu_log(vfu_ctx, LOG_ERR,
"bad device_state access size %zu", count);
return ERROR_INT(EINVAL);
}
device_state = (uint32_t *)buf;
if (!is_write) {
*device_state = migr->info.device_state;
return 0;
}
        /*
         * Client-controlled value: bounds-check before using it to index
         * migr_states here and in vfio_migr_state_transition_is_valid().
         */
        if (*device_state >= sizeof(migr_states) / sizeof(migr_states[0])) {
            vfu_log(vfu_ctx, LOG_ERR, "bad device state %#x", *device_state);
            return ERROR_INT(EINVAL);
        }
        old_device_state = migr->info.device_state;
vfu_log(vfu_ctx, LOG_DEBUG,
"migration: transitioning from state %s to state %s",
migr_states[old_device_state].name,
migr_states[*device_state].name);
ret = handle_device_state(vfu_ctx, migr, *device_state, true);
if (ret == 0) {
vfu_log(vfu_ctx, LOG_DEBUG,
"migration: transitioned from state %s to state %s",
migr_states[old_device_state].name,
migr_states[*device_state].name);
} else {
vfu_log(vfu_ctx, LOG_ERR,
"migration: failed to transition from state %s to state %s",
migr_states[old_device_state].name,
migr_states[*device_state].name);
}
break;
case offsetof(struct vfio_user_migration_info, pending_bytes):
if (count != sizeof(migr->info.pending_bytes)) {
vfu_log(vfu_ctx, LOG_ERR,
"bad pending_bytes access size %zu", count);
return ERROR_INT(EINVAL);
}
ret = handle_pending_bytes(vfu_ctx, migr, (uint64_t *)buf, is_write);
break;
case offsetof(struct vfio_user_migration_info, data_offset):
if (count != sizeof(migr->info.data_offset)) {
vfu_log(vfu_ctx, LOG_ERR,
"bad data_offset access size %zu", count);
return ERROR_INT(EINVAL);
}
ret = handle_data_offset(vfu_ctx, migr, (uint64_t *)buf, is_write);
break;
case offsetof(struct vfio_user_migration_info, data_size):
if (count != sizeof(migr->info.data_size)) {
vfu_log(vfu_ctx, LOG_ERR,
"bad data_size access size %zu", count);
return ERROR_INT(EINVAL);
}
ret = handle_data_size(vfu_ctx, migr, (uint64_t *)buf, is_write);
break;
default:
vfu_log(vfu_ctx, LOG_ERR,
"bad migration region register offset %#llx",
(ull_t)pos);
return ERROR_INT(EINVAL);
}
return ret;
}
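
/*
 * Layout of the migration region, as dispatched by migration_region_access()
 * below:
 *
 *   [0, sizeof(struct vfio_user_migration_info))   registers, handled above
 *   [sizeof(struct vfio_user_migration_info),
 *    data_offset)                                  dead space, access fails
 *   [data_offset, ...)                             migration data, forwarded
 *                                                  to read_data/write_data
 */
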
ssize_t
migration_region_access(vfu_ctx_t *vfu_ctx, char *buf, size_t count,
loff_t pos, bool is_write)
{
struct migration *migr = vfu_ctx->migration;
ssize_t ret;
assert(migr != NULL);
assert(buf != NULL);
    /*
     * FIXME don't call the device callback if the migration state is not in
     * the pre-copy/stop-and-copy/resuming state, since the behavior is
     * undefined in that case.
     */
if (pos + count <= sizeof(struct vfio_user_migration_info)) {
ret = migration_region_access_registers(vfu_ctx, buf, count,
pos, is_write);
if (ret != 0) {
return ret;
}
} else {
if (pos < (loff_t)migr->data_offset) {
/*
* TODO we can simply ignore the access to that part and handle
* any access to the data region properly.
*/
vfu_log(vfu_ctx, LOG_WARNING,
"bad access to dead space %#llx - %#llx in migration region",
(ull_t)pos,
(ull_t)(pos + count - 1));
return ERROR_INT(EINVAL);
}
pos -= migr->data_offset;
if (is_write) {
ret = migr->callbacks.write_data(vfu_ctx, buf, count, pos);
if (ret < 0) {
return -1;
}
} else {
/*
* FIXME <linux/vfio.h> says:
*
* d. Read data_size bytes of data from (region + data_offset) from the
* migration region.
*
* Does this mean that partial reads are not allowed?
*/
ret = migr->callbacks.read_data(vfu_ctx, buf, count, pos);
if (ret < 0) {
return -1;
}
}
}
return count;
}

bool
MOCK_DEFINE(device_is_stopped_and_copying)(struct migration *migr)
{
return migr != NULL && migr->info.device_state == VFIO_DEVICE_STATE_V1_SAVING;
}

bool
MOCK_DEFINE(device_is_stopped)(struct migration *migr)
{
return migr != NULL && migr->info.device_state == VFIO_DEVICE_STATE_V1_STOP;
}

size_t
migration_get_pgsize(struct migration *migr)
{
assert(migr != NULL);
return migr->pgsize;
}

int
migration_set_pgsize(struct migration *migr, size_t pgsize)
{
assert(migr != NULL);
    /* FIXME only the host page size is currently supported */
if (pgsize != PAGE_SIZE) {
return ERROR_INT(EINVAL);
}
migr->pgsize = pgsize;
return 0;
}

bool
access_migration_needs_quiesce(const vfu_ctx_t *vfu_ctx, size_t region_index,
uint64_t offset)
{
    /*
     * Writing to the migration state register with an unaligned access won't
     * trigger this check, but that's not a problem because
     * migration_region_access_registers will fail the access.
     */
return region_index == VFU_PCI_DEV_MIGR_REGION_IDX
&& vfu_ctx->migration != NULL
&& offset == offsetof(struct vfio_user_migration_info, device_state);
}

/* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */