/*
* Copyright (c) 2020 Nutanix Inc. All rights reserved.
*
* Authors: Thanos Makatos <thanos@nutanix.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Nutanix nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*
*/
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include "common.h"
#include "migration.h"
#include "private.h"
#include "migration_priv.h"
/*
* This defines valid migration state transitions. Each element in the array
* corresponds to a FROM state and each bit of the element to a TO state. If the
* bit is set, then the transition is allowed.
*
* The indices of each state are those in the vfio_user_device_mig_state enum.
*/
static const char transitions[VFIO_USER_DEVICE_NUM_STATES] = {
[VFIO_USER_DEVICE_STATE_ERROR] = 0,
[VFIO_USER_DEVICE_STATE_STOP] = (1 << VFIO_USER_DEVICE_STATE_RUNNING) |
(1 << VFIO_USER_DEVICE_STATE_STOP_COPY) |
(1 << VFIO_USER_DEVICE_STATE_RESUMING),
[VFIO_USER_DEVICE_STATE_RUNNING] = (1 << VFIO_USER_DEVICE_STATE_STOP) |
(1 << VFIO_USER_DEVICE_STATE_PRE_COPY),
[VFIO_USER_DEVICE_STATE_STOP_COPY] = 1 << VFIO_USER_DEVICE_STATE_STOP,
[VFIO_USER_DEVICE_STATE_RESUMING] = 1 << VFIO_USER_DEVICE_STATE_STOP,
[VFIO_USER_DEVICE_STATE_RUNNING_P2P] = 0,
[VFIO_USER_DEVICE_STATE_PRE_COPY] = (1 << VFIO_USER_DEVICE_STATE_RUNNING) |
(1 << VFIO_USER_DEVICE_STATE_STOP_COPY),
[VFIO_USER_DEVICE_STATE_PRE_COPY_P2P] = 0
};
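
/*
 * For example, the only bits set for RUNNING are STOP and PRE_COPY, so
 * (mirroring the check in vfio_migr_state_transition_is_valid() below):
 *
 *   transitions[VFIO_USER_DEVICE_STATE_RUNNING]
 *       & (1 << VFIO_USER_DEVICE_STATE_STOP)     is non-zero (allowed),
 *   transitions[VFIO_USER_DEVICE_STATE_RUNNING]
 *       & (1 << VFIO_USER_DEVICE_STATE_RESUMING) is zero (not allowed).
 */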
/*
* The spec dictates that, if no direct transition is allowed, and the
* transition is not one of the explicitly disallowed ones (i.e. anything to
* ERROR, anything from ERROR, and STOP_COPY -> PRE_COPY), we should take the
* shortest allowed path.
*
* This can be indexed as `next_state[current][target] == next`. If next is
* ERROR, then the transition is not allowed.
*/
static const uint32_t
next_state[VFIO_USER_DEVICE_NUM_STATES][VFIO_USER_DEVICE_NUM_STATES] = {
[VFIO_USER_DEVICE_STATE_ERROR] = { 0, 0, 0, 0, 0, 0, 0, 0 },
[VFIO_USER_DEVICE_STATE_STOP] = {
[VFIO_USER_DEVICE_STATE_ERROR] = VFIO_USER_DEVICE_STATE_ERROR,
[VFIO_USER_DEVICE_STATE_STOP] = VFIO_USER_DEVICE_STATE_STOP,
[VFIO_USER_DEVICE_STATE_RUNNING] = VFIO_USER_DEVICE_STATE_RUNNING,
[VFIO_USER_DEVICE_STATE_STOP_COPY] = VFIO_USER_DEVICE_STATE_STOP_COPY,
[VFIO_USER_DEVICE_STATE_RESUMING] = VFIO_USER_DEVICE_STATE_RESUMING,
[VFIO_USER_DEVICE_STATE_RUNNING_P2P] = VFIO_USER_DEVICE_STATE_ERROR,
[VFIO_USER_DEVICE_STATE_PRE_COPY] = VFIO_USER_DEVICE_STATE_RUNNING,
[VFIO_USER_DEVICE_STATE_PRE_COPY_P2P] = VFIO_USER_DEVICE_STATE_ERROR,
},
[VFIO_USER_DEVICE_STATE_RUNNING] = {
[VFIO_USER_DEVICE_STATE_ERROR] = VFIO_USER_DEVICE_STATE_ERROR,
[VFIO_USER_DEVICE_STATE_STOP] = VFIO_USER_DEVICE_STATE_STOP,
[VFIO_USER_DEVICE_STATE_RUNNING] = VFIO_USER_DEVICE_STATE_RUNNING,
[VFIO_USER_DEVICE_STATE_STOP_COPY] = VFIO_USER_DEVICE_STATE_STOP,
[VFIO_USER_DEVICE_STATE_RESUMING] = VFIO_USER_DEVICE_STATE_STOP,
[VFIO_USER_DEVICE_STATE_RUNNING_P2P] = VFIO_USER_DEVICE_STATE_ERROR,
[VFIO_USER_DEVICE_STATE_PRE_COPY] = VFIO_USER_DEVICE_STATE_PRE_COPY,
[VFIO_USER_DEVICE_STATE_PRE_COPY_P2P] = VFIO_USER_DEVICE_STATE_ERROR,
},
[VFIO_USER_DEVICE_STATE_STOP_COPY] = {
[VFIO_USER_DEVICE_STATE_ERROR] = VFIO_USER_DEVICE_STATE_ERROR,
[VFIO_USER_DEVICE_STATE_STOP] = VFIO_USER_DEVICE_STATE_STOP,
[VFIO_USER_DEVICE_STATE_RUNNING] = VFIO_USER_DEVICE_STATE_STOP,
[VFIO_USER_DEVICE_STATE_STOP_COPY] = VFIO_USER_DEVICE_STATE_STOP_COPY,
[VFIO_USER_DEVICE_STATE_RESUMING] = VFIO_USER_DEVICE_STATE_STOP,
[VFIO_USER_DEVICE_STATE_RUNNING_P2P] = VFIO_USER_DEVICE_STATE_ERROR,
[VFIO_USER_DEVICE_STATE_PRE_COPY] = VFIO_USER_DEVICE_STATE_ERROR,
[VFIO_USER_DEVICE_STATE_PRE_COPY_P2P] = VFIO_USER_DEVICE_STATE_ERROR,
},
[VFIO_USER_DEVICE_STATE_RESUMING] = {
[VFIO_USER_DEVICE_STATE_ERROR] = VFIO_USER_DEVICE_STATE_ERROR,
[VFIO_USER_DEVICE_STATE_STOP] = VFIO_USER_DEVICE_STATE_STOP,
[VFIO_USER_DEVICE_STATE_RUNNING] = VFIO_USER_DEVICE_STATE_STOP,
[VFIO_USER_DEVICE_STATE_STOP_COPY] = VFIO_USER_DEVICE_STATE_STOP,
[VFIO_USER_DEVICE_STATE_RESUMING] = VFIO_USER_DEVICE_STATE_RESUMING,
[VFIO_USER_DEVICE_STATE_RUNNING_P2P] = VFIO_USER_DEVICE_STATE_ERROR,
[VFIO_USER_DEVICE_STATE_PRE_COPY] = VFIO_USER_DEVICE_STATE_STOP,
[VFIO_USER_DEVICE_STATE_PRE_COPY_P2P] = VFIO_USER_DEVICE_STATE_ERROR,
},
[VFIO_USER_DEVICE_STATE_RUNNING_P2P] = { 0, 0, 0, 0, 0, 0, 0, 0 },
[VFIO_USER_DEVICE_STATE_PRE_COPY] = {
[VFIO_USER_DEVICE_STATE_ERROR] = VFIO_USER_DEVICE_STATE_ERROR,
[VFIO_USER_DEVICE_STATE_STOP] = VFIO_USER_DEVICE_STATE_RUNNING,
[VFIO_USER_DEVICE_STATE_RUNNING] = VFIO_USER_DEVICE_STATE_RUNNING,
[VFIO_USER_DEVICE_STATE_STOP_COPY] = VFIO_USER_DEVICE_STATE_STOP_COPY,
[VFIO_USER_DEVICE_STATE_RESUMING] = VFIO_USER_DEVICE_STATE_RUNNING,
[VFIO_USER_DEVICE_STATE_RUNNING_P2P] = VFIO_USER_DEVICE_STATE_ERROR,
[VFIO_USER_DEVICE_STATE_PRE_COPY] = VFIO_USER_DEVICE_STATE_PRE_COPY,
[VFIO_USER_DEVICE_STATE_PRE_COPY_P2P] = VFIO_USER_DEVICE_STATE_ERROR,
},
[VFIO_USER_DEVICE_STATE_PRE_COPY_P2P] = { 0, 0, 0, 0, 0, 0, 0, 0 },
};
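
/*
 * An illustrative walk of the table: there is no direct RUNNING -> RESUMING
 * edge, so the transition is routed through STOP:
 *
 *   next_state[VFIO_USER_DEVICE_STATE_RUNNING][VFIO_USER_DEVICE_STATE_RESUMING]
 *       == VFIO_USER_DEVICE_STATE_STOP
 *   next_state[VFIO_USER_DEVICE_STATE_STOP][VFIO_USER_DEVICE_STATE_RESUMING]
 *       == VFIO_USER_DEVICE_STATE_RESUMING
 *
 * migration_set_state() below iterates this table until the target state is
 * reached, notifying the device at every intermediate state.
 */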
bool
MOCK_DEFINE(vfio_migr_state_transition_is_valid)(uint32_t from, uint32_t to)
{
return from < VFIO_USER_DEVICE_NUM_STATES
&& to < VFIO_USER_DEVICE_NUM_STATES
&& (transitions[from] & (1 << to)) != 0;
}
/*
* TODO no need to dynamically allocate memory, we can keep struct migration
* in vfu_ctx_t.
*/
struct migration *
init_migration(const vfu_migration_callbacks_t *callbacks, int *err)
{
struct migration *migr;
migr = calloc(1, sizeof(*migr));
if (migr == NULL) {
*err = ENOMEM;
return NULL;
}
    /*
     * FIXME: this is incorrect: if the client doesn't supply a pgsize value,
     * that means "no migration support", so handle that case.
     * FIXME: pgsize must be available even if the migration callbacks aren't
     * used.
     */
migr->pgsize = sysconf(_SC_PAGESIZE);
/* FIXME this should be done in vfu_ctx_realize */
migr->state = VFIO_USER_DEVICE_STATE_RUNNING;
migr->callbacks = *callbacks;
if (migr->callbacks.transition == NULL ||
migr->callbacks.read_data == NULL ||
migr->callbacks.write_data == NULL ||
migr->callbacks.version != VFU_MIGR_CALLBACKS_VERS) {
free(migr);
*err = EINVAL;
return NULL;
}
return migr;
}
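
/*
 * A minimal sketch of the callbacks a device server passes in here
 * (typically via the public vfu_setup_device_migration_callbacks() entry
 * point; my_transition, my_read_data and my_write_data are hypothetical):
 *
 *   static const vfu_migration_callbacks_t callbacks = {
 *       .version    = VFU_MIGR_CALLBACKS_VERS,
 *       .transition = my_transition,   // required
 *       .read_data  = my_read_data,    // required
 *       .write_data = my_write_data,   // required
 *   };
 *
 * A NULL value for any of the three callbacks, or a version mismatch, is
 * rejected above with EINVAL.
 */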
void
MOCK_DEFINE(migr_state_transition)(struct migration *migr,
enum vfio_user_device_mig_state state)
{
assert(migr != NULL);
migr->state = state;
}
vfu_migr_state_t
MOCK_DEFINE(migr_state_vfio_to_vfu)(uint32_t state)
{
switch (state) {
case VFIO_USER_DEVICE_STATE_STOP:
return VFU_MIGR_STATE_STOP;
case VFIO_USER_DEVICE_STATE_RUNNING:
return VFU_MIGR_STATE_RUNNING;
case VFIO_USER_DEVICE_STATE_STOP_COPY:
return VFU_MIGR_STATE_STOP_AND_COPY;
case VFIO_USER_DEVICE_STATE_RESUMING:
return VFU_MIGR_STATE_RESUME;
case VFIO_USER_DEVICE_STATE_PRE_COPY:
return VFU_MIGR_STATE_PRE_COPY;
default:
return -1;
}
}
/**
 * Returns 0 on success, -1 on failure setting errno.
*/
int
MOCK_DEFINE(state_trans_notify)(vfu_ctx_t *vfu_ctx,
int (*fn)(vfu_ctx_t *, vfu_migr_state_t),
uint32_t vfio_device_state)
{
/*
* We've already checked that device_state is valid by calling
* vfio_migr_state_transition_is_valid.
*/
return fn(vfu_ctx, migr_state_vfio_to_vfu(vfio_device_state));
}
/**
* Returns 0 on success, -1 on failure setting errno.
*/
ssize_t
MOCK_DEFINE(migr_trans_to_valid_state)(vfu_ctx_t *vfu_ctx, struct migration *migr,
uint32_t device_state, bool notify)
{
if (notify) {
int ret;
assert(!vfu_ctx->in_cb);
vfu_ctx->in_cb = CB_MIGR_STATE;
ret = state_trans_notify(vfu_ctx, migr->callbacks.transition,
device_state);
vfu_ctx->in_cb = CB_NONE;
if (ret != 0) {
return ret;
}
}
migr_state_transition(migr, device_state);
return 0;
}
/**
* Returns 0 on success, -1 on failure setting errno.
*/
ssize_t
MOCK_DEFINE(handle_device_state)(vfu_ctx_t *vfu_ctx, struct migration *migr,
uint32_t device_state, bool notify)
{
assert(vfu_ctx != NULL);
assert(migr != NULL);
if (!vfio_migr_state_transition_is_valid(migr->state, device_state)) {
return ERROR_INT(EINVAL);
}
return migr_trans_to_valid_state(vfu_ctx, migr, device_state, notify);
}
size_t
migration_get_state(vfu_ctx_t *vfu_ctx)
{
return vfu_ctx->migration->state;
}
ssize_t
migration_set_state(vfu_ctx_t *vfu_ctx, uint32_t device_state)
{
struct migration *migr = vfu_ctx->migration;
uint32_t state;
ssize_t ret = 0;
    if (device_state >= VFIO_USER_DEVICE_NUM_STATES) {
return ERROR_INT(EINVAL);
}
while (migr->state != device_state && ret == 0) {
state = next_state[migr->state][device_state];
if (state == VFIO_USER_DEVICE_STATE_ERROR) {
return ERROR_INT(EINVAL);
}
ret = handle_device_state(vfu_ctx, migr, state, true);
    }
return ret;
}
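
/*
 * For example, a client request to move a RUNNING device directly to
 * STOP_COPY takes two iterations of the loop above, per the next_state
 * table:
 *
 *   RUNNING -> STOP       (transition callback invoked)
 *   STOP    -> STOP_COPY  (transition callback invoked again)
 *
 * If an intermediate transition fails, the loop stops and the error is
 * returned, with the device left in the last state successfully reached.
 */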
ssize_t
handle_mig_data_read(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg)
{
assert(vfu_ctx != NULL);
assert(msg != NULL);
if (msg->in.iov.iov_len < sizeof(struct vfio_user_mig_data)) {
vfu_log(vfu_ctx, LOG_ERR, "message too short (%ld)",
msg->in.iov.iov_len);
return ERROR_INT(EINVAL);
}
struct migration *migr = vfu_ctx->migration;
struct vfio_user_mig_data *req = msg->in.iov.iov_base;
if (vfu_ctx->migration == NULL) {
vfu_log(vfu_ctx, LOG_ERR, "migration not enabled");
return ERROR_INT(EINVAL);
}
if (migr->state != VFIO_USER_DEVICE_STATE_PRE_COPY
&& migr->state != VFIO_USER_DEVICE_STATE_STOP_COPY) {
vfu_log(vfu_ctx, LOG_ERR, "bad migration state to read data: %d",
migr->state);
return ERROR_INT(EINVAL);
}
if (req->size > vfu_ctx->client_max_data_xfer_size) {
vfu_log(vfu_ctx, LOG_ERR, "transfer size exceeds limit (%d > %ld)",
req->size, vfu_ctx->client_max_data_xfer_size);
return ERROR_INT(EINVAL);
}
if (req->argsz < sizeof(struct vfio_user_mig_data) + req->size) {
vfu_log(vfu_ctx, LOG_ERR, "argsz too small (%d < %ld)",
req->argsz, sizeof(struct vfio_user_mig_data) + req->size);
return ERROR_INT(EINVAL);
}
msg->out.iov.iov_len = msg->in.iov.iov_len + req->size;
msg->out.iov.iov_base = calloc(1, msg->out.iov.iov_len);
if (msg->out.iov.iov_base == NULL) {
return ERROR_INT(ENOMEM);
}
struct vfio_user_mig_data *res = msg->out.iov.iov_base;
ssize_t ret = migr->callbacks.read_data(vfu_ctx, &res->data, req->size);
if (ret < 0) {
vfu_log(vfu_ctx, LOG_ERR, "read_data callback failed, errno=%d", errno);
iov_free(&msg->out.iov);
return ret;
}
res->size = ret;
res->argsz = sizeof(struct vfio_user_mig_data) + ret;
return 0;
}
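
/*
 * A minimal sketch of a matching read_data callback, assuming the device
 * serializes its state into a flat buffer (dev_state, dev_state_size and
 * dev_state_off are hypothetical):
 *
 *   static ssize_t
 *   my_read_data(vfu_ctx_t *vfu_ctx, void *buf, uint64_t count)
 *   {
 *       uint64_t n = MIN(count, dev_state_size - dev_state_off);
 *
 *       memcpy(buf, dev_state + dev_state_off, n);
 *       dev_state_off += n;
 *       return n; // bytes produced; may be fewer than requested
 *   }
 */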
ssize_t
handle_mig_data_write(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg)
{
assert(vfu_ctx != NULL);
assert(msg != NULL);
if (msg->in.iov.iov_len < sizeof(struct vfio_user_mig_data)) {
vfu_log(vfu_ctx, LOG_ERR, "message too short (%ld)",
msg->in.iov.iov_len);
return ERROR_INT(EINVAL);
}
struct migration *migr = vfu_ctx->migration;
struct vfio_user_mig_data *req = msg->in.iov.iov_base;
if (vfu_ctx->migration == NULL) {
vfu_log(vfu_ctx, LOG_ERR, "migration not enabled");
return ERROR_INT(EINVAL);
}
if (migr->state != VFIO_USER_DEVICE_STATE_RESUMING) {
vfu_log(vfu_ctx, LOG_ERR, "bad migration state to write data: %d",
migr->state);
return ERROR_INT(EINVAL);
}
if (req->size > vfu_ctx->client_max_data_xfer_size) {
vfu_log(vfu_ctx, LOG_ERR, "transfer size exceeds limit (%d > %ld)",
req->size, vfu_ctx->client_max_data_xfer_size);
return ERROR_INT(EINVAL);
}
if (req->argsz < sizeof(struct vfio_user_mig_data) + req->size) {
vfu_log(vfu_ctx, LOG_ERR, "argsz too small (%d < %ld)",
req->argsz, sizeof(struct vfio_user_mig_data) + req->size);
return ERROR_INT(EINVAL);
}
if (msg->in.iov.iov_len < sizeof(struct vfio_user_mig_data) + req->size) {
vfu_log(vfu_ctx, LOG_ERR, "short write (%d < %ld)",
req->argsz, sizeof(struct vfio_user_mig_data) + req->size);
return ERROR_INT(EINVAL);
}
ssize_t ret = migr->callbacks.write_data(vfu_ctx, &req->data, req->size);
if (ret < 0) {
vfu_log(vfu_ctx, LOG_ERR, "write_data callback failed, errno=%d",
errno);
return ret;
} else if (ret != req->size) {
vfu_log(vfu_ctx, LOG_ERR, "migration data partial write of size=%ld",
ret);
return ERROR_INT(EINVAL);
}
return 0;
}
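
/*
 * Unlike read_data, a write_data callback must consume exactly `count`
 * bytes or fail, since a partial write is treated as an error above. A
 * sketch under the same hypothetical flat-buffer assumption:
 *
 *   static ssize_t
 *   my_write_data(vfu_ctx_t *vfu_ctx, void *buf, uint64_t count)
 *   {
 *       if (count > dev_state_size - dev_state_off) {
 *           errno = EINVAL;
 *           return -1;
 *       }
 *       memcpy(dev_state + dev_state_off, buf, count);
 *       dev_state_off += count;
 *       return count;
 *   }
 */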
bool
MOCK_DEFINE(device_is_stopped_and_copying)(struct migration *migr)
{
return migr != NULL && migr->state == VFIO_USER_DEVICE_STATE_STOP_COPY;
}
bool
MOCK_DEFINE(device_is_stopped)(struct migration *migr)
{
return migr != NULL && migr->state == VFIO_USER_DEVICE_STATE_STOP;
}
size_t
migration_get_pgsize(struct migration *migr)
{
assert(migr != NULL);
return migr->pgsize;
}
int
migration_set_pgsize(struct migration *migr, size_t pgsize)
{
assert(migr != NULL);
    /* FIXME: only the host page size is currently supported */
if (pgsize != PAGE_SIZE) {
return ERROR_INT(EINVAL);
}
migr->pgsize = pgsize;
return 0;
}
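
/*
 * Returns true if serving this feature request may change the migration
 * device state (i.e. it is a set, rather than a probe, touching
 * VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE), in which case the device must be
 * quiesced first.
 */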
bool
migration_feature_needs_quiesce(struct vfio_user_device_feature *feature)
{
return ((feature->flags &
(VFIO_DEVICE_FEATURE_SET | VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE)) != 0)
&& !(feature->flags & VFIO_DEVICE_FEATURE_PROBE);
}
/* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */