File I/O DMA region access mode (#842)
This adds an enum to convey how a DMA region is to be accessed. Previously,
this was implicit in the presence of a file descriptor in the DMA region
tracking information. With this change, a new file I/O access mode is added
that designates accesses should happen via file I/O syscalls, such as
`pread()`/`pwrite()`. This is useful for file descriptors that don't support
`mmap()`, such as `/proc/<pid>/mem`. The access mode to be used is determined
from new flag bits in the VFIO_USER_DMA_MAP message, and the message handler
makes sure the passed flags and file descriptor presence are consistent.
Existing code is updated to use the access mode in conditionals rather than
testing file descriptor presence.
Signed-off-by: Mattias Nissler <mnissler@meta.com>
diff --git a/include/libvfio-user.h b/include/libvfio-user.h
index 9b81420..76181ad 100644
--- a/include/libvfio-user.h
+++ b/include/libvfio-user.h
@@ -697,7 +697,9 @@
* vfu_sgl_put().
*
* This is only supported when a @dma_unregister callback is provided to
- * vfu_setup_device_dma().
+ * vfu_setup_device_dma(). In addition, the client must have registered the
+ * relevant DMA regions to allow mmap()-ed access and supplied file
+ * descriptors, otherwise this function will fail with EFAULT.
*
* @vfu_ctx: the libvfio-user context
* @sgl: array of scatter/gather entries returned by vfu_addr_to_sg. These
@@ -738,12 +740,19 @@
void
vfu_sgl_put(vfu_ctx_t *vfu_ctx, dma_sg_t *sgl, struct iovec *iov, size_t cnt);
+/* Flags to vfu_sgl_read() and vfu_sgl_write(). */
+
+/* Perform direct access via file descriptor, if possible. */
+#define VFU_SGL_DIRECT_ACCESS (1)
+
/**
* Read from the dma region exposed by the client. This can be used as an
* alternative to reading from a vfu_sgl_get() mapping, if the region is not
* directly mappable, or DMA notification callbacks have not been provided.
*
- * The implementation involves a round-trip communication with the client.
+ * When the VFU_SGL_DIRECT_ACCESS flag is specified, the DMA operation is
+ * performed against a client-supplied file descriptor, if applicable.
+ * Otherwise, the operation incurs an IPC round-trip with the client.
*
* Note that currently, only one @sg entry is supported (@sg_cnt must be 1).
*
@@ -751,7 +760,7 @@
* @sg: array of scatter/gather entries
* @sg_cnt: number of scatter/gather entries
* @data: data buffer to read
- * @flags: must be 0
+ * @flags: VFU_SGL_* flags
*
* @returns 0 on success, -1 on failure. Sets errno.
*/
@@ -764,18 +773,21 @@
* alternative to reading from a vfu_sgl_get() mapping, if the region is not
* directly mappable, or DMA notification callbacks have not been provided.
*
- * The implementation involves a round-trip communication with the client.
+ * When the VFU_SGL_DIRECT_ACCESS flag is specified, the DMA operation is
+ * performed against a client-supplied file descriptor, if applicable.
+ * Otherwise, the operation incurs an IPC round-trip with the client.
*
* Note that currently, only one @sg entry is supported (@sg_cnt must be 1).
*
- * During live migration, this call does not mark any of the written pages as
- * dirty; the client is expected to track this.
+ * During live migration, this call does only mark the written pages as dirty
+ * if the access is performed directly against a file descriptor. For access
+ * via IPC, the client is expected to track which pages have been written.
*
* @vfu_ctx: the libvfio-user context
* @sg: array of scatter/gather entries
* @sg_cnt: number of scatter/gather entries
* @data: data buffer to write
- * @flags: must be 0
+ * @flags: VFU_SGL_* flags
*
* @returns 0 on success, -1 on failure. Sets errno.
*/
diff --git a/include/vfio-user.h b/include/vfio-user.h
index 0b115d3..eb1b141 100644
--- a/include/vfio-user.h
+++ b/include/vfio-user.h
@@ -122,6 +122,8 @@
uint32_t argsz;
#define VFIO_USER_F_DMA_REGION_READ (1 << 0)
#define VFIO_USER_F_DMA_REGION_WRITE (1 << 1)
+#define VFIO_USER_F_DMA_REGION_MMAP (1 << 2)
+#define VFIO_USER_F_DMA_REGION_FILE_IO (1 << 3)
uint32_t flags;
uint64_t offset;
uint64_t addr;
diff --git a/lib/dma.c b/lib/dma.c
index 724c170..c0bea6d 100644
--- a/lib/dma.c
+++ b/lib/dma.c
@@ -52,7 +52,7 @@
bool
dma_sg_is_mappable(const dma_sg_t *sg) {
- return sg->region->info.vaddr != NULL;
+ return sg->region->access_mode == REGION_ACCESS_MODE_MMAP;
}
static inline ssize_t
@@ -69,7 +69,7 @@
static int
dirty_page_logging_start_on_region(dma_memory_region_t *region, size_t pgsize)
{
- assert(region->fd != -1);
+ assert(region->access_mode != REGION_ACCESS_MODE_MSG);
ssize_t size = get_bitmap_size(region->info.iova.iov_len, pgsize);
if (size < 0) {
@@ -158,13 +158,42 @@
region->fd, region->info.mapping.iov_base,
iov_end(®ion->info.mapping));
}
+}
- assert(region->fd != -1);
+static void
+dma_controller_destroy_region(dma_controller_t *dma, dma_memory_region_t *region,
+ vfu_dma_unregister_cb_t *dma_unregister,
+ void *data)
+{
+ int err;
+ assert(dma != NULL);
+ assert(region != NULL);
+
+ if (dma_unregister != NULL) {
+ dma->vfu_ctx->in_cb = CB_DMA_UNREGISTER;
+ dma_unregister(data, ®ion->info);
+ dma->vfu_ctx->in_cb = CB_NONE;
+ }
+
+ switch (region->access_mode) {
+ case REGION_ACCESS_MODE_MSG:
+ assert(region->fd == -1);
+ break;
+ case REGION_ACCESS_MODE_MMAP:
+ if (region->info.vaddr != NULL) {
+ dma_controller_unmap_region(dma, region);
+ }
+ /* fall through */
+ case REGION_ACCESS_MODE_FILE_IO:
+ err = fd_cache_put(®ion->fd);
+ assert(err == 0);
+ break;
+ }
+
+ dma_controller_increment_regions_generation(dma);
dirty_page_logging_stop_on_region(region);
-
- err = fd_cache_put(®ion->fd);
- assert(err == 0);
+ free(region);
}
/* FIXME not thread safe */
@@ -190,21 +219,8 @@
return ERROR_INT(ENOENT);
}
- if (dma_unregister != NULL) {
- dma->vfu_ctx->in_cb = CB_DMA_UNREGISTER;
- dma_unregister(data, ®ion->info);
- dma->vfu_ctx->in_cb = CB_NONE;
- }
-
- if (region->info.vaddr != NULL) {
- dma_controller_unmap_region(dma, region);
- } else {
- assert(region->fd == -1);
- }
-
btree_iter_remove(&iter);
- dma_controller_increment_regions_generation(dma);
- free(region);
+ dma_controller_destroy_region(dma, region, dma_unregister, data);
return 0;
}
@@ -228,19 +244,7 @@
region->info.vaddr, region->info.mapping.iov_base,
iov_end(®ion->info.mapping));
- if (dma_unregister != NULL) {
- dma->vfu_ctx->in_cb = CB_DMA_UNREGISTER;
- dma_unregister(data, ®ion->info);
- dma->vfu_ctx->in_cb = CB_NONE;
- }
-
- if (region->info.vaddr != NULL) {
- dma_controller_unmap_region(dma, region);
- } else {
- assert(region->fd == -1);
- }
-
- free(region);
+ dma_controller_destroy_region(dma, region, dma_unregister, data);
}
}
@@ -289,7 +293,8 @@
dma_memory_region_t *
MOCK_DEFINE(dma_controller_add_region)(dma_controller_t *dma,
vfu_dma_addr_t dma_addr, uint64_t size,
- int fd, off_t offset, uint32_t prot)
+ int *fd, off_t offset, uint32_t prot,
+ enum region_access_mode access_mode)
{
dma_memory_region_t *existing = NULL;
dma_memory_region_t *region = NULL;
@@ -297,12 +302,11 @@
int page_size = 0;
char rstr[1024];
int ret = 0;
- int err;
assert(dma != NULL);
snprintf(rstr, sizeof(rstr), "[%p, %p) fd=%d offset=%#llx prot=%#x",
- dma_addr, dma_addr + size, fd, (ull_t)offset, prot);
+ dma_addr, dma_addr + size, *fd, (ull_t)offset, prot);
if (size > dma->max_size) {
vfu_log(dma->vfu_ctx, LOG_ERR, "DMA region size %llu > max %zu",
@@ -322,7 +326,7 @@
(ull_t)existing->offset);
return ERROR_PTR(EINVAL);
}
- if (fd_cache_is_same_file(existing->fd, fd) != 0) {
+ if (fd_cache_is_same_file(existing->fd, *fd) != 0) {
vfu_log(dma->vfu_ctx, LOG_ERR, "bad fd for new DMA region %s; "
"existing=%d", rstr, existing->fd);
return ERROR_PTR(EINVAL);
@@ -332,7 +336,7 @@
"%s; existing=%#x", rstr, existing->info.prot);
return ERROR_PTR(EINVAL);
}
- close_safely(&fd);
+ close_safely(fd);
return existing;
}
@@ -353,82 +357,81 @@
return ERROR_PTR(EINVAL);
}
- if (fd != -1) {
- page_size = fd_get_blocksize(fd);
- if (page_size < 0) {
- vfu_log(dma->vfu_ctx, LOG_ERR, "bad page size %d", page_size);
- return ERROR_PTR(EINVAL);
- }
-
- fd = fd_cache_get(fd);
- if (fd == -1) {
- vfu_log(dma->vfu_ctx, LOG_ERR,
- "failed to de-duplicate fd for new DMA region %s: %m",
- rstr);
- return NULL;
- }
- }
- page_size = MAX(page_size, getpagesize());
-
region = calloc(1, sizeof(*region));
if (region == NULL) {
- errno = ENOMEM;
- goto rollback;
+ vfu_log(dma->vfu_ctx, LOG_ERR, "failed to allocate region entry");
+ return ERROR_PTR(ENOMEM);
}
region->info.iova.iov_base = (void *)dma_addr;
region->info.iova.iov_len = size;
- region->info.page_size = page_size;
region->info.prot = prot;
+ region->info.page_size = getpagesize();
+ region->access_mode = access_mode;
region->offset = offset;
- region->fd = fd;
+ region->fd = -1;
- if (fd != -1) {
- /*
- * TODO introduce a function that tells whether dirty page logging is
- * enabled
- */
- if (dma->dirty_pgsize != 0) {
- if (dirty_page_logging_start_on_region(region, dma->dirty_pgsize) < 0) {
- /*
- * TODO We don't necessarily have to fail, we can continue
- * and fail the get dirty page bitmap request later.
- */
- goto rollback;
+ switch (access_mode) {
+ case REGION_ACCESS_MODE_MSG:
+ assert(*fd == -1);
+ break;
+ case REGION_ACCESS_MODE_MMAP:
+ case REGION_ACCESS_MODE_FILE_IO:
+ assert(*fd != -1);
+ region->fd = fd_cache_get(*fd);
+ if (region->fd == -1) {
+ vfu_log(dma->vfu_ctx, LOG_ERR,
+ "failed to de-duplicate fd for new DMA region %s: %m",
+ rstr);
+ goto destroy_region;
}
- }
+ *fd = -1; /* The fd is now owned by the cache */
- ret = dma_map_region(dma, region);
+ page_size = fd_get_blocksize(region->fd);
+ if (page_size < 0) {
+ vfu_log(dma->vfu_ctx, LOG_ERR, "bad page size %d", page_size);
+ errno = EINVAL;
+ goto destroy_region;
+ }
+ region->info.page_size = MAX(region->info.page_size, (size_t)page_size);
- if (ret != 0) {
- vfu_log(dma->vfu_ctx, LOG_ERR,
- "failed to memory map DMA region %s: %m", rstr);
- goto rollback;
- } else {
- /* Ownership of the fd is now with the region. */
- fd = -1;
- }
+ /*
+ * TODO introduce a function that tells whether dirty page logging is
+ * enabled
+ */
+ if (dma->dirty_pgsize != 0) {
+ if (dirty_page_logging_start_on_region(region, dma->dirty_pgsize) < 0) {
+ /*
+ * TODO We don't necessarily have to fail, we can continue
+ * and fail the get dirty page bitmap request later.
+ */
+ goto destroy_region;
+ }
+ }
+
+ if (access_mode == REGION_ACCESS_MODE_MMAP) {
+ ret = dma_map_region(dma, region);
+
+ if (ret != 0) {
+ vfu_log(dma->vfu_ctx, LOG_ERR,
+ "failed to memory map DMA region %s: %m", rstr);
+ goto destroy_region;
+ }
+ }
+ break;
}
if (btree_iter_insert(&iter, (uintptr_t)dma_addr + size - 1, region) != 0) {
- goto rollback;
+ goto destroy_region;
}
dma_controller_increment_regions_generation(dma);
return region;
-rollback:
+destroy_region:
ret = errno;
- if (region != NULL) {
- if (region->info.vaddr != NULL) {
- dma_controller_unmap_region(dma, region);
- }
- dirty_page_logging_stop_on_region(region);
- free(region);
- }
- err = fd_cache_put(&fd);
- assert(err == 0);
+ dma_controller_destroy_region(dma, region, NULL, NULL);
return ERROR_PTR(ret);
}
@@ -513,7 +516,7 @@
for (btree_iter_init(&dma->regions, 0, &iter);
(region = btree_iter_get(&iter, NULL)) != NULL;
btree_iter_next(&iter)) {
- if (region->fd == -1) {
+ if (region->access_mode == REGION_ACCESS_MODE_MSG) {
continue;
}
@@ -776,8 +779,8 @@
region = sg.region;
- if (region->fd == -1) {
- vfu_log(dma->vfu_ctx, LOG_ERR, "region [%p-%p] is not mapped",
+ if (region->access_mode == REGION_ACCESS_MODE_MSG) {
+ vfu_log(dma->vfu_ctx, LOG_ERR, "region [%p-%p] isn't accessed directly",
region->info.iova.iov_base, iov_end(®ion->info.iova));
return ERROR_INT(EINVAL);
}
diff --git a/lib/dma.h b/lib/dma.h
index 90200a6..890143f 100644
--- a/lib/dma.h
+++ b/lib/dma.h
@@ -85,9 +85,20 @@
struct vfu_ctx;
+/* Designates how to access a DMA target region */
+enum region_access_mode {
+ /* Use VFIO_USER_DMA_{READ,WRITE} messages on socket */
+ REGION_ACCESS_MODE_MSG,
+ /* mmap() provided file descriptor */
+ REGION_ACCESS_MODE_MMAP,
+ /* pread()/pwrite() on provided file descriptor */
+ REGION_ACCESS_MODE_FILE_IO,
+};
+
typedef struct {
vfu_dma_info_t info;
- int fd; // File descriptor to mmap
+ enum region_access_mode access_mode;
+ int fd; // File descriptor for direct access
off_t offset; // File offset
uint8_t *dirty_bitmap; // Dirty page bitmap
} dma_memory_region_t;
@@ -127,7 +138,8 @@
*/
MOCK_DECLARE(dma_memory_region_t *, dma_controller_add_region,
dma_controller_t *dma, vfu_dma_addr_t dma_addr, uint64_t size,
- int fd, off_t offset, uint32_t prot);
+ int *fd, off_t offset, uint32_t prot,
+ enum region_access_mode access_mode);
MOCK_DECLARE(int, dma_controller_remove_region, dma_controller_t *dma,
vfu_dma_addr_t dma_addr, size_t size,
@@ -294,9 +306,10 @@
return ERROR_INT(EINVAL);
}
- if (sg->region->info.vaddr == NULL) {
+ if (sg->region->access_mode != REGION_ACCESS_MODE_MMAP) {
return ERROR_INT(EFAULT);
}
+ assert(sg->region->info.vaddr != NULL);
#ifdef DEBUG_SGL
vfu_log(dma->vfu_ctx, LOG_DEBUG, "map %p-%p",
diff --git a/lib/libvfio-user.c b/lib/libvfio-user.c
index 21a4545..407061a 100644
--- a/lib/libvfio-user.c
+++ b/lib/libvfio-user.c
@@ -676,6 +676,7 @@
handle_dma_map(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg,
struct vfio_user_dma_map *dma_map)
{
+ enum region_access_mode access_mode;
dma_memory_region_t *region;
char rstr[1024];
int fd = -1;
@@ -699,6 +700,14 @@
vfu_log(vfu_ctx, LOG_DEBUG, "adding DMA region %s", rstr);
+ if (msg->in.nr_fds > 0) {
+ fd = consume_fd(msg->in.fds, msg->in.nr_fds, 0);
+ if (fd < 0) {
+ vfu_log(vfu_ctx, LOG_ERR, "failed to add DMA region %s: %m", rstr);
+ return -1;
+ }
+ }
+
if (dma_map->flags & VFIO_USER_F_DMA_REGION_READ) {
prot |= PROT_READ;
dma_map->flags &= ~VFIO_USER_F_DMA_REGION_READ;
@@ -709,22 +718,37 @@
dma_map->flags &= ~VFIO_USER_F_DMA_REGION_WRITE;
}
- if (dma_map->flags != 0) {
- vfu_log(vfu_ctx, LOG_ERR, "bad flags=%#x", dma_map->flags);
- return ERROR_INT(EINVAL);
+ if (dma_map->flags & VFIO_USER_F_DMA_REGION_MMAP) {
+ if (fd == -1) {
+ vfu_log(vfu_ctx, LOG_ERR,
+ "file descriptor required for MMAP DMA region");
+ return ERROR_INT(EINVAL);
+ }
+ access_mode = REGION_ACCESS_MODE_MMAP;
+ dma_map->flags &= ~VFIO_USER_F_DMA_REGION_MMAP;
+ } else if (dma_map->flags & VFIO_USER_F_DMA_REGION_FILE_IO) {
+ if (fd == -1) {
+ vfu_log(vfu_ctx, LOG_ERR,
+ "file descriptor required for FILE_IO DMA region");
+ return ERROR_INT(EINVAL);
+ }
+ access_mode = REGION_ACCESS_MODE_FILE_IO;
+ dma_map->flags &= ~VFIO_USER_F_DMA_REGION_FILE_IO;
+ } else {
+ /* Compatibility: default to mmap()-ed access if an fd is provided. */
+ access_mode =
+ fd != -1 ? REGION_ACCESS_MODE_MMAP : REGION_ACCESS_MODE_MSG;
}
- if (msg->in.nr_fds > 0) {
- fd = consume_fd(msg->in.fds, msg->in.nr_fds, 0);
- if (fd < 0) {
- vfu_log(vfu_ctx, LOG_ERR, "failed to add DMA region %s: %m", rstr);
- return -1;
- }
+ if (dma_map->flags != 0) {
+ vfu_log(vfu_ctx, LOG_ERR, "bad flags=%#x", dma_map->flags);
+ close_safely(&fd);
+ return ERROR_INT(EINVAL);
}
region = dma_controller_add_region(
vfu_ctx->dma, (vfu_dma_addr_t)(uintptr_t)dma_map->addr, dma_map->size,
- fd, dma_map->offset, prot);
+ &fd, dma_map->offset, prot, access_mode);
if (region == NULL) {
vfu_log(vfu_ctx, LOG_ERR, "failed to add DMA region %s: %m", rstr);
close_safely(&fd);
@@ -2372,7 +2396,7 @@
static int
vfu_dma_transfer(vfu_ctx_t *vfu_ctx, enum vfio_user_command cmd,
- dma_sg_t *sg, void *data)
+ int flags, dma_sg_t *sg, void *data)
{
struct vfio_user_dma_region_access *dma_reply;
struct vfio_user_dma_region_access *dma_req;
@@ -2391,6 +2415,48 @@
return ERROR_INT(EPERM);
}
+ if (flags & VFU_SGL_DIRECT_ACCESS) {
+ if (sg->region->access_mode == REGION_ACCESS_MODE_MMAP) {
+ void *target, *src, *dst;
+ assert(sg->region->info.vaddr != NULL);
+ target = sg->region->info.vaddr + sg->offset;
+ src = cmd == VFIO_USER_DMA_READ ? target : data;
+ dst = cmd == VFIO_USER_DMA_READ ? data : target;
+ if (cmd == VFIO_USER_DMA_WRITE) {
+ dma_sgl_mark_dirty(vfu_ctx->dma, sg, 1);
+ }
+ memcpy(dst, src, sg->length);
+ return 0;
+ }
+
+ if (sg->region->access_mode == REGION_ACCESS_MODE_FILE_IO) {
+ size_t length, offset;
+ assert(sg->region->fd != -1);
+ length = sg->length;
+ offset = sg->offset + sg->region->offset;
+ if (cmd == VFIO_USER_DMA_WRITE) {
+ dma_sgl_mark_dirty(vfu_ctx->dma, sg, 1);
+ }
+ while (length > 0) {
+ ssize_t ret;
+ if (cmd == VFIO_USER_DMA_READ) {
+ ret = pread(sg->region->fd, data, length, offset);
+ } else {
+ ret = pwrite(sg->region->fd, data, length, offset);
+ }
+ if (ret <= 0) {
+ return ERROR_INT(EIO);
+ }
+ data += ret;
+ offset += ret;
+ length -= ret;
+ }
+ return 0;
+ }
+
+ assert(sg->region->access_mode == REGION_ACCESS_MODE_MSG);
+ }
+
rlen = sizeof(struct vfio_user_dma_region_access) +
MIN(sg->length, vfu_ctx->client_max_data_xfer_size);
@@ -2473,7 +2539,7 @@
{
assert(vfu_ctx->pending.state == VFU_CTX_PENDING_NONE);
- if (flags != 0) {
+ if (flags & ~VFU_SGL_DIRECT_ACCESS) {
return ERROR_INT(EINVAL);
}
@@ -2482,7 +2548,7 @@
return ERROR_INT(ENOTSUP);
}
- return vfu_dma_transfer(vfu_ctx, VFIO_USER_DMA_READ, sgl, data);
+ return vfu_dma_transfer(vfu_ctx, VFIO_USER_DMA_READ, flags, sgl, data);
}
EXPORT int
@@ -2491,7 +2557,7 @@
{
assert(vfu_ctx->pending.state == VFU_CTX_PENDING_NONE);
- if (flags != 0) {
+ if (flags & ~VFU_SGL_DIRECT_ACCESS) {
return ERROR_INT(EINVAL);
}
@@ -2500,7 +2566,7 @@
return ERROR_INT(ENOTSUP);
}
- return vfu_dma_transfer(vfu_ctx, VFIO_USER_DMA_WRITE, sgl, data);
+ return vfu_dma_transfer(vfu_ctx, VFIO_USER_DMA_WRITE, flags, sgl, data);
}
EXPORT bool
diff --git a/test/mocks.c b/test/mocks.c
index 6b41352..35d4720 100644
--- a/test/mocks.c
+++ b/test/mocks.c
@@ -112,13 +112,13 @@
}
dma_memory_region_t *
-dma_controller_add_region(dma_controller_t *dma, void *dma_addr,
- uint64_t size, int fd, off_t offset,
- uint32_t prot)
+dma_controller_add_region(dma_controller_t *dma, void *dma_addr, uint64_t size,
+ int *fd, off_t offset, uint32_t prot,
+ enum region_access_mode access_mode)
{
if (!is_patched("dma_controller_add_region")) {
return __real_dma_controller_add_region(dma, dma_addr, size, fd, offset,
- prot);
+ prot, access_mode);
}
check_expected_ptr(dma);
@@ -127,6 +127,7 @@
check_expected_int(fd);
check_expected_int(offset);
check_expected_uint(prot);
+ check_expected_int(access_mode);
errno = mock();
return mock_ptr_type(dma_memory_region_t *);
}
diff --git a/test/py/libvfio_user.py b/test/py/libvfio_user.py
index e756780..0efbc59 100644
--- a/test/py/libvfio_user.py
+++ b/test/py/libvfio_user.py
@@ -214,6 +214,8 @@
VFIO_USER_F_DMA_REGION_READ = (1 << 0)
VFIO_USER_F_DMA_REGION_WRITE = (1 << 1)
+VFIO_USER_F_DMA_REGION_MMAP = (1 << 2)
+VFIO_USER_F_DMA_REGION_FILE_IO = (1 << 3)
VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP = (1 << 0)
@@ -279,6 +281,8 @@
VFU_MIGR_CALLBACKS_VERS = 2
+VFU_SGL_DIRECT_ACCESS = 1
+
SOCK_PATH = b"/tmp/vfio-user.sock.%d" % os.getpid()
topdir = os.path.realpath(os.path.dirname(__file__) + "/../..")
diff --git a/test/py/test_sgl_read_write.py b/test/py/test_sgl_read_write.py
index 4fd31b6..6ec4779 100644
--- a/test/py/test_sgl_read_write.py
+++ b/test/py/test_sgl_read_write.py
@@ -29,7 +29,10 @@
#
from libvfio_user import *
+import mmap
+import pytest
import select
+import tempfile
import threading
MAP_ADDR = 0x10000000
@@ -39,6 +42,11 @@
client = None
+class Counter:
+ def __init__(self):
+ self.value = 0
+
+
class DMARegionHandler:
"""
A helper to service DMA region accesses arriving over a socket. Accesses
@@ -46,7 +54,7 @@
takes place on a separate thread so as to not block the test code.
"""
- def __handle_requests(sock, pipe, buf, lock, addr, error_no):
+ def __handle_requests(sock, pipe, buf, counter, lock, addr, error_no):
while True:
(ready, _, _) = select.select([sock, pipe], [], [])
if pipe in ready:
@@ -63,6 +71,7 @@
offset = access.addr - addr
with lock:
+ counter.value += 1
if cmd == VFIO_USER_DMA_READ:
data = buf[offset:offset + access.count]
else:
@@ -80,16 +89,20 @@
sock.close()
def __init__(self, sock, addr, size, error_no=0):
- self.data = bytearray(size)
+ self.file = tempfile.TemporaryFile()
+ self.file.truncate(size)
+ self.data = mmap.mmap(self.file.fileno(), 0)
self.data_lock = threading.Lock()
self.addr = addr
(pipe_r, self.pipe_w) = os.pipe()
# Duplicate the socket file descriptor so the thread can own it and
# make sure it gets closed only when terminating the thread.
sock = socket.socket(fileno=os.dup(sock.fileno()))
+ self.counter = Counter()
thread = threading.Thread(
target=DMARegionHandler.__handle_requests,
- args=[sock, pipe_r, self.data, self.data_lock, addr, error_no])
+ args=[sock, pipe_r, self.data, self.counter, self.data_lock, addr,
+ error_no])
thread.start()
def shutdown(self):
@@ -102,7 +115,15 @@
return self.data[offset:offset + size]
-def setup_function(function):
+access_mode_flags = [
+ 0,
+ VFIO_USER_F_DMA_REGION_MMAP,
+ VFIO_USER_F_DMA_REGION_FILE_IO,
+]
+
+
+@pytest.fixture(autouse=True, params=access_mode_flags)
+def flags(request):
global ctx, client, dma_handler
ctx = prepare_ctx_for_dma()
assert ctx is not None
@@ -117,26 +138,35 @@
client = connect_client(ctx, caps)
assert client.client_cmd_socket is not None
+ dma_handler = DMARegionHandler(client.client_cmd_socket, MAP_ADDR,
+ MAP_SIZE)
+
+ flags = (request.param | VFIO_USER_F_DMA_REGION_READ
+ | VFIO_USER_F_DMA_REGION_WRITE)
payload = vfio_user_dma_map(argsz=len(vfio_user_dma_map()),
- flags=(VFIO_USER_F_DMA_REGION_READ
- | VFIO_USER_F_DMA_REGION_WRITE),
+ flags=flags,
offset=0,
addr=MAP_ADDR,
size=MAP_SIZE)
- msg(ctx, client.sock, VFIO_USER_DMA_MAP, payload)
+ if flags & (VFIO_USER_F_DMA_REGION_MMAP | VFIO_USER_F_DMA_REGION_FILE_IO):
+ fds = [dma_handler.file.fileno()]
+ else:
+ fds = []
- dma_handler = DMARegionHandler(client.client_cmd_socket, payload.addr,
- payload.size)
+ msg(ctx, client.sock, VFIO_USER_DMA_MAP, payload, fds=fds)
+
+ return flags
def teardown_function(function):
+ global ctx, client, dma_handler
dma_handler.shutdown()
client.disconnect(ctx)
vfu_destroy_ctx(ctx)
-def test_dma_read_write():
+def test_dma_read_write(flags):
ret, sg = vfu_addr_to_sgl(ctx,
dma_addr=MAP_ADDR + 0x1000,
length=64,
@@ -145,13 +175,19 @@
assert ret == 1
data = bytearray([x & 0xff for x in range(0, sg[0].length)])
- assert vfu_sgl_write(ctx, sg, 1, data, 0) == 0
+ assert vfu_sgl_write(ctx, sg, 1, data, VFU_SGL_DIRECT_ACCESS) == 0
- assert vfu_sgl_read(ctx, sg, 1, 0) == (0, data)
+ assert vfu_sgl_read(ctx, sg, 1, VFU_SGL_DIRECT_ACCESS) == (0, data)
assert dma_handler.read(sg[0].dma_addr + sg[0].offset,
sg[0].length) == data
+ # Make sure we only see DMA IPC when there is no other option.
+ if flags & (VFIO_USER_F_DMA_REGION_MMAP | VFIO_USER_F_DMA_REGION_FILE_IO):
+ assert dma_handler.counter.value == 0
+ else:
+ assert dma_handler.counter.value == 2
+
def test_dma_read_write_large():
ret, sg = vfu_addr_to_sgl(ctx,
@@ -170,7 +206,10 @@
sg[0].length) == data
-def test_dma_read_write_error():
+def test_dma_read_write_error(flags):
+ if flags & (VFIO_USER_F_DMA_REGION_MMAP | VFIO_USER_F_DMA_REGION_FILE_IO):
+ pytest.skip("Can't generate error for direct file access modes")
+
# Reinitialize the handler to return EIO.
global dma_handler
dma_handler.shutdown()
diff --git a/test/unit-tests.c b/test/unit-tests.c
index 361112c..7377052 100644
--- a/test/unit-tests.c
+++ b/test/unit-tests.c
@@ -145,6 +145,13 @@
assert_int_equal(0, ret);
}
+static int check_fd_ptr(const uintmax_t value, const uintmax_t fd_val)
+{
+ int *fd_ptr = (int*)value;
+ int expected_fd = (int)fd_val;
+ return fd_ptr != NULL && *fd_ptr == expected_fd;
+}
+
static void
test_dma_map_without_fd(void **state UNUSED)
{
@@ -162,9 +169,11 @@
expect_uint_value(dma_controller_add_region, dma, (uintptr_t)vfu_ctx.dma);
expect_uint_value(dma_controller_add_region, dma_addr, (uintptr_t)dma_map.addr);
expect_uint_value(dma_controller_add_region, size, dma_map.size);
- expect_int_value(dma_controller_add_region, fd, -1);
+ expect_check(dma_controller_add_region, fd, check_fd_ptr, -1);
expect_int_value(dma_controller_add_region, offset, dma_map.offset);
expect_uint_value(dma_controller_add_region, prot, PROT_NONE);
+ expect_uint_value(dma_controller_add_region, access_mode,
+ REGION_ACCESS_MODE_MSG);
ret = handle_dma_map(&vfu_ctx,
mkmsg(VFIO_USER_DMA_MAP, &dma_map, sizeof(dma_map)),
&dma_map);
@@ -252,9 +261,11 @@
expect_uint_value(dma_controller_add_region, dma, (uintptr_t)vfu_ctx.dma);
expect_uint_value(dma_controller_add_region, dma_addr, (uintptr_t) dma_map.addr);
expect_uint_value(dma_controller_add_region, size, dma_map.size);
- expect_int_value(dma_controller_add_region, fd, -1);
+ expect_check(dma_controller_add_region, fd, check_fd_ptr, -1);
expect_int_value(dma_controller_add_region, offset, dma_map.offset);
expect_uint_value(dma_controller_add_region, prot, PROT_NONE);
+ expect_uint_value(dma_controller_add_region, access_mode,
+ REGION_ACCESS_MODE_MSG);
will_return(dma_controller_add_region, 0);
will_return(dma_controller_add_region, 2);
@@ -324,8 +335,8 @@
size_t size = 0;
int fd = -1;
- r = dma_controller_add_region(vfu_ctx.dma, dma_addr, size, fd, offset,
- PROT_NONE);
+ r = dma_controller_add_region(vfu_ctx.dma, dma_addr, size, &fd, offset,
+ PROT_NONE, REGION_ACCESS_MODE_MSG);
assert_non_null(r);
assert_int_equal(1, btree_size(&vfu_ctx.dma->regions));
assert_ptr_equal(NULL, r->info.vaddr);
@@ -358,8 +369,8 @@
fd2 = dup(fd1);
assert_int_not_equal(-1, fd2);
- r1 = dma_controller_add_region(vfu_ctx.dma, dma_addr, size, fd1, offset,
- PROT_NONE);
+ r1 = dma_controller_add_region(vfu_ctx.dma, dma_addr, size, &fd1, offset,
+ PROT_NONE, REGION_ACCESS_MODE_MMAP);
assert_non_null(r1);
assert_int_equal(1, btree_size(&vfu_ctx.dma->regions));
assert_ptr_not_equal(NULL, r1->info.vaddr);
@@ -375,8 +386,9 @@
* Add another mapping for the same file and verify that the file
* descriptor gets de-duplicated.
*/
- r2 = dma_controller_add_region(vfu_ctx.dma, dma_addr + size, size, fd2,
- offset + size, PROT_NONE);
+ r2 = dma_controller_add_region(vfu_ctx.dma, dma_addr + size, size, &fd2,
+ offset + size, PROT_NONE,
+ REGION_ACCESS_MODE_MMAP);
assert_non_null(r2);
assert_int_equal(2, btree_size(&vfu_ctx.dma->regions));
assert_int_equal(r1->fd, r2->fd);
@@ -400,14 +412,14 @@
fd2 = dup(fd1);
assert_int_not_equal(-1, fd2);
- r1 = dma_controller_add_region(vfu_ctx.dma, dma_addr, size, fd1, offset,
- PROT_NONE);
+ r1 = dma_controller_add_region(vfu_ctx.dma, dma_addr, size, &fd1, offset,
+ PROT_NONE, REGION_ACCESS_MODE_MMAP);
assert_non_null(r1);
assert_int_equal(1, btree_size(&vfu_ctx.dma->regions));
/* Once more to confirm that identical regions are accepted. */
- r2 = dma_controller_add_region(vfu_ctx.dma, dma_addr, size, fd2, offset,
- PROT_NONE);
+ r2 = dma_controller_add_region(vfu_ctx.dma, dma_addr, size, &fd2, offset,
+ PROT_NONE, REGION_ACCESS_MODE_MMAP);
assert_non_null(r2);
assert_int_equal(1, btree_size(&vfu_ctx.dma->regions));
assert_ptr_equal(r1, r2);
@@ -422,6 +434,9 @@
.info.mapping.iov_base = (void *)0xcafebabe,
.info.mapping.iov_len = 0x1000,
.info.vaddr = (void *)0xcafebabe,
+ .access_mode = REGION_ACCESS_MODE_MMAP,
+ /* Cheating a bit with the fd to avoid opening a file */
+ .fd = -1,
};
stage_dma_region(vfu_ctx.dma, ®ion);
@@ -470,12 +485,14 @@
.info.iova.iov_base = (void *)0x1000,
.info.iova.iov_len = 0x4000,
.info.vaddr = (void *)0xdeadbeef,
+ .access_mode = REGION_ACCESS_MODE_MMAP,
},
{
.info.iova.iov_base = (void *)0x5000,
.info.iova.iov_len = 0x2000,
.info.vaddr = (void *)0xcafebabe,
.info.prot = PROT_WRITE,
+ .access_mode = REGION_ACCESS_MODE_MMAP,
},
};