9p: Added virtfs option 'multidevs=remap|forbid|warn'
'warn' (default): Only log an error message (once) on host if more than one
device is shared by same export, except of that just ignore this config
error though. This is the default behaviour for not breaking existing
installations implying that they really know what they are doing.
'forbid': Like 'warn', but except of just logging an error this
also denies access of guest to additional devices.
'remap': Allows to share more than one device per export by remapping
inodes from host to guest appropriately. To support multiple devices on the
9p share, and avoid qid path collisions we take the device id as input to
generate a unique QID path. The lowest 48 bits of the path will be set
equal to the file inode, and the top bits will be uniquely assigned based
on the top 16 bits of the inode and the device id.
Signed-off-by: Antonios Motakis <antonios.motakis@huawei.com>
[CS: - Rebased to https://github.com/gkurz/qemu/commits/9p-next
(SHA1 7fc4c49e91).
- Added virtfs option 'multidevs', original patch simply did the inode
remapping without being asked.
- Updated hash calls to new xxhash API.
- Updated docs for new option 'multidevs'.
- Fixed v9fs_do_readdir() not having remapped inodes.
- Log error message when running out of prefixes in
qid_path_prefixmap().
- Fixed definition of QPATH_INO_MASK.
- Wrapped qpp_table initialization to dedicated qpp_table_init()
function.
- Dropped unnecessary parantheses in qpp_lookup_func().
- Dropped unnecessary g_malloc0() result checks. ]
Signed-off-by: Christian Schoenebeck <qemu_oss@crudebyte.com>
[groug: - Moved "multidevs" parsing to the local backend.
- Added hint to invalid multidevs option error.
- Turn "remap" into "x-remap". ]
Signed-off-by: Greg Kurz <groug@kaod.org>
diff --git a/fsdev/file-op-9p.h b/fsdev/file-op-9p.h
index c757c80..f2f7772 100644
--- a/fsdev/file-op-9p.h
+++ b/fsdev/file-op-9p.h
@@ -59,6 +59,11 @@
#define V9FS_RDONLY 0x00000040
#define V9FS_PROXY_SOCK_FD 0x00000080
#define V9FS_PROXY_SOCK_NAME 0x00000100
+/*
+ * multidevs option (either one of the two applies exclusively)
+ */
+#define V9FS_REMAP_INODES 0x00000200
+#define V9FS_FORBID_MULTIDEVS 0x00000400
#define V9FS_SEC_MASK 0x0000003C
diff --git a/fsdev/qemu-fsdev-opts.c b/fsdev/qemu-fsdev-opts.c
index 7c31fff..07a18c6 100644
--- a/fsdev/qemu-fsdev-opts.c
+++ b/fsdev/qemu-fsdev-opts.c
@@ -31,7 +31,9 @@
}, {
.name = "readonly",
.type = QEMU_OPT_BOOL,
-
+ }, {
+ .name = "multidevs",
+ .type = QEMU_OPT_STRING,
}, {
.name = "socket",
.type = QEMU_OPT_STRING,
@@ -76,6 +78,9 @@
.name = "readonly",
.type = QEMU_OPT_BOOL,
}, {
+ .name = "multidevs",
+ .type = QEMU_OPT_STRING,
+ }, {
.name = "socket",
.type = QEMU_OPT_STRING,
}, {
diff --git a/fsdev/qemu-fsdev.c b/fsdev/qemu-fsdev.c
index 077a8c4..a9e069c 100644
--- a/fsdev/qemu-fsdev.c
+++ b/fsdev/qemu-fsdev.c
@@ -58,6 +58,7 @@
"writeout",
"fmode",
"dmode",
+ "multidevs",
"throttling.bps-total",
"throttling.bps-read",
"throttling.bps-write",
diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c
index 5c7f4cd..4708c0b 100644
--- a/hw/9pfs/9p-local.c
+++ b/hw/9pfs/9p-local.c
@@ -1483,6 +1483,7 @@
{
const char *sec_model = qemu_opt_get(opts, "security_model");
const char *path = qemu_opt_get(opts, "path");
+ const char *multidevs = qemu_opt_get(opts, "multidevs");
Error *local_err = NULL;
if (!sec_model) {
@@ -1506,6 +1507,26 @@
return -1;
}
+ if (multidevs) {
+ if (!strcmp(multidevs, "remap")) {
+ fse->export_flags &= ~V9FS_FORBID_MULTIDEVS;
+ fse->export_flags |= V9FS_REMAP_INODES;
+ } else if (!strcmp(multidevs, "forbid")) {
+ fse->export_flags &= ~V9FS_REMAP_INODES;
+ fse->export_flags |= V9FS_FORBID_MULTIDEVS;
+ } else if (!strcmp(multidevs, "warn")) {
+ fse->export_flags &= ~V9FS_FORBID_MULTIDEVS;
+ fse->export_flags &= ~V9FS_REMAP_INODES;
+ } else {
+ error_setg(&local_err, "invalid multidevs property '%s'",
+ multidevs);
+ error_append_hint(&local_err, "Valid options are: multidevs="
+ "[remap|forbid|warn]\n");
+ error_propagate(errp, local_err);
+ return -1;
+ }
+ }
+
if (!path) {
error_setg(errp, "path property not set");
return -1;
diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
index 5a895ae..8eb89c5 100644
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -26,6 +26,7 @@
#include "trace.h"
#include "migration/blocker.h"
#include "sysemu/qtest.h"
+#include "qemu/xxhash.h"
int open_fd_hw;
int total_open_fd;
@@ -572,23 +573,117 @@
P9_STAT_MODE_NAMED_PIPE | \
P9_STAT_MODE_SOCKET)
-/* This is the algorithm from ufs in spfs */
-static int stat_to_qid(V9fsPDU *pdu, const struct stat *stbuf, V9fsQID *qidp)
+/* creative abuse of tb_hash_func7, which is based on xxhash */
+static uint32_t qpp_hash(QppEntry e)
{
- size_t size;
+ return qemu_xxhash7(e.ino_prefix, e.dev, 0, 0, 0);
+}
- if (pdu->s->dev_id != stbuf->st_dev) {
- warn_report_once(
- "9p: Multiple devices detected in same VirtFS export, "
- "which might lead to file ID collisions and severe "
- "misbehaviours on guest! You should use a separate "
- "export for each device shared from host."
- );
+static bool qpp_lookup_func(const void *obj, const void *userp)
+{
+ const QppEntry *e1 = obj, *e2 = userp;
+ return e1->dev == e2->dev && e1->ino_prefix == e2->ino_prefix;
+}
+
+static void qpp_table_remove(void *p, uint32_t h, void *up)
+{
+ g_free(p);
+}
+
+static void qpp_table_destroy(struct qht *ht)
+{
+ if (!ht || !ht->map) {
+ return;
+ }
+ qht_iter(ht, qpp_table_remove, NULL);
+ qht_destroy(ht);
+}
+
+static void qpp_table_init(struct qht *ht)
+{
+ qht_init(ht, qpp_lookup_func, 1, QHT_MODE_AUTO_RESIZE);
+}
+
+/*
+ * stat_to_qid needs to map inode number (64 bits) and device id (32 bits)
+ * to a unique QID path (64 bits). To avoid having to map and keep track
+ * of up to 2^64 objects, we map only the 16 highest bits of the inode plus
+ * the device id to the 16 highest bits of the QID path. The 48 lowest bits
+ * of the QID path equal to the lowest bits of the inode number.
+ *
+ * This takes advantage of the fact that inode number are usually not
+ * random but allocated sequentially, so we have fewer items to keep
+ * track of.
+ */
+static int qid_path_prefixmap(V9fsPDU *pdu, const struct stat *stbuf,
+ uint64_t *path)
+{
+ QppEntry lookup = {
+ .dev = stbuf->st_dev,
+ .ino_prefix = (uint16_t) (stbuf->st_ino >> 48)
+ }, *val;
+ uint32_t hash = qpp_hash(lookup);
+
+ val = qht_lookup(&pdu->s->qpp_table, &lookup, hash);
+
+ if (!val) {
+ if (pdu->s->qp_prefix_next == 0) {
+ /* we ran out of prefixes */
+ error_report_once(
+ "9p: No more prefixes available for remapping inodes from "
+ "host to guest."
+ );
+ return -ENFILE;
+ }
+
+ val = g_malloc0(sizeof(QppEntry));
+ *val = lookup;
+
+ /* new unique inode prefix and device combo */
+ val->qp_prefix = pdu->s->qp_prefix_next++;
+ qht_insert(&pdu->s->qpp_table, val, hash, NULL);
}
- memset(&qidp->path, 0, sizeof(qidp->path));
- size = MIN(sizeof(stbuf->st_ino), sizeof(qidp->path));
- memcpy(&qidp->path, &stbuf->st_ino, size);
+ *path = ((uint64_t)val->qp_prefix << 48) | (stbuf->st_ino & QPATH_INO_MASK);
+ return 0;
+}
+
+static int stat_to_qid(V9fsPDU *pdu, const struct stat *stbuf, V9fsQID *qidp)
+{
+ int err;
+ size_t size;
+
+ if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) {
+ /* map inode+device to qid path (fast path) */
+ err = qid_path_prefixmap(pdu, stbuf, &qidp->path);
+ if (err) {
+ return err;
+ }
+ } else {
+ if (pdu->s->dev_id != stbuf->st_dev) {
+ if (pdu->s->ctx.export_flags & V9FS_FORBID_MULTIDEVS) {
+ error_report_once(
+ "9p: Multiple devices detected in same VirtFS export. "
+ "Access of guest to additional devices is (partly) "
+ "denied due to virtfs option 'multidevs=forbid' being "
+ "effective."
+ );
+ return -ENODEV;
+ } else {
+ warn_report_once(
+ "9p: Multiple devices detected in same VirtFS export, "
+ "which might lead to file ID collisions and severe "
+ "misbehaviours on guest! You should either use a "
+ "separate export for each device shared from host or "
+ "use virtfs option 'multidevs=remap'!"
+ );
+ }
+ }
+ memset(&qidp->path, 0, sizeof(qidp->path));
+ size = MIN(sizeof(stbuf->st_ino), sizeof(qidp->path));
+ memcpy(&qidp->path, &stbuf->st_ino, size);
+ }
+
qidp->version = stbuf->st_mtime ^ (stbuf->st_size << 8);
qidp->type = 0;
if (S_ISDIR(stbuf->st_mode)) {
@@ -618,6 +713,30 @@
return 0;
}
+static int coroutine_fn dirent_to_qid(V9fsPDU *pdu, V9fsFidState *fidp,
+ struct dirent *dent, V9fsQID *qidp)
+{
+ struct stat stbuf;
+ V9fsPath path;
+ int err;
+
+ v9fs_path_init(&path);
+
+ err = v9fs_co_name_to_path(pdu, &fidp->path, dent->d_name, &path);
+ if (err < 0) {
+ goto out;
+ }
+ err = v9fs_co_lstat(pdu, &path, &stbuf);
+ if (err < 0) {
+ goto out;
+ }
+ err = stat_to_qid(pdu, &stbuf, qidp);
+
+out:
+ v9fs_path_free(&path);
+ return err;
+}
+
V9fsPDU *pdu_alloc(V9fsState *s)
{
V9fsPDU *pdu = NULL;
@@ -1966,16 +2085,39 @@
v9fs_string_free(&name);
return count;
}
- /*
- * Fill up just the path field of qid because the client uses
- * only that. To fill the entire qid structure we will have
- * to stat each dirent found, which is expensive
- */
- size = MIN(sizeof(dent->d_ino), sizeof(qid.path));
- memcpy(&qid.path, &dent->d_ino, size);
- /* Fill the other fields with dummy values */
- qid.type = 0;
- qid.version = 0;
+
+ if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) {
+ /*
+ * dirent_to_qid() implies expensive stat call for each entry,
+ * we must do that here though since inode remapping requires
+ * the device id, which in turn might be different for
+ * different entries; we cannot make any assumption to avoid
+ * that here.
+ */
+ err = dirent_to_qid(pdu, fidp, dent, &qid);
+ if (err < 0) {
+ v9fs_readdir_unlock(&fidp->fs.dir);
+ v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
+ v9fs_string_free(&name);
+ return err;
+ }
+ } else {
+ /*
+ * Fill up just the path field of qid because the client uses
+ * only that. To fill the entire qid structure we will have
+ * to stat each dirent found, which is expensive. For the
+ * latter reason we don't call dirent_to_qid() here. Only drawback
+ * is that no multi-device export detection of stat_to_qid()
+ * would be done and provided as error to the user here. But
+ * user would get that error anyway when accessing those
+ * files/dirs through other ways.
+ */
+ size = MIN(sizeof(dent->d_ino), sizeof(qid.path));
+ memcpy(&qid.path, &dent->d_ino, size);
+ /* Fill the other fields with dummy values */
+ qid.type = 0;
+ qid.version = 0;
+ }
/* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */
len = pdu_marshal(pdu, 11 + count, "Qqbs",
@@ -3676,6 +3818,9 @@
s->dev_id = stat.st_dev;
+ qpp_table_init(&s->qpp_table);
+ s->qp_prefix_next = 1; /* reserve 0 to detect overflow */
+
s->ctx.fst = &fse->fst;
fsdev_throttle_init(s->ctx.fst);
@@ -3697,6 +3842,7 @@
fsdev_throttle_cleanup(s->ctx.fst);
}
g_free(s->tag);
+ qpp_table_destroy(&s->qpp_table);
g_free(s->ctx.fs_root);
}
diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h
index 5e31617..7262fe8 100644
--- a/hw/9pfs/9p.h
+++ b/hw/9pfs/9p.h
@@ -8,6 +8,7 @@
#include "fsdev/9p-iov-marshal.h"
#include "qemu/thread.h"
#include "qemu/coroutine.h"
+#include "qemu/qht.h"
enum {
P9_TLERROR = 6,
@@ -235,6 +236,15 @@
V9fsFidState *rclm_lst;
};
+#define QPATH_INO_MASK ((1ULL << 48) - 1)
+
+/* QID path prefix entry, see stat_to_qid */
+typedef struct {
+ dev_t dev;
+ uint16_t ino_prefix;
+ uint16_t qp_prefix;
+} QppEntry;
+
struct V9fsState
{
QLIST_HEAD(, V9fsPDU) free_list;
@@ -257,6 +267,8 @@
V9fsConf fsconf;
V9fsQID root_qid;
dev_t dev_id;
+ struct qht qpp_table;
+ uint16_t qp_prefix_next;
};
/* 9p2000.L open flags */
diff --git a/qemu-options.hx b/qemu-options.hx
index 2a04ca6..793d70f 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1339,7 +1339,7 @@
DEF("virtfs", HAS_ARG, QEMU_OPTION_virtfs,
"-virtfs local,path=path,mount_tag=tag,security_model=mapped-xattr|mapped-file|passthrough|none\n"
- " [,id=id][,writeout=immediate][,readonly][,fmode=fmode][,dmode=dmode]\n"
+ " [,id=id][,writeout=immediate][,readonly][,fmode=fmode][,dmode=dmode][,multidevs=remap|forbid|warn]\n"
"-virtfs proxy,mount_tag=tag,socket=socket[,id=id][,writeout=immediate][,readonly]\n"
"-virtfs proxy,mount_tag=tag,sock_fd=sock_fd[,id=id][,writeout=immediate][,readonly]\n"
"-virtfs synth,mount_tag=tag[,id=id][,readonly]\n",
@@ -1347,7 +1347,7 @@
STEXI
-@item -virtfs local,path=@var{path},mount_tag=@var{mount_tag} ,security_model=@var{security_model}[,writeout=@var{writeout}][,readonly] [,fmode=@var{fmode}][,dmode=@var{dmode}]
+@item -virtfs local,path=@var{path},mount_tag=@var{mount_tag} ,security_model=@var{security_model}[,writeout=@var{writeout}][,readonly] [,fmode=@var{fmode}][,dmode=@var{dmode}][,multidevs=@var{multidevs}]
@itemx -virtfs proxy,socket=@var{socket},mount_tag=@var{mount_tag} [,writeout=@var{writeout}][,readonly]
@itemx -virtfs proxy,sock_fd=@var{sock_fd},mount_tag=@var{mount_tag} [,writeout=@var{writeout}][,readonly]
@itemx -virtfs synth,mount_tag=@var{mount_tag}
@@ -1403,6 +1403,28 @@
only with security models "mapped-xattr" and "mapped-file".
@item mount_tag=@var{mount_tag}
Specifies the tag name to be used by the guest to mount this export point.
+@item multidevs=@var{multidevs}
+Specifies how to deal with multiple devices being shared with a 9p export.
+Supported behaviours are either "remap", "forbid" or "warn". The latter is
+the default behaviour on which virtfs 9p expects only one device to be
+shared with the same export, and if more than one device is shared and
+accessed via the same 9p export then only a warning message is logged
+(once) by qemu on host side. In order to avoid file ID collisions on guest
+you should either create a separate virtfs export for each device to be
+shared with guests (recommended way) or you might use "remap" instead which
+allows you to share multiple devices with only one export instead, which is
+achieved by remapping the original inode numbers from host to guest in a
+way that would prevent such collisions. Remapping inodes in such use cases
+is required because the original device IDs from host are never passed and
+exposed on guest. Instead all files of an export shared with virtfs always
+share the same device id on guest. So two files with identical inode
+numbers but from actually different devices on host would otherwise cause a
+file ID collision and hence potential misbehaviours on guest. "forbid" on
+the other hand assumes like "warn" that only one device is shared by the
+same export, however it will not only log a warning message but also
+deny access to additional devices on guest. Note though that "forbid" does
+currently not block all possible file access operations (e.g. readdir()
+would still return entries from other devices).
@end table
ETEXI
diff --git a/vl.c b/vl.c
index 002bf49..0a295e5 100644
--- a/vl.c
+++ b/vl.c
@@ -3335,7 +3335,8 @@
case QEMU_OPTION_virtfs: {
QemuOpts *fsdev;
QemuOpts *device;
- const char *writeout, *sock_fd, *socket, *path, *security_model;
+ const char *writeout, *sock_fd, *socket, *path, *security_model,
+ *multidevs;
olist = qemu_find_opts("virtfs");
if (!olist) {
@@ -3395,6 +3396,10 @@
qemu_opt_set_bool(fsdev, "readonly",
qemu_opt_get_bool(opts, "readonly", 0),
&error_abort);
+ multidevs = qemu_opt_get(opts, "multidevs");
+ if (multidevs) {
+ qemu_opt_set(fsdev, "multidevs", multidevs, &error_abort);
+ }
device = qemu_opts_create(qemu_find_opts("device"), NULL, 0,
&error_abort);
qemu_opt_set(device, "driver", "virtio-9p-pci", &error_abort);