Merge remote-tracking branch 'remotes/dgilbert/tags/pull-migration-20201012a' into staging
v3 Migration + virtiofsd pull 2020-10-12
V3
Remove the postcopy recovery changes
Migration:
Dirtyrate measurement API cleanup
Virtiofsd:
Missing qemu_init_exec_dir call
Support for setting the group on socket creation
Stop a gcc warning
Avoid tempdir in sandboxing
# gpg: Signature made Mon 12 Oct 2020 12:43:30 BST
# gpg: using RSA key 45F5C71B4A0CB7FB977A9FA90516331EBC5BFDE7
# gpg: Good signature from "Dr. David Alan Gilbert (RH2) <dgilbert@redhat.com>" [full]
# Primary key fingerprint: 45F5 C71B 4A0C B7FB 977A 9FA9 0516 331E BC5B FDE7
* remotes/dgilbert/tags/pull-migration-20201012a:
migration/dirtyrate: present dirty rate only when querying the rate has completed
migration/dirtyrate: record start_time and calc_time while at the measuring state
virtiofsd: avoid /proc/self/fd tempdir
virtiofsd: Call qemu_init_exec_dir
tools/virtiofsd: add support for --socket-group
virtiofsd: Silence gcc warning
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
diff --git a/docs/tools/virtiofsd.rst b/docs/tools/virtiofsd.rst
index ae02938..7ecee49 100644
--- a/docs/tools/virtiofsd.rst
+++ b/docs/tools/virtiofsd.rst
@@ -87,6 +87,10 @@
Listen on vhost-user UNIX domain socket at PATH.
+.. option:: --socket-group=GROUP
+
+ Set the vhost-user UNIX domain socket gid to GROUP.
+
.. option:: --fd=FDNUM
Accept connections from vhost-user UNIX domain socket file descriptor FDNUM.
diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c
index 68577ef..ab9e130 100644
--- a/migration/dirtyrate.c
+++ b/migration/dirtyrate.c
@@ -69,9 +69,8 @@
struct DirtyRateInfo *info = g_malloc0(sizeof(DirtyRateInfo));
if (qatomic_read(&CalculatingState) == DIRTY_RATE_STATUS_MEASURED) {
+ info->has_dirty_rate = true;
info->dirty_rate = dirty_rate;
- } else {
- info->dirty_rate = -1;
}
info->status = CalculatingState;
@@ -83,14 +82,14 @@
return info;
}
-static void reset_dirtyrate_stat(void)
+static void init_dirtyrate_stat(int64_t start_time, int64_t calc_time)
{
DirtyStat.total_dirty_samples = 0;
DirtyStat.total_sample_count = 0;
DirtyStat.total_block_mem_MB = 0;
DirtyStat.dirty_rate = -1;
- DirtyStat.start_time = 0;
- DirtyStat.calc_time = 0;
+ DirtyStat.start_time = start_time;
+ DirtyStat.calc_time = calc_time;
}
static void update_dirtyrate_stat(struct RamblockDirtyInfo *info)
@@ -335,7 +334,6 @@
int64_t initial_time;
rcu_register_thread();
- reset_dirtyrate_stat();
rcu_read_lock();
initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
if (!record_ramblock_hash_info(&block_dinfo, config, &block_count)) {
@@ -365,6 +363,8 @@
{
struct DirtyRateConfig config = *(struct DirtyRateConfig *)arg;
int ret;
+ int64_t start_time;
+ int64_t calc_time;
ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_UNSTARTED,
DIRTY_RATE_STATUS_MEASURING);
@@ -373,6 +373,10 @@
return NULL;
}
+ start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) / 1000;
+ calc_time = config.sample_period_seconds;
+ init_dirtyrate_stat(start_time, calc_time);
+
calculate_dirtyrate(config);
ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_MEASURING,
diff --git a/qapi/migration.json b/qapi/migration.json
index 7f5e6fd..974021a 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -1743,10 +1743,8 @@
#
# Information about current dirty page rate of vm.
#
-# @dirty-rate: @dirtyrate describing the dirty page rate of vm
-# in units of MB/s.
-# If this field returns '-1', it means querying has not
-# yet started or completed.
+# @dirty-rate: an estimate of the dirty page rate of the VM in units of
+# MB/s, present only when estimating the rate has completed.
#
# @status: status containing dirtyrate query status includes
# 'unstarted' or 'measuring' or 'measured'
@@ -1759,7 +1757,7 @@
#
##
{ 'struct': 'DirtyRateInfo',
- 'data': {'dirty-rate': 'int64',
+ 'data': {'*dirty-rate': 'int64',
'status': 'DirtyRateStatus',
'start-time': 'int64',
'calc-time': 'int64'} }
diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h
index 1240828..492e002 100644
--- a/tools/virtiofsd/fuse_i.h
+++ b/tools/virtiofsd/fuse_i.h
@@ -68,6 +68,7 @@
size_t bufsize;
int error;
char *vu_socket_path;
+ char *vu_socket_group;
int vu_listen_fd;
int vu_socketfd;
struct fv_VuDev *virtio_dev;
diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
index 2dd36ec..4d1ba29 100644
--- a/tools/virtiofsd/fuse_lowlevel.c
+++ b/tools/virtiofsd/fuse_lowlevel.c
@@ -2523,6 +2523,7 @@
LL_OPTION("--debug", debug, 1),
LL_OPTION("allow_root", deny_others, 1),
LL_OPTION("--socket-path=%s", vu_socket_path, 0),
+ LL_OPTION("--socket-group=%s", vu_socket_group, 0),
LL_OPTION("--fd=%d", vu_listen_fd, 0),
LL_OPTION("--thread-pool-size=%d", thread_pool_size, 0),
FUSE_OPT_END
@@ -2630,6 +2631,11 @@
"fuse: --socket-path and --fd cannot be given together\n");
goto out4;
}
+ if (se->vu_socket_group && !se->vu_socket_path) {
+ fuse_log(FUSE_LOG_ERR,
+ "fuse: --socket-group can only be used with --socket-path\n");
+ goto out4;
+ }
se->bufsize = FUSE_MAX_MAX_PAGES * getpagesize() + FUSE_BUFFER_HEADER_SIZE;
diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c
index d5c8e98..89f537f 100644
--- a/tools/virtiofsd/fuse_virtio.c
+++ b/tools/virtiofsd/fuse_virtio.c
@@ -31,6 +31,8 @@
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/un.h>
+#include <sys/types.h>
+#include <grp.h>
#include <unistd.h>
#include "contrib/libvhost-user/libvhost-user.h"
@@ -924,15 +926,30 @@
/*
* Unfortunately bind doesn't let you set the mask on the socket,
- * so set umask to 077 and restore it later.
+ * so set umask appropriately and restore it later.
*/
- old_umask = umask(0077);
+ if (se->vu_socket_group) {
+ old_umask = umask(S_IROTH | S_IWOTH | S_IXOTH);
+ } else {
+ old_umask = umask(S_IRGRP | S_IWGRP | S_IXGRP |
+ S_IROTH | S_IWOTH | S_IXOTH);
+ }
if (bind(listen_sock, (struct sockaddr *)&un, addr_len) == -1) {
fuse_log(FUSE_LOG_ERR, "vhost socket bind: %m\n");
close(listen_sock);
umask(old_umask);
return -1;
}
+ /* Best effort: give the freshly bound socket to the requested group. */
+ if (se->vu_socket_group) {
+ struct group *g = getgrnam(se->vu_socket_group);
+ /* chown() returns -1 on failure; an unknown group is skipped silently. */
+ if (g && chown(se->vu_socket_path, -1, g->gr_gid) == -1) {
+ fuse_log(FUSE_LOG_WARNING,
+ "vhost socket failed to set group to %s (%d): %m\n",
+ se->vu_socket_group, g->gr_gid);
+ }
+ }
umask(old_umask);
if (listen(listen_sock, 1) == -1) {
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 0b229eb..ff53df4 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -620,7 +620,7 @@
struct lo_inode *inode;
int ifd;
int res;
- int fd;
+ int fd = -1;
inode = lo_inode(req, ino);
if (!inode) {
@@ -2393,8 +2393,6 @@
static void setup_namespaces(struct lo_data *lo, struct fuse_session *se)
{
pid_t child;
- char template[] = "virtiofsd-XXXXXX";
- char *tmpdir;
/*
* Create a new pid namespace for *child* processes. We'll have to
@@ -2458,33 +2456,23 @@
exit(1);
}
- tmpdir = mkdtemp(template);
- if (!tmpdir) {
- fuse_log(FUSE_LOG_ERR, "tmpdir(%s): %m\n", template);
+ /*
+ * We only need /proc/self/fd. Prevent ".." from accessing parent
+ * directories of /proc/self/fd by bind-mounting it over /proc. Since / was
+ * previously remounted with MS_REC | MS_SLAVE this mount change only
+ * affects our process.
+ */
+ if (mount("/proc/self/fd", "/proc", NULL, MS_BIND, NULL) < 0) {
+ fuse_log(FUSE_LOG_ERR, "mount(/proc/self/fd, MS_BIND): %m\n");
exit(1);
}
- if (mount("/proc/self/fd", tmpdir, NULL, MS_BIND, NULL) < 0) {
- fuse_log(FUSE_LOG_ERR, "mount(/proc/self/fd, %s, MS_BIND): %m\n",
- tmpdir);
- exit(1);
- }
-
- /* Now we can get our /proc/self/fd directory file descriptor */
- lo->proc_self_fd = open(tmpdir, O_PATH);
+ /* Get the /proc (actually /proc/self/fd, see above) file descriptor */
+ lo->proc_self_fd = open("/proc", O_PATH);
if (lo->proc_self_fd == -1) {
- fuse_log(FUSE_LOG_ERR, "open(%s, O_PATH): %m\n", tmpdir);
+ fuse_log(FUSE_LOG_ERR, "open(/proc, O_PATH): %m\n");
exit(1);
}
-
- if (umount2(tmpdir, MNT_DETACH) < 0) {
- fuse_log(FUSE_LOG_ERR, "umount2(%s, MNT_DETACH): %m\n", tmpdir);
- exit(1);
- }
-
- if (rmdir(tmpdir) < 0) {
- fuse_log(FUSE_LOG_ERR, "rmdir(%s): %m\n", tmpdir);
- }
}
/*
@@ -2839,6 +2827,8 @@
/* Don't mask creation mode, kernel already did that */
umask(0);
+ qemu_init_exec_dir(argv[0]);
+
pthread_mutex_init(&lo.mutex, NULL);
lo.inodes = g_hash_table_new(lo_key_hash, lo_key_equal);
lo.root.fd = -1;