|  | /* | 
|  | * QEMU low level functions | 
|  | * | 
|  | * Copyright (c) 2003 Fabrice Bellard | 
|  | * | 
|  | * Permission is hereby granted, free of charge, to any person obtaining a copy | 
|  | * of this software and associated documentation files (the "Software"), to deal | 
|  | * in the Software without restriction, including without limitation the rights | 
|  | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | 
|  | * copies of the Software, and to permit persons to whom the Software is | 
|  | * furnished to do so, subject to the following conditions: | 
|  | * | 
|  | * The above copyright notice and this permission notice shall be included in | 
|  | * all copies or substantial portions of the Software. | 
|  | * | 
|  | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | 
|  | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | 
|  | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | 
|  | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | 
|  | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | 
|  | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | 
|  | * THE SOFTWARE. | 
|  | */ | 
|  | #include "qemu/osdep.h" | 
|  | #include "qapi/error.h" | 
|  | #include "qemu/cutils.h" | 
|  | #include "qemu/sockets.h" | 
|  | #include "qemu/error-report.h" | 
|  | #include "qemu/madvise.h" | 
|  | #include "qemu/mprotect.h" | 
|  | #include "qemu/hw-version.h" | 
|  | #include "monitor/monitor.h" | 
|  |  | 
|  | static const char *hw_version = QEMU_HW_VERSION; | 
|  |  | 
/*
 * Set the TCP_CORK socket option on @fd to the value @v.
 *
 * Returns the result of setsockopt().  When the platform does not define
 * both SOL_TCP and TCP_CORK, nothing is done and 0 is returned.
 */
int socket_set_cork(int fd, int v)
{
#if !defined(SOL_TCP) || !defined(TCP_CORK)
    return 0;
#else
    return setsockopt(fd, SOL_TCP, TCP_CORK, &v, sizeof(v));
#endif
}
|  |  | 
/*
 * Turn on the TCP_NODELAY option for socket @fd.
 *
 * Returns the result of setsockopt(): 0 on success, -1 with errno set
 * on failure.
 */
int socket_set_nodelay(int fd)
{
    int enable = 1;

    return setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &enable, sizeof(enable));
}
|  |  | 
|  | int qemu_madvise(void *addr, size_t len, int advice) | 
|  | { | 
|  | if (advice == QEMU_MADV_INVALID) { | 
|  | errno = EINVAL; | 
|  | return -1; | 
|  | } | 
|  | #if defined(CONFIG_MADVISE) | 
|  | return madvise(addr, len, advice); | 
|  | #elif defined(CONFIG_POSIX_MADVISE) | 
|  | int rc = posix_madvise(addr, len, advice); | 
|  | if (rc) { | 
|  | errno = rc; | 
|  | return -1; | 
|  | } | 
|  | return 0; | 
|  | #else | 
|  | errno = ENOSYS; | 
|  | return -1; | 
|  | #endif | 
|  | } | 
|  |  | 
/*
 * Change the protection of [addr, addr + size) to @prot using the host's
 * native primitive (VirtualProtect() on Windows, mprotect() elsewhere).
 *
 * Both @addr and @size must be multiples of the real host page size;
 * this is asserted.  On failure the error is reported through
 * error_report() and -1 is returned; on success the result is 0.
 */
static int qemu_mprotect__osdep(void *addr, size_t size, int prot)
{
    g_assert(((uintptr_t)addr & ~qemu_real_host_page_mask()) == 0);
    g_assert((size & ~qemu_real_host_page_mask()) == 0);

#ifdef _WIN32
    DWORD previous;
    g_autofree gchar *reason = NULL;

    if (VirtualProtect(addr, size, prot, &previous)) {
        return 0;
    }

    reason = g_win32_error_message(GetLastError());
    error_report("%s: VirtualProtect failed: %s", __func__, reason);
    return -1;
#else
    if (mprotect(addr, size, prot) == 0) {
        return 0;
    }

    error_report("%s: mprotect failed: %s", __func__, strerror(errno));
    return -1;
#endif
}
|  |  | 
/*
 * Make [addr, addr + size) readable and writable.
 * Returns 0 on success, -1 on failure (see qemu_mprotect__osdep()).
 */
int qemu_mprotect_rw(void *addr, size_t size)
{
#ifdef _WIN32
    const int prot = PAGE_READWRITE;
#else
    const int prot = PROT_READ | PROT_WRITE;
#endif

    return qemu_mprotect__osdep(addr, size, prot);
}
|  |  | 
/*
 * Make [addr, addr + size) readable, writable and executable.
 * Returns 0 on success, -1 on failure (see qemu_mprotect__osdep()).
 */
int qemu_mprotect_rwx(void *addr, size_t size)
{
#ifdef _WIN32
    const int prot = PAGE_EXECUTE_READWRITE;
#else
    const int prot = PROT_READ | PROT_WRITE | PROT_EXEC;
#endif

    return qemu_mprotect__osdep(addr, size, prot);
}
|  |  | 
/*
 * Remove all access rights from [addr, addr + size).
 * Returns 0 on success, -1 on failure (see qemu_mprotect__osdep()).
 */
int qemu_mprotect_none(void *addr, size_t size)
{
#ifdef _WIN32
    const int prot = PAGE_NOACCESS;
#else
    const int prot = PROT_NONE;
#endif

    return qemu_mprotect__osdep(addr, size, prot);
}
|  |  | 
|  | #ifndef _WIN32 | 
|  |  | 
/*
 * fcntl() commands used for taking and querying file locks.  Both stay at
 * -1 until qemu_probe_lock_ops() has picked between the OFD and the
 * classic POSIX lock commands.
 */
static int fcntl_op_setlk = -1;
static int fcntl_op_getlk = -1;
|  |  | 
/*
 * Duplicate @fd and apply @flags to the duplicate.
 *
 * The O_SYNC setting of @flags must match that of the duplicated
 * descriptor, otherwise the call fails with EINVAL.  The remaining flags
 * are applied with fcntl(F_SETFL), and the file is truncated in the cases
 * where open() would have truncated it (O_TRUNC, or O_CREAT together
 * with O_EXCL).
 *
 * Returns the new descriptor, or -1 with errno set.  On failure the
 * duplicate is closed again and errno reflects the original error.
 */
int qemu_dup_flags(int fd, int flags)
{
    const int excl_create = O_CREAT | O_EXCL;
    int new_fd;
    int cur_flags;
    int saved_errno;
    bool wants_truncate;

    new_fd = qemu_dup(fd);
    if (new_fd == -1) {
        /* nothing to clean up; errno comes from qemu_dup() */
        return -1;
    }

    cur_flags = fcntl(new_fd, F_GETFL);
    if (cur_flags == -1) {
        goto close_and_fail;
    }

    /* The O_SYNC bits of the request and of the descriptor must agree */
    if ((flags ^ cur_flags) & O_SYNC) {
        errno = EINVAL;
        goto close_and_fail;
    }

    /* Set/unset the flags that fcntl is able to change */
    if (fcntl(new_fd, F_SETFL, flags) == -1) {
        goto close_and_fail;
    }

    /* Truncate the file in the cases that open() would truncate it */
    wants_truncate = (flags & O_TRUNC) ||
                     ((flags & excl_create) == excl_create);
    if (wants_truncate && ftruncate(new_fd, 0) == -1) {
        goto close_and_fail;
    }

    return new_fd;

close_and_fail:
    /* close() may clobber errno, so preserve the original failure */
    saved_errno = errno;
    close(new_fd);
    errno = saved_errno;
    return -1;
}
|  |  | 
/*
 * Duplicate @fd with close-on-exec enabled on the copy.
 *
 * Uses fcntl(F_DUPFD_CLOEXEC) where available; otherwise falls back to
 * dup() followed by qemu_set_cloexec().
 *
 * Returns the new descriptor, or -1 with errno set.
 */
int qemu_dup(int fd)
{
#ifdef F_DUPFD_CLOEXEC
    return fcntl(fd, F_DUPFD_CLOEXEC, 0);
#else
    int copy = dup(fd);

    if (copy == -1) {
        return copy;
    }
    qemu_set_cloexec(copy);
    return copy;
#endif
}
|  |  | 
/*
 * Parse the fd set id in @param; this simply delegates to qemu_parse_fd()
 * and hands back its result unchanged.
 */
static int qemu_parse_fdset(const char *param)
{
    int fdset_id = qemu_parse_fd(param);

    return fdset_id;
}
|  |  | 
|  | static void qemu_probe_lock_ops(void) | 
|  | { | 
|  | if (fcntl_op_setlk == -1) { | 
|  | #ifdef F_OFD_SETLK | 
|  | int fd; | 
|  | int ret; | 
|  | struct flock fl = { | 
|  | .l_whence = SEEK_SET, | 
|  | .l_start  = 0, | 
|  | .l_len    = 0, | 
|  | .l_type   = F_WRLCK, | 
|  | }; | 
|  |  | 
|  | fd = open("/dev/null", O_RDWR); | 
|  | if (fd < 0) { | 
|  | fprintf(stderr, | 
|  | "Failed to open /dev/null for OFD lock probing: %s\n", | 
|  | strerror(errno)); | 
|  | fcntl_op_setlk = F_SETLK; | 
|  | fcntl_op_getlk = F_GETLK; | 
|  | return; | 
|  | } | 
|  | ret = fcntl(fd, F_OFD_GETLK, &fl); | 
|  | close(fd); | 
|  | if (!ret) { | 
|  | fcntl_op_setlk = F_OFD_SETLK; | 
|  | fcntl_op_getlk = F_OFD_GETLK; | 
|  | } else { | 
|  | fcntl_op_setlk = F_SETLK; | 
|  | fcntl_op_getlk = F_GETLK; | 
|  | } | 
|  | #else | 
|  | fcntl_op_setlk = F_SETLK; | 
|  | fcntl_op_getlk = F_GETLK; | 
|  | #endif | 
|  | } | 
|  | } | 
|  |  | 
/*
 * Report whether the OFD lock commands were selected by
 * qemu_probe_lock_ops().  Always false when F_OFD_SETLK is not defined
 * at build time.
 */
bool qemu_has_ofd_lock(void)
{
    bool have_ofd = false;

    qemu_probe_lock_ops();
#ifdef F_OFD_SETLK
    have_ofd = (fcntl_op_setlk == F_OFD_SETLK);
#endif
    return have_ofd;
}
|  |  | 
|  | static int qemu_lock_fcntl(int fd, int64_t start, int64_t len, int fl_type) | 
|  | { | 
|  | int ret; | 
|  | struct flock fl = { | 
|  | .l_whence = SEEK_SET, | 
|  | .l_start  = start, | 
|  | .l_len    = len, | 
|  | .l_type   = fl_type, | 
|  | }; | 
|  | qemu_probe_lock_ops(); | 
|  | ret = RETRY_ON_EINTR(fcntl(fd, fcntl_op_setlk, &fl)); | 
|  | return ret == -1 ? -errno : 0; | 
|  | } | 
|  |  | 
/*
 * Lock the byte range [start, start + len) of @fd: a write lock when
 * @exclusive is true, a read lock otherwise.
 *
 * Returns 0 on success or a negative errno value on failure.
 */
int qemu_lock_fd(int fd, int64_t start, int64_t len, bool exclusive)
{
    const int lock_type = exclusive ? F_WRLCK : F_RDLCK;

    return qemu_lock_fcntl(fd, start, len, lock_type);
}
|  |  | 
/*
 * Release any lock on the byte range [start, start + len) of @fd.
 *
 * Returns 0 on success or a negative errno value on failure.
 */
int qemu_unlock_fd(int fd, int64_t start, int64_t len)
{
    const int lock_type = F_UNLCK;

    return qemu_lock_fcntl(fd, start, len, lock_type);
}
|  |  | 
|  | int qemu_lock_fd_test(int fd, int64_t start, int64_t len, bool exclusive) | 
|  | { | 
|  | int ret; | 
|  | struct flock fl = { | 
|  | .l_whence = SEEK_SET, | 
|  | .l_start  = start, | 
|  | .l_len    = len, | 
|  | .l_type   = exclusive ? F_WRLCK : F_RDLCK, | 
|  | }; | 
|  | qemu_probe_lock_ops(); | 
|  | ret = fcntl(fd, fcntl_op_getlk, &fl); | 
|  | if (ret == -1) { | 
|  | return -errno; | 
|  | } else { | 
|  | return fl.l_type == F_UNLCK ? 0 : -EAGAIN; | 
|  | } | 
|  | } | 
|  | #endif | 
|  |  | 
/*
 * Report whether this build knows about O_DIRECT, i.e. whether the
 * O_DIRECT macro was defined when this file was compiled.
 */
bool qemu_has_direct_io(void)
{
#ifdef O_DIRECT
    const bool have_direct = true;
#else
    const bool have_direct = false;
#endif

    return have_direct;
}
|  |  | 
/*
 * open() @name with close-on-exec enabled on the resulting descriptor.
 *
 * O_CLOEXEC is passed to open() where it exists; otherwise
 * qemu_set_cloexec() is applied after a successful open().
 *
 * Returns the descriptor, or -1 with errno set.
 */
static int qemu_open_cloexec(const char *name, int flags, mode_t mode)
{
#ifdef O_CLOEXEC
    return open(name, flags | O_CLOEXEC, mode);
#else
    int fd = open(name, flags, mode);

    if (fd >= 0) {
        qemu_set_cloexec(fd);
    }
    return fd;
#endif
}
|  |  | 
|  | /* | 
|  | * Opens a file with FD_CLOEXEC set | 
|  | */ | 
|  | static int | 
|  | qemu_open_internal(const char *name, int flags, mode_t mode, Error **errp) | 
|  | { | 
|  | int ret; | 
|  |  | 
|  | #ifndef _WIN32 | 
|  | const char *fdset_id_str; | 
|  |  | 
|  | /* Attempt dup of fd from fd set */ | 
|  | if (strstart(name, "/dev/fdset/", &fdset_id_str)) { | 
|  | int64_t fdset_id; | 
|  |  | 
|  | fdset_id = qemu_parse_fdset(fdset_id_str); | 
|  | if (fdset_id == -1) { | 
|  | error_setg(errp, "Could not parse fdset %s", name); | 
|  | errno = EINVAL; | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | return monitor_fdset_dup_fd_add(fdset_id, flags, errp); | 
|  | } | 
|  | #endif | 
|  |  | 
|  | ret = qemu_open_cloexec(name, flags, mode); | 
|  |  | 
|  | if (ret == -1) { | 
|  | const char *action = flags & O_CREAT ? "create" : "open"; | 
|  | #ifdef O_DIRECT | 
|  | /* Give more helpful error message for O_DIRECT */ | 
|  | if (errno == EINVAL && (flags & O_DIRECT)) { | 
|  | ret = open(name, flags & ~O_DIRECT, mode); | 
|  | if (ret != -1) { | 
|  | close(ret); | 
|  | error_setg(errp, "Could not %s '%s': " | 
|  | "filesystem does not support O_DIRECT", | 
|  | action, name); | 
|  | errno = EINVAL; /* restore first open()'s errno */ | 
|  | return -1; | 
|  | } | 
|  | } | 
|  | #endif /* O_DIRECT */ | 
|  | error_setg_errno(errp, errno, "Could not %s '%s'", | 
|  | action, name); | 
|  | } | 
|  |  | 
|  | return ret; | 
|  | } | 
|  |  | 
|  |  | 
|  | int qemu_open(const char *name, int flags, Error **errp) | 
|  | { | 
|  | assert(!(flags & O_CREAT)); | 
|  |  | 
|  | return qemu_open_internal(name, flags, 0, errp); | 
|  | } | 
|  |  | 
|  |  | 
|  | int qemu_create(const char *name, int flags, mode_t mode, Error **errp) | 
|  | { | 
|  | assert(!(flags & O_CREAT)); | 
|  |  | 
|  | return qemu_open_internal(name, flags | O_CREAT, mode, errp); | 
|  | } | 
|  |  | 
|  |  | 
/*
 * Legacy open() look-alike: open @name with FD_CLOEXEC set.
 *
 * When @flags contains O_CREAT, the creation mode is taken from the
 * first variadic argument (read as an int).  No Error object is
 * produced; the only diagnostics are errno and, for an EINVAL failure
 * with O_DIRECT requested, a hint printed via error_report().
 *
 * Returns the descriptor, or -1 with errno set.
 */
int qemu_open_old(const char *name, int flags, ...)
{
    mode_t mode = 0;
    int fd;

    if (flags & O_CREAT) {
        va_list args;

        va_start(args, flags);
        mode = va_arg(args, int);
        va_end(args);
    }

    fd = qemu_open_internal(name, flags, mode, NULL);

#ifdef O_DIRECT
    if (fd == -1) {
        if (errno == EINVAL && (flags & O_DIRECT)) {
            error_report("file system may not support O_DIRECT");
            errno = EINVAL; /* in case it was clobbered */
        }
    }
#endif /* O_DIRECT */

    return fd;
}
|  |  | 
/*
 * Close @fd.
 *
 * Before closing, the monitor is told to drop @fd from its fd set
 * bookkeeping, in case it was dup'd from an fdset.
 *
 * Returns the result of close().
 */
int qemu_close(int fd)
{
    int rc;

    /* Close fd that was dup'd from an fdset */
    monitor_fdset_dup_fd_remove(fd);

    rc = close(fd);
    return rc;
}
|  |  | 
/*
 * Remove @name from the filesystem.  Names starting with "/dev/fdset/"
 * do not refer to filesystem entries; for those nothing is done and
 * success is reported.
 *
 * Returns: zero on success; on error -1 is returned and errno is set by
 * unlink().
 */
int qemu_unlink(const char *name)
{
    if (!g_str_has_prefix(name, "/dev/fdset/")) {
        return unlink(name);
    }

    return 0;
}
|  |  | 
/*
 * A variant of write(2) that keeps going after partial writes.
 *
 * Returns the number of bytes transferred.  If that is fewer than
 * `count', errno holds the error from the failing write().  Writes
 * interrupted by a signal (EINTR) are retried.
 *
 * This function does not work with non-blocking fds.  Either way of
 * handling them would be bad:
 *   - returning a short write (then the name would be wrong)
 *   - busy-waiting by adding (errno == EAGAIN) to the retry condition
 */
ssize_t qemu_write_full(int fd, const void *buf, size_t count)
{
    const char *cursor = buf;
    ssize_t written = 0;

    while (count > 0) {
        ssize_t n = write(fd, cursor, count);

        if (n >= 0) {
            cursor += n;
            count -= n;
            written += n;
        } else if (errno != EINTR) {
            /* hard error: report what was transferred so far */
            break;
        }
    }

    return written;
}
|  |  | 
/*
 * Create a socket with FD_CLOEXEC set.
 *
 * Where SOCK_CLOEXEC exists it is tried first; only if that attempt
 * fails with EINVAL is the socket created without it, after which
 * close-on-exec is set through qemu_set_cloexec().
 *
 * Returns the socket descriptor, or -1 with errno set.
 */
int qemu_socket(int domain, int type, int protocol)
{
    int sock;

#ifdef SOCK_CLOEXEC
    sock = socket(domain, type | SOCK_CLOEXEC, protocol);
    if (!(sock == -1 && errno == EINVAL)) {
        /* success, or a failure not caused by the SOCK_CLOEXEC flag */
        return sock;
    }
#endif

    sock = socket(domain, type, protocol);
    if (sock >= 0) {
        qemu_set_cloexec(sock);
    }
    return sock;
}
|  |  | 
/*
 * Accept a connection on listening socket @s and set FD_CLOEXEC on the
 * new descriptor.
 *
 * With CONFIG_ACCEPT4, accept4(SOCK_CLOEXEC) is tried first; only if it
 * fails with ENOSYS does the code fall back to accept() followed by
 * qemu_set_cloexec().
 *
 * Returns the connected descriptor, or -1 with errno set.
 */
int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen)
{
    int conn;

#ifdef CONFIG_ACCEPT4
    conn = accept4(s, addr, addrlen, SOCK_CLOEXEC);
    if (!(conn == -1 && errno == ENOSYS)) {
        /* success, or a failure other than "accept4 not implemented" */
        return conn;
    }
#endif

    conn = accept(s, addr, addrlen);
    if (conn >= 0) {
        qemu_set_cloexec(conn);
    }
    return conn;
}
|  |  | 
/*
 * A variant of send(2) that keeps going after partial sends.
 *
 * Returns the number of bytes transferred.  If that is fewer than
 * `count', errno holds the error from the failing send().  Calls
 * interrupted by a signal (EINTR) are retried.
 */
ssize_t qemu_send_full(int s, const void *buf, size_t count)
{
    const char *cursor = buf;
    ssize_t sent = 0;

    while (count > 0) {
        ssize_t n = send(s, cursor, count, 0);

        if (n >= 0) {
            cursor += n;
            count -= n;
            sent += n;
        } else if (errno != EINTR) {
            /* hard error: report what was transferred so far */
            break;
        }
    }

    return sent;
}
|  |  | 
|  | void qemu_set_hw_version(const char *version) | 
|  | { | 
|  | hw_version = version; | 
|  | } | 
|  |  | 
|  | const char *qemu_hw_version(void) | 
|  | { | 
|  | return hw_version; | 
|  | } | 
|  |  | 
#ifdef _WIN32
/*
 * Process-exit hook registered by socket_init(): calls WSACleanup().
 */
static void socket_cleanup(void)
{
    (void)WSACleanup();
}
#endif
|  |  | 
/*
 * One-time socket subsystem initialisation.
 *
 * On Windows this starts Winsock 2.2 and registers socket_cleanup() to
 * run at process exit.  On other hosts there is nothing to do.
 *
 * Returns 0 on success, -1 if Winsock could not be started (the error
 * code is printed to stderr).
 */
int socket_init(void)
{
#ifdef _WIN32
    WSADATA Data;
    int ret;

    ret = WSAStartup(MAKEWORD(2, 2), &Data);
    if (ret != 0) {
        /*
         * WSAStartup() returns the error code directly;
         * WSAGetLastError() must not be used after it fails, so report
         * the return value itself.
         */
        fprintf(stderr, "WSAStartup: %d\n", ret);
        return -1;
    }
    atexit(socket_cleanup);
#endif
    return 0;
}
|  |  | 
|  |  | 
|  | #ifndef CONFIG_IOVEC | 
/*
 * Emulate readv()/writev() with a sequence of read()/write() calls, for
 * hosts without native vectored I/O.
 *
 * @fd:       descriptor to transfer on
 * @iov:      array of @iov_cnt buffers, processed in order
 * @iov_cnt:  number of elements in @iov; a negative count fails with EINVAL
 * @do_write: true to write() from the buffers, false to read() into them
 *
 * Each element is transferred completely before moving to the next one;
 * short transfers and EINTR are retried.  Zero-length elements are
 * skipped so that they cannot be mistaken for end-of-file.
 *
 * Returns the total number of bytes transferred.  The loop stops early
 * when read()/write() returns 0 or fails; a failure is reported as -1
 * (errno set) only if nothing had been transferred yet, otherwise the
 * partial byte count is returned.
 */
static ssize_t
readv_writev(int fd, const struct iovec *iov, int iov_cnt, bool do_write)
{
    ssize_t total = 0;
    size_t off = 0;
    int i = 0;

    if (iov_cnt < 0) {
        errno = EINVAL;
        return -1;
    }

    while (i < iov_cnt) {
        char *base = (char *)iov[i].iov_base + off;
        size_t len = iov[i].iov_len - off;
        ssize_t r;

        if (len == 0) {
            /*
             * Empty element: a zero-byte read()/write() would return 0
             * and be taken for EOF, dropping all following elements.
             */
            off = 0;
            i++;
            continue;
        }

        r = do_write ? write(fd, base, len) : read(fd, base, len);
        if (r > 0) {
            total += r;
            off += r;
            if (off < iov[i].iov_len) {
                /* short transfer: continue with the same element */
                continue;
            }
        } else if (r == 0) {
            break;
        } else if (errno == EINTR) {
            continue;
        } else {
            /*
             * Some other error: only report it if no data was
             * processed, otherwise return the partial count.
             */
            if (total == 0) {
                total = -1;
            }
            break;
        }

        off = 0;
        i++;
    }

    return total;
}
|  |  | 
/*
 * Fallback readv() for hosts without native vectored I/O: fills the
 * @iov_cnt buffers of @iov from @fd via readv_writev().
 */
ssize_t
readv(int fd, const struct iovec *iov, int iov_cnt)
{
    const bool is_write = false;

    return readv_writev(fd, iov, iov_cnt, is_write);
}
|  |  | 
/*
 * Fallback writev() for hosts without native vectored I/O: writes the
 * @iov_cnt buffers of @iov to @fd via readv_writev().
 */
ssize_t
writev(int fd, const struct iovec *iov, int iov_cnt)
{
    const bool is_write = true;

    return readv_writev(fd, iov, iov_cnt, is_write);
}
|  | #endif | 
|  |  | 
/*
 * Flush the data of @fd to disk, preferably without forcing out the
 * inode just for timestamp updates.
 *
 * fdatasync() is used when the build has it (CONFIG_FDATASYNC);
 * otherwise the call falls back to fsync(), since many operating
 * systems do not provide fdatasync().
 *
 * Returns the result of the underlying call.
 */
int qemu_fdatasync(int fd)
{
#ifndef CONFIG_FDATASYNC
    return fsync(fd);
#else
    return fdatasync(fd);
#endif
}