blob: 4ce1ba9ca427495af1c8232a8454ee1e92cd1cdc [file] [log] [blame]
Jes Sorensenc1b0b932010-10-26 10:39:19 +02001/*
2 * os-posix-lib.c
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 * Copyright (c) 2010 Red Hat, Inc.
6 *
7 * QEMU library functions on POSIX which are shared between QEMU and
8 * the QEMU tools.
9 *
10 * Permission is hereby granted, free of charge, to any person obtaining a copy
11 * of this software and associated documentation files (the "Software"), to deal
12 * in the Software without restriction, including without limitation the rights
13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the Software is
15 * furnished to do so, subject to the following conditions:
16 *
17 * The above copyright notice and this permission notice shall be included in
18 * all copies or substantial portions of the Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 * THE SOFTWARE.
27 */
28
Peter Maydellaafd7582016-01-29 17:49:55 +000029#include "qemu/osdep.h"
Stefan Hajnoczi13401ba2013-11-14 11:54:16 +010030#include <termios.h>
Stefan Hajnoczi13401ba2013-11-14 11:54:16 +010031
Laszlo Erseke2ea3512013-05-18 06:31:48 +020032#include <glib/gprintf.h>
33
Paolo Bonzini9c17d612012-12-17 18:20:04 +010034#include "sysemu/sysemu.h"
Jes Sorensenc1b0b932010-10-26 10:39:19 +020035#include "trace.h"
Markus Armbrusterda34e652016-03-14 09:01:28 +010036#include "qapi/error.h"
Paolo Bonzini1de7afc2012-12-17 18:20:00 +010037#include "qemu/sockets.h"
Fam Zheng10f5bff2014-02-10 14:48:51 +080038#include <libgen.h>
Paolo Bonzini38183312014-05-14 17:43:21 +080039#include <sys/signal.h>
Veronia Bahaaf348b6d2016-03-20 19:16:19 +020040#include "qemu/cutils.h"
Jes Sorensenc1b0b932010-10-26 10:39:19 +020041
Paolo Bonzinicbcfa042011-09-12 16:20:11 +020042#ifdef CONFIG_LINUX
43#include <sys/syscall.h>
44#endif
Paolo Bonzinicbcfa042011-09-12 16:20:11 +020045
Andreas Färber41975b22014-03-13 14:27:59 +010046#ifdef __FreeBSD__
47#include <sys/sysctl.h>
Ed Mastea7764f12016-11-21 20:32:45 -050048#include <sys/user.h>
Michal Privoznik7dc9ae42016-09-27 17:24:56 +020049#include <libutil.h>
Andreas Färber41975b22014-03-13 14:27:59 +010050#endif
51
Kamil Rytarowski094611b2017-10-28 21:48:33 +020052#ifdef __NetBSD__
53#include <sys/sysctl.h>
54#endif
55
Markus Armbrustera9c94272016-06-22 19:11:19 +020056#include "qemu/mmap-alloc.h"
Michael S. Tsirkin794e8f32015-09-24 14:41:17 +030057
Peter Lieven7d992e42016-09-27 11:58:45 +020058#ifdef CONFIG_DEBUG_STACK_USAGE
59#include "qemu/error-report.h"
60#endif
61
Jitendra Kolhedfd0dcc2017-03-21 02:50:06 -040062#define MAX_MEM_PREALLOC_THREAD_COUNT 16
Jitendra Kolhe1e356fc2017-02-24 09:01:43 +053063
64struct MemsetThread {
65 char *addr;
Stefan Weile947d472017-10-16 22:29:12 +020066 size_t numpages;
67 size_t hpagesize;
Jitendra Kolhe1e356fc2017-02-24 09:01:43 +053068 QemuThread pgthread;
69 sigjmp_buf env;
70};
71typedef struct MemsetThread MemsetThread;
72
73static MemsetThread *memset_thread;
74static int memset_num_threads;
75static bool memset_thread_failed;
76
/*
 * Return an identifier for the calling thread: the result of the gettid
 * system call on Linux, the process id on every other host.
 */
int qemu_get_thread_id(void)
{
#if !defined(__linux__)
    return getpid();
#else
    return syscall(SYS_gettid);
#endif
}
Alexandre Raymondf97742d2011-06-06 23:34:10 -040085
/*
 * Thin wrapper around daemon(): both arguments are forwarded unchanged
 * and its return value is handed back to the caller.
 */
int qemu_daemon(int nochdir, int noclose)
{
    int rc = daemon(nochdir, noclose);

    return rc;
}
90
Marc-André Lureau9e6bdef2018-08-31 16:53:12 +020091bool qemu_write_pidfile(const char *path, Error **errp)
92{
93 int fd;
94 char pidstr[32];
95
96 while (1) {
97 struct stat a, b;
Marc-André Lureau35f7f3f2018-08-31 16:53:13 +020098 struct flock lock = {
99 .l_type = F_WRLCK,
100 .l_whence = SEEK_SET,
101 .l_len = 0,
102 };
Marc-André Lureau9e6bdef2018-08-31 16:53:12 +0200103
104 fd = qemu_open(path, O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR);
105 if (fd == -1) {
106 error_setg_errno(errp, errno, "Cannot open pid file");
107 return false;
108 }
109
110 if (fstat(fd, &b) < 0) {
111 error_setg_errno(errp, errno, "Cannot stat file");
112 goto fail_close;
113 }
114
Marc-André Lureau35f7f3f2018-08-31 16:53:13 +0200115 if (fcntl(fd, F_SETLK, &lock)) {
Marc-André Lureau9e6bdef2018-08-31 16:53:12 +0200116 error_setg_errno(errp, errno, "Cannot lock pid file");
117 goto fail_close;
118 }
119
120 /*
121 * Now make sure the path we locked is the same one that now
122 * exists on the filesystem.
123 */
124 if (stat(path, &a) < 0) {
125 /*
126 * PID file disappeared, someone else must be racing with
127 * us, so try again.
128 */
129 close(fd);
130 continue;
131 }
132
133 if (a.st_ino == b.st_ino) {
134 break;
135 }
136
137 /*
138 * PID file was recreated, someone else must be racing with
139 * us, so try again.
140 */
141 close(fd);
142 }
143
144 if (ftruncate(fd, 0) < 0) {
145 error_setg_errno(errp, errno, "Failed to truncate pid file");
146 goto fail_unlink;
147 }
148
149 snprintf(pidstr, sizeof(pidstr), FMT_pid "\n", getpid());
150 if (write(fd, pidstr, strlen(pidstr)) != strlen(pidstr)) {
151 error_setg(errp, "Failed to write pid file");
152 goto fail_unlink;
153 }
154
155 return true;
156
157fail_unlink:
158 unlink(path);
159fail_close:
160 close(fd);
161 return false;
162}
163
/*
 * Pass @ptr through unchanged when it is non-NULL.  A NULL @ptr is treated
 * as a failed allocation: a message including strerror(errno) is printed
 * to stderr and the process is aborted.
 */
void *qemu_oom_check(void *ptr)
{
    if (ptr != NULL) {
        return ptr;
    }

    fprintf(stderr, "Failed to allocate memory: %s\n", strerror(errno));
    abort();
}
Jes Sorensenc1b0b932010-10-26 10:39:19 +0200172
/*
 * Try to allocate @size bytes aligned to @alignment.
 *
 * The alignment is raised to at least sizeof(void *).  Depending on the
 * build configuration the memory comes from posix_memalign(), valloc()
 * (which is not passed @alignment) or memalign().
 *
 * Returns the allocation, or NULL on failure; in the posix_memalign()
 * case errno is set to the error code it returned.
 */
void *qemu_try_memalign(size_t alignment, size_t size)
{
    void *mem;

    alignment = alignment < sizeof(void *) ? sizeof(void *) : alignment;

#if defined(CONFIG_POSIX_MEMALIGN)
    int err = posix_memalign(&mem, alignment, size);

    if (err != 0) {
        /* posix_memalign() reports errors via its return value. */
        errno = err;
        mem = NULL;
    }
#elif defined(CONFIG_BSD)
    mem = valloc(size);
#else
    mem = memalign(alignment, size);
#endif
    trace_qemu_memalign(alignment, size, mem);
    return mem;
}
196
/*
 * Like qemu_try_memalign(), but the result is run through
 * qemu_oom_check(), which aborts the process when the allocation is NULL.
 */
void *qemu_memalign(size_t alignment, size_t size)
{
    void *mem = qemu_try_memalign(alignment, size);

    return qemu_oom_check(mem);
}
201
Jes Sorensenc1b0b932010-10-26 10:39:19 +0200202/* alloc shared memory pages */
Marcel Apfelbaum06329cc2017-12-13 16:37:37 +0200203void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment, bool shared)
Jes Sorensenc1b0b932010-10-26 10:39:19 +0200204{
Avi Kivity36b58622011-09-05 11:07:05 +0300205 size_t align = QEMU_VMALLOC_ALIGN;
Marcel Apfelbaum06329cc2017-12-13 16:37:37 +0200206 void *ptr = qemu_ram_mmap(-1, size, align, shared);
Avi Kivity36b58622011-09-05 11:07:05 +0300207
Paolo Bonzini7dda5dc2013-04-09 17:43:43 +0200208 if (ptr == MAP_FAILED) {
Markus Armbruster39228252013-07-31 15:11:11 +0200209 return NULL;
Stefan Weilc2a82382011-10-31 21:29:46 +0100210 }
Stefan Weilc2a82382011-10-31 21:29:46 +0100211
Igor Mammedova2b257d2014-10-31 16:38:37 +0000212 if (alignment) {
213 *alignment = align;
214 }
Michael S. Tsirkinc2dfc5b2015-09-10 16:36:51 +0300215
Paolo Bonzini6eebf952013-05-13 16:19:55 +0200216 trace_qemu_anon_ram_alloc(size, ptr);
Jes Sorensenc7f41112011-07-25 17:13:36 +0200217 return ptr;
Jes Sorensenc1b0b932010-10-26 10:39:19 +0200218}
219
/* Emit the qemu_vfree trace event for @ptr, then release it with free(). */
void qemu_vfree(void *ptr)
{
    trace_qemu_vfree(ptr);

    free(ptr);
}
Jes Sorensen9549e762010-10-26 10:39:20 +0200225
/*
 * Emit the qemu_anon_ram_free trace event, then unmap the @size bytes at
 * @ptr with qemu_ram_munmap().
 */
void qemu_anon_ram_free(void *ptr, size_t size)
{
    trace_qemu_anon_ram_free(ptr, size);

    qemu_ram_munmap(ptr, size);
}
231
/*
 * Clear O_NONBLOCK in the file status flags of @fd.
 * Both fcntl() calls are asserted to succeed.
 */
void qemu_set_block(int fd)
{
    int flags = fcntl(fd, F_GETFL);
    int rc;

    assert(flags != -1);

    rc = fcntl(fd, F_SETFL, flags & ~O_NONBLOCK);
    assert(rc != -1);
}
240
/*
 * Set O_NONBLOCK in the file status flags of @fd.
 * Both fcntl() calls are asserted to succeed.
 */
void qemu_set_nonblock(int fd)
{
    int flags = fcntl(fd, F_GETFL);
    int rc;

    assert(flags != -1);

    rc = fcntl(fd, F_SETFL, flags | O_NONBLOCK);
    assert(rc != -1);
}
249
/*
 * Enable SO_REUSEADDR on socket @fd.
 *
 * The setsockopt() call is asserted to succeed, so the value returned to
 * the caller is 0 whenever assertions are enabled.
 */
int socket_set_fast_reuse(int fd)
{
    int enable = 1;
    int rc = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
                        (const char *)&enable, sizeof(enable));

    assert(rc == 0);

    return rc;
}
261
/*
 * Set FD_CLOEXEC in the file descriptor flags of @fd.
 * Both fcntl() calls are asserted to succeed.
 */
void qemu_set_cloexec(int fd)
{
    int fdflags = fcntl(fd, F_GETFD);
    int rc;

    assert(fdflags != -1);

    rc = fcntl(fd, F_SETFD, fdflags | FD_CLOEXEC);
    assert(rc != -1);
}
Jes Sorensen70e72ce2010-10-26 10:39:21 +0200270
/*
 * Create a pipe with FD_CLOEXEC set on both file descriptors.
 *
 * When built with CONFIG_PIPE2, pipe2(O_CLOEXEC) is tried first and its
 * result is returned unless it failed with ENOSYS.  Otherwise plain
 * pipe() is used and the flag is set on each end afterwards.
 *
 * Returns the result of pipe2()/pipe().
 */
int qemu_pipe(int pipefd[2])
{
    int rc;

#ifdef CONFIG_PIPE2
    rc = pipe2(pipefd, O_CLOEXEC);
    if (!(rc == -1 && errno == ENOSYS)) {
        return rc;
    }
#endif
    rc = pipe(pipefd);
    if (rc != 0) {
        return rc;
    }

    qemu_set_cloexec(pipefd[0]);
    qemu_set_cloexec(pipefd[1]);
    return rc;
}
Hidetoshi Seto38671422010-11-24 11:38:10 +0900292
Laszlo Erseke2ea3512013-05-18 06:31:48 +0200293char *
294qemu_get_local_state_pathname(const char *relative_pathname)
295{
296 return g_strdup_printf("%s/%s", CONFIG_QEMU_LOCALSTATEDIR,
297 relative_pathname);
298}
Stefan Hajnoczi13401ba2013-11-14 11:54:16 +0100299
/*
 * Switch the ECHO, ECHONL, ICANON and IEXTEN local modes of the terminal
 * behind @fd on (@echo true) or off (@echo false).
 *
 * If the current attributes cannot be read (for instance because @fd is
 * not a terminal) nothing is changed: modifying and writing back an
 * uninitialised struct termios would apply arbitrary settings.
 * The result of tcsetattr() is not reported to the caller.
 */
void qemu_set_tty_echo(int fd, bool echo)
{
    struct termios tty;
    const tcflag_t mode_bits = ECHO | ECHONL | ICANON | IEXTEN;

    if (tcgetattr(fd, &tty) != 0) {
        return;
    }

    if (echo) {
        tty.c_lflag |= mode_bits;
    } else {
        tty.c_lflag &= ~mode_bits;
    }

    tcsetattr(fd, TCSANOW, &tty);
}
Fam Zheng10f5bff2014-02-10 14:48:51 +0800314
315static char exec_dir[PATH_MAX];
316
317void qemu_init_exec_dir(const char *argv0)
318{
319 char *dir;
320 char *p = NULL;
321 char buf[PATH_MAX];
322
323 assert(!exec_dir[0]);
324
325#if defined(__linux__)
326 {
327 int len;
328 len = readlink("/proc/self/exe", buf, sizeof(buf) - 1);
329 if (len > 0) {
330 buf[len] = 0;
331 p = buf;
332 }
333 }
Kamil Rytarowski094611b2017-10-28 21:48:33 +0200334#elif defined(__FreeBSD__) \
335 || (defined(__NetBSD__) && defined(KERN_PROC_PATHNAME))
Fam Zheng10f5bff2014-02-10 14:48:51 +0800336 {
Kamil Rytarowski094611b2017-10-28 21:48:33 +0200337#if defined(__FreeBSD__)
Fam Zheng10f5bff2014-02-10 14:48:51 +0800338 static int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1};
Kamil Rytarowski094611b2017-10-28 21:48:33 +0200339#else
340 static int mib[4] = {CTL_KERN, KERN_PROC_ARGS, -1, KERN_PROC_PATHNAME};
341#endif
Fam Zheng10f5bff2014-02-10 14:48:51 +0800342 size_t len = sizeof(buf) - 1;
343
344 *buf = '\0';
345 if (!sysctl(mib, ARRAY_SIZE(mib), buf, &len, NULL, 0) &&
346 *buf) {
347 buf[sizeof(buf) - 1] = '\0';
348 p = buf;
349 }
350 }
351#endif
352 /* If we don't have any way of figuring out the actual executable
353 location then try argv[0]. */
354 if (!p) {
355 if (!argv0) {
356 return;
357 }
358 p = realpath(argv0, buf);
359 if (!p) {
360 return;
361 }
362 }
Wei Jiangang55ad7812016-04-07 10:46:24 +0800363 dir = g_path_get_dirname(p);
Fam Zheng10f5bff2014-02-10 14:48:51 +0800364
365 pstrcpy(exec_dir, sizeof(exec_dir), dir);
Wei Jiangang55ad7812016-04-07 10:46:24 +0800366
367 g_free(dir);
Fam Zheng10f5bff2014-02-10 14:48:51 +0800368}
369
370char *qemu_get_exec_dir(void)
371{
372 return g_strdup(exec_dir);
373}
Paolo Bonzini38183312014-05-14 17:43:21 +0800374
Paolo Bonzini38183312014-05-14 17:43:21 +0800375static void sigbus_handler(int signal)
376{
Jitendra Kolhe1e356fc2017-02-24 09:01:43 +0530377 int i;
378 if (memset_thread) {
379 for (i = 0; i < memset_num_threads; i++) {
380 if (qemu_thread_is_self(&memset_thread[i].pgthread)) {
381 siglongjmp(memset_thread[i].env, 1);
382 }
383 }
384 }
Paolo Bonzini38183312014-05-14 17:43:21 +0800385}
386
Jitendra Kolhe1e356fc2017-02-24 09:01:43 +0530387static void *do_touch_pages(void *arg)
388{
389 MemsetThread *memset_args = (MemsetThread *)arg;
Jitendra Kolhe1e356fc2017-02-24 09:01:43 +0530390 sigset_t set, oldset;
Jitendra Kolhe1e356fc2017-02-24 09:01:43 +0530391
392 /* unblock SIGBUS */
393 sigemptyset(&set);
394 sigaddset(&set, SIGBUS);
395 pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
396
397 if (sigsetjmp(memset_args->env, 1)) {
398 memset_thread_failed = true;
399 } else {
Stefan Weile947d472017-10-16 22:29:12 +0200400 char *addr = memset_args->addr;
401 size_t numpages = memset_args->numpages;
402 size_t hpagesize = memset_args->hpagesize;
403 size_t i;
Jitendra Kolhe1e356fc2017-02-24 09:01:43 +0530404 for (i = 0; i < numpages; i++) {
Daniel P. Berrange9dc44aa2017-03-03 11:32:55 +0000405 /*
406 * Read & write back the same value, so we don't
407 * corrupt existing user/app data that might be
408 * stored.
409 *
410 * 'volatile' to stop compiler optimizing this away
411 * to a no-op
412 *
413 * TODO: get a better solution from kernel so we
414 * don't need to write at all so we don't cause
415 * wear on the storage backing the region...
416 */
417 *(volatile char *)addr = *addr;
Jitendra Kolhe1e356fc2017-02-24 09:01:43 +0530418 addr += hpagesize;
419 }
420 }
421 pthread_sigmask(SIG_SETMASK, &oldset, NULL);
422 return NULL;
423}
424
Jitendra Kolhedfd0dcc2017-03-21 02:50:06 -0400425static inline int get_memset_num_threads(int smp_cpus)
426{
427 long host_procs = sysconf(_SC_NPROCESSORS_ONLN);
428 int ret = 1;
429
430 if (host_procs > 0) {
431 ret = MIN(MIN(host_procs, MAX_MEM_PREALLOC_THREAD_COUNT), smp_cpus);
432 }
433 /* In case sysconf() fails, we fall back to single threaded */
434 return ret;
435}
436
Jitendra Kolhe1e356fc2017-02-24 09:01:43 +0530437static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages,
438 int smp_cpus)
439{
Stefan Weile947d472017-10-16 22:29:12 +0200440 size_t numpages_per_thread;
441 size_t size_per_thread;
Jitendra Kolhe1e356fc2017-02-24 09:01:43 +0530442 char *addr = area;
443 int i = 0;
444
445 memset_thread_failed = false;
Jitendra Kolhedfd0dcc2017-03-21 02:50:06 -0400446 memset_num_threads = get_memset_num_threads(smp_cpus);
Jitendra Kolhe1e356fc2017-02-24 09:01:43 +0530447 memset_thread = g_new0(MemsetThread, memset_num_threads);
448 numpages_per_thread = (numpages / memset_num_threads);
449 size_per_thread = (hpagesize * numpages_per_thread);
450 for (i = 0; i < memset_num_threads; i++) {
451 memset_thread[i].addr = addr;
452 memset_thread[i].numpages = (i == (memset_num_threads - 1)) ?
453 numpages : numpages_per_thread;
454 memset_thread[i].hpagesize = hpagesize;
455 qemu_thread_create(&memset_thread[i].pgthread, "touch_pages",
456 do_touch_pages, &memset_thread[i],
457 QEMU_THREAD_JOINABLE);
458 addr += size_per_thread;
459 numpages -= numpages_per_thread;
460 }
461 for (i = 0; i < memset_num_threads; i++) {
462 qemu_thread_join(&memset_thread[i].pgthread);
463 }
464 g_free(memset_thread);
465 memset_thread = NULL;
466
467 return memset_thread_failed;
468}
469
470void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus,
471 Error **errp)
Paolo Bonzini38183312014-05-14 17:43:21 +0800472{
Stefan Weilb7bf8f52014-06-24 22:52:29 +0200473 int ret;
Paolo Bonzini38183312014-05-14 17:43:21 +0800474 struct sigaction act, oldact;
Jitendra Kolhe1e356fc2017-02-24 09:01:43 +0530475 size_t hpagesize = qemu_fd_getpagesize(fd);
476 size_t numpages = DIV_ROUND_UP(memory, hpagesize);
Paolo Bonzini38183312014-05-14 17:43:21 +0800477
478 memset(&act, 0, sizeof(act));
479 act.sa_handler = &sigbus_handler;
480 act.sa_flags = 0;
481
482 ret = sigaction(SIGBUS, &act, &oldact);
483 if (ret) {
Igor Mammedov056b68a2016-07-20 11:54:03 +0200484 error_setg_errno(errp, errno,
485 "os_mem_prealloc: failed to install signal handler");
486 return;
Paolo Bonzini38183312014-05-14 17:43:21 +0800487 }
488
Jitendra Kolhe1e356fc2017-02-24 09:01:43 +0530489 /* touch pages simultaneously */
490 if (touch_all_pages(area, hpagesize, numpages, smp_cpus)) {
Igor Mammedov056b68a2016-07-20 11:54:03 +0200491 error_setg(errp, "os_mem_prealloc: Insufficient free host memory "
Philippe Mathieu-Daudé462e5d52017-05-15 21:11:49 -0300492 "pages available to allocate guest RAM");
Stefan Weilb7bf8f52014-06-24 22:52:29 +0200493 }
Igor Mammedov056b68a2016-07-20 11:54:03 +0200494
495 ret = sigaction(SIGBUS, &oldact, NULL);
496 if (ret) {
497 /* Terminate QEMU since it can't recover from error */
498 perror("os_mem_prealloc: failed to reinstall signal handler");
499 exit(1);
500 }
Paolo Bonzini38183312014-05-14 17:43:21 +0800501}
Daniel P. Berranged57e4e42015-05-12 17:09:19 +0100502
503
/*
 * Look up a name for process @pid.
 *
 * On FreeBSD the ki_comm field from kinfo_getproc() is duplicated.
 * Elsewhere the contents of /proc/<pid>/cmdline are read with
 * g_file_get_contents().
 *
 * Returns the name, or NULL if it could not be obtained.
 */
char *qemu_get_pid_name(pid_t pid)
{
    char *name = NULL;

#if !defined(__FreeBSD__)
    /* Assume a system with reasonable procfs */
    char *pid_path = g_strdup_printf("/proc/%d/cmdline", pid);
    size_t len;

    g_file_get_contents(pid_path, &name, &len, NULL);
    g_free(pid_path);
#else
    /* BSDs don't have /proc, but they provide a nice substitute */
    struct kinfo_proc *proc = kinfo_getproc(pid);

    if (proc) {
        name = g_strdup(proc->ki_comm);
        free(proc);
    }
#endif

    return name;
}
528
529
Daniel P. Berrange57cb38b2015-08-28 14:40:01 +0100530pid_t qemu_fork(Error **errp)
531{
532 sigset_t oldmask, newmask;
533 struct sigaction sig_action;
534 int saved_errno;
535 pid_t pid;
536
537 /*
538 * Need to block signals now, so that child process can safely
539 * kill off caller's signal handlers without a race.
540 */
541 sigfillset(&newmask);
542 if (pthread_sigmask(SIG_SETMASK, &newmask, &oldmask) != 0) {
543 error_setg_errno(errp, errno,
544 "cannot block signals");
545 return -1;
546 }
547
548 pid = fork();
549 saved_errno = errno;
550
551 if (pid < 0) {
552 /* attempt to restore signal mask, but ignore failure, to
553 * avoid obscuring the fork failure */
554 (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
555 error_setg_errno(errp, saved_errno,
556 "cannot fork child process");
557 errno = saved_errno;
558 return -1;
559 } else if (pid) {
560 /* parent process */
561
562 /* Restore our original signal mask now that the child is
563 * safely running. Only documented failures are EFAULT (not
564 * possible, since we are using just-grabbed mask) or EINVAL
565 * (not possible, since we are using correct arguments). */
566 (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
567 } else {
568 /* child process */
569 size_t i;
570
571 /* Clear out all signal handlers from parent so nothing
572 * unexpected can happen in our child once we unblock
573 * signals */
574 sig_action.sa_handler = SIG_DFL;
575 sig_action.sa_flags = 0;
576 sigemptyset(&sig_action.sa_mask);
577
578 for (i = 1; i < NSIG; i++) {
579 /* Only possible errors are EFAULT or EINVAL The former
580 * won't happen, the latter we expect, so no need to check
581 * return value */
582 (void)sigaction(i, &sig_action, NULL);
583 }
584
585 /* Unmask all signals in child, since we've no idea what the
586 * caller's done with their signal mask and don't want to
587 * propagate that to children */
588 sigemptyset(&newmask);
589 if (pthread_sigmask(SIG_SETMASK, &newmask, NULL) != 0) {
590 Error *local_err = NULL;
591 error_setg_errno(&local_err, errno,
592 "cannot unblock signals");
593 error_report_err(local_err);
594 _exit(1);
595 }
596 }
597 return pid;
598}
Peter Lieven8737d9e2016-09-27 11:58:40 +0200599
600void *qemu_alloc_stack(size_t *sz)
601{
602 void *ptr, *guardpage;
Brad Smithfc3d1ba2018-10-19 08:52:39 -0400603 int flags;
Peter Lieven7d992e42016-09-27 11:58:45 +0200604#ifdef CONFIG_DEBUG_STACK_USAGE
605 void *ptr2;
606#endif
Peter Lieven8737d9e2016-09-27 11:58:40 +0200607 size_t pagesz = getpagesize();
608#ifdef _SC_THREAD_STACK_MIN
609 /* avoid stacks smaller than _SC_THREAD_STACK_MIN */
610 long min_stack_sz = sysconf(_SC_THREAD_STACK_MIN);
611 *sz = MAX(MAX(min_stack_sz, 0), *sz);
612#endif
613 /* adjust stack size to a multiple of the page size */
614 *sz = ROUND_UP(*sz, pagesz);
615 /* allocate one extra page for the guard page */
616 *sz += pagesz;
617
Brad Smithfc3d1ba2018-10-19 08:52:39 -0400618 flags = MAP_PRIVATE | MAP_ANONYMOUS;
619#if defined(MAP_STACK) && defined(__OpenBSD__)
620 /* Only enable MAP_STACK on OpenBSD. Other OS's such as
621 * Linux/FreeBSD/NetBSD have a flag with the same name
622 * but have differing functionality. OpenBSD will SEGV
623 * if it spots execution with a stack pointer pointing
624 * at memory that was not allocated with MAP_STACK.
625 */
626 flags |= MAP_STACK;
627#endif
628
629 ptr = mmap(NULL, *sz, PROT_READ | PROT_WRITE, flags, -1, 0);
Peter Lieven8737d9e2016-09-27 11:58:40 +0200630 if (ptr == MAP_FAILED) {
Eduardo Habkoste916a6e2017-08-29 18:20:53 -0300631 perror("failed to allocate memory for stack");
Peter Lieven8737d9e2016-09-27 11:58:40 +0200632 abort();
633 }
634
635#if defined(HOST_IA64)
636 /* separate register stack */
637 guardpage = ptr + (((*sz - pagesz) / 2) & ~pagesz);
638#elif defined(HOST_HPPA)
639 /* stack grows up */
640 guardpage = ptr + *sz - pagesz;
641#else
642 /* stack grows down */
643 guardpage = ptr;
644#endif
645 if (mprotect(guardpage, pagesz, PROT_NONE) != 0) {
Eduardo Habkoste916a6e2017-08-29 18:20:53 -0300646 perror("failed to set up stack guard page");
Peter Lieven8737d9e2016-09-27 11:58:40 +0200647 abort();
648 }
649
Peter Lieven7d992e42016-09-27 11:58:45 +0200650#ifdef CONFIG_DEBUG_STACK_USAGE
651 for (ptr2 = ptr + pagesz; ptr2 < ptr + *sz; ptr2 += sizeof(uint32_t)) {
652 *(uint32_t *)ptr2 = 0xdeadbeaf;
653 }
654#endif
655
Peter Lieven8737d9e2016-09-27 11:58:40 +0200656 return ptr;
657}
658
#ifdef CONFIG_DEBUG_STACK_USAGE
/* Largest stack usage reported so far by the current thread. */
static __thread unsigned int max_stack_usage;
#endif

/*
 * Unmap the @sz bytes at @stack.
 *
 * With CONFIG_DEBUG_STACK_USAGE, first scan from the second page upwards
 * for the first 32-bit word that no longer holds 0xdeadbeaf, derive the
 * usage from its position, and report it when it exceeds the current
 * thread's previous maximum.
 */
void qemu_free_stack(void *stack, size_t sz)
{
#ifdef CONFIG_DEBUG_STACK_USAGE
    unsigned int used;
    void *probe = stack + getpagesize();

    while (probe < stack + sz && *(uint32_t *)probe == 0xdeadbeaf) {
        probe += sizeof(uint32_t);
    }

    used = sz - (uintptr_t) (probe - stack);
    if (used > max_stack_usage) {
        error_report("thread %d max stack usage increased from %u to %u",
                     qemu_get_thread_id(), max_stack_usage, used);
        max_stack_usage = used;
    }
#endif

    munmap(stack, sz);
}
Paolo Bonzinid98d4072017-02-08 13:22:12 +0100685
686void sigaction_invoke(struct sigaction *action,
687 struct qemu_signalfd_siginfo *info)
688{
Peter Maydell02ffa032017-07-20 17:32:21 +0100689 siginfo_t si = {};
Paolo Bonzinid98d4072017-02-08 13:22:12 +0100690 si.si_signo = info->ssi_signo;
691 si.si_errno = info->ssi_errno;
692 si.si_code = info->ssi_code;
693
694 /* Convert the minimal set of fields defined by POSIX.
695 * Positive si_code values are reserved for kernel-generated
696 * signals, where the valid siginfo fields are determined by
697 * the signal number. But according to POSIX, it is unspecified
698 * whether SI_USER and SI_QUEUE have values less than or equal to
699 * zero.
700 */
701 if (info->ssi_code == SI_USER || info->ssi_code == SI_QUEUE ||
702 info->ssi_code <= 0) {
703 /* SIGTERM, etc. */
704 si.si_pid = info->ssi_pid;
705 si.si_uid = info->ssi_uid;
706 } else if (info->ssi_signo == SIGILL || info->ssi_signo == SIGFPE ||
707 info->ssi_signo == SIGSEGV || info->ssi_signo == SIGBUS) {
708 si.si_addr = (void *)(uintptr_t)info->ssi_addr;
709 } else if (info->ssi_signo == SIGCHLD) {
710 si.si_pid = info->ssi_pid;
711 si.si_status = info->ssi_status;
712 si.si_uid = info->ssi_uid;
Paolo Bonzinid98d4072017-02-08 13:22:12 +0100713 }
714 action->sa_sigaction(info->ssi_signo, &si, NULL);
715}