blob: 64d6264fc745d22e74bd329c1bef15bcd100ba15 [file] [log] [blame]
/*
* QEMU coroutines
*
* Copyright IBM, Corp. 2011
*
* Authors:
* Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
* Kevin Wolf <kwolf@redhat.com>
*
* This work is licensed under the terms of the GNU LGPL, version 2 or later.
* See the COPYING.LIB file in the top-level directory.
*
*/
#include "qemu/osdep.h"
#include "trace.h"
#include "qemu/thread.h"
#include "qemu/atomic.h"
#include "qemu/coroutine_int.h"
#include "qemu/coroutine-tls.h"
#include "qemu/cutils.h"
#include "block/aio.h"
enum {
COROUTINE_POOL_BATCH_MAX_SIZE = 128,
};
/*
* Coroutine creation and deletion is expensive so a pool of unused coroutines
* is kept as a cache. When the pool has coroutines available, they are
* recycled instead of creating new ones from scratch. Coroutines are added to
* the pool upon termination.
*
* The pool is global but each thread maintains a small local pool to avoid
* global pool contention. Threads fetch and return batches of coroutines from
* the global pool to maintain their local pool. The local pool holds up to two
* batches whereas the maximum size of the global pool is controlled by the
* qemu_coroutine_inc_pool_size() API.
*
* .-----------------------------------.
* | Batch 1 | Batch 2 | Batch 3 | ... | global_pool
* `-----------------------------------'
*
* .-------------------.
* | Batch 1 | Batch 2 | per-thread local_pool (maximum 2 batches)
* `-------------------'
*/
typedef struct CoroutinePoolBatch {
/* Batches are kept in a list */
QSLIST_ENTRY(CoroutinePoolBatch) next;
/* This batch holds up to @COROUTINE_POOL_BATCH_MAX_SIZE coroutines */
QSLIST_HEAD(, Coroutine) list;
unsigned int size;
} CoroutinePoolBatch;
typedef QSLIST_HEAD(, CoroutinePoolBatch) CoroutinePool;
/* Host operating system limit on number of pooled coroutines */
static unsigned int global_pool_hard_max_size;
static QemuMutex global_pool_lock; /* protects the following variables */
static CoroutinePool global_pool = QSLIST_HEAD_INITIALIZER(global_pool);
static unsigned int global_pool_size;
static unsigned int global_pool_max_size = COROUTINE_POOL_BATCH_MAX_SIZE;
QEMU_DEFINE_STATIC_CO_TLS(CoroutinePool, local_pool);
QEMU_DEFINE_STATIC_CO_TLS(Notifier, local_pool_cleanup_notifier);
static CoroutinePoolBatch *coroutine_pool_batch_new(void)
{
CoroutinePoolBatch *batch = g_new(CoroutinePoolBatch, 1);
QSLIST_INIT(&batch->list);
batch->size = 0;
return batch;
}
static void coroutine_pool_batch_delete(CoroutinePoolBatch *batch)
{
Coroutine *co;
Coroutine *tmp;
QSLIST_FOREACH_SAFE(co, &batch->list, pool_next, tmp) {
QSLIST_REMOVE_HEAD(&batch->list, pool_next);
qemu_coroutine_delete(co);
}
g_free(batch);
}
static void local_pool_cleanup(Notifier *n, void *value)
{
CoroutinePool *local_pool = get_ptr_local_pool();
CoroutinePoolBatch *batch;
CoroutinePoolBatch *tmp;
QSLIST_FOREACH_SAFE(batch, local_pool, next, tmp) {
QSLIST_REMOVE_HEAD(local_pool, next);
coroutine_pool_batch_delete(batch);
}
}
/* Ensure the atexit notifier is registered */
static void local_pool_cleanup_init_once(void)
{
Notifier *notifier = get_ptr_local_pool_cleanup_notifier();
if (!notifier->notify) {
notifier->notify = local_pool_cleanup;
qemu_thread_atexit_add(notifier);
}
}
/* Helper to get the next unused coroutine from the local pool */
static Coroutine *coroutine_pool_get_local(void)
{
CoroutinePool *local_pool = get_ptr_local_pool();
CoroutinePoolBatch *batch = QSLIST_FIRST(local_pool);
Coroutine *co;
if (unlikely(!batch)) {
return NULL;
}
co = QSLIST_FIRST(&batch->list);
QSLIST_REMOVE_HEAD(&batch->list, pool_next);
batch->size--;
if (batch->size == 0) {
QSLIST_REMOVE_HEAD(local_pool, next);
coroutine_pool_batch_delete(batch);
}
return co;
}
/* Get the next batch from the global pool */
static void coroutine_pool_refill_local(void)
{
CoroutinePool *local_pool = get_ptr_local_pool();
CoroutinePoolBatch *batch = NULL;
WITH_QEMU_LOCK_GUARD(&global_pool_lock) {
batch = QSLIST_FIRST(&global_pool);
if (batch) {
QSLIST_REMOVE_HEAD(&global_pool, next);
global_pool_size -= batch->size;
}
}
if (batch) {
QSLIST_INSERT_HEAD(local_pool, batch, next);
local_pool_cleanup_init_once();
}
}
/* Add a batch of coroutines to the global pool */
static void coroutine_pool_put_global(CoroutinePoolBatch *batch)
{
WITH_QEMU_LOCK_GUARD(&global_pool_lock) {
unsigned int max = MIN(global_pool_max_size,
global_pool_hard_max_size);
if (global_pool_size < max) {
QSLIST_INSERT_HEAD(&global_pool, batch, next);
/* Overshooting the max pool size is allowed */
global_pool_size += batch->size;
return;
}
}
/* The global pool was full, so throw away this batch */
coroutine_pool_batch_delete(batch);
}
/* Get the next unused coroutine from the pool or return NULL */
static Coroutine *coroutine_pool_get(void)
{
Coroutine *co;
co = coroutine_pool_get_local();
if (!co) {
coroutine_pool_refill_local();
co = coroutine_pool_get_local();
}
return co;
}
static void coroutine_pool_put(Coroutine *co)
{
CoroutinePool *local_pool = get_ptr_local_pool();
CoroutinePoolBatch *batch = QSLIST_FIRST(local_pool);
if (unlikely(!batch)) {
batch = coroutine_pool_batch_new();
QSLIST_INSERT_HEAD(local_pool, batch, next);
local_pool_cleanup_init_once();
}
if (unlikely(batch->size >= COROUTINE_POOL_BATCH_MAX_SIZE)) {
CoroutinePoolBatch *next = QSLIST_NEXT(batch, next);
/* Is the local pool full? */
if (next) {
QSLIST_REMOVE_HEAD(local_pool, next);
coroutine_pool_put_global(batch);
}
batch = coroutine_pool_batch_new();
QSLIST_INSERT_HEAD(local_pool, batch, next);
}
QSLIST_INSERT_HEAD(&batch->list, co, pool_next);
batch->size++;
}
Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque)
{
Coroutine *co = NULL;
if (IS_ENABLED(CONFIG_COROUTINE_POOL)) {
co = coroutine_pool_get();
}
if (!co) {
co = qemu_coroutine_new();
}
co->entry = entry;
co->entry_arg = opaque;
QSIMPLEQ_INIT(&co->co_queue_wakeup);
return co;
}
static void coroutine_delete(Coroutine *co)
{
co->caller = NULL;
if (IS_ENABLED(CONFIG_COROUTINE_POOL)) {
coroutine_pool_put(co);
} else {
qemu_coroutine_delete(co);
}
}
void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co)
{
QSIMPLEQ_HEAD(, Coroutine) pending = QSIMPLEQ_HEAD_INITIALIZER(pending);
Coroutine *from = qemu_coroutine_self();
QSIMPLEQ_INSERT_TAIL(&pending, co, co_queue_next);
/* Run co and any queued coroutines */
while (!QSIMPLEQ_EMPTY(&pending)) {
Coroutine *to = QSIMPLEQ_FIRST(&pending);
CoroutineAction ret;
/*
* Read to before to->scheduled; pairs with qatomic_cmpxchg in
* qemu_co_sleep(), aio_co_schedule() etc.
*/
smp_read_barrier_depends();
const char *scheduled = qatomic_read(&to->scheduled);
QSIMPLEQ_REMOVE_HEAD(&pending, co_queue_next);
trace_qemu_aio_coroutine_enter(ctx, from, to, to->entry_arg);
/* if the Coroutine has already been scheduled, entering it again will
* cause us to enter it twice, potentially even after the coroutine has
* been deleted */
if (scheduled) {
fprintf(stderr,
"%s: Co-routine was already scheduled in '%s'\n",
__func__, scheduled);
abort();
}
if (to->caller) {
fprintf(stderr, "Co-routine re-entered recursively\n");
abort();
}
to->caller = from;
to->ctx = ctx;
/* Store to->ctx before anything that stores to. Matches
* barrier in aio_co_wake and qemu_co_mutex_wake.
*/
smp_wmb();
ret = qemu_coroutine_switch(from, to, COROUTINE_ENTER);
/* Queued coroutines are run depth-first; previously pending coroutines
* run after those queued more recently.
*/
QSIMPLEQ_PREPEND(&pending, &to->co_queue_wakeup);
switch (ret) {
case COROUTINE_YIELD:
break;
case COROUTINE_TERMINATE:
assert(!to->locks_held);
trace_qemu_coroutine_terminate(to);
coroutine_delete(to);
break;
default:
abort();
}
}
}
void qemu_coroutine_enter(Coroutine *co)
{
qemu_aio_coroutine_enter(qemu_get_current_aio_context(), co);
}
void qemu_coroutine_enter_if_inactive(Coroutine *co)
{
if (!qemu_coroutine_entered(co)) {
qemu_coroutine_enter(co);
}
}
void coroutine_fn qemu_coroutine_yield(void)
{
Coroutine *self = qemu_coroutine_self();
Coroutine *to = self->caller;
trace_qemu_coroutine_yield(self, to);
if (!to) {
fprintf(stderr, "Co-routine is yielding to no one\n");
abort();
}
self->caller = NULL;
qemu_coroutine_switch(self, to, COROUTINE_YIELD);
}
bool qemu_coroutine_entered(Coroutine *co)
{
return co->caller;
}
AioContext *qemu_coroutine_get_aio_context(Coroutine *co)
{
return co->ctx;
}
void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size)
{
QEMU_LOCK_GUARD(&global_pool_lock);
global_pool_max_size += additional_pool_size;
}
void qemu_coroutine_dec_pool_size(unsigned int removing_pool_size)
{
QEMU_LOCK_GUARD(&global_pool_lock);
global_pool_max_size -= removing_pool_size;
}
static unsigned int get_global_pool_hard_max_size(void)
{
#ifdef __linux__
g_autofree char *contents = NULL;
int max_map_count;
/*
* Linux processes can have up to max_map_count virtual memory areas
* (VMAs). mmap(2), mprotect(2), etc fail with ENOMEM beyond this limit. We
* must limit the coroutine pool to a safe size to avoid running out of
* VMAs.
*/
if (g_file_get_contents("/proc/sys/vm/max_map_count", &contents, NULL,
NULL) &&
qemu_strtoi(contents, NULL, 10, &max_map_count) == 0) {
/*
* This is an upper bound that avoids exceeding max_map_count. Leave a
* fixed amount for non-coroutine users like library dependencies,
* vhost-user, etc. Each coroutine takes up 2 VMAs so halve the
* remaining amount.
*/
if (max_map_count > 5000) {
return (max_map_count - 5000) / 2;
} else {
/* Disable the global pool but threads still have local pools */
return 0;
}
}
#endif
return UINT_MAX;
}
static void __attribute__((constructor)) qemu_coroutine_init(void)
{
qemu_mutex_init(&global_pool_lock);
global_pool_hard_max_size = get_global_pool_hard_max_size();
}