|  | /* | 
|  | * QEMU coroutines | 
|  | * | 
|  | * Copyright IBM, Corp. 2011 | 
|  | * | 
|  | * Authors: | 
|  | *  Stefan Hajnoczi    <stefanha@linux.vnet.ibm.com> | 
|  | *  Kevin Wolf         <kwolf@redhat.com> | 
|  | * | 
|  | * This work is licensed under the terms of the GNU LGPL, version 2 or later. | 
|  | * See the COPYING.LIB file in the top-level directory. | 
|  | * | 
|  | */ | 
|  |  | 
|  | #include "qemu/osdep.h" | 
|  | #include "trace.h" | 
|  | #include "qemu/thread.h" | 
|  | #include "qemu/atomic.h" | 
|  | #include "qemu/coroutine_int.h" | 
|  | #include "qemu/coroutine-tls.h" | 
|  | #include "qemu/cutils.h" | 
|  | #include "block/aio.h" | 
|  |  | 
/* Largest number of coroutines a single pool batch may hold */
enum {
    COROUTINE_POOL_BATCH_MAX_SIZE = 128,
};
|  |  | 
/*
 * Coroutine creation and deletion are expensive, so a pool of unused
 * coroutines is kept as a cache. When the pool has coroutines available, they
 * are recycled instead of creating new ones from scratch. Coroutines are added
 * to the pool upon termination.
 *
 * The pool is global but each thread maintains a small local pool to avoid
 * global pool contention. Threads fetch and return batches of coroutines from
 * the global pool to maintain their local pool. The local pool holds up to two
 * batches whereas the maximum size of the global pool is controlled by the
 * qemu_coroutine_inc_pool_size() API.
 *
 * .-----------------------------------.
 * | Batch 1 | Batch 2 | Batch 3 | ... | global_pool
 * `-----------------------------------'
 *
 * .-------------------.
 * | Batch 1 | Batch 2 | per-thread local_pool (maximum 2 batches)
 * `-------------------'
 */
|  | typedef struct CoroutinePoolBatch { | 
|  | /* Batches are kept in a list */ | 
|  | QSLIST_ENTRY(CoroutinePoolBatch) next; | 
|  |  | 
|  | /* This batch holds up to @COROUTINE_POOL_BATCH_MAX_SIZE coroutines */ | 
|  | QSLIST_HEAD(, Coroutine) list; | 
|  | unsigned int size; | 
|  | } CoroutinePoolBatch; | 
|  |  | 
|  | typedef QSLIST_HEAD(, CoroutinePoolBatch) CoroutinePool; | 
|  |  | 
|  | /* Host operating system limit on number of pooled coroutines */ | 
|  | static unsigned int global_pool_hard_max_size; | 
|  |  | 
|  | static QemuMutex global_pool_lock; /* protects the following variables */ | 
|  | static CoroutinePool global_pool = QSLIST_HEAD_INITIALIZER(global_pool); | 
|  | static unsigned int global_pool_size; | 
|  | static unsigned int global_pool_max_size = COROUTINE_POOL_BATCH_MAX_SIZE; | 
|  |  | 
|  | QEMU_DEFINE_STATIC_CO_TLS(CoroutinePool, local_pool); | 
|  | QEMU_DEFINE_STATIC_CO_TLS(Notifier, local_pool_cleanup_notifier); | 
|  |  | 
|  | static CoroutinePoolBatch *coroutine_pool_batch_new(void) | 
|  | { | 
|  | CoroutinePoolBatch *batch = g_new(CoroutinePoolBatch, 1); | 
|  |  | 
|  | QSLIST_INIT(&batch->list); | 
|  | batch->size = 0; | 
|  | return batch; | 
|  | } | 
|  |  | 
|  | static void coroutine_pool_batch_delete(CoroutinePoolBatch *batch) | 
|  | { | 
|  | Coroutine *co; | 
|  | Coroutine *tmp; | 
|  |  | 
|  | QSLIST_FOREACH_SAFE(co, &batch->list, pool_next, tmp) { | 
|  | QSLIST_REMOVE_HEAD(&batch->list, pool_next); | 
|  | qemu_coroutine_delete(co); | 
|  | } | 
|  | g_free(batch); | 
|  | } | 
|  |  | 
|  | static void local_pool_cleanup(Notifier *n, void *value) | 
|  | { | 
|  | CoroutinePool *local_pool = get_ptr_local_pool(); | 
|  | CoroutinePoolBatch *batch; | 
|  | CoroutinePoolBatch *tmp; | 
|  |  | 
|  | QSLIST_FOREACH_SAFE(batch, local_pool, next, tmp) { | 
|  | QSLIST_REMOVE_HEAD(local_pool, next); | 
|  | coroutine_pool_batch_delete(batch); | 
|  | } | 
|  | } | 
|  |  | 
|  | /* Ensure the atexit notifier is registered */ | 
|  | static void local_pool_cleanup_init_once(void) | 
|  | { | 
|  | Notifier *notifier = get_ptr_local_pool_cleanup_notifier(); | 
|  | if (!notifier->notify) { | 
|  | notifier->notify = local_pool_cleanup; | 
|  | qemu_thread_atexit_add(notifier); | 
|  | } | 
|  | } | 
|  |  | 
|  | /* Helper to get the next unused coroutine from the local pool */ | 
|  | static Coroutine *coroutine_pool_get_local(void) | 
|  | { | 
|  | CoroutinePool *local_pool = get_ptr_local_pool(); | 
|  | CoroutinePoolBatch *batch = QSLIST_FIRST(local_pool); | 
|  | Coroutine *co; | 
|  |  | 
|  | if (unlikely(!batch)) { | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | co = QSLIST_FIRST(&batch->list); | 
|  | QSLIST_REMOVE_HEAD(&batch->list, pool_next); | 
|  | batch->size--; | 
|  |  | 
|  | if (batch->size == 0) { | 
|  | QSLIST_REMOVE_HEAD(local_pool, next); | 
|  | coroutine_pool_batch_delete(batch); | 
|  | } | 
|  | return co; | 
|  | } | 
|  |  | 
|  | /* Get the next batch from the global pool */ | 
|  | static void coroutine_pool_refill_local(void) | 
|  | { | 
|  | CoroutinePool *local_pool = get_ptr_local_pool(); | 
|  | CoroutinePoolBatch *batch; | 
|  |  | 
|  | WITH_QEMU_LOCK_GUARD(&global_pool_lock) { | 
|  | batch = QSLIST_FIRST(&global_pool); | 
|  |  | 
|  | if (batch) { | 
|  | QSLIST_REMOVE_HEAD(&global_pool, next); | 
|  | global_pool_size -= batch->size; | 
|  | } | 
|  | } | 
|  |  | 
|  | if (batch) { | 
|  | QSLIST_INSERT_HEAD(local_pool, batch, next); | 
|  | local_pool_cleanup_init_once(); | 
|  | } | 
|  | } | 
|  |  | 
|  | /* Add a batch of coroutines to the global pool */ | 
|  | static void coroutine_pool_put_global(CoroutinePoolBatch *batch) | 
|  | { | 
|  | WITH_QEMU_LOCK_GUARD(&global_pool_lock) { | 
|  | unsigned int max = MIN(global_pool_max_size, | 
|  | global_pool_hard_max_size); | 
|  |  | 
|  | if (global_pool_size < max) { | 
|  | QSLIST_INSERT_HEAD(&global_pool, batch, next); | 
|  |  | 
|  | /* Overshooting the max pool size is allowed */ | 
|  | global_pool_size += batch->size; | 
|  | return; | 
|  | } | 
|  | } | 
|  |  | 
|  | /* The global pool was full, so throw away this batch */ | 
|  | coroutine_pool_batch_delete(batch); | 
|  | } | 
|  |  | 
|  | /* Get the next unused coroutine from the pool or return NULL */ | 
|  | static Coroutine *coroutine_pool_get(void) | 
|  | { | 
|  | Coroutine *co; | 
|  |  | 
|  | co = coroutine_pool_get_local(); | 
|  | if (!co) { | 
|  | coroutine_pool_refill_local(); | 
|  | co = coroutine_pool_get_local(); | 
|  | } | 
|  | return co; | 
|  | } | 
|  |  | 
|  | static void coroutine_pool_put(Coroutine *co) | 
|  | { | 
|  | CoroutinePool *local_pool = get_ptr_local_pool(); | 
|  | CoroutinePoolBatch *batch = QSLIST_FIRST(local_pool); | 
|  |  | 
|  | if (unlikely(!batch)) { | 
|  | batch = coroutine_pool_batch_new(); | 
|  | QSLIST_INSERT_HEAD(local_pool, batch, next); | 
|  | local_pool_cleanup_init_once(); | 
|  | } | 
|  |  | 
|  | if (unlikely(batch->size >= COROUTINE_POOL_BATCH_MAX_SIZE)) { | 
|  | CoroutinePoolBatch *next = QSLIST_NEXT(batch, next); | 
|  |  | 
|  | /* Is the local pool full? */ | 
|  | if (next) { | 
|  | QSLIST_REMOVE_HEAD(local_pool, next); | 
|  | coroutine_pool_put_global(batch); | 
|  | } | 
|  |  | 
|  | batch = coroutine_pool_batch_new(); | 
|  | QSLIST_INSERT_HEAD(local_pool, batch, next); | 
|  | } | 
|  |  | 
|  | QSLIST_INSERT_HEAD(&batch->list, co, pool_next); | 
|  | batch->size++; | 
|  | } | 
|  |  | 
|  | Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque) | 
|  | { | 
|  | Coroutine *co = NULL; | 
|  |  | 
|  | if (IS_ENABLED(CONFIG_COROUTINE_POOL)) { | 
|  | co = coroutine_pool_get(); | 
|  | } | 
|  |  | 
|  | if (!co) { | 
|  | co = qemu_coroutine_new(); | 
|  | } | 
|  |  | 
|  | co->entry = entry; | 
|  | co->entry_arg = opaque; | 
|  | QSIMPLEQ_INIT(&co->co_queue_wakeup); | 
|  | return co; | 
|  | } | 
|  |  | 
|  | static void coroutine_delete(Coroutine *co) | 
|  | { | 
|  | co->caller = NULL; | 
|  |  | 
|  | if (IS_ENABLED(CONFIG_COROUTINE_POOL)) { | 
|  | coroutine_pool_put(co); | 
|  | } else { | 
|  | qemu_coroutine_delete(co); | 
|  | } | 
|  | } | 
|  |  | 
|  | void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co) | 
|  | { | 
|  | QSIMPLEQ_HEAD(, Coroutine) pending = QSIMPLEQ_HEAD_INITIALIZER(pending); | 
|  | Coroutine *from = qemu_coroutine_self(); | 
|  |  | 
|  | QSIMPLEQ_INSERT_TAIL(&pending, co, co_queue_next); | 
|  |  | 
|  | /* Run co and any queued coroutines */ | 
|  | while (!QSIMPLEQ_EMPTY(&pending)) { | 
|  | Coroutine *to = QSIMPLEQ_FIRST(&pending); | 
|  | CoroutineAction ret; | 
|  |  | 
|  | /* | 
|  | * Read to before to->scheduled; pairs with qatomic_cmpxchg in | 
|  | * qemu_co_sleep(), aio_co_schedule() etc. | 
|  | */ | 
|  | smp_read_barrier_depends(); | 
|  |  | 
|  | const char *scheduled = qatomic_read(&to->scheduled); | 
|  |  | 
|  | QSIMPLEQ_REMOVE_HEAD(&pending, co_queue_next); | 
|  |  | 
|  | trace_qemu_aio_coroutine_enter(ctx, from, to, to->entry_arg); | 
|  |  | 
|  | /* if the Coroutine has already been scheduled, entering it again will | 
|  | * cause us to enter it twice, potentially even after the coroutine has | 
|  | * been deleted */ | 
|  | if (scheduled) { | 
|  | fprintf(stderr, | 
|  | "%s: Co-routine was already scheduled in '%s'\n", | 
|  | __func__, scheduled); | 
|  | abort(); | 
|  | } | 
|  |  | 
|  | if (to->caller) { | 
|  | fprintf(stderr, "Co-routine re-entered recursively\n"); | 
|  | abort(); | 
|  | } | 
|  |  | 
|  | to->caller = from; | 
|  | to->ctx = ctx; | 
|  |  | 
|  | /* Store to->ctx before anything that stores to.  Matches | 
|  | * barrier in aio_co_wake and qemu_co_mutex_wake. | 
|  | */ | 
|  | smp_wmb(); | 
|  |  | 
|  | ret = qemu_coroutine_switch(from, to, COROUTINE_ENTER); | 
|  |  | 
|  | /* Queued coroutines are run depth-first; previously pending coroutines | 
|  | * run after those queued more recently. | 
|  | */ | 
|  | QSIMPLEQ_PREPEND(&pending, &to->co_queue_wakeup); | 
|  |  | 
|  | switch (ret) { | 
|  | case COROUTINE_YIELD: | 
|  | break; | 
|  | case COROUTINE_TERMINATE: | 
|  | assert(!to->locks_held); | 
|  | trace_qemu_coroutine_terminate(to); | 
|  | coroutine_delete(to); | 
|  | break; | 
|  | default: | 
|  | abort(); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | void qemu_coroutine_enter(Coroutine *co) | 
|  | { | 
|  | qemu_aio_coroutine_enter(qemu_get_current_aio_context(), co); | 
|  | } | 
|  |  | 
|  | void qemu_coroutine_enter_if_inactive(Coroutine *co) | 
|  | { | 
|  | if (!qemu_coroutine_entered(co)) { | 
|  | qemu_coroutine_enter(co); | 
|  | } | 
|  | } | 
|  |  | 
|  | void coroutine_fn qemu_coroutine_yield(void) | 
|  | { | 
|  | Coroutine *self = qemu_coroutine_self(); | 
|  | Coroutine *to = self->caller; | 
|  |  | 
|  | trace_qemu_coroutine_yield(self, to); | 
|  |  | 
|  | if (!to) { | 
|  | fprintf(stderr, "Co-routine is yielding to no one\n"); | 
|  | abort(); | 
|  | } | 
|  |  | 
|  | self->caller = NULL; | 
|  | qemu_coroutine_switch(self, to, COROUTINE_YIELD); | 
|  | } | 
|  |  | 
|  | bool qemu_coroutine_entered(Coroutine *co) | 
|  | { | 
|  | return co->caller; | 
|  | } | 
|  |  | 
|  | AioContext *qemu_coroutine_get_aio_context(Coroutine *co) | 
|  | { | 
|  | return co->ctx; | 
|  | } | 
|  |  | 
|  | void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size) | 
|  | { | 
|  | QEMU_LOCK_GUARD(&global_pool_lock); | 
|  | global_pool_max_size += additional_pool_size; | 
|  | } | 
|  |  | 
|  | void qemu_coroutine_dec_pool_size(unsigned int removing_pool_size) | 
|  | { | 
|  | QEMU_LOCK_GUARD(&global_pool_lock); | 
|  | global_pool_max_size -= removing_pool_size; | 
|  | } | 
|  |  | 
|  | static unsigned int get_global_pool_hard_max_size(void) | 
|  | { | 
|  | #ifdef __linux__ | 
|  | g_autofree char *contents = NULL; | 
|  | int max_map_count; | 
|  |  | 
|  | /* | 
|  | * Linux processes can have up to max_map_count virtual memory areas | 
|  | * (VMAs). mmap(2), mprotect(2), etc fail with ENOMEM beyond this limit. We | 
|  | * must limit the coroutine pool to a safe size to avoid running out of | 
|  | * VMAs. | 
|  | */ | 
|  | if (g_file_get_contents("/proc/sys/vm/max_map_count", &contents, NULL, | 
|  | NULL) && | 
|  | qemu_strtoi(contents, NULL, 10, &max_map_count) == 0) { | 
|  | /* | 
|  | * This is an upper bound that avoids exceeding max_map_count. Leave a | 
|  | * fixed amount for non-coroutine users like library dependencies, | 
|  | * vhost-user, etc. Each coroutine takes up 2 VMAs so halve the | 
|  | * remaining amount. | 
|  | */ | 
|  | if (max_map_count > 5000) { | 
|  | return (max_map_count - 5000) / 2; | 
|  | } else { | 
|  | /* Disable the global pool but threads still have local pools */ | 
|  | return 0; | 
|  | } | 
|  | } | 
|  | #endif | 
|  |  | 
|  | return UINT_MAX; | 
|  | } | 
|  |  | 
|  | static void __attribute__((constructor)) qemu_coroutine_init(void) | 
|  | { | 
|  | qemu_mutex_init(&global_pool_lock); | 
|  | global_pool_hard_max_size = get_global_pool_hard_max_size(); | 
|  | } |