/*
 * QEMU block layer thread pool
 *
 * Copyright IBM, Corp. 2008
 * Copyright Red Hat, Inc. 2012
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *  Paolo Bonzini     <pbonzini@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */
#include "qemu/osdep.h"
#include "qemu/defer-call.h"
#include "qemu/queue.h"
#include "qemu/thread.h"
#include "qemu/coroutine.h"
#include "trace.h"
#include "block/thread-pool.h"
#include "qemu/main-loop.h"

static void do_spawn_thread(ThreadPool *pool);

typedef struct ThreadPoolElement ThreadPoolElement;

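/*
 * A request normally moves from THREAD_QUEUED to THREAD_ACTIVE to
 * THREAD_DONE; cancellation can take it straight from THREAD_QUEUED
 * to THREAD_DONE with ret set to -ECANCELED.
 */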
enum ThreadState {
    THREAD_QUEUED,
    THREAD_ACTIVE,
    THREAD_DONE,
};

struct ThreadPoolElement {
    BlockAIOCB common;
    ThreadPool *pool;
    ThreadPoolFunc *func;
    void *arg;

    /* Moving state out of THREAD_QUEUED is protected by lock.  After
     * that, only the worker thread can write to it.  Reads and writes
     * of state and ret are ordered with memory barriers.
     */
    enum ThreadState state;
    int ret;

    /* Access to this list is protected by lock.  */
    QTAILQ_ENTRY(ThreadPoolElement) reqs;

    /* This list is only written by the thread pool's mother thread.  */
    QLIST_ENTRY(ThreadPoolElement) all;
};

struct ThreadPool {
    AioContext *ctx;
    QEMUBH *completion_bh;
    QemuMutex lock;
    QemuCond worker_stopped;
    QemuCond request_cond;
    QEMUBH *new_thread_bh;

    /* The following variables are only accessed from one AioContext. */
    QLIST_HEAD(, ThreadPoolElement) head;

    /* The following variables are protected by lock.  */
    QTAILQ_HEAD(, ThreadPoolElement) request_list;
    int cur_threads;
    int idle_threads;
    int new_threads;     /* backlog of threads we need to create */
    int pending_threads; /* threads created but not running yet */
    int min_threads;
    int max_threads;
};

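/*
 * Worker thread main loop: pick requests off request_list, run them
 * outside the lock, and schedule completion_bh so the pool's
 * AioContext can invoke the callbacks.  The thread exits when it has
 * been idle too long with more than min_threads workers alive, or
 * when cur_threads exceeds max_threads (e.g. after
 * thread_pool_update_params() shrank the pool).
 */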
static void *worker_thread(void *opaque)
{
    ThreadPool *pool = opaque;

    qemu_mutex_lock(&pool->lock);
    pool->pending_threads--;
    do_spawn_thread(pool);

    while (pool->cur_threads <= pool->max_threads) {
        ThreadPoolElement *req;
        int ret;

        if (QTAILQ_EMPTY(&pool->request_list)) {
            pool->idle_threads++;
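            /* Wait up to 10 seconds for new work; a zero return means
             * the wait timed out.
             */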
            ret = qemu_cond_timedwait(&pool->request_cond, &pool->lock, 10000);
            pool->idle_threads--;
            if (ret == 0 &&
                QTAILQ_EMPTY(&pool->request_list) &&
                pool->cur_threads > pool->min_threads) {
                /* Timed out + no work to do + no need for warm threads = exit.  */
                break;
            }
            /*
             * Even if there was some work to do, check if there aren't
             * too many worker threads before picking it up.
             */
            continue;
        }

        req = QTAILQ_FIRST(&pool->request_list);
        QTAILQ_REMOVE(&pool->request_list, req, reqs);
        req->state = THREAD_ACTIVE;
        qemu_mutex_unlock(&pool->lock);

        ret = req->func(req->arg);

        req->ret = ret;
        /* Write ret before state.  */
        smp_wmb();
        req->state = THREAD_DONE;

        qemu_bh_schedule(pool->completion_bh);
        qemu_mutex_lock(&pool->lock);
    }

    pool->cur_threads--;
    qemu_cond_signal(&pool->worker_stopped);

    /*
     * Wake up another thread, in case we got a wakeup but decided
     * to exit due to pool->cur_threads > pool->max_threads.
     */
    qemu_cond_signal(&pool->request_cond);
    qemu_mutex_unlock(&pool->lock);
    return NULL;
}

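/*
 * Create one pending worker if there is a backlog.  Each new worker
 * calls this again from worker_thread(), so new_threads drains one
 * thread at a time rather than in a loop under the lock.
 */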
static void do_spawn_thread(ThreadPool *pool)
{
    QemuThread t;

    /* Runs with lock taken.  */
    if (!pool->new_threads) {
        return;
    }

    pool->new_threads--;
    pool->pending_threads++;

    qemu_thread_create(&t, "worker", worker_thread, pool, QEMU_THREAD_DETACHED);
}

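/*
 * Bottom half running in the pool's home AioContext, so that workers
 * are created from that thread and inherit its affinity rather than a
 * submitting vCPU's.
 */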
static void spawn_thread_bh_fn(void *opaque)
{
    ThreadPool *pool = opaque;

    qemu_mutex_lock(&pool->lock);
    do_spawn_thread(pool);
    qemu_mutex_unlock(&pool->lock);
}

static void spawn_thread(ThreadPool *pool)
{
    pool->cur_threads++;
    pool->new_threads++;
    /* If there are threads being created, they will spawn new workers, so
     * we don't spend time creating many threads in a loop holding a mutex or
     * starving the current vcpu.
     *
     * If there are no idle threads, ask the main thread to create one, so we
     * inherit the correct affinity instead of the vcpu affinity.
     */
    if (!pool->pending_threads) {
        qemu_bh_schedule(pool->new_thread_bh);
    }
}

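/*
 * Runs in the pool's AioContext and invokes the completion callbacks
 * of every element that has reached THREAD_DONE.  The list walk is
 * restarted after each callback because a callback may itself run
 * aio_poll() and complete further requests.
 */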
static void thread_pool_completion_bh(void *opaque)
{
    ThreadPool *pool = opaque;
    ThreadPoolElement *elem, *next;

    defer_call_begin(); /* cb() may use defer_call() to coalesce work */

restart:
    QLIST_FOREACH_SAFE(elem, &pool->head, all, next) {
        if (elem->state != THREAD_DONE) {
            continue;
        }

        trace_thread_pool_complete(pool, elem, elem->common.opaque,
                                   elem->ret);
        QLIST_REMOVE(elem, all);

        if (elem->common.cb) {
            /* Read state before ret.  */
            smp_rmb();

            /* Schedule ourselves in case elem->common.cb() calls aio_poll() to
             * wait for another request that completed at the same time.
             */
            qemu_bh_schedule(pool->completion_bh);

            elem->common.cb(elem->common.opaque, elem->ret);

            /* We can safely cancel the completion_bh here regardless of someone
             * else having scheduled it meanwhile because we reenter the
             * completion function anyway (goto restart).
             */
            qemu_bh_cancel(pool->completion_bh);

            qemu_aio_unref(elem);
            goto restart;
        } else {
            qemu_aio_unref(elem);
        }
    }

    defer_call_end();
}

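/*
 * Cancellation only succeeds while the request is still queued; once
 * a worker has marked it THREAD_ACTIVE, the function runs to
 * completion and the callback sees its real return value.
 */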
static void thread_pool_cancel(BlockAIOCB *acb)
{
    ThreadPoolElement *elem = (ThreadPoolElement *)acb;
    ThreadPool *pool = elem->pool;

    trace_thread_pool_cancel(elem, elem->common.opaque);

    QEMU_LOCK_GUARD(&pool->lock);
    if (elem->state == THREAD_QUEUED) {
        QTAILQ_REMOVE(&pool->request_list, elem, reqs);
        qemu_bh_schedule(pool->completion_bh);

        elem->state = THREAD_DONE;
        elem->ret = -ECANCELED;
    }
}

static const AIOCBInfo thread_pool_aiocb_info = {
    .aiocb_size         = sizeof(ThreadPoolElement),
    .cancel_async       = thread_pool_cancel,
};

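/*
 * Submit func(arg) to the thread pool of the calling thread's
 * AioContext.  cb is invoked from that AioContext with func's return
 * value once the request completes.  A new worker is spawned if no
 * thread is idle and the pool is below max_threads.
 */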
BlockAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg,
                                   BlockCompletionFunc *cb, void *opaque)
{
    ThreadPoolElement *req;
    AioContext *ctx = qemu_get_current_aio_context();
    ThreadPool *pool = aio_get_thread_pool(ctx);

    /* Assert that the thread submitting work is the same running the pool */
    assert(pool->ctx == qemu_get_current_aio_context());

    req = qemu_aio_get(&thread_pool_aiocb_info, NULL, cb, opaque);
    req->func = func;
    req->arg = arg;
    req->state = THREAD_QUEUED;
    req->pool = pool;

    QLIST_INSERT_HEAD(&pool->head, req, all);

    trace_thread_pool_submit(pool, req, arg);

    qemu_mutex_lock(&pool->lock);
    if (pool->idle_threads == 0 && pool->cur_threads < pool->max_threads) {
        spawn_thread(pool);
    }
    QTAILQ_INSERT_TAIL(&pool->request_list, req, reqs);
    qemu_mutex_unlock(&pool->lock);
    qemu_cond_signal(&pool->request_cond);
    return &req->common;
}

typedef struct ThreadPoolCo {
    Coroutine *co;
    int ret;
} ThreadPoolCo;

static void thread_pool_co_cb(void *opaque, int ret)
{
    ThreadPoolCo *co = opaque;

    co->ret = ret;
    aio_co_wake(co->co);
}

int coroutine_fn thread_pool_submit_co(ThreadPoolFunc *func, void *arg)
{
    ThreadPoolCo tpc = { .co = qemu_coroutine_self(), .ret = -EINPROGRESS };
    assert(qemu_in_coroutine());
    thread_pool_submit_aio(func, arg, thread_pool_co_cb, &tpc);
    qemu_coroutine_yield();
    return tpc.ret;
}

void thread_pool_submit(ThreadPoolFunc *func, void *arg)
{
    thread_pool_submit_aio(func, arg, NULL, NULL);
}
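
/*
 * Usage sketch (illustrative only; the worker function and its caller
 * are hypothetical, not part of this file): a coroutine offloads a
 * blocking syscall to the pool and is resumed with its result.
 *
 *     static int fsync_worker_fn(void *opaque)
 *     {
 *         int fd = *(int *)opaque;
 *         return fsync(fd) == 0 ? 0 : -errno;
 *     }
 *
 *     static int coroutine_fn co_fsync(int fd)
 *     {
 *         return thread_pool_submit_co(fsync_worker_fn, &fd);
 *     }
 */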
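/*
 * Pick up ctx's thread_pool_min/thread_pool_max settings: spawn
 * threads until min_threads is reached, and nudge idle workers awake
 * so that any excess ones see cur_threads > max_threads and exit.
 */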
void thread_pool_update_params(ThreadPool *pool, AioContext *ctx)
{
    qemu_mutex_lock(&pool->lock);

    pool->min_threads = ctx->thread_pool_min;
    pool->max_threads = ctx->thread_pool_max;

    /*
     * We either have to:
     *  - Increase the number of available threads until over the min_threads
     *    threshold.
     *  - Bump the worker threads so that they exit, until under the max_threads
     *    threshold.
     *  - Do nothing. The current number of threads falls in between the min and
     *    max thresholds. We'll let the pool manage itself.
     */
    for (int i = pool->cur_threads; i < pool->min_threads; i++) {
        spawn_thread(pool);
    }

    for (int i = pool->cur_threads; i > pool->max_threads; i--) {
        qemu_cond_signal(&pool->request_cond);
    }

    qemu_mutex_unlock(&pool->lock);
}

static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx)
{
    if (!ctx) {
        ctx = qemu_get_aio_context();
    }

    memset(pool, 0, sizeof(*pool));
    pool->ctx = ctx;
    pool->completion_bh = aio_bh_new(ctx, thread_pool_completion_bh, pool);
    qemu_mutex_init(&pool->lock);
    qemu_cond_init(&pool->worker_stopped);
    qemu_cond_init(&pool->request_cond);
    pool->new_thread_bh = aio_bh_new(ctx, spawn_thread_bh_fn, pool);

    QLIST_INIT(&pool->head);
    QTAILQ_INIT(&pool->request_list);

    thread_pool_update_params(pool, ctx);
}

ThreadPool *thread_pool_new(AioContext *ctx)
{
    ThreadPool *pool = g_new(ThreadPool, 1);
    thread_pool_init_one(pool, ctx);
    return pool;
}

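/*
 * Tear down the pool: cancel the thread-creation backlog, force all
 * workers to exit by dropping max_threads to zero, and wait for each
 * of them to signal worker_stopped before freeing the structure.
 */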
void thread_pool_free(ThreadPool *pool)
{
    if (!pool) {
        return;
    }

    assert(QLIST_EMPTY(&pool->head));

    qemu_mutex_lock(&pool->lock);

    /* Stop new threads from spawning */
    qemu_bh_delete(pool->new_thread_bh);
    pool->cur_threads -= pool->new_threads;
    pool->new_threads = 0;

    /* Wait for worker threads to terminate */
    pool->max_threads = 0;
    qemu_cond_broadcast(&pool->request_cond);
    while (pool->cur_threads > 0) {
        qemu_cond_wait(&pool->worker_stopped, &pool->lock);
    }

    qemu_mutex_unlock(&pool->lock);

    qemu_bh_delete(pool->completion_bh);
    qemu_cond_destroy(&pool->request_cond);
    qemu_cond_destroy(&pool->worker_stopped);
    qemu_mutex_destroy(&pool->lock);
    g_free(pool);
}