/*
 * QEMU block layer thread pool
 *
 * Copyright IBM, Corp. 2008
 * Copyright Red Hat, Inc. 2012
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *  Paolo Bonzini     <pbonzini@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */
#include "qemu/osdep.h"
#include "qemu/defer-call.h"
#include "qemu/queue.h"
#include "qemu/thread.h"
#include "qemu/coroutine.h"
#include "trace.h"
#include "block/thread-pool.h"
#include "qemu/main-loop.h"

static void do_spawn_thread(ThreadPool *pool);

typedef struct ThreadPoolElement ThreadPoolElement;

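/*
 * A request normally moves from THREAD_QUEUED to THREAD_ACTIVE to
 * THREAD_DONE; cancellation can take it straight from THREAD_QUEUED
 * to THREAD_DONE with ret set to -ECANCELED.
 */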
enum ThreadState {
    THREAD_QUEUED,
    THREAD_ACTIVE,
    THREAD_DONE,
};

struct ThreadPoolElement {
    BlockAIOCB common;
    ThreadPool *pool;
    ThreadPoolFunc *func;
    void *arg;

    /* Moving state out of THREAD_QUEUED is protected by lock.  After
     * that, only the worker thread can write to it.  Reads and writes
     * of state and ret are ordered with memory barriers.
     */
    enum ThreadState state;
    int ret;

    /* Access to this list is protected by lock.  */
    QTAILQ_ENTRY(ThreadPoolElement) reqs;

    /* This list is only written by the thread pool's mother thread.  */
    QLIST_ENTRY(ThreadPoolElement) all;
};

struct ThreadPool {
    AioContext *ctx;
    QEMUBH *completion_bh;
    QemuMutex lock;
    QemuCond worker_stopped;
    QemuCond request_cond;
    QEMUBH *new_thread_bh;

    /* The following variables are only accessed from one AioContext. */
    QLIST_HEAD(, ThreadPoolElement) head;

    /* The following variables are protected by lock.  */
    QTAILQ_HEAD(, ThreadPoolElement) request_list;
    int cur_threads;
    int idle_threads;
    int new_threads;     /* backlog of threads we need to create */
    int pending_threads; /* threads created but not running yet */
    int min_threads;
    int max_threads;
};

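/*
 * Worker thread main loop: pick requests off request_list, run them
 * outside the lock, and schedule completion_bh so the pool's
 * AioContext can invoke the callbacks.  The thread exits when it has
 * been idle too long with more than min_threads workers alive, or
 * when cur_threads exceeds max_threads (e.g. after
 * thread_pool_update_params() shrank the pool).
 */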
static void *worker_thread(void *opaque)
{
    ThreadPool *pool = opaque;

    qemu_mutex_lock(&pool->lock);
    pool->pending_threads--;
    do_spawn_thread(pool);

    while (pool->cur_threads <= pool->max_threads) {
        ThreadPoolElement *req;
        int ret;

        if (QTAILQ_EMPTY(&pool->request_list)) {
            pool->idle_threads++;
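            /* Wait up to 10 seconds for new work; a zero return means
             * the wait timed out.
             */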
            ret = qemu_cond_timedwait(&pool->request_cond, &pool->lock, 10000);
            pool->idle_threads--;
            if (ret == 0 &&
                QTAILQ_EMPTY(&pool->request_list) &&
                pool->cur_threads > pool->min_threads) {
                /* Timed out + no work to do + no need for warm threads = exit.  */
                break;
            }
            /*
             * Even if there was some work to do, check if there aren't
             * too many worker threads before picking it up.
             */
            continue;
        }

        req = QTAILQ_FIRST(&pool->request_list);
        QTAILQ_REMOVE(&pool->request_list, req, reqs);
        req->state = THREAD_ACTIVE;
        qemu_mutex_unlock(&pool->lock);

        ret = req->func(req->arg);

        req->ret = ret;
        /* Write ret before state.  */
        smp_wmb();
        req->state = THREAD_DONE;

        qemu_bh_schedule(pool->completion_bh);
        qemu_mutex_lock(&pool->lock);
    }

    pool->cur_threads--;
    qemu_cond_signal(&pool->worker_stopped);

    /*
     * Wake up another thread, in case we got a wakeup but decided
     * to exit due to pool->cur_threads > pool->max_threads.
     */
    qemu_cond_signal(&pool->request_cond);
    qemu_mutex_unlock(&pool->lock);
    return NULL;
}

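/*
 * Create one pending worker if there is a backlog.  Each new worker
 * calls this again from worker_thread(), so new_threads drains one
 * thread at a time rather than in a loop under the lock.
 */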
static void do_spawn_thread(ThreadPool *pool)
{
    QemuThread t;

    /* Runs with lock taken.  */
    if (!pool->new_threads) {
        return;
    }

    pool->new_threads--;
    pool->pending_threads++;

    qemu_thread_create(&t, "worker", worker_thread, pool, QEMU_THREAD_DETACHED);
}

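/*
 * Bottom half running in the pool's home AioContext, so that workers
 * are created from that thread and inherit its affinity rather than a
 * submitting vCPU's.
 */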
static void spawn_thread_bh_fn(void *opaque)
{
    ThreadPool *pool = opaque;

    qemu_mutex_lock(&pool->lock);
    do_spawn_thread(pool);
    qemu_mutex_unlock(&pool->lock);
}

static void spawn_thread(ThreadPool *pool)
{
    pool->cur_threads++;
    pool->new_threads++;
    /* If there are threads being created, they will spawn new workers, so
     * we don't spend time creating many threads in a loop holding a mutex or
     * starving the current vcpu.
     *
     * If there are no idle threads, ask the main thread to create one, so we
     * inherit the correct affinity instead of the vcpu affinity.
     */
    if (!pool->pending_threads) {
        qemu_bh_schedule(pool->new_thread_bh);
    }
}

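/*
 * Runs in the pool's AioContext and invokes the completion callbacks
 * of every element that has reached THREAD_DONE.  The list walk is
 * restarted after each callback because a callback may itself run
 * aio_poll() and complete further requests.
 */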
static void thread_pool_completion_bh(void *opaque)
{
    ThreadPool *pool = opaque;
    ThreadPoolElement *elem, *next;

    defer_call_begin(); /* cb() may use defer_call() to coalesce work */

restart:
    QLIST_FOREACH_SAFE(elem, &pool->head, all, next) {
        if (elem->state != THREAD_DONE) {
            continue;
        }

        trace_thread_pool_complete(pool, elem, elem->common.opaque,
                                   elem->ret);
        QLIST_REMOVE(elem, all);

        if (elem->common.cb) {
            /* Read state before ret.  */
            smp_rmb();

            /* Schedule ourselves in case elem->common.cb() calls aio_poll() to
             * wait for another request that completed at the same time.
             */
            qemu_bh_schedule(pool->completion_bh);

            elem->common.cb(elem->common.opaque, elem->ret);

            /* We can safely cancel the completion_bh here regardless of someone
             * else having scheduled it meanwhile because we reenter the
             * completion function anyway (goto restart).
             */
            qemu_bh_cancel(pool->completion_bh);

            qemu_aio_unref(elem);
            goto restart;
        } else {
            qemu_aio_unref(elem);
        }
    }

    defer_call_end();
}

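/*
 * Cancellation only succeeds while the request is still queued; once
 * a worker has marked it THREAD_ACTIVE, the function runs to
 * completion and the callback sees its real return value.
 */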
static void thread_pool_cancel(BlockAIOCB *acb)
{
    ThreadPoolElement *elem = (ThreadPoolElement *)acb;
    ThreadPool *pool = elem->pool;

    trace_thread_pool_cancel(elem, elem->common.opaque);

    QEMU_LOCK_GUARD(&pool->lock);
    if (elem->state == THREAD_QUEUED) {
        QTAILQ_REMOVE(&pool->request_list, elem, reqs);
        qemu_bh_schedule(pool->completion_bh);

        elem->state = THREAD_DONE;
        elem->ret = -ECANCELED;
    }
}

static const AIOCBInfo thread_pool_aiocb_info = {
    .aiocb_size         = sizeof(ThreadPoolElement),
    .cancel_async       = thread_pool_cancel,
};

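/*
 * Submit func(arg) to the thread pool of the calling thread's
 * AioContext.  cb is invoked from that AioContext with func's return
 * value once the request completes.  A new worker is spawned if no
 * thread is idle and the pool is below max_threads.
 */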
BlockAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg,
                                   BlockCompletionFunc *cb, void *opaque)
{
    ThreadPoolElement *req;
    AioContext *ctx = qemu_get_current_aio_context();
    ThreadPool *pool = aio_get_thread_pool(ctx);

    /* Assert that the thread submitting work is the same running the pool */
    assert(pool->ctx == qemu_get_current_aio_context());

    req = qemu_aio_get(&thread_pool_aiocb_info, NULL, cb, opaque);
    req->func = func;
    req->arg = arg;
    req->state = THREAD_QUEUED;
    req->pool = pool;

    QLIST_INSERT_HEAD(&pool->head, req, all);

    trace_thread_pool_submit(pool, req, arg);

    qemu_mutex_lock(&pool->lock);
    if (pool->idle_threads == 0 && pool->cur_threads < pool->max_threads) {
        spawn_thread(pool);
    }
    QTAILQ_INSERT_TAIL(&pool->request_list, req, reqs);
    qemu_mutex_unlock(&pool->lock);
    qemu_cond_signal(&pool->request_cond);
    return &req->common;
}

typedef struct ThreadPoolCo {
    Coroutine *co;
    int ret;
} ThreadPoolCo;

static void thread_pool_co_cb(void *opaque, int ret)
{
    ThreadPoolCo *co = opaque;

    co->ret = ret;
    aio_co_wake(co->co);
}

int coroutine_fn thread_pool_submit_co(ThreadPoolFunc *func, void *arg)
{
    ThreadPoolCo tpc = { .co = qemu_coroutine_self(), .ret = -EINPROGRESS };
    assert(qemu_in_coroutine());
    thread_pool_submit_aio(func, arg, thread_pool_co_cb, &tpc);
    qemu_coroutine_yield();
    return tpc.ret;
}

void thread_pool_submit(ThreadPoolFunc *func, void *arg)
{
    thread_pool_submit_aio(func, arg, NULL, NULL);
}
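
/*
 * Usage sketch (illustrative only; the worker function and its caller
 * are hypothetical, not part of this file): a coroutine offloads a
 * blocking syscall to the pool and is resumed with its result.
 *
 *     static int fsync_worker_fn(void *opaque)
 *     {
 *         int fd = *(int *)opaque;
 *         return fsync(fd) == 0 ? 0 : -errno;
 *     }
 *
 *     static int coroutine_fn co_fsync(int fd)
 *     {
 *         return thread_pool_submit_co(fsync_worker_fn, &fd);
 *     }
 */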
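/*
 * Pick up ctx's thread_pool_min/thread_pool_max settings: spawn
 * threads until min_threads is reached, and nudge idle workers awake
 * so that any excess ones see cur_threads > max_threads and exit.
 */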
void thread_pool_update_params(ThreadPool *pool, AioContext *ctx)
{
    qemu_mutex_lock(&pool->lock);

    pool->min_threads = ctx->thread_pool_min;
    pool->max_threads = ctx->thread_pool_max;

    /*
     * We either have to:
     *  - Increase the number of available threads until over the min_threads
     *    threshold.
     *  - Bump the worker threads so that they exit, until under the max_threads
     *    threshold.
     *  - Do nothing. The current number of threads falls in between the min and
     *    max thresholds. We'll let the pool manage itself.
     */
    for (int i = pool->cur_threads; i < pool->min_threads; i++) {
        spawn_thread(pool);
    }

    for (int i = pool->cur_threads; i > pool->max_threads; i--) {
        qemu_cond_signal(&pool->request_cond);
    }

    qemu_mutex_unlock(&pool->lock);
}

static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx)
{
    if (!ctx) {
        ctx = qemu_get_aio_context();
    }

    memset(pool, 0, sizeof(*pool));
    pool->ctx = ctx;
    pool->completion_bh = aio_bh_new(ctx, thread_pool_completion_bh, pool);
    qemu_mutex_init(&pool->lock);
    qemu_cond_init(&pool->worker_stopped);
    qemu_cond_init(&pool->request_cond);
    pool->new_thread_bh = aio_bh_new(ctx, spawn_thread_bh_fn, pool);

    QLIST_INIT(&pool->head);
    QTAILQ_INIT(&pool->request_list);

    thread_pool_update_params(pool, ctx);
}

ThreadPool *thread_pool_new(AioContext *ctx)
{
    ThreadPool *pool = g_new(ThreadPool, 1);
    thread_pool_init_one(pool, ctx);
    return pool;
}

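/*
 * Tear down the pool: cancel the thread-creation backlog, force all
 * workers to exit by dropping max_threads to zero, and wait for each
 * of them to signal worker_stopped before freeing the structure.
 */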
void thread_pool_free(ThreadPool *pool)
{
    if (!pool) {
        return;
    }

    assert(QLIST_EMPTY(&pool->head));

    qemu_mutex_lock(&pool->lock);

    /* Stop new threads from spawning */
    qemu_bh_delete(pool->new_thread_bh);
    pool->cur_threads -= pool->new_threads;
    pool->new_threads = 0;

    /* Wait for worker threads to terminate */
    pool->max_threads = 0;
    qemu_cond_broadcast(&pool->request_cond);
    while (pool->cur_threads > 0) {
        qemu_cond_wait(&pool->worker_stopped, &pool->lock);
    }

    qemu_mutex_unlock(&pool->lock);

    qemu_bh_delete(pool->completion_bh);
    qemu_cond_destroy(&pool->request_cond);
    qemu_cond_destroy(&pool->worker_stopped);
    qemu_mutex_destroy(&pool->lock);
    g_free(pool);
}