coroutine: avoid co_queue_wakeup recursion

qemu_aio_coroutine_enter() is (indirectly) called recursively when
processing co_queue_wakeup.  This can lead to stack exhaustion.

This patch rewrites co_queue_wakeup in an iterative fashion (instead of
recursive) with bounded memory usage to prevent stack exhaustion.

qemu_co_queue_run_restart() is inlined into qemu_aio_coroutine_enter()
and the qemu_coroutine_enter() call is turned into a loop to avoid
recursion.

There is one change that is worth mentioning:  Previously, when
coroutine A queued coroutine B, qemu_co_queue_run_restart() entered
coroutine B from coroutine A.  If A was terminating then it would still
stay alive until B yielded.  After this patch B is entered by A's parent
so that a A can be deleted immediately if it is terminating.

It is safe to make this change since B could never interact with A if it
was terminating anyway.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20180322152834.12656-3-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
diff --git a/block/io.c b/block/io.c
index 2b09c65..bd9a19a 100644
--- a/block/io.c
+++ b/block/io.c
@@ -249,8 +249,7 @@
     BdrvCoDrainData data;
 
     /* Calling bdrv_drain() from a BH ensures the current coroutine yields and
-     * other coroutines run if they were queued from
-     * qemu_co_queue_run_restart(). */
+     * other coroutines run if they were queued by aio_co_enter(). */
 
     assert(qemu_in_coroutine());
     data = (BdrvCoDrainData) {
diff --git a/include/qemu/coroutine_int.h b/include/qemu/coroutine_int.h
index 59e8406..bd6b046 100644
--- a/include/qemu/coroutine_int.h
+++ b/include/qemu/coroutine_int.h
@@ -68,6 +68,5 @@
 void qemu_coroutine_delete(Coroutine *co);
 CoroutineAction qemu_coroutine_switch(Coroutine *from, Coroutine *to,
                                       CoroutineAction action);
-void coroutine_fn qemu_co_queue_run_restart(Coroutine *co);
 
 #endif
diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c
index 5a80c10..27438a1 100644
--- a/util/qemu-coroutine-lock.c
+++ b/util/qemu-coroutine-lock.c
@@ -68,40 +68,6 @@
     }
 }
 
-/**
- * qemu_co_queue_run_restart:
- *
- * Enter each coroutine that was previously marked for restart by
- * qemu_co_queue_next() or qemu_co_queue_restart_all().  This function is
- * invoked by the core coroutine code when the current coroutine yields or
- * terminates.
- */
-void qemu_co_queue_run_restart(Coroutine *co)
-{
-    Coroutine *next;
-    QSIMPLEQ_HEAD(, Coroutine) tmp_queue_wakeup =
-        QSIMPLEQ_HEAD_INITIALIZER(tmp_queue_wakeup);
-
-    trace_qemu_co_queue_run_restart(co);
-
-    /* Because "co" has yielded, any coroutine that we wakeup can resume it.
-     * If this happens and "co" terminates, co->co_queue_wakeup becomes
-     * invalid memory.  Therefore, use a temporary queue and do not touch
-     * the "co" coroutine as soon as you enter another one.
-     *
-     * In its turn resumed "co" can populate "co_queue_wakeup" queue with
-     * new coroutines to be woken up.  The caller, who has resumed "co",
-     * will be responsible for traversing the same queue, which may cause
-     * a different wakeup order but not any missing wakeups.
-     */
-    QSIMPLEQ_CONCAT(&tmp_queue_wakeup, &co->co_queue_wakeup);
-
-    while ((next = QSIMPLEQ_FIRST(&tmp_queue_wakeup))) {
-        QSIMPLEQ_REMOVE_HEAD(&tmp_queue_wakeup, co_queue_next);
-        qemu_coroutine_enter(next);
-    }
-}
-
 static bool qemu_co_queue_do_restart(CoQueue *queue, bool single)
 {
     Coroutine *next;
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
index 9eff7fd..1ba4191 100644
--- a/util/qemu-coroutine.c
+++ b/util/qemu-coroutine.c
@@ -104,57 +104,65 @@
 
 void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co)
 {
-    Coroutine *self = qemu_coroutine_self();
-    CoroutineAction ret;
+    QSIMPLEQ_HEAD(, Coroutine) pending = QSIMPLEQ_HEAD_INITIALIZER(pending);
+    Coroutine *from = qemu_coroutine_self();
 
-    /* Cannot rely on the read barrier for co in aio_co_wake(), as there are
-     * callers outside of aio_co_wake() */
-    const char *scheduled = atomic_mb_read(&co->scheduled);
+    QSIMPLEQ_INSERT_TAIL(&pending, co, co_queue_next);
 
-    trace_qemu_aio_coroutine_enter(ctx, self, co, co->entry_arg);
+    /* Run co and any queued coroutines */
+    while (!QSIMPLEQ_EMPTY(&pending)) {
+        Coroutine *to = QSIMPLEQ_FIRST(&pending);
+        CoroutineAction ret;
 
-    /* if the Coroutine has already been scheduled, entering it again will
-     * cause us to enter it twice, potentially even after the coroutine has
-     * been deleted */
-    if (scheduled) {
-        fprintf(stderr,
-                "%s: Co-routine was already scheduled in '%s'\n",
-                __func__, scheduled);
-        abort();
-    }
+        /* Cannot rely on the read barrier for to in aio_co_wake(), as there are
+         * callers outside of aio_co_wake() */
+        const char *scheduled = atomic_mb_read(&to->scheduled);
 
-    if (co->caller) {
-        fprintf(stderr, "Co-routine re-entered recursively\n");
-        abort();
-    }
+        QSIMPLEQ_REMOVE_HEAD(&pending, co_queue_next);
 
-    co->caller = self;
-    co->ctx = ctx;
+        trace_qemu_aio_coroutine_enter(ctx, from, to, to->entry_arg);
 
-    /* Store co->ctx before anything that stores co.  Matches
-     * barrier in aio_co_wake and qemu_co_mutex_wake.
-     */
-    smp_wmb();
+        /* if the Coroutine has already been scheduled, entering it again will
+         * cause us to enter it twice, potentially even after the coroutine has
+         * been deleted */
+        if (scheduled) {
+            fprintf(stderr,
+                    "%s: Co-routine was already scheduled in '%s'\n",
+                    __func__, scheduled);
+            abort();
+        }
 
-    ret = qemu_coroutine_switch(self, co, COROUTINE_ENTER);
+        if (to->caller) {
+            fprintf(stderr, "Co-routine re-entered recursively\n");
+            abort();
+        }
 
-    qemu_co_queue_run_restart(co);
+        to->caller = from;
+        to->ctx = ctx;
 
-    /* Beware, if ret == COROUTINE_YIELD and qemu_co_queue_run_restart()
-     * has started any other coroutine, "co" might have been reentered
-     * and even freed by now!  So be careful and do not touch it.
-     */
+        /* Store to->ctx before anything that stores to.  Matches
+         * barrier in aio_co_wake and qemu_co_mutex_wake.
+         */
+        smp_wmb();
 
-    switch (ret) {
-    case COROUTINE_YIELD:
-        return;
-    case COROUTINE_TERMINATE:
-        assert(!co->locks_held);
-        trace_qemu_coroutine_terminate(co);
-        coroutine_delete(co);
-        return;
-    default:
-        abort();
+        ret = qemu_coroutine_switch(from, to, COROUTINE_ENTER);
+
+        /* Queued coroutines are run depth-first; previously pending coroutines
+         * run after those queued more recently.
+         */
+        QSIMPLEQ_PREPEND(&pending, &to->co_queue_wakeup);
+
+        switch (ret) {
+        case COROUTINE_YIELD:
+            break;
+        case COROUTINE_TERMINATE:
+            assert(!to->locks_held);
+            trace_qemu_coroutine_terminate(to);
+            coroutine_delete(to);
+            break;
+        default:
+            abort();
+        }
     }
 }