aio-posix: move RCU_READ_LOCK() into run_poll_handlers()

Now that run_poll_handlers_once() is only called by run_poll_handlers(), we can improve the CPU time profile by moving the expensive RCU_READ_LOCK() out of the polling loop.

This reduces run_poll_handlers() from 40% CPU to 10% CPU in perf's sampling profiler output.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Link: https://lore.kernel.org/r/20200305170806.1313245-3-stefanha@redhat.com
Message-Id: <20200305170806.1313245-3-stefanha@redhat.com>

commit: 3aa221b382c9b36db1750ef5ed340b6566aacb8c [log] [tgz]
author: Stefan Hajnoczi <stefanha@redhat.com> Thu Mar 05 17:08:01 2020 +0000
committer: Stefan Hajnoczi <stefanha@redhat.com> Mon Mar 09 16:41:31 2020 +0000
tree: bdc6ceb75661f05b66ab4b77e9a4a59c0da1e490
parent: e4346192f1c2e1683a807b46efac47ef0cf9b545 [diff]
diff --git a/util/aio-posix.c b/util/aio-posix.c
index 65964a2..11a4971 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c

@@ -583,16 +583,6 @@
     bool progress = false;
     AioHandler *node;
 
-    /*
-     * Optimization: ->io_poll() handlers often contain RCU read critical
-     * sections and we therefore see many rcu_read_lock() -> rcu_read_unlock()
-     * -> rcu_read_lock() -> ... sequences with expensive memory
-     * synchronization primitives.  Make the entire polling loop an RCU
-     * critical section because nested rcu_read_lock()/rcu_read_unlock() calls
-     * are cheap.
-     */
-    RCU_READ_LOCK_GUARD();
-
     QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
         if (!QLIST_IS_INSERTED(node, node_deleted) && node->io_poll &&
             aio_node_check(ctx, node->is_external) &&
@@ -636,6 +626,16 @@
 
     trace_run_poll_handlers_begin(ctx, max_ns, *timeout);
 
+    /*
+     * Optimization: ->io_poll() handlers often contain RCU read critical
+     * sections and we therefore see many rcu_read_lock() -> rcu_read_unlock()
+     * -> rcu_read_lock() -> ... sequences with expensive memory
+     * synchronization primitives.  Make the entire polling loop an RCU
+     * critical section because nested rcu_read_lock()/rcu_read_unlock() calls
+     * are cheap.
+     */
+    RCU_READ_LOCK_GUARD();
+
     start_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
     do {
         progress = run_poll_handlers_once(ctx, timeout);
commit	3aa221b382c9b36db1750ef5ed340b6566aacb8c	[log] [tgz]
author	Stefan Hajnoczi <stefanha@redhat.com>	Thu Mar 05 17:08:01 2020 +0000
committer	Stefan Hajnoczi <stefanha@redhat.com>	Mon Mar 09 16:41:31 2020 +0000
tree	bdc6ceb75661f05b66ab4b77e9a4a59c0da1e490
parent	e4346192f1c2e1683a807b46efac47ef0cf9b545 [diff]