/*
 * QEMU aio implementation
 *
 * Copyright IBM Corp., 2008
 * Copyright Red Hat Inc., 2012
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *  Paolo Bonzini     <pbonzini@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu-common.h"
#include "block/block.h"
#include "qemu/queue.h"
#include "qemu/sockets.h"
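
/* One registered handler: either a socket with io_read/io_write callbacks
 * or an EventNotifier with an io_notify callback.  Nodes are only unlinked
 * from ctx->aio_handlers once no walker holds walking_handlers; until then
 * they are merely marked as deleted.
 */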
struct AioHandler {
    EventNotifier *e;
    IOHandler *io_read;
    IOHandler *io_write;
    EventNotifierHandler *io_notify;
    GPollFD pfd;
    int deleted;
    void *opaque;
    bool is_external;
    QLIST_ENTRY(AioHandler) node;
};
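
/* Register or update the read/write handlers for a socket on this
 * AioContext, or remove them when both io_read and io_write are NULL.
 * On Windows, fd must be a SOCKET; readiness is delivered through the
 * context's event notifier via WSAEventSelect().
 *
 * Illustrative only (callback and variable names are hypothetical):
 *
 *     aio_set_fd_handler(ctx, sockfd, false, my_read_cb, NULL, my_state);
 *     ...
 *     aio_set_fd_handler(ctx, sockfd, false, NULL, NULL, NULL);  (unregister)
 */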
void aio_set_fd_handler(AioContext *ctx,
                        int fd,
                        bool is_external,
                        IOHandler *io_read,
                        IOHandler *io_write,
                        void *opaque)
{
    /* fd is a SOCKET in our case */
    AioHandler *node;

    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        if (node->pfd.fd == fd && !node->deleted) {
            break;
        }
    }

    /* Are we deleting the fd handler? */
    if (!io_read && !io_write) {
        if (node) {
            /* If the lock is held, just mark the node as deleted */
            if (ctx->walking_handlers) {
                node->deleted = 1;
                node->pfd.revents = 0;
            } else {
                /* Otherwise, delete it for real.  We can't just mark it as
                 * deleted because deleted nodes are only cleaned up after
                 * releasing the walking_handlers lock.
                 */
                QLIST_REMOVE(node, node);
                g_free(node);
            }
        }
    } else {
        HANDLE event;

        if (node == NULL) {
            /* Alloc and insert if it's not already there */
            node = g_new0(AioHandler, 1);
            node->pfd.fd = fd;
            QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
        }

        node->pfd.events = 0;
        if (node->io_read) {
            node->pfd.events |= G_IO_IN;
        }
        if (node->io_write) {
            node->pfd.events |= G_IO_OUT;
        }

        node->e = &ctx->notifier;

        /* Update handler with latest information */
        node->opaque = opaque;
        node->io_read = io_read;
        node->io_write = io_write;
        node->is_external = is_external;

        event = event_notifier_get_handle(&ctx->notifier);
        WSAEventSelect(node->pfd.fd, event,
                       FD_READ | FD_ACCEPT | FD_CLOSE |
                       FD_CONNECT | FD_WRITE | FD_OOB);
    }

    aio_notify(ctx);
}
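
/* Register, update or (when io_notify is NULL) remove the handler for an
 * EventNotifier.  The notifier's HANDLE is also added to or removed from
 * the context's GSource so the glib main loop can poll it.
 */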
void aio_set_event_notifier(AioContext *ctx,
                            EventNotifier *e,
                            bool is_external,
                            EventNotifierHandler *io_notify)
{
    AioHandler *node;

    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        if (node->e == e && !node->deleted) {
            break;
        }
    }

    /* Are we deleting the fd handler? */
    if (!io_notify) {
        if (node) {
            g_source_remove_poll(&ctx->source, &node->pfd);

            /* If the lock is held, just mark the node as deleted */
            if (ctx->walking_handlers) {
                node->deleted = 1;
                node->pfd.revents = 0;
            } else {
                /* Otherwise, delete it for real.  We can't just mark it as
                 * deleted because deleted nodes are only cleaned up after
                 * releasing the walking_handlers lock.
                 */
                QLIST_REMOVE(node, node);
                g_free(node);
            }
        }
    } else {
        if (node == NULL) {
            /* Alloc and insert if it's not already there */
            node = g_new0(AioHandler, 1);
            node->e = e;
            node->pfd.fd = (uintptr_t)event_notifier_get_handle(e);
            node->pfd.events = G_IO_IN;
            node->is_external = is_external;
            QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);

            g_source_add_poll(&ctx->source, &node->pfd);
        }
        /* Update handler with latest information */
        node->io_notify = io_notify;
    }

    aio_notify(ctx);
}
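
/* Poll the registered sockets with a zero-timeout select() and record the
 * resulting G_IO_IN/G_IO_OUT bits in pfd.revents.  Returns true if any
 * socket is already ready, so aio_poll() can avoid blocking.
 */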
bool aio_prepare(AioContext *ctx)
{
    static struct timeval tv0;
    AioHandler *node;
    bool have_select_revents = false;
    fd_set rfds, wfds;

    /* fill fd sets */
    FD_ZERO(&rfds);
    FD_ZERO(&wfds);
    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        if (node->io_read) {
            FD_SET ((SOCKET)node->pfd.fd, &rfds);
        }
        if (node->io_write) {
            FD_SET ((SOCKET)node->pfd.fd, &wfds);
        }
    }

    if (select(0, &rfds, &wfds, NULL, &tv0) > 0) {
        QLIST_FOREACH(node, &ctx->aio_handlers, node) {
            node->pfd.revents = 0;
            if (FD_ISSET(node->pfd.fd, &rfds)) {
                node->pfd.revents |= G_IO_IN;
                have_select_revents = true;
            }

            if (FD_ISSET(node->pfd.fd, &wfds)) {
                node->pfd.revents |= G_IO_OUT;
                have_select_revents = true;
            }
        }
    }

    return have_select_revents;
}
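
/* Return true if at least one registered handler has a pending revents bit
 * that its callback would consume on the next dispatch.
 */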
bool aio_pending(AioContext *ctx)
{
    AioHandler *node;

    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        if (node->pfd.revents && node->io_notify) {
            return true;
        }

        if ((node->pfd.revents & G_IO_IN) && node->io_read) {
            return true;
        }
        if ((node->pfd.revents & G_IO_OUT) && node->io_write) {
            return true;
        }
    }

    return false;
}
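
/* Walk ctx->aio_handlers and invoke the callbacks whose events fired.
 * walking_handlers is bumped around each callback so that concurrent
 * aio_set_fd_handler() calls only mark nodes as deleted; deleted nodes
 * are freed at the bottom of the loop once no other walker is active.
 */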
static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
{
    AioHandler *node;
    bool progress = false;

    /*
     * We have to walk very carefully in case aio_set_fd_handler is
     * called while we're walking.
     */
    node = QLIST_FIRST(&ctx->aio_handlers);
    while (node) {
        AioHandler *tmp;
        int revents = node->pfd.revents;

        ctx->walking_handlers++;

        if (!node->deleted &&
            (revents || event_notifier_get_handle(node->e) == event) &&
            node->io_notify) {
            node->pfd.revents = 0;
            node->io_notify(node->e);

            /* aio_notify() does not count as progress */
            if (node->e != &ctx->notifier) {
                progress = true;
            }
        }

        if (!node->deleted &&
            (node->io_read || node->io_write)) {
            node->pfd.revents = 0;
            if ((revents & G_IO_IN) && node->io_read) {
                node->io_read(node->opaque);
                progress = true;
            }
            if ((revents & G_IO_OUT) && node->io_write) {
                node->io_write(node->opaque);
                progress = true;
            }

            /* if the next select() will return an event, we have progressed */
            if (event == event_notifier_get_handle(&ctx->notifier)) {
                WSANETWORKEVENTS ev;
                WSAEnumNetworkEvents(node->pfd.fd, event, &ev);
                if (ev.lNetworkEvents) {
                    progress = true;
                }
            }
        }

        tmp = node;
        node = QLIST_NEXT(node, node);

        ctx->walking_handlers--;

        if (!ctx->walking_handlers && tmp->deleted) {
            QLIST_REMOVE(tmp, node);
            g_free(tmp);
        }
    }

    return progress;
}
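
/* Run bottom halves, ready handlers and expired timers once, without
 * blocking.  Returns true if any of them made progress.
 */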
bool aio_dispatch(AioContext *ctx)
{
    bool progress;

    progress = aio_bh_poll(ctx);
    progress |= aio_dispatch_handlers(ctx, INVALID_HANDLE_VALUE);
    progress |= timerlistgroup_run_timers(&ctx->tlg);
    return progress;
}
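
/* One event loop iteration on Windows: wait (optionally blocking) on the
 * registered event notifier handles with WaitForMultipleObjects(), then
 * dispatch bottom halves, the socket handlers flagged by aio_prepare(),
 * and expired timers.  Returns true if any progress was made.
 */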
bool aio_poll(AioContext *ctx, bool blocking)
{
    AioHandler *node;
    HANDLE events[MAXIMUM_WAIT_OBJECTS + 1];
    bool progress, have_select_revents, first;
    int count;
    int timeout;

    aio_context_acquire(ctx);
    progress = false;

    /* aio_notify can avoid the expensive event_notifier_set if
     * everything (file descriptors, bottom halves, timers) will
     * be re-evaluated before the next blocking poll().  This is
     * already true when aio_poll is called with blocking == false;
     * if blocking == true, it is only true after poll() returns,
     * so disable the optimization now.
     */
    if (blocking) {
        atomic_add(&ctx->notify_me, 2);
    }

    have_select_revents = aio_prepare(ctx);

    ctx->walking_handlers++;

    /* fill fd sets */
    count = 0;
    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        if (!node->deleted && node->io_notify
            && aio_node_check(ctx, node->is_external)) {
            events[count++] = event_notifier_get_handle(node->e);
        }
    }

    ctx->walking_handlers--;
    first = true;

    /* ctx->notifier is always registered.  */
    assert(count > 0);

    /* Multiple iterations, all of them non-blocking except the first,
     * may be necessary to process all pending events.  After the first
     * WaitForMultipleObjects call ctx->notify_me will be decremented.
     */
    do {
        HANDLE event;
        int ret;

        timeout = blocking && !have_select_revents
            ? qemu_timeout_ns_to_ms(aio_compute_timeout(ctx)) : 0;
        if (timeout) {
            aio_context_release(ctx);
        }
        ret = WaitForMultipleObjects(count, events, FALSE, timeout);
        if (blocking) {
            assert(first);
            atomic_sub(&ctx->notify_me, 2);
        }
        if (timeout) {
            aio_context_acquire(ctx);
        }

        if (first) {
            aio_notify_accept(ctx);
            progress |= aio_bh_poll(ctx);
            first = false;
        }

        /* if we have any signaled events, dispatch event */
        event = NULL;
        if ((DWORD) (ret - WAIT_OBJECT_0) < count) {
            event = events[ret - WAIT_OBJECT_0];
            events[ret - WAIT_OBJECT_0] = events[--count];
        } else if (!have_select_revents) {
            break;
        }

        have_select_revents = false;
        blocking = false;

        progress |= aio_dispatch_handlers(ctx, event);
    } while (count > 0);

    progress |= timerlistgroup_run_timers(&ctx->tlg);

    aio_context_release(ctx);
    return progress;
}