/*
 * QEMU aio implementation
 *
 * Copyright IBM Corp., 2008
 * Copyright Red Hat Inc., 2012
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *  Paolo Bonzini     <pbonzini@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "block/block.h"
#include "qemu/queue.h"
#include "qemu/sockets.h"
#include "qapi/error.h"
#include "qemu/rcu_queue.h"

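/*
 * One AioHandler is allocated per registered socket or EventNotifier.
 * Handlers live on ctx->aio_handlers and are unlinked lazily: a
 * concurrent aio_poll() may still be walking the list, so removal only
 * sets ->deleted until no walker holds the list anymore.
 */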
struct AioHandler {
    EventNotifier *e;
    IOHandler *io_read;
    IOHandler *io_write;
    EventNotifierHandler *io_notify;
    GPollFD pfd;
    int deleted;
    void *opaque;
    bool is_external;
    QLIST_ENTRY(AioHandler) node;
};

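/*
 * Register, update or remove the read/write callbacks for a socket.
 * Passing NULL for both io_read and io_write removes the handler.  As a
 * rough usage sketch (read_cb and my_state are hypothetical names, not
 * part of this file):
 *
 *     aio_set_fd_handler(ctx, sockfd, true, read_cb, NULL, NULL, my_state);
 *
 * io_poll is accepted only for interface parity with the POSIX
 * implementation; polling is not implemented on Windows.
 */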
void aio_set_fd_handler(AioContext *ctx,
                        int fd,
                        bool is_external,
                        IOHandler *io_read,
                        IOHandler *io_write,
                        AioPollFn *io_poll,
                        void *opaque)
{
    /* fd is a SOCKET in our case */
    AioHandler *node;

    qemu_lockcnt_lock(&ctx->list_lock);
    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        if (node->pfd.fd == fd && !node->deleted) {
            break;
        }
    }

    /* Are we deleting the fd handler? */
    if (!io_read && !io_write) {
        if (node) {
            /* If aio_poll is in progress, just mark the node as deleted */
            if (qemu_lockcnt_count(&ctx->list_lock)) {
                node->deleted = 1;
                node->pfd.revents = 0;
            } else {
                /* Otherwise, delete it for real.  We can't just mark it as
                 * deleted because deleted nodes are only cleaned up after
                 * releasing the list_lock.
                 */
                QLIST_REMOVE(node, node);
                g_free(node);
            }
        }
    } else {
        HANDLE event;
        long bitmask = 0;

        if (node == NULL) {
            /* Alloc and insert if it's not already there */
            node = g_new0(AioHandler, 1);
            node->pfd.fd = fd;
            QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, node, node);
        }

        node->pfd.events = 0;
        if (node->io_read) {
            node->pfd.events |= G_IO_IN;
        }
        if (node->io_write) {
            node->pfd.events |= G_IO_OUT;
        }

        node->e = &ctx->notifier;

        /* Update handler with latest information */
        node->opaque = opaque;
        node->io_read = io_read;
        node->io_write = io_write;
        node->is_external = is_external;

        if (io_read) {
            bitmask |= FD_READ | FD_ACCEPT | FD_CLOSE;
        }

        if (io_write) {
            bitmask |= FD_WRITE | FD_CONNECT;
        }

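        /* WSAEventSelect puts the socket into non-blocking mode and ties
         * it to the context's notifier handle, so the
         * WaitForMultipleObjects call in aio_poll wakes up when any of
         * the selected network events fires.
         */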
        event = event_notifier_get_handle(&ctx->notifier);
        WSAEventSelect(node->pfd.fd, event, bitmask);
    }

    qemu_lockcnt_unlock(&ctx->list_lock);
    aio_notify(ctx);
}

void aio_set_fd_poll(AioContext *ctx, int fd,
                     IOHandler *io_poll_begin,
                     IOHandler *io_poll_end)
{
    /* Not implemented */
}

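/*
 * Register or remove the callback for an EventNotifier.  Unlike socket
 * handlers, the notifier's HANDLE is also handed to glib via
 * g_source_add_poll(), so either aio_poll or a glib main loop can wait
 * on it.  A hypothetical caller (notify_cb is an illustrative name):
 *
 *     aio_set_event_notifier(ctx, &notifier, false, notify_cb, NULL);
 */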
void aio_set_event_notifier(AioContext *ctx,
                            EventNotifier *e,
                            bool is_external,
                            EventNotifierHandler *io_notify,
                            AioPollFn *io_poll)
{
    AioHandler *node;

    qemu_lockcnt_lock(&ctx->list_lock);
    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        if (node->e == e && !node->deleted) {
            break;
        }
    }

    /* Are we deleting the notifier handler? */
    if (!io_notify) {
        if (node) {
            g_source_remove_poll(&ctx->source, &node->pfd);

            /* If aio_poll is in progress, just mark the node as deleted */
            if (qemu_lockcnt_count(&ctx->list_lock)) {
                node->deleted = 1;
                node->pfd.revents = 0;
            } else {
                /* Otherwise, delete it for real.  We can't just mark it as
                 * deleted because deleted nodes are only cleaned up after
                 * releasing the list_lock.
                 */
                QLIST_REMOVE(node, node);
                g_free(node);
            }
        }
    } else {
        if (node == NULL) {
            /* Alloc and insert if it's not already there */
            node = g_new0(AioHandler, 1);
            node->e = e;
            node->pfd.fd = (uintptr_t)event_notifier_get_handle(e);
            node->pfd.events = G_IO_IN;
            node->is_external = is_external;
            QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, node, node);

            g_source_add_poll(&ctx->source, &node->pfd);
        }
        /* Update handler with latest information */
        node->io_notify = io_notify;
    }

    qemu_lockcnt_unlock(&ctx->list_lock);
    aio_notify(ctx);
}

void aio_set_event_notifier_poll(AioContext *ctx,
                                 EventNotifier *notifier,
                                 EventNotifierHandler *io_poll_begin,
                                 EventNotifierHandler *io_poll_end)
{
    /* Not implemented */
}

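/*
 * WaitForMultipleObjects waits on event handles, not on socket
 * readiness, so aio_prepare probes the registered sockets with a
 * zero-timeout select() (tv0 stays zero-initialized) and records the
 * results in pfd.revents for aio_pending and aio_dispatch_handlers to
 * consume.
 */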
bool aio_prepare(AioContext *ctx)
{
    static struct timeval tv0;
    AioHandler *node;
    bool have_select_revents = false;
    fd_set rfds, wfds;

    /*
     * We have to walk very carefully in case aio_set_fd_handler is
     * called while we're walking.
     */
    qemu_lockcnt_inc(&ctx->list_lock);

    /* fill fd sets */
    FD_ZERO(&rfds);
    FD_ZERO(&wfds);
    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
        if (node->io_read) {
            FD_SET((SOCKET)node->pfd.fd, &rfds);
        }
        if (node->io_write) {
            FD_SET((SOCKET)node->pfd.fd, &wfds);
        }
    }

    if (select(0, &rfds, &wfds, NULL, &tv0) > 0) {
        QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
            node->pfd.revents = 0;
            if (FD_ISSET(node->pfd.fd, &rfds)) {
                node->pfd.revents |= G_IO_IN;
                have_select_revents = true;
            }

            if (FD_ISSET(node->pfd.fd, &wfds)) {
                node->pfd.revents |= G_IO_OUT;
                have_select_revents = true;
            }
        }
    }

    qemu_lockcnt_dec(&ctx->list_lock);
    return have_select_revents;
}

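/*
 * Return true if some handler has recorded events that its callbacks
 * have not consumed yet.
 */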
bool aio_pending(AioContext *ctx)
{
    AioHandler *node;
    bool result = false;

    /*
     * We have to walk very carefully in case aio_set_fd_handler is
     * called while we're walking.
     */
    qemu_lockcnt_inc(&ctx->list_lock);
    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
        if (node->pfd.revents && node->io_notify) {
            result = true;
            break;
        }

        if ((node->pfd.revents & G_IO_IN) && node->io_read) {
            result = true;
            break;
        }
        if ((node->pfd.revents & G_IO_OUT) && node->io_write) {
            result = true;
            break;
        }
    }

    qemu_lockcnt_dec(&ctx->list_lock);
    return result;
}

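/*
 * Run the callbacks of every handler whose event fired.  Nodes marked
 * ->deleted are freed here only when qemu_lockcnt_dec_if_lock()
 * succeeds, i.e. when this is the last walker of the handler list;
 * otherwise another walk is still in progress and the node must stay.
 */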
static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
{
    AioHandler *node;
    bool progress = false;
    AioHandler *tmp;

    /*
     * We have to walk very carefully in case aio_set_fd_handler is
     * called while we're walking.
     */
    QLIST_FOREACH_SAFE_RCU(node, &ctx->aio_handlers, node, tmp) {
        int revents = node->pfd.revents;

        if (!node->deleted &&
            (revents || event_notifier_get_handle(node->e) == event) &&
            node->io_notify) {
            node->pfd.revents = 0;
            node->io_notify(node->e);

            /* aio_notify() does not count as progress */
            if (node->e != &ctx->notifier) {
                progress = true;
            }
        }

        if (!node->deleted &&
            (node->io_read || node->io_write)) {
            node->pfd.revents = 0;
            if ((revents & G_IO_IN) && node->io_read) {
                node->io_read(node->opaque);
                progress = true;
            }
            if ((revents & G_IO_OUT) && node->io_write) {
                node->io_write(node->opaque);
                progress = true;
            }

            /* if the next select() will return an event, we have progressed */
            if (event == event_notifier_get_handle(&ctx->notifier)) {
                WSANETWORKEVENTS ev;
                WSAEnumNetworkEvents(node->pfd.fd, event, &ev);
                if (ev.lNetworkEvents) {
                    progress = true;
                }
            }
        }

        if (node->deleted) {
            if (qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
                QLIST_REMOVE(node, node);
                g_free(node);
                qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
            }
        }
    }

    return progress;
}

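/*
 * Single-shot dispatch, used by the GSource integration: run bottom
 * halves, any handlers whose revents were filled in by aio_prepare
 * (INVALID_HANDLE_VALUE matches no notifier handle), and expired
 * timers.
 */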
void aio_dispatch(AioContext *ctx)
{
    qemu_lockcnt_inc(&ctx->list_lock);
    aio_bh_poll(ctx);
    aio_dispatch_handlers(ctx, INVALID_HANDLE_VALUE);
    qemu_lockcnt_dec(&ctx->list_lock);
    timerlistgroup_run_timers(&ctx->tlg);
}

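/*
 * Core event loop primitive.  At most one blocking wait is performed;
 * each WaitForMultipleObjects call dispatches at most one signaled
 * handle, which is then removed from the array, and the loop repeats
 * non-blocking until nothing is pending.  Returns true if any progress
 * was made.
 */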
bool aio_poll(AioContext *ctx, bool blocking)
{
    AioHandler *node;
    HANDLE events[MAXIMUM_WAIT_OBJECTS + 1];
    bool progress, have_select_revents, first;
    int count;
    int timeout;

    progress = false;

    /* aio_notify can avoid the expensive event_notifier_set if
     * everything (file descriptors, bottom halves, timers) will
     * be re-evaluated before the next blocking poll().  This is
     * already true when aio_poll is called with blocking == false;
     * if blocking == true, it is only true after poll() returns,
     * so disable the optimization now.
     */
    if (blocking) {
        atomic_add(&ctx->notify_me, 2);
    }

    qemu_lockcnt_inc(&ctx->list_lock);
    have_select_revents = aio_prepare(ctx);

    /* fill fd sets */
    count = 0;
    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
        if (!node->deleted && node->io_notify
            && aio_node_check(ctx, node->is_external)) {
            events[count++] = event_notifier_get_handle(node->e);
        }
    }

    first = true;

    /* ctx->notifier is always registered.  */
    assert(count > 0);

    /* Multiple iterations, all of them non-blocking except the first,
     * may be necessary to process all pending events.  After the first
     * WaitForMultipleObjects call ctx->notify_me will be decremented.
     */
    do {
        HANDLE event;
        int ret;

        timeout = blocking && !have_select_revents
            ? qemu_timeout_ns_to_ms(aio_compute_timeout(ctx)) : 0;
        ret = WaitForMultipleObjects(count, events, FALSE, timeout);
        if (blocking) {
            assert(first);
            atomic_sub(&ctx->notify_me, 2);
        }

        if (first) {
            aio_notify_accept(ctx);
            progress |= aio_bh_poll(ctx);
            first = false;
        }

        /* if we have any signaled events, dispatch event */
        event = NULL;
        if ((DWORD) (ret - WAIT_OBJECT_0) < count) {
            event = events[ret - WAIT_OBJECT_0];
            events[ret - WAIT_OBJECT_0] = events[--count];
        } else if (!have_select_revents) {
            break;
        }

        have_select_revents = false;
        blocking = false;

        progress |= aio_dispatch_handlers(ctx, event);
    } while (count > 0);

    qemu_lockcnt_dec(&ctx->list_lock);

    progress |= timerlistgroup_run_timers(&ctx->tlg);
    return progress;
}

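/*
 * Adaptive polling is only implemented in the POSIX backend.  On
 * Windows the setup/teardown hooks are stubs, and enabling polling
 * (max_ns != 0) is reported as an error.
 */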
void aio_context_setup(AioContext *ctx)
{
}

void aio_context_destroy(AioContext *ctx)
{
}

void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
                                 int64_t grow, int64_t shrink, Error **errp)
{
    if (max_ns) {
        error_setg(errp, "AioContext polling is not implemented on Windows");
    }
}