/*
 * Linux io_uring support.
 *
 * Copyright (C) 2009 IBM, Corp.
 * Copyright (C) 2009 Red Hat, Inc.
 * Copyright (C) 2019 Aarushi Mehta
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
#include "qemu/osdep.h"
#include <liburing.h>
#include "block/aio.h"
#include "qemu/queue.h"
#include "block/block.h"
#include "block/raw-aio.h"
#include "qemu/coroutine.h"
#include "qemu/defer-call.h"
#include "qapi/error.h"
#include "sysemu/block-backend.h"
#include "trace.h"

/* Only used for assertions. */
#include "qemu/coroutine_int.h"

/* io_uring ring size */
#define MAX_ENTRIES 128

typedef struct LuringAIOCB {
    Coroutine *co;
    struct io_uring_sqe sqeq;
    ssize_t ret;
    QEMUIOVector *qiov;
    bool is_read;
    QSIMPLEQ_ENTRY(LuringAIOCB) next;

    /*
     * Buffered reads may require resubmission, see
     * luring_resubmit_short_read().
     */
    int total_read;
    QEMUIOVector resubmit_qiov;
} LuringAIOCB;

typedef struct LuringQueue {
    unsigned int in_queue;
    unsigned int in_flight;
    bool blocked;
    QSIMPLEQ_HEAD(, LuringAIOCB) submit_queue;
} LuringQueue;

struct LuringState {
    AioContext *aio_context;

    struct io_uring ring;

    /* No locking required, only accessed from AioContext home thread */
    LuringQueue io_q;

    QEMUBH *completion_bh;
};

/**
 * luring_resubmit:
 *
 * Resubmit a request by appending it to submit_queue. The caller must ensure
 * that ioq_submit() is called later so that submit_queue requests are started.
 */
static void luring_resubmit(LuringState *s, LuringAIOCB *luringcb)
{
    QSIMPLEQ_INSERT_TAIL(&s->io_q.submit_queue, luringcb, next);
    s->io_q.in_queue++;
}

/**
 * luring_resubmit_short_read:
 *
 * Short reads are rare but may occur. The remaining read request needs to be
 * resubmitted.
 */
static void luring_resubmit_short_read(LuringState *s, LuringAIOCB *luringcb,
                                       int nread)
{
    QEMUIOVector *resubmit_qiov;
    size_t remaining;

    trace_luring_resubmit_short_read(s, luringcb, nread);

    /* Update read position */
    luringcb->total_read += nread;
    remaining = luringcb->qiov->size - luringcb->total_read;

    /* Shorten qiov */
    resubmit_qiov = &luringcb->resubmit_qiov;
    if (resubmit_qiov->iov == NULL) {
        qemu_iovec_init(resubmit_qiov, luringcb->qiov->niov);
    } else {
        qemu_iovec_reset(resubmit_qiov);
    }
    qemu_iovec_concat(resubmit_qiov, luringcb->qiov, luringcb->total_read,
                      remaining);

    /* Update sqe */
    luringcb->sqeq.off += nread;
    luringcb->sqeq.addr = (uintptr_t)luringcb->resubmit_qiov.iov;
    luringcb->sqeq.len = luringcb->resubmit_qiov.niov;

    luring_resubmit(s, luringcb);
}

/**
 * luring_process_completions:
 * @s: AIO state
 *
 * Fetches completed I/O requests, consumes cqes and invokes their callbacks.
 * The function is somewhat tricky because it supports nested event loops, for
 * example when a request callback invokes aio_poll().
 *
 * The function schedules the completion BH so it can be called again in a
 * nested event loop. When there are no events left to complete, the BH is
 * canceled.
 */
static void luring_process_completions(LuringState *s)
{
    struct io_uring_cqe *cqes;
    int total_bytes;

    defer_call_begin();

    /*
     * Request completion callbacks can run the nested event loop.
     * Schedule ourselves so the nested event loop will "see" remaining
     * completed requests and process them. Without this, completion
     * callbacks that wait for other requests using a nested event loop
     * would hang forever.
     *
     * This workaround is needed because io_uring uses poll_wait, which
     * is woken up when new events are added to the uring, thus polling on
     * the same uring fd will block unless more events are received.
     *
     * Other leaf block drivers (drivers that access the data themselves)
     * are networking based, so they poll sockets for data and run the
     * correct coroutine.
     */
    qemu_bh_schedule(s->completion_bh);

    while (io_uring_peek_cqe(&s->ring, &cqes) == 0) {
        LuringAIOCB *luringcb;
        int ret;

        if (!cqes) {
            break;
        }

        luringcb = io_uring_cqe_get_data(cqes);
        ret = cqes->res;
        io_uring_cqe_seen(&s->ring, cqes);
        cqes = NULL;

        /* Change counters one-by-one because we can be nested. */
        s->io_q.in_flight--;
        trace_luring_process_completion(s, luringcb, ret);

        /* total_read is non-zero only for resubmitted read requests */
        total_bytes = ret + luringcb->total_read;

        if (ret < 0) {
            /*
             * Only writev/readv/fsync requests on regular files or host block
             * devices are submitted. Therefore -EAGAIN is not expected but it's
             * known to happen sometimes with Linux SCSI. Submit again and hope
             * the request completes successfully.
             *
             * For more information, see:
             * https://lore.kernel.org/io-uring/20210727165811.284510-3-axboe@kernel.dk/T/#u
             *
             * If the code is changed to submit other types of requests in the
             * future, then this workaround may need to be extended to deal with
             * genuine -EAGAIN results that should not be resubmitted
             * immediately.
             */
            if (ret == -EINTR || ret == -EAGAIN) {
                luring_resubmit(s, luringcb);
                continue;
            }
        } else if (!luringcb->qiov) {
            goto end;
        } else if (total_bytes == luringcb->qiov->size) {
            ret = 0;
            /* Only read/write */
        } else {
            /* Short Read/Write */
            if (luringcb->is_read) {
                if (ret > 0) {
                    luring_resubmit_short_read(s, luringcb, ret);
                    continue;
                } else {
                    /* Pad with zeroes */
                    qemu_iovec_memset(luringcb->qiov, total_bytes, 0,
                                      luringcb->qiov->size - total_bytes);
                    ret = 0;
                }
            } else {
                ret = -ENOSPC;
            }
        }
end:
        luringcb->ret = ret;
        qemu_iovec_destroy(&luringcb->resubmit_qiov);

        /*
         * If the coroutine is already entered it must be in ioq_submit()
         * and will notice luringcb->ret has been filled in when it
         * eventually runs later. Coroutines cannot be entered recursively
         * so avoid doing that!
         */
        assert(luringcb->co->ctx == s->aio_context);
        if (!qemu_coroutine_entered(luringcb->co)) {
            aio_co_wake(luringcb->co);
        }
    }

    qemu_bh_cancel(s->completion_bh);

    defer_call_end();
}

static int ioq_submit(LuringState *s)
{
    int ret = 0;
    LuringAIOCB *luringcb, *luringcb_next;

    while (s->io_q.in_queue > 0) {
        /*
         * Try to fetch sqes from the ring for requests waiting in
         * the overflow queue
         */
        QSIMPLEQ_FOREACH_SAFE(luringcb, &s->io_q.submit_queue, next,
                              luringcb_next) {
            struct io_uring_sqe *sqes = io_uring_get_sqe(&s->ring);
            if (!sqes) {
                break;
            }
            /* Prep sqe for submission */
            *sqes = luringcb->sqeq;
            QSIMPLEQ_REMOVE_HEAD(&s->io_q.submit_queue, next);
        }
        ret = io_uring_submit(&s->ring);
        trace_luring_io_uring_submit(s, ret);
        /* Prevent infinite loop if submission is refused */
        if (ret <= 0) {
            if (ret == -EAGAIN || ret == -EINTR) {
                continue;
            }
            break;
        }
        s->io_q.in_flight += ret;
        s->io_q.in_queue -= ret;
    }
    s->io_q.blocked = (s->io_q.in_queue > 0);
    if (s->io_q.in_flight) {
        /*
         * Since there are still requests in flight, try to complete some of
         * them right away.
         */
        luring_process_completions(s);
    }
    return ret;
}

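/*
 * Process available completions, then submit any requests still waiting in
 * the software queue (for example, short reads resubmitted by the
 * completion path).
 */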
static void luring_process_completions_and_submit(LuringState *s)
{
    luring_process_completions(s);

    if (s->io_q.in_queue > 0) {
        ioq_submit(s);
    }
}

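/* BH scheduled by luring_process_completions() to support nested event loops */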
static void qemu_luring_completion_bh(void *opaque)
{
    LuringState *s = opaque;
    luring_process_completions_and_submit(s);
}

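/* Read handler for the io_uring fd, registered in luring_attach_aio_context() */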
static void qemu_luring_completion_cb(void *opaque)
{
    LuringState *s = opaque;
    luring_process_completions_and_submit(s);
}

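/* Polling callback: returns true when completion queue entries are ready */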
static bool qemu_luring_poll_cb(void *opaque)
{
    LuringState *s = opaque;

    return io_uring_cq_ready(&s->ring);
}

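/* Invoked once qemu_luring_poll_cb() reports ready completions */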
static void qemu_luring_poll_ready(void *opaque)
{
    LuringState *s = opaque;

    luring_process_completions_and_submit(s);
}

static void ioq_init(LuringQueue *io_q)
{
    QSIMPLEQ_INIT(&io_q->submit_queue);
    io_q->in_queue = 0;
    io_q->in_flight = 0;
    io_q->blocked = false;
}

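/*
 * Deferred callback registered via defer_call() in luring_do_submit();
 * flushes requests still sitting in the software queue when it runs.
 */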
static void luring_deferred_fn(void *opaque)
{
    LuringState *s = opaque;
    trace_luring_unplug_fn(s, s->io_q.blocked, s->io_q.in_queue,
                           s->io_q.in_flight);
    if (!s->io_q.blocked && s->io_q.in_queue > 0) {
        ioq_submit(s);
    }
}

/**
 * luring_do_submit:
 * @fd: file descriptor for I/O
 * @luringcb: AIO control block
 * @s: AIO state
 * @offset: offset for request
 * @type: type of request
 *
 * Preps the sqe for this request and adds it to the pending queue. The
 * request is submitted immediately if the queue is full enough, otherwise
 * submission is deferred.
 */
static int luring_do_submit(int fd, LuringAIOCB *luringcb, LuringState *s,
                            uint64_t offset, int type)
{
    int ret;
    struct io_uring_sqe *sqes = &luringcb->sqeq;

    switch (type) {
    case QEMU_AIO_WRITE:
        io_uring_prep_writev(sqes, fd, luringcb->qiov->iov,
                             luringcb->qiov->niov, offset);
        break;
    case QEMU_AIO_ZONE_APPEND:
        io_uring_prep_writev(sqes, fd, luringcb->qiov->iov,
                             luringcb->qiov->niov, offset);
        break;
    case QEMU_AIO_READ:
        io_uring_prep_readv(sqes, fd, luringcb->qiov->iov,
                            luringcb->qiov->niov, offset);
        break;
    case QEMU_AIO_FLUSH:
        io_uring_prep_fsync(sqes, fd, IORING_FSYNC_DATASYNC);
        break;
    default:
        fprintf(stderr, "%s: invalid AIO request type, aborting 0x%x.\n",
                __func__, type);
        abort();
    }
    io_uring_sqe_set_data(sqes, luringcb);

    QSIMPLEQ_INSERT_TAIL(&s->io_q.submit_queue, luringcb, next);
    s->io_q.in_queue++;
    trace_luring_do_submit(s, s->io_q.blocked, s->io_q.in_queue,
                           s->io_q.in_flight);
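    /*
     * Submit right away if the ring would otherwise fill up; otherwise let
     * defer_call() batch the submission with other deferred requests.
     */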
    if (!s->io_q.blocked) {
        if (s->io_q.in_flight + s->io_q.in_queue >= MAX_ENTRIES) {
            ret = ioq_submit(s);
            trace_luring_do_submit_done(s, ret);
            return ret;
        }

        defer_call(luring_deferred_fn, s);
    }
    return 0;
}

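/*
 * Coroutine entry point: submit a request and yield until
 * luring_process_completions() fills in the result. Returns 0 on success or
 * a negative errno value on failure.
 */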
int coroutine_fn luring_co_submit(BlockDriverState *bs, int fd, uint64_t offset,
                                  QEMUIOVector *qiov, int type)
{
    int ret;
    AioContext *ctx = qemu_get_current_aio_context();
    LuringState *s = aio_get_linux_io_uring(ctx);
    LuringAIOCB luringcb = {
        .co = qemu_coroutine_self(),
        .ret = -EINPROGRESS,
        .qiov = qiov,
        .is_read = (type == QEMU_AIO_READ),
    };
    trace_luring_co_submit(bs, s, &luringcb, fd, offset, qiov ? qiov->size : 0,
                           type);
    ret = luring_do_submit(fd, &luringcb, s, offset, type);

    if (ret < 0) {
        return ret;
    }

    if (luringcb.ret == -EINPROGRESS) {
        qemu_coroutine_yield();
    }
    return luringcb.ret;
}

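/* Remove the ring fd handler and completion BH from @old_context */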
void luring_detach_aio_context(LuringState *s, AioContext *old_context)
{
    aio_set_fd_handler(old_context, s->ring.ring_fd,
                       NULL, NULL, NULL, NULL, s);
    qemu_bh_delete(s->completion_bh);
    s->aio_context = NULL;
}

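/* Register the completion BH and ring fd handlers (including polling) with @new_context */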
void luring_attach_aio_context(LuringState *s, AioContext *new_context)
{
    s->aio_context = new_context;
    s->completion_bh = aio_bh_new(new_context, qemu_luring_completion_bh, s);
    aio_set_fd_handler(s->aio_context, s->ring.ring_fd,
                       qemu_luring_completion_cb, NULL,
                       qemu_luring_poll_cb, qemu_luring_poll_ready, s);
}

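/*
 * Allocate a LuringState and set up an io_uring instance with MAX_ENTRIES
 * entries. Returns NULL and sets @errp on failure.
 */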
LuringState *luring_init(Error **errp)
{
    int rc;
    LuringState *s = g_new0(LuringState, 1);
    struct io_uring *ring = &s->ring;

    trace_luring_init_state(s, sizeof(*s));

    rc = io_uring_queue_init(MAX_ENTRIES, ring, 0);
    if (rc < 0) {
        error_setg_errno(errp, -rc, "failed to init linux io_uring ring");
        g_free(s);
        return NULL;
    }

    ioq_init(&s->io_q);
    return s;
}

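/* Tear down the io_uring instance and free the state */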
void luring_cleanup(LuringState *s)
{
    io_uring_queue_exit(&s->ring);
    trace_luring_cleanup_state(s);
    g_free(s);
}