/*
 * Image mirroring
 *
 * Copyright Red Hat, Inc. 2012
 *
 * Authors:
 *  Paolo Bonzini <pbonzini@redhat.com>
 *
 * This work is licensed under the terms of the GNU LGPL, version 2 or later.
 * See the COPYING.LIB file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "qemu/coroutine.h"
#include "qemu/range.h"
#include "trace.h"
#include "block/blockjob_int.h"
#include "block/block_int.h"
#include "sysemu/block-backend.h"
#include "qapi/error.h"
#include "qapi/qmp/qerror.h"
#include "qemu/ratelimit.h"
#include "qemu/bitmap.h"

#define MAX_IN_FLIGHT 16
#define MAX_IO_BYTES (1 << 20) /* 1 MiB */
#define DEFAULT_MIRROR_BUF_SIZE (MAX_IN_FLIGHT * MAX_IO_BYTES)

/* The mirroring buffer is a list of granularity-sized chunks.
 * Free chunks are organized in a list.
 */
typedef struct MirrorBuffer {
    QSIMPLEQ_ENTRY(MirrorBuffer) next;
} MirrorBuffer;

typedef struct MirrorOp MirrorOp;

typedef struct MirrorBlockJob {
    BlockJob common;
    BlockBackend *target;
    BlockDriverState *mirror_top_bs;
    BlockDriverState *base;
    BlockDriverState *base_overlay;

    /* The name of the graph node to replace */
    char *replaces;
    /* The BDS to replace */
    BlockDriverState *to_replace;
    /* Used to block operations on the drive-mirror-replace target */
    Error *replace_blocker;
    bool is_none_mode;
    BlockMirrorBackingMode backing_mode;
    /* Whether the target image requires explicit zero-initialization */
    bool zero_target;
    MirrorCopyMode copy_mode;
    BlockdevOnError on_source_error, on_target_error;
    bool synced;
    /* Set when the target is synced (dirty bitmap is clean, nothing
     * in flight) and the job is running in active mode */
    bool actively_synced;
    bool should_complete;
    int64_t granularity;
    size_t buf_size;
    int64_t bdev_length;
    unsigned long *cow_bitmap;
    BdrvDirtyBitmap *dirty_bitmap;
    BdrvDirtyBitmapIter *dbi;
    uint8_t *buf;
    QSIMPLEQ_HEAD(, MirrorBuffer) buf_free;
    int buf_free_count;

    uint64_t last_pause_ns;
    unsigned long *in_flight_bitmap;
    int in_flight;
    int64_t bytes_in_flight;
    QTAILQ_HEAD(, MirrorOp) ops_in_flight;
    int ret;
    bool unmap;
    int target_cluster_size;
    int max_iov;
    bool initial_zeroing_ongoing;
    int in_active_write_counter;
    bool prepared;
    bool in_drain;
} MirrorBlockJob;

typedef struct MirrorBDSOpaque {
    MirrorBlockJob *job;
    bool stop;
    bool is_commit;
} MirrorBDSOpaque;

struct MirrorOp {
    MirrorBlockJob *s;
    QEMUIOVector qiov;
    int64_t offset;
    uint64_t bytes;

    /* The pointee is set by mirror_co_read(), mirror_co_zero(), and
     * mirror_co_discard() before yielding for the first time */
    int64_t *bytes_handled;

    bool is_pseudo_op;
    bool is_active_write;
    bool is_in_flight;
    CoQueue waiting_requests;
    Coroutine *co;
    MirrorOp *waiting_for_op;

    QTAILQ_ENTRY(MirrorOp) next;
};

typedef enum MirrorMethod {
    MIRROR_METHOD_COPY,
    MIRROR_METHOD_ZERO,
    MIRROR_METHOD_DISCARD,
} MirrorMethod;

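/* Translate an error on the source (@read is true) or the target (@read is
 * false) into a BlockErrorAction according to the configured
 * on_source_error/on_target_error policy.  The job is marked as no longer
 * synced. */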
static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read,
                                            int error)
{
    s->synced = false;
    s->actively_synced = false;
    if (read) {
        return block_job_error_action(&s->common, s->on_source_error,
                                      true, error);
    } else {
        return block_job_error_action(&s->common, s->on_target_error,
                                      false, error);
    }
}

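/* Wait until no operation in s->ops_in_flight overlaps the chunk range
 * covered by [offset, offset + bytes) any more.  @self may be NULL for
 * callers that have no MirrorOp of their own (mirror_iteration() passes
 * NULL); a non-NULL @self records which operation it is waiting for so that
 * mutually waiting operations do not deadlock. */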
static void coroutine_fn mirror_wait_on_conflicts(MirrorOp *self,
                                                  MirrorBlockJob *s,
                                                  uint64_t offset,
                                                  uint64_t bytes)
{
    uint64_t self_start_chunk = offset / s->granularity;
    uint64_t self_end_chunk = DIV_ROUND_UP(offset + bytes, s->granularity);
    uint64_t self_nb_chunks = self_end_chunk - self_start_chunk;

    while (find_next_bit(s->in_flight_bitmap, self_end_chunk,
                         self_start_chunk) < self_end_chunk &&
           s->ret >= 0)
    {
        MirrorOp *op;

        QTAILQ_FOREACH(op, &s->ops_in_flight, next) {
            uint64_t op_start_chunk = op->offset / s->granularity;
            uint64_t op_nb_chunks = DIV_ROUND_UP(op->offset + op->bytes,
                                                 s->granularity) -
                                    op_start_chunk;

            if (op == self) {
                continue;
            }

            if (ranges_overlap(self_start_chunk, self_nb_chunks,
                               op_start_chunk, op_nb_chunks))
            {
                if (self) {
                    /*
                     * If the operation is already (indirectly) waiting for us,
                     * or will wait for us as soon as it wakes up, then just go
                     * on (instead of producing a deadlock in the former case).
                     */
                    if (op->waiting_for_op) {
                        continue;
                    }

                    self->waiting_for_op = op;
                }

                qemu_co_queue_wait(&op->waiting_requests, NULL);

                if (self) {
                    self->waiting_for_op = NULL;
                }

                break;
            }
        }
    }
}

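/* Common completion path for a MirrorOp: return its buffer chunks to
 * s->buf_free, clear its bits in the in-flight bitmap, account progress on
 * success, wake up any requests waiting on the operation and free it. */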
static void coroutine_fn mirror_iteration_done(MirrorOp *op, int ret)
{
    MirrorBlockJob *s = op->s;
    struct iovec *iov;
    int64_t chunk_num;
    int i, nb_chunks;

    trace_mirror_iteration_done(s, op->offset, op->bytes, ret);

    s->in_flight--;
    s->bytes_in_flight -= op->bytes;
    iov = op->qiov.iov;
    for (i = 0; i < op->qiov.niov; i++) {
        MirrorBuffer *buf = (MirrorBuffer *) iov[i].iov_base;
        QSIMPLEQ_INSERT_TAIL(&s->buf_free, buf, next);
        s->buf_free_count++;
    }

    chunk_num = op->offset / s->granularity;
    nb_chunks = DIV_ROUND_UP(op->bytes, s->granularity);

    bitmap_clear(s->in_flight_bitmap, chunk_num, nb_chunks);
    QTAILQ_REMOVE(&s->ops_in_flight, op, next);
    if (ret >= 0) {
        if (s->cow_bitmap) {
            bitmap_set(s->cow_bitmap, chunk_num, nb_chunks);
        }
        if (!s->initial_zeroing_ongoing) {
            job_progress_update(&s->common.job, op->bytes);
        }
    }
    qemu_iovec_destroy(&op->qiov);

    qemu_co_queue_restart_all(&op->waiting_requests);
    g_free(op);
}

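/* Completion of the write to the target: on error, re-dirty the range and
 * apply the target error policy before finishing the operation. */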
static void coroutine_fn mirror_write_complete(MirrorOp *op, int ret)
{
    MirrorBlockJob *s = op->s;

    if (ret < 0) {
        BlockErrorAction action;

        bdrv_set_dirty_bitmap(s->dirty_bitmap, op->offset, op->bytes);
        action = mirror_error_action(s, false, -ret);
        if (action == BLOCK_ERROR_ACTION_REPORT && s->ret >= 0) {
            s->ret = ret;
        }
    }

    mirror_iteration_done(op, ret);
}

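/* Completion of the read from the source: on error, re-dirty the range and
 * apply the source error policy; on success, issue the corresponding write
 * to the target. */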
static void coroutine_fn mirror_read_complete(MirrorOp *op, int ret)
{
    MirrorBlockJob *s = op->s;

    if (ret < 0) {
        BlockErrorAction action;

        bdrv_set_dirty_bitmap(s->dirty_bitmap, op->offset, op->bytes);
        action = mirror_error_action(s, true, -ret);
        if (action == BLOCK_ERROR_ACTION_REPORT && s->ret >= 0) {
            s->ret = ret;
        }

        mirror_iteration_done(op, ret);
        return;
    }

    ret = blk_co_pwritev(s->target, op->offset, op->qiov.size, &op->qiov, 0);
    mirror_write_complete(op, ret);
}

/* Clip bytes relative to offset to not exceed end-of-file */
static inline int64_t mirror_clip_bytes(MirrorBlockJob *s,
                                        int64_t offset,
                                        int64_t bytes)
{
    return MIN(bytes, s->bdev_length - offset);
}

/* Round offset and/or bytes to target cluster if COW is needed, and
 * return the offset of the adjusted tail against original. */
static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset,
                            uint64_t *bytes)
{
    bool need_cow;
    int ret = 0;
    int64_t align_offset = *offset;
    int64_t align_bytes = *bytes;
    int max_bytes = s->granularity * s->max_iov;

    need_cow = !test_bit(*offset / s->granularity, s->cow_bitmap);
    need_cow |= !test_bit((*offset + *bytes - 1) / s->granularity,
                          s->cow_bitmap);
    if (need_cow) {
        bdrv_round_to_clusters(blk_bs(s->target), *offset, *bytes,
                               &align_offset, &align_bytes);
    }

    if (align_bytes > max_bytes) {
        align_bytes = max_bytes;
        if (need_cow) {
            align_bytes = QEMU_ALIGN_DOWN(align_bytes, s->target_cluster_size);
        }
    }
    /* Clipping may result in align_bytes unaligned to chunk boundary, but
     * that doesn't matter because it's already the end of source image. */
    align_bytes = mirror_clip_bytes(s, align_offset, align_bytes);

    ret = align_offset + align_bytes - (*offset + *bytes);
    *offset = align_offset;
    *bytes = align_bytes;
    assert(ret >= 0);
    return ret;
}

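/* Wait for one real (i.e. non-pseudo) in-flight operation of the requested
 * kind to finish: active writes if @active is true, background operations
 * otherwise. */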
static inline void coroutine_fn
mirror_wait_for_any_operation(MirrorBlockJob *s, bool active)
{
    MirrorOp *op;

    QTAILQ_FOREACH(op, &s->ops_in_flight, next) {
        /* Do not wait on pseudo ops, because it may in turn wait on
         * some other operation to start, which may in fact be the
         * caller of this function. Since there is only one pseudo op
         * at any given time, we will always find some real operation
         * to wait on. */
        if (!op->is_pseudo_op && op->is_in_flight &&
            op->is_active_write == active)
        {
            qemu_co_queue_wait(&op->waiting_requests, NULL);
            return;
        }
    }
    abort();
}

static inline void coroutine_fn
mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s)
{
    /* Only non-active operations use up in-flight slots */
    mirror_wait_for_any_operation(s, false);
}

/* Perform a mirror copy operation.
 *
 * *op->bytes_handled is set to the number of bytes copied after and
 * including offset, excluding any bytes copied prior to offset due
 * to alignment.  This will be op->bytes if no alignment is necessary,
 * or (new_end - op->offset) if the tail is rounded up or down due to
 * alignment or buffer limit.
 */
static void coroutine_fn mirror_co_read(void *opaque)
{
    MirrorOp *op = opaque;
    MirrorBlockJob *s = op->s;
    int nb_chunks;
    uint64_t ret;
    uint64_t max_bytes;

    max_bytes = s->granularity * s->max_iov;

    /* We can only handle as much as buf_size at a time. */
    op->bytes = MIN(s->buf_size, MIN(max_bytes, op->bytes));
    assert(op->bytes);
    assert(op->bytes < BDRV_REQUEST_MAX_BYTES);
    *op->bytes_handled = op->bytes;

    if (s->cow_bitmap) {
        *op->bytes_handled += mirror_cow_align(s, &op->offset, &op->bytes);
    }
    /* Cannot exceed BDRV_REQUEST_MAX_BYTES + INT_MAX */
    assert(*op->bytes_handled <= UINT_MAX);
    assert(op->bytes <= s->buf_size);
    /* The offset is granularity-aligned because:
     * 1) Caller passes in aligned values;
     * 2) mirror_cow_align is used only when target cluster is larger. */
    assert(QEMU_IS_ALIGNED(op->offset, s->granularity));
    /* The range is sector-aligned, since bdrv_getlength() rounds up. */
    assert(QEMU_IS_ALIGNED(op->bytes, BDRV_SECTOR_SIZE));
    nb_chunks = DIV_ROUND_UP(op->bytes, s->granularity);

    while (s->buf_free_count < nb_chunks) {
        trace_mirror_yield_in_flight(s, op->offset, s->in_flight);
        mirror_wait_for_free_in_flight_slot(s);
    }

    /* Now make a QEMUIOVector taking enough granularity-sized chunks
     * from s->buf_free.
     */
    qemu_iovec_init(&op->qiov, nb_chunks);
    while (nb_chunks-- > 0) {
        MirrorBuffer *buf = QSIMPLEQ_FIRST(&s->buf_free);
        size_t remaining = op->bytes - op->qiov.size;

        QSIMPLEQ_REMOVE_HEAD(&s->buf_free, next);
        s->buf_free_count--;
        qemu_iovec_add(&op->qiov, buf, MIN(s->granularity, remaining));
    }

    /* Copy the dirty cluster.  */
    s->in_flight++;
    s->bytes_in_flight += op->bytes;
    op->is_in_flight = true;
    trace_mirror_one_iteration(s, op->offset, op->bytes);

    ret = bdrv_co_preadv(s->mirror_top_bs->backing, op->offset, op->bytes,
                         &op->qiov, 0);
    mirror_read_complete(op, ret);
}

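/* Write zeroes for op->bytes bytes at op->offset on the target, allowing
 * the blocks to be unmapped if the job permits it. */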
static void coroutine_fn mirror_co_zero(void *opaque)
{
    MirrorOp *op = opaque;
    int ret;

    op->s->in_flight++;
    op->s->bytes_in_flight += op->bytes;
    *op->bytes_handled = op->bytes;
    op->is_in_flight = true;

    ret = blk_co_pwrite_zeroes(op->s->target, op->offset, op->bytes,
                               op->s->unmap ? BDRV_REQ_MAY_UNMAP : 0);
    mirror_write_complete(op, ret);
}

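/* Discard op->bytes bytes at op->offset on the target. */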
static void coroutine_fn mirror_co_discard(void *opaque)
{
    MirrorOp *op = opaque;
    int ret;

    op->s->in_flight++;
    op->s->bytes_in_flight += op->bytes;
    *op->bytes_handled = op->bytes;
    op->is_in_flight = true;

    ret = blk_co_pdiscard(op->s->target, op->offset, op->bytes);
    mirror_write_complete(op, ret);
}

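/* Allocate a MirrorOp for [offset, offset + bytes) and start the coroutine
 * that carries it out (copy, zero or discard).  Returns the number of bytes
 * actually handled by the operation, which may differ from @bytes for
 * copies that get clipped or realigned. */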
static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset,
                               unsigned bytes, MirrorMethod mirror_method)
{
    MirrorOp *op;
    Coroutine *co;
    int64_t bytes_handled = -1;

    op = g_new(MirrorOp, 1);
    *op = (MirrorOp){
        .s              = s,
        .offset         = offset,
        .bytes          = bytes,
        .bytes_handled  = &bytes_handled,
    };
    qemu_co_queue_init(&op->waiting_requests);

    switch (mirror_method) {
    case MIRROR_METHOD_COPY:
        co = qemu_coroutine_create(mirror_co_read, op);
        break;
    case MIRROR_METHOD_ZERO:
        co = qemu_coroutine_create(mirror_co_zero, op);
        break;
    case MIRROR_METHOD_DISCARD:
        co = qemu_coroutine_create(mirror_co_discard, op);
        break;
    default:
        abort();
    }
    op->co = co;

    QTAILQ_INSERT_TAIL(&s->ops_in_flight, op, next);
    qemu_coroutine_enter(co);
    /* At this point, ownership of op has been moved to the coroutine
     * and the object may already be freed */

    /* Assert that this value has been set */
    assert(bytes_handled >= 0);

    /* Same assertion as in mirror_co_read() (and for mirror_co_read()
     * and mirror_co_discard(), bytes_handled == op->bytes, which
     * is the @bytes parameter given to this function) */
    assert(bytes_handled <= UINT_MAX);
    return bytes_handled;
}

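/* Submit mirroring operations for the next batch of dirty chunks, starting
 * at the dirty bitmap iterator's current position.  A pseudo MirrorOp that
 * covers the whole batch is kept in s->ops_in_flight so that conflicting
 * requests can wait for it while the real operations are being launched.
 * Returns the delay in nanoseconds suggested by the job's rate limit for
 * the next iteration. */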
static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
{
    BlockDriverState *source = s->mirror_top_bs->backing->bs;
    MirrorOp *pseudo_op;
    int64_t offset;
    uint64_t delay_ns = 0, ret = 0;
    /* At least the first dirty chunk is mirrored in one iteration. */
    int nb_chunks = 1;
    bool write_zeroes_ok = bdrv_can_write_zeroes_with_unmap(blk_bs(s->target));
    int max_io_bytes = MAX(s->buf_size / MAX_IN_FLIGHT, MAX_IO_BYTES);

    bdrv_dirty_bitmap_lock(s->dirty_bitmap);
    offset = bdrv_dirty_iter_next(s->dbi);
    if (offset < 0) {
        bdrv_set_dirty_iter(s->dbi, 0);
        offset = bdrv_dirty_iter_next(s->dbi);
        trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap));
        assert(offset >= 0);
    }
    bdrv_dirty_bitmap_unlock(s->dirty_bitmap);

    mirror_wait_on_conflicts(NULL, s, offset, 1);

    job_pause_point(&s->common.job);

    /* Find the number of consecutive dirty chunks following the first dirty
     * one, and wait for in flight requests in them. */
    bdrv_dirty_bitmap_lock(s->dirty_bitmap);
    while (nb_chunks * s->granularity < s->buf_size) {
        int64_t next_dirty;
        int64_t next_offset = offset + nb_chunks * s->granularity;
        int64_t next_chunk = next_offset / s->granularity;
        if (next_offset >= s->bdev_length ||
            !bdrv_dirty_bitmap_get_locked(s->dirty_bitmap, next_offset)) {
            break;
        }
        if (test_bit(next_chunk, s->in_flight_bitmap)) {
            break;
        }

        next_dirty = bdrv_dirty_iter_next(s->dbi);
        if (next_dirty > next_offset || next_dirty < 0) {
            /* The bitmap iterator's cache is stale, refresh it */
            bdrv_set_dirty_iter(s->dbi, next_offset);
            next_dirty = bdrv_dirty_iter_next(s->dbi);
        }
        assert(next_dirty == next_offset);
        nb_chunks++;
    }

    /* Clear dirty bits before querying the block status, because
     * calling bdrv_block_status_above could yield - if some blocks are
     * marked dirty in this window, we need to know.
     */
    bdrv_reset_dirty_bitmap_locked(s->dirty_bitmap, offset,
                                   nb_chunks * s->granularity);
    bdrv_dirty_bitmap_unlock(s->dirty_bitmap);

    /* Before claiming an area in the in-flight bitmap, we have to
     * create a MirrorOp for it so that conflicting requests can wait
     * for it.  mirror_perform() will create the real MirrorOps later,
     * for now we just create a pseudo operation that will wake up all
     * conflicting requests once all real operations have been
     * launched. */
    pseudo_op = g_new(MirrorOp, 1);
    *pseudo_op = (MirrorOp){
        .offset         = offset,
        .bytes          = nb_chunks * s->granularity,
        .is_pseudo_op   = true,
    };
    qemu_co_queue_init(&pseudo_op->waiting_requests);
    QTAILQ_INSERT_TAIL(&s->ops_in_flight, pseudo_op, next);

    bitmap_set(s->in_flight_bitmap, offset / s->granularity, nb_chunks);
    while (nb_chunks > 0 && offset < s->bdev_length) {
        int ret;
        int64_t io_bytes;
        int64_t io_bytes_acct;
        MirrorMethod mirror_method = MIRROR_METHOD_COPY;

        assert(!(offset % s->granularity));
        ret = bdrv_block_status_above(source, NULL, offset,
                                      nb_chunks * s->granularity,
                                      &io_bytes, NULL, NULL);
        if (ret < 0) {
            io_bytes = MIN(nb_chunks * s->granularity, max_io_bytes);
        } else if (ret & BDRV_BLOCK_DATA) {
            io_bytes = MIN(io_bytes, max_io_bytes);
        }

        io_bytes -= io_bytes % s->granularity;
        if (io_bytes < s->granularity) {
            io_bytes = s->granularity;
        } else if (ret >= 0 && !(ret & BDRV_BLOCK_DATA)) {
            int64_t target_offset;
            int64_t target_bytes;
            bdrv_round_to_clusters(blk_bs(s->target), offset, io_bytes,
                                   &target_offset, &target_bytes);
            if (target_offset == offset &&
                target_bytes == io_bytes) {
                mirror_method = ret & BDRV_BLOCK_ZERO ?
                                    MIRROR_METHOD_ZERO :
                                    MIRROR_METHOD_DISCARD;
            }
        }

        while (s->in_flight >= MAX_IN_FLIGHT) {
            trace_mirror_yield_in_flight(s, offset, s->in_flight);
            mirror_wait_for_free_in_flight_slot(s);
        }

        if (s->ret < 0) {
            ret = 0;
            goto fail;
        }

        io_bytes = mirror_clip_bytes(s, offset, io_bytes);
        io_bytes = mirror_perform(s, offset, io_bytes, mirror_method);
        if (mirror_method != MIRROR_METHOD_COPY && write_zeroes_ok) {
            io_bytes_acct = 0;
        } else {
            io_bytes_acct = io_bytes;
        }
        assert(io_bytes);
        offset += io_bytes;
        nb_chunks -= DIV_ROUND_UP(io_bytes, s->granularity);
        delay_ns = block_job_ratelimit_get_delay(&s->common, io_bytes_acct);
    }

    ret = delay_ns;
fail:
    QTAILQ_REMOVE(&s->ops_in_flight, pseudo_op, next);
    qemu_co_queue_restart_all(&pseudo_op->waiting_requests);
    g_free(pseudo_op);

    return ret;
}

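/* Split s->buf into granularity-sized chunks and put them all on the
 * s->buf_free free list. */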
static void mirror_free_init(MirrorBlockJob *s)
{
    int granularity = s->granularity;
    size_t buf_size = s->buf_size;
    uint8_t *buf = s->buf;

    assert(s->buf_free_count == 0);
    QSIMPLEQ_INIT(&s->buf_free);
    while (buf_size != 0) {
        MirrorBuffer *cur = (MirrorBuffer *)buf;
        QSIMPLEQ_INSERT_TAIL(&s->buf_free, cur, next);
        s->buf_free_count++;
        buf_size -= granularity;
        buf += granularity;
    }
}

/* This is also used for the .pause callback. There is no matching
 * mirror_resume() because mirror_run() will begin iterating again
 * when the job is resumed.
 */
static void coroutine_fn mirror_wait_for_all_io(MirrorBlockJob *s)
{
    while (s->in_flight > 0) {
        mirror_wait_for_free_in_flight_slot(s);
    }
}

/**
 * mirror_exit_common: handle both abort() and prepare() cases.
 * for .prepare, returns 0 on success and -errno on failure.
 * for .abort cases, denoted by abort = true, MUST return 0.
 */
static int mirror_exit_common(Job *job)
{
    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job);
    BlockJob *bjob = &s->common;
    MirrorBDSOpaque *bs_opaque;
    AioContext *replace_aio_context = NULL;
    BlockDriverState *src;
    BlockDriverState *target_bs;
    BlockDriverState *mirror_top_bs;
    Error *local_err = NULL;
    bool abort = job->ret < 0;
    int ret = 0;

    if (s->prepared) {
        return 0;
    }
    s->prepared = true;

    mirror_top_bs = s->mirror_top_bs;
    bs_opaque = mirror_top_bs->opaque;
    src = mirror_top_bs->backing->bs;
    target_bs = blk_bs(s->target);

    if (bdrv_chain_contains(src, target_bs)) {
        bdrv_unfreeze_backing_chain(mirror_top_bs, target_bs);
    }

    bdrv_release_dirty_bitmap(s->dirty_bitmap);

    /* Make sure that the source BDS doesn't go away during bdrv_replace_node,
     * before we can call bdrv_drained_end */
    bdrv_ref(src);
    bdrv_ref(mirror_top_bs);
    bdrv_ref(target_bs);

    /*
     * Remove target parent that still uses BLK_PERM_WRITE/RESIZE before
     * inserting target_bs at s->to_replace, where we might not be able to get
     * these permissions.
     */
    blk_unref(s->target);
    s->target = NULL;

    /* We don't access the source any more. Dropping any WRITE/RESIZE is
     * required before it could become a backing file of target_bs. Not having
     * these permissions any more means that we can't allow any new requests on
     * mirror_top_bs from now on, so keep it drained. */
    bdrv_drained_begin(mirror_top_bs);
    bs_opaque->stop = true;
    bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing,
                             &error_abort);
    if (!abort && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
        BlockDriverState *backing = s->is_none_mode ? src : s->base;
        BlockDriverState *unfiltered_target = bdrv_skip_filters(target_bs);

        if (bdrv_cow_bs(unfiltered_target) != backing) {
            bdrv_set_backing_hd(unfiltered_target, backing, &local_err);
            if (local_err) {
                error_report_err(local_err);
                local_err = NULL;
                ret = -EPERM;
            }
        }
    } else if (!abort && s->backing_mode == MIRROR_OPEN_BACKING_CHAIN) {
        assert(!bdrv_backing_chain_next(target_bs));
        ret = bdrv_open_backing_file(bdrv_skip_filters(target_bs), NULL,
                                     "backing", &local_err);
        if (ret < 0) {
            error_report_err(local_err);
            local_err = NULL;
        }
    }

    if (s->to_replace) {
        replace_aio_context = bdrv_get_aio_context(s->to_replace);
        aio_context_acquire(replace_aio_context);
    }

    if (s->should_complete && !abort) {
        BlockDriverState *to_replace = s->to_replace ?: src;
        bool ro = bdrv_is_read_only(to_replace);

        if (ro != bdrv_is_read_only(target_bs)) {
            bdrv_reopen_set_read_only(target_bs, ro, NULL);
        }

        /* The mirror job has no requests in flight any more, but we need to
         * drain potential other users of the BDS before changing the graph. */
        assert(s->in_drain);
        bdrv_drained_begin(target_bs);
        /*
         * Cannot use check_to_replace_node() here, because that would
         * check for an op blocker on @to_replace, and we have our own
         * there.
         */
        if (bdrv_recurse_can_replace(src, to_replace)) {
            bdrv_replace_node(to_replace, target_bs, &local_err);
        } else {
            error_setg(&local_err, "Can no longer replace '%s' by '%s', "
                       "because it can no longer be guaranteed that doing so "
                       "would not lead to an abrupt change of visible data",
                       to_replace->node_name, target_bs->node_name);
        }
        bdrv_drained_end(target_bs);
        if (local_err) {
            error_report_err(local_err);
            ret = -EPERM;
        }
    }
    if (s->to_replace) {
        bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
        error_free(s->replace_blocker);
        bdrv_unref(s->to_replace);
    }
    if (replace_aio_context) {
        aio_context_release(replace_aio_context);
    }
    g_free(s->replaces);
    bdrv_unref(target_bs);

    /*
     * Remove the mirror filter driver from the graph. Before this, get rid of
     * the blockers on the intermediate nodes so that the resulting state is
     * valid.
     */
    block_job_remove_all_bdrv(bjob);
    bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort);

    /* We just changed the BDS the job BB refers to (with either or both of the
     * bdrv_replace_node() calls), so switch the BB back so the cleanup does
     * the right thing. We don't need any permissions any more now. */
    blk_remove_bs(bjob->blk);
    blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, &error_abort);
    blk_insert_bs(bjob->blk, mirror_top_bs, &error_abort);

    bs_opaque->job = NULL;

    bdrv_drained_end(src);
    bdrv_drained_end(mirror_top_bs);
    s->in_drain = false;
    bdrv_unref(mirror_top_bs);
    bdrv_unref(src);

    return ret;
}

static int mirror_prepare(Job *job)
{
    return mirror_exit_common(job);
}

static void mirror_abort(Job *job)
{
    int ret = mirror_exit_common(job);
    assert(ret == 0);
}

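/* Give the job a chance to pause or be cancelled: if a full slice has
 * elapsed since the last pause, record the time and yield via a zero-length
 * sleep; otherwise just hit a pause point. */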
static void coroutine_fn mirror_throttle(MirrorBlockJob *s)
{
    int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);

    if (now - s->last_pause_ns > BLOCK_JOB_SLICE_TIME) {
        s->last_pause_ns = now;
        job_sleep_ns(&s->common.job, 0);
    } else {
        job_pause_point(&s->common.job);
    }
}

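/* Initialize the dirty bitmap for a full sync: either mark the whole device
 * dirty (when the target needs explicit zeroing but cannot write zeroes
 * efficiently), or zero the target as required and then mark dirty only the
 * ranges that are allocated in the source chain above s->base_overlay. */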
static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
{
    int64_t offset;
    BlockDriverState *bs = s->mirror_top_bs->backing->bs;
    BlockDriverState *target_bs = blk_bs(s->target);
    int ret;
    int64_t count;

    if (s->zero_target) {
        if (!bdrv_can_write_zeroes_with_unmap(target_bs)) {
            bdrv_set_dirty_bitmap(s->dirty_bitmap, 0, s->bdev_length);
            return 0;
        }

        s->initial_zeroing_ongoing = true;
        for (offset = 0; offset < s->bdev_length; ) {
            int bytes = MIN(s->bdev_length - offset,
                            QEMU_ALIGN_DOWN(INT_MAX, s->granularity));

            mirror_throttle(s);

            if (job_is_cancelled(&s->common.job)) {
                s->initial_zeroing_ongoing = false;
                return 0;
            }

            if (s->in_flight >= MAX_IN_FLIGHT) {
                trace_mirror_yield(s, UINT64_MAX, s->buf_free_count,
                                   s->in_flight);
                mirror_wait_for_free_in_flight_slot(s);
                continue;
            }

            mirror_perform(s, offset, bytes, MIRROR_METHOD_ZERO);
            offset += bytes;
        }

        mirror_wait_for_all_io(s);
        s->initial_zeroing_ongoing = false;
    }

    /* First part, loop on the sectors and initialize the dirty bitmap.  */
    for (offset = 0; offset < s->bdev_length; ) {
        /* Just to make sure we are not exceeding int limit. */
        int bytes = MIN(s->bdev_length - offset,
                        QEMU_ALIGN_DOWN(INT_MAX, s->granularity));

        mirror_throttle(s);

        if (job_is_cancelled(&s->common.job)) {
            return 0;
        }

        ret = bdrv_is_allocated_above(bs, s->base_overlay, true, offset, bytes,
                                      &count);
        if (ret < 0) {
            return ret;
        }

        assert(count);
        if (ret > 0) {
            bdrv_set_dirty_bitmap(s->dirty_bitmap, offset, count);
        }
        offset += count;
    }
    return 0;
}

/* Called when going out of the streaming phase to flush the bulk of the
 * data to the medium, or just before completing.
 */
static int mirror_flush(MirrorBlockJob *s)
{
    int ret = blk_flush(s->target);
    if (ret < 0) {
        if (mirror_error_action(s, false, -ret) == BLOCK_ERROR_ACTION_REPORT) {
            s->ret = ret;
        }
    }
    return ret;
}

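/* Main coroutine of the mirror job: set up the dirty bitmap and copy
 * buffers, then keep copying dirty data to the target until the job is
 * cancelled or completed.  Once source and target are in sync the job
 * transitions to the READY state and continues mirroring new writes until
 * completion is requested. */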
John Snowf67432a2018-08-29 21:57:26 -0400899static int coroutine_fn mirror_run(Job *job, Error **errp)
Paolo Bonzini893f7eb2012-10-18 16:49:23 +0200900{
John Snowf67432a2018-08-29 21:57:26 -0400901 MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job);
Max Reitz138f9ff2018-06-13 20:18:14 +0200902 BlockDriverState *bs = s->mirror_top_bs->backing->bs;
Kevin Wolfe253f4b2016-04-12 16:17:41 +0200903 BlockDriverState *target_bs = blk_bs(s->target);
Paolo Bonzini9a0cec62016-10-27 12:48:51 +0200904 bool need_drain = true;
Denis V. Lunevc0b363a2016-07-14 16:33:25 +0300905 int64_t length;
Kevin Wolfe83dd682020-05-11 15:58:24 +0200906 int64_t target_length;
Paolo Bonzinib812f672013-01-21 17:09:43 +0100907 BlockDriverInfo bdi;
Jeff Cody1d339362015-01-22 08:03:29 -0500908 char backing_filename[2]; /* we only need 2 characters because we are only
909 checking for a NULL string */
Paolo Bonzini893f7eb2012-10-18 16:49:23 +0200910 int ret = 0;
Paolo Bonzini893f7eb2012-10-18 16:49:23 +0200911
Kevin Wolfdaa7f2f2018-04-17 12:56:07 +0200912 if (job_is_cancelled(&s->common.job)) {
Paolo Bonzini893f7eb2012-10-18 16:49:23 +0200913 goto immediate_exit;
914 }
915
Max Reitzb21c7652014-10-24 15:57:36 +0200916 s->bdev_length = bdrv_getlength(bs);
917 if (s->bdev_length < 0) {
918 ret = s->bdev_length;
Fam Zheng373df5b2014-04-29 18:09:09 +0800919 goto immediate_exit;
Kevin Wolfbecc3472017-02-17 11:11:28 +0100920 }
921
Kevin Wolfe83dd682020-05-11 15:58:24 +0200922 target_length = blk_getlength(s->target);
923 if (target_length < 0) {
924 ret = target_length;
925 goto immediate_exit;
926 }
927
Kevin Wolfbecc3472017-02-17 11:11:28 +0100928 /* Active commit must resize the base image if its size differs from the
929 * active layer. */
930 if (s->base == blk_bs(s->target)) {
Kevin Wolfe83dd682020-05-11 15:58:24 +0200931 if (s->bdev_length > target_length) {
Max Reitzc80d8b02019-09-18 11:51:40 +0200932 ret = blk_truncate(s->target, s->bdev_length, false,
Kevin Wolf8c6242b2020-04-24 14:54:41 +0200933 PREALLOC_MODE_OFF, 0, NULL);
Kevin Wolfbecc3472017-02-17 11:11:28 +0100934 if (ret < 0) {
935 goto immediate_exit;
936 }
937 }
Kevin Wolfe83dd682020-05-11 15:58:24 +0200938 } else if (s->bdev_length != target_length) {
939 error_setg(errp, "Source and target image have different sizes");
940 ret = -EINVAL;
941 goto immediate_exit;
Kevin Wolfbecc3472017-02-17 11:11:28 +0100942 }
943
944 if (s->bdev_length == 0) {
Kevin Wolf2e1795b2018-04-25 14:56:09 +0200945 /* Transition to the READY state and wait for complete. */
946 job_transition_to_ready(&s->common.job);
Fam Zheng9e48b022014-06-24 20:26:36 +0800947 s->synced = true;
Max Reitzd06107a2018-06-13 20:18:21 +0200948 s->actively_synced = true;
Kevin Wolfdaa7f2f2018-04-17 12:56:07 +0200949 while (!job_is_cancelled(&s->common.job) && !s->should_complete) {
Kevin Wolf198c49c2018-04-24 16:55:04 +0200950 job_yield(&s->common.job);
Fam Zheng9e48b022014-06-24 20:26:36 +0800951 }
Kevin Wolfdaa7f2f2018-04-17 12:56:07 +0200952 s->common.job.cancelled = false;
Fam Zheng9e48b022014-06-24 20:26:36 +0800953 goto immediate_exit;
Paolo Bonzini893f7eb2012-10-18 16:49:23 +0200954 }
955
Max Reitzb21c7652014-10-24 15:57:36 +0200956 length = DIV_ROUND_UP(s->bdev_length, s->granularity);
Paolo Bonzini402a4742013-01-22 09:03:14 +0100957 s->in_flight_bitmap = bitmap_new(length);
958
Paolo Bonzinib812f672013-01-21 17:09:43 +0100959 /* If we have no backing file yet in the destination, we cannot let
960 * the destination do COW. Instead, we copy sectors around the
961 * dirty data if needed. We need a bitmap to do that.
962 */
Kevin Wolfe253f4b2016-04-12 16:17:41 +0200963 bdrv_get_backing_filename(target_bs, backing_filename,
Paolo Bonzinib812f672013-01-21 17:09:43 +0100964 sizeof(backing_filename));
Kevin Wolfe253f4b2016-04-12 16:17:41 +0200965 if (!bdrv_get_info(target_bs, &bdi) && bdi.cluster_size) {
Eric Blakeb4369822017-07-07 07:44:46 -0500966 s->target_cluster_size = bdi.cluster_size;
967 } else {
968 s->target_cluster_size = BDRV_SECTOR_SIZE;
Paolo Bonzinib812f672013-01-21 17:09:43 +0100969 }
Max Reitz3f072a72019-06-12 16:27:32 +0200970 if (backing_filename[0] && !bdrv_backing_chain_next(target_bs) &&
Eric Blakeb4369822017-07-07 07:44:46 -0500971 s->granularity < s->target_cluster_size) {
972 s->buf_size = MAX(s->buf_size, s->target_cluster_size);
Fam Zhenge5b43572016-02-05 10:00:29 +0800973 s->cow_bitmap = bitmap_new(length);
974 }
Kevin Wolfe253f4b2016-04-12 16:17:41 +0200975 s->max_iov = MIN(bs->bl.max_iov, target_bs->bl.max_iov);
Paolo Bonzinib812f672013-01-21 17:09:43 +0100976
Kevin Wolf7504edf2014-05-21 18:16:21 +0200977 s->buf = qemu_try_blockalign(bs, s->buf_size);
978 if (s->buf == NULL) {
979 ret = -ENOMEM;
980 goto immediate_exit;
981 }
982
Paolo Bonzini402a4742013-01-22 09:03:14 +0100983 mirror_free_init(s);
Paolo Bonzini893f7eb2012-10-18 16:49:23 +0200984
Denis V. Lunev49efb1f2016-07-14 16:33:24 +0300985 s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
Fam Zheng03544a62013-12-16 14:45:30 +0800986 if (!s->is_none_mode) {
Denis V. Lunevc0b363a2016-07-14 16:33:25 +0300987 ret = mirror_dirty_init(s);
Kevin Wolfdaa7f2f2018-04-17 12:56:07 +0200988 if (ret < 0 || job_is_cancelled(&s->common.job)) {
Denis V. Lunevc0b363a2016-07-14 16:33:25 +0300989 goto immediate_exit;
Paolo Bonzini893f7eb2012-10-18 16:49:23 +0200990 }
991 }
992
Fam Zhengdc162c82016-10-13 17:58:21 -0400993 assert(!s->dbi);
Eric Blake715a74d2017-09-25 09:55:16 -0500994 s->dbi = bdrv_dirty_iter_new(s->dirty_bitmap);
Paolo Bonzini893f7eb2012-10-18 16:49:23 +0200995 for (;;) {
Paolo Bonzinicc8c9d62014-03-21 13:55:18 +0100996 uint64_t delay_ns = 0;
Denis V. Lunev49efb1f2016-07-14 16:33:24 +0300997 int64_t cnt, delta;
Paolo Bonzini893f7eb2012-10-18 16:49:23 +0200998 bool should_complete;
999
Max Reitzd06107a2018-06-13 20:18:21 +02001000 /* Do not start passive operations while there are active
1001 * writes in progress */
1002 while (s->in_active_write_counter) {
Kevin Wolf9178f4f2020-03-26 16:36:27 +01001003 mirror_wait_for_any_operation(s, true);
Max Reitzd06107a2018-06-13 20:18:21 +02001004 }
1005
Paolo Bonzinibd48bde2013-01-22 09:03:12 +01001006 if (s->ret < 0) {
1007 ret = s->ret;
1008 goto immediate_exit;
1009 }
1010
Kevin Wolfda01ff72018-04-13 17:31:02 +02001011 job_pause_point(&s->common.job);
Stefan Hajnoczi565ac012016-06-16 17:56:28 +01001012
John Snow20dca812015-04-17 19:50:02 -04001013 cnt = bdrv_get_dirty_count(s->dirty_bitmap);
Kevin Wolf05df8a62018-01-18 18:08:22 +01001014 /* cnt is the number of dirty bytes remaining and s->bytes_in_flight is
1015 * the number of bytes currently being processed; together those are
1016 * the current remaining operation length */
Kevin Wolf30a5c882018-05-04 12:17:20 +02001017 job_progress_set_remaining(&s->common.job, s->bytes_in_flight + cnt);
Paolo Bonzinibd48bde2013-01-22 09:03:12 +01001018
1019 /* Note that even when no rate limit is applied we need to yield
Fam Zhenga7282332015-04-03 22:05:21 +08001020 * periodically with no pending I/O so that bdrv_drain_all() returns.
Kevin Wolf18bb6922018-01-18 20:25:40 +01001021 * We do so every BLKOCK_JOB_SLICE_TIME nanoseconds, or when there is
1022 * an error, or when the source is clean, whichever comes first. */
Denis V. Lunev49efb1f2016-07-14 16:33:24 +03001023 delta = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->last_pause_ns;
Kevin Wolf18bb6922018-01-18 20:25:40 +01001024 if (delta < BLOCK_JOB_SLICE_TIME &&
Paolo Bonzinibd48bde2013-01-22 09:03:12 +01001025 s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
Denis V. Lunevcf56a3c2016-06-22 15:35:27 +03001026 if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 ||
Paolo Bonzini402a4742013-01-22 09:03:14 +01001027 (cnt == 0 && s->in_flight > 0)) {
Eric Blake9a46dba2017-09-25 09:55:18 -05001028 trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight);
Kevin Wolf9178f4f2020-03-26 16:36:27 +01001029 mirror_wait_for_free_in_flight_slot(s);
Paolo Bonzinibd48bde2013-01-22 09:03:12 +01001030 continue;
1031 } else if (cnt != 0) {
Paolo Bonzinicc8c9d62014-03-21 13:55:18 +01001032 delay_ns = mirror_iteration(s);
Paolo Bonzini893f7eb2012-10-18 16:49:23 +02001033 }
Paolo Bonzini893f7eb2012-10-18 16:49:23 +02001034 }
1035
1036 should_complete = false;
Paolo Bonzinibd48bde2013-01-22 09:03:12 +01001037 if (s->in_flight == 0 && cnt == 0) {
Paolo Bonzini893f7eb2012-10-18 16:49:23 +02001038 trace_mirror_before_flush(s);
Paolo Bonzinibdffb312016-11-09 17:20:08 +01001039 if (!s->synced) {
1040 if (mirror_flush(s) < 0) {
1041 /* Go check s->ret. */
1042 continue;
Paolo Bonzinib952b552012-10-18 16:49:28 +02001043 }
Paolo Bonzinib952b552012-10-18 16:49:28 +02001044 /* We're out of the streaming phase. From now on, if the job
1045 * is cancelled we will actually complete all pending I/O and
1046 * report completion. This way, block-job-cancel will leave
1047 * the target in a consistent state.
1048 */
Kevin Wolf2e1795b2018-04-25 14:56:09 +02001049 job_transition_to_ready(&s->common.job);
Paolo Bonzinibdffb312016-11-09 17:20:08 +01001050 s->synced = true;
Max Reitzd06107a2018-06-13 20:18:21 +02001051 if (s->copy_mode != MIRROR_COPY_MODE_BACKGROUND) {
1052 s->actively_synced = true;
1053 }
Paolo Bonzinid63ffd82012-10-18 16:49:25 +02001054 }
Paolo Bonzinibdffb312016-11-09 17:20:08 +01001055
1056 should_complete = s->should_complete ||
Kevin Wolfdaa7f2f2018-04-17 12:56:07 +02001057 job_is_cancelled(&s->common.job);
Paolo Bonzinibdffb312016-11-09 17:20:08 +01001058 cnt = bdrv_get_dirty_count(s->dirty_bitmap);
Paolo Bonzini893f7eb2012-10-18 16:49:23 +02001059 }
1060
1061 if (cnt == 0 && should_complete) {
1062 /* The dirty bitmap is not updated while operations are pending.
1063 * If we're about to exit, wait for pending operations before
1064 * calling bdrv_get_dirty_count(bs), or we may exit while the
1065 * source has dirty data to copy!
1066 *
1067 * Note that I/O can be submitted by the guest while
Paolo Bonzini9a0cec62016-10-27 12:48:51 +02001068 * mirror_populate runs, so pause it now. Before deciding
1069 * whether to switch to target check one last time if I/O has
1070 * come in the meanwhile, and if not flush the data to disk.
Paolo Bonzini893f7eb2012-10-18 16:49:23 +02001071 */
Eric Blake9a46dba2017-09-25 09:55:18 -05001072 trace_mirror_before_drain(s, cnt);
Paolo Bonzini9a0cec62016-10-27 12:48:51 +02001073
Sergio Lopez5e771752019-03-08 16:48:53 +01001074 s->in_drain = true;
Paolo Bonzini9a0cec62016-10-27 12:48:51 +02001075 bdrv_drained_begin(bs);
John Snow20dca812015-04-17 19:50:02 -04001076 cnt = bdrv_get_dirty_count(s->dirty_bitmap);
Paolo Bonzinibdffb312016-11-09 17:20:08 +01001077 if (cnt > 0 || mirror_flush(s) < 0) {
Paolo Bonzini9a0cec62016-10-27 12:48:51 +02001078 bdrv_drained_end(bs);
Sergio Lopez5e771752019-03-08 16:48:53 +01001079 s->in_drain = false;
Paolo Bonzini9a0cec62016-10-27 12:48:51 +02001080 continue;
1081 }
1082
1083 /* The two disks are in sync. Exit and report successful
1084 * completion.
1085 */
1086 assert(QLIST_EMPTY(&bs->tracked_requests));
Kevin Wolfdaa7f2f2018-04-17 12:56:07 +02001087 s->common.job.cancelled = false;
Paolo Bonzini9a0cec62016-10-27 12:48:51 +02001088 need_drain = false;
1089 break;
Paolo Bonzini893f7eb2012-10-18 16:49:23 +02001090 }
1091
1092 ret = 0;
Stefan Hajnocziddc41152018-04-24 13:35:27 +01001093
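        /*
         * Once the target is in sync and no completion was requested, sleep
         * for a whole slice only while the job is completely idle (nothing
         * in flight and a clean dirty bitmap); otherwise continue without
         * extra delay.
         */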
1094 if (s->synced && !should_complete) {
Kevin Wolf18bb6922018-01-18 20:25:40 +01001095 delay_ns = (s->in_flight == 0 &&
1096 cnt == 0 ? BLOCK_JOB_SLICE_TIME : 0);
Stefan Hajnocziddc41152018-04-24 13:35:27 +01001097 }
Eric Blake9a46dba2017-09-25 09:55:18 -05001098 trace_mirror_before_sleep(s, cnt, s->synced, delay_ns);
Kevin Wolf5d43e862018-04-18 16:32:20 +02001099 job_sleep_ns(&s->common.job, delay_ns);
Kevin Wolfdaa7f2f2018-04-17 12:56:07 +02001100 if (job_is_cancelled(&s->common.job) &&
Kevin Wolf004e95d2018-04-20 14:56:08 +02001101 (!s->synced || s->common.job.force_cancel))
Max Reitzeb366392018-05-02 00:05:08 +02001102 {
Liang Lib76e4452018-03-13 08:12:16 -04001103 break;
Paolo Bonzini893f7eb2012-10-18 16:49:23 +02001104 }
Denis V. Lunev49efb1f2016-07-14 16:33:24 +03001105 s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
Paolo Bonzini893f7eb2012-10-18 16:49:23 +02001106 }
1107
1108immediate_exit:
Paolo Bonzinibd48bde2013-01-22 09:03:12 +01001109 if (s->in_flight > 0) {
1110 /* We get here only if something went wrong. Either the job failed,
1111 * or it was cancelled prematurely so that we do not guarantee that
1112 * the target is a copy of the source.
1113 */
Kevin Wolf004e95d2018-04-20 14:56:08 +02001114 assert(ret < 0 || ((s->common.job.force_cancel || !s->synced) &&
Kevin Wolfdaa7f2f2018-04-17 12:56:07 +02001115 job_is_cancelled(&s->common.job)));
Paolo Bonzini9a0cec62016-10-27 12:48:51 +02001116 assert(need_drain);
Paolo Bonzinibae81962016-10-27 12:48:50 +02001117 mirror_wait_for_all_io(s);
Paolo Bonzinibd48bde2013-01-22 09:03:12 +01001118 }
1119
1120 assert(s->in_flight == 0);
Markus Armbruster7191bf32013-01-15 15:29:10 +01001121 qemu_vfree(s->buf);
Paolo Bonzinib812f672013-01-21 17:09:43 +01001122 g_free(s->cow_bitmap);
Paolo Bonzini402a4742013-01-22 09:03:14 +01001123 g_free(s->in_flight_bitmap);
Fam Zhengdc162c82016-10-13 17:58:21 -04001124 bdrv_dirty_iter_free(s->dbi);
Stefan Hajnoczi5a7e7a02014-10-21 12:03:58 +01001125
Paolo Bonzini9a0cec62016-10-27 12:48:51 +02001126 if (need_drain) {
Sergio Lopez5e771752019-03-08 16:48:53 +01001127 s->in_drain = true;
Paolo Bonzini9a0cec62016-10-27 12:48:51 +02001128 bdrv_drained_begin(bs);
1129 }
John Snowf67432a2018-08-29 21:57:26 -04001130
John Snowf67432a2018-08-29 21:57:26 -04001131 return ret;
Paolo Bonzini893f7eb2012-10-18 16:49:23 +02001132}
1133
Kevin Wolf3453d972018-04-23 12:24:16 +02001134static void mirror_complete(Job *job, Error **errp)
Paolo Bonzinid63ffd82012-10-18 16:49:25 +02001135{
Kevin Wolf3453d972018-04-23 12:24:16 +02001136 MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job);
Max Reitz274fcce2016-06-10 20:57:47 +02001137
Paolo Bonzinid63ffd82012-10-18 16:49:25 +02001138 if (!s->synced) {
Alberto Garcia9df229c2016-07-05 17:28:53 +03001139 error_setg(errp, "The active block job '%s' cannot be completed",
Kevin Wolf3453d972018-04-23 12:24:16 +02001140 job->id);
Paolo Bonzinid63ffd82012-10-18 16:49:25 +02001141 return;
1142 }
1143
Changlong Xie15d67292016-06-23 16:57:21 +08001144 /* block all operations on to_replace bs */
BenoƮt Canet09158f02014-06-27 18:25:25 +02001145 if (s->replaces) {
Stefan Hajnoczi5a7e7a02014-10-21 12:03:58 +01001146 AioContext *replace_aio_context;
1147
Wen Congyange12f3782015-07-17 10:12:22 +08001148 s->to_replace = bdrv_find_node(s->replaces);
BenoƮt Canet09158f02014-06-27 18:25:25 +02001149 if (!s->to_replace) {
Wen Congyange12f3782015-07-17 10:12:22 +08001150 error_setg(errp, "Node name '%s' not found", s->replaces);
BenoƮt Canet09158f02014-06-27 18:25:25 +02001151 return;
1152 }
1153
Stefan Hajnoczi5a7e7a02014-10-21 12:03:58 +01001154 replace_aio_context = bdrv_get_aio_context(s->to_replace);
1155 aio_context_acquire(replace_aio_context);
1156
Kevin Wolf4ef85a92017-01-25 19:16:34 +01001157        /* TODO Translate this into the permission system. The current definition
1158         * of GRAPH_MOD would require requesting it for the parents; they might
1159 * not even be BlockDriverStates, however, so a BdrvChild can't address
1160 * them. May need redefinition of GRAPH_MOD. */
BenoƮt Canet09158f02014-06-27 18:25:25 +02001161 error_setg(&s->replace_blocker,
1162 "block device is in use by block-job-complete");
1163 bdrv_op_block_all(s->to_replace, s->replace_blocker);
1164 bdrv_ref(s->to_replace);
Stefan Hajnoczi5a7e7a02014-10-21 12:03:58 +01001165
1166 aio_context_release(replace_aio_context);
BenoƮt Canet09158f02014-06-27 18:25:25 +02001167 }
1168
Paolo Bonzinid63ffd82012-10-18 16:49:25 +02001169 s->should_complete = true;
Max Reitz00769412021-04-09 14:04:19 +02001170
1171 /* If the job is paused, it will be re-entered when it is resumed */
1172 if (!job->paused) {
1173 job_enter(job);
1174 }
Paolo Bonzinid63ffd82012-10-18 16:49:25 +02001175}
1176
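/* Quiesce for a pause: wait for all in-flight mirror operations to finish. */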
Stefan Hajnoczi537c3d42018-12-13 11:24:34 +00001177static void coroutine_fn mirror_pause(Job *job)
Stefan Hajnoczi565ac012016-06-16 17:56:28 +01001178{
Kevin Wolfda01ff72018-04-13 17:31:02 +02001179 MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job);
Stefan Hajnoczi565ac012016-06-16 17:56:28 +01001180
Paolo Bonzinibae81962016-10-27 12:48:50 +02001181 mirror_wait_for_all_io(s);
Stefan Hajnoczi565ac012016-06-16 17:56:28 +01001182}
1183
Kevin Wolf89bd0302018-03-22 14:11:20 +01001184static bool mirror_drained_poll(BlockJob *job)
1185{
1186 MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
Sergio Lopez5e771752019-03-08 16:48:53 +01001187
1188    /* If the job is neither paused nor cancelled, we can't be sure that it won't
1189 * issue more requests. We make an exception if we've reached this point
1190 * from one of our own drain sections, to avoid a deadlock waiting for
1191 * ourselves.
1192 */
1193 if (!s->common.job.paused && !s->common.job.cancelled && !s->in_drain) {
1194 return true;
1195 }
1196
Kevin Wolf89bd0302018-03-22 14:11:20 +01001197 return !!s->in_flight;
1198}
1199
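/*
 * On a forced cancel, or a cancel before the job reached the ready state,
 * also cancel requests that are already in flight on the target node.
 */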
Vladimir Sementsov-Ogievskiy9c785cd2021-04-21 10:58:58 +03001200static void mirror_cancel(Job *job, bool force)
Vladimir Sementsov-Ogievskiy521ff8b2021-02-05 19:37:15 +03001201{
1202 MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job);
1203 BlockDriverState *target = blk_bs(s->target);
1204
Vladimir Sementsov-Ogievskiy9c785cd2021-04-21 10:58:58 +03001205 if (force || !job_is_ready(job)) {
1206 bdrv_cancel_in_flight(target);
1207 }
Vladimir Sementsov-Ogievskiy521ff8b2021-02-05 19:37:15 +03001208}
1209
Fam Zheng3fc4b102013-10-08 17:29:38 +08001210static const BlockJobDriver mirror_job_driver = {
Kevin Wolf33e9e9b2018-04-12 17:29:59 +02001211 .job_driver = {
1212 .instance_size = sizeof(MirrorBlockJob),
Kevin Wolf252291e2018-04-12 17:57:08 +02001213 .job_type = JOB_TYPE_MIRROR,
Kevin Wolf80fa2c72018-04-13 18:50:05 +02001214 .free = block_job_free,
Kevin Wolfb15de822018-04-18 17:10:26 +02001215 .user_resume = block_job_user_resume,
John Snowf67432a2018-08-29 21:57:26 -04001216 .run = mirror_run,
John Snow737efc12018-09-06 09:02:15 -04001217 .prepare = mirror_prepare,
1218 .abort = mirror_abort,
Kevin Wolfda01ff72018-04-13 17:31:02 +02001219 .pause = mirror_pause,
Kevin Wolf3453d972018-04-23 12:24:16 +02001220 .complete = mirror_complete,
Vladimir Sementsov-Ogievskiy521ff8b2021-02-05 19:37:15 +03001221 .cancel = mirror_cancel,
Kevin Wolf33e9e9b2018-04-12 17:29:59 +02001222 },
Kevin Wolf89bd0302018-03-22 14:11:20 +01001223 .drained_poll = mirror_drained_poll,
Paolo Bonzini893f7eb2012-10-18 16:49:23 +02001224};
1225
Fam Zheng03544a62013-12-16 14:45:30 +08001226static const BlockJobDriver commit_active_job_driver = {
Kevin Wolf33e9e9b2018-04-12 17:29:59 +02001227 .job_driver = {
1228 .instance_size = sizeof(MirrorBlockJob),
Kevin Wolf252291e2018-04-12 17:57:08 +02001229 .job_type = JOB_TYPE_COMMIT,
Kevin Wolf80fa2c72018-04-13 18:50:05 +02001230 .free = block_job_free,
Kevin Wolfb15de822018-04-18 17:10:26 +02001231 .user_resume = block_job_user_resume,
John Snowf67432a2018-08-29 21:57:26 -04001232 .run = mirror_run,
John Snow737efc12018-09-06 09:02:15 -04001233 .prepare = mirror_prepare,
1234 .abort = mirror_abort,
Kevin Wolfda01ff72018-04-13 17:31:02 +02001235 .pause = mirror_pause,
Kevin Wolf3453d972018-04-23 12:24:16 +02001236 .complete = mirror_complete,
Kevin Wolf33e9e9b2018-04-12 17:29:59 +02001237 },
Kevin Wolf89bd0302018-03-22 14:11:20 +01001238 .drained_poll = mirror_drained_poll,
Fam Zheng03544a62013-12-16 14:45:30 +08001239};
1240
Stefan Hajnoczi537c3d42018-12-13 11:24:34 +00001241static void coroutine_fn
1242do_sync_target_write(MirrorBlockJob *job, MirrorMethod method,
1243 uint64_t offset, uint64_t bytes,
1244 QEMUIOVector *qiov, int flags)
Max Reitzd06107a2018-06-13 20:18:21 +02001245{
Vladimir Sementsov-Ogievskiy5c511ac2019-10-11 12:07:08 +03001246 int ret;
Vladimir Sementsov-Ogievskiydbdf6992019-10-11 12:07:10 +03001247 size_t qiov_offset = 0;
1248 int64_t bitmap_offset, bitmap_end;
Max Reitzd06107a2018-06-13 20:18:21 +02001249
Vladimir Sementsov-Ogievskiydbdf6992019-10-11 12:07:10 +03001250 if (!QEMU_IS_ALIGNED(offset, job->granularity) &&
1251 bdrv_dirty_bitmap_get(job->dirty_bitmap, offset))
1252 {
1253 /*
1254 * Dirty unaligned padding: ignore it.
1255 *
1256 * Reasoning:
1257         * 1. If we copy it, we cannot reset the corresponding bit in
1258         *    dirty_bitmap, as some "dirty" bytes may still not have been
1259         *    copied.
1260         * 2. It is already dirty, so skipping it does not make mirror
1261         *    progress diverge.
1262         *
1263         * Note that, because of this, a guest write may not contribute to
1264         * mirror convergence; that is fine, as we have the background
1265         * mirroring process. If, under bad circumstances (high guest I/O
1266         * load), the background process starves, we will not converge
1267         * anyway, even if every write contributed, since the guest is not
1268         * guaranteed to rewrite the whole disk.
1269 */
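        /*
         * For example (hypothetical numbers): with a 64 KiB granularity, a
         * 100 KiB guest write at offset 10 KiB whose first chunk is already
         * dirty is trimmed to qiov_offset = 64 KiB - 10 KiB = 54 KiB below,
         * leaving offset = 64 KiB and bytes = 46 KiB.
         */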
1270 qiov_offset = QEMU_ALIGN_UP(offset, job->granularity) - offset;
1271 if (bytes <= qiov_offset) {
1272 /* nothing to do after shrink */
1273 return;
1274 }
1275 offset += qiov_offset;
1276 bytes -= qiov_offset;
1277 }
1278
1279 if (!QEMU_IS_ALIGNED(offset + bytes, job->granularity) &&
1280 bdrv_dirty_bitmap_get(job->dirty_bitmap, offset + bytes - 1))
1281 {
1282 uint64_t tail = (offset + bytes) % job->granularity;
1283
1284 if (bytes <= tail) {
1285 /* nothing to do after shrink */
1286 return;
1287 }
1288 bytes -= tail;
1289 }
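    /*
     * Likewise for the tail, e.g. a 100 KiB write at offset 0 with a 64 KiB
     * granularity and a dirty last chunk is cut by tail = 36 KiB, so only
     * the first 64 KiB are mirrored synchronously here.
     */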
1290
1291 /*
1292     * The unaligned edges are now either clean or trimmed off, so for
1293     * bitmap resetting we can safely shrink the range to alignment.
1294 */
1295 bitmap_offset = QEMU_ALIGN_UP(offset, job->granularity);
1296 bitmap_end = QEMU_ALIGN_DOWN(offset + bytes, job->granularity);
1297 if (bitmap_offset < bitmap_end) {
1298 bdrv_reset_dirty_bitmap(job->dirty_bitmap, bitmap_offset,
1299 bitmap_end - bitmap_offset);
1300 }
Vladimir Sementsov-Ogievskiy5c511ac2019-10-11 12:07:08 +03001301
1302 job_progress_increase_remaining(&job->common.job, bytes);
1303
1304 switch (method) {
1305 case MIRROR_METHOD_COPY:
Vladimir Sementsov-Ogievskiydbdf6992019-10-11 12:07:10 +03001306 ret = blk_co_pwritev_part(job->target, offset, bytes,
1307 qiov, qiov_offset, flags);
Vladimir Sementsov-Ogievskiy5c511ac2019-10-11 12:07:08 +03001308 break;
1309
1310 case MIRROR_METHOD_ZERO:
1311 assert(!qiov);
1312 ret = blk_co_pwrite_zeroes(job->target, offset, bytes, flags);
1313 break;
1314
1315 case MIRROR_METHOD_DISCARD:
1316 assert(!qiov);
1317 ret = blk_co_pdiscard(job->target, offset, bytes);
1318 break;
1319
1320 default:
1321 abort();
Max Reitzd06107a2018-06-13 20:18:21 +02001322 }
1323
Vladimir Sementsov-Ogievskiy5c511ac2019-10-11 12:07:08 +03001324 if (ret >= 0) {
1325 job_progress_update(&job->common.job, bytes);
1326 } else {
1327 BlockErrorAction action;
Max Reitzd06107a2018-06-13 20:18:21 +02001328
Vladimir Sementsov-Ogievskiydbdf6992019-10-11 12:07:10 +03001329 /*
1330         * We failed, so mark the whole area dirty, aligned out to granularity.
1331         * Note that we don't care about any trimmed-off edges: they were dirty
1332         * at function start, and they must still be dirty, as we have locked
1333         * the region for the in-flight op.
1334 */
1335 bitmap_offset = QEMU_ALIGN_DOWN(offset, job->granularity);
1336 bitmap_end = QEMU_ALIGN_UP(offset + bytes, job->granularity);
1337 bdrv_set_dirty_bitmap(job->dirty_bitmap, bitmap_offset,
1338 bitmap_end - bitmap_offset);
Vladimir Sementsov-Ogievskiy5c511ac2019-10-11 12:07:08 +03001339 job->actively_synced = false;
Max Reitzd06107a2018-06-13 20:18:21 +02001340
Vladimir Sementsov-Ogievskiy5c511ac2019-10-11 12:07:08 +03001341 action = mirror_error_action(job, false, -ret);
1342 if (action == BLOCK_ERROR_ACTION_REPORT) {
1343 if (!job->ret) {
1344 job->ret = ret;
Max Reitzd06107a2018-06-13 20:18:21 +02001345 }
1346 }
Max Reitzd06107a2018-06-13 20:18:21 +02001347 }
1348}
1349
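/*
 * Register an in-flight operation for a guest write in write-blocking mode:
 * wait for conflicting background operations on the same range, then mark
 * the affected chunks busy in the in-flight bitmap.
 */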
1350static MirrorOp *coroutine_fn active_write_prepare(MirrorBlockJob *s,
1351 uint64_t offset,
1352 uint64_t bytes)
1353{
1354 MirrorOp *op;
1355 uint64_t start_chunk = offset / s->granularity;
1356 uint64_t end_chunk = DIV_ROUND_UP(offset + bytes, s->granularity);
1357
1358 op = g_new(MirrorOp, 1);
1359 *op = (MirrorOp){
1360 .s = s,
1361 .offset = offset,
1362 .bytes = bytes,
1363 .is_active_write = true,
Kevin Wolfce8cabb2020-03-26 16:36:28 +01001364 .is_in_flight = true,
Vladimir Sementsov-Ogievskiyead3f1b2021-07-03 00:16:34 +03001365 .co = qemu_coroutine_self(),
Max Reitzd06107a2018-06-13 20:18:21 +02001366 };
1367 qemu_co_queue_init(&op->waiting_requests);
1368 QTAILQ_INSERT_TAIL(&s->ops_in_flight, op, next);
1369
1370 s->in_active_write_counter++;
1371
1372 mirror_wait_on_conflicts(op, s, offset, bytes);
1373
1374 bitmap_set(s->in_flight_bitmap, start_chunk, end_chunk - start_chunk);
1375
1376 return op;
1377}
1378
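/*
 * Counterpart of active_write_prepare(): drop the active-write counter,
 * clear the in-flight chunks and wake up requests waiting on them. Once the
 * last active write settles while the job is actively synced, the dirty
 * bitmap is expected to be clean (asserted only when the mirror node is the
 * source's sole parent).
 */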
1379static void coroutine_fn active_write_settle(MirrorOp *op)
1380{
1381 uint64_t start_chunk = op->offset / op->s->granularity;
1382 uint64_t end_chunk = DIV_ROUND_UP(op->offset + op->bytes,
1383 op->s->granularity);
1384
1385 if (!--op->s->in_active_write_counter && op->s->actively_synced) {
1386 BdrvChild *source = op->s->mirror_top_bs->backing;
1387
1388 if (QLIST_FIRST(&source->bs->parents) == source &&
1389 QLIST_NEXT(source, next_parent) == NULL)
1390 {
1391 /* Assert that we are back in sync once all active write
1392 * operations are settled.
1393 * Note that we can only assert this if the mirror node
1394 * is the source node's only parent. */
1395 assert(!bdrv_get_dirty_count(op->s->dirty_bitmap));
1396 }
1397 }
1398 bitmap_clear(op->s->in_flight_bitmap, start_chunk, end_chunk - start_chunk);
1399 QTAILQ_REMOVE(&op->s->ops_in_flight, op, next);
1400 qemu_co_queue_restart_all(&op->waiting_requests);
1401 g_free(op);
1402}
1403
Kevin Wolf4ef85a92017-01-25 19:16:34 +01001404static int coroutine_fn bdrv_mirror_top_preadv(BlockDriverState *bs,
Vladimir Sementsov-Ogievskiyf7ef38d2021-09-03 13:27:59 +03001405 int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags)
Kevin Wolf4ef85a92017-01-25 19:16:34 +01001406{
1407 return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
1408}
1409
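/*
 * Forward a write, write-zeroes or discard to the source (bs->backing) and,
 * if the job runs in write-blocking mode and has not failed, replicate the
 * same update synchronously to the target via do_sync_target_write().
 */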
Max Reitzd06107a2018-06-13 20:18:21 +02001410static int coroutine_fn bdrv_mirror_top_do_write(BlockDriverState *bs,
1411 MirrorMethod method, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov,
1412 int flags)
1413{
1414 MirrorOp *op = NULL;
1415 MirrorBDSOpaque *s = bs->opaque;
1416 int ret = 0;
1417 bool copy_to_target;
1418
1419 copy_to_target = s->job->ret >= 0 &&
1420 s->job->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING;
1421
1422 if (copy_to_target) {
1423 op = active_write_prepare(s->job, offset, bytes);
1424 }
1425
1426 switch (method) {
1427 case MIRROR_METHOD_COPY:
1428 ret = bdrv_co_pwritev(bs->backing, offset, bytes, qiov, flags);
1429 break;
1430
1431 case MIRROR_METHOD_ZERO:
1432 ret = bdrv_co_pwrite_zeroes(bs->backing, offset, bytes, flags);
1433 break;
1434
1435 case MIRROR_METHOD_DISCARD:
Fam Zheng0b9fd3f2018-07-10 14:31:17 +08001436 ret = bdrv_co_pdiscard(bs->backing, offset, bytes);
Max Reitzd06107a2018-06-13 20:18:21 +02001437 break;
1438
1439 default:
1440 abort();
1441 }
1442
1443 if (ret < 0) {
1444 goto out;
1445 }
1446
1447 if (copy_to_target) {
1448 do_sync_target_write(s->job, method, offset, bytes, qiov, flags);
1449 }
1450
1451out:
1452 if (copy_to_target) {
1453 active_write_settle(op);
1454 }
1455 return ret;
1456}
1457
Kevin Wolf4ef85a92017-01-25 19:16:34 +01001458static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs,
Vladimir Sementsov-Ogievskiye75abed2021-09-03 13:28:00 +03001459 int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags)
Kevin Wolf4ef85a92017-01-25 19:16:34 +01001460{
Max Reitzd06107a2018-06-13 20:18:21 +02001461 MirrorBDSOpaque *s = bs->opaque;
1462 QEMUIOVector bounce_qiov;
1463 void *bounce_buf;
1464 int ret = 0;
1465 bool copy_to_target;
1466
1467 copy_to_target = s->job->ret >= 0 &&
1468 s->job->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING;
1469
1470 if (copy_to_target) {
1471 /* The guest might concurrently modify the data to write; but
1472 * the data on source and destination must match, so we have
1473 * to use a bounce buffer if we are going to write to the
1474 * target now. */
1475 bounce_buf = qemu_blockalign(bs, bytes);
1476 iov_to_buf_full(qiov->iov, qiov->niov, 0, bounce_buf, bytes);
1477
1478 qemu_iovec_init(&bounce_qiov, 1);
1479 qemu_iovec_add(&bounce_qiov, bounce_buf, bytes);
1480 qiov = &bounce_qiov;
1481 }
1482
1483 ret = bdrv_mirror_top_do_write(bs, MIRROR_METHOD_COPY, offset, bytes, qiov,
1484 flags);
1485
1486 if (copy_to_target) {
1487 qemu_iovec_destroy(&bounce_qiov);
1488 qemu_vfree(bounce_buf);
1489 }
1490
1491 return ret;
Kevin Wolf4ef85a92017-01-25 19:16:34 +01001492}
1493
1494static int coroutine_fn bdrv_mirror_top_flush(BlockDriverState *bs)
1495{
Vladimir Sementsov-Ogievskiyce960aa2017-09-29 18:22:55 +03001496 if (bs->backing == NULL) {
1497 /* we can be here after failed bdrv_append in mirror_start_job */
1498 return 0;
1499 }
Kevin Wolf4ef85a92017-01-25 19:16:34 +01001500 return bdrv_co_flush(bs->backing->bs);
1501}
1502
Kevin Wolf4ef85a92017-01-25 19:16:34 +01001503static int coroutine_fn bdrv_mirror_top_pwrite_zeroes(BlockDriverState *bs,
Vladimir Sementsov-Ogievskiyf34b2bc2021-09-03 13:28:03 +03001504 int64_t offset, int64_t bytes, BdrvRequestFlags flags)
Kevin Wolf4ef85a92017-01-25 19:16:34 +01001505{
Max Reitzd06107a2018-06-13 20:18:21 +02001506 return bdrv_mirror_top_do_write(bs, MIRROR_METHOD_ZERO, offset, bytes, NULL,
1507 flags);
Kevin Wolf4ef85a92017-01-25 19:16:34 +01001508}
1509
1510static int coroutine_fn bdrv_mirror_top_pdiscard(BlockDriverState *bs,
Manos Pitsidianakisf5a5ca72017-06-09 13:18:08 +03001511 int64_t offset, int bytes)
Kevin Wolf4ef85a92017-01-25 19:16:34 +01001512{
Max Reitzd06107a2018-06-13 20:18:21 +02001513 return bdrv_mirror_top_do_write(bs, MIRROR_METHOD_DISCARD, offset, bytes,
1514 NULL, 0);
Kevin Wolf4ef85a92017-01-25 19:16:34 +01001515}
1516
Max Reitz998b3a12019-02-01 20:29:28 +01001517static void bdrv_mirror_top_refresh_filename(BlockDriverState *bs)
Kevin Wolffd4a6492017-03-09 11:49:16 +01001518{
Vladimir Sementsov-Ogievskiy18775ff2017-09-28 15:03:00 +03001519 if (bs->backing == NULL) {
1520 /* we can be here after failed bdrv_attach_child in
1521 * bdrv_set_backing_hd */
1522 return;
1523 }
Kevin Wolffd4a6492017-03-09 11:49:16 +01001524 pstrcpy(bs->exact_filename, sizeof(bs->exact_filename),
1525 bs->backing->bs->filename);
1526}
1527
Kevin Wolf4ef85a92017-01-25 19:16:34 +01001528static void bdrv_mirror_top_child_perm(BlockDriverState *bs, BdrvChild *c,
Max Reitzbf8e9252020-05-13 13:05:16 +02001529 BdrvChildRole role,
Kevin Wolfe0995dc2017-09-14 12:47:11 +02001530 BlockReopenQueue *reopen_queue,
Kevin Wolf4ef85a92017-01-25 19:16:34 +01001531 uint64_t perm, uint64_t shared,
1532 uint64_t *nperm, uint64_t *nshared)
1533{
Max Reitzf94dc3b2019-05-22 19:03:47 +02001534 MirrorBDSOpaque *s = bs->opaque;
1535
1536 if (s->stop) {
1537 /*
1538 * If the job is to be stopped, we do not need to forward
1539 * anything to the real image.
1540 */
1541 *nperm = 0;
1542 *nshared = BLK_PERM_ALL;
1543 return;
1544 }
1545
Max Reitz53431b92021-02-11 18:22:41 +01001546 bdrv_default_perms(bs, c, role, reopen_queue,
1547 perm, shared, nperm, nshared);
Kevin Wolf4ef85a92017-01-25 19:16:34 +01001548
Max Reitz53431b92021-02-11 18:22:41 +01001549 if (s->is_commit) {
1550 /*
1551 * For commit jobs, we cannot take CONSISTENT_READ, because
1552 * that permission is unshared for everything above the base
1553 * node (except for filters on the base node).
1554 * We also have to force-share the WRITE permission, or
1555 * otherwise we would block ourselves at the base node (if
1556 * writes are blocked for a node, they are also blocked for
1557 * its backing file).
1558 * (We could also share RESIZE, because it may be needed for
1559 * the target if its size is less than the top node's; but
1560 * bdrv_default_perms_for_cow() automatically shares RESIZE
1561 * for backing nodes if WRITE is shared, so there is no need
1562 * to do it here.)
1563 */
1564 *nperm &= ~BLK_PERM_CONSISTENT_READ;
1565 *nshared |= BLK_PERM_WRITE;
1566 }
Kevin Wolf4ef85a92017-01-25 19:16:34 +01001567}
1568
1569/* Dummy node that provides consistent read to its users without requiring it
1570 * from its backing file and that allows writes on the backing file chain. */
1571static BlockDriver bdrv_mirror_top = {
1572 .format_name = "mirror_top",
1573 .bdrv_co_preadv = bdrv_mirror_top_preadv,
1574 .bdrv_co_pwritev = bdrv_mirror_top_pwritev,
1575 .bdrv_co_pwrite_zeroes = bdrv_mirror_top_pwrite_zeroes,
1576 .bdrv_co_pdiscard = bdrv_mirror_top_pdiscard,
1577 .bdrv_co_flush = bdrv_mirror_top_flush,
Kevin Wolffd4a6492017-03-09 11:49:16 +01001578 .bdrv_refresh_filename = bdrv_mirror_top_refresh_filename,
Kevin Wolf4ef85a92017-01-25 19:16:34 +01001579 .bdrv_child_perm = bdrv_mirror_top_child_perm,
Max Reitz6540fd12020-05-13 13:05:11 +02001580
1581 .is_filter = true,
Kevin Wolf4ef85a92017-01-25 19:16:34 +01001582};
1583
Vladimir Sementsov-Ogievskiycc19f172019-06-06 18:41:29 +03001584static BlockJob *mirror_start_job(
1585 const char *job_id, BlockDriverState *bs,
John Snow47970df2016-10-27 12:06:57 -04001586 int creation_flags, BlockDriverState *target,
1587 const char *replaces, int64_t speed,
1588 uint32_t granularity, int64_t buf_size,
Max Reitz274fcce2016-06-10 20:57:47 +02001589 BlockMirrorBackingMode backing_mode,
Max Reitzcdf3bc92019-07-24 19:12:30 +02001590 bool zero_target,
BenoƮt Canet09158f02014-06-27 18:25:25 +02001591 BlockdevOnError on_source_error,
1592 BlockdevOnError on_target_error,
Fam Zheng0fc9f8e2015-06-08 13:56:08 +08001593 bool unmap,
Markus Armbruster097310b2014-10-07 13:59:15 +02001594 BlockCompletionFunc *cb,
Fam Zheng51ccfa22017-04-21 20:27:03 +08001595 void *opaque,
BenoƮt Canet09158f02014-06-27 18:25:25 +02001596 const BlockJobDriver *driver,
Wen Congyangb49f7ea2016-07-27 15:01:47 +08001597 bool is_none_mode, BlockDriverState *base,
Fam Zheng51ccfa22017-04-21 20:27:03 +08001598 bool auto_complete, const char *filter_node_name,
Max Reitz481deba2018-06-13 20:18:22 +02001599 bool is_mirror, MirrorCopyMode copy_mode,
Fam Zheng51ccfa22017-04-21 20:27:03 +08001600 Error **errp)
Paolo Bonzini893f7eb2012-10-18 16:49:23 +02001601{
1602 MirrorBlockJob *s;
Max Reitz429076e2018-06-13 20:18:19 +02001603 MirrorBDSOpaque *bs_opaque;
Kevin Wolf4ef85a92017-01-25 19:16:34 +01001604 BlockDriverState *mirror_top_bs;
Kevin Wolf4ef85a92017-01-25 19:16:34 +01001605 bool target_is_backing;
Max Reitz3f072a72019-06-12 16:27:32 +02001606 uint64_t target_perms, target_shared_perms;
Kevin Wolfd7086422017-01-13 19:02:32 +01001607 int ret;
Paolo Bonzini893f7eb2012-10-18 16:49:23 +02001608
Paolo Bonzinieee13df2013-01-21 17:09:46 +01001609 if (granularity == 0) {
John Snow341ebc22015-04-17 19:49:52 -04001610 granularity = bdrv_get_default_bitmap_granularity(target);
Paolo Bonzinieee13df2013-01-21 17:09:46 +01001611 }
1612
Eric Blake31826642017-10-11 22:47:08 -05001613 assert(is_power_of_2(granularity));
Paolo Bonzinieee13df2013-01-21 17:09:46 +01001614
Wen Congyang48ac0a42015-05-15 15:51:36 +08001615 if (buf_size < 0) {
1616 error_setg(errp, "Invalid parameter 'buf-size'");
Vladimir Sementsov-Ogievskiycc19f172019-06-06 18:41:29 +03001617 return NULL;
Wen Congyang48ac0a42015-05-15 15:51:36 +08001618 }
1619
1620 if (buf_size == 0) {
1621 buf_size = DEFAULT_MIRROR_BUF_SIZE;
1622 }
Fam Zheng5bc361b2013-12-16 14:45:29 +08001623
Max Reitz3f072a72019-06-12 16:27:32 +02001624 if (bdrv_skip_filters(bs) == bdrv_skip_filters(target)) {
Kevin Wolf86fae102018-08-14 11:52:25 +02001625 error_setg(errp, "Can't mirror node into itself");
Vladimir Sementsov-Ogievskiycc19f172019-06-06 18:41:29 +03001626 return NULL;
Kevin Wolf86fae102018-08-14 11:52:25 +02001627 }
1628
Max Reitz53431b92021-02-11 18:22:41 +01001629 target_is_backing = bdrv_chain_contains(bs, target);
1630
Kevin Wolf4ef85a92017-01-25 19:16:34 +01001631    /* In the case of active commit, add a dummy driver to provide consistent
1632     * reads on the top node while disabling them in the intermediate nodes,
1633     * and make the backing chain writable. */
Kevin Wolf6cdbceb2017-02-20 18:10:05 +01001634 mirror_top_bs = bdrv_new_open_driver(&bdrv_mirror_top, filter_node_name,
1635 BDRV_O_RDWR, errp);
Kevin Wolf4ef85a92017-01-25 19:16:34 +01001636 if (mirror_top_bs == NULL) {
Vladimir Sementsov-Ogievskiycc19f172019-06-06 18:41:29 +03001637 return NULL;
Paolo Bonzini893f7eb2012-10-18 16:49:23 +02001638 }
Kevin Wolfd3c8c672017-07-18 17:24:05 +02001639 if (!filter_node_name) {
1640 mirror_top_bs->implicit = true;
1641 }
Max Reitze5182c12019-07-03 19:28:02 +02001642
1643 /* So that we can always drop this node */
1644 mirror_top_bs->never_freeze = true;
1645
Kevin Wolf4ef85a92017-01-25 19:16:34 +01001646 mirror_top_bs->total_sectors = bs->total_sectors;
Max Reitz228345b2018-04-21 15:29:26 +02001647 mirror_top_bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED;
Kevin Wolf80f5c332019-03-22 13:42:39 +01001648 mirror_top_bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
1649 BDRV_REQ_NO_FALLBACK;
Max Reitz429076e2018-06-13 20:18:19 +02001650 bs_opaque = g_new0(MirrorBDSOpaque, 1);
1651 mirror_top_bs->opaque = bs_opaque;
Paolo Bonzini893f7eb2012-10-18 16:49:23 +02001652
Max Reitz53431b92021-02-11 18:22:41 +01001653 bs_opaque->is_commit = target_is_backing;
1654
Kevin Wolf4ef85a92017-01-25 19:16:34 +01001655 bdrv_drained_begin(bs);
Vladimir Sementsov-Ogievskiy934aee12021-02-02 15:49:44 +03001656 ret = bdrv_append(mirror_top_bs, bs, errp);
Kevin Wolf4ef85a92017-01-25 19:16:34 +01001657 bdrv_drained_end(bs);
1658
Vladimir Sementsov-Ogievskiy934aee12021-02-02 15:49:44 +03001659 if (ret < 0) {
Kevin Wolfb2c28322017-02-20 12:46:42 +01001660 bdrv_unref(mirror_top_bs);
Vladimir Sementsov-Ogievskiycc19f172019-06-06 18:41:29 +03001661 return NULL;
Kevin Wolfb2c28322017-02-20 12:46:42 +01001662 }
1663
Kevin Wolf4ef85a92017-01-25 19:16:34 +01001664 /* Make sure that the source is not resized while the job is running */
John Snow75859b92018-03-10 03:27:27 -05001665 s = block_job_create(job_id, driver, NULL, mirror_top_bs,
Kevin Wolf4ef85a92017-01-25 19:16:34 +01001666 BLK_PERM_CONSISTENT_READ,
1667 BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
1668 BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD, speed,
1669 creation_flags, cb, opaque, errp);
Kevin Wolf4ef85a92017-01-25 19:16:34 +01001670 if (!s) {
1671 goto fail;
1672 }
Max Reitz429076e2018-06-13 20:18:19 +02001673 bs_opaque->job = s;
1674
Max Reitz7a25fcd2017-04-03 19:51:49 +02001675 /* The block job now has a reference to this node */
1676 bdrv_unref(mirror_top_bs);
1677
Kevin Wolf4ef85a92017-01-25 19:16:34 +01001678 s->mirror_top_bs = mirror_top_bs;
1679
1680 /* No resize for the target either; while the mirror is still running, a
1681 * consistent read isn't necessarily possible. We could possibly allow
1682 * writes and graph modifications, though it would likely defeat the
1683 * purpose of a mirror, so leave them blocked for now.
1684 *
1685 * In the case of active commit, things look a bit different, though,
1686 * because the target is an already populated backing file in active use.
1687     * We can allow anything except resize there. */
Max Reitz3f072a72019-06-12 16:27:32 +02001688
1689 target_perms = BLK_PERM_WRITE;
1690 target_shared_perms = BLK_PERM_WRITE_UNCHANGED;
1691
Max Reitz3f072a72019-06-12 16:27:32 +02001692 if (target_is_backing) {
1693 int64_t bs_size, target_size;
1694 bs_size = bdrv_getlength(bs);
1695 if (bs_size < 0) {
1696 error_setg_errno(errp, -bs_size,
1697 "Could not inquire top image size");
1698 goto fail;
1699 }
1700
1701 target_size = bdrv_getlength(target);
1702 if (target_size < 0) {
1703 error_setg_errno(errp, -target_size,
1704 "Could not inquire base image size");
1705 goto fail;
1706 }
1707
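        /*
         * If the base is smaller than the top image, committing may have to
         * grow it, so additionally request the RESIZE permission.
         */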
1708 if (target_size < bs_size) {
1709 target_perms |= BLK_PERM_RESIZE;
1710 }
1711
1712 target_shared_perms |= BLK_PERM_CONSISTENT_READ
1713 | BLK_PERM_WRITE
1714 | BLK_PERM_GRAPH_MOD;
1715 } else if (bdrv_chain_contains(bs, bdrv_skip_filters(target))) {
1716 /*
1717 * We may want to allow this in the future, but it would
1718 * require taking some extra care.
1719 */
1720 error_setg(errp, "Cannot mirror to a filter on top of a node in the "
1721 "source's backing chain");
1722 goto fail;
1723 }
1724
1725 if (backing_mode != MIRROR_LEAVE_BACKING_CHAIN) {
1726 target_perms |= BLK_PERM_GRAPH_MOD;
1727 }
1728
Kevin Wolfd861ab32019-04-25 14:25:10 +02001729 s->target = blk_new(s->common.job.aio_context,
Max Reitz3f072a72019-06-12 16:27:32 +02001730 target_perms, target_shared_perms);
Kevin Wolfd7086422017-01-13 19:02:32 +01001731 ret = blk_insert_bs(s->target, target, errp);
1732 if (ret < 0) {
Kevin Wolf4ef85a92017-01-25 19:16:34 +01001733 goto fail;
Kevin Wolfd7086422017-01-13 19:02:32 +01001734 }
Fam Zheng045a2f82017-08-23 21:42:41 +08001735 if (is_mirror) {
1736        /* XXX: Mirror target could be an NBD server of the target QEMU in the case
1737 * of non-shared block migration. To allow migration completion, we
1738 * have to allow "inactivate" of the target BB. When that happens, we
1739 * know the job is drained, and the vcpus are stopped, so no write
1740 * operation will be performed. Block layer already has assertions to
1741 * ensure that. */
1742 blk_set_force_allow_inactivate(s->target);
1743 }
Kevin Wolf9ff7f0d2019-05-06 19:18:03 +02001744 blk_set_allow_aio_context_change(s->target, true);
Kevin Wolfcf312932019-07-22 17:46:23 +02001745 blk_set_disable_request_queuing(s->target, true);
Kevin Wolfe253f4b2016-04-12 16:17:41 +02001746
BenoƮt Canet09158f02014-06-27 18:25:25 +02001747 s->replaces = g_strdup(replaces);
Paolo Bonzinib952b552012-10-18 16:49:28 +02001748 s->on_source_error = on_source_error;
1749 s->on_target_error = on_target_error;
Fam Zheng03544a62013-12-16 14:45:30 +08001750 s->is_none_mode = is_none_mode;
Max Reitz274fcce2016-06-10 20:57:47 +02001751 s->backing_mode = backing_mode;
Max Reitzcdf3bc92019-07-24 19:12:30 +02001752 s->zero_target = zero_target;
Max Reitz481deba2018-06-13 20:18:22 +02001753 s->copy_mode = copy_mode;
Fam Zheng5bc361b2013-12-16 14:45:29 +08001754 s->base = base;
Max Reitz3f072a72019-06-12 16:27:32 +02001755 s->base_overlay = bdrv_find_overlay(bs, base);
Paolo Bonzinieee13df2013-01-21 17:09:46 +01001756 s->granularity = granularity;
Wen Congyang48ac0a42015-05-15 15:51:36 +08001757 s->buf_size = ROUND_UP(buf_size, granularity);
Fam Zheng0fc9f8e2015-06-08 13:56:08 +08001758 s->unmap = unmap;
Wen Congyangb49f7ea2016-07-27 15:01:47 +08001759 if (auto_complete) {
1760 s->should_complete = true;
1761 }
Paolo Bonzinib812f672013-01-21 17:09:43 +01001762
Fam Zheng0db6e542015-04-17 19:49:50 -04001763 s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
Fam Zhengb8afb522014-04-16 09:34:30 +08001764 if (!s->dirty_bitmap) {
Kevin Wolf88f9d1b2017-03-06 16:12:44 +01001765 goto fail;
Fam Zhengb8afb522014-04-16 09:34:30 +08001766 }
Vladimir Sementsov-Ogievskiydbdf6992019-10-11 12:07:10 +03001767 if (s->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING) {
1768 bdrv_disable_dirty_bitmap(s->dirty_bitmap);
1769 }
Alberto Garcia10f3cd12015-11-02 16:51:53 +02001770
Alberto Garcia67b24422018-11-22 17:00:27 +02001771 ret = block_job_add_bdrv(&s->common, "source", bs, 0,
1772 BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE |
1773 BLK_PERM_CONSISTENT_READ,
1774 errp);
1775 if (ret < 0) {
1776 goto fail;
1777 }
1778
Kevin Wolf4ef85a92017-01-25 19:16:34 +01001779 /* Required permissions are already taken with blk_new() */
Kevin Wolf76d554e2017-01-17 11:56:42 +01001780 block_job_add_bdrv(&s->common, "target", target, 0, BLK_PERM_ALL,
1781 &error_abort);
1782
Alberto Garciaf3ede4b2016-10-28 10:08:09 +03001783 /* In commit_active_start() all intermediate nodes disappear, so
1784 * any jobs in them must be blocked */
Kevin Wolf4ef85a92017-01-25 19:16:34 +01001785 if (target_is_backing) {
Max Reitz3f072a72019-06-12 16:27:32 +02001786 BlockDriverState *iter, *filtered_target;
1787 uint64_t iter_shared_perms;
1788
1789 /*
1790 * The topmost node with
1791 * bdrv_skip_filters(filtered_target) == bdrv_skip_filters(target)
1792 */
1793 filtered_target = bdrv_cow_bs(bdrv_find_overlay(bs, target));
1794
1795 assert(bdrv_skip_filters(filtered_target) ==
1796 bdrv_skip_filters(target));
1797
1798 /*
1799 * XXX BLK_PERM_WRITE needs to be allowed so we don't block
1800 * ourselves at s->base (if writes are blocked for a node, they are
1801 * also blocked for its backing file). The other options would be a
1802 * second filter driver above s->base (== target).
1803 */
1804 iter_shared_perms = BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE;
1805
1806 for (iter = bdrv_filter_or_cow_bs(bs); iter != target;
1807 iter = bdrv_filter_or_cow_bs(iter))
1808 {
1809 if (iter == filtered_target) {
1810 /*
1811 * From here on, all nodes are filters on the base.
1812 * This allows us to share BLK_PERM_CONSISTENT_READ.
1813 */
1814 iter_shared_perms |= BLK_PERM_CONSISTENT_READ;
1815 }
1816
Kevin Wolf4ef85a92017-01-25 19:16:34 +01001817 ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
Max Reitz3f072a72019-06-12 16:27:32 +02001818 iter_shared_perms, errp);
Kevin Wolf4ef85a92017-01-25 19:16:34 +01001819 if (ret < 0) {
1820 goto fail;
1821 }
Alberto Garciaf3ede4b2016-10-28 10:08:09 +03001822 }
Alberto Garciaef53dc02019-03-12 18:48:42 +02001823
1824 if (bdrv_freeze_backing_chain(mirror_top_bs, target, errp) < 0) {
1825 goto fail;
1826 }
Alberto Garciaf3ede4b2016-10-28 10:08:09 +03001827 }
Alberto Garcia10f3cd12015-11-02 16:51:53 +02001828
Max Reitz12aa4082018-06-13 20:18:12 +02001829 QTAILQ_INIT(&s->ops_in_flight);
1830
John Snow5ccac6f2016-11-08 01:50:37 -05001831 trace_mirror_start(bs, s, opaque);
Kevin Wolfda01ff72018-04-13 17:31:02 +02001832 job_start(&s->common.job);
Vladimir Sementsov-Ogievskiycc19f172019-06-06 18:41:29 +03001833
1834 return &s->common;
Kevin Wolf4ef85a92017-01-25 19:16:34 +01001835
1836fail:
1837 if (s) {
Max Reitz7a25fcd2017-04-03 19:51:49 +02001838 /* Make sure this BDS does not go away until we have completed the graph
1839 * changes below */
1840 bdrv_ref(mirror_top_bs);
1841
Kevin Wolf4ef85a92017-01-25 19:16:34 +01001842 g_free(s->replaces);
1843 blk_unref(s->target);
Max Reitz429076e2018-06-13 20:18:19 +02001844 bs_opaque->job = NULL;
Alberto Garciae917e2c2018-11-22 17:00:26 +02001845 if (s->dirty_bitmap) {
Vladimir Sementsov-Ogievskiy5deb6cb2019-09-16 17:19:09 +03001846 bdrv_release_dirty_bitmap(s->dirty_bitmap);
Alberto Garciae917e2c2018-11-22 17:00:26 +02001847 }
Kevin Wolf4ad35182018-04-19 17:30:16 +02001848 job_early_fail(&s->common.job);
Kevin Wolf4ef85a92017-01-25 19:16:34 +01001849 }
1850
Max Reitzf94dc3b2019-05-22 19:03:47 +02001851 bs_opaque->stop = true;
1852 bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing,
1853 &error_abort);
Max Reitz3f072a72019-06-12 16:27:32 +02001854 bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort);
Max Reitz7a25fcd2017-04-03 19:51:49 +02001855
1856 bdrv_unref(mirror_top_bs);
Vladimir Sementsov-Ogievskiycc19f172019-06-06 18:41:29 +03001857
1858 return NULL;
Paolo Bonzini893f7eb2012-10-18 16:49:23 +02001859}
Fam Zheng03544a62013-12-16 14:45:30 +08001860
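/*
 * Entry point used by drive-mirror/blockdev-mirror (see blockdev.c). As an
 * illustration only, a QMP command along the lines of
 *   { "execute": "blockdev-mirror",
 *     "arguments": { "job-id": "job0", "device": "drive0",
 *                    "target": "target0", "sync": "full" } }
 * ends up here with is_none_mode == false and base == NULL; the exact
 * argument set is defined by the QAPI schema, not by this file.
 */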
Alberto Garcia71aa9862016-07-05 17:28:57 +03001861void mirror_start(const char *job_id, BlockDriverState *bs,
1862 BlockDriverState *target, const char *replaces,
John Snowa1999b32018-09-06 09:02:11 -04001863 int creation_flags, int64_t speed,
1864 uint32_t granularity, int64_t buf_size,
Max Reitz274fcce2016-06-10 20:57:47 +02001865 MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
Max Reitzcdf3bc92019-07-24 19:12:30 +02001866 bool zero_target,
Max Reitz274fcce2016-06-10 20:57:47 +02001867 BlockdevOnError on_source_error,
Fam Zheng03544a62013-12-16 14:45:30 +08001868 BlockdevOnError on_target_error,
Max Reitz481deba2018-06-13 20:18:22 +02001869 bool unmap, const char *filter_node_name,
1870 MirrorCopyMode copy_mode, Error **errp)
Fam Zheng03544a62013-12-16 14:45:30 +08001871{
1872 bool is_none_mode;
1873 BlockDriverState *base;
1874
John Snowc8b56502019-07-29 16:35:52 -04001875 if ((mode == MIRROR_SYNC_MODE_INCREMENTAL) ||
1876 (mode == MIRROR_SYNC_MODE_BITMAP)) {
1877 error_setg(errp, "Sync mode '%s' not supported",
1878 MirrorSyncMode_str(mode));
John Snowd58d8452015-04-17 19:49:58 -04001879 return;
1880 }
Fam Zheng03544a62013-12-16 14:45:30 +08001881 is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
Max Reitz3f072a72019-06-12 16:27:32 +02001882 base = mode == MIRROR_SYNC_MODE_TOP ? bdrv_backing_chain_next(bs) : NULL;
John Snowa1999b32018-09-06 09:02:11 -04001883 mirror_start_job(job_id, bs, creation_flags, target, replaces,
Max Reitzcdf3bc92019-07-24 19:12:30 +02001884 speed, granularity, buf_size, backing_mode, zero_target,
Fam Zheng51ccfa22017-04-21 20:27:03 +08001885 on_source_error, on_target_error, unmap, NULL, NULL,
Kevin Wolf6cdbceb2017-02-20 18:10:05 +01001886 &mirror_job_driver, is_none_mode, base, false,
Max Reitz481deba2018-06-13 20:18:22 +02001887 filter_node_name, true, copy_mode, errp);
Fam Zheng03544a62013-12-16 14:45:30 +08001888}
1889
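/*
 * Active commit reuses the mirror machinery: the base is temporarily
 * reopened read-write if necessary and mirror_start_job() is started with
 * commit_active_job_driver, using the base as the mirror target.
 */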
Vladimir Sementsov-Ogievskiycc19f172019-06-06 18:41:29 +03001890BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
1891 BlockDriverState *base, int creation_flags,
1892 int64_t speed, BlockdevOnError on_error,
1893 const char *filter_node_name,
1894 BlockCompletionFunc *cb, void *opaque,
1895 bool auto_complete, Error **errp)
Fam Zheng03544a62013-12-16 14:45:30 +08001896{
Alberto Garcia1ba79382018-11-12 16:00:40 +02001897 bool base_read_only;
Vladimir Sementsov-Ogievskiyeb5becc2021-02-02 15:49:48 +03001898 BlockJob *job;
Jeff Cody4da83582014-01-24 09:02:36 -05001899
Alberto Garcia1ba79382018-11-12 16:00:40 +02001900 base_read_only = bdrv_is_read_only(base);
Jeff Cody4da83582014-01-24 09:02:36 -05001901
Alberto Garcia1ba79382018-11-12 16:00:40 +02001902 if (base_read_only) {
1903 if (bdrv_reopen_set_read_only(base, false, errp) < 0) {
Vladimir Sementsov-Ogievskiycc19f172019-06-06 18:41:29 +03001904 return NULL;
Alberto Garcia1ba79382018-11-12 16:00:40 +02001905 }
Fam Zheng20a63d22013-12-16 14:45:31 +08001906 }
Jeff Cody4da83582014-01-24 09:02:36 -05001907
Vladimir Sementsov-Ogievskiyeb5becc2021-02-02 15:49:48 +03001908 job = mirror_start_job(
Vladimir Sementsov-Ogievskiycc19f172019-06-06 18:41:29 +03001909 job_id, bs, creation_flags, base, NULL, speed, 0, 0,
Max Reitzcdf3bc92019-07-24 19:12:30 +02001910 MIRROR_LEAVE_BACKING_CHAIN, false,
Fam Zheng51ccfa22017-04-21 20:27:03 +08001911 on_error, on_error, true, cb, opaque,
Kevin Wolf6cdbceb2017-02-20 18:10:05 +01001912 &commit_active_job_driver, false, base, auto_complete,
Max Reitz481deba2018-06-13 20:18:22 +02001913 filter_node_name, false, MIRROR_COPY_MODE_BACKGROUND,
Vladimir Sementsov-Ogievskiyeb5becc2021-02-02 15:49:48 +03001914 errp);
1915 if (!job) {
Jeff Cody4da83582014-01-24 09:02:36 -05001916 goto error_restore_flags;
1917 }
1918
Vladimir Sementsov-Ogievskiyeb5becc2021-02-02 15:49:48 +03001919 return job;
Jeff Cody4da83582014-01-24 09:02:36 -05001920
1921error_restore_flags:
1922 /* ignore error and errp for bdrv_reopen, because we want to propagate
1923 * the original error */
Alberto Garcia1ba79382018-11-12 16:00:40 +02001924 if (base_read_only) {
1925 bdrv_reopen_set_read_only(base, true, NULL);
1926 }
Vladimir Sementsov-Ogievskiycc19f172019-06-06 18:41:29 +03001927 return NULL;
Fam Zheng03544a62013-12-16 14:45:30 +08001928}