/*
 * Image mirroring
 *
 * Copyright Red Hat, Inc. 2012
 *
 * Authors:
 *  Paolo Bonzini  <pbonzini@redhat.com>
 *
 * This work is licensed under the terms of the GNU LGPL, version 2 or later.
 * See the COPYING.LIB file in the top-level directory.
 *
 */

#include "trace.h"
#include "block/blockjob.h"
#include "block/block_int.h"
#include "qemu/ratelimit.h"
#include "qemu/bitmap.h"

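/* Tuning knobs: the job coroutine yields back to the main loop at least
 * every SLICE_TIME nanoseconds, and at most MAX_IN_FLIGHT asynchronous
 * copy operations are kept pending at any time. */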
#define SLICE_TIME    100000000ULL /* ns */
#define MAX_IN_FLIGHT 16

/* The mirroring buffer is a list of granularity-sized chunks.
 * Free chunks are organized in a list.
 */
typedef struct MirrorBuffer {
    QSIMPLEQ_ENTRY(MirrorBuffer) next;
} MirrorBuffer;

typedef struct MirrorBlockJob {
    BlockJob common;
    RateLimit limit;
    BlockDriverState *target;
    MirrorSyncMode mode;
    BlockdevOnError on_source_error, on_target_error;
    bool synced;
    bool should_complete;
    int64_t sector_num;             /* current position of the iterator */
    int64_t granularity;            /* chunk size in bytes, a power of two */
    size_t buf_size;
    unsigned long *cow_bitmap;      /* chunks copied at least once; only
                                     * allocated when we must do COW for
                                     * the target ourselves */
    HBitmapIter hbi;                /* iterator over the dirty bitmap */
    uint8_t *buf;                   /* copy buffer, carved into chunks */
    QSIMPLEQ_HEAD(, MirrorBuffer) buf_free;
    int buf_free_count;

    unsigned long *in_flight_bitmap;    /* one bit per in-flight chunk */
    int in_flight;
    int ret;                        /* first error to report, if any */
} MirrorBlockJob;

/* A single in-flight copy operation; it doubles as the opaque value for
 * the AIO read and write callbacks. */
typedef struct MirrorOp {
    MirrorBlockJob *s;
    QEMUIOVector qiov;
    int64_t sector_num;
    int nb_sectors;
} MirrorOp;

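/* Map an I/O error to the action selected by the user's on-source-error or
 * on-target-error policy for the side on which it happened.  Any error
 * invalidates the "synced" state until the target catches up again. */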
static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read,
                                            int error)
{
    s->synced = false;
    if (read) {
        return block_job_error_action(&s->common, s->common.bs,
                                      s->on_source_error, true, error);
    } else {
        return block_job_error_action(&s->common, s->target,
                                      s->on_target_error, false, error);
    }
}

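/* Common completion path for reads and writes: return the op's buffers to
 * the free list, clear the chunks' in-flight bits, record successfully
 * copied chunks in the COW bitmap, and re-enter the job coroutine, which
 * may be waiting for a free chunk or for all I/O to drain. */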
static void mirror_iteration_done(MirrorOp *op, int ret)
{
    MirrorBlockJob *s = op->s;
    struct iovec *iov;
    int64_t chunk_num;
    int i, nb_chunks, sectors_per_chunk;

    trace_mirror_iteration_done(s, op->sector_num, op->nb_sectors, ret);

    s->in_flight--;
    iov = op->qiov.iov;
    for (i = 0; i < op->qiov.niov; i++) {
        MirrorBuffer *buf = (MirrorBuffer *) iov[i].iov_base;
        QSIMPLEQ_INSERT_TAIL(&s->buf_free, buf, next);
        s->buf_free_count++;
    }

    sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
    chunk_num = op->sector_num / sectors_per_chunk;
    nb_chunks = op->nb_sectors / sectors_per_chunk;
    bitmap_clear(s->in_flight_bitmap, chunk_num, nb_chunks);
    if (s->cow_bitmap && ret >= 0) {
        bitmap_set(s->cow_bitmap, chunk_num, nb_chunks);
    }

    /* Free the iov array allocated by qemu_iovec_init(); the chunks it
     * pointed to are already back on buf_free.  Without this, the array
     * itself would leak. */
    qemu_iovec_destroy(&op->qiov);
    g_slice_free(MirrorOp, op);
    qemu_coroutine_enter(s->common.co, NULL);
}

static void mirror_write_complete(void *opaque, int ret)
{
    MirrorOp *op = opaque;
    MirrorBlockJob *s = op->s;
    if (ret < 0) {
        BlockDriverState *source = s->common.bs;
        BlockErrorAction action;

        /* Re-dirty the failed range so that it is copied again. */
        bdrv_set_dirty(source, op->sector_num, op->nb_sectors);
        action = mirror_error_action(s, false, -ret);
        if (action == BDRV_ACTION_REPORT && s->ret >= 0) {
            s->ret = ret;
        }
    }
    mirror_iteration_done(op, ret);
}

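/* Read side of a copy operation.  On success, chain straight into the
 * write to the target; on failure, re-dirty the range so it is retried
 * and finish the operation with the error. */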
static void mirror_read_complete(void *opaque, int ret)
{
    MirrorOp *op = opaque;
    MirrorBlockJob *s = op->s;
    if (ret < 0) {
        BlockDriverState *source = s->common.bs;
        BlockErrorAction action;

        bdrv_set_dirty(source, op->sector_num, op->nb_sectors);
        action = mirror_error_action(s, true, -ret);
        if (action == BDRV_ACTION_REPORT && s->ret >= 0) {
            s->ret = ret;
        }

        mirror_iteration_done(op, ret);
        return;
    }
    bdrv_aio_writev(s->target, op->sector_num, &op->qiov, op->nb_sectors,
                    mirror_write_complete, op);
}

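/* Start one asynchronous copy operation: take the next dirty sector from
 * the dirty-bitmap iterator, widen it to a whole cluster when we must do
 * copy-on-write for the target ourselves, coalesce as many adjacent dirty
 * chunks as free buffers allow, and submit a single bdrv_aio_readv() that
 * continues in mirror_read_complete()/mirror_write_complete(). */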
static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
{
    BlockDriverState *source = s->common.bs;
    int nb_sectors, sectors_per_chunk, nb_chunks;
    int64_t end, sector_num, next_chunk, next_sector, hbitmap_next_sector;
    MirrorOp *op;

    s->sector_num = hbitmap_iter_next(&s->hbi);
    if (s->sector_num < 0) {
        bdrv_dirty_iter_init(source, &s->hbi);
        s->sector_num = hbitmap_iter_next(&s->hbi);
        trace_mirror_restart_iter(s, bdrv_get_dirty_count(source));
        assert(s->sector_num >= 0);
    }

    hbitmap_next_sector = s->sector_num;
    sector_num = s->sector_num;
    sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
    end = s->common.len >> BDRV_SECTOR_BITS;

    /* Extend the QEMUIOVector to include all adjacent blocks that will
     * be copied in this operation.
     *
     * We have to do this if we have no backing file yet in the destination,
     * and the cluster size is very large.  Then we need to do COW ourselves.
     * The first time a cluster is copied, copy it entirely.  Note that,
     * because both the granularity and the cluster size are powers of two,
     * the number of sectors to copy cannot exceed one cluster.
     *
     * We also want to extend the QEMUIOVector to include more adjacent
     * dirty blocks if possible, to limit the number of I/O operations and
     * run efficiently even with a small granularity.
     */
    nb_chunks = 0;
    nb_sectors = 0;
    next_sector = sector_num;
    next_chunk = sector_num / sectors_per_chunk;

    /* Wait for I/O to this cluster (from a previous iteration) to be done. */
    while (test_bit(next_chunk, s->in_flight_bitmap)) {
        trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
        qemu_coroutine_yield();
    }

    do {
        int added_sectors, added_chunks;

        if (!bdrv_get_dirty(source, next_sector) ||
            test_bit(next_chunk, s->in_flight_bitmap)) {
            assert(nb_sectors > 0);
            break;
        }

        added_sectors = sectors_per_chunk;
        if (s->cow_bitmap && !test_bit(next_chunk, s->cow_bitmap)) {
            bdrv_round_to_clusters(s->target,
                                   next_sector, added_sectors,
                                   &next_sector, &added_sectors);

            /* On the first iteration, the rounding may make us copy
             * sectors before the first dirty one.
             */
            if (next_sector < sector_num) {
                assert(nb_sectors == 0);
                sector_num = next_sector;
                next_chunk = next_sector / sectors_per_chunk;
            }
        }

        added_sectors = MIN(added_sectors, end - (sector_num + nb_sectors));
        added_chunks = (added_sectors + sectors_per_chunk - 1) / sectors_per_chunk;

        /* When doing COW, it may happen that there is not enough space for
         * a full cluster.  Wait if that is the case.
         */
        while (nb_chunks == 0 && s->buf_free_count < added_chunks) {
            trace_mirror_yield_buf_busy(s, nb_chunks, s->in_flight);
            qemu_coroutine_yield();
        }
        if (s->buf_free_count < nb_chunks + added_chunks) {
            trace_mirror_break_buf_busy(s, nb_chunks, s->in_flight);
            break;
        }

        /* We have enough free space to copy these sectors. */
        bitmap_set(s->in_flight_bitmap, next_chunk, added_chunks);

        nb_sectors += added_sectors;
        nb_chunks += added_chunks;
        next_sector += added_sectors;
        next_chunk += added_chunks;
    } while (next_sector < end);

    /* Allocate a MirrorOp that is used as an AIO callback. */
    op = g_slice_new(MirrorOp);
    op->s = s;
    op->sector_num = sector_num;
    op->nb_sectors = nb_sectors;

    /* Now make a QEMUIOVector taking enough granularity-sized chunks
     * from s->buf_free.
     */
    qemu_iovec_init(&op->qiov, nb_chunks);
    next_sector = sector_num;
    while (nb_chunks-- > 0) {
        MirrorBuffer *buf = QSIMPLEQ_FIRST(&s->buf_free);
        QSIMPLEQ_REMOVE_HEAD(&s->buf_free, next);
        s->buf_free_count--;
        qemu_iovec_add(&op->qiov, buf, s->granularity);

        /* Advance the HBitmapIter in parallel, so that we do not examine
         * the same sector twice.
         */
        if (next_sector > hbitmap_next_sector
            && bdrv_get_dirty(source, next_sector)) {
            hbitmap_next_sector = hbitmap_iter_next(&s->hbi);
        }

        next_sector += sectors_per_chunk;
    }

    bdrv_reset_dirty(source, sector_num, nb_sectors);

    /* Copy the dirty cluster. */
    s->in_flight++;
    trace_mirror_one_iteration(s, sector_num, nb_sectors);
    bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors,
                   mirror_read_complete, op);
}

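/* Carve the pre-allocated copy buffer into granularity-sized chunks and
 * put them all on the free list.  The MirrorBuffer header is overlaid on
 * the first bytes of each chunk while it sits on the list. */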
static void mirror_free_init(MirrorBlockJob *s)
{
    int granularity = s->granularity;
    size_t buf_size = s->buf_size;
    uint8_t *buf = s->buf;

    assert(s->buf_free_count == 0);
    QSIMPLEQ_INIT(&s->buf_free);
    while (buf_size != 0) {
        MirrorBuffer *cur = (MirrorBuffer *)buf;
        QSIMPLEQ_INSERT_TAIL(&s->buf_free, cur, next);
        s->buf_free_count++;
        buf_size -= granularity;
        buf += granularity;
    }
}

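/* Wait for all in-flight operations to complete.  Progress is guaranteed
 * because each completion re-enters this coroutine from
 * mirror_iteration_done(). */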
static void mirror_drain(MirrorBlockJob *s)
{
    while (s->in_flight > 0) {
        qemu_coroutine_yield();
    }
}

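/* The body of the job coroutine.  It runs in two phases: for the "full"
 * and "top" sync modes it first marks every sector that needs copying in
 * the dirty bitmap; it then loops, starting asynchronous copies of dirty
 * chunks and throttling itself with block_job_sleep_ns(), until the job
 * fails, is cancelled, or is completed once source and target are in
 * sync. */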
static void coroutine_fn mirror_run(void *opaque)
{
    MirrorBlockJob *s = opaque;
    BlockDriverState *bs = s->common.bs;
    int64_t sector_num, end, sectors_per_chunk, length;
    uint64_t last_pause_ns;
    BlockDriverInfo bdi;
    char backing_filename[1024];
    int ret = 0;
    int n;

    if (block_job_is_cancelled(&s->common)) {
        goto immediate_exit;
    }

    s->common.len = bdrv_getlength(bs);
    if (s->common.len <= 0) {
        block_job_completed(&s->common, s->common.len);
        return;
    }

    length = (bdrv_getlength(bs) + s->granularity - 1) / s->granularity;
    s->in_flight_bitmap = bitmap_new(length);

    /* If we have no backing file yet in the destination, we cannot let
     * the destination do COW.  Instead, we copy sectors around the
     * dirty data if needed.  We need a bitmap to do that.
     */
    bdrv_get_backing_filename(s->target, backing_filename,
                              sizeof(backing_filename));
    if (backing_filename[0] && !s->target->backing_hd) {
        bdrv_get_info(s->target, &bdi);
        if (s->granularity < bdi.cluster_size) {
            s->buf_size = MAX(s->buf_size, bdi.cluster_size);
            s->cow_bitmap = bitmap_new(length);
        }
    }

    end = s->common.len >> BDRV_SECTOR_BITS;
    s->buf = qemu_blockalign(bs, s->buf_size);
    sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
    mirror_free_init(s);

    if (s->mode != MIRROR_SYNC_MODE_NONE) {
        /* First part, loop on the sectors and initialize the dirty bitmap. */
        BlockDriverState *base;
        base = s->mode == MIRROR_SYNC_MODE_FULL ? NULL : bs->backing_hd;
        for (sector_num = 0; sector_num < end; ) {
            int64_t next = (sector_num | (sectors_per_chunk - 1)) + 1;
            ret = bdrv_co_is_allocated_above(bs, base,
                                             sector_num, next - sector_num, &n);

            if (ret < 0) {
                goto immediate_exit;
            }

            assert(n > 0);
            if (ret == 1) {
                bdrv_set_dirty(bs, sector_num, n);
                sector_num = next;
            } else {
                sector_num += n;
            }
        }
    }

    bdrv_dirty_iter_init(bs, &s->hbi);
    last_pause_ns = qemu_get_clock_ns(rt_clock);
    for (;;) {
        uint64_t delay_ns;
        int64_t cnt;
        bool should_complete;

        if (s->ret < 0) {
            ret = s->ret;
            goto immediate_exit;
        }

        cnt = bdrv_get_dirty_count(bs);

        /* Note that even when no rate limit is applied we need to yield
         * periodically with no pending I/O so that qemu_aio_flush() returns.
         * We do so every SLICE_TIME nanoseconds, or when there is an error,
         * or when the source is clean, whichever comes first.
         */
        if (qemu_get_clock_ns(rt_clock) - last_pause_ns < SLICE_TIME &&
            s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
            if (s->in_flight == MAX_IN_FLIGHT || s->buf_free_count == 0 ||
                (cnt == 0 && s->in_flight > 0)) {
                trace_mirror_yield(s, s->in_flight, s->buf_free_count, cnt);
                qemu_coroutine_yield();
                continue;
            } else if (cnt != 0) {
                mirror_iteration(s);
                continue;
            }
        }

        should_complete = false;
        if (s->in_flight == 0 && cnt == 0) {
            trace_mirror_before_flush(s);
            ret = bdrv_flush(s->target);
            if (ret < 0) {
                if (mirror_error_action(s, false, -ret) == BDRV_ACTION_REPORT) {
                    goto immediate_exit;
                }
            } else {
                /* We're out of the streaming phase.  From now on, if the job
                 * is cancelled we will actually complete all pending I/O and
                 * report completion.  This way, block-job-cancel will leave
                 * the target in a consistent state.
                 */
                s->common.offset = end * BDRV_SECTOR_SIZE;
                if (!s->synced) {
                    block_job_ready(&s->common);
                    s->synced = true;
                }

                should_complete = s->should_complete ||
                    block_job_is_cancelled(&s->common);
                cnt = bdrv_get_dirty_count(bs);
            }
        }

        if (cnt == 0 && should_complete) {
            /* The dirty bitmap is not updated while operations are pending.
             * If we're about to exit, wait for pending operations before
             * calling bdrv_get_dirty_count(bs), or we may exit while the
             * source has dirty data to copy!
             *
             * Note that the guest can still submit I/O while we drain,
             * dirtying more sectors.
             */
            trace_mirror_before_drain(s, cnt);
            bdrv_drain_all();
            cnt = bdrv_get_dirty_count(bs);
        }

        ret = 0;
        trace_mirror_before_sleep(s, cnt, s->synced);
        if (!s->synced) {
            /* Publish progress */
            s->common.offset = (end - cnt) * BDRV_SECTOR_SIZE;

            if (s->common.speed) {
                delay_ns = ratelimit_calculate_delay(&s->limit, sectors_per_chunk);
            } else {
                delay_ns = 0;
            }

            block_job_sleep_ns(&s->common, rt_clock, delay_ns);
            if (block_job_is_cancelled(&s->common)) {
                break;
            }
        } else if (!should_complete) {
            delay_ns = (s->in_flight == 0 && cnt == 0 ? SLICE_TIME : 0);
            block_job_sleep_ns(&s->common, rt_clock, delay_ns);
        } else if (cnt == 0) {
            /* The two disks are in sync.  Exit and report successful
             * completion.
             */
            assert(QLIST_EMPTY(&bs->tracked_requests));
            s->common.cancelled = false;
            break;
        }
        last_pause_ns = qemu_get_clock_ns(rt_clock);
    }

immediate_exit:
    if (s->in_flight > 0) {
        /* We get here only if something went wrong.  Either the job failed,
         * or it was cancelled prematurely so that we do not guarantee that
         * the target is a copy of the source.
         */
        assert(ret < 0 || (!s->synced && block_job_is_cancelled(&s->common)));
        mirror_drain(s);
    }

    assert(s->in_flight == 0);
    qemu_vfree(s->buf);
    g_free(s->cow_bitmap);
    g_free(s->in_flight_bitmap);
    bdrv_set_dirty_tracking(bs, 0);
    bdrv_iostatus_disable(s->target);
    if (s->should_complete && ret == 0) {
        if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
            bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL);
        }
        bdrv_swap(s->target, s->common.bs);
    }
    bdrv_close(s->target);
    bdrv_delete(s->target);
    block_job_completed(&s->common, ret);
}

static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp)
{
    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);

    if (speed < 0) {
        error_set(errp, QERR_INVALID_PARAMETER, "speed");
        return;
    }
    ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
}

static void mirror_iostatus_reset(BlockJob *job)
{
    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);

    bdrv_iostatus_reset(s->target);
}

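/* Handler for block-job-complete.  Switching to the target only happens
 * after the user explicitly completes the job, so make sure the target's
 * backing file chain is open, then let mirror_run() observe
 * should_complete on its next pass. */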
static void mirror_complete(BlockJob *job, Error **errp)
{
    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
    int ret;

    ret = bdrv_open_backing_file(s->target);
    if (ret < 0) {
        char backing_filename[PATH_MAX];
        bdrv_get_full_backing_filename(s->target, backing_filename,
                                       sizeof(backing_filename));
        error_set(errp, QERR_OPEN_FILE_FAILED, backing_filename);
        return;
    }
    if (!s->synced) {
        error_set(errp, QERR_BLOCK_JOB_NOT_READY, job->bs->device_name);
        return;
    }

    s->should_complete = true;
    block_job_resume(job);
}

static BlockJobType mirror_job_type = {
    .instance_size  = sizeof(MirrorBlockJob),
    .job_type       = "mirror",
    .set_speed      = mirror_set_speed,
    .iostatus_reset = mirror_iostatus_reset,
    .complete       = mirror_complete,
};

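/* Create and start a mirror job.  speed is in bytes per second (it is
 * converted to sectors per second for the rate limiter); granularity must
 * be 0 (auto-select from the target's cluster size) or a power of two;
 * buf_size is rounded up to at least one granularity-sized chunk. */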
void mirror_start(BlockDriverState *bs, BlockDriverState *target,
                  int64_t speed, int64_t granularity, int64_t buf_size,
                  MirrorSyncMode mode, BlockdevOnError on_source_error,
                  BlockdevOnError on_target_error,
                  BlockDriverCompletionFunc *cb,
                  void *opaque, Error **errp)
{
    MirrorBlockJob *s;

    if (granularity == 0) {
        /* Choose the default granularity based on the target file's cluster
         * size, clamped between 4k and 64k. */
        BlockDriverInfo bdi;
        if (bdrv_get_info(target, &bdi) >= 0 && bdi.cluster_size != 0) {
            granularity = MAX(4096, bdi.cluster_size);
            granularity = MIN(65536, granularity);
        } else {
            granularity = 65536;
        }
    }

    assert((granularity & (granularity - 1)) == 0);

    if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
         on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
        !bdrv_iostatus_is_enabled(bs)) {
        error_set(errp, QERR_INVALID_PARAMETER, "on-source-error");
        return;
    }

    s = block_job_create(&mirror_job_type, bs, speed, cb, opaque, errp);
    if (!s) {
        return;
    }

    s->on_source_error = on_source_error;
    s->on_target_error = on_target_error;
    s->target = target;
    s->mode = mode;
    s->granularity = granularity;
    s->buf_size = MAX(buf_size, granularity);

    bdrv_set_dirty_tracking(bs, granularity);
    bdrv_set_enable_write_cache(s->target, true);
    bdrv_set_on_error(s->target, on_target_error, on_target_error);
    bdrv_iostatus_enable(s->target);
    s->common.co = qemu_coroutine_create(mirror_run);
    trace_mirror_start(bs, s, s->common.co, opaque);
    qemu_coroutine_enter(s->common.co, s);
}
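
/* For illustration only (not part of this file's interfaces): management
 * clients normally reach mirror_start() through the drive-mirror QMP
 * command, handled in blockdev.c.  A minimal invocation, assuming a device
 * named "virtio0", might look like:
 *
 *   { "execute": "drive-mirror",
 *     "arguments": { "device": "virtio0",
 *                    "target": "/tmp/copy.qcow2",
 *                    "sync": "full" } }
 */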