/*
 * QEMU live block migration
 *
 * Copyright IBM, Corp. 2009
 *
 * Authors:
 *  Liran Schour   <lirans@il.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/queue.h"
#include "block.h"
#include "migration/misc.h"
#include "migration.h"
#include "migration/register.h"
#include "qemu-file.h"
#include "migration/vmstate.h"
#include "sysemu/block-backend.h"

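/* Block migration moves disk data in fixed 1 MiB chunks: each chunk covers
 * BDRV_SECTORS_PER_DIRTY_CHUNK 512-byte sectors, and the dirty bitmaps
 * created below use the same granularity.
 */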
#define BLOCK_SIZE                       (1 << 20)
#define BDRV_SECTORS_PER_DIRTY_CHUNK     (BLOCK_SIZE >> BDRV_SECTOR_BITS)

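/* Stream framing flags.  blk_send() sends each chunk header as a big-endian
 * 64-bit word holding (sector << BDRV_SECTOR_BITS) | flags, so the flags
 * occupy the low bits that a sector-aligned byte offset leaves free.
 */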
#define BLK_MIG_FLAG_DEVICE_BLOCK       0x01
#define BLK_MIG_FLAG_EOS                0x02
#define BLK_MIG_FLAG_PROGRESS           0x04
#define BLK_MIG_FLAG_ZERO_BLOCK         0x08

#define MAX_IS_ALLOCATED_SEARCH (65536 * BDRV_SECTOR_SIZE)

#define MAX_INFLIGHT_IO 512

//#define DEBUG_BLK_MIGRATION

#ifdef DEBUG_BLK_MIGRATION
#define DPRINTF(fmt, ...) \
    do { printf("blk_migration: " fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

typedef struct BlkMigDevState {
    /* Written during setup phase.  Can be read without a lock.  */
    BlockBackend *blk;
    char *blk_name;
    int shared_base;
    int64_t total_sectors;
    QSIMPLEQ_ENTRY(BlkMigDevState) entry;
    Error *blocker;

    /* Only used by migration thread.  Does not need a lock.  */
    int bulk_completed;
    int64_t cur_sector;
    int64_t cur_dirty;

    /* Data in the aio_bitmap is protected by block migration lock.
     * Allocation and free happen during setup and cleanup respectively.
     */
    unsigned long *aio_bitmap;

    /* Protected by block migration lock.  */
    int64_t completed_sectors;

    /* During migration this is protected by iothread lock / AioContext.
     * Allocation and free happen during setup and cleanup respectively.
     */
    BdrvDirtyBitmap *dirty_bitmap;
} BlkMigDevState;

typedef struct BlkMigBlock {
    /* Only used by migration thread.  */
    uint8_t *buf;
    BlkMigDevState *bmds;
    int64_t sector;
    int nr_sectors;
    struct iovec iov;
    QEMUIOVector qiov;
    BlockAIOCB *aiocb;

    /* Protected by block migration lock.  */
    int ret;
    QSIMPLEQ_ENTRY(BlkMigBlock) entry;
} BlkMigBlock;

typedef struct BlkMigState {
    QSIMPLEQ_HEAD(bmds_list, BlkMigDevState) bmds_list;
    int64_t total_sector_sum;
    bool zero_blocks;

    /* Protected by lock.  */
    QSIMPLEQ_HEAD(blk_list, BlkMigBlock) blk_list;
    int submitted;
    int read_done;

    /* Only used by migration thread.  Does not need a lock.  */
    int transferred;
    int prev_progress;
    int bulk_completed;

    /* Lock must be taken _inside_ the iothread lock and any AioContexts.  */
    QemuMutex lock;
} BlkMigState;

static BlkMigState block_mig_state;

static void blk_mig_lock(void)
{
    qemu_mutex_lock(&block_mig_state.lock);
}

static void blk_mig_unlock(void)
{
    qemu_mutex_unlock(&block_mig_state.lock);
}

/* Must run outside of the iothread lock during the bulk phase,
 * or the VM will stall.
 */

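/* Send one chunk over the wire: the sector/flags header word, the
 * length-prefixed device name, and then the BLOCK_SIZE payload.  When the
 * zero-blocks capability is enabled, an all-zero chunk is sent as a bare
 * ZERO_BLOCK header with no payload.
 */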
static void blk_send(QEMUFile *f, BlkMigBlock * blk)
{
    int len;
    uint64_t flags = BLK_MIG_FLAG_DEVICE_BLOCK;

    if (block_mig_state.zero_blocks &&
        buffer_is_zero(blk->buf, BLOCK_SIZE)) {
        flags |= BLK_MIG_FLAG_ZERO_BLOCK;
    }

    /* sector number and flags */
    qemu_put_be64(f, (blk->sector << BDRV_SECTOR_BITS)
                     | flags);

    /* device name */
    len = strlen(blk->bmds->blk_name);
    qemu_put_byte(f, len);
    qemu_put_buffer(f, (uint8_t *) blk->bmds->blk_name, len);

    /* if a block is zero we need to flush here since the network
     * bandwidth is now a lot higher than the storage device bandwidth.
     * thus if we queue zero blocks we slow down the migration */
    if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
        qemu_fflush(f);
        return;
    }

    qemu_put_buffer(f, blk->buf, BLOCK_SIZE);
}

int blk_mig_active(void)
{
    return !QSIMPLEQ_EMPTY(&block_mig_state.bmds_list);
}

uint64_t blk_mig_bytes_transferred(void)
{
    BlkMigDevState *bmds;
    uint64_t sum = 0;

    blk_mig_lock();
    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        sum += bmds->completed_sectors;
    }
    blk_mig_unlock();
    return sum << BDRV_SECTOR_BITS;
}

uint64_t blk_mig_bytes_remaining(void)
{
    return blk_mig_bytes_total() - blk_mig_bytes_transferred();
}

uint64_t blk_mig_bytes_total(void)
{
    BlkMigDevState *bmds;
    uint64_t sum = 0;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        sum += bmds->total_sectors;
    }
    return sum << BDRV_SECTOR_BITS;
}


/* Called with migration lock held.  */

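/* The aio_bitmap holds one bit per chunk, packed into unsigned longs; a set
 * bit means an async read of that chunk is still in flight, and the dirty
 * pass drains the device before touching such a chunk again.
 */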
static int bmds_aio_inflight(BlkMigDevState *bmds, int64_t sector)
{
    int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;

    if (sector < blk_nb_sectors(bmds->blk)) {
        return !!(bmds->aio_bitmap[chunk / (sizeof(unsigned long) * 8)] &
            (1UL << (chunk % (sizeof(unsigned long) * 8))));
    } else {
        return 0;
    }
}

/* Called with migration lock held.  */

static void bmds_set_aio_inflight(BlkMigDevState *bmds, int64_t sector_num,
                                  int nb_sectors, int set)
{
    int64_t start, end;
    unsigned long val, idx, bit;

    start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
    end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;

    for (; start <= end; start++) {
        idx = start / (sizeof(unsigned long) * 8);
        bit = start % (sizeof(unsigned long) * 8);
        val = bmds->aio_bitmap[idx];
        if (set) {
            val |= 1UL << bit;
        } else {
            val &= ~(1UL << bit);
        }
        bmds->aio_bitmap[idx] = val;
    }
}

static void alloc_aio_bitmap(BlkMigDevState *bmds)
{
    BlockBackend *bb = bmds->blk;
    int64_t bitmap_size;

    bitmap_size = blk_nb_sectors(bb) + BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
    bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;

    bmds->aio_bitmap = g_malloc0(bitmap_size);
}

/* Never hold migration lock when yielding to the main loop!  */

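/* Completion callback for the async chunk reads: queue the finished block
 * on blk_list for flush_blks() and clear its in-flight bits.
 */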
static void blk_mig_read_cb(void *opaque, int ret)
{
    BlkMigBlock *blk = opaque;

    blk_mig_lock();
    blk->ret = ret;

    QSIMPLEQ_INSERT_TAIL(&block_mig_state.blk_list, blk, entry);
    bmds_set_aio_inflight(blk->bmds, blk->sector, blk->nr_sectors, 0);

    block_mig_state.submitted--;
    block_mig_state.read_done++;
    assert(block_mig_state.submitted >= 0);
    blk_mig_unlock();
}

/* Called with no lock taken.  */

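/* Queue an async read for the next bulk-phase chunk of one device.  With a
 * shared base image, runs of unallocated sectors are skipped first.
 * Returns 1 once the whole device has been queued, 0 otherwise.
 */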
static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
{
    int64_t total_sectors = bmds->total_sectors;
    int64_t cur_sector = bmds->cur_sector;
    BlockBackend *bb = bmds->blk;
    BlkMigBlock *blk;
    int nr_sectors;
    int64_t count;

    if (bmds->shared_base) {
        qemu_mutex_lock_iothread();
        aio_context_acquire(blk_get_aio_context(bb));
        /* Skip unallocated sectors; intentionally treats failure or
         * partial sector as an allocated sector */
        while (cur_sector < total_sectors &&
               !bdrv_is_allocated(blk_bs(bb), cur_sector * BDRV_SECTOR_SIZE,
                                  MAX_IS_ALLOCATED_SEARCH, &count)) {
            if (count < BDRV_SECTOR_SIZE) {
                break;
            }
            cur_sector += count >> BDRV_SECTOR_BITS;
        }
        aio_context_release(blk_get_aio_context(bb));
        qemu_mutex_unlock_iothread();
    }

    if (cur_sector >= total_sectors) {
        bmds->cur_sector = bmds->completed_sectors = total_sectors;
        return 1;
    }

    bmds->completed_sectors = cur_sector;

    cur_sector &= ~((int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK - 1);

    /* we are going to transfer a full block even if it is not allocated */
    nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;

    if (total_sectors - cur_sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
        nr_sectors = total_sectors - cur_sector;
    }

    blk = g_new(BlkMigBlock, 1);
    blk->buf = g_malloc(BLOCK_SIZE);
    blk->bmds = bmds;
    blk->sector = cur_sector;
    blk->nr_sectors = nr_sectors;

    blk->iov.iov_base = blk->buf;
    blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
    qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);

    blk_mig_lock();
    block_mig_state.submitted++;
    blk_mig_unlock();

    /* We do not know if bs is under the main thread (and thus does
     * not acquire the AioContext when doing AIO) or rather under
     * dataplane.  Thus acquire both the iothread mutex and the
     * AioContext.
     *
     * This is ugly and will disappear when we make bdrv_* thread-safe,
     * without the need to acquire the AioContext.
     */
    qemu_mutex_lock_iothread();
    aio_context_acquire(blk_get_aio_context(bmds->blk));
    blk->aiocb = blk_aio_preadv(bb, cur_sector * BDRV_SECTOR_SIZE, &blk->qiov,
                                0, blk_mig_read_cb, blk);

    bdrv_reset_dirty_bitmap(bmds->dirty_bitmap, cur_sector, nr_sectors);
    aio_context_release(blk_get_aio_context(bmds->blk));
    qemu_mutex_unlock_iothread();

    bmds->cur_sector = cur_sector + nr_sectors;
    return (bmds->cur_sector >= total_sectors);
}

/* Called with iothread lock taken.  */

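/* Create one dirty bitmap per device, at BLOCK_SIZE granularity, so that
 * guest writes issued while the migration runs are tracked and can be
 * retransmitted by the dirty phase.
 */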
static int set_dirty_tracking(void)
{
    BlkMigDevState *bmds;
    int ret;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        bmds->dirty_bitmap = bdrv_create_dirty_bitmap(blk_bs(bmds->blk),
                                                      BLOCK_SIZE, NULL, NULL);
        if (!bmds->dirty_bitmap) {
            ret = -errno;
            goto fail;
        }
    }
    return 0;

fail:
    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        if (bmds->dirty_bitmap) {
            bdrv_release_dirty_bitmap(blk_bs(bmds->blk), bmds->dirty_bitmap);
        }
    }
    return ret;
}

/* Called with iothread lock taken.  */

static void unset_dirty_tracking(void)
{
    BlkMigDevState *bmds;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        bdrv_release_dirty_bitmap(blk_bs(bmds->blk), bmds->dirty_bitmap);
    }
}

static int init_blk_migration(QEMUFile *f)
{
    BlockDriverState *bs;
    BlkMigDevState *bmds;
    int64_t sectors;
    BdrvNextIterator it;
    int i, num_bs = 0;
    struct {
        BlkMigDevState *bmds;
        BlockDriverState *bs;
    } *bmds_bs;
    Error *local_err = NULL;
    int ret;

    block_mig_state.submitted = 0;
    block_mig_state.read_done = 0;
    block_mig_state.transferred = 0;
    block_mig_state.total_sector_sum = 0;
    block_mig_state.prev_progress = -1;
    block_mig_state.bulk_completed = 0;
    block_mig_state.zero_blocks = migrate_zero_blocks();

    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
        num_bs++;
    }
    bmds_bs = g_malloc0(num_bs * sizeof(*bmds_bs));

    for (i = 0, bs = bdrv_first(&it); bs; bs = bdrv_next(&it), i++) {
        if (bdrv_is_read_only(bs)) {
            continue;
        }

        sectors = bdrv_nb_sectors(bs);
        if (sectors <= 0) {
            ret = sectors;
            goto out;
        }

        bmds = g_new0(BlkMigDevState, 1);
        bmds->blk = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL);
        bmds->blk_name = g_strdup(bdrv_get_device_name(bs));
        bmds->bulk_completed = 0;
        bmds->total_sectors = sectors;
        bmds->completed_sectors = 0;
        bmds->shared_base = migrate_use_block_incremental();

        assert(i < num_bs);
        bmds_bs[i].bmds = bmds;
        bmds_bs[i].bs = bs;

        block_mig_state.total_sector_sum += sectors;

        if (bmds->shared_base) {
            DPRINTF("Start migration for %s with shared base image\n",
                    bdrv_get_device_name(bs));
        } else {
            DPRINTF("Start full migration for %s\n", bdrv_get_device_name(bs));
        }

        QSIMPLEQ_INSERT_TAIL(&block_mig_state.bmds_list, bmds, entry);
    }

    /* Can only insert new BDSes now because doing so while iterating block
     * devices may end up in a deadlock (iterating the new BDSes, too). */
    for (i = 0; i < num_bs; i++) {
        BlkMigDevState *bmds = bmds_bs[i].bmds;
        BlockDriverState *bs = bmds_bs[i].bs;

        if (bmds) {
            ret = blk_insert_bs(bmds->blk, bs, &local_err);
            if (ret < 0) {
                error_report_err(local_err);
                goto out;
            }

            alloc_aio_bitmap(bmds);
            error_setg(&bmds->blocker, "block device is in use by migration");
            bdrv_op_block_all(bs, bmds->blocker);
        }
    }

    ret = 0;
out:
    g_free(bmds_bs);
    return ret;
}

/* Called with no lock taken.  */

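/* Drive the bulk phase: issue one chunk read for the first device that has
 * not completed its bulk transfer, and emit a PROGRESS record whenever the
 * overall percentage changes.  Returns 0 once every device is done.
 */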
static int blk_mig_save_bulked_block(QEMUFile *f)
{
    int64_t completed_sector_sum = 0;
    BlkMigDevState *bmds;
    int progress;
    int ret = 0;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        if (bmds->bulk_completed == 0) {
            if (mig_save_device_bulk(f, bmds) == 1) {
                /* completed bulk section for this device */
                bmds->bulk_completed = 1;
            }
            completed_sector_sum += bmds->completed_sectors;
            ret = 1;
            break;
        } else {
            completed_sector_sum += bmds->completed_sectors;
        }
    }

    if (block_mig_state.total_sector_sum != 0) {
        progress = completed_sector_sum * 100 /
                   block_mig_state.total_sector_sum;
    } else {
        progress = 100;
    }
    if (progress != block_mig_state.prev_progress) {
        block_mig_state.prev_progress = progress;
        qemu_put_be64(f, (progress << BDRV_SECTOR_BITS)
                         | BLK_MIG_FLAG_PROGRESS);
        DPRINTF("Completed %d %%\r", progress);
    }

    return ret;
}

static void blk_mig_reset_dirty_cursor(void)
{
    BlkMigDevState *bmds;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        bmds->cur_dirty = 0;
    }
}

/* Called with iothread lock and AioContext taken.  */

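/* Scan one device's dirty bitmap from bmds->cur_dirty and retransmit the
 * first dirty chunk found: asynchronously from the iterate stage
 * (is_async != 0), synchronously from block_save_complete().  Returns 1
 * once the cursor has swept the whole device.
 */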
static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
                                 int is_async)
{
    BlkMigBlock *blk;
    BlockDriverState *bs = blk_bs(bmds->blk);
    int64_t total_sectors = bmds->total_sectors;
    int64_t sector;
    int nr_sectors;
    int ret = -EIO;

    for (sector = bmds->cur_dirty; sector < bmds->total_sectors;) {
        blk_mig_lock();
        if (bmds_aio_inflight(bmds, sector)) {
            blk_mig_unlock();
            blk_drain(bmds->blk);
        } else {
            blk_mig_unlock();
        }
        bdrv_dirty_bitmap_lock(bmds->dirty_bitmap);
        if (bdrv_get_dirty_locked(bs, bmds->dirty_bitmap, sector)) {
            if (total_sectors - sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
                nr_sectors = total_sectors - sector;
            } else {
                nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
            }
            bdrv_reset_dirty_bitmap_locked(bmds->dirty_bitmap, sector, nr_sectors);
            bdrv_dirty_bitmap_unlock(bmds->dirty_bitmap);

            blk = g_new(BlkMigBlock, 1);
            blk->buf = g_malloc(BLOCK_SIZE);
            blk->bmds = bmds;
            blk->sector = sector;
            blk->nr_sectors = nr_sectors;

            if (is_async) {
                blk->iov.iov_base = blk->buf;
                blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
                qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);

                blk->aiocb = blk_aio_preadv(bmds->blk,
                                            sector * BDRV_SECTOR_SIZE,
                                            &blk->qiov, 0, blk_mig_read_cb,
                                            blk);

                blk_mig_lock();
                block_mig_state.submitted++;
                bmds_set_aio_inflight(bmds, sector, nr_sectors, 1);
                blk_mig_unlock();
            } else {
                ret = blk_pread(bmds->blk, sector * BDRV_SECTOR_SIZE, blk->buf,
                                nr_sectors * BDRV_SECTOR_SIZE);
                if (ret < 0) {
                    goto error;
                }
                blk_send(f, blk);

                g_free(blk->buf);
                g_free(blk);
            }

            sector += nr_sectors;
            bmds->cur_dirty = sector;
            break;
        }

        bdrv_dirty_bitmap_unlock(bmds->dirty_bitmap);
        sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
        bmds->cur_dirty = sector;
    }

    return (bmds->cur_dirty >= bmds->total_sectors);

error:
    DPRINTF("Error reading sector %" PRId64 "\n", sector);
    g_free(blk->buf);
    g_free(blk);
    return ret;
}

/* Called with iothread lock taken.
 *
 * return value:
 * 0: too much data for max_downtime
 * 1: little enough data for max_downtime
 */
static int blk_mig_save_dirty_block(QEMUFile *f, int is_async)
{
    BlkMigDevState *bmds;
    int ret = 1;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        aio_context_acquire(blk_get_aio_context(bmds->blk));
        ret = mig_save_device_dirty(f, bmds, is_async);
        aio_context_release(blk_get_aio_context(bmds->blk));
        if (ret <= 0) {
            break;
        }
    }

    return ret;
}

/* Called with no locks taken.  */

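/* Drain completed async reads from blk_list onto the wire, stopping early
 * if the migration rate limit is reached or a read reported an error.
 */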
static int flush_blks(QEMUFile *f)
{
    BlkMigBlock *blk;
    int ret = 0;

    DPRINTF("%s Enter submitted %d read_done %d transferred %d\n",
            __FUNCTION__, block_mig_state.submitted, block_mig_state.read_done,
            block_mig_state.transferred);

    blk_mig_lock();
    while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
        if (qemu_file_rate_limit(f)) {
            break;
        }
        if (blk->ret < 0) {
            ret = blk->ret;
            break;
        }

        QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
        blk_mig_unlock();
        blk_send(f, blk);
        blk_mig_lock();

        g_free(blk->buf);
        g_free(blk);

        block_mig_state.read_done--;
        block_mig_state.transferred++;
        assert(block_mig_state.read_done >= 0);
    }
    blk_mig_unlock();

    DPRINTF("%s Exit submitted %d read_done %d transferred %d\n", __FUNCTION__,
            block_mig_state.submitted, block_mig_state.read_done,
            block_mig_state.transferred);
    return ret;
}

/* Called with iothread lock taken.  */

static int64_t get_remaining_dirty(void)
{
    BlkMigDevState *bmds;
    int64_t dirty = 0;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        aio_context_acquire(blk_get_aio_context(bmds->blk));
        dirty += bdrv_get_dirty_count(bmds->dirty_bitmap);
        aio_context_release(blk_get_aio_context(bmds->blk));
    }

    return dirty << BDRV_SECTOR_BITS;
}


/* Called with iothread lock taken.  */
static void block_migration_cleanup_bmds(void)
{
    BlkMigDevState *bmds;
    AioContext *ctx;

    unset_dirty_tracking();

    while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
        QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry);
        bdrv_op_unblock_all(blk_bs(bmds->blk), bmds->blocker);
        error_free(bmds->blocker);

        /* Save ctx, because bmds->blk can disappear during blk_unref.  */
        ctx = blk_get_aio_context(bmds->blk);
        aio_context_acquire(ctx);
        blk_unref(bmds->blk);
        aio_context_release(ctx);

        g_free(bmds->blk_name);
        g_free(bmds->aio_bitmap);
        g_free(bmds);
    }
}

/* Called with iothread lock taken.  */
static void block_migration_cleanup(void *opaque)
{
    BlkMigBlock *blk;

    bdrv_drain_all();

    block_migration_cleanup_bmds();

    blk_mig_lock();
    while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
        QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
        g_free(blk->buf);
        g_free(blk);
    }
    blk_mig_unlock();
}

static int block_save_setup(QEMUFile *f, void *opaque)
{
    int ret;

    DPRINTF("Enter save live setup submitted %d transferred %d\n",
            block_mig_state.submitted, block_mig_state.transferred);

    qemu_mutex_lock_iothread();
    ret = init_blk_migration(f);
    if (ret < 0) {
        qemu_mutex_unlock_iothread();
        return ret;
    }

    /* start track dirty blocks */
    ret = set_dirty_tracking();

    qemu_mutex_unlock_iothread();

    if (ret) {
        return ret;
    }

    ret = flush_blks(f);
    blk_mig_reset_dirty_cursor();
    qemu_put_be64(f, BLK_MIG_FLAG_EOS);

    return ret;
}

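/* Iterate stage: flush finished reads, then keep submitting new chunk reads
 * while below both the bandwidth limit and MAX_INFLIGHT_IO, completing the
 * bulk phase before moving on to dirty chunks.  Returns 1 if data was
 * written this call, 0 if nothing changed, negative on error.
 */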
static int block_save_iterate(QEMUFile *f, void *opaque)
{
    int ret;
    int64_t last_ftell = qemu_ftell(f);
    int64_t delta_ftell;

    DPRINTF("Enter save live iterate submitted %d transferred %d\n",
            block_mig_state.submitted, block_mig_state.transferred);

    ret = flush_blks(f);
    if (ret) {
        return ret;
    }

    blk_mig_reset_dirty_cursor();

    /* control the rate of transfer */
    blk_mig_lock();
    while ((block_mig_state.submitted +
            block_mig_state.read_done) * BLOCK_SIZE <
           qemu_file_get_rate_limit(f) &&
           (block_mig_state.submitted +
            block_mig_state.read_done) <
           MAX_INFLIGHT_IO) {
        blk_mig_unlock();
        if (block_mig_state.bulk_completed == 0) {
            /* first finish the bulk phase */
            if (blk_mig_save_bulked_block(f) == 0) {
                /* finished saving bulk on all devices */
                block_mig_state.bulk_completed = 1;
            }
            ret = 0;
        } else {
            /* Always called with iothread lock taken for
             * simplicity, block_save_complete also calls it.
             */
            qemu_mutex_lock_iothread();
            ret = blk_mig_save_dirty_block(f, 1);
            qemu_mutex_unlock_iothread();
        }
        if (ret < 0) {
            return ret;
        }
        blk_mig_lock();
        if (ret != 0) {
            /* no more dirty blocks */
            break;
        }
    }
    blk_mig_unlock();

    ret = flush_blks(f);
    if (ret) {
        return ret;
    }

    qemu_put_be64(f, BLK_MIG_FLAG_EOS);
    delta_ftell = qemu_ftell(f) - last_ftell;
    if (delta_ftell > 0) {
        return 1;
    } else if (delta_ftell < 0) {
        return -1;
    } else {
        return 0;
    }
}

/* Called with iothread lock taken.  */

static int block_save_complete(QEMUFile *f, void *opaque)
{
    int ret;

    DPRINTF("Enter save live complete submitted %d transferred %d\n",
            block_mig_state.submitted, block_mig_state.transferred);

    ret = flush_blks(f);
    if (ret) {
        return ret;
    }

    blk_mig_reset_dirty_cursor();

    /* we know for sure that save bulk is completed and
       all async read completed */
    blk_mig_lock();
    assert(block_mig_state.submitted == 0);
    blk_mig_unlock();

    do {
        ret = blk_mig_save_dirty_block(f, 0);
        if (ret < 0) {
            return ret;
        }
    } while (ret == 0);

    /* report completion */
    qemu_put_be64(f, (100 << BDRV_SECTOR_BITS) | BLK_MIG_FLAG_PROGRESS);

    DPRINTF("Block migration completed\n");

    qemu_put_be64(f, BLK_MIG_FLAG_EOS);

    /* Make sure that our BlockBackends are gone, so that the block driver
     * nodes can be inactivated.  */
    block_migration_cleanup_bmds();

    return 0;
}

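/* Estimate how much data is still outstanding: the bytes left in the dirty
 * bitmaps plus everything submitted or read but not yet sent.  During the
 * bulk phase the estimate is pinned above max_size so the migration core
 * does not consider the job converged before every block went out once.
 */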
static void block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
                               uint64_t *non_postcopiable_pending,
                               uint64_t *postcopiable_pending)
{
    /* Estimate pending number of bytes to send */
    uint64_t pending;

    qemu_mutex_lock_iothread();
    pending = get_remaining_dirty();
    qemu_mutex_unlock_iothread();

    blk_mig_lock();
    pending += block_mig_state.submitted * BLOCK_SIZE +
               block_mig_state.read_done * BLOCK_SIZE;
    blk_mig_unlock();

    /* Report at least one block pending during bulk phase */
    if (pending <= max_size && !block_mig_state.bulk_completed) {
        pending = max_size + BLOCK_SIZE;
    }

    DPRINTF("Enter save live pending %" PRIu64 "\n", pending);
    /* We don't do postcopy */
    *non_postcopiable_pending += pending;
}

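/* Destination side: parse the stream record by record until EOS.  Each
 * DEVICE_BLOCK record is looked up by device name and written out, either
 * as real data or, for ZERO_BLOCK markers, as an efficient zero write; the
 * payload is split on the target's cluster size so all-zero clusters can
 * stay sparse.
 */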
static int block_load(QEMUFile *f, void *opaque, int version_id)
{
    static int banner_printed;
    int len, flags;
    char device_name[256];
    int64_t addr;
    BlockBackend *blk, *blk_prev = NULL;
    Error *local_err = NULL;
    uint8_t *buf;
    int64_t total_sectors = 0;
    int nr_sectors;
    int ret;
    BlockDriverInfo bdi;
    int cluster_size = BLOCK_SIZE;

    do {
        addr = qemu_get_be64(f);

        flags = addr & ~BDRV_SECTOR_MASK;
        addr >>= BDRV_SECTOR_BITS;

        if (flags & BLK_MIG_FLAG_DEVICE_BLOCK) {
            /* get device name */
            len = qemu_get_byte(f);
            qemu_get_buffer(f, (uint8_t *)device_name, len);
            device_name[len] = '\0';

            blk = blk_by_name(device_name);
            if (!blk) {
                fprintf(stderr, "Error unknown block device %s\n",
                        device_name);
                return -EINVAL;
            }

            if (blk != blk_prev) {
                blk_prev = blk;
                total_sectors = blk_nb_sectors(blk);
                if (total_sectors <= 0) {
                    error_report("Error getting length of block device %s",
                                 device_name);
                    return -EINVAL;
                }

                blk_invalidate_cache(blk, &local_err);
                if (local_err) {
                    error_report_err(local_err);
                    return -EINVAL;
                }

                ret = bdrv_get_info(blk_bs(blk), &bdi);
                if (ret == 0 && bdi.cluster_size > 0 &&
                    bdi.cluster_size <= BLOCK_SIZE &&
                    BLOCK_SIZE % bdi.cluster_size == 0) {
                    cluster_size = bdi.cluster_size;
                } else {
                    cluster_size = BLOCK_SIZE;
                }
            }

            if (total_sectors - addr < BDRV_SECTORS_PER_DIRTY_CHUNK) {
                nr_sectors = total_sectors - addr;
            } else {
                nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
            }

            if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
                ret = blk_pwrite_zeroes(blk, addr * BDRV_SECTOR_SIZE,
                                        nr_sectors * BDRV_SECTOR_SIZE,
                                        BDRV_REQ_MAY_UNMAP);
            } else {
                int i;
                int64_t cur_addr;
                uint8_t *cur_buf;

                buf = g_malloc(BLOCK_SIZE);
                qemu_get_buffer(f, buf, BLOCK_SIZE);
                for (i = 0; i < BLOCK_SIZE / cluster_size; i++) {
                    cur_addr = addr * BDRV_SECTOR_SIZE + i * cluster_size;
                    cur_buf = buf + i * cluster_size;

                    if ((!block_mig_state.zero_blocks ||
                        cluster_size < BLOCK_SIZE) &&
                        buffer_is_zero(cur_buf, cluster_size)) {
                        ret = blk_pwrite_zeroes(blk, cur_addr,
                                                cluster_size,
                                                BDRV_REQ_MAY_UNMAP);
                    } else {
                        ret = blk_pwrite(blk, cur_addr, cur_buf,
                                         cluster_size, 0);
                    }
                    if (ret < 0) {
                        break;
                    }
                }
                g_free(buf);
            }

            if (ret < 0) {
                return ret;
            }
        } else if (flags & BLK_MIG_FLAG_PROGRESS) {
            if (!banner_printed) {
                printf("Receiving block device images\n");
                banner_printed = 1;
            }
            printf("Completed %d %%%c", (int)addr,
                   (addr == 100) ? '\n' : '\r');
            fflush(stdout);
        } else if (!(flags & BLK_MIG_FLAG_EOS)) {
            fprintf(stderr, "Unknown block migration flags: %#x\n", flags);
            return -EINVAL;
        }
        ret = qemu_file_get_error(f);
        if (ret != 0) {
            return ret;
        }
    } while (!(flags & BLK_MIG_FLAG_EOS));

    return 0;
}

static bool block_is_active(void *opaque)
{
    return migrate_use_block();
}

static SaveVMHandlers savevm_block_handlers = {
    .save_setup = block_save_setup,
    .save_live_iterate = block_save_iterate,
    .save_live_complete_precopy = block_save_complete,
    .save_live_pending = block_save_pending,
    .load_state = block_load,
    .save_cleanup = block_migration_cleanup,
    .is_active = block_is_active,
};

void blk_mig_init(void)
{
    QSIMPLEQ_INIT(&block_mig_state.bmds_list);
    QSIMPLEQ_INIT(&block_mig_state.blk_list);
    qemu_mutex_init(&block_mig_state.lock);

    register_savevm_live(NULL, "block", 0, 1, &savevm_block_handlers,
                         &block_mig_state);
}