/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2011-2015 Red Hat Inc
 *
 * Authors:
 *  Juan Quintela <quintela@redhat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "cpu.h"
#include <zlib.h>
#include "qapi-event.h"
#include "qemu/cutils.h"
#include "qemu/bitops.h"
#include "qemu/bitmap.h"
#include "qemu/timer.h"
#include "qemu/main-loop.h"
#include "migration/migration.h"
#include "migration/postcopy-ram.h"
#include "exec/address-spaces.h"
#include "migration/page_cache.h"
#include "qemu/error-report.h"
#include "trace.h"
#include "exec/ram_addr.h"
#include "qemu/rcu_queue.h"

#ifdef DEBUG_MIGRATION_RAM
#define DPRINTF(fmt, ...) \
    do { fprintf(stdout, "migration_ram: " fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

static int dirty_rate_high_cnt;

static uint64_t bitmap_sync_count;

/***********************************************************/
/* ram save/restore */

#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
#define RAM_SAVE_FLAG_COMPRESS 0x02
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
#define RAM_SAVE_FLAG_PAGE     0x08
#define RAM_SAVE_FLAG_EOS      0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE   0x40
/* 0x80 is reserved in migration.h start with 0x100 next */
#define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100

static const uint8_t ZERO_TARGET_PAGE[TARGET_PAGE_SIZE];

static inline bool is_zero_range(uint8_t *p, uint64_t size)
{
    return buffer_find_nonzero_offset(p, size) == size;
}

/* struct contains XBZRLE cache and a static page
   used by the compression */
static struct {
    /* buffer used for XBZRLE encoding */
    uint8_t *encoded_buf;
    /* buffer for storing page content */
    uint8_t *current_buf;
    /* Cache for XBZRLE, Protected by lock. */
    PageCache *cache;
    QemuMutex lock;
} XBZRLE;

/* buffer used for XBZRLE decoding */
static uint8_t *xbzrle_decoded_buf;

static void XBZRLE_cache_lock(void)
{
    if (migrate_use_xbzrle())
        qemu_mutex_lock(&XBZRLE.lock);
}

static void XBZRLE_cache_unlock(void)
{
    if (migrate_use_xbzrle())
        qemu_mutex_unlock(&XBZRLE.lock);
}

/*
 * called from qmp_migrate_set_cache_size in main thread, possibly while
 * a migration is in progress.
 * A running migration may be using the cache and might finish during this
 * call, hence changes to the cache are protected by XBZRLE.lock().
 */
int64_t xbzrle_cache_resize(int64_t new_size)
{
    PageCache *new_cache;
    int64_t ret;

    if (new_size < TARGET_PAGE_SIZE) {
        return -1;
    }

    XBZRLE_cache_lock();

    if (XBZRLE.cache != NULL) {
        if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
            goto out_new_size;
        }
        new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
                               TARGET_PAGE_SIZE);
        if (!new_cache) {
            error_report("Error creating cache");
            ret = -1;
            goto out;
        }

        cache_fini(XBZRLE.cache);
        XBZRLE.cache = new_cache;
    }

out_new_size:
    ret = pow2floor(new_size);
out:
    XBZRLE_cache_unlock();
    return ret;
}

/* accounting for migration statistics */
typedef struct AccountingInfo {
    uint64_t dup_pages;
    uint64_t skipped_pages;
    uint64_t norm_pages;
    uint64_t iterations;
    uint64_t xbzrle_bytes;
    uint64_t xbzrle_pages;
    uint64_t xbzrle_cache_miss;
    double xbzrle_cache_miss_rate;
    uint64_t xbzrle_overflows;
} AccountingInfo;

static AccountingInfo acct_info;

static void acct_clear(void)
{
    memset(&acct_info, 0, sizeof(acct_info));
}

uint64_t dup_mig_bytes_transferred(void)
{
    return acct_info.dup_pages * TARGET_PAGE_SIZE;
}

uint64_t dup_mig_pages_transferred(void)
{
    return acct_info.dup_pages;
}

uint64_t skipped_mig_bytes_transferred(void)
{
    return acct_info.skipped_pages * TARGET_PAGE_SIZE;
}

uint64_t skipped_mig_pages_transferred(void)
{
    return acct_info.skipped_pages;
}

uint64_t norm_mig_bytes_transferred(void)
{
    return acct_info.norm_pages * TARGET_PAGE_SIZE;
}

uint64_t norm_mig_pages_transferred(void)
{
    return acct_info.norm_pages;
}

uint64_t xbzrle_mig_bytes_transferred(void)
{
    return acct_info.xbzrle_bytes;
}

uint64_t xbzrle_mig_pages_transferred(void)
{
    return acct_info.xbzrle_pages;
}

uint64_t xbzrle_mig_pages_cache_miss(void)
{
    return acct_info.xbzrle_cache_miss;
}

double xbzrle_mig_cache_miss_rate(void)
{
    return acct_info.xbzrle_cache_miss_rate;
}

uint64_t xbzrle_mig_pages_overflow(void)
{
    return acct_info.xbzrle_overflows;
}

/* This is the last block that we have visited searching for dirty pages
 */
static RAMBlock *last_seen_block;
/* This is the last block from where we have sent data */
static RAMBlock *last_sent_block;
static ram_addr_t last_offset;
static QemuMutex migration_bitmap_mutex;
static uint64_t migration_dirty_pages;
static uint32_t last_version;
static bool ram_bulk_stage;

/* used by the search for pages to send */
struct PageSearchStatus {
    /* Current block being searched */
    RAMBlock    *block;
    /* Current offset to search from */
    ram_addr_t   offset;
    /* Set once we wrap around */
    bool         complete_round;
};
typedef struct PageSearchStatus PageSearchStatus;

static struct BitmapRcu {
    struct rcu_head rcu;
    /* Main migration bitmap */
    unsigned long *bmap;
    /* bitmap of pages that haven't been sent even once
     * only maintained and used in postcopy at the moment
     * where it's used to send the dirtymap at the start
     * of the postcopy phase
     */
    unsigned long *unsentmap;
} *migration_bitmap_rcu;

struct CompressParam {
    bool done;
    bool quit;
    QEMUFile *file;
    QemuMutex mutex;
    QemuCond cond;
    RAMBlock *block;
    ram_addr_t offset;
};
typedef struct CompressParam CompressParam;

struct DecompressParam {
    bool done;
    bool quit;
    QemuMutex mutex;
    QemuCond cond;
    void *des;
    uint8_t *compbuf;
    int len;
};
typedef struct DecompressParam DecompressParam;

static CompressParam *comp_param;
static QemuThread *compress_threads;
/* comp_done_cond is used to wake up the migration thread when
 * one of the compression threads has finished the compression.
 * comp_done_lock is used to co-work with comp_done_cond.
 */
static QemuMutex *comp_done_lock;
static QemuCond *comp_done_cond;
/* The empty QEMUFileOps will be used by file in CompressParam */
static const QEMUFileOps empty_ops = { };

static bool compression_switch;
static DecompressParam *decomp_param;
static QemuThread *decompress_threads;
static QemuMutex decomp_done_lock;
static QemuCond decomp_done_cond;

static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
                                ram_addr_t offset);

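/*
 * Hand-off between the migration thread and a compression worker, as
 * implemented below: the migration thread stores a (block, offset) work
 * item in the worker's CompressParam under param->mutex and signals
 * param->cond; the worker compresses the page into its private
 * param->file buffer and then raises param->done under comp_done_lock,
 * waking waiters on comp_done_cond.
 */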
static void *do_data_compress(void *opaque)
{
    CompressParam *param = opaque;
    RAMBlock *block;
    ram_addr_t offset;

    qemu_mutex_lock(&param->mutex);
    while (!param->quit) {
        if (param->block) {
            block = param->block;
            offset = param->offset;
            param->block = NULL;
            qemu_mutex_unlock(&param->mutex);

            do_compress_ram_page(param->file, block, offset);

            qemu_mutex_lock(comp_done_lock);
            param->done = true;
            qemu_cond_signal(comp_done_cond);
            qemu_mutex_unlock(comp_done_lock);

            qemu_mutex_lock(&param->mutex);
        } else {
            qemu_cond_wait(&param->cond, &param->mutex);
        }
    }
    qemu_mutex_unlock(&param->mutex);

    return NULL;
}

static inline void terminate_compression_threads(void)
{
    int idx, thread_count;

    thread_count = migrate_compress_threads();
    for (idx = 0; idx < thread_count; idx++) {
        qemu_mutex_lock(&comp_param[idx].mutex);
        comp_param[idx].quit = true;
        qemu_cond_signal(&comp_param[idx].cond);
        qemu_mutex_unlock(&comp_param[idx].mutex);
    }
}

void migrate_compress_threads_join(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    terminate_compression_threads();
    thread_count = migrate_compress_threads();
    for (i = 0; i < thread_count; i++) {
        qemu_thread_join(compress_threads + i);
        qemu_fclose(comp_param[i].file);
        qemu_mutex_destroy(&comp_param[i].mutex);
        qemu_cond_destroy(&comp_param[i].cond);
    }
    qemu_mutex_destroy(comp_done_lock);
    qemu_cond_destroy(comp_done_cond);
    g_free(compress_threads);
    g_free(comp_param);
    g_free(comp_done_cond);
    g_free(comp_done_lock);
    compress_threads = NULL;
    comp_param = NULL;
    comp_done_cond = NULL;
    comp_done_lock = NULL;
}

void migrate_compress_threads_create(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    compression_switch = true;
    thread_count = migrate_compress_threads();
    compress_threads = g_new0(QemuThread, thread_count);
    comp_param = g_new0(CompressParam, thread_count);
    comp_done_cond = g_new0(QemuCond, 1);
    comp_done_lock = g_new0(QemuMutex, 1);
    qemu_cond_init(comp_done_cond);
    qemu_mutex_init(comp_done_lock);
    for (i = 0; i < thread_count; i++) {
        /* comp_param[i].file is just used as a dummy buffer to save data,
         * set its ops to empty.
         */
        comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
        comp_param[i].done = true;
        comp_param[i].quit = false;
        qemu_mutex_init(&comp_param[i].mutex);
        qemu_cond_init(&comp_param[i].cond);
        qemu_thread_create(compress_threads + i, "compress",
                           do_data_compress, comp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
}

/**
 * save_page_header: Write page header to wire
 *
 * If this is the 1st block, it also writes the block identification
 *
 * Returns: Number of bytes written
 *
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 *          in the lower bits, it contains flags
 */
static size_t save_page_header(QEMUFile *f, RAMBlock *block, ram_addr_t offset)
{
    size_t size, len;

    qemu_put_be64(f, offset);
    size = 8;

    if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
        len = strlen(block->idstr);
        qemu_put_byte(f, len);
        qemu_put_buffer(f, (uint8_t *)block->idstr, len);
        size += 1 + len;
    }
    return size;
}

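/*
 * Wire format produced above: an 8-byte big-endian word carrying the page
 * offset with RAM_SAVE_FLAG_* bits in its low bits, followed (only when
 * RAM_SAVE_FLAG_CONTINUE is not set) by a one-byte idstr length and the
 * RAMBlock idstr itself.
 */
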
/* Reduce amount of guest cpu execution to hopefully slow down memory writes.
 * If guest dirty memory rate is reduced below the rate at which we can
 * transfer pages to the destination then we should be able to complete
 * migration. Some workloads dirty memory way too fast and will not effectively
 * converge, even with auto-converge.
 */
static void mig_throttle_guest_down(void)
{
    MigrationState *s = migrate_get_current();
    uint64_t pct_initial = s->parameters.cpu_throttle_initial;
    uint64_t pct_icrement = s->parameters.cpu_throttle_increment;

    /* We have not started throttling yet. Let's start it. */
    if (!cpu_throttle_active()) {
        cpu_throttle_set(pct_initial);
    } else {
        /* Throttling already on, just increase the rate */
        cpu_throttle_set(cpu_throttle_get_percentage() + pct_icrement);
    }
}

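/*
 * Worked example (assuming cpu_throttle_initial is set to 20 and
 * cpu_throttle_increment to 10): the first call throttles guest vCPUs at
 * 20%, and each further call while throttling is active bumps it to 30%,
 * 40%, and so on.  The actual numbers come from the user-set migration
 * parameters read above.
 */
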
/* Update the xbzrle cache to reflect a page that's been sent as all 0.
 * The important thing is that a stale (not-yet-0'd) page be replaced
 * by the new data.
 * As a bonus, if the page wasn't in the cache it gets added so that
 * when a small write is made into the 0'd page it gets XBZRLE sent
 */
static void xbzrle_cache_zero_page(ram_addr_t current_addr)
{
    if (ram_bulk_stage || !migrate_use_xbzrle()) {
        return;
    }

    /* We don't care if this fails to allocate a new cache page
     * as long as it updated an old one */
    cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE,
                 bitmap_sync_count);
}

#define ENCODING_FLAG_XBZRLE 0x1

/**
 * save_xbzrle_page: compress and send current page
 *
 * Returns: 1 means that we wrote the page
 *          0 means that page is identical to the one already sent
 *          -1 means that xbzrle would be longer than normal
 *
 * @f: QEMUFile where to send the data
 * @current_data:
 * @current_addr:
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @last_stage: if we are at the completion stage
 * @bytes_transferred: increase it with the number of transferred bytes
 */
static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data,
                            ram_addr_t current_addr, RAMBlock *block,
                            ram_addr_t offset, bool last_stage,
                            uint64_t *bytes_transferred)
{
    int encoded_len = 0, bytes_xbzrle;
    uint8_t *prev_cached_page;

    if (!cache_is_cached(XBZRLE.cache, current_addr, bitmap_sync_count)) {
        acct_info.xbzrle_cache_miss++;
        if (!last_stage) {
            if (cache_insert(XBZRLE.cache, current_addr, *current_data,
                             bitmap_sync_count) == -1) {
                return -1;
            } else {
                /* update *current_data when the page has been
                   inserted into cache */
                *current_data = get_cached_data(XBZRLE.cache, current_addr);
            }
        }
        return -1;
    }

    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);

    /* save current buffer into memory */
    memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);

    /* XBZRLE encoding (if there is no overflow) */
    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
                                       TARGET_PAGE_SIZE);
    if (encoded_len == 0) {
        DPRINTF("Skipping unmodified page\n");
        return 0;
    } else if (encoded_len == -1) {
        DPRINTF("Overflow\n");
        acct_info.xbzrle_overflows++;
        /* update data in the cache */
        if (!last_stage) {
            memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
            *current_data = prev_cached_page;
        }
        return -1;
    }

    /* we need to update the data in the cache, in order to get the same data */
    if (!last_stage) {
        memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
    }

    /* Send XBZRLE based compressed page */
    bytes_xbzrle = save_page_header(f, block, offset | RAM_SAVE_FLAG_XBZRLE);
    qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
    qemu_put_be16(f, encoded_len);
    qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
    bytes_xbzrle += encoded_len + 1 + 2;
    acct_info.xbzrle_pages++;
    acct_info.xbzrle_bytes += bytes_xbzrle;
    *bytes_transferred += bytes_xbzrle;

    return 1;
}

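/*
 * On the wire, an XBZRLE page written above is: the usual page header with
 * RAM_SAVE_FLAG_XBZRLE set, a one-byte ENCODING_FLAG_XBZRLE marker, a
 * big-endian 16-bit encoded length, then the encoded bytes - hence the
 * "encoded_len + 1 + 2" added to bytes_xbzrle.
 */
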
/* Called with rcu_read_lock() to protect migration_bitmap
 * rb: The RAMBlock to search for dirty pages in
 * start: Start address (typically so we can continue from previous page)
 * ram_addr_abs: Pointer into which to store the address of the dirty page
 *               within the global ram_addr space
 *
 * Returns: byte offset within memory region of the start of a dirty page
 */
static inline
ram_addr_t migration_bitmap_find_dirty(RAMBlock *rb,
                                       ram_addr_t start,
                                       ram_addr_t *ram_addr_abs)
{
    unsigned long base = rb->offset >> TARGET_PAGE_BITS;
    unsigned long nr = base + (start >> TARGET_PAGE_BITS);
    uint64_t rb_size = rb->used_length;
    unsigned long size = base + (rb_size >> TARGET_PAGE_BITS);
    unsigned long *bitmap;

    unsigned long next;

    bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
    if (ram_bulk_stage && nr > base) {
        next = nr + 1;
    } else {
        next = find_next_bit(bitmap, size, nr);
    }

    *ram_addr_abs = next << TARGET_PAGE_BITS;
    return (next - base) << TARGET_PAGE_BITS;
}

static inline bool migration_bitmap_clear_dirty(ram_addr_t addr)
{
    bool ret;
    int nr = addr >> TARGET_PAGE_BITS;
    unsigned long *bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;

    ret = test_and_clear_bit(nr, bitmap);

    if (ret) {
        migration_dirty_pages--;
    }
    return ret;
}

static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
{
    unsigned long *bitmap;
    bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
    migration_dirty_pages +=
        cpu_physical_memory_sync_dirty_bitmap(bitmap, start, length);
}

/* Fix me: there are too many global variables used in migration process. */
static int64_t start_time;
static int64_t bytes_xfer_prev;
static int64_t num_dirty_pages_period;
static uint64_t xbzrle_cache_miss_prev;
static uint64_t iterations_prev;

static void migration_bitmap_sync_init(void)
{
    start_time = 0;
    bytes_xfer_prev = 0;
    num_dirty_pages_period = 0;
    xbzrle_cache_miss_prev = 0;
    iterations_prev = 0;
}

static void migration_bitmap_sync(void)
{
    RAMBlock *block;
    uint64_t num_dirty_pages_init = migration_dirty_pages;
    MigrationState *s = migrate_get_current();
    int64_t end_time;
    int64_t bytes_xfer_now;

    bitmap_sync_count++;

    if (!bytes_xfer_prev) {
        bytes_xfer_prev = ram_bytes_transferred();
    }

    if (!start_time) {
        start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    }

    trace_migration_bitmap_sync_start();
    address_space_sync_dirty_bitmap(&address_space_memory);

    qemu_mutex_lock(&migration_bitmap_mutex);
    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        migration_bitmap_sync_range(block->offset, block->used_length);
    }
    rcu_read_unlock();
    qemu_mutex_unlock(&migration_bitmap_mutex);

    trace_migration_bitmap_sync_end(migration_dirty_pages
                                    - num_dirty_pages_init);
    num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
    end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

    /* more than 1 second = 1000 milliseconds */
    if (end_time > start_time + 1000) {
        if (migrate_auto_converge()) {
            /* The following detection logic can be refined later. For now:
               Check to see if the dirtied bytes is 50% more than the approx.
               amount of bytes that just got transferred since the last time we
               were in this routine. If that happens twice, start or increase
               throttling */
            bytes_xfer_now = ram_bytes_transferred();

            if (s->dirty_pages_rate &&
                (num_dirty_pages_period * TARGET_PAGE_SIZE >
                    (bytes_xfer_now - bytes_xfer_prev)/2) &&
                (dirty_rate_high_cnt++ >= 2)) {
                trace_migration_throttle();
                dirty_rate_high_cnt = 0;
                mig_throttle_guest_down();
            }
            bytes_xfer_prev = bytes_xfer_now;
        }

        if (migrate_use_xbzrle()) {
            if (iterations_prev != acct_info.iterations) {
                acct_info.xbzrle_cache_miss_rate =
                    (double)(acct_info.xbzrle_cache_miss -
                             xbzrle_cache_miss_prev) /
                    (acct_info.iterations - iterations_prev);
            }
            iterations_prev = acct_info.iterations;
            xbzrle_cache_miss_prev = acct_info.xbzrle_cache_miss;
        }
        s->dirty_pages_rate = num_dirty_pages_period * 1000
            / (end_time - start_time);
        s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
        start_time = end_time;
        num_dirty_pages_period = 0;
    }
    s->dirty_sync_count = bitmap_sync_count;
    if (migrate_use_events()) {
        qapi_event_send_migration_pass(bitmap_sync_count, NULL);
    }
}

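/*
 * Bookkeeping above, in short: dirty_pages_rate is pages dirtied per second
 * over the just-finished period (num_dirty_pages_period * 1000 divided by
 * the period length in milliseconds), and auto-converge calls
 * mig_throttle_guest_down() once the bytes dirtied in a period repeatedly
 * exceed half of the bytes transferred in that same period (tracked via
 * dirty_rate_high_cnt).
 */
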
/**
 * save_zero_page: Send the zero page to the stream
 *
 * Returns: Number of pages written.
 *
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @p: pointer to the page
 * @bytes_transferred: increase it with the number of transferred bytes
 */
static int save_zero_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
                          uint8_t *p, uint64_t *bytes_transferred)
{
    int pages = -1;

    if (is_zero_range(p, TARGET_PAGE_SIZE)) {
        acct_info.dup_pages++;
        *bytes_transferred += save_page_header(f, block,
                                               offset | RAM_SAVE_FLAG_COMPRESS);
        qemu_put_byte(f, 0);
        *bytes_transferred += 1;
        pages = 1;
    }

    return pages;
}

/**
 * ram_save_page: Send the given page to the stream
 *
 * Returns: Number of pages written.
 *          < 0 - error
 *          >=0 - Number of pages written - this might legally be 0
 *                if xbzrle noticed the page was the same.
 *
 * @f: QEMUFile where to send the data
 * @pss: data about the page we want to send
 * @last_stage: if we are at the completion stage
 * @bytes_transferred: increase it with the number of transferred bytes
 */
static int ram_save_page(QEMUFile *f, PageSearchStatus *pss,
                         bool last_stage, uint64_t *bytes_transferred)
{
    int pages = -1;
    uint64_t bytes_xmit;
    ram_addr_t current_addr;
    uint8_t *p;
    int ret;
    bool send_async = true;
    RAMBlock *block = pss->block;
    ram_addr_t offset = pss->offset;

    p = block->host + offset;

    /* When in doubt, send the page as normal */
    bytes_xmit = 0;
    ret = ram_control_save_page(f, block->offset,
                                offset, TARGET_PAGE_SIZE, &bytes_xmit);
    if (bytes_xmit) {
        *bytes_transferred += bytes_xmit;
        pages = 1;
    }

    XBZRLE_cache_lock();

    current_addr = block->offset + offset;

    if (block == last_sent_block) {
        offset |= RAM_SAVE_FLAG_CONTINUE;
    }
    if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
        if (ret != RAM_SAVE_CONTROL_DELAYED) {
            if (bytes_xmit > 0) {
                acct_info.norm_pages++;
            } else if (bytes_xmit == 0) {
                acct_info.dup_pages++;
            }
        }
    } else {
        pages = save_zero_page(f, block, offset, p, bytes_transferred);
        if (pages > 0) {
            /* Must let xbzrle know, otherwise a previous (now 0'd) cached
             * page would be stale
             */
            xbzrle_cache_zero_page(current_addr);
        } else if (!ram_bulk_stage && migrate_use_xbzrle()) {
            pages = save_xbzrle_page(f, &p, current_addr, block,
                                     offset, last_stage, bytes_transferred);
            if (!last_stage) {
                /* Can't send this cached data async, since the cache page
                 * might get updated before it gets to the wire
                 */
                send_async = false;
            }
        }
    }

    /* XBZRLE overflow or normal page */
    if (pages == -1) {
        *bytes_transferred += save_page_header(f, block,
                                               offset | RAM_SAVE_FLAG_PAGE);
        if (send_async) {
            qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
        } else {
            qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
        }
        *bytes_transferred += TARGET_PAGE_SIZE;
        pages = 1;
        acct_info.norm_pages++;
    }

    XBZRLE_cache_unlock();

    return pages;
}

static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
                                ram_addr_t offset)
{
    int bytes_sent, blen;
    uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);

    bytes_sent = save_page_header(f, block, offset |
                                  RAM_SAVE_FLAG_COMPRESS_PAGE);
    blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
                                     migrate_compress_level());
    if (blen < 0) {
        bytes_sent = 0;
        qemu_file_set_error(migrate_get_current()->to_dst_file, blen);
        error_report("compressed data failed!");
    } else {
        bytes_sent += blen;
    }

    return bytes_sent;
}

static uint64_t bytes_transferred;

static void flush_compressed_data(QEMUFile *f)
{
    int idx, len, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    thread_count = migrate_compress_threads();

    qemu_mutex_lock(comp_done_lock);
    for (idx = 0; idx < thread_count; idx++) {
        while (!comp_param[idx].done) {
            qemu_cond_wait(comp_done_cond, comp_done_lock);
        }
    }
    qemu_mutex_unlock(comp_done_lock);

    for (idx = 0; idx < thread_count; idx++) {
        qemu_mutex_lock(&comp_param[idx].mutex);
        if (!comp_param[idx].quit) {
            len = qemu_put_qemu_file(f, comp_param[idx].file);
            bytes_transferred += len;
        }
        qemu_mutex_unlock(&comp_param[idx].mutex);
    }
}

static inline void set_compress_params(CompressParam *param, RAMBlock *block,
                                       ram_addr_t offset)
{
    param->block = block;
    param->offset = offset;
}

static int compress_page_with_multi_thread(QEMUFile *f, RAMBlock *block,
                                           ram_addr_t offset,
                                           uint64_t *bytes_transferred)
{
    int idx, thread_count, bytes_xmit = -1, pages = -1;

    thread_count = migrate_compress_threads();
    qemu_mutex_lock(comp_done_lock);
    while (true) {
        for (idx = 0; idx < thread_count; idx++) {
            if (comp_param[idx].done) {
                comp_param[idx].done = false;
                bytes_xmit = qemu_put_qemu_file(f, comp_param[idx].file);
                qemu_mutex_lock(&comp_param[idx].mutex);
                set_compress_params(&comp_param[idx], block, offset);
                qemu_cond_signal(&comp_param[idx].cond);
                qemu_mutex_unlock(&comp_param[idx].mutex);
                pages = 1;
                acct_info.norm_pages++;
                *bytes_transferred += bytes_xmit;
                break;
            }
        }
        if (pages > 0) {
            break;
        } else {
            qemu_cond_wait(comp_done_cond, comp_done_lock);
        }
    }
    qemu_mutex_unlock(comp_done_lock);

    return pages;
}

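/*
 * Dispatcher side of the worker hand-off (see the note before
 * do_data_compress()): an idle worker is one whose ->done flag is set; its
 * buffered output from the previous page is first drained into the stream
 * with qemu_put_qemu_file(), then the new (block, offset) is published
 * under the worker's mutex and its condition variable is signalled.  With
 * no idle worker available, the migration thread sleeps on comp_done_cond.
 */
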
/**
 * ram_save_compressed_page: compress the given page and send it to the stream
 *
 * Returns: Number of pages written.
 *
 * @f: QEMUFile where to send the data
 * @pss: data about the page we want to send
 * @last_stage: if we are at the completion stage
 * @bytes_transferred: increase it with the number of transferred bytes
 */
static int ram_save_compressed_page(QEMUFile *f, PageSearchStatus *pss,
                                    bool last_stage,
                                    uint64_t *bytes_transferred)
{
    int pages = -1;
    uint64_t bytes_xmit = 0;
    uint8_t *p;
    int ret, blen;
    RAMBlock *block = pss->block;
    ram_addr_t offset = pss->offset;

    p = block->host + offset;

    ret = ram_control_save_page(f, block->offset,
                                offset, TARGET_PAGE_SIZE, &bytes_xmit);
    if (bytes_xmit) {
        *bytes_transferred += bytes_xmit;
        pages = 1;
    }
    if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
        if (ret != RAM_SAVE_CONTROL_DELAYED) {
            if (bytes_xmit > 0) {
                acct_info.norm_pages++;
            } else if (bytes_xmit == 0) {
                acct_info.dup_pages++;
            }
        }
    } else {
        /* When starting the process of a new block, the first page of
         * the block should be sent out before other pages in the same
         * block, and all the pages in last block should have been sent
         * out, keeping this order is important, because the 'cont' flag
         * is used to avoid resending the block name.
         */
        if (block != last_sent_block) {
            flush_compressed_data(f);
            pages = save_zero_page(f, block, offset, p, bytes_transferred);
            if (pages == -1) {
                /* Make sure the first page is sent out before other pages */
                bytes_xmit = save_page_header(f, block, offset |
                                              RAM_SAVE_FLAG_COMPRESS_PAGE);
                blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
                                                 migrate_compress_level());
                if (blen > 0) {
                    *bytes_transferred += bytes_xmit + blen;
                    acct_info.norm_pages++;
                    pages = 1;
                } else {
                    qemu_file_set_error(f, blen);
                    error_report("compressed data failed!");
                }
            }
        } else {
            offset |= RAM_SAVE_FLAG_CONTINUE;
            pages = save_zero_page(f, block, offset, p, bytes_transferred);
            if (pages == -1) {
                pages = compress_page_with_multi_thread(f, block, offset,
                                                        bytes_transferred);
            }
        }
    }

    return pages;
}

/*
 * Find the next dirty page and update any state associated with
 * the search process.
 *
 * Returns: True if a page is found
 *
 * @f: Current migration stream.
 * @pss: Data about the state of the current dirty page scan.
 * @*again: Set to false if the search has scanned the whole of RAM
 * *ram_addr_abs: Pointer into which to store the address of the dirty page
 *                within the global ram_addr space
 */
static bool find_dirty_block(QEMUFile *f, PageSearchStatus *pss,
                             bool *again, ram_addr_t *ram_addr_abs)
{
    pss->offset = migration_bitmap_find_dirty(pss->block, pss->offset,
                                              ram_addr_abs);
    if (pss->complete_round && pss->block == last_seen_block &&
        pss->offset >= last_offset) {
        /*
         * We've been once around the RAM and haven't found anything.
         * Give up.
         */
        *again = false;
        return false;
    }
    if (pss->offset >= pss->block->used_length) {
        /* Didn't find anything in this RAM Block */
        pss->offset = 0;
        pss->block = QLIST_NEXT_RCU(pss->block, next);
        if (!pss->block) {
            /* Hit the end of the list */
            pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
            /* Flag that we've looped */
            pss->complete_round = true;
            ram_bulk_stage = false;
            if (migrate_use_xbzrle()) {
                /* If xbzrle is on, stop using the data compression at this
                 * point. In theory, xbzrle can do better than compression.
                 */
                flush_compressed_data(f);
                compression_switch = false;
            }
        }
        /* Didn't find anything this time, but try again on the new block */
        *again = true;
        return false;
    } else {
        /* Can go around again, but... */
        *again = true;
        /* We've found something so probably don't need to */
        return true;
    }
}

/*
 * Helper for 'get_queued_page' - gets a page off the queue
 *      ms:      MigrationState in
 * *offset:      Used to return the offset within the RAMBlock
 * ram_addr_abs: global offset in the dirty/sent bitmaps
 *
 * Returns:      block (or NULL if none available)
 */
static RAMBlock *unqueue_page(MigrationState *ms, ram_addr_t *offset,
                              ram_addr_t *ram_addr_abs)
{
    RAMBlock *block = NULL;

    qemu_mutex_lock(&ms->src_page_req_mutex);
    if (!QSIMPLEQ_EMPTY(&ms->src_page_requests)) {
        struct MigrationSrcPageRequest *entry =
                                QSIMPLEQ_FIRST(&ms->src_page_requests);
        block = entry->rb;
        *offset = entry->offset;
        *ram_addr_abs = (entry->offset + entry->rb->offset) &
                        TARGET_PAGE_MASK;

        if (entry->len > TARGET_PAGE_SIZE) {
            entry->len -= TARGET_PAGE_SIZE;
            entry->offset += TARGET_PAGE_SIZE;
        } else {
            memory_region_unref(block->mr);
            QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req);
            g_free(entry);
        }
    }
    qemu_mutex_unlock(&ms->src_page_req_mutex);

    return block;
}

/*
 * Unqueue a page from the queue fed by postcopy page requests; skips pages
 * that are already sent (!dirty)
 *
 *           ms: MigrationState in
 *          pss: PageSearchStatus structure updated with found block/offset
 * ram_addr_abs: global offset in the dirty/sent bitmaps
 *
 * Returns:      true if a queued page is found
 */
static bool get_queued_page(MigrationState *ms, PageSearchStatus *pss,
                            ram_addr_t *ram_addr_abs)
{
    RAMBlock  *block;
    ram_addr_t offset;
    bool dirty;

    do {
        block = unqueue_page(ms, &offset, ram_addr_abs);
        /*
         * We're sending this page, and since it's postcopy nothing else
         * will dirty it, and we must make sure it doesn't get sent again
         * even if this queue request was received after the background
         * search already sent it.
         */
        if (block) {
            unsigned long *bitmap;
            bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
            dirty = test_bit(*ram_addr_abs >> TARGET_PAGE_BITS, bitmap);
            if (!dirty) {
                trace_get_queued_page_not_dirty(
                    block->idstr, (uint64_t)offset,
                    (uint64_t)*ram_addr_abs,
                    test_bit(*ram_addr_abs >> TARGET_PAGE_BITS,
                         atomic_rcu_read(&migration_bitmap_rcu)->unsentmap));
            } else {
                trace_get_queued_page(block->idstr,
                                      (uint64_t)offset,
                                      (uint64_t)*ram_addr_abs);
            }
        }

    } while (block && !dirty);

    if (block) {
        /*
         * As soon as we start servicing pages out of order, then we have
         * to kill the bulk stage, since the bulk stage assumes
         * in (migration_bitmap_find_and_reset_dirty) that every page is
         * dirty, that's no longer true.
         */
        ram_bulk_stage = false;

        /*
         * We want the background search to continue from the queued page
         * since the guest is likely to want other pages near to the page
         * it just requested.
         */
        pss->block = block;
        pss->offset = offset;
    }

    return !!block;
}

/**
 * flush_page_queue: Flush any remaining pages in the ram request queue
 *    it should be empty at the end anyway, but in error cases there may be
 *    some left.
 *
 * ms: MigrationState
 */
void flush_page_queue(MigrationState *ms)
{
    struct MigrationSrcPageRequest *mspr, *next_mspr;
    /* This queue generally should be empty - but in the case of a failed
     * migration might have some droppings in.
     */
    rcu_read_lock();
    QSIMPLEQ_FOREACH_SAFE(mspr, &ms->src_page_requests, next_req, next_mspr) {
        memory_region_unref(mspr->rb->mr);
        QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req);
        g_free(mspr);
    }
    rcu_read_unlock();
}

/**
 * Queue the pages for transmission, e.g. a request from postcopy destination
 *   ms: MigrationStatus in which the queue is held
 *   rbname: The RAMBlock the request is for - may be NULL (to mean reuse last)
 *   start: Offset from the start of the RAMBlock
 *   len: Length (in bytes) to send
 *   Return: 0 on success
 */
int ram_save_queue_pages(MigrationState *ms, const char *rbname,
                         ram_addr_t start, ram_addr_t len)
{
    RAMBlock *ramblock;

    ms->postcopy_requests++;
    rcu_read_lock();
    if (!rbname) {
        /* Reuse last RAMBlock */
        ramblock = ms->last_req_rb;

        if (!ramblock) {
            /*
             * Shouldn't happen, we can't reuse the last RAMBlock if
             * it's the 1st request.
             */
            error_report("ram_save_queue_pages no previous block");
            goto err;
        }
    } else {
        ramblock = qemu_ram_block_by_name(rbname);

        if (!ramblock) {
            /* We shouldn't be asked for a non-existent RAMBlock */
            error_report("ram_save_queue_pages no block '%s'", rbname);
            goto err;
        }
        ms->last_req_rb = ramblock;
    }
    trace_ram_save_queue_pages(ramblock->idstr, start, len);
    if (start+len > ramblock->used_length) {
        error_report("%s request overrun start=" RAM_ADDR_FMT " len="
                     RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
                     __func__, start, len, ramblock->used_length);
        goto err;
    }

    struct MigrationSrcPageRequest *new_entry =
        g_malloc0(sizeof(struct MigrationSrcPageRequest));
    new_entry->rb = ramblock;
    new_entry->offset = start;
    new_entry->len = len;

    memory_region_ref(ramblock->mr);
    qemu_mutex_lock(&ms->src_page_req_mutex);
    QSIMPLEQ_INSERT_TAIL(&ms->src_page_requests, new_entry, next_req);
    qemu_mutex_unlock(&ms->src_page_req_mutex);
    rcu_read_unlock();

    return 0;

err:
    rcu_read_unlock();
    return -1;
}

/**
 * ram_save_target_page: Save one target page
 *
 * @f: QEMUFile where to send the data
 * @pss: data about the page we want to send
 * @last_stage: if we are at the completion stage
 * @bytes_transferred: increase it with the number of transferred bytes
 * @dirty_ram_abs: Address of the start of the dirty page in ram_addr_t space
 *
 * Returns: Number of pages written.
 */
static int ram_save_target_page(MigrationState *ms, QEMUFile *f,
                                PageSearchStatus *pss,
                                bool last_stage,
                                uint64_t *bytes_transferred,
                                ram_addr_t dirty_ram_abs)
{
    int res = 0;

    /* Check if the page is dirty and, if so, send it */
    if (migration_bitmap_clear_dirty(dirty_ram_abs)) {
        unsigned long *unsentmap;
        if (compression_switch && migrate_use_compression()) {
            res = ram_save_compressed_page(f, pss,
                                           last_stage,
                                           bytes_transferred);
        } else {
            res = ram_save_page(f, pss, last_stage,
                                bytes_transferred);
        }

        if (res < 0) {
            return res;
        }
        unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
        if (unsentmap) {
            clear_bit(dirty_ram_abs >> TARGET_PAGE_BITS, unsentmap);
        }
        /* Only update last_sent_block if a block was actually sent; xbzrle
         * might have decided the page was identical so didn't bother writing
         * to the stream.
         */
        if (res > 0) {
            last_sent_block = pss->block;
        }
    }

    return res;
}

/**
 * ram_save_host_page: Starting at *offset send pages up to the end
 *                     of the current host page.  It's valid for the initial
 *                     offset to point into the middle of a host page
 *                     in which case the remainder of the hostpage is sent.
 *                     Only dirty target pages are sent.
 *
 * Returns: Number of pages written.
 *
 * @f: QEMUFile where to send the data
 * @pss: data about the page we want to send; pss->offset is updated to the
 *       last target page sent
 * @last_stage: if we are at the completion stage
 * @bytes_transferred: increase it with the number of transferred bytes
 * @dirty_ram_abs: Address of the start of the dirty page in ram_addr_t space
 */
static int ram_save_host_page(MigrationState *ms, QEMUFile *f,
                              PageSearchStatus *pss,
                              bool last_stage,
                              uint64_t *bytes_transferred,
                              ram_addr_t dirty_ram_abs)
{
    int tmppages, pages = 0;
    do {
        tmppages = ram_save_target_page(ms, f, pss, last_stage,
                                        bytes_transferred, dirty_ram_abs);
        if (tmppages < 0) {
            return tmppages;
        }

        pages += tmppages;
        pss->offset += TARGET_PAGE_SIZE;
        dirty_ram_abs += TARGET_PAGE_SIZE;
    } while (pss->offset & (qemu_host_page_size - 1));

    /* The offset we leave with is the last one we looked at */
    pss->offset -= TARGET_PAGE_SIZE;
    return pages;
}

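/*
 * Example of the host/target page split handled above: with 64KiB host
 * pages and 4KiB target pages (e.g. many ppc64 hosts), a single call sends
 * up to 16 target pages and stops at the host-page boundary; where host
 * and target page sizes match, the loop body runs exactly once.
 */
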
/**
 * ram_find_and_save_block: Finds a dirty page and sends it to f
 *
 * Called within an RCU critical section.
 *
 * Returns:  The number of pages written
 *           0 means no dirty pages
 *
 * @f: QEMUFile where to send the data
 * @last_stage: if we are at the completion stage
 * @bytes_transferred: increase it with the number of transferred bytes
 *
 * On systems where host-page-size > target-page-size it will send all the
 * pages in a host page that are dirty.
 */

static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
                                   uint64_t *bytes_transferred)
{
    PageSearchStatus pss;
    MigrationState *ms = migrate_get_current();
    int pages = 0;
    bool again, found;
    ram_addr_t dirty_ram_abs; /* Address of the start of the dirty page in
                                 ram_addr_t space */

    pss.block = last_seen_block;
    pss.offset = last_offset;
    pss.complete_round = false;

    if (!pss.block) {
        pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
    }

    do {
        again = true;
        found = get_queued_page(ms, &pss, &dirty_ram_abs);

        if (!found) {
            /* priority queue empty, so just search for something dirty */
            found = find_dirty_block(f, &pss, &again, &dirty_ram_abs);
        }

        if (found) {
            pages = ram_save_host_page(ms, f, &pss,
                                       last_stage, bytes_transferred,
                                       dirty_ram_abs);
        }
    } while (!pages && again);

    last_seen_block = pss.block;
    last_offset = pss.offset;

    return pages;
}

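/*
 * Note on the loop above: postcopy page requests queued by the destination
 * (get_queued_page) take priority over the linear dirty-bitmap walk
 * (find_dirty_block), and whichever page is picked is then expanded to the
 * surrounding host page by ram_save_host_page().
 */
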
void acct_update_position(QEMUFile *f, size_t size, bool zero)
{
    uint64_t pages = size / TARGET_PAGE_SIZE;
    if (zero) {
        acct_info.dup_pages += pages;
    } else {
        acct_info.norm_pages += pages;
        bytes_transferred += size;
        qemu_update_position(f, size);
    }
}

static ram_addr_t ram_save_remaining(void)
{
    return migration_dirty_pages;
}

uint64_t ram_bytes_remaining(void)
{
    return ram_save_remaining() * TARGET_PAGE_SIZE;
}

uint64_t ram_bytes_transferred(void)
{
    return bytes_transferred;
}

uint64_t ram_bytes_total(void)
{
    RAMBlock *block;
    uint64_t total = 0;

    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next)
        total += block->used_length;
    rcu_read_unlock();
    return total;
}

void free_xbzrle_decoded_buf(void)
{
    g_free(xbzrle_decoded_buf);
    xbzrle_decoded_buf = NULL;
}

static void migration_bitmap_free(struct BitmapRcu *bmap)
{
    g_free(bmap->bmap);
    g_free(bmap->unsentmap);
    g_free(bmap);
}

static void ram_migration_cleanup(void *opaque)
{
    /* The caller must hold the iothread lock or be in a bh, so there is
     * no writing race against this migration_bitmap
     */
    struct BitmapRcu *bitmap = migration_bitmap_rcu;
    atomic_rcu_set(&migration_bitmap_rcu, NULL);
    if (bitmap) {
        memory_global_dirty_log_stop();
        call_rcu(bitmap, migration_bitmap_free, rcu);
    }

    XBZRLE_cache_lock();
    if (XBZRLE.cache) {
        cache_fini(XBZRLE.cache);
        g_free(XBZRLE.encoded_buf);
        g_free(XBZRLE.current_buf);
        XBZRLE.cache = NULL;
        XBZRLE.encoded_buf = NULL;
        XBZRLE.current_buf = NULL;
    }
    XBZRLE_cache_unlock();
}

static void reset_ram_globals(void)
{
    last_seen_block = NULL;
    last_sent_block = NULL;
    last_offset = 0;
    last_version = ram_list.version;
    ram_bulk_stage = true;
}

#define MAX_WAIT 50 /* ms, half buffered_file limit */

void migration_bitmap_extend(ram_addr_t old, ram_addr_t new)
{
    /* called in qemu main thread, so there is
     * no writing race against this migration_bitmap
     */
    if (migration_bitmap_rcu) {
        struct BitmapRcu *old_bitmap = migration_bitmap_rcu, *bitmap;
        bitmap = g_new(struct BitmapRcu, 1);
        bitmap->bmap = bitmap_new(new);

        /* prevent the migration_bitmap bits from being set by
         * migration_bitmap_sync_range() at the same time.
         * it is safe for migration if a migration_bitmap bit is cleared
         * at the same time.
         */
        qemu_mutex_lock(&migration_bitmap_mutex);
        bitmap_copy(bitmap->bmap, old_bitmap->bmap, old);
        bitmap_set(bitmap->bmap, old, new - old);

        /* We don't have a way to safely extend the sentmap
         * with RCU; so mark it as missing, entry to postcopy
         * will fail.
         */
        bitmap->unsentmap = NULL;

        atomic_rcu_set(&migration_bitmap_rcu, bitmap);
        qemu_mutex_unlock(&migration_bitmap_mutex);
        migration_dirty_pages += new - old;
        call_rcu(old_bitmap, migration_bitmap_free, rcu);
    }
}

Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00001488/*
1489 * 'expected' is the value you expect the bitmap mostly to be full
1490 * of; it won't bother printing lines that are all this value.
1491 * If 'todump' is null the migration bitmap is dumped.
1492 */
1493void ram_debug_dump_bitmap(unsigned long *todump, bool expected)
1494{
1495 int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
1496
1497 int64_t cur;
1498 int64_t linelen = 128;
1499 char linebuf[129];
1500
1501 if (!todump) {
1502 todump = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1503 }
1504
1505 for (cur = 0; cur < ram_pages; cur += linelen) {
1506 int64_t curb;
1507 bool found = false;
1508 /*
1509 * Last line; catch the case where the line length
1510 * is longer than remaining ram
1511 */
1512 if (cur + linelen > ram_pages) {
1513 linelen = ram_pages - cur;
1514 }
1515 for (curb = 0; curb < linelen; curb++) {
1516 bool thisbit = test_bit(cur + curb, todump);
1517 linebuf[curb] = thisbit ? '1' : '.';
1518 found = found || (thisbit != expected);
1519 }
1520 if (found) {
1521 linebuf[curb] = '\0';
1522 fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
1523 }
1524 }
1525}
1526
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001527/* **** functions for postcopy ***** */
1528
1529/*
1530 * Callback from postcopy_each_ram_send_discard for each RAMBlock
1531 * Note: At this point the 'unsentmap' is the processed bitmap combined
1532 * with the dirtymap; so a '1' means it's either dirty or unsent.
 * start: index into the bitmap of the first bit representing this block
 * length: length of the block, in target pages
1535 */
1536static int postcopy_send_discard_bm_ram(MigrationState *ms,
1537 PostcopyDiscardState *pds,
1538 unsigned long start,
1539 unsigned long length)
1540{
1541 unsigned long end = start + length; /* one after the end */
1542 unsigned long current;
1543 unsigned long *unsentmap;
1544
1545 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1546 for (current = start; current < end; ) {
1547 unsigned long one = find_next_bit(unsentmap, end, current);
1548
1549 if (one <= end) {
1550 unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1);
1551 unsigned long discard_length;
1552
1553 if (zero >= end) {
1554 discard_length = end - one;
1555 } else {
1556 discard_length = zero - one;
1557 }
Dr. David Alan Gilbertd688c622016-06-13 12:16:40 +01001558 if (discard_length) {
1559 postcopy_discard_send_range(ms, pds, one, discard_length);
1560 }
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001561 current = one + discard_length;
1562 } else {
1563 current = one;
1564 }
1565 }
1566
1567 return 0;
1568}
1569
/*
 * Utility for the outgoing postcopy code.
 * Calls postcopy_send_discard_bm_ram for each RAMBlock,
 * passing it bitmap indexes and the block name.
 * Returns: 0 on success
 * (We walk the block list ourselves rather than use
 * qemu_ram_foreach_block, which passes unscaled byte lengths and would
 * force the postcopy code to deal with target-page scaling.)
 */
1578static int postcopy_each_ram_send_discard(MigrationState *ms)
1579{
1580 struct RAMBlock *block;
1581 int ret;
1582
1583 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1584 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1585 PostcopyDiscardState *pds = postcopy_discard_send_init(ms,
1586 first,
1587 block->idstr);
1588
1589 /*
1590 * Postcopy sends chunks of bitmap over the wire, but it
         * just needs indexes at this point, which avoids it having
         * target-page-specific code.
1593 */
1594 ret = postcopy_send_discard_bm_ram(ms, pds, first,
1595 block->used_length >> TARGET_PAGE_BITS);
1596 postcopy_discard_send_finish(ms, pds);
1597 if (ret) {
1598 return ret;
1599 }
1600 }
1601
1602 return 0;
1603}
1604
1605/*
 * Helper for postcopy_chunk_hostpages; it's called twice to clean up
 * the two bitmaps, which are similar but one is inverted.
1608 *
1609 * We search for runs of target-pages that don't start or end on a
1610 * host page boundary;
1611 * unsent_pass=true: Cleans up partially unsent host pages by searching
1612 * the unsentmap
1613 * unsent_pass=false: Cleans up partially dirty host pages by searching
1614 * the main migration bitmap
1615 *
1616 */
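/* Example: with a 4:1 host-to-target page ratio, a dirty run starting at
 * target page 6 is rounded down to page 4; host page 4-7 is sent as a
 * discard (if it had already been transmitted) and all four target pages
 * are re-marked dirty and unsent, so the destination is never left with a
 * partially-populated host page.
 */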
1617static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
1618 RAMBlock *block,
1619 PostcopyDiscardState *pds)
1620{
1621 unsigned long *bitmap;
1622 unsigned long *unsentmap;
1623 unsigned int host_ratio = qemu_host_page_size / TARGET_PAGE_SIZE;
1624 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1625 unsigned long len = block->used_length >> TARGET_PAGE_BITS;
1626 unsigned long last = first + (len - 1);
1627 unsigned long run_start;
1628
1629 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1630 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1631
1632 if (unsent_pass) {
1633 /* Find a sent page */
1634 run_start = find_next_zero_bit(unsentmap, last + 1, first);
1635 } else {
1636 /* Find a dirty page */
1637 run_start = find_next_bit(bitmap, last + 1, first);
1638 }
1639
1640 while (run_start <= last) {
1641 bool do_fixup = false;
1642 unsigned long fixup_start_addr;
1643 unsigned long host_offset;
1644
1645 /*
1646 * If the start of this run of pages is in the middle of a host
1647 * page, then we need to fixup this host page.
1648 */
1649 host_offset = run_start % host_ratio;
1650 if (host_offset) {
1651 do_fixup = true;
1652 run_start -= host_offset;
1653 fixup_start_addr = run_start;
1654 /* For the next pass */
1655 run_start = run_start + host_ratio;
1656 } else {
1657 /* Find the end of this run */
1658 unsigned long run_end;
1659 if (unsent_pass) {
1660 run_end = find_next_bit(unsentmap, last + 1, run_start + 1);
1661 } else {
1662 run_end = find_next_zero_bit(bitmap, last + 1, run_start + 1);
1663 }
1664 /*
1665 * If the end isn't at the start of a host page, then the
1666 * run doesn't finish at the end of a host page
1667 * and we need to discard.
1668 */
1669 host_offset = run_end % host_ratio;
1670 if (host_offset) {
1671 do_fixup = true;
1672 fixup_start_addr = run_end - host_offset;
1673 /*
1674 * This host page has gone, the next loop iteration starts
1675 * from after the fixup
1676 */
1677 run_start = fixup_start_addr + host_ratio;
1678 } else {
1679 /*
1680 * No discards on this iteration, next loop starts from
1681 * next sent/dirty page
1682 */
1683 run_start = run_end + 1;
1684 }
1685 }
1686
1687 if (do_fixup) {
1688 unsigned long page;
1689
1690 /* Tell the destination to discard this page */
1691 if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
1692 /* For the unsent_pass we:
1693 * discard partially sent pages
1694 * For the !unsent_pass (dirty) we:
1695 * discard partially dirty pages that were sent
1696 * (any partially sent pages were already discarded
1697 * by the previous unsent_pass)
1698 */
1699 postcopy_discard_send_range(ms, pds, fixup_start_addr,
1700 host_ratio);
1701 }
1702
1703 /* Clean up the bitmap */
1704 for (page = fixup_start_addr;
1705 page < fixup_start_addr + host_ratio; page++) {
1706 /* All pages in this host page are now not sent */
1707 set_bit(page, unsentmap);
1708
1709 /*
1710 * Remark them as dirty, updating the count for any pages
1711 * that weren't previously dirty.
1712 */
1713 migration_dirty_pages += !test_and_set_bit(page, bitmap);
1714 }
1715 }
1716
1717 if (unsent_pass) {
1718 /* Find the next sent page for the next iteration */
1719 run_start = find_next_zero_bit(unsentmap, last + 1,
1720 run_start);
1721 } else {
1722 /* Find the next dirty page for the next iteration */
1723 run_start = find_next_bit(bitmap, last + 1, run_start);
1724 }
1725 }
1726}
1727
1728/*
1729 * Utility for the outgoing postcopy code.
1730 *
1731 * Discard any partially sent host-page size chunks, mark any partially
1732 * dirty host-page size chunks as all dirty.
1733 *
1734 * Returns: 0 on success
1735 */
1736static int postcopy_chunk_hostpages(MigrationState *ms)
1737{
1738 struct RAMBlock *block;
1739
1740 if (qemu_host_page_size == TARGET_PAGE_SIZE) {
1741 /* Easy case - TPS==HPS - nothing to be done */
1742 return 0;
1743 }
1744
1745 /* Easiest way to make sure we don't resume in the middle of a host-page */
1746 last_seen_block = NULL;
1747 last_sent_block = NULL;
1748 last_offset = 0;
1749
1750 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1751 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1752
1753 PostcopyDiscardState *pds =
1754 postcopy_discard_send_init(ms, first, block->idstr);
1755
1756 /* First pass: Discard all partially sent host pages */
1757 postcopy_chunk_hostpages_pass(ms, true, block, pds);
1758 /*
1759 * Second pass: Ensure that all partially dirty host pages are made
1760 * fully dirty.
1761 */
1762 postcopy_chunk_hostpages_pass(ms, false, block, pds);
1763
1764 postcopy_discard_send_finish(ms, pds);
1765 } /* ram_list loop */
1766
1767 return 0;
1768}
1769
1770/*
 * Transmit the set of pages to be discarded after precopy to the target;
 * these are the pages that:
 * a) have been previously transmitted but are now dirty again
 * b) have never been transmitted; this ensures that any pages on the
 *    destination that were mapped by background tasks get discarded
 *    (transparent huge pages are the specific concern)
 * Hopefully this set is fairly sparse.
 */
1779int ram_postcopy_send_discard_bitmap(MigrationState *ms)
1780{
1781 int ret;
1782 unsigned long *bitmap, *unsentmap;
1783
1784 rcu_read_lock();
1785
1786 /* This should be our last sync, the src is now paused */
1787 migration_bitmap_sync();
1788
1789 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1790 if (!unsentmap) {
1791 /* We don't have a safe way to resize the sentmap, so
1792 * if the bitmap was resized it will be NULL at this
1793 * point.
1794 */
1795 error_report("migration ram resized during precopy phase");
1796 rcu_read_unlock();
1797 return -EINVAL;
1798 }
1799
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001800 /* Deal with TPS != HPS */
1801 ret = postcopy_chunk_hostpages(ms);
1802 if (ret) {
1803 rcu_read_unlock();
1804 return ret;
1805 }
1806
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001807 /*
1808 * Update the unsentmap to be unsentmap = unsentmap | dirty
1809 */
1810 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1811 bitmap_or(unsentmap, unsentmap, bitmap,
1812 last_ram_offset() >> TARGET_PAGE_BITS);
1813
1814
1815 trace_ram_postcopy_send_discard_bitmap();
1816#ifdef DEBUG_POSTCOPY
1817 ram_debug_dump_bitmap(unsentmap, true);
1818#endif
1819
1820 ret = postcopy_each_ram_send_discard(ms);
1821 rcu_read_unlock();
1822
1823 return ret;
1824}
1825
1826/*
1827 * At the start of the postcopy phase of migration, any now-dirty
1828 * precopied pages are discarded.
1829 *
1830 * start, length describe a byte address range within the RAMBlock
1831 *
1832 * Returns 0 on success.
1833 */
1834int ram_discard_range(MigrationIncomingState *mis,
1835 const char *block_name,
1836 uint64_t start, size_t length)
1837{
1838 int ret = -1;
1839
1840 rcu_read_lock();
1841 RAMBlock *rb = qemu_ram_block_by_name(block_name);
1842
1843 if (!rb) {
1844 error_report("ram_discard_range: Failed to find block '%s'",
1845 block_name);
1846 goto err;
1847 }
1848
1849 uint8_t *host_startaddr = rb->host + start;
1850
1851 if ((uintptr_t)host_startaddr & (qemu_host_page_size - 1)) {
1852 error_report("ram_discard_range: Unaligned start address: %p",
1853 host_startaddr);
1854 goto err;
1855 }
1856
1857 if ((start + length) <= rb->used_length) {
1858 uint8_t *host_endaddr = host_startaddr + length;
1859 if ((uintptr_t)host_endaddr & (qemu_host_page_size - 1)) {
1860 error_report("ram_discard_range: Unaligned end address: %p",
1861 host_endaddr);
1862 goto err;
1863 }
1864 ret = postcopy_ram_discard_range(mis, host_startaddr, length);
1865 } else {
1866 error_report("ram_discard_range: Overrun block '%s' (%" PRIu64
Juan Quintela9458ad62015-11-10 17:42:05 +01001867 "/%zx/" RAM_ADDR_FMT")",
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001868 block_name, start, length, rb->used_length);
1869 }
1870
1871err:
1872 rcu_read_unlock();
1873
1874 return ret;
1875}
1876
1877
/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has a
 * long-running RCU critical section.  When RCU reclaims in the code
 * start to become numerous, it will be necessary to reduce the
1881 * granularity of these critical sections.
1882 */
1883
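/* Called once at the start of RAM migration: initialises the dirty bitmap
 * (and the unsentmap when postcopy is enabled), sets up the XBZRLE cache if
 * in use, and writes the RAM block list header to the stream.
 * Returns 0 on success, -1 on error.
 */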
1884static int ram_save_setup(QEMUFile *f, void *opaque)
1885{
1886 RAMBlock *block;
1887 int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */
1888
Juan Quintela56e93d22015-05-07 19:33:31 +02001889 dirty_rate_high_cnt = 0;
1890 bitmap_sync_count = 0;
1891 migration_bitmap_sync_init();
Li Zhijiandd631692015-07-02 20:18:06 +08001892 qemu_mutex_init(&migration_bitmap_mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02001893
1894 if (migrate_use_xbzrle()) {
1895 XBZRLE_cache_lock();
1896 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
1897 TARGET_PAGE_SIZE,
1898 TARGET_PAGE_SIZE);
1899 if (!XBZRLE.cache) {
1900 XBZRLE_cache_unlock();
1901 error_report("Error creating cache");
1902 return -1;
1903 }
1904 XBZRLE_cache_unlock();
1905
1906 /* We prefer not to abort if there is no memory */
1907 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
1908 if (!XBZRLE.encoded_buf) {
1909 error_report("Error allocating encoded_buf");
1910 return -1;
1911 }
1912
1913 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
1914 if (!XBZRLE.current_buf) {
1915 error_report("Error allocating current_buf");
1916 g_free(XBZRLE.encoded_buf);
1917 XBZRLE.encoded_buf = NULL;
1918 return -1;
1919 }
1920
1921 acct_clear();
1922 }
1923
Paolo Bonzini49877832016-02-15 19:57:57 +01001924 /* For memory_global_dirty_log_start below. */
1925 qemu_mutex_lock_iothread();
1926
Juan Quintela56e93d22015-05-07 19:33:31 +02001927 qemu_mutex_lock_ramlist();
1928 rcu_read_lock();
1929 bytes_transferred = 0;
1930 reset_ram_globals();
1931
1932 ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
Dr. David Alan Gilbertf3f491f2015-11-05 18:11:01 +00001933 migration_bitmap_rcu = g_new0(struct BitmapRcu, 1);
Denis V. Lunev60be6342015-09-28 14:41:58 +03001934 migration_bitmap_rcu->bmap = bitmap_new(ram_bitmap_pages);
1935 bitmap_set(migration_bitmap_rcu->bmap, 0, ram_bitmap_pages);
Juan Quintela56e93d22015-05-07 19:33:31 +02001936
Dr. David Alan Gilbertf3f491f2015-11-05 18:11:01 +00001937 if (migrate_postcopy_ram()) {
1938 migration_bitmap_rcu->unsentmap = bitmap_new(ram_bitmap_pages);
1939 bitmap_set(migration_bitmap_rcu->unsentmap, 0, ram_bitmap_pages);
1940 }
1941
Juan Quintela56e93d22015-05-07 19:33:31 +02001942 /*
1943 * Count the total number of pages used by ram blocks not including any
1944 * gaps due to alignment or unplugs.
1945 */
1946 migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
1947
1948 memory_global_dirty_log_start();
1949 migration_bitmap_sync();
1950 qemu_mutex_unlock_ramlist();
Paolo Bonzini49877832016-02-15 19:57:57 +01001951 qemu_mutex_unlock_iothread();
Juan Quintela56e93d22015-05-07 19:33:31 +02001952
1953 qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
1954
1955 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1956 qemu_put_byte(f, strlen(block->idstr));
1957 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
1958 qemu_put_be64(f, block->used_length);
1959 }
1960
1961 rcu_read_unlock();
1962
1963 ram_control_before_iterate(f, RAM_CONTROL_SETUP);
1964 ram_control_after_iterate(f, RAM_CONTROL_SETUP);
1965
1966 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
1967
1968 return 0;
1969}
1970
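/* One round of the precopy loop: send dirty pages until the bandwidth limit
 * is reached or roughly MAX_WAIT ms have elapsed.  Returns the number of
 * pages sent, or a negative error from the stream.
 */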
1971static int ram_save_iterate(QEMUFile *f, void *opaque)
1972{
1973 int ret;
1974 int i;
1975 int64_t t0;
1976 int pages_sent = 0;
1977
1978 rcu_read_lock();
1979 if (ram_list.version != last_version) {
1980 reset_ram_globals();
1981 }
1982
1983 /* Read version before ram_list.blocks */
1984 smp_rmb();
1985
1986 ram_control_before_iterate(f, RAM_CONTROL_ROUND);
1987
1988 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
1989 i = 0;
1990 while ((ret = qemu_file_rate_limit(f)) == 0) {
1991 int pages;
1992
1993 pages = ram_find_and_save_block(f, false, &bytes_transferred);
        /* no more pages to send */
1995 if (pages == 0) {
1996 break;
1997 }
1998 pages_sent += pages;
1999 acct_info.iterations++;
Jason J. Herne070afca2015-09-08 13:12:35 -04002000
        /* we want to check in the 1st loop, just in case it was the 1st time
           and we had to sync the dirty bitmap.
           qemu_clock_get_ns() is a bit expensive, so we only check every
           few iterations.
        */
2006 if ((i & 63) == 0) {
2007 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
2008 if (t1 > MAX_WAIT) {
2009 DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n",
2010 t1, i);
2011 break;
2012 }
2013 }
2014 i++;
2015 }
2016 flush_compressed_data(f);
2017 rcu_read_unlock();
2018
2019 /*
2020 * Must occur before EOS (or any QEMUFile operation)
2021 * because of RDMA protocol.
2022 */
2023 ram_control_after_iterate(f, RAM_CONTROL_ROUND);
2024
2025 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2026 bytes_transferred += 8;
2027
2028 ret = qemu_file_get_error(f);
2029 if (ret < 0) {
2030 return ret;
2031 }
2032
2033 return pages_sent;
2034}
2035
2036/* Called with iothread lock */
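/* Flush out all remaining dirty pages; registered as both the precopy and
 * the postcopy completion hook (see savevm_ram_handlers below).
 */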
2037static int ram_save_complete(QEMUFile *f, void *opaque)
2038{
2039 rcu_read_lock();
2040
Dr. David Alan Gilbert663e6c12015-11-05 18:11:13 +00002041 if (!migration_in_postcopy(migrate_get_current())) {
2042 migration_bitmap_sync();
2043 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002044
2045 ram_control_before_iterate(f, RAM_CONTROL_FINISH);
2046
2047 /* try transferring iterative blocks of memory */
2048
2049 /* flush all remaining blocks regardless of rate limiting */
2050 while (true) {
2051 int pages;
2052
2053 pages = ram_find_and_save_block(f, true, &bytes_transferred);
        /* no more blocks to send */
2055 if (pages == 0) {
2056 break;
2057 }
2058 }
2059
2060 flush_compressed_data(f);
2061 ram_control_after_iterate(f, RAM_CONTROL_FINISH);
Juan Quintela56e93d22015-05-07 19:33:31 +02002062
2063 rcu_read_unlock();
Paolo Bonzinid09a6fd2015-07-09 08:47:58 +02002064
Juan Quintela56e93d22015-05-07 19:33:31 +02002065 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2066
2067 return 0;
2068}
2069
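/* Report how much data is still to be sent.  If we are below max_size and
 * not yet in postcopy, resync the dirty bitmap first; everything remaining
 * is accounted as postcopiable.
 */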
Dr. David Alan Gilbertc31b0982015-11-05 18:10:54 +00002070static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
2071 uint64_t *non_postcopiable_pending,
2072 uint64_t *postcopiable_pending)
Juan Quintela56e93d22015-05-07 19:33:31 +02002073{
2074 uint64_t remaining_size;
2075
2076 remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
2077
Dr. David Alan Gilbert663e6c12015-11-05 18:11:13 +00002078 if (!migration_in_postcopy(migrate_get_current()) &&
2079 remaining_size < max_size) {
Juan Quintela56e93d22015-05-07 19:33:31 +02002080 qemu_mutex_lock_iothread();
2081 rcu_read_lock();
2082 migration_bitmap_sync();
2083 rcu_read_unlock();
2084 qemu_mutex_unlock_iothread();
2085 remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
2086 }
Dr. David Alan Gilbertc31b0982015-11-05 18:10:54 +00002087
2088 /* We can do postcopy, and all the data is postcopiable */
2089 *postcopiable_pending += remaining_size;
Juan Quintela56e93d22015-05-07 19:33:31 +02002090}
2091
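/* Load one XBZRLE-encoded page from the stream and apply the delta to the
 * current contents of 'host'.  Returns 0 on success, -1 on a malformed page.
 */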
2092static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
2093{
2094 unsigned int xh_len;
2095 int xh_flags;
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00002096 uint8_t *loaded_data;
Juan Quintela56e93d22015-05-07 19:33:31 +02002097
2098 if (!xbzrle_decoded_buf) {
2099 xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
2100 }
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00002101 loaded_data = xbzrle_decoded_buf;
Juan Quintela56e93d22015-05-07 19:33:31 +02002102
2103 /* extract RLE header */
2104 xh_flags = qemu_get_byte(f);
2105 xh_len = qemu_get_be16(f);
2106
2107 if (xh_flags != ENCODING_FLAG_XBZRLE) {
2108 error_report("Failed to load XBZRLE page - wrong compression!");
2109 return -1;
2110 }
2111
2112 if (xh_len > TARGET_PAGE_SIZE) {
2113 error_report("Failed to load XBZRLE page - len overflow!");
2114 return -1;
2115 }
2116 /* load data and decode */
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00002117 qemu_get_buffer_in_place(f, &loaded_data, xh_len);
Juan Quintela56e93d22015-05-07 19:33:31 +02002118
2119 /* decode RLE */
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00002120 if (xbzrle_decode_buffer(loaded_data, xh_len, host,
Juan Quintela56e93d22015-05-07 19:33:31 +02002121 TARGET_PAGE_SIZE) == -1) {
2122 error_report("Failed to load XBZRLE page - decode error!");
2123 return -1;
2124 }
2125
2126 return 0;
2127}
2128
/*
 * Read a RAMBlock ID from the stream f and return the matching RAMBlock.
 *
 * Must be called from within an RCU critical section; returns a pointer
 * from within the RCU-protected ram_list.
 *
 * f: Stream to read from
 * flags: Page flags (mostly to see if it's a continuation of a previous block)
 */
zhanghailiang4c4bad42016-01-15 11:37:41 +08002138static inline RAMBlock *ram_block_from_stream(QEMUFile *f,
2139 int flags)
Juan Quintela56e93d22015-05-07 19:33:31 +02002140{
2141 static RAMBlock *block = NULL;
2142 char id[256];
2143 uint8_t len;
2144
2145 if (flags & RAM_SAVE_FLAG_CONTINUE) {
zhanghailiang4c4bad42016-01-15 11:37:41 +08002146 if (!block) {
Juan Quintela56e93d22015-05-07 19:33:31 +02002147 error_report("Ack, bad migration stream!");
2148 return NULL;
2149 }
zhanghailiang4c4bad42016-01-15 11:37:41 +08002150 return block;
Juan Quintela56e93d22015-05-07 19:33:31 +02002151 }
2152
2153 len = qemu_get_byte(f);
2154 qemu_get_buffer(f, (uint8_t *)id, len);
2155 id[len] = 0;
2156
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00002157 block = qemu_ram_block_by_name(id);
zhanghailiang4c4bad42016-01-15 11:37:41 +08002158 if (!block) {
2159 error_report("Can't find block %s", id);
2160 return NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02002161 }
2162
zhanghailiang4c4bad42016-01-15 11:37:41 +08002163 return block;
2164}
2165
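/* Return the host address for 'offset' within 'block', or NULL if the
 * offset is outside the block's used length.
 */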
2166static inline void *host_from_ram_block_offset(RAMBlock *block,
2167 ram_addr_t offset)
2168{
2169 if (!offset_in_ramblock(block, offset)) {
2170 return NULL;
2171 }
2172
2173 return block->host + offset;
Juan Quintela56e93d22015-05-07 19:33:31 +02002174}
2175
2176/*
2177 * If a page (or a whole RDMA chunk) has been
2178 * determined to be zero, then zap it.
2179 */
2180void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
2181{
2182 if (ch != 0 || !is_zero_range(host, size)) {
2183 memset(host, ch, size);
2184 }
2185}
2186
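/* Decompression worker thread: waits for a compressed page to be handed
 * over in its DecompressParam and inflates it directly into guest memory.
 */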
2187static void *do_data_decompress(void *opaque)
2188{
2189 DecompressParam *param = opaque;
2190 unsigned long pagesize;
Liang Li33d151f2016-05-05 15:32:58 +08002191 uint8_t *des;
2192 int len;
Juan Quintela56e93d22015-05-07 19:33:31 +02002193
Liang Li33d151f2016-05-05 15:32:58 +08002194 qemu_mutex_lock(&param->mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08002195 while (!param->quit) {
Liang Li33d151f2016-05-05 15:32:58 +08002196 if (param->des) {
2197 des = param->des;
2198 len = param->len;
2199 param->des = 0;
2200 qemu_mutex_unlock(&param->mutex);
2201
Liang Li73a89122016-05-05 15:32:51 +08002202 pagesize = TARGET_PAGE_SIZE;
            /* uncompress() can fail in some cases, especially when the
             * page was dirtied while it was being compressed.  That is
             * not a problem, because the dirty page will be retransmitted
             * and uncompress() won't corrupt the data in other pages.
             */
Liang Li33d151f2016-05-05 15:32:58 +08002208 uncompress((Bytef *)des, &pagesize,
2209 (const Bytef *)param->compbuf, len);
Liang Li73a89122016-05-05 15:32:51 +08002210
Liang Li33d151f2016-05-05 15:32:58 +08002211 qemu_mutex_lock(&decomp_done_lock);
2212 param->done = true;
2213 qemu_cond_signal(&decomp_done_cond);
2214 qemu_mutex_unlock(&decomp_done_lock);
2215
2216 qemu_mutex_lock(&param->mutex);
2217 } else {
2218 qemu_cond_wait(&param->cond, &param->mutex);
2219 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002220 }
Liang Li33d151f2016-05-05 15:32:58 +08002221 qemu_mutex_unlock(&param->mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02002222
2223 return NULL;
2224}
2225
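/* Wait until every decompression thread has finished the page it is
 * currently working on; a no-op when compression is not in use.
 */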
Liang Li5533b2e2016-05-05 15:32:52 +08002226static void wait_for_decompress_done(void)
2227{
2228 int idx, thread_count;
2229
2230 if (!migrate_use_compression()) {
2231 return;
2232 }
2233
2234 thread_count = migrate_decompress_threads();
2235 qemu_mutex_lock(&decomp_done_lock);
2236 for (idx = 0; idx < thread_count; idx++) {
2237 while (!decomp_param[idx].done) {
2238 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2239 }
2240 }
2241 qemu_mutex_unlock(&decomp_done_lock);
2242}
2243
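/* Spawn the pool of decompression threads used on the incoming side. */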
Juan Quintela56e93d22015-05-07 19:33:31 +02002244void migrate_decompress_threads_create(void)
2245{
2246 int i, thread_count;
2247
2248 thread_count = migrate_decompress_threads();
2249 decompress_threads = g_new0(QemuThread, thread_count);
2250 decomp_param = g_new0(DecompressParam, thread_count);
Liang Li73a89122016-05-05 15:32:51 +08002251 qemu_mutex_init(&decomp_done_lock);
2252 qemu_cond_init(&decomp_done_cond);
Juan Quintela56e93d22015-05-07 19:33:31 +02002253 for (i = 0; i < thread_count; i++) {
2254 qemu_mutex_init(&decomp_param[i].mutex);
2255 qemu_cond_init(&decomp_param[i].cond);
2256 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
Liang Li73a89122016-05-05 15:32:51 +08002257 decomp_param[i].done = true;
Liang Li90e56fb2016-05-05 15:32:56 +08002258 decomp_param[i].quit = false;
Juan Quintela56e93d22015-05-07 19:33:31 +02002259 qemu_thread_create(decompress_threads + i, "decompress",
2260 do_data_decompress, decomp_param + i,
2261 QEMU_THREAD_JOINABLE);
2262 }
2263}
2264
2265void migrate_decompress_threads_join(void)
2266{
2267 int i, thread_count;
2268
Juan Quintela56e93d22015-05-07 19:33:31 +02002269 thread_count = migrate_decompress_threads();
2270 for (i = 0; i < thread_count; i++) {
2271 qemu_mutex_lock(&decomp_param[i].mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08002272 decomp_param[i].quit = true;
Juan Quintela56e93d22015-05-07 19:33:31 +02002273 qemu_cond_signal(&decomp_param[i].cond);
2274 qemu_mutex_unlock(&decomp_param[i].mutex);
2275 }
2276 for (i = 0; i < thread_count; i++) {
2277 qemu_thread_join(decompress_threads + i);
2278 qemu_mutex_destroy(&decomp_param[i].mutex);
2279 qemu_cond_destroy(&decomp_param[i].cond);
2280 g_free(decomp_param[i].compbuf);
2281 }
2282 g_free(decompress_threads);
2283 g_free(decomp_param);
Juan Quintela56e93d22015-05-07 19:33:31 +02002284 decompress_threads = NULL;
2285 decomp_param = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02002286}
2287
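/* Hand one compressed page to an idle decompression thread, waiting on
 * decomp_done_cond until one becomes available.
 */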
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00002288static void decompress_data_with_multi_threads(QEMUFile *f,
Juan Quintela56e93d22015-05-07 19:33:31 +02002289 void *host, int len)
2290{
2291 int idx, thread_count;
2292
2293 thread_count = migrate_decompress_threads();
Liang Li73a89122016-05-05 15:32:51 +08002294 qemu_mutex_lock(&decomp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02002295 while (true) {
2296 for (idx = 0; idx < thread_count; idx++) {
Liang Li73a89122016-05-05 15:32:51 +08002297 if (decomp_param[idx].done) {
Liang Li33d151f2016-05-05 15:32:58 +08002298 decomp_param[idx].done = false;
2299 qemu_mutex_lock(&decomp_param[idx].mutex);
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00002300 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
Juan Quintela56e93d22015-05-07 19:33:31 +02002301 decomp_param[idx].des = host;
2302 decomp_param[idx].len = len;
Liang Li33d151f2016-05-05 15:32:58 +08002303 qemu_cond_signal(&decomp_param[idx].cond);
2304 qemu_mutex_unlock(&decomp_param[idx].mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02002305 break;
2306 }
2307 }
2308 if (idx < thread_count) {
2309 break;
Liang Li73a89122016-05-05 15:32:51 +08002310 } else {
2311 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02002312 }
2313 }
Liang Li73a89122016-05-05 15:32:51 +08002314 qemu_mutex_unlock(&decomp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02002315}
2316
Dr. David Alan Gilbert1caddf82015-11-05 18:11:03 +00002317/*
 * Allocate data structures etc. needed by incoming migration with
 * postcopy-ram.  postcopy-ram's similarly named postcopy_ram_incoming_init
 * does the work.
2320 */
2321int ram_postcopy_incoming_init(MigrationIncomingState *mis)
2322{
2323 size_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
2324
2325 return postcopy_ram_incoming_init(mis, ram_pages);
2326}
2327
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002328/*
2329 * Called in postcopy mode by ram_load().
2330 * rcu_read_lock is taken prior to this being called.
2331 */
2332static int ram_load_postcopy(QEMUFile *f)
2333{
2334 int flags = 0, ret = 0;
2335 bool place_needed = false;
2336 bool matching_page_sizes = qemu_host_page_size == TARGET_PAGE_SIZE;
2337 MigrationIncomingState *mis = migration_incoming_get_current();
2338 /* Temporary page that is later 'placed' */
2339 void *postcopy_host_page = postcopy_get_tmp_page(mis);
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00002340 void *last_host = NULL;
Dr. David Alan Gilberta3b6ff62015-11-11 14:02:28 +00002341 bool all_zero = false;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002342
2343 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
2344 ram_addr_t addr;
2345 void *host = NULL;
2346 void *page_buffer = NULL;
2347 void *place_source = NULL;
2348 uint8_t ch;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002349
2350 addr = qemu_get_be64(f);
2351 flags = addr & ~TARGET_PAGE_MASK;
2352 addr &= TARGET_PAGE_MASK;
2353
2354 trace_ram_load_postcopy_loop((uint64_t)addr, flags);
2355 place_needed = false;
2356 if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE)) {
zhanghailiang4c4bad42016-01-15 11:37:41 +08002357 RAMBlock *block = ram_block_from_stream(f, flags);
2358
2359 host = host_from_ram_block_offset(block, addr);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002360 if (!host) {
2361 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2362 ret = -EINVAL;
2363 break;
2364 }
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002365 /*
2366 * Postcopy requires that we place whole host pages atomically.
2367 * To make it atomic, the data is read into a temporary page
2368 * that's moved into place later.
             * The migration protocol uses (possibly smaller) target pages;
             * however, the source ensures it always sends all the components
             * of a host page in order.
2372 */
2373 page_buffer = postcopy_host_page +
2374 ((uintptr_t)host & ~qemu_host_page_mask);
            /* If all target pages are zero then we can optimise the place */
2376 if (!((uintptr_t)host & ~qemu_host_page_mask)) {
2377 all_zero = true;
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00002378 } else {
                /* not the first target page within the host page */
2380 if (host != (last_host + TARGET_PAGE_SIZE)) {
Markus Armbruster9af9e0f2015-12-18 16:35:19 +01002381 error_report("Non-sequential target page %p/%p",
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00002382 host, last_host);
2383 ret = -EINVAL;
2384 break;
2385 }
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002386 }
2387
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00002388
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002389 /*
2390 * If it's the last part of a host page then we place the host
2391 * page
2392 */
2393 place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
2394 ~qemu_host_page_mask) == 0;
2395 place_source = postcopy_host_page;
2396 }
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00002397 last_host = host;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002398
2399 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2400 case RAM_SAVE_FLAG_COMPRESS:
2401 ch = qemu_get_byte(f);
2402 memset(page_buffer, ch, TARGET_PAGE_SIZE);
2403 if (ch) {
2404 all_zero = false;
2405 }
2406 break;
2407
2408 case RAM_SAVE_FLAG_PAGE:
2409 all_zero = false;
2410 if (!place_needed || !matching_page_sizes) {
2411 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
2412 } else {
                /* Avoids an extra copy out of the qemu_file buffer, since
                 * postcopy is going to copy the page into place later anyway;
                 * we can only do this when the read is done in one go
                 * (matching page sizes).
                 */
2417 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
2418 TARGET_PAGE_SIZE);
2419 }
2420 break;
2421 case RAM_SAVE_FLAG_EOS:
2422 /* normal exit */
2423 break;
2424 default:
2425 error_report("Unknown combination of migration flags: %#x"
2426 " (postcopy mode)", flags);
2427 ret = -EINVAL;
2428 }
2429
2430 if (place_needed) {
2431 /* This gets called at the last target page in the host page */
2432 if (all_zero) {
2433 ret = postcopy_place_page_zero(mis,
2434 host + TARGET_PAGE_SIZE -
2435 qemu_host_page_size);
2436 } else {
2437 ret = postcopy_place_page(mis, host + TARGET_PAGE_SIZE -
2438 qemu_host_page_size,
2439 place_source);
2440 }
2441 }
2442 if (!ret) {
2443 ret = qemu_file_get_error(f);
2444 }
2445 }
2446
2447 return ret;
2448}
2449
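/* Incoming side: read the RAM section of the migration stream and write
 * the pages into guest memory, dispatching on the per-page flags.  When
 * postcopy is active the atomic ram_load_postcopy() path is used instead.
 */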
Juan Quintela56e93d22015-05-07 19:33:31 +02002450static int ram_load(QEMUFile *f, void *opaque, int version_id)
2451{
2452 int flags = 0, ret = 0;
2453 static uint64_t seq_iter;
2454 int len = 0;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002455 /*
     * If the system is running in postcopy mode, page inserts to host memory
     * must be atomic.
2458 */
2459 bool postcopy_running = postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING;
Juan Quintela56e93d22015-05-07 19:33:31 +02002460
2461 seq_iter++;
2462
2463 if (version_id != 4) {
2464 ret = -EINVAL;
2465 }
2466
2467 /* This RCU critical section can be very long running.
2468 * When RCU reclaims in the code start to become numerous,
2469 * it will be necessary to reduce the granularity of this
2470 * critical section.
2471 */
2472 rcu_read_lock();
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002473
2474 if (postcopy_running) {
2475 ret = ram_load_postcopy(f);
2476 }
2477
2478 while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02002479 ram_addr_t addr, total_ram_bytes;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00002480 void *host = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02002481 uint8_t ch;
2482
2483 addr = qemu_get_be64(f);
2484 flags = addr & ~TARGET_PAGE_MASK;
2485 addr &= TARGET_PAGE_MASK;
2486
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00002487 if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE |
2488 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
zhanghailiang4c4bad42016-01-15 11:37:41 +08002489 RAMBlock *block = ram_block_from_stream(f, flags);
2490
2491 host = host_from_ram_block_offset(block, addr);
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00002492 if (!host) {
2493 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2494 ret = -EINVAL;
2495 break;
2496 }
2497 }
2498
Juan Quintela56e93d22015-05-07 19:33:31 +02002499 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2500 case RAM_SAVE_FLAG_MEM_SIZE:
2501 /* Synchronize RAM block list */
2502 total_ram_bytes = addr;
2503 while (!ret && total_ram_bytes) {
2504 RAMBlock *block;
Juan Quintela56e93d22015-05-07 19:33:31 +02002505 char id[256];
2506 ram_addr_t length;
2507
2508 len = qemu_get_byte(f);
2509 qemu_get_buffer(f, (uint8_t *)id, len);
2510 id[len] = 0;
2511 length = qemu_get_be64(f);
2512
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00002513 block = qemu_ram_block_by_name(id);
2514 if (block) {
2515 if (length != block->used_length) {
2516 Error *local_err = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02002517
Gongleifa53a0e2016-05-10 10:04:59 +08002518 ret = qemu_ram_resize(block, length,
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00002519 &local_err);
2520 if (local_err) {
2521 error_report_err(local_err);
Juan Quintela56e93d22015-05-07 19:33:31 +02002522 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002523 }
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00002524 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
2525 block->idstr);
2526 } else {
Juan Quintela56e93d22015-05-07 19:33:31 +02002527 error_report("Unknown ramblock \"%s\", cannot "
2528 "accept migration", id);
2529 ret = -EINVAL;
2530 }
2531
2532 total_ram_bytes -= length;
2533 }
2534 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00002535
Juan Quintela56e93d22015-05-07 19:33:31 +02002536 case RAM_SAVE_FLAG_COMPRESS:
Juan Quintela56e93d22015-05-07 19:33:31 +02002537 ch = qemu_get_byte(f);
2538 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
2539 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00002540
Juan Quintela56e93d22015-05-07 19:33:31 +02002541 case RAM_SAVE_FLAG_PAGE:
Juan Quintela56e93d22015-05-07 19:33:31 +02002542 qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
2543 break;
Juan Quintela56e93d22015-05-07 19:33:31 +02002544
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00002545 case RAM_SAVE_FLAG_COMPRESS_PAGE:
Juan Quintela56e93d22015-05-07 19:33:31 +02002546 len = qemu_get_be32(f);
2547 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
2548 error_report("Invalid compressed data length: %d", len);
2549 ret = -EINVAL;
2550 break;
2551 }
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00002552 decompress_data_with_multi_threads(f, host, len);
Juan Quintela56e93d22015-05-07 19:33:31 +02002553 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00002554
Juan Quintela56e93d22015-05-07 19:33:31 +02002555 case RAM_SAVE_FLAG_XBZRLE:
Juan Quintela56e93d22015-05-07 19:33:31 +02002556 if (load_xbzrle(f, addr, host) < 0) {
2557 error_report("Failed to decompress XBZRLE page at "
2558 RAM_ADDR_FMT, addr);
2559 ret = -EINVAL;
2560 break;
2561 }
2562 break;
2563 case RAM_SAVE_FLAG_EOS:
2564 /* normal exit */
2565 break;
2566 default:
2567 if (flags & RAM_SAVE_FLAG_HOOK) {
Dr. David Alan Gilbert632e3a52015-06-11 18:17:23 +01002568 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
Juan Quintela56e93d22015-05-07 19:33:31 +02002569 } else {
2570 error_report("Unknown combination of migration flags: %#x",
2571 flags);
2572 ret = -EINVAL;
2573 }
2574 }
2575 if (!ret) {
2576 ret = qemu_file_get_error(f);
2577 }
2578 }
2579
Liang Li5533b2e2016-05-05 15:32:52 +08002580 wait_for_decompress_done();
Juan Quintela56e93d22015-05-07 19:33:31 +02002581 rcu_read_unlock();
2582 DPRINTF("Completed load of VM with exit code %d seq iteration "
2583 "%" PRIu64 "\n", ret, seq_iter);
2584 return ret;
2585}
2586
2587static SaveVMHandlers savevm_ram_handlers = {
2588 .save_live_setup = ram_save_setup,
2589 .save_live_iterate = ram_save_iterate,
Dr. David Alan Gilbert763c9062015-11-05 18:11:00 +00002590 .save_live_complete_postcopy = ram_save_complete,
Dr. David Alan Gilberta3e06c32015-11-05 18:10:41 +00002591 .save_live_complete_precopy = ram_save_complete,
Juan Quintela56e93d22015-05-07 19:33:31 +02002592 .save_live_pending = ram_save_pending,
2593 .load_state = ram_load,
Liang Li6ad2a212015-11-02 15:37:03 +08002594 .cleanup = ram_migration_cleanup,
Juan Quintela56e93d22015-05-07 19:33:31 +02002595};
2596
2597void ram_mig_init(void)
2598{
2599 qemu_mutex_init(&XBZRLE.lock);
2600 register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL);
2601}