/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2011-2015 Red Hat Inc
 *
 * Authors:
 *  Juan Quintela <quintela@redhat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "cpu.h"
#include <zlib.h>
#include "qapi-event.h"
#include "qemu/cutils.h"
#include "qemu/bitops.h"
#include "qemu/bitmap.h"
#include "qemu/timer.h"
#include "qemu/main-loop.h"
#include "migration/migration.h"
#include "migration/postcopy-ram.h"
#include "exec/address-spaces.h"
#include "migration/page_cache.h"
#include "qemu/error-report.h"
#include "trace.h"
#include "exec/ram_addr.h"
#include "qemu/rcu_queue.h"

#ifdef DEBUG_MIGRATION_RAM
#define DPRINTF(fmt, ...) \
    do { fprintf(stdout, "migration_ram: " fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

static int dirty_rate_high_cnt;

static uint64_t bitmap_sync_count;

/***********************************************************/
/* ram save/restore */

#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
#define RAM_SAVE_FLAG_COMPRESS 0x02
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
#define RAM_SAVE_FLAG_PAGE     0x08
#define RAM_SAVE_FLAG_EOS      0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE   0x40
/* 0x80 is reserved in migration.h start with 0x100 next */
#define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100

static const uint8_t ZERO_TARGET_PAGE[TARGET_PAGE_SIZE];

static inline bool is_zero_range(uint8_t *p, uint64_t size)
{
    return buffer_is_zero(p, size);
}

/* struct contains XBZRLE cache and a static page
   used by the compression */
static struct {
    /* buffer used for XBZRLE encoding */
    uint8_t *encoded_buf;
    /* buffer for storing page content */
    uint8_t *current_buf;
    /* Cache for XBZRLE, Protected by lock. */
    PageCache *cache;
    QemuMutex lock;
} XBZRLE;

/* buffer used for XBZRLE decoding */
static uint8_t *xbzrle_decoded_buf;

static void XBZRLE_cache_lock(void)
{
    if (migrate_use_xbzrle())
        qemu_mutex_lock(&XBZRLE.lock);
}

static void XBZRLE_cache_unlock(void)
{
    if (migrate_use_xbzrle())
        qemu_mutex_unlock(&XBZRLE.lock);
}

/*
 * called from qmp_migrate_set_cache_size in main thread, possibly while
 * a migration is in progress.
 * A running migration may be using the cache and might finish during this
 * call, hence changes to the cache are protected by XBZRLE.lock().
 */
int64_t xbzrle_cache_resize(int64_t new_size)
{
    PageCache *new_cache;
    int64_t ret;

    if (new_size < TARGET_PAGE_SIZE) {
        return -1;
    }

    XBZRLE_cache_lock();

    if (XBZRLE.cache != NULL) {
        if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
            goto out_new_size;
        }
        new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
                               TARGET_PAGE_SIZE);
        if (!new_cache) {
            error_report("Error creating cache");
            ret = -1;
            goto out;
        }

        cache_fini(XBZRLE.cache);
        XBZRLE.cache = new_cache;
    }

out_new_size:
    ret = pow2floor(new_size);
out:
    XBZRLE_cache_unlock();
    return ret;
}

/* accounting for migration statistics */
typedef struct AccountingInfo {
    uint64_t dup_pages;
    uint64_t skipped_pages;
    uint64_t norm_pages;
    uint64_t iterations;
    uint64_t xbzrle_bytes;
    uint64_t xbzrle_pages;
    uint64_t xbzrle_cache_miss;
    double xbzrle_cache_miss_rate;
    uint64_t xbzrle_overflows;
} AccountingInfo;

static AccountingInfo acct_info;

static void acct_clear(void)
{
    memset(&acct_info, 0, sizeof(acct_info));
}

uint64_t dup_mig_bytes_transferred(void)
{
    return acct_info.dup_pages * TARGET_PAGE_SIZE;
}

uint64_t dup_mig_pages_transferred(void)
{
    return acct_info.dup_pages;
}

uint64_t skipped_mig_bytes_transferred(void)
{
    return acct_info.skipped_pages * TARGET_PAGE_SIZE;
}

uint64_t skipped_mig_pages_transferred(void)
{
    return acct_info.skipped_pages;
}

uint64_t norm_mig_bytes_transferred(void)
{
    return acct_info.norm_pages * TARGET_PAGE_SIZE;
}

uint64_t norm_mig_pages_transferred(void)
{
    return acct_info.norm_pages;
}

uint64_t xbzrle_mig_bytes_transferred(void)
{
    return acct_info.xbzrle_bytes;
}

uint64_t xbzrle_mig_pages_transferred(void)
{
    return acct_info.xbzrle_pages;
}

uint64_t xbzrle_mig_pages_cache_miss(void)
{
    return acct_info.xbzrle_cache_miss;
}

double xbzrle_mig_cache_miss_rate(void)
{
    return acct_info.xbzrle_cache_miss_rate;
}

uint64_t xbzrle_mig_pages_overflow(void)
{
    return acct_info.xbzrle_overflows;
}

/* This is the last block that we have visited searching for dirty pages
 */
static RAMBlock *last_seen_block;
/* This is the last block from where we have sent data */
static RAMBlock *last_sent_block;
static ram_addr_t last_offset;
static QemuMutex migration_bitmap_mutex;
static uint64_t migration_dirty_pages;
static uint32_t last_version;
static bool ram_bulk_stage;

/* used by the search for pages to send */
struct PageSearchStatus {
    /* Current block being searched */
    RAMBlock    *block;
    /* Current offset to search from */
    ram_addr_t   offset;
    /* Set once we wrap around */
    bool         complete_round;
};
typedef struct PageSearchStatus PageSearchStatus;

static struct BitmapRcu {
    struct rcu_head rcu;
    /* Main migration bitmap */
    unsigned long *bmap;
    /* bitmap of pages that haven't been sent even once
     * only maintained and used in postcopy at the moment
     * where it's used to send the dirtymap at the start
     * of the postcopy phase
     */
    unsigned long *unsentmap;
} *migration_bitmap_rcu;

struct CompressParam {
    bool done;
    bool quit;
    QEMUFile *file;
    QemuMutex mutex;
    QemuCond cond;
    RAMBlock *block;
    ram_addr_t offset;
};
typedef struct CompressParam CompressParam;

struct DecompressParam {
    bool done;
    bool quit;
    QemuMutex mutex;
    QemuCond cond;
    void *des;
    uint8_t *compbuf;
    int len;
};
typedef struct DecompressParam DecompressParam;

static CompressParam *comp_param;
static QemuThread *compress_threads;
/* comp_done_cond is used to wake up the migration thread when
 * one of the compression threads has finished the compression.
 * comp_done_lock is used to co-work with comp_done_cond.
 */
static QemuMutex comp_done_lock;
static QemuCond comp_done_cond;
/* The empty QEMUFileOps will be used by file in CompressParam */
static const QEMUFileOps empty_ops = { };

static bool compression_switch;
static DecompressParam *decomp_param;
static QemuThread *decompress_threads;
static QemuMutex decomp_done_lock;
static QemuCond decomp_done_cond;

static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
                                ram_addr_t offset);

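/*
 * do_data_compress: body of each compression thread
 *
 * Waits on param->cond for a block/offset pair to be queued, compresses
 * that page into param->file, then marks itself done and signals
 * comp_done_cond; loops until param->quit is set.
 *
 * @opaque: CompressParam for this thread
 */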
static void *do_data_compress(void *opaque)
{
    CompressParam *param = opaque;
    RAMBlock *block;
    ram_addr_t offset;

    qemu_mutex_lock(&param->mutex);
    while (!param->quit) {
        if (param->block) {
            block = param->block;
            offset = param->offset;
            param->block = NULL;
            qemu_mutex_unlock(&param->mutex);

            do_compress_ram_page(param->file, block, offset);

            qemu_mutex_lock(&comp_done_lock);
            param->done = true;
            qemu_cond_signal(&comp_done_cond);
            qemu_mutex_unlock(&comp_done_lock);

            qemu_mutex_lock(&param->mutex);
        } else {
            qemu_cond_wait(&param->cond, &param->mutex);
        }
    }
    qemu_mutex_unlock(&param->mutex);

    return NULL;
}

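/* Ask every compression thread to quit, and wake it up so it notices */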
static inline void terminate_compression_threads(void)
{
    int idx, thread_count;

    thread_count = migrate_compress_threads();
    for (idx = 0; idx < thread_count; idx++) {
        qemu_mutex_lock(&comp_param[idx].mutex);
        comp_param[idx].quit = true;
        qemu_cond_signal(&comp_param[idx].cond);
        qemu_mutex_unlock(&comp_param[idx].mutex);
    }
}

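/* Stop the compression threads and free everything set up by
 * migrate_compress_threads_create(); a no-op when compression is not in use.
 */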
void migrate_compress_threads_join(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    terminate_compression_threads();
    thread_count = migrate_compress_threads();
    for (i = 0; i < thread_count; i++) {
        qemu_thread_join(compress_threads + i);
        qemu_fclose(comp_param[i].file);
        qemu_mutex_destroy(&comp_param[i].mutex);
        qemu_cond_destroy(&comp_param[i].cond);
    }
    qemu_mutex_destroy(&comp_done_lock);
    qemu_cond_destroy(&comp_done_cond);
    g_free(compress_threads);
    g_free(comp_param);
    compress_threads = NULL;
    comp_param = NULL;
}

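/* Create the compression threads and the per-thread CompressParam state;
 * a no-op when compression is not in use.
 */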
void migrate_compress_threads_create(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    compression_switch = true;
    thread_count = migrate_compress_threads();
    compress_threads = g_new0(QemuThread, thread_count);
    comp_param = g_new0(CompressParam, thread_count);
    qemu_cond_init(&comp_done_cond);
    qemu_mutex_init(&comp_done_lock);
    for (i = 0; i < thread_count; i++) {
        /* comp_param[i].file is just used as a dummy buffer to save data,
         * set its ops to empty.
         */
        comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
        comp_param[i].done = true;
        comp_param[i].quit = false;
        qemu_mutex_init(&comp_param[i].mutex);
        qemu_cond_init(&comp_param[i].cond);
        qemu_thread_create(compress_threads + i, "compress",
                           do_data_compress, comp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
}

/**
 * save_page_header: Write page header to wire
 *
 * If this is the 1st block, it also writes the block identification
 *
 * Returns: Number of bytes written
 *
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 *          in the lower bits, it contains flags
 */
static size_t save_page_header(QEMUFile *f, RAMBlock *block, ram_addr_t offset)
{
    size_t size, len;

    qemu_put_be64(f, offset);
    size = 8;

    if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
        len = strlen(block->idstr);
        qemu_put_byte(f, len);
        qemu_put_buffer(f, (uint8_t *)block->idstr, len);
        size += 1 + len;
    }
    return size;
}

/* Reduce amount of guest cpu execution to hopefully slow down memory writes.
 * If guest dirty memory rate is reduced below the rate at which we can
 * transfer pages to the destination then we should be able to complete
 * migration. Some workloads dirty memory way too fast and will not effectively
 * converge, even with auto-converge.
 */
static void mig_throttle_guest_down(void)
{
    MigrationState *s = migrate_get_current();
    uint64_t pct_initial = s->parameters.cpu_throttle_initial;
    uint64_t pct_icrement = s->parameters.cpu_throttle_increment;

    /* We have not started throttling yet. Let's start it. */
    if (!cpu_throttle_active()) {
        cpu_throttle_set(pct_initial);
    } else {
        /* Throttling already on, just increase the rate */
        cpu_throttle_set(cpu_throttle_get_percentage() + pct_icrement);
    }
}

/* Update the xbzrle cache to reflect a page that's been sent as all 0.
 * The important thing is that a stale (not-yet-0'd) page be replaced
 * by the new data.
 * As a bonus, if the page wasn't in the cache it gets added so that
 * when a small write is made into the 0'd page it gets XBZRLE sent
 */
static void xbzrle_cache_zero_page(ram_addr_t current_addr)
{
    if (ram_bulk_stage || !migrate_use_xbzrle()) {
        return;
    }

    /* We don't care if this fails to allocate a new cache page
     * as long as it updated an old one */
    cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE,
                 bitmap_sync_count);
}

#define ENCODING_FLAG_XBZRLE 0x1

/**
 * save_xbzrle_page: compress and send current page
 *
 * Returns: 1 means that we wrote the page
 *          0 means that page is identical to the one already sent
 *          -1 means that xbzrle would be longer than normal
 *
 * @f: QEMUFile where to send the data
 * @current_data:
 * @current_addr:
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @last_stage: if we are at the completion stage
 * @bytes_transferred: increase it with the number of transferred bytes
 */
static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data,
                            ram_addr_t current_addr, RAMBlock *block,
                            ram_addr_t offset, bool last_stage,
                            uint64_t *bytes_transferred)
{
    int encoded_len = 0, bytes_xbzrle;
    uint8_t *prev_cached_page;

    if (!cache_is_cached(XBZRLE.cache, current_addr, bitmap_sync_count)) {
        acct_info.xbzrle_cache_miss++;
        if (!last_stage) {
            if (cache_insert(XBZRLE.cache, current_addr, *current_data,
                             bitmap_sync_count) == -1) {
                return -1;
            } else {
                /* update *current_data when the page has been
                   inserted into cache */
                *current_data = get_cached_data(XBZRLE.cache, current_addr);
            }
        }
        return -1;
    }

    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);

    /* save current buffer into memory */
    memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);

    /* XBZRLE encoding (if there is no overflow) */
    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
                                       TARGET_PAGE_SIZE);
    if (encoded_len == 0) {
        DPRINTF("Skipping unmodified page\n");
        return 0;
    } else if (encoded_len == -1) {
        DPRINTF("Overflow\n");
        acct_info.xbzrle_overflows++;
        /* update data in the cache */
        if (!last_stage) {
            memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
            *current_data = prev_cached_page;
        }
        return -1;
    }

    /* we need to update the data in the cache, in order to get the same data */
    if (!last_stage) {
        memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
    }

    /* Send XBZRLE based compressed page */
    bytes_xbzrle = save_page_header(f, block, offset | RAM_SAVE_FLAG_XBZRLE);
    qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
    qemu_put_be16(f, encoded_len);
    qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
    bytes_xbzrle += encoded_len + 1 + 2;
    acct_info.xbzrle_pages++;
    acct_info.xbzrle_bytes += bytes_xbzrle;
    *bytes_transferred += bytes_xbzrle;

    return 1;
}

/* Called with rcu_read_lock() to protect migration_bitmap
 * rb: The RAMBlock to search for dirty pages in
 * start: Start address (typically so we can continue from previous page)
 * ram_addr_abs: Pointer into which to store the address of the dirty page
 *               within the global ram_addr space
 *
 * Returns: byte offset within memory region of the start of a dirty page
 */
static inline
ram_addr_t migration_bitmap_find_dirty(RAMBlock *rb,
                                       ram_addr_t start,
                                       ram_addr_t *ram_addr_abs)
{
    unsigned long base = rb->offset >> TARGET_PAGE_BITS;
    unsigned long nr = base + (start >> TARGET_PAGE_BITS);
    uint64_t rb_size = rb->used_length;
    unsigned long size = base + (rb_size >> TARGET_PAGE_BITS);
    unsigned long *bitmap;

    unsigned long next;

    bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
    if (ram_bulk_stage && nr > base) {
        next = nr + 1;
    } else {
        next = find_next_bit(bitmap, size, nr);
    }

    *ram_addr_abs = next << TARGET_PAGE_BITS;
    return (next - base) << TARGET_PAGE_BITS;
}

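/* Clear the dirty bit for @addr in the migration bitmap, updating
 * migration_dirty_pages accordingly.
 *
 * Returns: true if the bit was previously set (i.e. the page was dirty)
 */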
static inline bool migration_bitmap_clear_dirty(ram_addr_t addr)
{
    bool ret;
    int nr = addr >> TARGET_PAGE_BITS;
    unsigned long *bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;

    ret = test_and_clear_bit(nr, bitmap);

    if (ret) {
        migration_dirty_pages--;
    }
    return ret;
}

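/* Pull the dirty state for [start, start + length) from the memory API
 * into the migration bitmap and account the newly dirtied pages.
 */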
static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
{
    unsigned long *bitmap;
    bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
    migration_dirty_pages +=
        cpu_physical_memory_sync_dirty_bitmap(bitmap, start, length);
}

/* Fix me: there are too many global variables used in migration process. */
static int64_t start_time;
static int64_t bytes_xfer_prev;
static int64_t num_dirty_pages_period;
static uint64_t xbzrle_cache_miss_prev;
static uint64_t iterations_prev;

static void migration_bitmap_sync_init(void)
{
    start_time = 0;
    bytes_xfer_prev = 0;
    num_dirty_pages_period = 0;
    xbzrle_cache_miss_prev = 0;
    iterations_prev = 0;
}

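/* Synchronize the migration bitmap with the dirty log of every RAMBlock,
 * update the dirty page rate statistics and, if auto-converge is enabled,
 * decide whether the guest needs to be throttled down further.
 */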
static void migration_bitmap_sync(void)
{
    RAMBlock *block;
    uint64_t num_dirty_pages_init = migration_dirty_pages;
    MigrationState *s = migrate_get_current();
    int64_t end_time;
    int64_t bytes_xfer_now;

    bitmap_sync_count++;

    if (!bytes_xfer_prev) {
        bytes_xfer_prev = ram_bytes_transferred();
    }

    if (!start_time) {
        start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    }

    trace_migration_bitmap_sync_start();
    memory_global_dirty_log_sync();

    qemu_mutex_lock(&migration_bitmap_mutex);
    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        migration_bitmap_sync_range(block->offset, block->used_length);
    }
    rcu_read_unlock();
    qemu_mutex_unlock(&migration_bitmap_mutex);

    trace_migration_bitmap_sync_end(migration_dirty_pages
                                    - num_dirty_pages_init);
    num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
    end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

    /* more than 1 second = 1000 milliseconds */
    if (end_time > start_time + 1000) {
        if (migrate_auto_converge()) {
            /* The following detection logic can be refined later. For now:
               Check to see if the dirtied bytes is 50% more than the approx.
               amount of bytes that just got transferred since the last time we
               were in this routine. If that happens twice, start or increase
               throttling */
            bytes_xfer_now = ram_bytes_transferred();

            if (s->dirty_pages_rate &&
               (num_dirty_pages_period * TARGET_PAGE_SIZE >
                   (bytes_xfer_now - bytes_xfer_prev)/2) &&
               (dirty_rate_high_cnt++ >= 2)) {
                    trace_migration_throttle();
                    dirty_rate_high_cnt = 0;
                    mig_throttle_guest_down();
             }
             bytes_xfer_prev = bytes_xfer_now;
        }

        if (migrate_use_xbzrle()) {
            if (iterations_prev != acct_info.iterations) {
                acct_info.xbzrle_cache_miss_rate =
                   (double)(acct_info.xbzrle_cache_miss -
                            xbzrle_cache_miss_prev) /
                   (acct_info.iterations - iterations_prev);
            }
            iterations_prev = acct_info.iterations;
            xbzrle_cache_miss_prev = acct_info.xbzrle_cache_miss;
        }
        s->dirty_pages_rate = num_dirty_pages_period * 1000
            / (end_time - start_time);
        s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
        start_time = end_time;
        num_dirty_pages_period = 0;
    }
    s->dirty_sync_count = bitmap_sync_count;
    if (migrate_use_events()) {
        qapi_event_send_migration_pass(bitmap_sync_count, NULL);
    }
}

/**
 * save_zero_page: Send the zero page to the stream
 *
 * Returns: Number of pages written.
 *
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @p: pointer to the page
 * @bytes_transferred: increase it with the number of transferred bytes
 */
static int save_zero_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
                          uint8_t *p, uint64_t *bytes_transferred)
{
    int pages = -1;

    if (is_zero_range(p, TARGET_PAGE_SIZE)) {
        acct_info.dup_pages++;
        *bytes_transferred += save_page_header(f, block,
                                               offset | RAM_SAVE_FLAG_COMPRESS);
        qemu_put_byte(f, 0);
        *bytes_transferred += 1;
        pages = 1;
    }

    return pages;
}

/**
 * ram_save_page: Send the given page to the stream
 *
 * Returns: Number of pages written.
 *          < 0 - error
 *          >=0 - Number of pages written - this might legally be 0
 *                if xbzrle noticed the page was the same.
 *
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @last_stage: if we are at the completion stage
 * @bytes_transferred: increase it with the number of transferred bytes
 */
static int ram_save_page(QEMUFile *f, PageSearchStatus *pss,
                         bool last_stage, uint64_t *bytes_transferred)
{
    int pages = -1;
    uint64_t bytes_xmit;
    ram_addr_t current_addr;
    uint8_t *p;
    int ret;
    bool send_async = true;
    RAMBlock *block = pss->block;
    ram_addr_t offset = pss->offset;

    p = block->host + offset;

    /* When in doubt, send the page as a normal page */
    bytes_xmit = 0;
    ret = ram_control_save_page(f, block->offset,
                                offset, TARGET_PAGE_SIZE, &bytes_xmit);
    if (bytes_xmit) {
        *bytes_transferred += bytes_xmit;
        pages = 1;
    }

    XBZRLE_cache_lock();

    current_addr = block->offset + offset;

    if (block == last_sent_block) {
        offset |= RAM_SAVE_FLAG_CONTINUE;
    }
    if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
        if (ret != RAM_SAVE_CONTROL_DELAYED) {
            if (bytes_xmit > 0) {
                acct_info.norm_pages++;
            } else if (bytes_xmit == 0) {
                acct_info.dup_pages++;
            }
        }
    } else {
        pages = save_zero_page(f, block, offset, p, bytes_transferred);
        if (pages > 0) {
            /* Must let xbzrle know, otherwise a previous (now 0'd) cached
             * page would be stale
             */
            xbzrle_cache_zero_page(current_addr);
        } else if (!ram_bulk_stage && migrate_use_xbzrle()) {
            pages = save_xbzrle_page(f, &p, current_addr, block,
                                     offset, last_stage, bytes_transferred);
            if (!last_stage) {
                /* Can't send this cached data async, since the cache page
                 * might get updated before it gets to the wire
                 */
                send_async = false;
            }
        }
    }

    /* XBZRLE overflow or normal page */
    if (pages == -1) {
        *bytes_transferred += save_page_header(f, block,
                                               offset | RAM_SAVE_FLAG_PAGE);
        if (send_async) {
            qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
        } else {
            qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
        }
        *bytes_transferred += TARGET_PAGE_SIZE;
        pages = 1;
        acct_info.norm_pages++;
    }

    XBZRLE_cache_unlock();

    return pages;
}

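/* Compress one target page and write it, with its header, to @f.
 *
 * Returns: number of bytes written, or 0 on compression failure
 *          (in which case the error is set on the migration stream).
 */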
static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
                                ram_addr_t offset)
{
    int bytes_sent, blen;
    uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);

    bytes_sent = save_page_header(f, block, offset |
                                  RAM_SAVE_FLAG_COMPRESS_PAGE);
    blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
                                     migrate_compress_level());
    if (blen < 0) {
        bytes_sent = 0;
        qemu_file_set_error(migrate_get_current()->to_dst_file, blen);
        error_report("compressed data failed!");
    } else {
        bytes_sent += blen;
    }

    return bytes_sent;
}

static uint64_t bytes_transferred;

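/* Wait for all compression threads to finish the page they are working on
 * and flush whatever they have already produced into @f.
 */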
static void flush_compressed_data(QEMUFile *f)
{
    int idx, len, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    thread_count = migrate_compress_threads();

    qemu_mutex_lock(&comp_done_lock);
    for (idx = 0; idx < thread_count; idx++) {
        while (!comp_param[idx].done) {
            qemu_cond_wait(&comp_done_cond, &comp_done_lock);
        }
    }
    qemu_mutex_unlock(&comp_done_lock);

    for (idx = 0; idx < thread_count; idx++) {
        qemu_mutex_lock(&comp_param[idx].mutex);
        if (!comp_param[idx].quit) {
            len = qemu_put_qemu_file(f, comp_param[idx].file);
            bytes_transferred += len;
        }
        qemu_mutex_unlock(&comp_param[idx].mutex);
    }
}

static inline void set_compress_params(CompressParam *param, RAMBlock *block,
                                       ram_addr_t offset)
{
    param->block = block;
    param->offset = offset;
}

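/* Hand one page to an idle compression thread, flushing that thread's
 * previous output into @f first; blocks until a thread becomes free.
 *
 * Returns: Number of pages written
 */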
static int compress_page_with_multi_thread(QEMUFile *f, RAMBlock *block,
                                           ram_addr_t offset,
                                           uint64_t *bytes_transferred)
{
    int idx, thread_count, bytes_xmit = -1, pages = -1;

    thread_count = migrate_compress_threads();
    qemu_mutex_lock(&comp_done_lock);
    while (true) {
        for (idx = 0; idx < thread_count; idx++) {
            if (comp_param[idx].done) {
                comp_param[idx].done = false;
                bytes_xmit = qemu_put_qemu_file(f, comp_param[idx].file);
                qemu_mutex_lock(&comp_param[idx].mutex);
                set_compress_params(&comp_param[idx], block, offset);
                qemu_cond_signal(&comp_param[idx].cond);
                qemu_mutex_unlock(&comp_param[idx].mutex);
                pages = 1;
                acct_info.norm_pages++;
                *bytes_transferred += bytes_xmit;
                break;
            }
        }
        if (pages > 0) {
            break;
        } else {
            qemu_cond_wait(&comp_done_cond, &comp_done_lock);
        }
    }
    qemu_mutex_unlock(&comp_done_lock);

    return pages;
}

/**
 * ram_save_compressed_page: compress the given page and send it to the stream
 *
 * Returns: Number of pages written.
 *
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @last_stage: if we are at the completion stage
 * @bytes_transferred: increase it with the number of transferred bytes
 */
static int ram_save_compressed_page(QEMUFile *f, PageSearchStatus *pss,
                                    bool last_stage,
                                    uint64_t *bytes_transferred)
{
    int pages = -1;
    uint64_t bytes_xmit = 0;
    uint8_t *p;
    int ret, blen;
    RAMBlock *block = pss->block;
    ram_addr_t offset = pss->offset;

    p = block->host + offset;

    ret = ram_control_save_page(f, block->offset,
                                offset, TARGET_PAGE_SIZE, &bytes_xmit);
    if (bytes_xmit) {
        *bytes_transferred += bytes_xmit;
        pages = 1;
    }
    if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
        if (ret != RAM_SAVE_CONTROL_DELAYED) {
            if (bytes_xmit > 0) {
                acct_info.norm_pages++;
            } else if (bytes_xmit == 0) {
                acct_info.dup_pages++;
            }
        }
    } else {
        /* When starting the process of a new block, the first page of
         * the block should be sent out before other pages in the same
         * block, and all the pages in last block should have been sent
         * out, keeping this order is important, because the 'cont' flag
         * is used to avoid resending the block name.
         */
        if (block != last_sent_block) {
            flush_compressed_data(f);
            pages = save_zero_page(f, block, offset, p, bytes_transferred);
            if (pages == -1) {
                /* Make sure the first page is sent out before other pages */
                bytes_xmit = save_page_header(f, block, offset |
                                              RAM_SAVE_FLAG_COMPRESS_PAGE);
                blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
                                                 migrate_compress_level());
                if (blen > 0) {
                    *bytes_transferred += bytes_xmit + blen;
                    acct_info.norm_pages++;
                    pages = 1;
                } else {
                    qemu_file_set_error(f, blen);
                    error_report("compressed data failed!");
                }
            }
        } else {
            offset |= RAM_SAVE_FLAG_CONTINUE;
            pages = save_zero_page(f, block, offset, p, bytes_transferred);
            if (pages == -1) {
                pages = compress_page_with_multi_thread(f, block, offset,
                                                        bytes_transferred);
            }
        }
    }

    return pages;
}

/*
 * Find the next dirty page and update any state associated with
 * the search process.
 *
 * Returns: True if a page is found
 *
 * @f: Current migration stream.
 * @pss: Data about the state of the current dirty page scan.
 * @*again: Set to false if the search has scanned the whole of RAM
 * *ram_addr_abs: Pointer into which to store the address of the dirty page
 *                within the global ram_addr space
 */
static bool find_dirty_block(QEMUFile *f, PageSearchStatus *pss,
                             bool *again, ram_addr_t *ram_addr_abs)
{
    pss->offset = migration_bitmap_find_dirty(pss->block, pss->offset,
                                              ram_addr_abs);
    if (pss->complete_round && pss->block == last_seen_block &&
        pss->offset >= last_offset) {
        /*
         * We've been once around the RAM and haven't found anything.
         * Give up.
         */
        *again = false;
        return false;
    }
    if (pss->offset >= pss->block->used_length) {
        /* Didn't find anything in this RAM Block */
        pss->offset = 0;
        pss->block = QLIST_NEXT_RCU(pss->block, next);
        if (!pss->block) {
            /* Hit the end of the list */
            pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
            /* Flag that we've looped */
            pss->complete_round = true;
            ram_bulk_stage = false;
            if (migrate_use_xbzrle()) {
                /* If xbzrle is on, stop using the data compression at this
                 * point. In theory, xbzrle can do better than compression.
                 */
                flush_compressed_data(f);
                compression_switch = false;
            }
        }
        /* Didn't find anything this time, but try again on the new block */
        *again = true;
        return false;
    } else {
        /* Can go around again, but... */
        *again = true;
        /* We've found something so probably don't need to */
        return true;
    }
}

/*
 * Helper for 'get_queued_page' - gets a page off the queue
 *      ms:      MigrationState in
 * *offset:      Used to return the offset within the RAMBlock
 * ram_addr_abs: global offset in the dirty/sent bitmaps
 *
 * Returns:      block (or NULL if none available)
 */
static RAMBlock *unqueue_page(MigrationState *ms, ram_addr_t *offset,
                              ram_addr_t *ram_addr_abs)
{
    RAMBlock *block = NULL;

    qemu_mutex_lock(&ms->src_page_req_mutex);
    if (!QSIMPLEQ_EMPTY(&ms->src_page_requests)) {
        struct MigrationSrcPageRequest *entry =
                                QSIMPLEQ_FIRST(&ms->src_page_requests);
        block = entry->rb;
        *offset = entry->offset;
        *ram_addr_abs = (entry->offset + entry->rb->offset) &
                        TARGET_PAGE_MASK;

        if (entry->len > TARGET_PAGE_SIZE) {
            entry->len -= TARGET_PAGE_SIZE;
            entry->offset += TARGET_PAGE_SIZE;
        } else {
            memory_region_unref(block->mr);
            QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req);
            g_free(entry);
        }
    }
    qemu_mutex_unlock(&ms->src_page_req_mutex);

    return block;
}

/*
 * Unqueue a page from the queue fed by postcopy page requests; skips pages
 * that are already sent (!dirty)
 *
 *      ms:      MigrationState in
 *     pss:      PageSearchStatus structure updated with found block/offset
 * ram_addr_abs: global offset in the dirty/sent bitmaps
 *
 * Returns:      true if a queued page is found
 */
static bool get_queued_page(MigrationState *ms, PageSearchStatus *pss,
                            ram_addr_t *ram_addr_abs)
{
    RAMBlock  *block;
    ram_addr_t offset;
    bool dirty;

    do {
        block = unqueue_page(ms, &offset, ram_addr_abs);
        /*
         * We're sending this page, and since it's postcopy nothing else
         * will dirty it, and we must make sure it doesn't get sent again
         * even if this queue request was received after the background
         * search already sent it.
         */
        if (block) {
            unsigned long *bitmap;
            bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
            dirty = test_bit(*ram_addr_abs >> TARGET_PAGE_BITS, bitmap);
            if (!dirty) {
                trace_get_queued_page_not_dirty(
                    block->idstr, (uint64_t)offset,
                    (uint64_t)*ram_addr_abs,
                    test_bit(*ram_addr_abs >> TARGET_PAGE_BITS,
                         atomic_rcu_read(&migration_bitmap_rcu)->unsentmap));
            } else {
                trace_get_queued_page(block->idstr,
                                      (uint64_t)offset,
                                      (uint64_t)*ram_addr_abs);
            }
        }

    } while (block && !dirty);

    if (block) {
        /*
         * As soon as we start servicing pages out of order, then we have
         * to kill the bulk stage, since the bulk stage assumes
         * in (migration_bitmap_find_and_reset_dirty) that every page is
         * dirty, that's no longer true.
         */
        ram_bulk_stage = false;

        /*
         * We want the background search to continue from the queued page
         * since the guest is likely to want other pages near to the page
         * it just requested.
         */
        pss->block = block;
        pss->offset = offset;
    }

    return !!block;
}

/**
 * flush_page_queue: Flush any remaining pages in the ram request queue
 *    it should be empty at the end anyway, but in error cases there may be
 *    some left.
 *
 * ms: MigrationState
 */
void flush_page_queue(MigrationState *ms)
{
    struct MigrationSrcPageRequest *mspr, *next_mspr;
    /* This queue generally should be empty - but in the case of a failed
     * migration might have some droppings in.
     */
    rcu_read_lock();
    QSIMPLEQ_FOREACH_SAFE(mspr, &ms->src_page_requests, next_req, next_mspr) {
        memory_region_unref(mspr->rb->mr);
        QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req);
        g_free(mspr);
    }
    rcu_read_unlock();
}

/**
 * Queue the pages for transmission, e.g. a request from postcopy destination
 *   ms: MigrationStatus in which the queue is held
 *   rbname: The RAMBlock the request is for - may be NULL (to mean reuse last)
 *   start: Offset from the start of the RAMBlock
 *   len: Length (in bytes) to send
 *   Return: 0 on success
 */
int ram_save_queue_pages(MigrationState *ms, const char *rbname,
                         ram_addr_t start, ram_addr_t len)
{
    RAMBlock *ramblock;

    ms->postcopy_requests++;
    rcu_read_lock();
    if (!rbname) {
        /* Reuse last RAMBlock */
        ramblock = ms->last_req_rb;

        if (!ramblock) {
            /*
             * Shouldn't happen, we can't reuse the last RAMBlock if
             * it's the 1st request.
             */
            error_report("ram_save_queue_pages no previous block");
            goto err;
        }
    } else {
        ramblock = qemu_ram_block_by_name(rbname);

        if (!ramblock) {
            /* We shouldn't be asked for a non-existent RAMBlock */
            error_report("ram_save_queue_pages no block '%s'", rbname);
            goto err;
        }
        ms->last_req_rb = ramblock;
    }
    trace_ram_save_queue_pages(ramblock->idstr, start, len);
    if (start+len > ramblock->used_length) {
        error_report("%s request overrun start=" RAM_ADDR_FMT " len="
                     RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
                     __func__, start, len, ramblock->used_length);
        goto err;
    }

    struct MigrationSrcPageRequest *new_entry =
        g_malloc0(sizeof(struct MigrationSrcPageRequest));
    new_entry->rb = ramblock;
    new_entry->offset = start;
    new_entry->len = len;

    memory_region_ref(ramblock->mr);
    qemu_mutex_lock(&ms->src_page_req_mutex);
    QSIMPLEQ_INSERT_TAIL(&ms->src_page_requests, new_entry, next_req);
    qemu_mutex_unlock(&ms->src_page_req_mutex);
    rcu_read_unlock();

    return 0;

err:
    rcu_read_unlock();
    return -1;
}

/**
 * ram_save_target_page: Save one target page
 *
 * @f: QEMUFile where to send the data
 * @block: pointer to block that contains the page we want to send
 * @offset: offset inside the block for the page;
 * @last_stage: if we are at the completion stage
 * @bytes_transferred: increase it with the number of transferred bytes
 * @dirty_ram_abs: Address of the start of the dirty page in ram_addr_t space
 *
 * Returns: Number of pages written.
 */
static int ram_save_target_page(MigrationState *ms, QEMUFile *f,
                                PageSearchStatus *pss,
                                bool last_stage,
                                uint64_t *bytes_transferred,
                                ram_addr_t dirty_ram_abs)
{
    int res = 0;

    /* Check whether the page is dirty and, if it is, send it */
    if (migration_bitmap_clear_dirty(dirty_ram_abs)) {
        unsigned long *unsentmap;
        if (compression_switch && migrate_use_compression()) {
            res = ram_save_compressed_page(f, pss,
                                           last_stage,
                                           bytes_transferred);
        } else {
            res = ram_save_page(f, pss, last_stage,
                                bytes_transferred);
        }

        if (res < 0) {
            return res;
        }
        unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
        if (unsentmap) {
            clear_bit(dirty_ram_abs >> TARGET_PAGE_BITS, unsentmap);
        }
        /* Only update last_sent_block if a block was actually sent; xbzrle
         * might have decided the page was identical so didn't bother writing
         * to the stream.
         */
        if (res > 0) {
            last_sent_block = pss->block;
        }
    }

    return res;
}

/**
 * ram_save_host_page: Starting at *offset send pages up to the end
 *                     of the current host page. It's valid for the initial
 *                     offset to point into the middle of a host page
 *                     in which case the remainder of the hostpage is sent.
 *                     Only dirty target pages are sent.
 *
 * Returns: Number of pages written.
 *
 * @f: QEMUFile where to send the data
 * @block: pointer to block that contains the page we want to send
 * @offset: offset inside the block for the page; updated to last target page
 *          sent
 * @last_stage: if we are at the completion stage
 * @bytes_transferred: increase it with the number of transferred bytes
 * @dirty_ram_abs: Address of the start of the dirty page in ram_addr_t space
 */
static int ram_save_host_page(MigrationState *ms, QEMUFile *f,
                              PageSearchStatus *pss,
                              bool last_stage,
                              uint64_t *bytes_transferred,
                              ram_addr_t dirty_ram_abs)
{
    int tmppages, pages = 0;
    do {
        tmppages = ram_save_target_page(ms, f, pss, last_stage,
                                        bytes_transferred, dirty_ram_abs);
        if (tmppages < 0) {
            return tmppages;
        }

        pages += tmppages;
        pss->offset += TARGET_PAGE_SIZE;
        dirty_ram_abs += TARGET_PAGE_SIZE;
    } while (pss->offset & (qemu_host_page_size - 1));

    /* The offset we leave with is the last one we looked at */
    pss->offset -= TARGET_PAGE_SIZE;
    return pages;
}

/**
 * ram_find_and_save_block: Finds a dirty page and sends it to f
 *
 * Called within an RCU critical section.
 *
 * Returns:  The number of pages written
 *           0 means no dirty pages
 *
 * @f: QEMUFile where to send the data
 * @last_stage: if we are at the completion stage
 * @bytes_transferred: increase it with the number of transferred bytes
 *
 * On systems where host-page-size > target-page-size it will send all the
 * pages in a host page that are dirty.
 */

static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
                                   uint64_t *bytes_transferred)
{
    PageSearchStatus pss;
    MigrationState *ms = migrate_get_current();
    int pages = 0;
    bool again, found;
    ram_addr_t dirty_ram_abs; /* Address of the start of the dirty page in
                                 ram_addr_t space */

    pss.block = last_seen_block;
    pss.offset = last_offset;
    pss.complete_round = false;

    if (!pss.block) {
        pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
    }

    do {
        again = true;
        found = get_queued_page(ms, &pss, &dirty_ram_abs);

        if (!found) {
            /* priority queue empty, so just search for something dirty */
            found = find_dirty_block(f, &pss, &again, &dirty_ram_abs);
        }

        if (found) {
            pages = ram_save_host_page(ms, f, &pss,
                                       last_stage, bytes_transferred,
                                       dirty_ram_abs);
        }
    } while (!pages && again);

    last_seen_block = pss.block;
    last_offset = pss.offset;

    return pages;
}

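/* Update the page counters and bytes_transferred for @size bytes that were
 * saved or skipped outside of the functions above; @zero indicates the
 * pages were all-zero.
 */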
void acct_update_position(QEMUFile *f, size_t size, bool zero)
{
    uint64_t pages = size / TARGET_PAGE_SIZE;
    if (zero) {
        acct_info.dup_pages += pages;
    } else {
        acct_info.norm_pages += pages;
        bytes_transferred += size;
        qemu_update_position(f, size);
    }
}

static ram_addr_t ram_save_remaining(void)
{
    return migration_dirty_pages;
}

uint64_t ram_bytes_remaining(void)
{
    return ram_save_remaining() * TARGET_PAGE_SIZE;
}

uint64_t ram_bytes_transferred(void)
{
    return bytes_transferred;
}

uint64_t ram_bytes_total(void)
{
    RAMBlock *block;
    uint64_t total = 0;

    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next)
        total += block->used_length;
    rcu_read_unlock();
    return total;
}

void free_xbzrle_decoded_buf(void)
{
    g_free(xbzrle_decoded_buf);
    xbzrle_decoded_buf = NULL;
}

static void migration_bitmap_free(struct BitmapRcu *bmap)
{
    g_free(bmap->bmap);
    g_free(bmap->unsentmap);
    g_free(bmap);
}

static void ram_migration_cleanup(void *opaque)
{
    /* The caller must hold the iothread lock or be in a bh, so there is
     * no writing race against this migration_bitmap
     */
    struct BitmapRcu *bitmap = migration_bitmap_rcu;
    atomic_rcu_set(&migration_bitmap_rcu, NULL);
    if (bitmap) {
        memory_global_dirty_log_stop();
        call_rcu(bitmap, migration_bitmap_free, rcu);
    }

    XBZRLE_cache_lock();
    if (XBZRLE.cache) {
        cache_fini(XBZRLE.cache);
        g_free(XBZRLE.encoded_buf);
        g_free(XBZRLE.current_buf);
        XBZRLE.cache = NULL;
        XBZRLE.encoded_buf = NULL;
        XBZRLE.current_buf = NULL;
    }
    XBZRLE_cache_unlock();
}

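/* Reset the page-search state so the next pass starts from the beginning
 * of the RAMBlock list in bulk stage.
 */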
static void reset_ram_globals(void)
{
    last_seen_block = NULL;
    last_sent_block = NULL;
    last_offset = 0;
    last_version = ram_list.version;
    ram_bulk_stage = true;
}

#define MAX_WAIT 50 /* ms, half buffered_file limit */

void migration_bitmap_extend(ram_addr_t old, ram_addr_t new)
{
    /* called in qemu main thread, so there is
     * no writing race against this migration_bitmap
     */
    if (migration_bitmap_rcu) {
        struct BitmapRcu *old_bitmap = migration_bitmap_rcu, *bitmap;
        bitmap = g_new(struct BitmapRcu, 1);
        bitmap->bmap = bitmap_new(new);

        /* prevent migration_bitmap bits from being set
         * by migration_bitmap_sync_range() at the same time.
         * it is safe for migration if a migration_bitmap bit is
         * cleared at the same time.
         */
        qemu_mutex_lock(&migration_bitmap_mutex);
        bitmap_copy(bitmap->bmap, old_bitmap->bmap, old);
        bitmap_set(bitmap->bmap, old, new - old);

        /* We don't have a way to safely extend the unsentmap
         * with RCU; so mark it as missing, entry to postcopy
         * will fail.
         */
1473 bitmap->unsentmap = NULL;
1474
Denis V. Lunev60be6342015-09-28 14:41:58 +03001475 atomic_rcu_set(&migration_bitmap_rcu, bitmap);
Li Zhijiandd631692015-07-02 20:18:06 +08001476 qemu_mutex_unlock(&migration_bitmap_mutex);
1477 migration_dirty_pages += new - old;
Denis V. Lunev60be6342015-09-28 14:41:58 +03001478 call_rcu(old_bitmap, migration_bitmap_free, rcu);
Li Zhijiandd631692015-07-02 20:18:06 +08001479 }
1480}
Juan Quintela56e93d22015-05-07 19:33:31 +02001481
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00001482/*
1483 * 'expected' is the value you expect the bitmap mostly to be full
1484 * of; it won't bother printing lines that are all this value.
1485 * If 'todump' is null the migration bitmap is dumped.
1486 */
1487void ram_debug_dump_bitmap(unsigned long *todump, bool expected)
1488{
1489 int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
1490
1491 int64_t cur;
1492 int64_t linelen = 128;
1493 char linebuf[129];
1494
1495 if (!todump) {
1496 todump = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1497 }
1498
1499 for (cur = 0; cur < ram_pages; cur += linelen) {
1500 int64_t curb;
1501 bool found = false;
1502 /*
1503 * Last line; catch the case where the line length
1504 * is longer than remaining ram
1505 */
1506 if (cur + linelen > ram_pages) {
1507 linelen = ram_pages - cur;
1508 }
1509 for (curb = 0; curb < linelen; curb++) {
1510 bool thisbit = test_bit(cur + curb, todump);
1511 linebuf[curb] = thisbit ? '1' : '.';
1512 found = found || (thisbit != expected);
1513 }
1514 if (found) {
1515 linebuf[curb] = '\0';
1516 fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
1517 }
1518 }
1519}
1520
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001521/* **** functions for postcopy ***** */
1522
1523/*
1524 * Callback from postcopy_each_ram_send_discard for each RAMBlock
1525 * Note: At this point the 'unsentmap' is the processed bitmap combined
1526 * with the dirtymap; so a '1' means it's either dirty or unsent.
1527 * start,length: Indexes into the bitmap for the first bit of the
1528 * named block, and the length of the region in target-pages
1529 */
1530static int postcopy_send_discard_bm_ram(MigrationState *ms,
1531 PostcopyDiscardState *pds,
1532 unsigned long start,
1533 unsigned long length)
1534{
1535 unsigned long end = start + length; /* one after the end */
1536 unsigned long current;
1537 unsigned long *unsentmap;
1538
1539 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1540 for (current = start; current < end; ) {
1541 unsigned long one = find_next_bit(unsentmap, end, current);
1542
1543 if (one <= end) {
1544 unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1);
1545 unsigned long discard_length;
1546
1547 if (zero >= end) {
1548 discard_length = end - one;
1549 } else {
1550 discard_length = zero - one;
1551 }
Dr. David Alan Gilbertd688c622016-06-13 12:16:40 +01001552 if (discard_length) {
1553 postcopy_discard_send_range(ms, pds, one, discard_length);
1554 }
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001555 current = one + discard_length;
1556 } else {
1557 current = one;
1558 }
1559 }
1560
1561 return 0;
1562}
1563
1564/*
1565 * Utility for the outgoing postcopy code.
1566 * Calls postcopy_send_discard_bm_ram for each RAMBlock
1567 * passing it bitmap indexes and name.
1568 * Returns: 0 on success
1569 * (qemu_ram_foreach_block ends up passing unscaled lengths
1570 * which would mean postcopy code would have to deal with target page)
1571 */
1572static int postcopy_each_ram_send_discard(MigrationState *ms)
1573{
1574 struct RAMBlock *block;
1575 int ret;
1576
1577 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1578 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1579 PostcopyDiscardState *pds = postcopy_discard_send_init(ms,
1580 first,
1581 block->idstr);
1582
1583 /*
1584 * Postcopy sends chunks of bitmap over the wire, but it
1585 * just needs indexes at this point, avoids it having
1586 * target page specific code.
1587 */
1588 ret = postcopy_send_discard_bm_ram(ms, pds, first,
1589 block->used_length >> TARGET_PAGE_BITS);
1590 postcopy_discard_send_finish(ms, pds);
1591 if (ret) {
1592 return ret;
1593 }
1594 }
1595
1596 return 0;
1597}
1598
1599/*
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001600 * Helper for postcopy_chunk_hostpages; it's called twice to clean up
1601 * the two bitmaps, which are similar but one is inverted.
1602 *
1603 * We search for runs of target-pages that don't start or end on a
1604 * host page boundary;
1605 * unsent_pass=true: Cleans up partially unsent host pages by searching
1606 * the unsentmap
1607 * unsent_pass=false: Cleans up partially dirty host pages by searching
1608 * the main migration bitmap
1609 *
1610 */
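/*
 * Illustrative example with assumed page sizes (not from the original
 * comments): with 4 KiB target pages and 2 MiB host pages, host_ratio is
 * 512.  A run starting at target page 1000 has host_offset = 1000 % 512
 * = 488, so fixup_start_addr becomes 512 and the whole host page (target
 * pages 512..1023) is treated as one unit: it may be told to the
 * destination to discard, and all of its target pages are re-marked
 * dirty and unsent here.
 */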
1611static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
1612 RAMBlock *block,
1613 PostcopyDiscardState *pds)
1614{
1615 unsigned long *bitmap;
1616 unsigned long *unsentmap;
1617 unsigned int host_ratio = qemu_host_page_size / TARGET_PAGE_SIZE;
1618 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1619 unsigned long len = block->used_length >> TARGET_PAGE_BITS;
1620 unsigned long last = first + (len - 1);
1621 unsigned long run_start;
1622
1623 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1624 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1625
1626 if (unsent_pass) {
1627 /* Find a sent page */
1628 run_start = find_next_zero_bit(unsentmap, last + 1, first);
1629 } else {
1630 /* Find a dirty page */
1631 run_start = find_next_bit(bitmap, last + 1, first);
1632 }
1633
1634 while (run_start <= last) {
1635 bool do_fixup = false;
1636 unsigned long fixup_start_addr;
1637 unsigned long host_offset;
1638
1639 /*
1640 * If the start of this run of pages is in the middle of a host
1641 * page, then we need to fixup this host page.
1642 */
1643 host_offset = run_start % host_ratio;
1644 if (host_offset) {
1645 do_fixup = true;
1646 run_start -= host_offset;
1647 fixup_start_addr = run_start;
1648 /* For the next pass */
1649 run_start = run_start + host_ratio;
1650 } else {
1651 /* Find the end of this run */
1652 unsigned long run_end;
1653 if (unsent_pass) {
1654 run_end = find_next_bit(unsentmap, last + 1, run_start + 1);
1655 } else {
1656 run_end = find_next_zero_bit(bitmap, last + 1, run_start + 1);
1657 }
1658 /*
1659 * If the end isn't at the start of a host page, then the
1660 * run doesn't finish at the end of a host page
1661 * and we need to discard.
1662 */
1663 host_offset = run_end % host_ratio;
1664 if (host_offset) {
1665 do_fixup = true;
1666 fixup_start_addr = run_end - host_offset;
1667 /*
1668 * This host page has gone, the next loop iteration starts
1669 * from after the fixup
1670 */
1671 run_start = fixup_start_addr + host_ratio;
1672 } else {
1673 /*
1674 * No discards on this iteration, next loop starts from
1675 * next sent/dirty page
1676 */
1677 run_start = run_end + 1;
1678 }
1679 }
1680
1681 if (do_fixup) {
1682 unsigned long page;
1683
1684 /* Tell the destination to discard this page */
1685 if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
1686 /* For the unsent_pass we:
1687 * discard partially sent pages
1688 * For the !unsent_pass (dirty) we:
1689 * discard partially dirty pages that were sent
1690 * (any partially sent pages were already discarded
1691 * by the previous unsent_pass)
1692 */
1693 postcopy_discard_send_range(ms, pds, fixup_start_addr,
1694 host_ratio);
1695 }
1696
1697 /* Clean up the bitmap */
1698 for (page = fixup_start_addr;
1699 page < fixup_start_addr + host_ratio; page++) {
1700 /* All pages in this host page are now not sent */
1701 set_bit(page, unsentmap);
1702
1703 /*
1704 * Remark them as dirty, updating the count for any pages
1705 * that weren't previously dirty.
1706 */
1707 migration_dirty_pages += !test_and_set_bit(page, bitmap);
1708 }
1709 }
1710
1711 if (unsent_pass) {
1712 /* Find the next sent page for the next iteration */
1713 run_start = find_next_zero_bit(unsentmap, last + 1,
1714 run_start);
1715 } else {
1716 /* Find the next dirty page for the next iteration */
1717 run_start = find_next_bit(bitmap, last + 1, run_start);
1718 }
1719 }
1720}
1721
1722/*
1723 * Utility for the outgoing postcopy code.
1724 *
1725 * Discard any partially sent host-page size chunks, mark any partially
1726 * dirty host-page size chunks as all dirty.
1727 *
1728 * Returns: 0 on success
1729 */
1730static int postcopy_chunk_hostpages(MigrationState *ms)
1731{
1732 struct RAMBlock *block;
1733
1734 if (qemu_host_page_size == TARGET_PAGE_SIZE) {
1735 /* Easy case - TPS==HPS - nothing to be done */
1736 return 0;
1737 }
1738
1739 /* Easiest way to make sure we don't resume in the middle of a host-page */
1740 last_seen_block = NULL;
1741 last_sent_block = NULL;
1742 last_offset = 0;
1743
1744 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1745 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1746
1747 PostcopyDiscardState *pds =
1748 postcopy_discard_send_init(ms, first, block->idstr);
1749
1750 /* First pass: Discard all partially sent host pages */
1751 postcopy_chunk_hostpages_pass(ms, true, block, pds);
1752 /*
1753 * Second pass: Ensure that all partially dirty host pages are made
1754 * fully dirty.
1755 */
1756 postcopy_chunk_hostpages_pass(ms, false, block, pds);
1757
1758 postcopy_discard_send_finish(ms, pds);
1759 } /* ram_list loop */
1760
1761 return 0;
1762}
1763
1764/*
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001765 * Transmit the set of pages to be discarded after precopy to the target;
1766 * these are pages that:
1767 * a) have been previously transmitted but are now dirty again
1768 * b) have never been transmitted; this ensures that any pages on the
1769 * destination that have been mapped by background tasks get
1770 * discarded (transparent huge pages is the specific concern)
1771 * Hopefully this is pretty sparse.
1772 */
1773int ram_postcopy_send_discard_bitmap(MigrationState *ms)
1774{
1775 int ret;
1776 unsigned long *bitmap, *unsentmap;
1777
1778 rcu_read_lock();
1779
1780 /* This should be our last sync, the src is now paused */
1781 migration_bitmap_sync();
1782
1783 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1784 if (!unsentmap) {
1785 /* We don't have a safe way to resize the unsentmap, so if the
1786 * RAM was resized during precopy the unsentmap will be NULL at
1787 * this point.
1788 */
1789 error_report("migration ram resized during precopy phase");
1790 rcu_read_unlock();
1791 return -EINVAL;
1792 }
1793
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001794 /* Deal with TPS != HPS */
1795 ret = postcopy_chunk_hostpages(ms);
1796 if (ret) {
1797 rcu_read_unlock();
1798 return ret;
1799 }
1800
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001801 /*
1802 * Update the unsentmap to be unsentmap = unsentmap | dirty
1803 */
1804 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1805 bitmap_or(unsentmap, unsentmap, bitmap,
1806 last_ram_offset() >> TARGET_PAGE_BITS);
1807
1808
1809 trace_ram_postcopy_send_discard_bitmap();
1810#ifdef DEBUG_POSTCOPY
1811 ram_debug_dump_bitmap(unsentmap, true);
1812#endif
1813
1814 ret = postcopy_each_ram_send_discard(ms);
1815 rcu_read_unlock();
1816
1817 return ret;
1818}
1819
1820/*
1821 * At the start of the postcopy phase of migration, any now-dirty
1822 * precopied pages are discarded.
1823 *
1824 * start, length describe a byte address range within the RAMBlock
1825 *
1826 * Returns 0 on success.
1827 */
1828int ram_discard_range(MigrationIncomingState *mis,
1829 const char *block_name,
1830 uint64_t start, size_t length)
1831{
1832 int ret = -1;
1833
1834 rcu_read_lock();
1835 RAMBlock *rb = qemu_ram_block_by_name(block_name);
1836
1837 if (!rb) {
1838 error_report("ram_discard_range: Failed to find block '%s'",
1839 block_name);
1840 goto err;
1841 }
1842
1843 uint8_t *host_startaddr = rb->host + start;
1844
1845 if ((uintptr_t)host_startaddr & (qemu_host_page_size - 1)) {
1846 error_report("ram_discard_range: Unaligned start address: %p",
1847 host_startaddr);
1848 goto err;
1849 }
1850
1851 if ((start + length) <= rb->used_length) {
1852 uint8_t *host_endaddr = host_startaddr + length;
1853 if ((uintptr_t)host_endaddr & (qemu_host_page_size - 1)) {
1854 error_report("ram_discard_range: Unaligned end address: %p",
1855 host_endaddr);
1856 goto err;
1857 }
1858 ret = postcopy_ram_discard_range(mis, host_startaddr, length);
1859 } else {
1860 error_report("ram_discard_range: Overrun block '%s' (%" PRIu64
Juan Quintela9458ad62015-11-10 17:42:05 +01001861 "/%zx/" RAM_ADDR_FMT")",
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001862 block_name, start, length, rb->used_length);
1863 }
1864
1865err:
1866 rcu_read_unlock();
1867
1868 return ret;
1869}
1870
1871
Juan Quintela56e93d22015-05-07 19:33:31 +02001872/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has a
1873 * long-running RCU critical section.  When RCU reclaims in the code
1874 * start to become numerous it will be necessary to reduce the
1875 * granularity of these critical sections.
1876 */
1877
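/*
 * Setup stage of RAM migration: initialise the XBZRLE cache if in use,
 * allocate the migration bitmap (and the unsentmap when postcopy is
 * enabled) with every page marked dirty, start dirty page logging, and
 * write the RAMBlock list (idstr + used_length) to the stream.
 */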
1878static int ram_save_setup(QEMUFile *f, void *opaque)
1879{
1880 RAMBlock *block;
1881 int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */
1882
Juan Quintela56e93d22015-05-07 19:33:31 +02001883 dirty_rate_high_cnt = 0;
1884 bitmap_sync_count = 0;
1885 migration_bitmap_sync_init();
Li Zhijiandd631692015-07-02 20:18:06 +08001886 qemu_mutex_init(&migration_bitmap_mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02001887
1888 if (migrate_use_xbzrle()) {
1889 XBZRLE_cache_lock();
1890 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
1891 TARGET_PAGE_SIZE,
1892 TARGET_PAGE_SIZE);
1893 if (!XBZRLE.cache) {
1894 XBZRLE_cache_unlock();
1895 error_report("Error creating cache");
1896 return -1;
1897 }
1898 XBZRLE_cache_unlock();
1899
1900 /* We prefer not to abort if there is no memory */
1901 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
1902 if (!XBZRLE.encoded_buf) {
1903 error_report("Error allocating encoded_buf");
1904 return -1;
1905 }
1906
1907 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
1908 if (!XBZRLE.current_buf) {
1909 error_report("Error allocating current_buf");
1910 g_free(XBZRLE.encoded_buf);
1911 XBZRLE.encoded_buf = NULL;
1912 return -1;
1913 }
1914
1915 acct_clear();
1916 }
1917
Paolo Bonzini49877832016-02-15 19:57:57 +01001918 /* For memory_global_dirty_log_start below. */
1919 qemu_mutex_lock_iothread();
1920
Juan Quintela56e93d22015-05-07 19:33:31 +02001921 qemu_mutex_lock_ramlist();
1922 rcu_read_lock();
1923 bytes_transferred = 0;
1924 reset_ram_globals();
1925
1926 ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
Dr. David Alan Gilbertf3f491f2015-11-05 18:11:01 +00001927 migration_bitmap_rcu = g_new0(struct BitmapRcu, 1);
Denis V. Lunev60be6342015-09-28 14:41:58 +03001928 migration_bitmap_rcu->bmap = bitmap_new(ram_bitmap_pages);
1929 bitmap_set(migration_bitmap_rcu->bmap, 0, ram_bitmap_pages);
Juan Quintela56e93d22015-05-07 19:33:31 +02001930
Dr. David Alan Gilbertf3f491f2015-11-05 18:11:01 +00001931 if (migrate_postcopy_ram()) {
1932 migration_bitmap_rcu->unsentmap = bitmap_new(ram_bitmap_pages);
1933 bitmap_set(migration_bitmap_rcu->unsentmap, 0, ram_bitmap_pages);
1934 }
1935
Juan Quintela56e93d22015-05-07 19:33:31 +02001936 /*
1937 * Count the total number of pages used by ram blocks not including any
1938 * gaps due to alignment or unplugs.
1939 */
1940 migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
1941
1942 memory_global_dirty_log_start();
1943 migration_bitmap_sync();
1944 qemu_mutex_unlock_ramlist();
Paolo Bonzini49877832016-02-15 19:57:57 +01001945 qemu_mutex_unlock_iothread();
Juan Quintela56e93d22015-05-07 19:33:31 +02001946
1947 qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
1948
1949 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1950 qemu_put_byte(f, strlen(block->idstr));
1951 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
1952 qemu_put_be64(f, block->used_length);
1953 }
1954
1955 rcu_read_unlock();
1956
1957 ram_control_before_iterate(f, RAM_CONTROL_SETUP);
1958 ram_control_after_iterate(f, RAM_CONTROL_SETUP);
1959
1960 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
1961
1962 return 0;
1963}
1964
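/*
 * Iteration stage: send dirty pages until the bandwidth limit is reached
 * or roughly MAX_WAIT ms have passed, then terminate the section with
 * RAM_SAVE_FLAG_EOS.  Returns the number of pages sent, or a negative
 * error from the stream.
 */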
1965static int ram_save_iterate(QEMUFile *f, void *opaque)
1966{
1967 int ret;
1968 int i;
1969 int64_t t0;
1970 int pages_sent = 0;
1971
1972 rcu_read_lock();
1973 if (ram_list.version != last_version) {
1974 reset_ram_globals();
1975 }
1976
1977 /* Read version before ram_list.blocks */
1978 smp_rmb();
1979
1980 ram_control_before_iterate(f, RAM_CONTROL_ROUND);
1981
1982 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
1983 i = 0;
1984 while ((ret = qemu_file_rate_limit(f)) == 0) {
1985 int pages;
1986
1987 pages = ram_find_and_save_block(f, false, &bytes_transferred);
1988 /* no more pages to send */
1989 if (pages == 0) {
1990 break;
1991 }
1992 pages_sent += pages;
1993 acct_info.iterations++;
Jason J. Herne070afca2015-09-08 13:12:35 -04001994
Juan Quintela56e93d22015-05-07 19:33:31 +02001995 /* we want to check in the 1st loop, just in case it was the 1st time
1996 and we had to sync the dirty bitmap.
1997 qemu_clock_get_ns() is a bit expensive, so we only check once
1998 every few iterations
1999 */
2000 if ((i & 63) == 0) {
2001 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
2002 if (t1 > MAX_WAIT) {
2003 DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n",
2004 t1, i);
2005 break;
2006 }
2007 }
2008 i++;
2009 }
2010 flush_compressed_data(f);
2011 rcu_read_unlock();
2012
2013 /*
2014 * Must occur before EOS (or any QEMUFile operation)
2015 * because of RDMA protocol.
2016 */
2017 ram_control_after_iterate(f, RAM_CONTROL_ROUND);
2018
2019 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2020 bytes_transferred += 8;
2021
2022 ret = qemu_file_get_error(f);
2023 if (ret < 0) {
2024 return ret;
2025 }
2026
2027 return pages_sent;
2028}
2029
2030/* Called with the iothread lock held */
2031static int ram_save_complete(QEMUFile *f, void *opaque)
2032{
2033 rcu_read_lock();
2034
Dr. David Alan Gilbert663e6c12015-11-05 18:11:13 +00002035 if (!migration_in_postcopy(migrate_get_current())) {
2036 migration_bitmap_sync();
2037 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002038
2039 ram_control_before_iterate(f, RAM_CONTROL_FINISH);
2040
2041 /* try transferring iterative blocks of memory */
2042
2043 /* flush all remaining blocks regardless of rate limiting */
2044 while (true) {
2045 int pages;
2046
2047 pages = ram_find_and_save_block(f, true, &bytes_transferred);
2048 /* no more blocks to send */
2049 if (pages == 0) {
2050 break;
2051 }
2052 }
2053
2054 flush_compressed_data(f);
2055 ram_control_after_iterate(f, RAM_CONTROL_FINISH);
Juan Quintela56e93d22015-05-07 19:33:31 +02002056
2057 rcu_read_unlock();
Paolo Bonzinid09a6fd2015-07-09 08:47:58 +02002058
Juan Quintela56e93d22015-05-07 19:33:31 +02002059 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2060
2061 return 0;
2062}
2063
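/*
 * Report how much RAM is still to be sent.  If we are not in postcopy
 * and the estimate has dropped below max_size, re-sync the dirty bitmap
 * (under the iothread lock) to refine it.  All remaining RAM is
 * postcopiable, so it is accounted in *postcopiable_pending.
 */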
Dr. David Alan Gilbertc31b0982015-11-05 18:10:54 +00002064static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
2065 uint64_t *non_postcopiable_pending,
2066 uint64_t *postcopiable_pending)
Juan Quintela56e93d22015-05-07 19:33:31 +02002067{
2068 uint64_t remaining_size;
2069
2070 remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
2071
Dr. David Alan Gilbert663e6c12015-11-05 18:11:13 +00002072 if (!migration_in_postcopy(migrate_get_current()) &&
2073 remaining_size < max_size) {
Juan Quintela56e93d22015-05-07 19:33:31 +02002074 qemu_mutex_lock_iothread();
2075 rcu_read_lock();
2076 migration_bitmap_sync();
2077 rcu_read_unlock();
2078 qemu_mutex_unlock_iothread();
2079 remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
2080 }
Dr. David Alan Gilbertc31b0982015-11-05 18:10:54 +00002081
2082 /* We can do postcopy, and all the data is postcopiable */
2083 *postcopiable_pending += remaining_size;
Juan Quintela56e93d22015-05-07 19:33:31 +02002084}
2085
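/*
 * Load an XBZRLE-encoded page: read the header and the encoded data from
 * the stream and decode it on top of the existing contents of 'host'.
 * Returns 0 on success, -1 on a bad header, oversized length or decode
 * failure.
 */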
2086static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
2087{
2088 unsigned int xh_len;
2089 int xh_flags;
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00002090 uint8_t *loaded_data;
Juan Quintela56e93d22015-05-07 19:33:31 +02002091
2092 if (!xbzrle_decoded_buf) {
2093 xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
2094 }
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00002095 loaded_data = xbzrle_decoded_buf;
Juan Quintela56e93d22015-05-07 19:33:31 +02002096
2097 /* extract RLE header */
2098 xh_flags = qemu_get_byte(f);
2099 xh_len = qemu_get_be16(f);
2100
2101 if (xh_flags != ENCODING_FLAG_XBZRLE) {
2102 error_report("Failed to load XBZRLE page - wrong compression!");
2103 return -1;
2104 }
2105
2106 if (xh_len > TARGET_PAGE_SIZE) {
2107 error_report("Failed to load XBZRLE page - len overflow!");
2108 return -1;
2109 }
2110 /* load data and decode */
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00002111 qemu_get_buffer_in_place(f, &loaded_data, xh_len);
Juan Quintela56e93d22015-05-07 19:33:31 +02002112
2113 /* decode RLE */
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00002114 if (xbzrle_decode_buffer(loaded_data, xh_len, host,
Juan Quintela56e93d22015-05-07 19:33:31 +02002115 TARGET_PAGE_SIZE) == -1) {
2116 error_report("Failed to load XBZRLE page - decode error!");
2117 return -1;
2118 }
2119
2120 return 0;
2121}
2122
2123/*
2124 * Read a RAMBlock ID from the stream f.
2125 *
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002126 * Must be called from within an rcu critical section; returns a
zhanghailiang4c4bad42016-01-15 11:37:41 +08002127 * pointer from within the RCU-protected ram_list.
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002128 *
2129 * f: Stream to read from
2130 * flags: Page flags (mostly to see if it's a continuation of a previous block)
2131 */
zhanghailiang4c4bad42016-01-15 11:37:41 +08002132static inline RAMBlock *ram_block_from_stream(QEMUFile *f,
2133 int flags)
Juan Quintela56e93d22015-05-07 19:33:31 +02002134{
2135 static RAMBlock *block = NULL;
2136 char id[256];
2137 uint8_t len;
2138
2139 if (flags & RAM_SAVE_FLAG_CONTINUE) {
zhanghailiang4c4bad42016-01-15 11:37:41 +08002140 if (!block) {
Juan Quintela56e93d22015-05-07 19:33:31 +02002141 error_report("Ack, bad migration stream!");
2142 return NULL;
2143 }
zhanghailiang4c4bad42016-01-15 11:37:41 +08002144 return block;
Juan Quintela56e93d22015-05-07 19:33:31 +02002145 }
2146
2147 len = qemu_get_byte(f);
2148 qemu_get_buffer(f, (uint8_t *)id, len);
2149 id[len] = 0;
2150
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00002151 block = qemu_ram_block_by_name(id);
zhanghailiang4c4bad42016-01-15 11:37:41 +08002152 if (!block) {
2153 error_report("Can't find block %s", id);
2154 return NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02002155 }
2156
zhanghailiang4c4bad42016-01-15 11:37:41 +08002157 return block;
2158}
2159
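/*
 * Return the host address for 'offset' within 'block', or NULL if the
 * offset is not within the block.
 */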
2160static inline void *host_from_ram_block_offset(RAMBlock *block,
2161 ram_addr_t offset)
2162{
2163 if (!offset_in_ramblock(block, offset)) {
2164 return NULL;
2165 }
2166
2167 return block->host + offset;
Juan Quintela56e93d22015-05-07 19:33:31 +02002168}
2169
2170/*
2171 * If a page (or a whole RDMA chunk) has been
2172 * determined to be zero, then zap it.
2173 */
2174void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
2175{
2176 if (ch != 0 || !is_zero_range(host, size)) {
2177 memset(host, ch, size);
2178 }
2179}
2180
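/*
 * Body of a decompression worker thread: wait for a compressed page to be
 * handed over in param->des/compbuf/len, uncompress it into the
 * destination page, then mark the slot done and signal decomp_done_cond.
 * The thread exits when param->quit is set.
 */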
2181static void *do_data_decompress(void *opaque)
2182{
2183 DecompressParam *param = opaque;
2184 unsigned long pagesize;
Liang Li33d151f2016-05-05 15:32:58 +08002185 uint8_t *des;
2186 int len;
Juan Quintela56e93d22015-05-07 19:33:31 +02002187
Liang Li33d151f2016-05-05 15:32:58 +08002188 qemu_mutex_lock(&param->mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08002189 while (!param->quit) {
Liang Li33d151f2016-05-05 15:32:58 +08002190 if (param->des) {
2191 des = param->des;
2192 len = param->len;
2193 param->des = 0;
2194 qemu_mutex_unlock(&param->mutex);
2195
Liang Li73a89122016-05-05 15:32:51 +08002196 pagesize = TARGET_PAGE_SIZE;
2197 /* uncompress() can fail in some cases, especially
2198 * when the page was dirtied while it was being compressed; that's
2199 * not a problem because the dirty page will be retransferred
2200 * and uncompress() won't break the data in other pages.
2201 */
Liang Li33d151f2016-05-05 15:32:58 +08002202 uncompress((Bytef *)des, &pagesize,
2203 (const Bytef *)param->compbuf, len);
Liang Li73a89122016-05-05 15:32:51 +08002204
Liang Li33d151f2016-05-05 15:32:58 +08002205 qemu_mutex_lock(&decomp_done_lock);
2206 param->done = true;
2207 qemu_cond_signal(&decomp_done_cond);
2208 qemu_mutex_unlock(&decomp_done_lock);
2209
2210 qemu_mutex_lock(&param->mutex);
2211 } else {
2212 qemu_cond_wait(&param->cond, &param->mutex);
2213 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002214 }
Liang Li33d151f2016-05-05 15:32:58 +08002215 qemu_mutex_unlock(&param->mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02002216
2217 return NULL;
2218}
2219
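/*
 * Wait until every decompression worker has finished the page it was
 * given (param->done); a no-op when compression is not in use.
 */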
Liang Li5533b2e2016-05-05 15:32:52 +08002220static void wait_for_decompress_done(void)
2221{
2222 int idx, thread_count;
2223
2224 if (!migrate_use_compression()) {
2225 return;
2226 }
2227
2228 thread_count = migrate_decompress_threads();
2229 qemu_mutex_lock(&decomp_done_lock);
2230 for (idx = 0; idx < thread_count; idx++) {
2231 while (!decomp_param[idx].done) {
2232 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2233 }
2234 }
2235 qemu_mutex_unlock(&decomp_done_lock);
2236}
2237
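/*
 * Spawn the decompression worker threads and initialise the per-thread
 * mutex/condvar pairs and the shared decomp_done lock and condition.
 */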
Juan Quintela56e93d22015-05-07 19:33:31 +02002238void migrate_decompress_threads_create(void)
2239{
2240 int i, thread_count;
2241
2242 thread_count = migrate_decompress_threads();
2243 decompress_threads = g_new0(QemuThread, thread_count);
2244 decomp_param = g_new0(DecompressParam, thread_count);
Liang Li73a89122016-05-05 15:32:51 +08002245 qemu_mutex_init(&decomp_done_lock);
2246 qemu_cond_init(&decomp_done_cond);
Juan Quintela56e93d22015-05-07 19:33:31 +02002247 for (i = 0; i < thread_count; i++) {
2248 qemu_mutex_init(&decomp_param[i].mutex);
2249 qemu_cond_init(&decomp_param[i].cond);
2250 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
Liang Li73a89122016-05-05 15:32:51 +08002251 decomp_param[i].done = true;
Liang Li90e56fb2016-05-05 15:32:56 +08002252 decomp_param[i].quit = false;
Juan Quintela56e93d22015-05-07 19:33:31 +02002253 qemu_thread_create(decompress_threads + i, "decompress",
2254 do_data_decompress, decomp_param + i,
2255 QEMU_THREAD_JOINABLE);
2256 }
2257}
2258
2259void migrate_decompress_threads_join(void)
2260{
2261 int i, thread_count;
2262
Juan Quintela56e93d22015-05-07 19:33:31 +02002263 thread_count = migrate_decompress_threads();
2264 for (i = 0; i < thread_count; i++) {
2265 qemu_mutex_lock(&decomp_param[i].mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08002266 decomp_param[i].quit = true;
Juan Quintela56e93d22015-05-07 19:33:31 +02002267 qemu_cond_signal(&decomp_param[i].cond);
2268 qemu_mutex_unlock(&decomp_param[i].mutex);
2269 }
2270 for (i = 0; i < thread_count; i++) {
2271 qemu_thread_join(decompress_threads + i);
2272 qemu_mutex_destroy(&decomp_param[i].mutex);
2273 qemu_cond_destroy(&decomp_param[i].cond);
2274 g_free(decomp_param[i].compbuf);
2275 }
2276 g_free(decompress_threads);
2277 g_free(decomp_param);
Juan Quintela56e93d22015-05-07 19:33:31 +02002278 decompress_threads = NULL;
2279 decomp_param = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02002280}
2281
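/*
 * Hand one compressed page to an idle decompression thread: read 'len'
 * bytes from the stream into that thread's compbuf and wake it up,
 * waiting on decomp_done_cond if all threads are currently busy.
 */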
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00002282static void decompress_data_with_multi_threads(QEMUFile *f,
Juan Quintela56e93d22015-05-07 19:33:31 +02002283 void *host, int len)
2284{
2285 int idx, thread_count;
2286
2287 thread_count = migrate_decompress_threads();
Liang Li73a89122016-05-05 15:32:51 +08002288 qemu_mutex_lock(&decomp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02002289 while (true) {
2290 for (idx = 0; idx < thread_count; idx++) {
Liang Li73a89122016-05-05 15:32:51 +08002291 if (decomp_param[idx].done) {
Liang Li33d151f2016-05-05 15:32:58 +08002292 decomp_param[idx].done = false;
2293 qemu_mutex_lock(&decomp_param[idx].mutex);
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00002294 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
Juan Quintela56e93d22015-05-07 19:33:31 +02002295 decomp_param[idx].des = host;
2296 decomp_param[idx].len = len;
Liang Li33d151f2016-05-05 15:32:58 +08002297 qemu_cond_signal(&decomp_param[idx].cond);
2298 qemu_mutex_unlock(&decomp_param[idx].mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02002299 break;
2300 }
2301 }
2302 if (idx < thread_count) {
2303 break;
Liang Li73a89122016-05-05 15:32:51 +08002304 } else {
2305 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02002306 }
2307 }
Liang Li73a89122016-05-05 15:32:51 +08002308 qemu_mutex_unlock(&decomp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02002309}
2310
Dr. David Alan Gilbert1caddf82015-11-05 18:11:03 +00002311/*
2312 * Allocate data structures etc needed by incoming migration with postcopy-ram;
2313 * postcopy-ram's similarly named postcopy_ram_incoming_init does the work
2314 */
2315int ram_postcopy_incoming_init(MigrationIncomingState *mis)
2316{
2317 size_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
2318
2319 return postcopy_ram_incoming_init(mis, ram_pages);
2320}
2321
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002322/*
2323 * Called in postcopy mode by ram_load().
2324 * rcu_read_lock is taken prior to this being called.
2325 */
2326static int ram_load_postcopy(QEMUFile *f)
2327{
2328 int flags = 0, ret = 0;
2329 bool place_needed = false;
2330 bool matching_page_sizes = qemu_host_page_size == TARGET_PAGE_SIZE;
2331 MigrationIncomingState *mis = migration_incoming_get_current();
2332 /* Temporary page that is later 'placed' */
2333 void *postcopy_host_page = postcopy_get_tmp_page(mis);
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00002334 void *last_host = NULL;
Dr. David Alan Gilberta3b6ff62015-11-11 14:02:28 +00002335 bool all_zero = false;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002336
2337 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
2338 ram_addr_t addr;
2339 void *host = NULL;
2340 void *page_buffer = NULL;
2341 void *place_source = NULL;
2342 uint8_t ch;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002343
2344 addr = qemu_get_be64(f);
2345 flags = addr & ~TARGET_PAGE_MASK;
2346 addr &= TARGET_PAGE_MASK;
2347
2348 trace_ram_load_postcopy_loop((uint64_t)addr, flags);
2349 place_needed = false;
2350 if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE)) {
zhanghailiang4c4bad42016-01-15 11:37:41 +08002351 RAMBlock *block = ram_block_from_stream(f, flags);
2352
2353 host = host_from_ram_block_offset(block, addr);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002354 if (!host) {
2355 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2356 ret = -EINVAL;
2357 break;
2358 }
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002359 /*
2360 * Postcopy requires that we place whole host pages atomically.
2361 * To make it atomic, the data is read into a temporary page
2362 * that's moved into place later.
2363 * The migration protocol uses possibly smaller target pages;
2364 * however, the source ensures it always sends all the components
2365 * of a host page in order.
2366 */
2367 page_buffer = postcopy_host_page +
2368 ((uintptr_t)host & ~qemu_host_page_mask);
2369 /* If all target pages within the host page are zero then we can optimise the place */
2370 if (!((uintptr_t)host & ~qemu_host_page_mask)) {
2371 all_zero = true;
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00002372 } else {
2373 /* not the 1st target page within the host page */
2374 if (host != (last_host + TARGET_PAGE_SIZE)) {
Markus Armbruster9af9e0f2015-12-18 16:35:19 +01002375 error_report("Non-sequential target page %p/%p",
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00002376 host, last_host);
2377 ret = -EINVAL;
2378 break;
2379 }
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002380 }
2381
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00002382
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002383 /*
2384 * If it's the last part of a host page then we place the host
2385 * page
2386 */
2387 place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
2388 ~qemu_host_page_mask) == 0;
2389 place_source = postcopy_host_page;
2390 }
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00002391 last_host = host;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002392
2393 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2394 case RAM_SAVE_FLAG_COMPRESS:
2395 ch = qemu_get_byte(f);
2396 memset(page_buffer, ch, TARGET_PAGE_SIZE);
2397 if (ch) {
2398 all_zero = false;
2399 }
2400 break;
2401
2402 case RAM_SAVE_FLAG_PAGE:
2403 all_zero = false;
2404 if (!place_needed || !matching_page_sizes) {
2405 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
2406 } else {
2407 /* Avoids the qemu_file copy during postcopy, which is
2408 * going to do a copy later anyway; we can only do it when we
2409 * do this read in one go (matching page sizes)
2410 */
2411 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
2412 TARGET_PAGE_SIZE);
2413 }
2414 break;
2415 case RAM_SAVE_FLAG_EOS:
2416 /* normal exit */
2417 break;
2418 default:
2419 error_report("Unknown combination of migration flags: %#x"
2420 " (postcopy mode)", flags);
2421 ret = -EINVAL;
2422 }
2423
2424 if (place_needed) {
2425 /* This gets called at the last target page in the host page */
2426 if (all_zero) {
2427 ret = postcopy_place_page_zero(mis,
2428 host + TARGET_PAGE_SIZE -
2429 qemu_host_page_size);
2430 } else {
2431 ret = postcopy_place_page(mis, host + TARGET_PAGE_SIZE -
2432 qemu_host_page_size,
2433 place_source);
2434 }
2435 }
2436 if (!ret) {
2437 ret = qemu_file_get_error(f);
2438 }
2439 }
2440
2441 return ret;
2442}
2443
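/*
 * Incoming side entry point for RAM: once the destination has entered
 * the postcopy listening state the work is done by ram_load_postcopy();
 * otherwise each chunk is handled here according to its RAM_SAVE_FLAG_*
 * until RAM_SAVE_FLAG_EOS is seen.
 */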
Juan Quintela56e93d22015-05-07 19:33:31 +02002444static int ram_load(QEMUFile *f, void *opaque, int version_id)
2445{
2446 int flags = 0, ret = 0;
2447 static uint64_t seq_iter;
2448 int len = 0;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002449 /*
2450 * If the system is running in postcopy mode, page inserts to host memory must
2451 * be atomic
2452 */
2453 bool postcopy_running = postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING;
Juan Quintela56e93d22015-05-07 19:33:31 +02002454
2455 seq_iter++;
2456
2457 if (version_id != 4) {
2458 ret = -EINVAL;
2459 }
2460
2461 /* This RCU critical section can be very long running.
2462 * When RCU reclaims in the code start to become numerous,
2463 * it will be necessary to reduce the granularity of this
2464 * critical section.
2465 */
2466 rcu_read_lock();
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002467
2468 if (postcopy_running) {
2469 ret = ram_load_postcopy(f);
2470 }
2471
2472 while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02002473 ram_addr_t addr, total_ram_bytes;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00002474 void *host = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02002475 uint8_t ch;
2476
2477 addr = qemu_get_be64(f);
2478 flags = addr & ~TARGET_PAGE_MASK;
2479 addr &= TARGET_PAGE_MASK;
2480
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00002481 if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE |
2482 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
zhanghailiang4c4bad42016-01-15 11:37:41 +08002483 RAMBlock *block = ram_block_from_stream(f, flags);
2484
2485 host = host_from_ram_block_offset(block, addr);
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00002486 if (!host) {
2487 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2488 ret = -EINVAL;
2489 break;
2490 }
2491 }
2492
Juan Quintela56e93d22015-05-07 19:33:31 +02002493 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2494 case RAM_SAVE_FLAG_MEM_SIZE:
2495 /* Synchronize RAM block list */
2496 total_ram_bytes = addr;
2497 while (!ret && total_ram_bytes) {
2498 RAMBlock *block;
Juan Quintela56e93d22015-05-07 19:33:31 +02002499 char id[256];
2500 ram_addr_t length;
2501
2502 len = qemu_get_byte(f);
2503 qemu_get_buffer(f, (uint8_t *)id, len);
2504 id[len] = 0;
2505 length = qemu_get_be64(f);
2506
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00002507 block = qemu_ram_block_by_name(id);
2508 if (block) {
2509 if (length != block->used_length) {
2510 Error *local_err = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02002511
Gongleifa53a0e2016-05-10 10:04:59 +08002512 ret = qemu_ram_resize(block, length,
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00002513 &local_err);
2514 if (local_err) {
2515 error_report_err(local_err);
Juan Quintela56e93d22015-05-07 19:33:31 +02002516 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002517 }
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00002518 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
2519 block->idstr);
2520 } else {
Juan Quintela56e93d22015-05-07 19:33:31 +02002521 error_report("Unknown ramblock \"%s\", cannot "
2522 "accept migration", id);
2523 ret = -EINVAL;
2524 }
2525
2526 total_ram_bytes -= length;
2527 }
2528 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00002529
Juan Quintela56e93d22015-05-07 19:33:31 +02002530 case RAM_SAVE_FLAG_COMPRESS:
Juan Quintela56e93d22015-05-07 19:33:31 +02002531 ch = qemu_get_byte(f);
2532 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
2533 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00002534
Juan Quintela56e93d22015-05-07 19:33:31 +02002535 case RAM_SAVE_FLAG_PAGE:
Juan Quintela56e93d22015-05-07 19:33:31 +02002536 qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
2537 break;
Juan Quintela56e93d22015-05-07 19:33:31 +02002538
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00002539 case RAM_SAVE_FLAG_COMPRESS_PAGE:
Juan Quintela56e93d22015-05-07 19:33:31 +02002540 len = qemu_get_be32(f);
2541 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
2542 error_report("Invalid compressed data length: %d", len);
2543 ret = -EINVAL;
2544 break;
2545 }
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00002546 decompress_data_with_multi_threads(f, host, len);
Juan Quintela56e93d22015-05-07 19:33:31 +02002547 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00002548
Juan Quintela56e93d22015-05-07 19:33:31 +02002549 case RAM_SAVE_FLAG_XBZRLE:
Juan Quintela56e93d22015-05-07 19:33:31 +02002550 if (load_xbzrle(f, addr, host) < 0) {
2551 error_report("Failed to decompress XBZRLE page at "
2552 RAM_ADDR_FMT, addr);
2553 ret = -EINVAL;
2554 break;
2555 }
2556 break;
2557 case RAM_SAVE_FLAG_EOS:
2558 /* normal exit */
2559 break;
2560 default:
2561 if (flags & RAM_SAVE_FLAG_HOOK) {
Dr. David Alan Gilbert632e3a52015-06-11 18:17:23 +01002562 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
Juan Quintela56e93d22015-05-07 19:33:31 +02002563 } else {
2564 error_report("Unknown combination of migration flags: %#x",
2565 flags);
2566 ret = -EINVAL;
2567 }
2568 }
2569 if (!ret) {
2570 ret = qemu_file_get_error(f);
2571 }
2572 }
2573
Liang Li5533b2e2016-05-05 15:32:52 +08002574 wait_for_decompress_done();
Juan Quintela56e93d22015-05-07 19:33:31 +02002575 rcu_read_unlock();
2576 DPRINTF("Completed load of VM with exit code %d seq iteration "
2577 "%" PRIu64 "\n", ret, seq_iter);
2578 return ret;
2579}
2580
2581static SaveVMHandlers savevm_ram_handlers = {
2582 .save_live_setup = ram_save_setup,
2583 .save_live_iterate = ram_save_iterate,
Dr. David Alan Gilbert763c9062015-11-05 18:11:00 +00002584 .save_live_complete_postcopy = ram_save_complete,
Dr. David Alan Gilberta3e06c32015-11-05 18:10:41 +00002585 .save_live_complete_precopy = ram_save_complete,
Juan Quintela56e93d22015-05-07 19:33:31 +02002586 .save_live_pending = ram_save_pending,
2587 .load_state = ram_load,
Liang Li6ad2a212015-11-02 15:37:03 +08002588 .cleanup = ram_migration_cleanup,
Juan Quintela56e93d22015-05-07 19:33:31 +02002589};
2590
2591void ram_mig_init(void)
2592{
2593 qemu_mutex_init(&XBZRLE.lock);
2594 register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL);
2595}