blob: 7811cde643a1f0ebf12e870373ba5eef07a6436d [file] [log] [blame]
Juan Quintela56e93d22015-05-07 19:33:31 +02001/*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
Juan Quintela76cc7b52015-05-08 13:20:21 +02005 * Copyright (c) 2011-2015 Red Hat Inc
6 *
7 * Authors:
8 * Juan Quintela <quintela@redhat.com>
Juan Quintela56e93d22015-05-07 19:33:31 +02009 *
10 * Permission is hereby granted, free of charge, to any person obtaining a copy
11 * of this software and associated documentation files (the "Software"), to deal
12 * in the Software without restriction, including without limitation the rights
13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the Software is
15 * furnished to do so, subject to the following conditions:
16 *
17 * The above copyright notice and this permission notice shall be included in
18 * all copies or substantial portions of the Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 * THE SOFTWARE.
27 */
Markus Armbrustere688df62018-02-01 12:18:31 +010028
Peter Maydell1393a482016-01-26 18:16:54 +000029#include "qemu/osdep.h"
Paolo Bonzini33c11872016-03-15 16:58:45 +010030#include "cpu.h"
Veronia Bahaaf348b6d2016-03-20 19:16:19 +020031#include "qemu/cutils.h"
Juan Quintela56e93d22015-05-07 19:33:31 +020032#include "qemu/bitops.h"
33#include "qemu/bitmap.h"
Juan Quintela7205c9e2015-05-08 13:54:36 +020034#include "qemu/main-loop.h"
Juan Quintela709e3fe2017-04-05 21:47:50 +020035#include "xbzrle.h"
Juan Quintela7b1e1a22017-04-17 20:26:27 +020036#include "ram.h"
Juan Quintela6666c962017-04-24 20:07:27 +020037#include "migration.h"
Juan Quintelaf2a8f0a2017-04-24 13:42:55 +020038#include "migration/register.h"
Juan Quintela7b1e1a22017-04-17 20:26:27 +020039#include "migration/misc.h"
Juan Quintela08a0aee2017-04-20 18:52:18 +020040#include "qemu-file.h"
Juan Quintelabe07b0a2017-04-20 13:12:24 +020041#include "postcopy-ram.h"
Michael S. Tsirkin53d37d32018-05-03 22:50:51 +030042#include "page_cache.h"
Juan Quintela56e93d22015-05-07 19:33:31 +020043#include "qemu/error-report.h"
Markus Armbrustere688df62018-02-01 12:18:31 +010044#include "qapi/error.h"
Juan Quintelaab7cbb02019-05-15 13:37:46 +020045#include "qapi/qapi-types-migration.h"
Markus Armbruster9af23982018-02-11 10:36:01 +010046#include "qapi/qapi-events-migration.h"
Juan Quintela8acabf62017-10-05 22:00:31 +020047#include "qapi/qmp/qerror.h"
Juan Quintela56e93d22015-05-07 19:33:31 +020048#include "trace.h"
Juan Quintela56e93d22015-05-07 19:33:31 +020049#include "exec/ram_addr.h"
Alexey Perevalovf9494612017-10-05 14:13:20 +030050#include "exec/target_page.h"
Juan Quintela56e93d22015-05-07 19:33:31 +020051#include "qemu/rcu_queue.h"
zhanghailianga91246c2016-10-27 14:42:59 +080052#include "migration/colo.h"
Michael S. Tsirkin53d37d32018-05-03 22:50:51 +030053#include "block.h"
Juan Quintelaaf8b7d22018-04-06 19:32:12 +020054#include "sysemu/sysemu.h"
Claudio Fontanab0c3cf92020-06-29 11:35:03 +020055#include "sysemu/cpu-throttle.h"
Peter Xuedd090c2018-05-02 18:47:32 +080056#include "savevm.h"
Juan Quintelab9ee2f72016-01-15 11:40:13 +010057#include "qemu/iov.h"
Juan Quintelad32ca5a2020-01-22 16:16:07 +010058#include "multifd.h"
Juan Quintela56e93d22015-05-07 19:33:31 +020059
Juan Quintela56e93d22015-05-07 19:33:31 +020060/***********************************************************/
61/* ram save/restore */
62
/* RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS; it
 * worked for pages that were filled with the same char. We switched
 * it to only search for the zero value, and renamed it to avoid
 * confusion with RAM_SAVE_FLAG_COMPRESS_PAGE.
 */
68
/*
 * RAM_SAVE_FLAG_* bit values.  Each flag occupies its own bit so that
 * flags can be OR-ed together (save_page_header() ORs
 * RAM_SAVE_FLAG_CONTINUE into the page offset it sends).
 */
#define RAM_SAVE_FLAG_FULL          0x001 /* Obsolete, not used anymore */
#define RAM_SAVE_FLAG_ZERO          0x002
#define RAM_SAVE_FLAG_MEM_SIZE      0x004
#define RAM_SAVE_FLAG_PAGE          0x008
#define RAM_SAVE_FLAG_EOS           0x010
#define RAM_SAVE_FLAG_CONTINUE      0x020
#define RAM_SAVE_FLAG_XBZRLE        0x040
/* 0x80 is reserved in migration.h; new flags start at 0x100 */
#define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100
78
/*
 * is_zero_range: thin wrapper around buffer_is_zero()
 *
 * Returns whatever buffer_is_zero() reports for the range
 *
 * @p: start of the range
 * @size: number of bytes in the range
 */
static inline bool is_zero_range(uint8_t *p, uint64_t size)
{
    bool all_zero = buffer_is_zero(p, size);

    return all_zero;
}
83
Juan Quintela93604472017-06-06 19:49:03 +020084XBZRLECacheStats xbzrle_counters;
85
Juan Quintela56e93d22015-05-07 19:33:31 +020086/* struct contains XBZRLE cache and a static page
87 used by the compression */
88static struct {
89 /* buffer used for XBZRLE encoding */
90 uint8_t *encoded_buf;
91 /* buffer for storing page content */
92 uint8_t *current_buf;
93 /* Cache for XBZRLE, Protected by lock. */
94 PageCache *cache;
95 QemuMutex lock;
Juan Quintelac00e0922017-05-09 16:22:01 +020096 /* it will store a page full of zeros */
97 uint8_t *zero_target_page;
Juan Quintelaf265e0e2017-06-28 11:52:27 +020098 /* buffer used for XBZRLE decoding */
99 uint8_t *decoded_buf;
Juan Quintela56e93d22015-05-07 19:33:31 +0200100} XBZRLE;
101
Juan Quintela56e93d22015-05-07 19:33:31 +0200102static void XBZRLE_cache_lock(void)
103{
Bihong Yuf4c51a62020-10-20 11:10:45 +0800104 if (migrate_use_xbzrle()) {
Juan Quintela56e93d22015-05-07 19:33:31 +0200105 qemu_mutex_lock(&XBZRLE.lock);
Bihong Yuf4c51a62020-10-20 11:10:45 +0800106 }
Juan Quintela56e93d22015-05-07 19:33:31 +0200107}
108
109static void XBZRLE_cache_unlock(void)
110{
Bihong Yuf4c51a62020-10-20 11:10:45 +0800111 if (migrate_use_xbzrle()) {
Juan Quintela56e93d22015-05-07 19:33:31 +0200112 qemu_mutex_unlock(&XBZRLE.lock);
Bihong Yuf4c51a62020-10-20 11:10:45 +0800113 }
Juan Quintela56e93d22015-05-07 19:33:31 +0200114}
115
Juan Quintela3d0684b2017-03-23 15:06:39 +0100116/**
117 * xbzrle_cache_resize: resize the xbzrle cache
118 *
119 * This function is called from qmp_migrate_set_cache_size in main
120 * thread, possibly while a migration is in progress. A running
121 * migration may be using the cache and might finish during this call,
122 * hence changes to the cache are protected by XBZRLE.lock().
123 *
Juan Quintelac9dede22017-10-06 23:03:55 +0200124 * Returns 0 for success or -1 for error
Juan Quintela3d0684b2017-03-23 15:06:39 +0100125 *
126 * @new_size: new cache size
Juan Quintela8acabf62017-10-05 22:00:31 +0200127 * @errp: set *errp if the check failed, with reason
Juan Quintela56e93d22015-05-07 19:33:31 +0200128 */
Juan Quintelac9dede22017-10-06 23:03:55 +0200129int xbzrle_cache_resize(int64_t new_size, Error **errp)
Juan Quintela56e93d22015-05-07 19:33:31 +0200130{
131 PageCache *new_cache;
Juan Quintelac9dede22017-10-06 23:03:55 +0200132 int64_t ret = 0;
Juan Quintela56e93d22015-05-07 19:33:31 +0200133
Juan Quintela8acabf62017-10-05 22:00:31 +0200134 /* Check for truncation */
135 if (new_size != (size_t)new_size) {
136 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
137 "exceeding address space");
138 return -1;
139 }
140
Juan Quintela2a313e52017-10-06 23:00:12 +0200141 if (new_size == migrate_xbzrle_cache_size()) {
142 /* nothing to do */
Juan Quintelac9dede22017-10-06 23:03:55 +0200143 return 0;
Juan Quintela2a313e52017-10-06 23:00:12 +0200144 }
145
Juan Quintela56e93d22015-05-07 19:33:31 +0200146 XBZRLE_cache_lock();
147
148 if (XBZRLE.cache != NULL) {
Juan Quintela80f8dfd2017-10-06 22:30:45 +0200149 new_cache = cache_init(new_size, TARGET_PAGE_SIZE, errp);
Juan Quintela56e93d22015-05-07 19:33:31 +0200150 if (!new_cache) {
Juan Quintela56e93d22015-05-07 19:33:31 +0200151 ret = -1;
152 goto out;
153 }
154
155 cache_fini(XBZRLE.cache);
156 XBZRLE.cache = new_cache;
157 }
Juan Quintela56e93d22015-05-07 19:33:31 +0200158out:
159 XBZRLE_cache_unlock();
160 return ret;
161}
162
Chuan Zheng3ded54b2020-09-16 14:22:00 +0800163bool ramblock_is_ignored(RAMBlock *block)
Yury Kotovfbd162e2019-02-15 20:45:46 +0300164{
165 return !qemu_ram_is_migratable(block) ||
166 (migrate_ignore_shared() && qemu_ram_is_shared(block));
167}
168
Dr. David Alan Gilbert343f6322018-06-05 17:25:45 +0100169#undef RAMBLOCK_FOREACH
170
Yury Kotovfbd162e2019-02-15 20:45:46 +0300171int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque)
172{
173 RAMBlock *block;
174 int ret = 0;
175
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +0100176 RCU_READ_LOCK_GUARD();
177
Yury Kotovfbd162e2019-02-15 20:45:46 +0300178 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
179 ret = func(block, opaque);
180 if (ret) {
181 break;
182 }
183 }
Yury Kotovfbd162e2019-02-15 20:45:46 +0300184 return ret;
185}
186
Alexey Perevalovf9494612017-10-05 14:13:20 +0300187static void ramblock_recv_map_init(void)
188{
189 RAMBlock *rb;
190
Yury Kotovfbd162e2019-02-15 20:45:46 +0300191 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
Alexey Perevalovf9494612017-10-05 14:13:20 +0300192 assert(!rb->receivedmap);
193 rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits());
194 }
195}
196
197int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr)
198{
199 return test_bit(ramblock_recv_bitmap_offset(host_addr, rb),
200 rb->receivedmap);
201}
202
Dr. David Alan Gilbert1cba9f62018-03-12 17:21:08 +0000203bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset)
204{
205 return test_bit(byte_offset >> TARGET_PAGE_BITS, rb->receivedmap);
206}
207
Alexey Perevalovf9494612017-10-05 14:13:20 +0300208void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr)
209{
210 set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb), rb->receivedmap);
211}
212
213void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr,
214 size_t nr)
215{
216 bitmap_set_atomic(rb->receivedmap,
217 ramblock_recv_bitmap_offset(host_addr, rb),
218 nr);
219}
220
Peter Xua335deb2018-05-02 18:47:28 +0800221#define RAMBLOCK_RECV_BITMAP_ENDING (0x0123456789abcdefULL)
222
223/*
224 * Format: bitmap_size (8 bytes) + whole_bitmap (N bytes).
225 *
226 * Returns >0 if success with sent bytes, or <0 if error.
227 */
228int64_t ramblock_recv_bitmap_send(QEMUFile *file,
229 const char *block_name)
230{
231 RAMBlock *block = qemu_ram_block_by_name(block_name);
232 unsigned long *le_bitmap, nbits;
233 uint64_t size;
234
235 if (!block) {
236 error_report("%s: invalid block name: %s", __func__, block_name);
237 return -1;
238 }
239
240 nbits = block->used_length >> TARGET_PAGE_BITS;
241
242 /*
243 * Make sure the tmp bitmap buffer is big enough, e.g., on 32bit
244 * machines we may need 4 more bytes for padding (see below
245 * comment). So extend it a bit before hand.
246 */
247 le_bitmap = bitmap_new(nbits + BITS_PER_LONG);
248
249 /*
250 * Always use little endian when sending the bitmap. This is
251 * required that when source and destination VMs are not using the
zhaolichang3a4452d2020-09-17 15:50:21 +0800252 * same endianness. (Note: big endian won't work.)
Peter Xua335deb2018-05-02 18:47:28 +0800253 */
254 bitmap_to_le(le_bitmap, block->receivedmap, nbits);
255
256 /* Size of the bitmap, in bytes */
Peter Xua725ef92018-07-10 17:18:55 +0800257 size = DIV_ROUND_UP(nbits, 8);
Peter Xua335deb2018-05-02 18:47:28 +0800258
259 /*
260 * size is always aligned to 8 bytes for 64bit machines, but it
261 * may not be true for 32bit machines. We need this padding to
262 * make sure the migration can survive even between 32bit and
263 * 64bit machines.
264 */
265 size = ROUND_UP(size, 8);
266
267 qemu_put_be64(file, size);
268 qemu_put_buffer(file, (const uint8_t *)le_bitmap, size);
269 /*
270 * Mark as an end, in case the middle part is screwed up due to
zhaolichang3a4452d2020-09-17 15:50:21 +0800271 * some "mysterious" reason.
Peter Xua335deb2018-05-02 18:47:28 +0800272 */
273 qemu_put_be64(file, RAMBLOCK_RECV_BITMAP_ENDING);
274 qemu_fflush(file);
275
Peter Xubf269902018-05-25 09:50:42 +0800276 g_free(le_bitmap);
Peter Xua335deb2018-05-02 18:47:28 +0800277
278 if (qemu_file_get_error(file)) {
279 return qemu_file_get_error(file);
280 }
281
282 return size + sizeof(size);
283}
284
Juan Quintelaec481c62017-03-20 22:12:40 +0100285/*
286 * An outstanding page request, on the source, having been received
287 * and queued
288 */
289struct RAMSrcPageRequest {
290 RAMBlock *rb;
291 hwaddr offset;
292 hwaddr len;
293
294 QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
295};
296
Juan Quintela6f37bb82017-03-13 19:26:29 +0100297/* State of RAM for migration */
298struct RAMState {
Juan Quintela204b88b2017-03-15 09:16:57 +0100299 /* QEMUFile used for this migration */
300 QEMUFile *f;
Juan Quintela6f37bb82017-03-13 19:26:29 +0100301 /* Last block that we have visited searching for dirty pages */
302 RAMBlock *last_seen_block;
303 /* Last block from where we have sent data */
304 RAMBlock *last_sent_block;
Juan Quintela269ace22017-03-21 15:23:31 +0100305 /* Last dirty target page we have sent */
306 ram_addr_t last_page;
Juan Quintela6f37bb82017-03-13 19:26:29 +0100307 /* last ram version we have seen */
308 uint32_t last_version;
309 /* We are in the first round */
310 bool ram_bulk_stage;
Wei Wang6eeb63f2018-12-11 16:24:52 +0800311 /* The free page optimization is enabled */
312 bool fpo_enabled;
Juan Quintela8d820d62017-03-13 19:35:50 +0100313 /* How many times we have dirty too many pages */
314 int dirty_rate_high_cnt;
Juan Quintelaf664da82017-03-13 19:44:57 +0100315 /* these variables are used for bitmap sync */
316 /* last time we did a full bitmap_sync */
317 int64_t time_last_bitmap_sync;
Juan Quintelaeac74152017-03-28 14:59:01 +0200318 /* bytes transferred at start_time */
Juan Quintelac4bdf0c2017-03-28 14:59:54 +0200319 uint64_t bytes_xfer_prev;
Juan Quintelaa66cd902017-03-28 15:02:43 +0200320 /* number of dirty pages since start_time */
Juan Quintela68908ed2017-03-28 15:05:53 +0200321 uint64_t num_dirty_pages_period;
Juan Quintelab5833fd2017-03-13 19:49:19 +0100322 /* xbzrle misses since the beginning of the period */
323 uint64_t xbzrle_cache_miss_prev;
Wei Wange460a4b2020-04-30 08:59:35 +0800324 /* Amount of xbzrle pages since the beginning of the period */
325 uint64_t xbzrle_pages_prev;
326 /* Amount of xbzrle encoded bytes since the beginning of the period */
327 uint64_t xbzrle_bytes_prev;
Xiao Guangrong76e03002018-09-06 15:01:00 +0800328
329 /* compression statistics since the beginning of the period */
330 /* amount of count that no free thread to compress data */
331 uint64_t compress_thread_busy_prev;
332 /* amount bytes after compression */
333 uint64_t compressed_size_prev;
334 /* amount of compressed pages */
335 uint64_t compress_pages_prev;
336
Xiao Guangrongbe8b02e2018-09-03 17:26:42 +0800337 /* total handled target pages at the beginning of period */
338 uint64_t target_page_count_prev;
339 /* total handled target pages since start */
340 uint64_t target_page_count;
Juan Quintela93604472017-06-06 19:49:03 +0200341 /* number of dirty bits in the bitmap */
Peter Xu2dfaf122017-08-02 17:41:19 +0800342 uint64_t migration_dirty_pages;
Wei Wang386a9072018-12-11 16:24:49 +0800343 /* Protects modification of the bitmap and migration dirty pages */
Juan Quintela108cfae2017-03-13 21:38:09 +0100344 QemuMutex bitmap_mutex;
Juan Quintela68a098f2017-03-14 13:48:42 +0100345 /* The RAMBlock used in the last src_page_requests */
346 RAMBlock *last_req_rb;
Juan Quintelaec481c62017-03-20 22:12:40 +0100347 /* Queue of outstanding page requests from the destination */
348 QemuMutex src_page_req_mutex;
Paolo Bonzinib58deb32018-12-06 11:58:10 +0100349 QSIMPLEQ_HEAD(, RAMSrcPageRequest) src_page_requests;
Juan Quintela6f37bb82017-03-13 19:26:29 +0100350};
351typedef struct RAMState RAMState;
352
Juan Quintela53518d92017-05-04 11:46:24 +0200353static RAMState *ram_state;
Juan Quintela6f37bb82017-03-13 19:26:29 +0100354
Wei Wangbd227062018-12-11 16:24:51 +0800355static NotifierWithReturnList precopy_notifier_list;
356
357void precopy_infrastructure_init(void)
358{
359 notifier_with_return_list_init(&precopy_notifier_list);
360}
361
362void precopy_add_notifier(NotifierWithReturn *n)
363{
364 notifier_with_return_list_add(&precopy_notifier_list, n);
365}
366
367void precopy_remove_notifier(NotifierWithReturn *n)
368{
369 notifier_with_return_remove(n);
370}
371
372int precopy_notify(PrecopyNotifyReason reason, Error **errp)
373{
374 PrecopyNotifyData pnd;
375 pnd.reason = reason;
376 pnd.errp = errp;
377
378 return notifier_with_return_list_notify(&precopy_notifier_list, &pnd);
379}
380
Wei Wang6eeb63f2018-12-11 16:24:52 +0800381void precopy_enable_free_page_optimization(void)
382{
383 if (!ram_state) {
384 return;
385 }
386
387 ram_state->fpo_enabled = true;
388}
389
Juan Quintela9edabd42017-03-14 12:02:16 +0100390uint64_t ram_bytes_remaining(void)
391{
Dr. David Alan Gilbertbae416e2017-12-15 11:51:23 +0000392 return ram_state ? (ram_state->migration_dirty_pages * TARGET_PAGE_SIZE) :
393 0;
Juan Quintela9edabd42017-03-14 12:02:16 +0100394}
395
Juan Quintela93604472017-06-06 19:49:03 +0200396MigrationStats ram_counters;
Juan Quintela96506892017-03-14 18:41:03 +0100397
Dr. David Alan Gilbertb8fb8cb2015-09-23 15:27:10 +0100398/* used by the search for pages to send */
399struct PageSearchStatus {
400 /* Current block being searched */
401 RAMBlock *block;
Juan Quintelaa935e302017-03-21 15:36:51 +0100402 /* Current page to search from */
403 unsigned long page;
Dr. David Alan Gilbertb8fb8cb2015-09-23 15:27:10 +0100404 /* Set once we wrap around */
405 bool complete_round;
406};
407typedef struct PageSearchStatus PageSearchStatus;
408
Xiao Guangrong76e03002018-09-06 15:01:00 +0800409CompressionStats compression_counters;
410
Juan Quintela56e93d22015-05-07 19:33:31 +0200411struct CompressParam {
Juan Quintela56e93d22015-05-07 19:33:31 +0200412 bool done;
Liang Li90e56fb2016-05-05 15:32:56 +0800413 bool quit;
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +0800414 bool zero_page;
Juan Quintela56e93d22015-05-07 19:33:31 +0200415 QEMUFile *file;
416 QemuMutex mutex;
417 QemuCond cond;
418 RAMBlock *block;
419 ram_addr_t offset;
Xiao Guangrong34ab9e92018-03-30 15:51:22 +0800420
421 /* internally used fields */
Xiao Guangrongdcaf4462018-03-30 15:51:20 +0800422 z_stream stream;
Xiao Guangrong34ab9e92018-03-30 15:51:22 +0800423 uint8_t *originbuf;
Juan Quintela56e93d22015-05-07 19:33:31 +0200424};
425typedef struct CompressParam CompressParam;
426
427struct DecompressParam {
Liang Li73a89122016-05-05 15:32:51 +0800428 bool done;
Liang Li90e56fb2016-05-05 15:32:56 +0800429 bool quit;
Juan Quintela56e93d22015-05-07 19:33:31 +0200430 QemuMutex mutex;
431 QemuCond cond;
432 void *des;
Peter Maydelld341d9f2016-01-22 15:09:21 +0000433 uint8_t *compbuf;
Juan Quintela56e93d22015-05-07 19:33:31 +0200434 int len;
Xiao Guangrong797ca152018-03-30 15:51:21 +0800435 z_stream stream;
Juan Quintela56e93d22015-05-07 19:33:31 +0200436};
437typedef struct DecompressParam DecompressParam;
438
439static CompressParam *comp_param;
440static QemuThread *compress_threads;
441/* comp_done_cond is used to wake up the migration thread when
442 * one of the compression threads has finished the compression.
443 * comp_done_lock is used to co-work with comp_done_cond.
444 */
Liang Li0d9f9a52016-05-05 15:32:59 +0800445static QemuMutex comp_done_lock;
446static QemuCond comp_done_cond;
Juan Quintela56e93d22015-05-07 19:33:31 +0200447/* The empty QEMUFileOps will be used by file in CompressParam */
448static const QEMUFileOps empty_ops = { };
449
Xiao Guangrong34ab9e92018-03-30 15:51:22 +0800450static QEMUFile *decomp_file;
Juan Quintela56e93d22015-05-07 19:33:31 +0200451static DecompressParam *decomp_param;
452static QemuThread *decompress_threads;
Liang Li73a89122016-05-05 15:32:51 +0800453static QemuMutex decomp_done_lock;
454static QemuCond decomp_done_cond;
Juan Quintela56e93d22015-05-07 19:33:31 +0200455
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +0800456static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
Xiao Guangrong6ef37712018-08-21 16:10:23 +0800457 ram_addr_t offset, uint8_t *source_buf);
Juan Quintela56e93d22015-05-07 19:33:31 +0200458
459static void *do_data_compress(void *opaque)
460{
461 CompressParam *param = opaque;
Liang Lia7a9a882016-05-05 15:32:57 +0800462 RAMBlock *block;
463 ram_addr_t offset;
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +0800464 bool zero_page;
Juan Quintela56e93d22015-05-07 19:33:31 +0200465
Liang Lia7a9a882016-05-05 15:32:57 +0800466 qemu_mutex_lock(&param->mutex);
Liang Li90e56fb2016-05-05 15:32:56 +0800467 while (!param->quit) {
Liang Lia7a9a882016-05-05 15:32:57 +0800468 if (param->block) {
469 block = param->block;
470 offset = param->offset;
471 param->block = NULL;
472 qemu_mutex_unlock(&param->mutex);
473
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +0800474 zero_page = do_compress_ram_page(param->file, &param->stream,
475 block, offset, param->originbuf);
Liang Lia7a9a882016-05-05 15:32:57 +0800476
Liang Li0d9f9a52016-05-05 15:32:59 +0800477 qemu_mutex_lock(&comp_done_lock);
Liang Lia7a9a882016-05-05 15:32:57 +0800478 param->done = true;
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +0800479 param->zero_page = zero_page;
Liang Li0d9f9a52016-05-05 15:32:59 +0800480 qemu_cond_signal(&comp_done_cond);
481 qemu_mutex_unlock(&comp_done_lock);
Liang Lia7a9a882016-05-05 15:32:57 +0800482
483 qemu_mutex_lock(&param->mutex);
484 } else {
Juan Quintela56e93d22015-05-07 19:33:31 +0200485 qemu_cond_wait(&param->cond, &param->mutex);
486 }
Juan Quintela56e93d22015-05-07 19:33:31 +0200487 }
Liang Lia7a9a882016-05-05 15:32:57 +0800488 qemu_mutex_unlock(&param->mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +0200489
490 return NULL;
491}
492
Juan Quintelaf0afa332017-06-28 11:52:28 +0200493static void compress_threads_save_cleanup(void)
Juan Quintela56e93d22015-05-07 19:33:31 +0200494{
495 int i, thread_count;
496
Fei Li05306932018-09-25 17:14:40 +0800497 if (!migrate_use_compression() || !comp_param) {
Juan Quintela56e93d22015-05-07 19:33:31 +0200498 return;
499 }
Fei Li05306932018-09-25 17:14:40 +0800500
Juan Quintela56e93d22015-05-07 19:33:31 +0200501 thread_count = migrate_compress_threads();
502 for (i = 0; i < thread_count; i++) {
Xiao Guangrongdcaf4462018-03-30 15:51:20 +0800503 /*
504 * we use it as a indicator which shows if the thread is
505 * properly init'd or not
506 */
507 if (!comp_param[i].file) {
508 break;
509 }
Fei Li05306932018-09-25 17:14:40 +0800510
511 qemu_mutex_lock(&comp_param[i].mutex);
512 comp_param[i].quit = true;
513 qemu_cond_signal(&comp_param[i].cond);
514 qemu_mutex_unlock(&comp_param[i].mutex);
515
Juan Quintela56e93d22015-05-07 19:33:31 +0200516 qemu_thread_join(compress_threads + i);
Juan Quintela56e93d22015-05-07 19:33:31 +0200517 qemu_mutex_destroy(&comp_param[i].mutex);
518 qemu_cond_destroy(&comp_param[i].cond);
Xiao Guangrongdcaf4462018-03-30 15:51:20 +0800519 deflateEnd(&comp_param[i].stream);
Xiao Guangrong34ab9e92018-03-30 15:51:22 +0800520 g_free(comp_param[i].originbuf);
Xiao Guangrongdcaf4462018-03-30 15:51:20 +0800521 qemu_fclose(comp_param[i].file);
522 comp_param[i].file = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +0200523 }
Liang Li0d9f9a52016-05-05 15:32:59 +0800524 qemu_mutex_destroy(&comp_done_lock);
525 qemu_cond_destroy(&comp_done_cond);
Juan Quintela56e93d22015-05-07 19:33:31 +0200526 g_free(compress_threads);
527 g_free(comp_param);
Juan Quintela56e93d22015-05-07 19:33:31 +0200528 compress_threads = NULL;
529 comp_param = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +0200530}
531
Xiao Guangrongdcaf4462018-03-30 15:51:20 +0800532static int compress_threads_save_setup(void)
Juan Quintela56e93d22015-05-07 19:33:31 +0200533{
534 int i, thread_count;
535
536 if (!migrate_use_compression()) {
Xiao Guangrongdcaf4462018-03-30 15:51:20 +0800537 return 0;
Juan Quintela56e93d22015-05-07 19:33:31 +0200538 }
Juan Quintela56e93d22015-05-07 19:33:31 +0200539 thread_count = migrate_compress_threads();
540 compress_threads = g_new0(QemuThread, thread_count);
541 comp_param = g_new0(CompressParam, thread_count);
Liang Li0d9f9a52016-05-05 15:32:59 +0800542 qemu_cond_init(&comp_done_cond);
543 qemu_mutex_init(&comp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +0200544 for (i = 0; i < thread_count; i++) {
Xiao Guangrong34ab9e92018-03-30 15:51:22 +0800545 comp_param[i].originbuf = g_try_malloc(TARGET_PAGE_SIZE);
546 if (!comp_param[i].originbuf) {
547 goto exit;
548 }
549
Xiao Guangrongdcaf4462018-03-30 15:51:20 +0800550 if (deflateInit(&comp_param[i].stream,
551 migrate_compress_level()) != Z_OK) {
Xiao Guangrong34ab9e92018-03-30 15:51:22 +0800552 g_free(comp_param[i].originbuf);
Xiao Guangrongdcaf4462018-03-30 15:51:20 +0800553 goto exit;
554 }
555
Cao jine110aa92016-07-29 15:10:31 +0800556 /* comp_param[i].file is just used as a dummy buffer to save data,
557 * set its ops to empty.
Juan Quintela56e93d22015-05-07 19:33:31 +0200558 */
559 comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
560 comp_param[i].done = true;
Liang Li90e56fb2016-05-05 15:32:56 +0800561 comp_param[i].quit = false;
Juan Quintela56e93d22015-05-07 19:33:31 +0200562 qemu_mutex_init(&comp_param[i].mutex);
563 qemu_cond_init(&comp_param[i].cond);
564 qemu_thread_create(compress_threads + i, "compress",
565 do_data_compress, comp_param + i,
566 QEMU_THREAD_JOINABLE);
567 }
Xiao Guangrongdcaf4462018-03-30 15:51:20 +0800568 return 0;
569
570exit:
571 compress_threads_save_cleanup();
572 return -1;
Juan Quintela56e93d22015-05-07 19:33:31 +0200573}
574
575/**
Juan Quintela3d0684b2017-03-23 15:06:39 +0100576 * save_page_header: write page header to wire
Juan Quintela56e93d22015-05-07 19:33:31 +0200577 *
578 * If this is the 1st block, it also writes the block identification
579 *
Juan Quintela3d0684b2017-03-23 15:06:39 +0100580 * Returns the number of bytes written
Juan Quintela56e93d22015-05-07 19:33:31 +0200581 *
582 * @f: QEMUFile where to send the data
583 * @block: block that contains the page we want to send
584 * @offset: offset inside the block for the page
585 * in the lower bits, it contains flags
586 */
Juan Quintela2bf3aa82017-05-10 13:28:13 +0200587static size_t save_page_header(RAMState *rs, QEMUFile *f, RAMBlock *block,
588 ram_addr_t offset)
Juan Quintela56e93d22015-05-07 19:33:31 +0200589{
Liang Li9f5f3802015-07-13 17:34:10 +0800590 size_t size, len;
Juan Quintela56e93d22015-05-07 19:33:31 +0200591
Juan Quintela24795692017-03-21 11:45:01 +0100592 if (block == rs->last_sent_block) {
593 offset |= RAM_SAVE_FLAG_CONTINUE;
594 }
Juan Quintela2bf3aa82017-05-10 13:28:13 +0200595 qemu_put_be64(f, offset);
Juan Quintela56e93d22015-05-07 19:33:31 +0200596 size = 8;
597
598 if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
Liang Li9f5f3802015-07-13 17:34:10 +0800599 len = strlen(block->idstr);
Juan Quintela2bf3aa82017-05-10 13:28:13 +0200600 qemu_put_byte(f, len);
601 qemu_put_buffer(f, (uint8_t *)block->idstr, len);
Liang Li9f5f3802015-07-13 17:34:10 +0800602 size += 1 + len;
Juan Quintela24795692017-03-21 11:45:01 +0100603 rs->last_sent_block = block;
Juan Quintela56e93d22015-05-07 19:33:31 +0200604 }
605 return size;
606}
607
Juan Quintela3d0684b2017-03-23 15:06:39 +0100608/**
609 * mig_throttle_guest_down: throotle down the guest
610 *
611 * Reduce amount of guest cpu execution to hopefully slow down memory
612 * writes. If guest dirty memory rate is reduced below the rate at
613 * which we can transfer pages to the destination then we should be
614 * able to complete migration. Some workloads dirty memory way too
615 * fast and will not effectively converge, even with auto-converge.
Jason J. Herne070afca2015-09-08 13:12:35 -0400616 */
Keqian Zhucbbf8182020-04-13 18:15:08 +0800617static void mig_throttle_guest_down(uint64_t bytes_dirty_period,
618 uint64_t bytes_dirty_threshold)
Jason J. Herne070afca2015-09-08 13:12:35 -0400619{
620 MigrationState *s = migrate_get_current();
Daniel P. Berrange2594f562016-04-27 11:05:14 +0100621 uint64_t pct_initial = s->parameters.cpu_throttle_initial;
Keqian Zhucbbf8182020-04-13 18:15:08 +0800622 uint64_t pct_increment = s->parameters.cpu_throttle_increment;
623 bool pct_tailslow = s->parameters.cpu_throttle_tailslow;
Li Qiang4cbc9c72018-08-01 06:00:20 -0700624 int pct_max = s->parameters.max_cpu_throttle;
Jason J. Herne070afca2015-09-08 13:12:35 -0400625
Keqian Zhucbbf8182020-04-13 18:15:08 +0800626 uint64_t throttle_now = cpu_throttle_get_percentage();
627 uint64_t cpu_now, cpu_ideal, throttle_inc;
628
Jason J. Herne070afca2015-09-08 13:12:35 -0400629 /* We have not started throttling yet. Let's start it. */
630 if (!cpu_throttle_active()) {
631 cpu_throttle_set(pct_initial);
632 } else {
633 /* Throttling already on, just increase the rate */
Keqian Zhucbbf8182020-04-13 18:15:08 +0800634 if (!pct_tailslow) {
635 throttle_inc = pct_increment;
636 } else {
637 /* Compute the ideal CPU percentage used by Guest, which may
638 * make the dirty rate match the dirty rate threshold. */
639 cpu_now = 100 - throttle_now;
640 cpu_ideal = cpu_now * (bytes_dirty_threshold * 1.0 /
641 bytes_dirty_period);
642 throttle_inc = MIN(cpu_now - cpu_ideal, pct_increment);
643 }
644 cpu_throttle_set(MIN(throttle_now + throttle_inc, pct_max));
Jason J. Herne070afca2015-09-08 13:12:35 -0400645 }
646}
647
Juan Quintela3d0684b2017-03-23 15:06:39 +0100648/**
649 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
650 *
Juan Quintela6f37bb82017-03-13 19:26:29 +0100651 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +0100652 * @current_addr: address for the zero page
653 *
654 * Update the xbzrle cache to reflect a page that's been sent as all 0.
Juan Quintela56e93d22015-05-07 19:33:31 +0200655 * The important thing is that a stale (not-yet-0'd) page be replaced
656 * by the new data.
657 * As a bonus, if the page wasn't in the cache it gets added so that
Juan Quintela3d0684b2017-03-23 15:06:39 +0100658 * when a small write is made into the 0'd page it gets XBZRLE sent.
Juan Quintela56e93d22015-05-07 19:33:31 +0200659 */
Juan Quintela6f37bb82017-03-13 19:26:29 +0100660static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
Juan Quintela56e93d22015-05-07 19:33:31 +0200661{
Juan Quintela6f37bb82017-03-13 19:26:29 +0100662 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
Juan Quintela56e93d22015-05-07 19:33:31 +0200663 return;
664 }
665
666 /* We don't care if this fails to allocate a new cache page
667 * as long as it updated an old one */
Juan Quintelac00e0922017-05-09 16:22:01 +0200668 cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page,
Juan Quintela93604472017-06-06 19:49:03 +0200669 ram_counters.dirty_sync_count);
Juan Quintela56e93d22015-05-07 19:33:31 +0200670}
671
/* NOTE(review): presumably the encoding marker written by
 * save_xbzrle_page() - confirm against its body */
#define ENCODING_FLAG_XBZRLE 0x01
673
674/**
675 * save_xbzrle_page: compress and send current page
676 *
677 * Returns: 1 means that we wrote the page
678 * 0 means that page is identical to the one already sent
679 * -1 means that xbzrle would be longer than normal
680 *
Juan Quintela5a987732017-03-13 19:39:02 +0100681 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +0100682 * @current_data: pointer to the address of the page contents
683 * @current_addr: addr of the page
Juan Quintela56e93d22015-05-07 19:33:31 +0200684 * @block: block that contains the page we want to send
685 * @offset: offset inside the block for the page
686 * @last_stage: if we are at the completion stage
Juan Quintela56e93d22015-05-07 19:33:31 +0200687 */
Juan Quintela204b88b2017-03-15 09:16:57 +0100688static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
Juan Quintela56e93d22015-05-07 19:33:31 +0200689 ram_addr_t current_addr, RAMBlock *block,
Juan Quintela072c2512017-03-14 10:27:31 +0100690 ram_addr_t offset, bool last_stage)
Juan Quintela56e93d22015-05-07 19:33:31 +0200691{
692 int encoded_len = 0, bytes_xbzrle;
693 uint8_t *prev_cached_page;
694
Juan Quintela93604472017-06-06 19:49:03 +0200695 if (!cache_is_cached(XBZRLE.cache, current_addr,
696 ram_counters.dirty_sync_count)) {
697 xbzrle_counters.cache_miss++;
Juan Quintela56e93d22015-05-07 19:33:31 +0200698 if (!last_stage) {
699 if (cache_insert(XBZRLE.cache, current_addr, *current_data,
Juan Quintela93604472017-06-06 19:49:03 +0200700 ram_counters.dirty_sync_count) == -1) {
Juan Quintela56e93d22015-05-07 19:33:31 +0200701 return -1;
702 } else {
703 /* update *current_data when the page has been
704 inserted into cache */
705 *current_data = get_cached_data(XBZRLE.cache, current_addr);
706 }
707 }
708 return -1;
709 }
710
Wei Wange460a4b2020-04-30 08:59:35 +0800711 /*
712 * Reaching here means the page has hit the xbzrle cache, no matter what
713 * encoding result it is (normal encoding, overflow or skipping the page),
zhaolichang3a4452d2020-09-17 15:50:21 +0800714 * count the page as encoded. This is used to calculate the encoding rate.
Wei Wange460a4b2020-04-30 08:59:35 +0800715 *
716 * Example: 2 pages (8KB) being encoded, first page encoding generates 2KB,
717 * 2nd page turns out to be skipped (i.e. no new bytes written to the
718 * page), the overall encoding rate will be 8KB / 2KB = 4, which has the
719 * skipped page included. In this way, the encoding rate can tell if the
720 * guest page is good for xbzrle encoding.
721 */
722 xbzrle_counters.pages++;
Juan Quintela56e93d22015-05-07 19:33:31 +0200723 prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
724
725 /* save current buffer into memory */
726 memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
727
728 /* XBZRLE encoding (if there is no overflow) */
729 encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
730 TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
731 TARGET_PAGE_SIZE);
Wei Yangca353802019-06-10 08:41:59 +0800732
733 /*
734 * Update the cache contents, so that it corresponds to the data
735 * sent, in all cases except where we skip the page.
736 */
737 if (!last_stage && encoded_len != 0) {
738 memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
739 /*
740 * In the case where we couldn't compress, ensure that the caller
741 * sends the data from the cache, since the guest might have
742 * changed the RAM since we copied it.
743 */
744 *current_data = prev_cached_page;
745 }
746
Juan Quintela56e93d22015-05-07 19:33:31 +0200747 if (encoded_len == 0) {
Juan Quintela55c44462017-01-23 22:32:05 +0100748 trace_save_xbzrle_page_skipping();
Juan Quintela56e93d22015-05-07 19:33:31 +0200749 return 0;
750 } else if (encoded_len == -1) {
Juan Quintela55c44462017-01-23 22:32:05 +0100751 trace_save_xbzrle_page_overflow();
Juan Quintela93604472017-06-06 19:49:03 +0200752 xbzrle_counters.overflow++;
Wei Wange460a4b2020-04-30 08:59:35 +0800753 xbzrle_counters.bytes += TARGET_PAGE_SIZE;
Juan Quintela56e93d22015-05-07 19:33:31 +0200754 return -1;
755 }
756
Juan Quintela56e93d22015-05-07 19:33:31 +0200757 /* Send XBZRLE based compressed page */
Juan Quintela2bf3aa82017-05-10 13:28:13 +0200758 bytes_xbzrle = save_page_header(rs, rs->f, block,
Juan Quintela204b88b2017-03-15 09:16:57 +0100759 offset | RAM_SAVE_FLAG_XBZRLE);
760 qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE);
761 qemu_put_be16(rs->f, encoded_len);
762 qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len);
Juan Quintela56e93d22015-05-07 19:33:31 +0200763 bytes_xbzrle += encoded_len + 1 + 2;
Wei Wange460a4b2020-04-30 08:59:35 +0800764 /*
765 * Like compressed_size (please see update_compress_thread_counts),
766 * the xbzrle encoded bytes don't count the 8 byte header with
767 * RAM_SAVE_FLAG_CONTINUE.
768 */
769 xbzrle_counters.bytes += bytes_xbzrle - 8;
Juan Quintela93604472017-06-06 19:49:03 +0200770 ram_counters.transferred += bytes_xbzrle;
Juan Quintela56e93d22015-05-07 19:33:31 +0200771
772 return 1;
773}
774
Juan Quintela3d0684b2017-03-23 15:06:39 +0100775/**
776 * migration_bitmap_find_dirty: find the next dirty page from start
Dr. David Alan Gilbertf3f491f2015-11-05 18:11:01 +0000777 *
Wei Yanga5f7b1a2019-05-11 07:37:29 +0800778 * Returns the page offset within memory region of the start of a dirty page
Juan Quintela3d0684b2017-03-23 15:06:39 +0100779 *
Juan Quintela6f37bb82017-03-13 19:26:29 +0100780 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +0100781 * @rb: RAMBlock where to search for dirty pages
Juan Quintelaa935e302017-03-21 15:36:51 +0100782 * @start: page where we start the search
Dr. David Alan Gilbertf3f491f2015-11-05 18:11:01 +0000783 */
Juan Quintela56e93d22015-05-07 19:33:31 +0200784static inline
Juan Quintelaa935e302017-03-21 15:36:51 +0100785unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
Juan Quintelaf20e2862017-03-21 16:19:05 +0100786 unsigned long start)
Juan Quintela56e93d22015-05-07 19:33:31 +0200787{
Juan Quintela6b6712e2017-03-22 15:18:04 +0100788 unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
789 unsigned long *bitmap = rb->bmap;
Juan Quintela56e93d22015-05-07 19:33:31 +0200790 unsigned long next;
791
Yury Kotovfbd162e2019-02-15 20:45:46 +0300792 if (ramblock_is_ignored(rb)) {
Cédric Le Goaterb895de52018-05-14 08:57:00 +0200793 return size;
794 }
795
Wei Wang6eeb63f2018-12-11 16:24:52 +0800796 /*
797 * When the free page optimization is enabled, we need to check the bitmap
798 * to send the non-free pages rather than all the pages in the bulk stage.
799 */
800 if (!rs->fpo_enabled && rs->ram_bulk_stage && start > 0) {
Juan Quintela6b6712e2017-03-22 15:18:04 +0100801 next = start + 1;
Juan Quintela56e93d22015-05-07 19:33:31 +0200802 } else {
Juan Quintela6b6712e2017-03-22 15:18:04 +0100803 next = find_next_bit(bitmap, size, start);
Juan Quintela56e93d22015-05-07 19:33:31 +0200804 }
805
Juan Quintela6b6712e2017-03-22 15:18:04 +0100806 return next;
Juan Quintela56e93d22015-05-07 19:33:31 +0200807}
808
Juan Quintela06b10682017-03-21 15:18:05 +0100809static inline bool migration_bitmap_clear_dirty(RAMState *rs,
Juan Quintelaf20e2862017-03-21 16:19:05 +0100810 RAMBlock *rb,
811 unsigned long page)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +0000812{
813 bool ret;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +0000814
Wei Wang386a9072018-12-11 16:24:49 +0800815 qemu_mutex_lock(&rs->bitmap_mutex);
Peter Xu002cad62019-06-03 14:50:56 +0800816
817 /*
818 * Clear dirty bitmap if needed. This _must_ be called before we
819 * send any of the page in the chunk because we need to make sure
820 * we can capture further page content changes when we sync dirty
821 * log the next time. So as long as we are going to send any of
822 * the page in the chunk we clear the remote dirty bitmap for all.
823 * Clearing it earlier won't be a problem, but too late will.
824 */
825 if (rb->clear_bmap && clear_bmap_test_and_clear(rb, page)) {
826 uint8_t shift = rb->clear_bmap_shift;
827 hwaddr size = 1ULL << (TARGET_PAGE_BITS + shift);
Alexey Romko8bba0042020-01-10 14:51:34 +0100828 hwaddr start = (((ram_addr_t)page) << TARGET_PAGE_BITS) & (-size);
Peter Xu002cad62019-06-03 14:50:56 +0800829
830 /*
831 * CLEAR_BITMAP_SHIFT_MIN should always guarantee this... this
832 * can make things easier sometimes since then start address
833 * of the small chunk will always be 64 pages aligned so the
834 * bitmap will always be aligned to unsigned long. We should
835 * even be able to remove this restriction but I'm simply
836 * keeping it.
837 */
838 assert(shift >= 6);
839 trace_migration_bitmap_clear_dirty(rb->idstr, start, size, page);
840 memory_region_clear_dirty_bitmap(rb->mr, start, size);
841 }
842
Juan Quintela6b6712e2017-03-22 15:18:04 +0100843 ret = test_and_clear_bit(page, rb->bmap);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +0000844
845 if (ret) {
Juan Quintela0d8ec882017-03-13 21:21:41 +0100846 rs->migration_dirty_pages--;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +0000847 }
Wei Wang386a9072018-12-11 16:24:49 +0800848 qemu_mutex_unlock(&rs->bitmap_mutex);
849
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +0000850 return ret;
851}
852
Peter Xu267691b2019-06-03 14:50:46 +0800853/* Called with RCU critical section */
Wei Yang7a3e9572019-08-08 11:31:55 +0800854static void ramblock_sync_dirty_bitmap(RAMState *rs, RAMBlock *rb)
Juan Quintela56e93d22015-05-07 19:33:31 +0200855{
Keqian Zhufb613582020-06-22 11:20:37 +0800856 uint64_t new_dirty_pages =
857 cpu_physical_memory_sync_dirty_bitmap(rb, 0, rb->used_length);
858
859 rs->migration_dirty_pages += new_dirty_pages;
860 rs->num_dirty_pages_period += new_dirty_pages;
Juan Quintela56e93d22015-05-07 19:33:31 +0200861}
862
Juan Quintela3d0684b2017-03-23 15:06:39 +0100863/**
864 * ram_pagesize_summary: calculate all the pagesizes of a VM
865 *
866 * Returns a summary bitmap of the page sizes of all RAMBlocks
867 *
868 * For VMs with just normal pages this is equivalent to the host page
869 * size. If it's got some huge pages then it's the OR of all the
870 * different page sizes.
Dr. David Alan Gilberte8ca1db2017-02-24 18:28:29 +0000871 */
872uint64_t ram_pagesize_summary(void)
873{
874 RAMBlock *block;
875 uint64_t summary = 0;
876
Yury Kotovfbd162e2019-02-15 20:45:46 +0300877 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Dr. David Alan Gilberte8ca1db2017-02-24 18:28:29 +0000878 summary |= block->page_size;
879 }
880
881 return summary;
882}
883
Xiao Guangrongaecbfe92019-01-11 14:37:30 +0800884uint64_t ram_get_total_transferred_pages(void)
885{
886 return ram_counters.normal + ram_counters.duplicate +
887 compression_counters.pages + xbzrle_counters.pages;
888}
889
Xiao Guangrongb7340352018-06-04 17:55:12 +0800890static void migration_update_rates(RAMState *rs, int64_t end_time)
891{
Xiao Guangrongbe8b02e2018-09-03 17:26:42 +0800892 uint64_t page_count = rs->target_page_count - rs->target_page_count_prev;
Xiao Guangrong76e03002018-09-06 15:01:00 +0800893 double compressed_size;
Xiao Guangrongb7340352018-06-04 17:55:12 +0800894
895 /* calculate period counters */
896 ram_counters.dirty_pages_rate = rs->num_dirty_pages_period * 1000
897 / (end_time - rs->time_last_bitmap_sync);
898
Xiao Guangrongbe8b02e2018-09-03 17:26:42 +0800899 if (!page_count) {
Xiao Guangrongb7340352018-06-04 17:55:12 +0800900 return;
901 }
902
903 if (migrate_use_xbzrle()) {
Wei Wange460a4b2020-04-30 08:59:35 +0800904 double encoded_size, unencoded_size;
905
Xiao Guangrongb7340352018-06-04 17:55:12 +0800906 xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss -
Xiao Guangrongbe8b02e2018-09-03 17:26:42 +0800907 rs->xbzrle_cache_miss_prev) / page_count;
Xiao Guangrongb7340352018-06-04 17:55:12 +0800908 rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
Wei Wange460a4b2020-04-30 08:59:35 +0800909 unencoded_size = (xbzrle_counters.pages - rs->xbzrle_pages_prev) *
910 TARGET_PAGE_SIZE;
911 encoded_size = xbzrle_counters.bytes - rs->xbzrle_bytes_prev;
Wei Wang92271402020-06-17 13:13:05 -0700912 if (xbzrle_counters.pages == rs->xbzrle_pages_prev || !encoded_size) {
Wei Wange460a4b2020-04-30 08:59:35 +0800913 xbzrle_counters.encoding_rate = 0;
Wei Wange460a4b2020-04-30 08:59:35 +0800914 } else {
915 xbzrle_counters.encoding_rate = unencoded_size / encoded_size;
916 }
917 rs->xbzrle_pages_prev = xbzrle_counters.pages;
918 rs->xbzrle_bytes_prev = xbzrle_counters.bytes;
Xiao Guangrongb7340352018-06-04 17:55:12 +0800919 }
Xiao Guangrong76e03002018-09-06 15:01:00 +0800920
921 if (migrate_use_compression()) {
922 compression_counters.busy_rate = (double)(compression_counters.busy -
923 rs->compress_thread_busy_prev) / page_count;
924 rs->compress_thread_busy_prev = compression_counters.busy;
925
926 compressed_size = compression_counters.compressed_size -
927 rs->compressed_size_prev;
928 if (compressed_size) {
929 double uncompressed_size = (compression_counters.pages -
930 rs->compress_pages_prev) * TARGET_PAGE_SIZE;
931
932 /* Compression-Ratio = Uncompressed-size / Compressed-size */
933 compression_counters.compression_rate =
934 uncompressed_size / compressed_size;
935
936 rs->compress_pages_prev = compression_counters.pages;
937 rs->compressed_size_prev = compression_counters.compressed_size;
938 }
939 }
Xiao Guangrongb7340352018-06-04 17:55:12 +0800940}
941
Keqian Zhudc14a472020-02-24 10:31:42 +0800942static void migration_trigger_throttle(RAMState *rs)
943{
944 MigrationState *s = migrate_get_current();
945 uint64_t threshold = s->parameters.throttle_trigger_threshold;
946
947 uint64_t bytes_xfer_period = ram_counters.transferred - rs->bytes_xfer_prev;
948 uint64_t bytes_dirty_period = rs->num_dirty_pages_period * TARGET_PAGE_SIZE;
949 uint64_t bytes_dirty_threshold = bytes_xfer_period * threshold / 100;
950
951 /* During block migration the auto-converge logic incorrectly detects
952 * that ram migration makes no progress. Avoid this by disabling the
953 * throttling logic during the bulk phase of block migration. */
954 if (migrate_auto_converge() && !blk_mig_bulk_active()) {
955 /* The following detection logic can be refined later. For now:
956 Check to see if the ratio between dirtied bytes and the approx.
957 amount of bytes that just got transferred since the last time
958 we were in this routine reaches the threshold. If that happens
959 twice, start or increase throttling. */
960
961 if ((bytes_dirty_period > bytes_dirty_threshold) &&
962 (++rs->dirty_rate_high_cnt >= 2)) {
963 trace_migration_throttle();
964 rs->dirty_rate_high_cnt = 0;
Keqian Zhucbbf8182020-04-13 18:15:08 +0800965 mig_throttle_guest_down(bytes_dirty_period,
966 bytes_dirty_threshold);
Keqian Zhudc14a472020-02-24 10:31:42 +0800967 }
968 }
969}
970
Juan Quintela8d820d62017-03-13 19:35:50 +0100971static void migration_bitmap_sync(RAMState *rs)
Juan Quintela56e93d22015-05-07 19:33:31 +0200972{
973 RAMBlock *block;
Juan Quintela56e93d22015-05-07 19:33:31 +0200974 int64_t end_time;
Juan Quintela56e93d22015-05-07 19:33:31 +0200975
Juan Quintela93604472017-06-06 19:49:03 +0200976 ram_counters.dirty_sync_count++;
Juan Quintela56e93d22015-05-07 19:33:31 +0200977
Juan Quintelaf664da82017-03-13 19:44:57 +0100978 if (!rs->time_last_bitmap_sync) {
979 rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
Juan Quintela56e93d22015-05-07 19:33:31 +0200980 }
981
982 trace_migration_bitmap_sync_start();
Paolo Bonzini9c1f8f42016-09-22 16:08:31 +0200983 memory_global_dirty_log_sync();
Juan Quintela56e93d22015-05-07 19:33:31 +0200984
Juan Quintela108cfae2017-03-13 21:38:09 +0100985 qemu_mutex_lock(&rs->bitmap_mutex);
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +0100986 WITH_RCU_READ_LOCK_GUARD() {
987 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
988 ramblock_sync_dirty_bitmap(rs, block);
989 }
990 ram_counters.remaining = ram_bytes_remaining();
Juan Quintela56e93d22015-05-07 19:33:31 +0200991 }
Juan Quintela108cfae2017-03-13 21:38:09 +0100992 qemu_mutex_unlock(&rs->bitmap_mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +0200993
Paolo Bonzini9458a9a2018-02-06 18:37:39 +0100994 memory_global_after_dirty_log_sync();
Juan Quintelaa66cd902017-03-28 15:02:43 +0200995 trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);
Chao Fan1ffb5df2017-03-14 09:55:07 +0800996
Juan Quintela56e93d22015-05-07 19:33:31 +0200997 end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
998
999 /* more than 1 second = 1000 millisecons */
Juan Quintelaf664da82017-03-13 19:44:57 +01001000 if (end_time > rs->time_last_bitmap_sync + 1000) {
Keqian Zhudc14a472020-02-24 10:31:42 +08001001 migration_trigger_throttle(rs);
Jason J. Herne070afca2015-09-08 13:12:35 -04001002
Xiao Guangrongb7340352018-06-04 17:55:12 +08001003 migration_update_rates(rs, end_time);
1004
Xiao Guangrongbe8b02e2018-09-03 17:26:42 +08001005 rs->target_page_count_prev = rs->target_page_count;
Felipe Franciosid693c6f2017-05-24 17:10:01 +01001006
1007 /* reset period counters */
Juan Quintelaf664da82017-03-13 19:44:57 +01001008 rs->time_last_bitmap_sync = end_time;
Juan Quintelaa66cd902017-03-28 15:02:43 +02001009 rs->num_dirty_pages_period = 0;
Keqian Zhudc14a472020-02-24 10:31:42 +08001010 rs->bytes_xfer_prev = ram_counters.transferred;
Juan Quintela56e93d22015-05-07 19:33:31 +02001011 }
Dr. David Alan Gilbert4addcd42015-12-16 11:47:36 +00001012 if (migrate_use_events()) {
Peter Xu3ab72382018-08-15 21:37:37 +08001013 qapi_event_send_migration_pass(ram_counters.dirty_sync_count);
Dr. David Alan Gilbert4addcd42015-12-16 11:47:36 +00001014 }
Juan Quintela56e93d22015-05-07 19:33:31 +02001015}
1016
Wei Wangbd227062018-12-11 16:24:51 +08001017static void migration_bitmap_sync_precopy(RAMState *rs)
1018{
1019 Error *local_err = NULL;
1020
1021 /*
1022 * The current notifier usage is just an optimization to migration, so we
1023 * don't stop the normal migration process in the error case.
1024 */
1025 if (precopy_notify(PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC, &local_err)) {
1026 error_report_err(local_err);
Vladimir Sementsov-Ogievskiyb4a17332020-03-24 18:36:29 +03001027 local_err = NULL;
Wei Wangbd227062018-12-11 16:24:51 +08001028 }
1029
1030 migration_bitmap_sync(rs);
1031
1032 if (precopy_notify(PRECOPY_NOTIFY_AFTER_BITMAP_SYNC, &local_err)) {
1033 error_report_err(local_err);
1034 }
1035}
1036
Juan Quintela56e93d22015-05-07 19:33:31 +02001037/**
Xiao Guangrong6c97ec52018-08-21 16:10:22 +08001038 * save_zero_page_to_file: send the zero page to the file
1039 *
1040 * Returns the size of data written to the file, 0 means the page is not
1041 * a zero page
1042 *
1043 * @rs: current RAM state
1044 * @file: the file where the data is saved
1045 * @block: block that contains the page we want to send
1046 * @offset: offset inside the block for the page
1047 */
1048static int save_zero_page_to_file(RAMState *rs, QEMUFile *file,
1049 RAMBlock *block, ram_addr_t offset)
1050{
1051 uint8_t *p = block->host + offset;
1052 int len = 0;
1053
1054 if (is_zero_range(p, TARGET_PAGE_SIZE)) {
1055 len += save_page_header(rs, file, block, offset | RAM_SAVE_FLAG_ZERO);
1056 qemu_put_byte(file, 0);
1057 len += 1;
1058 }
1059 return len;
1060}
1061
1062/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001063 * save_zero_page: send the zero page to the stream
Juan Quintela56e93d22015-05-07 19:33:31 +02001064 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001065 * Returns the number of pages written.
Juan Quintela56e93d22015-05-07 19:33:31 +02001066 *
Juan Quintelaf7ccd612017-03-13 20:30:21 +01001067 * @rs: current RAM state
Juan Quintela56e93d22015-05-07 19:33:31 +02001068 * @block: block that contains the page we want to send
1069 * @offset: offset inside the block for the page
Juan Quintela56e93d22015-05-07 19:33:31 +02001070 */
Juan Quintela7faccdc2018-01-08 18:58:17 +01001071static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
Juan Quintela56e93d22015-05-07 19:33:31 +02001072{
Xiao Guangrong6c97ec52018-08-21 16:10:22 +08001073 int len = save_zero_page_to_file(rs, rs->f, block, offset);
Juan Quintela56e93d22015-05-07 19:33:31 +02001074
Xiao Guangrong6c97ec52018-08-21 16:10:22 +08001075 if (len) {
Juan Quintela93604472017-06-06 19:49:03 +02001076 ram_counters.duplicate++;
Xiao Guangrong6c97ec52018-08-21 16:10:22 +08001077 ram_counters.transferred += len;
1078 return 1;
Juan Quintela56e93d22015-05-07 19:33:31 +02001079 }
Xiao Guangrong6c97ec52018-08-21 16:10:22 +08001080 return -1;
Juan Quintela56e93d22015-05-07 19:33:31 +02001081}
1082
Juan Quintela57273092017-03-20 22:25:28 +01001083static void ram_release_pages(const char *rbname, uint64_t offset, int pages)
Pavel Butsykin53f09a12017-02-03 18:23:20 +03001084{
Juan Quintela57273092017-03-20 22:25:28 +01001085 if (!migrate_release_ram() || !migration_in_postcopy()) {
Pavel Butsykin53f09a12017-02-03 18:23:20 +03001086 return;
1087 }
1088
Alexey Romko8bba0042020-01-10 14:51:34 +01001089 ram_discard_range(rbname, offset, ((ram_addr_t)pages) << TARGET_PAGE_BITS);
Pavel Butsykin53f09a12017-02-03 18:23:20 +03001090}
1091
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08001092/*
1093 * @pages: the number of pages written by the control path,
1094 * < 0 - error
1095 * > 0 - number of pages written
1096 *
1097 * Return true if the pages has been saved, otherwise false is returned.
1098 */
1099static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
1100 int *pages)
1101{
1102 uint64_t bytes_xmit = 0;
1103 int ret;
1104
1105 *pages = -1;
1106 ret = ram_control_save_page(rs->f, block->offset, offset, TARGET_PAGE_SIZE,
1107 &bytes_xmit);
1108 if (ret == RAM_SAVE_CONTROL_NOT_SUPP) {
1109 return false;
1110 }
1111
1112 if (bytes_xmit) {
1113 ram_counters.transferred += bytes_xmit;
1114 *pages = 1;
1115 }
1116
1117 if (ret == RAM_SAVE_CONTROL_DELAYED) {
1118 return true;
1119 }
1120
1121 if (bytes_xmit > 0) {
1122 ram_counters.normal++;
1123 } else if (bytes_xmit == 0) {
1124 ram_counters.duplicate++;
1125 }
1126
1127 return true;
1128}
1129
Xiao Guangrong65dacaa2018-03-30 15:51:27 +08001130/*
1131 * directly send the page to the stream
1132 *
1133 * Returns the number of pages written.
1134 *
1135 * @rs: current RAM state
1136 * @block: block that contains the page we want to send
1137 * @offset: offset inside the block for the page
1138 * @buf: the page to be sent
1139 * @async: send to page asyncly
1140 */
1141static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
1142 uint8_t *buf, bool async)
1143{
1144 ram_counters.transferred += save_page_header(rs, rs->f, block,
1145 offset | RAM_SAVE_FLAG_PAGE);
1146 if (async) {
1147 qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE,
1148 migrate_release_ram() &
1149 migration_in_postcopy());
1150 } else {
1151 qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE);
1152 }
1153 ram_counters.transferred += TARGET_PAGE_SIZE;
1154 ram_counters.normal++;
1155 return 1;
1156}
1157
Juan Quintela56e93d22015-05-07 19:33:31 +02001158/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001159 * ram_save_page: send the given page to the stream
Juan Quintela56e93d22015-05-07 19:33:31 +02001160 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001161 * Returns the number of pages written.
Dr. David Alan Gilbert3fd3c4b2015-12-10 16:31:46 +00001162 * < 0 - error
1163 * >=0 - Number of pages written - this might legally be 0
1164 * if xbzrle noticed the page was the same.
Juan Quintela56e93d22015-05-07 19:33:31 +02001165 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001166 * @rs: current RAM state
Juan Quintela56e93d22015-05-07 19:33:31 +02001167 * @block: block that contains the page we want to send
1168 * @offset: offset inside the block for the page
1169 * @last_stage: if we are at the completion stage
Juan Quintela56e93d22015-05-07 19:33:31 +02001170 */
Juan Quintelaa0a8aa12017-03-20 22:29:07 +01001171static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
Juan Quintela56e93d22015-05-07 19:33:31 +02001172{
1173 int pages = -1;
Juan Quintela56e93d22015-05-07 19:33:31 +02001174 uint8_t *p;
Juan Quintela56e93d22015-05-07 19:33:31 +02001175 bool send_async = true;
zhanghailianga08f6892016-01-15 11:37:44 +08001176 RAMBlock *block = pss->block;
Alexey Romko8bba0042020-01-10 14:51:34 +01001177 ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08001178 ram_addr_t current_addr = block->offset + offset;
Juan Quintela56e93d22015-05-07 19:33:31 +02001179
Dr. David Alan Gilbert2f68e392015-08-13 11:51:30 +01001180 p = block->host + offset;
Dr. David Alan Gilbert1db9d8e2017-04-26 19:37:21 +01001181 trace_ram_save_page(block->idstr, (uint64_t)offset, p);
Juan Quintela56e93d22015-05-07 19:33:31 +02001182
Juan Quintela56e93d22015-05-07 19:33:31 +02001183 XBZRLE_cache_lock();
Xiao Guangrongd7400a32018-03-30 15:51:26 +08001184 if (!rs->ram_bulk_stage && !migration_in_postcopy() &&
1185 migrate_use_xbzrle()) {
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08001186 pages = save_xbzrle_page(rs, &p, current_addr, block,
1187 offset, last_stage);
1188 if (!last_stage) {
1189 /* Can't send this cached data async, since the cache page
1190 * might get updated before it gets to the wire
Juan Quintela56e93d22015-05-07 19:33:31 +02001191 */
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08001192 send_async = false;
Juan Quintela56e93d22015-05-07 19:33:31 +02001193 }
1194 }
1195
1196 /* XBZRLE overflow or normal page */
1197 if (pages == -1) {
Xiao Guangrong65dacaa2018-03-30 15:51:27 +08001198 pages = save_normal_page(rs, block, offset, p, send_async);
Juan Quintela56e93d22015-05-07 19:33:31 +02001199 }
1200
1201 XBZRLE_cache_unlock();
1202
1203 return pages;
1204}
1205
Juan Quintelab9ee2f72016-01-15 11:40:13 +01001206static int ram_save_multifd_page(RAMState *rs, RAMBlock *block,
1207 ram_addr_t offset)
1208{
Juan Quintela67a4c892020-01-22 16:03:01 +01001209 if (multifd_queue_page(rs->f, block, offset) < 0) {
Ivan Ren713f7622019-06-25 21:18:17 +08001210 return -1;
1211 }
Juan Quintelab9ee2f72016-01-15 11:40:13 +01001212 ram_counters.normal++;
1213
1214 return 1;
1215}
1216
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001217static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
Xiao Guangrong6ef37712018-08-21 16:10:23 +08001218 ram_addr_t offset, uint8_t *source_buf)
Juan Quintela56e93d22015-05-07 19:33:31 +02001219{
Juan Quintela53518d92017-05-04 11:46:24 +02001220 RAMState *rs = ram_state;
Liang Lia7a9a882016-05-05 15:32:57 +08001221 uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001222 bool zero_page = false;
Xiao Guangrong6ef37712018-08-21 16:10:23 +08001223 int ret;
Juan Quintela56e93d22015-05-07 19:33:31 +02001224
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001225 if (save_zero_page_to_file(rs, f, block, offset)) {
1226 zero_page = true;
1227 goto exit;
1228 }
1229
Xiao Guangrong6ef37712018-08-21 16:10:23 +08001230 save_page_header(rs, f, block, offset | RAM_SAVE_FLAG_COMPRESS_PAGE);
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08001231
1232 /*
1233 * copy it to a internal buffer to avoid it being modified by VM
1234 * so that we can catch up the error during compression and
1235 * decompression
1236 */
1237 memcpy(source_buf, p, TARGET_PAGE_SIZE);
Xiao Guangrong6ef37712018-08-21 16:10:23 +08001238 ret = qemu_put_compression_data(f, stream, source_buf, TARGET_PAGE_SIZE);
1239 if (ret < 0) {
1240 qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
Liang Lib3be2892016-05-05 15:32:54 +08001241 error_report("compressed data failed!");
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001242 return false;
Liang Lib3be2892016-05-05 15:32:54 +08001243 }
Juan Quintela56e93d22015-05-07 19:33:31 +02001244
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001245exit:
Xiao Guangrong6ef37712018-08-21 16:10:23 +08001246 ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1);
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001247 return zero_page;
1248}
1249
1250static void
1251update_compress_thread_counts(const CompressParam *param, int bytes_xmit)
1252{
Xiao Guangrong76e03002018-09-06 15:01:00 +08001253 ram_counters.transferred += bytes_xmit;
1254
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001255 if (param->zero_page) {
1256 ram_counters.duplicate++;
Xiao Guangrong76e03002018-09-06 15:01:00 +08001257 return;
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001258 }
Xiao Guangrong76e03002018-09-06 15:01:00 +08001259
1260 /* 8 means a header with RAM_SAVE_FLAG_CONTINUE. */
1261 compression_counters.compressed_size += bytes_xmit - 8;
1262 compression_counters.pages++;
Juan Quintela56e93d22015-05-07 19:33:31 +02001263}
1264
Xiao Guangrong32b05492018-09-06 15:01:01 +08001265static bool save_page_use_compression(RAMState *rs);
1266
Juan Quintelace25d332017-03-15 11:00:51 +01001267static void flush_compressed_data(RAMState *rs)
Juan Quintela56e93d22015-05-07 19:33:31 +02001268{
1269 int idx, len, thread_count;
1270
Xiao Guangrong32b05492018-09-06 15:01:01 +08001271 if (!save_page_use_compression(rs)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02001272 return;
1273 }
1274 thread_count = migrate_compress_threads();
Liang Lia7a9a882016-05-05 15:32:57 +08001275
Liang Li0d9f9a52016-05-05 15:32:59 +08001276 qemu_mutex_lock(&comp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02001277 for (idx = 0; idx < thread_count; idx++) {
Liang Lia7a9a882016-05-05 15:32:57 +08001278 while (!comp_param[idx].done) {
Liang Li0d9f9a52016-05-05 15:32:59 +08001279 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02001280 }
Liang Lia7a9a882016-05-05 15:32:57 +08001281 }
Liang Li0d9f9a52016-05-05 15:32:59 +08001282 qemu_mutex_unlock(&comp_done_lock);
Liang Lia7a9a882016-05-05 15:32:57 +08001283
1284 for (idx = 0; idx < thread_count; idx++) {
1285 qemu_mutex_lock(&comp_param[idx].mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08001286 if (!comp_param[idx].quit) {
Juan Quintelace25d332017-03-15 11:00:51 +01001287 len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001288 /*
1289 * it's safe to fetch zero_page without holding comp_done_lock
1290 * as there is no further request submitted to the thread,
1291 * i.e, the thread should be waiting for a request at this point.
1292 */
1293 update_compress_thread_counts(&comp_param[idx], len);
Juan Quintela56e93d22015-05-07 19:33:31 +02001294 }
Liang Lia7a9a882016-05-05 15:32:57 +08001295 qemu_mutex_unlock(&comp_param[idx].mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02001296 }
1297}
1298
1299static inline void set_compress_params(CompressParam *param, RAMBlock *block,
1300 ram_addr_t offset)
1301{
1302 param->block = block;
1303 param->offset = offset;
1304}
1305
Juan Quintelace25d332017-03-15 11:00:51 +01001306static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block,
1307 ram_addr_t offset)
Juan Quintela56e93d22015-05-07 19:33:31 +02001308{
1309 int idx, thread_count, bytes_xmit = -1, pages = -1;
Xiao Guangrong1d588722018-08-21 16:10:20 +08001310 bool wait = migrate_compress_wait_thread();
Juan Quintela56e93d22015-05-07 19:33:31 +02001311
1312 thread_count = migrate_compress_threads();
Liang Li0d9f9a52016-05-05 15:32:59 +08001313 qemu_mutex_lock(&comp_done_lock);
Xiao Guangrong1d588722018-08-21 16:10:20 +08001314retry:
1315 for (idx = 0; idx < thread_count; idx++) {
1316 if (comp_param[idx].done) {
1317 comp_param[idx].done = false;
1318 bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file);
1319 qemu_mutex_lock(&comp_param[idx].mutex);
1320 set_compress_params(&comp_param[idx], block, offset);
1321 qemu_cond_signal(&comp_param[idx].cond);
1322 qemu_mutex_unlock(&comp_param[idx].mutex);
1323 pages = 1;
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001324 update_compress_thread_counts(&comp_param[idx], bytes_xmit);
Juan Quintela56e93d22015-05-07 19:33:31 +02001325 break;
Juan Quintela56e93d22015-05-07 19:33:31 +02001326 }
1327 }
Xiao Guangrong1d588722018-08-21 16:10:20 +08001328
1329 /*
1330 * wait for the free thread if the user specifies 'compress-wait-thread',
1331 * otherwise we will post the page out in the main thread as normal page.
1332 */
1333 if (pages < 0 && wait) {
1334 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
1335 goto retry;
1336 }
Liang Li0d9f9a52016-05-05 15:32:59 +08001337 qemu_mutex_unlock(&comp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02001338
1339 return pages;
1340}
1341
1342/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001343 * find_dirty_block: find the next dirty page and update any state
1344 * associated with the search process.
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001345 *
Wei Yanga5f7b1a2019-05-11 07:37:29 +08001346 * Returns true if a page is found
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001347 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001348 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001349 * @pss: data about the state of the current dirty page scan
1350 * @again: set to false if the search has scanned the whole of RAM
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001351 */
Juan Quintelaf20e2862017-03-21 16:19:05 +01001352static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001353{
Juan Quintelaf20e2862017-03-21 16:19:05 +01001354 pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
Juan Quintela6f37bb82017-03-13 19:26:29 +01001355 if (pss->complete_round && pss->block == rs->last_seen_block &&
Juan Quintelaa935e302017-03-21 15:36:51 +01001356 pss->page >= rs->last_page) {
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001357 /*
1358 * We've been once around the RAM and haven't found anything.
1359 * Give up.
1360 */
1361 *again = false;
1362 return false;
1363 }
Alexey Romko8bba0042020-01-10 14:51:34 +01001364 if ((((ram_addr_t)pss->page) << TARGET_PAGE_BITS)
1365 >= pss->block->used_length) {
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001366 /* Didn't find anything in this RAM Block */
Juan Quintelaa935e302017-03-21 15:36:51 +01001367 pss->page = 0;
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001368 pss->block = QLIST_NEXT_RCU(pss->block, next);
1369 if (!pss->block) {
Xiao Guangrong48df9d82018-09-06 15:00:59 +08001370 /*
1371 * If memory migration starts over, we will meet a dirtied page
1372 * which may still exists in compression threads's ring, so we
1373 * should flush the compressed data to make sure the new page
1374 * is not overwritten by the old one in the destination.
1375 *
1376 * Also If xbzrle is on, stop using the data compression at this
1377 * point. In theory, xbzrle can do better than compression.
1378 */
1379 flush_compressed_data(rs);
1380
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001381 /* Hit the end of the list */
1382 pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
1383 /* Flag that we've looped */
1384 pss->complete_round = true;
Juan Quintela6f37bb82017-03-13 19:26:29 +01001385 rs->ram_bulk_stage = false;
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001386 }
1387 /* Didn't find anything this time, but try again on the new block */
1388 *again = true;
1389 return false;
1390 } else {
1391 /* Can go around again, but... */
1392 *again = true;
1393 /* We've found something so probably don't need to */
1394 return true;
1395 }
1396}
1397
Juan Quintela3d0684b2017-03-23 15:06:39 +01001398/**
1399 * unqueue_page: gets a page of the queue
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001400 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001401 * Helper for 'get_queued_page' - gets a page off the queue
1402 *
1403 * Returns the block of the page (or NULL if none available)
1404 *
Juan Quintelaec481c62017-03-20 22:12:40 +01001405 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001406 * @offset: used to return the offset within the RAMBlock
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001407 */
Juan Quintelaf20e2862017-03-21 16:19:05 +01001408static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001409{
1410 RAMBlock *block = NULL;
1411
Xiao Guangrongae526e32018-08-21 16:10:25 +08001412 if (QSIMPLEQ_EMPTY_ATOMIC(&rs->src_page_requests)) {
1413 return NULL;
1414 }
1415
Daniel Brodsky6e8a3552020-04-03 21:21:08 -07001416 QEMU_LOCK_GUARD(&rs->src_page_req_mutex);
Juan Quintelaec481c62017-03-20 22:12:40 +01001417 if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
1418 struct RAMSrcPageRequest *entry =
1419 QSIMPLEQ_FIRST(&rs->src_page_requests);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001420 block = entry->rb;
1421 *offset = entry->offset;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001422
1423 if (entry->len > TARGET_PAGE_SIZE) {
1424 entry->len -= TARGET_PAGE_SIZE;
1425 entry->offset += TARGET_PAGE_SIZE;
1426 } else {
1427 memory_region_unref(block->mr);
Juan Quintelaec481c62017-03-20 22:12:40 +01001428 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001429 g_free(entry);
Dr. David Alan Gilberte03a34f2018-06-13 11:26:42 +01001430 migration_consume_urgent_request();
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001431 }
1432 }
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001433
1434 return block;
1435}
1436
Juan Quintela3d0684b2017-03-23 15:06:39 +01001437/**
Li Qiangff1543a2019-05-24 23:28:32 -07001438 * get_queued_page: unqueue a page from the postcopy requests
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001439 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001440 * Skips pages that are already sent (!dirty)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001441 *
Wei Yanga5f7b1a2019-05-11 07:37:29 +08001442 * Returns true if a queued page is found
Juan Quintela3d0684b2017-03-23 15:06:39 +01001443 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001444 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001445 * @pss: data about the state of the current dirty page scan
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001446 */
Juan Quintelaf20e2862017-03-21 16:19:05 +01001447static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001448{
1449 RAMBlock *block;
1450 ram_addr_t offset;
1451 bool dirty;
1452
1453 do {
Juan Quintelaf20e2862017-03-21 16:19:05 +01001454 block = unqueue_page(rs, &offset);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001455 /*
1456 * We're sending this page, and since it's postcopy nothing else
1457 * will dirty it, and we must make sure it doesn't get sent again
1458 * even if this queue request was received after the background
1459 * search already sent it.
1460 */
1461 if (block) {
Juan Quintelaf20e2862017-03-21 16:19:05 +01001462 unsigned long page;
1463
Juan Quintela6b6712e2017-03-22 15:18:04 +01001464 page = offset >> TARGET_PAGE_BITS;
1465 dirty = test_bit(page, block->bmap);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001466 if (!dirty) {
Juan Quintela06b10682017-03-21 15:18:05 +01001467 trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
Wei Yang64737602019-08-19 14:18:43 +08001468 page);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001469 } else {
Juan Quintelaf20e2862017-03-21 16:19:05 +01001470 trace_get_queued_page(block->idstr, (uint64_t)offset, page);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001471 }
1472 }
1473
1474 } while (block && !dirty);
1475
1476 if (block) {
1477 /*
1478 * As soon as we start servicing pages out of order, then we have
1479 * to kill the bulk stage, since the bulk stage assumes
1480 * in (migration_bitmap_find_and_reset_dirty) that every page is
1481 * dirty, that's no longer true.
1482 */
Juan Quintela6f37bb82017-03-13 19:26:29 +01001483 rs->ram_bulk_stage = false;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001484
1485 /*
1486 * We want the background search to continue from the queued page
1487 * since the guest is likely to want other pages near to the page
1488 * it just requested.
1489 */
1490 pss->block = block;
Juan Quintelaa935e302017-03-21 15:36:51 +01001491 pss->page = offset >> TARGET_PAGE_BITS;
Wei Yang422314e2019-06-05 09:08:28 +08001492
1493 /*
1494 * This unqueued page would break the "one round" check, even is
1495 * really rare.
1496 */
1497 pss->complete_round = false;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001498 }
1499
1500 return !!block;
1501}
1502
Juan Quintela56e93d22015-05-07 19:33:31 +02001503/**
Juan Quintela5e58f962017-04-03 22:06:54 +02001504 * migration_page_queue_free: drop any remaining pages in the ram
1505 * request queue
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001506 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001507 * It should be empty at the end anyway, but in error cases there may
1508 * be some left. in case that there is any page left, we drop it.
1509 *
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001510 */
Juan Quintela83c13382017-05-04 11:45:01 +02001511static void migration_page_queue_free(RAMState *rs)
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001512{
Juan Quintelaec481c62017-03-20 22:12:40 +01001513 struct RAMSrcPageRequest *mspr, *next_mspr;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001514 /* This queue generally should be empty - but in the case of a failed
1515 * migration might have some droppings in.
1516 */
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01001517 RCU_READ_LOCK_GUARD();
Juan Quintelaec481c62017-03-20 22:12:40 +01001518 QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001519 memory_region_unref(mspr->rb->mr);
Juan Quintelaec481c62017-03-20 22:12:40 +01001520 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001521 g_free(mspr);
1522 }
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001523}
1524
1525/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001526 * ram_save_queue_pages: queue the page for transmission
1527 *
1528 * A request from postcopy destination for example.
1529 *
1530 * Returns zero on success or negative on error
1531 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001532 * @rbname: Name of the RAMBLock of the request. NULL means the
1533 * same that last one.
1534 * @start: starting address from the start of the RAMBlock
1535 * @len: length (in bytes) to send
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001536 */
Juan Quintela96506892017-03-14 18:41:03 +01001537int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001538{
1539 RAMBlock *ramblock;
Juan Quintela53518d92017-05-04 11:46:24 +02001540 RAMState *rs = ram_state;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001541
Juan Quintela93604472017-06-06 19:49:03 +02001542 ram_counters.postcopy_requests++;
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01001543 RCU_READ_LOCK_GUARD();
1544
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001545 if (!rbname) {
1546 /* Reuse last RAMBlock */
Juan Quintela68a098f2017-03-14 13:48:42 +01001547 ramblock = rs->last_req_rb;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001548
1549 if (!ramblock) {
1550 /*
1551 * Shouldn't happen, we can't reuse the last RAMBlock if
1552 * it's the 1st request.
1553 */
1554 error_report("ram_save_queue_pages no previous block");
Daniel Henrique Barboza03acb4e2020-01-06 15:23:31 -03001555 return -1;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001556 }
1557 } else {
1558 ramblock = qemu_ram_block_by_name(rbname);
1559
1560 if (!ramblock) {
1561 /* We shouldn't be asked for a non-existent RAMBlock */
1562 error_report("ram_save_queue_pages no block '%s'", rbname);
Daniel Henrique Barboza03acb4e2020-01-06 15:23:31 -03001563 return -1;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001564 }
Juan Quintela68a098f2017-03-14 13:48:42 +01001565 rs->last_req_rb = ramblock;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001566 }
1567 trace_ram_save_queue_pages(ramblock->idstr, start, len);
Bihong Yu395cb452020-10-20 11:10:43 +08001568 if (start + len > ramblock->used_length) {
Juan Quintela9458ad62015-11-10 17:42:05 +01001569 error_report("%s request overrun start=" RAM_ADDR_FMT " len="
1570 RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001571 __func__, start, len, ramblock->used_length);
Daniel Henrique Barboza03acb4e2020-01-06 15:23:31 -03001572 return -1;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001573 }
1574
Juan Quintelaec481c62017-03-20 22:12:40 +01001575 struct RAMSrcPageRequest *new_entry =
1576 g_malloc0(sizeof(struct RAMSrcPageRequest));
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001577 new_entry->rb = ramblock;
1578 new_entry->offset = start;
1579 new_entry->len = len;
1580
1581 memory_region_ref(ramblock->mr);
Juan Quintelaec481c62017-03-20 22:12:40 +01001582 qemu_mutex_lock(&rs->src_page_req_mutex);
1583 QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
Dr. David Alan Gilberte03a34f2018-06-13 11:26:42 +01001584 migration_make_urgent_request();
Juan Quintelaec481c62017-03-20 22:12:40 +01001585 qemu_mutex_unlock(&rs->src_page_req_mutex);
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001586
1587 return 0;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001588}
1589
Xiao Guangrongd7400a32018-03-30 15:51:26 +08001590static bool save_page_use_compression(RAMState *rs)
1591{
1592 if (!migrate_use_compression()) {
1593 return false;
1594 }
1595
1596 /*
1597 * If xbzrle is on, stop using the data compression after first
1598 * round of migration even if compression is enabled. In theory,
1599 * xbzrle can do better than compression.
1600 */
1601 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
1602 return true;
1603 }
1604
1605 return false;
1606}
1607
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001608/*
1609 * try to compress the page before posting it out, return true if the page
1610 * has been properly handled by compression, otherwise needs other
1611 * paths to handle it
1612 */
1613static bool save_compress_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
1614{
1615 if (!save_page_use_compression(rs)) {
1616 return false;
1617 }
1618
1619 /*
1620 * When starting the process of a new block, the first page of
1621 * the block should be sent out before other pages in the same
1622 * block, and all the pages in last block should have been sent
1623 * out, keeping this order is important, because the 'cont' flag
1624 * is used to avoid resending the block name.
1625 *
1626 * We post the fist page as normal page as compression will take
1627 * much CPU resource.
1628 */
1629 if (block != rs->last_sent_block) {
1630 flush_compressed_data(rs);
1631 return false;
1632 }
1633
1634 if (compress_page_with_multi_thread(rs, block, offset) > 0) {
1635 return true;
1636 }
1637
Xiao Guangrong76e03002018-09-06 15:01:00 +08001638 compression_counters.busy++;
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001639 return false;
1640}
1641
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001642/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001643 * ram_save_target_page: save one target page
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001644 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001645 * Returns the number of pages written
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001646 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001647 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001648 * @pss: data about the page we want to send
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001649 * @last_stage: if we are at the completion stage
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001650 */
Juan Quintelaa0a8aa12017-03-20 22:29:07 +01001651static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
Juan Quintelaf20e2862017-03-21 16:19:05 +01001652 bool last_stage)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001653{
Xiao Guangronga8ec91f2018-03-30 15:51:25 +08001654 RAMBlock *block = pss->block;
Alexey Romko8bba0042020-01-10 14:51:34 +01001655 ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
Xiao Guangronga8ec91f2018-03-30 15:51:25 +08001656 int res;
1657
1658 if (control_save_page(rs, block, offset, &res)) {
1659 return res;
1660 }
1661
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001662 if (save_compress_page(rs, block, offset)) {
1663 return 1;
Xiao Guangrongd7400a32018-03-30 15:51:26 +08001664 }
1665
1666 res = save_zero_page(rs, block, offset);
1667 if (res > 0) {
1668 /* Must let xbzrle know, otherwise a previous (now 0'd) cached
1669 * page would be stale
1670 */
1671 if (!save_page_use_compression(rs)) {
1672 XBZRLE_cache_lock();
1673 xbzrle_cache_zero_page(rs, block->offset + offset);
1674 XBZRLE_cache_unlock();
1675 }
1676 ram_release_pages(block->idstr, offset, res);
1677 return res;
1678 }
1679
Xiao Guangrongda3f56c2018-03-30 15:51:28 +08001680 /*
Wei Yangc6b3a2e2019-10-26 07:20:00 +08001681 * Do not use multifd for:
1682 * 1. Compression as the first page in the new block should be posted out
1683 * before sending the compressed page
1684 * 2. In postcopy as one whole host page should be placed
Xiao Guangrongda3f56c2018-03-30 15:51:28 +08001685 */
Wei Yangc6b3a2e2019-10-26 07:20:00 +08001686 if (!save_page_use_compression(rs) && migrate_use_multifd()
1687 && !migration_in_postcopy()) {
Juan Quintelab9ee2f72016-01-15 11:40:13 +01001688 return ram_save_multifd_page(rs, block, offset);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001689 }
1690
Xiao Guangrong1faa5662018-03-30 15:51:24 +08001691 return ram_save_page(rs, pss, last_stage);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001692}
1693
1694/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001695 * ram_save_host_page: save a whole host page
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001696 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001697 * Starting at *offset send pages up to the end of the current host
1698 * page. It's valid for the initial offset to point into the middle of
1699 * a host page in which case the remainder of the hostpage is sent.
1700 * Only dirty target pages are sent. Note that the host page size may
1701 * be a huge page for this block.
Dr. David Alan Gilbert1eb3fc02017-05-17 17:58:09 +01001702 * The saving stops at the boundary of the used_length of the block
1703 * if the RAMBlock isn't a multiple of the host page size.
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001704 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001705 * Returns the number of pages written or negative on error
1706 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001707 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001708 * @ms: current migration state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001709 * @pss: data about the page we want to send
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001710 * @last_stage: if we are at the completion stage
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001711 */
Juan Quintelaa0a8aa12017-03-20 22:29:07 +01001712static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
Juan Quintelaf20e2862017-03-21 16:19:05 +01001713 bool last_stage)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001714{
1715 int tmppages, pages = 0;
Juan Quintelaa935e302017-03-21 15:36:51 +01001716 size_t pagesize_bits =
1717 qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
Dr. David Alan Gilbert4c011c32017-02-24 18:28:39 +00001718
Yury Kotovfbd162e2019-02-15 20:45:46 +03001719 if (ramblock_is_ignored(pss->block)) {
Cédric Le Goaterb895de52018-05-14 08:57:00 +02001720 error_report("block %s should not be migrated !", pss->block->idstr);
1721 return 0;
1722 }
1723
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001724 do {
Xiao Guangrong1faa5662018-03-30 15:51:24 +08001725 /* Check the pages is dirty and if it is send it */
1726 if (!migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
1727 pss->page++;
1728 continue;
1729 }
1730
Juan Quintelaf20e2862017-03-21 16:19:05 +01001731 tmppages = ram_save_target_page(rs, pss, last_stage);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001732 if (tmppages < 0) {
1733 return tmppages;
1734 }
1735
1736 pages += tmppages;
Juan Quintelaa935e302017-03-21 15:36:51 +01001737 pss->page++;
Dr. David Alan Gilbert97e1e062019-12-05 10:29:18 +00001738 /* Allow rate limiting to happen in the middle of huge pages */
1739 migration_rate_limit();
Dr. David Alan Gilbert1eb3fc02017-05-17 17:58:09 +01001740 } while ((pss->page & (pagesize_bits - 1)) &&
Alexey Romko8bba0042020-01-10 14:51:34 +01001741 offset_in_ramblock(pss->block,
1742 ((ram_addr_t)pss->page) << TARGET_PAGE_BITS));
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001743
1744 /* The offset we leave with is the last one we looked at */
Juan Quintelaa935e302017-03-21 15:36:51 +01001745 pss->page--;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001746 return pages;
1747}
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001748
1749/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001750 * ram_find_and_save_block: finds a dirty page and sends it to f
Juan Quintela56e93d22015-05-07 19:33:31 +02001751 *
1752 * Called within an RCU critical section.
1753 *
Xiao Guangronge8f37352018-09-03 17:26:44 +08001754 * Returns the number of pages written where zero means no dirty pages,
1755 * or negative on error
Juan Quintela56e93d22015-05-07 19:33:31 +02001756 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001757 * @rs: current RAM state
Juan Quintela56e93d22015-05-07 19:33:31 +02001758 * @last_stage: if we are at the completion stage
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001759 *
1760 * On systems where host-page-size > target-page-size it will send all the
1761 * pages in a host page that are dirty.
Juan Quintela56e93d22015-05-07 19:33:31 +02001762 */
1763
Juan Quintelace25d332017-03-15 11:00:51 +01001764static int ram_find_and_save_block(RAMState *rs, bool last_stage)
Juan Quintela56e93d22015-05-07 19:33:31 +02001765{
Dr. David Alan Gilbertb8fb8cb2015-09-23 15:27:10 +01001766 PageSearchStatus pss;
Juan Quintela56e93d22015-05-07 19:33:31 +02001767 int pages = 0;
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001768 bool again, found;
Juan Quintela56e93d22015-05-07 19:33:31 +02001769
Ashijeet Acharya0827b9e2017-02-08 19:58:45 +05301770 /* No dirty page as there is zero RAM */
1771 if (!ram_bytes_total()) {
1772 return pages;
1773 }
1774
Juan Quintela6f37bb82017-03-13 19:26:29 +01001775 pss.block = rs->last_seen_block;
Juan Quintelaa935e302017-03-21 15:36:51 +01001776 pss.page = rs->last_page;
Dr. David Alan Gilbertb8fb8cb2015-09-23 15:27:10 +01001777 pss.complete_round = false;
1778
1779 if (!pss.block) {
1780 pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
1781 }
Juan Quintela56e93d22015-05-07 19:33:31 +02001782
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001783 do {
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001784 again = true;
Juan Quintelaf20e2862017-03-21 16:19:05 +01001785 found = get_queued_page(rs, &pss);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001786
1787 if (!found) {
1788 /* priority queue empty, so just search for something dirty */
Juan Quintelaf20e2862017-03-21 16:19:05 +01001789 found = find_dirty_block(rs, &pss, &again);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001790 }
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001791
1792 if (found) {
Juan Quintelaf20e2862017-03-21 16:19:05 +01001793 pages = ram_save_host_page(rs, &pss, last_stage);
Juan Quintela56e93d22015-05-07 19:33:31 +02001794 }
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001795 } while (!pages && again);
Juan Quintela56e93d22015-05-07 19:33:31 +02001796
Juan Quintela6f37bb82017-03-13 19:26:29 +01001797 rs->last_seen_block = pss.block;
Juan Quintelaa935e302017-03-21 15:36:51 +01001798 rs->last_page = pss.page;
Juan Quintela56e93d22015-05-07 19:33:31 +02001799
1800 return pages;
1801}
1802
1803void acct_update_position(QEMUFile *f, size_t size, bool zero)
1804{
1805 uint64_t pages = size / TARGET_PAGE_SIZE;
Juan Quintelaf7ccd612017-03-13 20:30:21 +01001806
Juan Quintela56e93d22015-05-07 19:33:31 +02001807 if (zero) {
Juan Quintela93604472017-06-06 19:49:03 +02001808 ram_counters.duplicate += pages;
Juan Quintela56e93d22015-05-07 19:33:31 +02001809 } else {
Juan Quintela93604472017-06-06 19:49:03 +02001810 ram_counters.normal += pages;
1811 ram_counters.transferred += size;
Juan Quintela56e93d22015-05-07 19:33:31 +02001812 qemu_update_position(f, size);
1813 }
1814}
1815
Yury Kotovfbd162e2019-02-15 20:45:46 +03001816static uint64_t ram_bytes_total_common(bool count_ignored)
Juan Quintela56e93d22015-05-07 19:33:31 +02001817{
1818 RAMBlock *block;
1819 uint64_t total = 0;
1820
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01001821 RCU_READ_LOCK_GUARD();
1822
Yury Kotovfbd162e2019-02-15 20:45:46 +03001823 if (count_ignored) {
1824 RAMBLOCK_FOREACH_MIGRATABLE(block) {
1825 total += block->used_length;
1826 }
1827 } else {
1828 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
1829 total += block->used_length;
1830 }
Peter Xu99e15582017-05-12 12:17:39 +08001831 }
Juan Quintela56e93d22015-05-07 19:33:31 +02001832 return total;
1833}
1834
/* Total used_length of all RAMBlocks, leaving out the ignored ones. */
uint64_t ram_bytes_total(void)
{
    const bool count_ignored = false;

    return ram_bytes_total_common(count_ignored);
}
1839
Juan Quintelaf265e0e2017-06-28 11:52:27 +02001840static void xbzrle_load_setup(void)
Juan Quintela56e93d22015-05-07 19:33:31 +02001841{
Juan Quintelaf265e0e2017-06-28 11:52:27 +02001842 XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
Juan Quintela56e93d22015-05-07 19:33:31 +02001843}
1844
Juan Quintelaf265e0e2017-06-28 11:52:27 +02001845static void xbzrle_load_cleanup(void)
1846{
1847 g_free(XBZRLE.decoded_buf);
1848 XBZRLE.decoded_buf = NULL;
1849}
1850
Peter Xu7d7c96b2017-10-19 14:31:58 +08001851static void ram_state_cleanup(RAMState **rsp)
1852{
Dr. David Alan Gilbertb9ccaf62018-02-12 16:03:39 +00001853 if (*rsp) {
1854 migration_page_queue_free(*rsp);
1855 qemu_mutex_destroy(&(*rsp)->bitmap_mutex);
1856 qemu_mutex_destroy(&(*rsp)->src_page_req_mutex);
1857 g_free(*rsp);
1858 *rsp = NULL;
1859 }
Peter Xu7d7c96b2017-10-19 14:31:58 +08001860}
1861
Peter Xu84593a02017-10-19 14:31:59 +08001862static void xbzrle_cleanup(void)
1863{
1864 XBZRLE_cache_lock();
1865 if (XBZRLE.cache) {
1866 cache_fini(XBZRLE.cache);
1867 g_free(XBZRLE.encoded_buf);
1868 g_free(XBZRLE.current_buf);
1869 g_free(XBZRLE.zero_target_page);
1870 XBZRLE.cache = NULL;
1871 XBZRLE.encoded_buf = NULL;
1872 XBZRLE.current_buf = NULL;
1873 XBZRLE.zero_target_page = NULL;
1874 }
1875 XBZRLE_cache_unlock();
1876}
1877
Juan Quintelaf265e0e2017-06-28 11:52:27 +02001878static void ram_save_cleanup(void *opaque)
Juan Quintela56e93d22015-05-07 19:33:31 +02001879{
Juan Quintela53518d92017-05-04 11:46:24 +02001880 RAMState **rsp = opaque;
Juan Quintela6b6712e2017-03-22 15:18:04 +01001881 RAMBlock *block;
Juan Quintelaeb859c52017-03-13 21:51:55 +01001882
Li Zhijian2ff64032015-07-02 20:18:05 +08001883 /* caller have hold iothread lock or is in a bh, so there is
Yi Wang46334562019-04-15 14:51:29 +08001884 * no writing race against the migration bitmap
Li Zhijian2ff64032015-07-02 20:18:05 +08001885 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01001886 memory_global_dirty_log_stop();
1887
Yury Kotovfbd162e2019-02-15 20:45:46 +03001888 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Peter Xu002cad62019-06-03 14:50:56 +08001889 g_free(block->clear_bmap);
1890 block->clear_bmap = NULL;
Juan Quintela6b6712e2017-03-22 15:18:04 +01001891 g_free(block->bmap);
1892 block->bmap = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02001893 }
1894
Peter Xu84593a02017-10-19 14:31:59 +08001895 xbzrle_cleanup();
Juan Quintelaf0afa332017-06-28 11:52:28 +02001896 compress_threads_save_cleanup();
Peter Xu7d7c96b2017-10-19 14:31:58 +08001897 ram_state_cleanup(rsp);
Juan Quintela56e93d22015-05-07 19:33:31 +02001898}
1899
Juan Quintela6f37bb82017-03-13 19:26:29 +01001900static void ram_state_reset(RAMState *rs)
Juan Quintela56e93d22015-05-07 19:33:31 +02001901{
Juan Quintela6f37bb82017-03-13 19:26:29 +01001902 rs->last_seen_block = NULL;
1903 rs->last_sent_block = NULL;
Juan Quintela269ace22017-03-21 15:23:31 +01001904 rs->last_page = 0;
Juan Quintela6f37bb82017-03-13 19:26:29 +01001905 rs->last_version = ram_list.version;
1906 rs->ram_bulk_stage = true;
Wei Wang6eeb63f2018-12-11 16:24:52 +08001907 rs->fpo_enabled = false;
Juan Quintela56e93d22015-05-07 19:33:31 +02001908}
1909
1910#define MAX_WAIT 50 /* ms, half buffered_file limit */
1911
/*
 * Dump @pages bits of the bitmap @todump to stderr, 128 bits per line,
 * printing '1' for a set bit and '.' for a clear one.
 *
 * 'expected' is the value you expect the bitmap mostly to be full
 * of; it won't bother printing lines that are all this value.
 *
 * A NULL 'todump' is ignored: this function has no default bitmap it
 * could fall back to, so nothing is printed in that case.
 */
void ram_debug_dump_bitmap(unsigned long *todump, bool expected,
                           unsigned long pages)
{
    int64_t cur;
    int64_t linelen = 128;
    char linebuf[129];

    if (!todump) {
        /* Nothing to read from; avoid dereferencing NULL in test_bit() */
        return;
    }

    for (cur = 0; cur < pages; cur += linelen) {
        int64_t curb;
        bool found = false;
        /*
         * Last line; catch the case where the line length
         * is longer than remaining ram
         */
        if (cur + linelen > pages) {
            linelen = pages - cur;
        }
        for (curb = 0; curb < linelen; curb++) {
            bool thisbit = test_bit(cur + curb, todump);
            linebuf[curb] = thisbit ? '1' : '.';
            found = found || (thisbit != expected);
        }
        if (found) {
            linebuf[curb] = '\0';
            /* PRIx64 takes an unsigned 64-bit argument */
            fprintf(stderr,  "0x%08" PRIx64 " : %s\n", (uint64_t)cur, linebuf);
        }
    }
}
1945
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001946/* **** functions for postcopy ***** */
1947
Pavel Butsykinced1c612017-02-03 18:23:21 +03001948void ram_postcopy_migrated_memory_release(MigrationState *ms)
1949{
1950 struct RAMBlock *block;
Pavel Butsykinced1c612017-02-03 18:23:21 +03001951
Yury Kotovfbd162e2019-02-15 20:45:46 +03001952 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01001953 unsigned long *bitmap = block->bmap;
1954 unsigned long range = block->used_length >> TARGET_PAGE_BITS;
1955 unsigned long run_start = find_next_zero_bit(bitmap, range, 0);
Pavel Butsykinced1c612017-02-03 18:23:21 +03001956
1957 while (run_start < range) {
1958 unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
Alexey Romko8bba0042020-01-10 14:51:34 +01001959 ram_discard_range(block->idstr,
1960 ((ram_addr_t)run_start) << TARGET_PAGE_BITS,
1961 ((ram_addr_t)(run_end - run_start))
1962 << TARGET_PAGE_BITS);
Pavel Butsykinced1c612017-02-03 18:23:21 +03001963 run_start = find_next_zero_bit(bitmap, range, run_end + 1);
1964 }
1965 }
1966}
1967
Juan Quintela3d0684b2017-03-23 15:06:39 +01001968/**
1969 * postcopy_send_discard_bm_ram: discard a RAMBlock
1970 *
1971 * Returns zero on success
1972 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001973 * Callback from postcopy_each_ram_send_discard for each RAMBlock
Juan Quintela3d0684b2017-03-23 15:06:39 +01001974 *
1975 * @ms: current migration state
Wei Yang89dab312019-07-15 10:05:49 +08001976 * @block: RAMBlock to discard
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001977 */
Wei Yang810cf2b2019-07-24 09:07:21 +08001978static int postcopy_send_discard_bm_ram(MigrationState *ms, RAMBlock *block)
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001979{
Juan Quintela6b6712e2017-03-22 15:18:04 +01001980 unsigned long end = block->used_length >> TARGET_PAGE_BITS;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001981 unsigned long current;
Wei Yang1e7cf8c2019-08-19 14:18:42 +08001982 unsigned long *bitmap = block->bmap;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001983
Juan Quintela6b6712e2017-03-22 15:18:04 +01001984 for (current = 0; current < end; ) {
Wei Yang1e7cf8c2019-08-19 14:18:42 +08001985 unsigned long one = find_next_bit(bitmap, end, current);
Wei Yang33a5cb622019-06-27 10:08:21 +08001986 unsigned long zero, discard_length;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001987
Wei Yang33a5cb622019-06-27 10:08:21 +08001988 if (one >= end) {
1989 break;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001990 }
Wei Yang33a5cb622019-06-27 10:08:21 +08001991
Wei Yang1e7cf8c2019-08-19 14:18:42 +08001992 zero = find_next_zero_bit(bitmap, end, one + 1);
Wei Yang33a5cb622019-06-27 10:08:21 +08001993
1994 if (zero >= end) {
1995 discard_length = end - one;
1996 } else {
1997 discard_length = zero - one;
1998 }
Wei Yang810cf2b2019-07-24 09:07:21 +08001999 postcopy_discard_send_range(ms, one, discard_length);
Wei Yang33a5cb622019-06-27 10:08:21 +08002000 current = one + discard_length;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002001 }
2002
2003 return 0;
2004}
2005
Juan Quintela3d0684b2017-03-23 15:06:39 +01002006/**
2007 * postcopy_each_ram_send_discard: discard all RAMBlocks
2008 *
2009 * Returns 0 for success or negative for error
2010 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002011 * Utility for the outgoing postcopy code.
2012 * Calls postcopy_send_discard_bm_ram for each RAMBlock
2013 * passing it bitmap indexes and name.
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002014 * (qemu_ram_foreach_block ends up passing unscaled lengths
2015 * which would mean postcopy code would have to deal with target page)
Juan Quintela3d0684b2017-03-23 15:06:39 +01002016 *
2017 * @ms: current migration state
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002018 */
2019static int postcopy_each_ram_send_discard(MigrationState *ms)
2020{
2021 struct RAMBlock *block;
2022 int ret;
2023
Yury Kotovfbd162e2019-02-15 20:45:46 +03002024 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Wei Yang810cf2b2019-07-24 09:07:21 +08002025 postcopy_discard_send_init(ms, block->idstr);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002026
2027 /*
2028 * Postcopy sends chunks of bitmap over the wire, but it
2029 * just needs indexes at this point, avoids it having
2030 * target page specific code.
2031 */
Wei Yang810cf2b2019-07-24 09:07:21 +08002032 ret = postcopy_send_discard_bm_ram(ms, block);
2033 postcopy_discard_send_finish(ms);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002034 if (ret) {
2035 return ret;
2036 }
2037 }
2038
2039 return 0;
2040}
2041
Juan Quintela3d0684b2017-03-23 15:06:39 +01002042/**
Wei Yang8324ef82019-08-19 14:18:41 +08002043 * postcopy_chunk_hostpages_pass: canonicalize bitmap in hostpages
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002044 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002045 * Helper for postcopy_chunk_hostpages; it's called twice to
2046 * canonicalize the two bitmaps, that are similar, but one is
2047 * inverted.
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002048 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002049 * Postcopy requires that all target pages in a hostpage are dirty or
2050 * clean, not a mix. This function canonicalizes the bitmaps.
2051 *
2052 * @ms: current migration state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002053 * @block: block that contains the page we want to canonicalize
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002054 */
Wei Yang1e7cf8c2019-08-19 14:18:42 +08002055static void postcopy_chunk_hostpages_pass(MigrationState *ms, RAMBlock *block)
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002056{
Juan Quintela53518d92017-05-04 11:46:24 +02002057 RAMState *rs = ram_state;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002058 unsigned long *bitmap = block->bmap;
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00002059 unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002060 unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002061 unsigned long run_start;
2062
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00002063 if (block->page_size == TARGET_PAGE_SIZE) {
2064 /* Easy case - TPS==HPS for a non-huge page RAMBlock */
2065 return;
2066 }
2067
Wei Yang1e7cf8c2019-08-19 14:18:42 +08002068 /* Find a dirty page */
2069 run_start = find_next_bit(bitmap, pages, 0);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002070
Juan Quintela6b6712e2017-03-22 15:18:04 +01002071 while (run_start < pages) {
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002072
2073 /*
2074 * If the start of this run of pages is in the middle of a host
2075 * page, then we need to fixup this host page.
2076 */
Wei Yang9dec3cc2019-08-06 08:46:48 +08002077 if (QEMU_IS_ALIGNED(run_start, host_ratio)) {
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002078 /* Find the end of this run */
Wei Yang1e7cf8c2019-08-19 14:18:42 +08002079 run_start = find_next_zero_bit(bitmap, pages, run_start + 1);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002080 /*
2081 * If the end isn't at the start of a host page, then the
2082 * run doesn't finish at the end of a host page
2083 * and we need to discard.
2084 */
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002085 }
2086
Wei Yang9dec3cc2019-08-06 08:46:48 +08002087 if (!QEMU_IS_ALIGNED(run_start, host_ratio)) {
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002088 unsigned long page;
Wei Yangdad45ab2019-08-06 08:46:47 +08002089 unsigned long fixup_start_addr = QEMU_ALIGN_DOWN(run_start,
2090 host_ratio);
2091 run_start = QEMU_ALIGN_UP(run_start, host_ratio);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002092
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002093 /* Clean up the bitmap */
2094 for (page = fixup_start_addr;
2095 page < fixup_start_addr + host_ratio; page++) {
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002096 /*
2097 * Remark them as dirty, updating the count for any pages
2098 * that weren't previously dirty.
2099 */
Juan Quintela0d8ec882017-03-13 21:21:41 +01002100 rs->migration_dirty_pages += !test_and_set_bit(page, bitmap);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002101 }
2102 }
2103
Wei Yang1e7cf8c2019-08-19 14:18:42 +08002104 /* Find the next dirty page for the next iteration */
2105 run_start = find_next_bit(bitmap, pages, run_start);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002106 }
2107}
2108
Juan Quintela3d0684b2017-03-23 15:06:39 +01002109/**
Wei Yang89dab312019-07-15 10:05:49 +08002110 * postcopy_chunk_hostpages: discard any partially sent host page
Juan Quintela3d0684b2017-03-23 15:06:39 +01002111 *
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002112 * Utility for the outgoing postcopy code.
2113 *
2114 * Discard any partially sent host-page size chunks, mark any partially
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00002115 * dirty host-page size chunks as all dirty. In this case the host-page
2116 * is the host-page for the particular RAMBlock, i.e. it might be a huge page
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002117 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002118 * Returns zero on success
2119 *
2120 * @ms: current migration state
Juan Quintela6b6712e2017-03-22 15:18:04 +01002121 * @block: block we want to work with
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002122 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002123static int postcopy_chunk_hostpages(MigrationState *ms, RAMBlock *block)
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002124{
Wei Yang810cf2b2019-07-24 09:07:21 +08002125 postcopy_discard_send_init(ms, block->idstr);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002126
Juan Quintela6b6712e2017-03-22 15:18:04 +01002127 /*
Wei Yang1e7cf8c2019-08-19 14:18:42 +08002128 * Ensure that all partially dirty host pages are made fully dirty.
Juan Quintela6b6712e2017-03-22 15:18:04 +01002129 */
Wei Yang1e7cf8c2019-08-19 14:18:42 +08002130 postcopy_chunk_hostpages_pass(ms, block);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002131
Wei Yang810cf2b2019-07-24 09:07:21 +08002132 postcopy_discard_send_finish(ms);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002133 return 0;
2134}
2135
Juan Quintela3d0684b2017-03-23 15:06:39 +01002136/**
2137 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
2138 *
2139 * Returns zero on success
2140 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002141 * Transmit the set of pages to be discarded after precopy to the target
2142 * these are pages that:
2143 * a) Have been previously transmitted but are now dirty again
2144 * b) Pages that have never been transmitted, this ensures that
2145 * any pages on the destination that have been mapped by background
2146 * tasks get discarded (transparent huge pages is the specific concern)
2147 * Hopefully this is pretty sparse
Juan Quintela3d0684b2017-03-23 15:06:39 +01002148 *
2149 * @ms: current migration state
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002150 */
2151int ram_postcopy_send_discard_bitmap(MigrationState *ms)
2152{
Juan Quintela53518d92017-05-04 11:46:24 +02002153 RAMState *rs = ram_state;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002154 RAMBlock *block;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002155 int ret;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002156
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01002157 RCU_READ_LOCK_GUARD();
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002158
2159 /* This should be our last sync, the src is now paused */
Juan Quintelaeb859c52017-03-13 21:51:55 +01002160 migration_bitmap_sync(rs);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002161
Juan Quintela6b6712e2017-03-22 15:18:04 +01002162 /* Easiest way to make sure we don't resume in the middle of a host-page */
2163 rs->last_seen_block = NULL;
2164 rs->last_sent_block = NULL;
2165 rs->last_page = 0;
2166
Yury Kotovfbd162e2019-02-15 20:45:46 +03002167 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01002168 /* Deal with TPS != HPS and huge pages */
2169 ret = postcopy_chunk_hostpages(ms, block);
2170 if (ret) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01002171 return ret;
2172 }
2173
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002174#ifdef DEBUG_POSTCOPY
Wei Yang1e7cf8c2019-08-19 14:18:42 +08002175 ram_debug_dump_bitmap(block->bmap, true,
2176 block->used_length >> TARGET_PAGE_BITS);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002177#endif
Juan Quintela6b6712e2017-03-22 15:18:04 +01002178 }
2179 trace_ram_postcopy_send_discard_bitmap();
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002180
Simran Singhalb3ac2b92020-04-01 22:23:14 +05302181 return postcopy_each_ram_send_discard(ms);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002182}
2183
Juan Quintela3d0684b2017-03-23 15:06:39 +01002184/**
2185 * ram_discard_range: discard dirtied pages at the beginning of postcopy
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002186 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002187 * Returns zero on success
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002188 *
Juan Quintela36449152017-03-23 15:11:59 +01002189 * @rbname: name of the RAMBlock of the request. NULL means the
2190 * same that last one.
Juan Quintela3d0684b2017-03-23 15:06:39 +01002191 * @start: RAMBlock starting page
2192 * @length: RAMBlock size
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002193 */
Juan Quintelaaaa20642017-03-21 11:35:24 +01002194int ram_discard_range(const char *rbname, uint64_t start, size_t length)
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002195{
Juan Quintela36449152017-03-23 15:11:59 +01002196 trace_ram_discard_range(rbname, start, length);
Dr. David Alan Gilbertd3a50382017-02-24 18:28:32 +00002197
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01002198 RCU_READ_LOCK_GUARD();
Juan Quintela36449152017-03-23 15:11:59 +01002199 RAMBlock *rb = qemu_ram_block_by_name(rbname);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002200
2201 if (!rb) {
Juan Quintela36449152017-03-23 15:11:59 +01002202 error_report("ram_discard_range: Failed to find block '%s'", rbname);
Daniel Henrique Barboza03acb4e2020-01-06 15:23:31 -03002203 return -1;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002204 }
2205
Peter Xu814bb082018-07-23 20:33:02 +08002206 /*
2207 * On source VM, we don't need to update the received bitmap since
2208 * we don't even have one.
2209 */
2210 if (rb->receivedmap) {
2211 bitmap_clear(rb->receivedmap, start >> qemu_target_page_bits(),
2212 length >> qemu_target_page_bits());
2213 }
2214
Daniel Henrique Barboza03acb4e2020-01-06 15:23:31 -03002215 return ram_block_discard_range(rb, start, length);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002216}
2217
Peter Xu84593a02017-10-19 14:31:59 +08002218/*
2219 * For every allocation, we will try not to crash the VM if the
2220 * allocation failed.
2221 */
2222static int xbzrle_init(void)
2223{
2224 Error *local_err = NULL;
2225
2226 if (!migrate_use_xbzrle()) {
2227 return 0;
2228 }
2229
2230 XBZRLE_cache_lock();
2231
2232 XBZRLE.zero_target_page = g_try_malloc0(TARGET_PAGE_SIZE);
2233 if (!XBZRLE.zero_target_page) {
2234 error_report("%s: Error allocating zero page", __func__);
2235 goto err_out;
2236 }
2237
2238 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(),
2239 TARGET_PAGE_SIZE, &local_err);
2240 if (!XBZRLE.cache) {
2241 error_report_err(local_err);
2242 goto free_zero_page;
2243 }
2244
2245 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
2246 if (!XBZRLE.encoded_buf) {
2247 error_report("%s: Error allocating encoded_buf", __func__);
2248 goto free_cache;
2249 }
2250
2251 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
2252 if (!XBZRLE.current_buf) {
2253 error_report("%s: Error allocating current_buf", __func__);
2254 goto free_encoded_buf;
2255 }
2256
2257 /* We are all good */
2258 XBZRLE_cache_unlock();
2259 return 0;
2260
2261free_encoded_buf:
2262 g_free(XBZRLE.encoded_buf);
2263 XBZRLE.encoded_buf = NULL;
2264free_cache:
2265 cache_fini(XBZRLE.cache);
2266 XBZRLE.cache = NULL;
2267free_zero_page:
2268 g_free(XBZRLE.zero_target_page);
2269 XBZRLE.zero_target_page = NULL;
2270err_out:
2271 XBZRLE_cache_unlock();
2272 return -ENOMEM;
2273}
2274
Juan Quintela53518d92017-05-04 11:46:24 +02002275static int ram_state_init(RAMState **rsp)
Juan Quintela56e93d22015-05-07 19:33:31 +02002276{
Peter Xu7d00ee62017-10-19 14:31:57 +08002277 *rsp = g_try_new0(RAMState, 1);
2278
2279 if (!*rsp) {
2280 error_report("%s: Init ramstate fail", __func__);
2281 return -1;
2282 }
Juan Quintela53518d92017-05-04 11:46:24 +02002283
2284 qemu_mutex_init(&(*rsp)->bitmap_mutex);
2285 qemu_mutex_init(&(*rsp)->src_page_req_mutex);
2286 QSIMPLEQ_INIT(&(*rsp)->src_page_requests);
Juan Quintela56e93d22015-05-07 19:33:31 +02002287
Peter Xu7d00ee62017-10-19 14:31:57 +08002288 /*
Ivan Ren40c4d4a2019-07-14 22:51:19 +08002289 * Count the total number of pages used by ram blocks not including any
2290 * gaps due to alignment or unplugs.
Wei Yang03158512019-06-04 14:17:27 +08002291 * This must match with the initial values of dirty bitmap.
Peter Xu7d00ee62017-10-19 14:31:57 +08002292 */
Ivan Ren40c4d4a2019-07-14 22:51:19 +08002293 (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
Peter Xu7d00ee62017-10-19 14:31:57 +08002294 ram_state_reset(*rsp);
2295
2296 return 0;
2297}
2298
Peter Xud6eff5d2017-10-19 14:32:00 +08002299static void ram_list_init_bitmaps(void)
2300{
Peter Xu002cad62019-06-03 14:50:56 +08002301 MigrationState *ms = migrate_get_current();
Peter Xud6eff5d2017-10-19 14:32:00 +08002302 RAMBlock *block;
2303 unsigned long pages;
Peter Xu002cad62019-06-03 14:50:56 +08002304 uint8_t shift;
Peter Xud6eff5d2017-10-19 14:32:00 +08002305
2306 /* Skip setting bitmap if there is no RAM */
2307 if (ram_bytes_total()) {
Peter Xu002cad62019-06-03 14:50:56 +08002308 shift = ms->clear_bitmap_shift;
2309 if (shift > CLEAR_BITMAP_SHIFT_MAX) {
2310 error_report("clear_bitmap_shift (%u) too big, using "
2311 "max value (%u)", shift, CLEAR_BITMAP_SHIFT_MAX);
2312 shift = CLEAR_BITMAP_SHIFT_MAX;
2313 } else if (shift < CLEAR_BITMAP_SHIFT_MIN) {
2314 error_report("clear_bitmap_shift (%u) too small, using "
2315 "min value (%u)", shift, CLEAR_BITMAP_SHIFT_MIN);
2316 shift = CLEAR_BITMAP_SHIFT_MIN;
2317 }
2318
Yury Kotovfbd162e2019-02-15 20:45:46 +03002319 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Peter Xud6eff5d2017-10-19 14:32:00 +08002320 pages = block->max_length >> TARGET_PAGE_BITS;
Wei Yang03158512019-06-04 14:17:27 +08002321 /*
2322 * The initial dirty bitmap for migration must be set with all
2323 * ones to make sure we'll migrate every guest RAM page to
2324 * destination.
Ivan Ren40c4d4a2019-07-14 22:51:19 +08002325 * Here we set RAMBlock.bmap all to 1 because when rebegin a
2326 * new migration after a failed migration, ram_list.
2327 * dirty_memory[DIRTY_MEMORY_MIGRATION] don't include the whole
2328 * guest memory.
Wei Yang03158512019-06-04 14:17:27 +08002329 */
Peter Xud6eff5d2017-10-19 14:32:00 +08002330 block->bmap = bitmap_new(pages);
Ivan Ren40c4d4a2019-07-14 22:51:19 +08002331 bitmap_set(block->bmap, 0, pages);
Peter Xu002cad62019-06-03 14:50:56 +08002332 block->clear_bmap_shift = shift;
2333 block->clear_bmap = bitmap_new(clear_bmap_size(pages, shift));
Peter Xud6eff5d2017-10-19 14:32:00 +08002334 }
2335 }
2336}
2337
2338static void ram_init_bitmaps(RAMState *rs)
2339{
2340 /* For memory_global_dirty_log_start below. */
2341 qemu_mutex_lock_iothread();
2342 qemu_mutex_lock_ramlist();
Peter Xud6eff5d2017-10-19 14:32:00 +08002343
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01002344 WITH_RCU_READ_LOCK_GUARD() {
2345 ram_list_init_bitmaps();
2346 memory_global_dirty_log_start();
2347 migration_bitmap_sync_precopy(rs);
2348 }
Peter Xud6eff5d2017-10-19 14:32:00 +08002349 qemu_mutex_unlock_ramlist();
2350 qemu_mutex_unlock_iothread();
2351}
2352
Peter Xu7d00ee62017-10-19 14:31:57 +08002353static int ram_init_all(RAMState **rsp)
2354{
Peter Xu7d00ee62017-10-19 14:31:57 +08002355 if (ram_state_init(rsp)) {
2356 return -1;
2357 }
2358
Peter Xu84593a02017-10-19 14:31:59 +08002359 if (xbzrle_init()) {
2360 ram_state_cleanup(rsp);
2361 return -1;
Juan Quintela56e93d22015-05-07 19:33:31 +02002362 }
2363
Peter Xud6eff5d2017-10-19 14:32:00 +08002364 ram_init_bitmaps(*rsp);
zhanghailianga91246c2016-10-27 14:42:59 +08002365
2366 return 0;
2367}
2368
Peter Xu08614f32018-05-02 18:47:33 +08002369static void ram_state_resume_prepare(RAMState *rs, QEMUFile *out)
2370{
2371 RAMBlock *block;
2372 uint64_t pages = 0;
2373
2374 /*
2375 * Postcopy is not using xbzrle/compression, so no need for that.
2376 * Also, since source are already halted, we don't need to care
2377 * about dirty page logging as well.
2378 */
2379
Yury Kotovfbd162e2019-02-15 20:45:46 +03002380 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Peter Xu08614f32018-05-02 18:47:33 +08002381 pages += bitmap_count_one(block->bmap,
2382 block->used_length >> TARGET_PAGE_BITS);
2383 }
2384
2385 /* This may not be aligned with current bitmaps. Recalculate. */
2386 rs->migration_dirty_pages = pages;
2387
2388 rs->last_seen_block = NULL;
2389 rs->last_sent_block = NULL;
2390 rs->last_page = 0;
2391 rs->last_version = ram_list.version;
2392 /*
2393 * Disable the bulk stage, otherwise we'll resend the whole RAM no
2394 * matter what we have sent.
2395 */
2396 rs->ram_bulk_stage = false;
2397
2398 /* Update RAMState cache of output QEMUFile */
2399 rs->f = out;
2400
2401 trace_ram_state_resume_prepare(pages);
2402}
2403
Juan Quintela3d0684b2017-03-23 15:06:39 +01002404/*
Wei Wang6bcb05f2018-12-11 16:24:50 +08002405 * This function clears bits of the free pages reported by the caller from the
2406 * migration dirty bitmap. @addr is the host address corresponding to the
2407 * start of the continuous guest free pages, and @len is the total bytes of
2408 * those pages.
2409 */
2410void qemu_guest_free_page_hint(void *addr, size_t len)
2411{
2412 RAMBlock *block;
2413 ram_addr_t offset;
2414 size_t used_len, start, npages;
2415 MigrationState *s = migrate_get_current();
2416
2417 /* This function is currently expected to be used during live migration */
2418 if (!migration_is_setup_or_active(s->state)) {
2419 return;
2420 }
2421
2422 for (; len > 0; len -= used_len, addr += used_len) {
2423 block = qemu_ram_block_from_host(addr, false, &offset);
2424 if (unlikely(!block || offset >= block->used_length)) {
2425 /*
2426 * The implementation might not support RAMBlock resize during
2427 * live migration, but it could happen in theory with future
2428 * updates. So we add a check here to capture that case.
2429 */
2430 error_report_once("%s unexpected error", __func__);
2431 return;
2432 }
2433
2434 if (len <= block->used_length - offset) {
2435 used_len = len;
2436 } else {
2437 used_len = block->used_length - offset;
2438 }
2439
2440 start = offset >> TARGET_PAGE_BITS;
2441 npages = used_len >> TARGET_PAGE_BITS;
2442
2443 qemu_mutex_lock(&ram_state->bitmap_mutex);
2444 ram_state->migration_dirty_pages -=
2445 bitmap_count_one_with_offset(block->bmap, start, npages);
2446 bitmap_clear(block->bmap, start, npages);
2447 qemu_mutex_unlock(&ram_state->bitmap_mutex);
2448 }
2449}
2450
/*
 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
 * a long-running RCU critical section.  When RCU reclaims in the code
 * start to become numerous, it will be necessary to reduce the
 * granularity of these critical sections.
 */
2457
Juan Quintela3d0684b2017-03-23 15:06:39 +01002458/**
2459 * ram_save_setup: Setup RAM for migration
2460 *
2461 * Returns zero to indicate success and negative for error
2462 *
2463 * @f: QEMUFile where to send the data
2464 * @opaque: RAMState pointer
2465 */
zhanghailianga91246c2016-10-27 14:42:59 +08002466static int ram_save_setup(QEMUFile *f, void *opaque)
2467{
Juan Quintela53518d92017-05-04 11:46:24 +02002468 RAMState **rsp = opaque;
zhanghailianga91246c2016-10-27 14:42:59 +08002469 RAMBlock *block;
2470
Xiao Guangrongdcaf4462018-03-30 15:51:20 +08002471 if (compress_threads_save_setup()) {
2472 return -1;
2473 }
2474
zhanghailianga91246c2016-10-27 14:42:59 +08002475 /* migration has already setup the bitmap, reuse it. */
2476 if (!migration_in_colo_state()) {
Peter Xu7d00ee62017-10-19 14:31:57 +08002477 if (ram_init_all(rsp) != 0) {
Xiao Guangrongdcaf4462018-03-30 15:51:20 +08002478 compress_threads_save_cleanup();
zhanghailianga91246c2016-10-27 14:42:59 +08002479 return -1;
Juan Quintela53518d92017-05-04 11:46:24 +02002480 }
zhanghailianga91246c2016-10-27 14:42:59 +08002481 }
Juan Quintela53518d92017-05-04 11:46:24 +02002482 (*rsp)->f = f;
zhanghailianga91246c2016-10-27 14:42:59 +08002483
Dr. David Alan Gilbert0e6ebd42019-10-07 15:36:38 +01002484 WITH_RCU_READ_LOCK_GUARD() {
2485 qemu_put_be64(f, ram_bytes_total_common(true) | RAM_SAVE_FLAG_MEM_SIZE);
Juan Quintela56e93d22015-05-07 19:33:31 +02002486
Dr. David Alan Gilbert0e6ebd42019-10-07 15:36:38 +01002487 RAMBLOCK_FOREACH_MIGRATABLE(block) {
2488 qemu_put_byte(f, strlen(block->idstr));
2489 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
2490 qemu_put_be64(f, block->used_length);
2491 if (migrate_postcopy_ram() && block->page_size !=
2492 qemu_host_page_size) {
2493 qemu_put_be64(f, block->page_size);
2494 }
2495 if (migrate_ignore_shared()) {
2496 qemu_put_be64(f, block->mr->addr);
2497 }
Yury Kotovfbd162e2019-02-15 20:45:46 +03002498 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002499 }
2500
Juan Quintela56e93d22015-05-07 19:33:31 +02002501 ram_control_before_iterate(f, RAM_CONTROL_SETUP);
2502 ram_control_after_iterate(f, RAM_CONTROL_SETUP);
2503
Juan Quintela99f2c6f2020-01-22 16:04:53 +01002504 multifd_send_sync_main(f);
Juan Quintela56e93d22015-05-07 19:33:31 +02002505 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
Juan Quintela35374cb2018-04-18 10:13:21 +02002506 qemu_fflush(f);
Juan Quintela56e93d22015-05-07 19:33:31 +02002507
2508 return 0;
2509}
2510
Juan Quintela3d0684b2017-03-23 15:06:39 +01002511/**
2512 * ram_save_iterate: iterative stage for migration
2513 *
2514 * Returns zero to indicate success and negative for error
2515 *
2516 * @f: QEMUFile where to send the data
2517 * @opaque: RAMState pointer
2518 */
Juan Quintela56e93d22015-05-07 19:33:31 +02002519static int ram_save_iterate(QEMUFile *f, void *opaque)
2520{
Juan Quintela53518d92017-05-04 11:46:24 +02002521 RAMState **temp = opaque;
2522 RAMState *rs = *temp;
Juan Quintela3d4095b2019-12-18 05:12:36 +01002523 int ret = 0;
Juan Quintela56e93d22015-05-07 19:33:31 +02002524 int i;
2525 int64_t t0;
Thomas Huth5c903082016-11-04 14:10:17 +01002526 int done = 0;
Juan Quintela56e93d22015-05-07 19:33:31 +02002527
Peter Lievenb2557342018-03-08 12:18:24 +01002528 if (blk_mig_bulk_active()) {
2529 /* Avoid transferring ram during bulk phase of block migration as
2530 * the bulk phase will usually take a long time and transferring
2531 * ram updates during that time is pointless. */
2532 goto out;
2533 }
2534
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01002535 WITH_RCU_READ_LOCK_GUARD() {
2536 if (ram_list.version != rs->last_version) {
2537 ram_state_reset(rs);
Dr. David Alan Gilberte03a34f2018-06-13 11:26:42 +01002538 }
2539
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01002540 /* Read version before ram_list.blocks */
2541 smp_rmb();
Xiao Guangronge8f37352018-09-03 17:26:44 +08002542
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01002543 ram_control_before_iterate(f, RAM_CONTROL_ROUND);
Xiao Guangronge8f37352018-09-03 17:26:44 +08002544
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01002545 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2546 i = 0;
2547 while ((ret = qemu_file_rate_limit(f)) == 0 ||
2548 !QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
2549 int pages;
Jason J. Herne070afca2015-09-08 13:12:35 -04002550
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01002551 if (qemu_file_get_error(f)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02002552 break;
2553 }
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01002554
2555 pages = ram_find_and_save_block(rs, false);
2556 /* no more pages to sent */
2557 if (pages == 0) {
2558 done = 1;
2559 break;
2560 }
2561
2562 if (pages < 0) {
2563 qemu_file_set_error(f, pages);
2564 break;
2565 }
2566
2567 rs->target_page_count += pages;
2568
2569 /*
Wei Yang644acf92019-11-07 20:39:07 +08002570 * During postcopy, it is necessary to make sure one whole host
2571 * page is sent in one chunk.
2572 */
2573 if (migrate_postcopy_ram()) {
2574 flush_compressed_data(rs);
2575 }
2576
2577 /*
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01002578 * we want to check in the 1st loop, just in case it was the 1st
2579 * time and we had to sync the dirty bitmap.
2580 * qemu_clock_get_ns() is a bit expensive, so we only check each
2581 * some iterations
2582 */
2583 if ((i & 63) == 0) {
2584 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) /
2585 1000000;
2586 if (t1 > MAX_WAIT) {
2587 trace_ram_save_iterate_big_wait(t1, i);
2588 break;
2589 }
2590 }
2591 i++;
Juan Quintela56e93d22015-05-07 19:33:31 +02002592 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002593 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002594
2595 /*
2596 * Must occur before EOS (or any QEMUFile operation)
2597 * because of RDMA protocol.
2598 */
2599 ram_control_after_iterate(f, RAM_CONTROL_ROUND);
2600
Peter Lievenb2557342018-03-08 12:18:24 +01002601out:
Juan Quintelab69a0222020-01-22 11:36:12 +01002602 if (ret >= 0
2603 && migration_is_setup_or_active(migrate_get_current()->state)) {
Juan Quintela99f2c6f2020-01-22 16:04:53 +01002604 multifd_send_sync_main(rs->f);
Juan Quintela3d4095b2019-12-18 05:12:36 +01002605 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2606 qemu_fflush(f);
2607 ram_counters.transferred += 8;
Juan Quintela56e93d22015-05-07 19:33:31 +02002608
Juan Quintela3d4095b2019-12-18 05:12:36 +01002609 ret = qemu_file_get_error(f);
2610 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002611 if (ret < 0) {
2612 return ret;
2613 }
2614
Thomas Huth5c903082016-11-04 14:10:17 +01002615 return done;
Juan Quintela56e93d22015-05-07 19:33:31 +02002616}
2617
Juan Quintela3d0684b2017-03-23 15:06:39 +01002618/**
2619 * ram_save_complete: function called to send the remaining amount of ram
2620 *
Xiao Guangronge8f37352018-09-03 17:26:44 +08002621 * Returns zero to indicate success or negative on error
Juan Quintela3d0684b2017-03-23 15:06:39 +01002622 *
2623 * Called with iothread lock
2624 *
2625 * @f: QEMUFile where to send the data
2626 * @opaque: RAMState pointer
2627 */
Juan Quintela56e93d22015-05-07 19:33:31 +02002628static int ram_save_complete(QEMUFile *f, void *opaque)
2629{
Juan Quintela53518d92017-05-04 11:46:24 +02002630 RAMState **temp = opaque;
2631 RAMState *rs = *temp;
Xiao Guangronge8f37352018-09-03 17:26:44 +08002632 int ret = 0;
Juan Quintela6f37bb82017-03-13 19:26:29 +01002633
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01002634 WITH_RCU_READ_LOCK_GUARD() {
2635 if (!migration_in_postcopy()) {
2636 migration_bitmap_sync_precopy(rs);
Juan Quintela56e93d22015-05-07 19:33:31 +02002637 }
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01002638
2639 ram_control_before_iterate(f, RAM_CONTROL_FINISH);
2640
2641 /* try transferring iterative blocks of memory */
2642
2643 /* flush all remaining blocks regardless of rate limiting */
2644 while (true) {
2645 int pages;
2646
2647 pages = ram_find_and_save_block(rs, !migration_in_colo_state());
2648 /* no more blocks to sent */
2649 if (pages == 0) {
2650 break;
2651 }
2652 if (pages < 0) {
2653 ret = pages;
2654 break;
2655 }
Xiao Guangronge8f37352018-09-03 17:26:44 +08002656 }
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01002657
2658 flush_compressed_data(rs);
2659 ram_control_after_iterate(f, RAM_CONTROL_FINISH);
Juan Quintela56e93d22015-05-07 19:33:31 +02002660 }
2661
Juan Quintela3d4095b2019-12-18 05:12:36 +01002662 if (ret >= 0) {
Juan Quintela99f2c6f2020-01-22 16:04:53 +01002663 multifd_send_sync_main(rs->f);
Juan Quintela3d4095b2019-12-18 05:12:36 +01002664 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2665 qemu_fflush(f);
2666 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002667
Xiao Guangronge8f37352018-09-03 17:26:44 +08002668 return ret;
Juan Quintela56e93d22015-05-07 19:33:31 +02002669}
2670
Dr. David Alan Gilbertc31b0982015-11-05 18:10:54 +00002671static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
Vladimir Sementsov-Ogievskiy47995022018-03-13 15:34:00 -04002672 uint64_t *res_precopy_only,
2673 uint64_t *res_compatible,
2674 uint64_t *res_postcopy_only)
Juan Quintela56e93d22015-05-07 19:33:31 +02002675{
Juan Quintela53518d92017-05-04 11:46:24 +02002676 RAMState **temp = opaque;
2677 RAMState *rs = *temp;
Juan Quintela56e93d22015-05-07 19:33:31 +02002678 uint64_t remaining_size;
2679
Juan Quintela9edabd42017-03-14 12:02:16 +01002680 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
Juan Quintela56e93d22015-05-07 19:33:31 +02002681
Juan Quintela57273092017-03-20 22:25:28 +01002682 if (!migration_in_postcopy() &&
Dr. David Alan Gilbert663e6c12015-11-05 18:11:13 +00002683 remaining_size < max_size) {
Juan Quintela56e93d22015-05-07 19:33:31 +02002684 qemu_mutex_lock_iothread();
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01002685 WITH_RCU_READ_LOCK_GUARD() {
2686 migration_bitmap_sync_precopy(rs);
2687 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002688 qemu_mutex_unlock_iothread();
Juan Quintela9edabd42017-03-14 12:02:16 +01002689 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
Juan Quintela56e93d22015-05-07 19:33:31 +02002690 }
Dr. David Alan Gilbertc31b0982015-11-05 18:10:54 +00002691
Vladimir Sementsov-Ogievskiy86e11672017-07-10 19:30:15 +03002692 if (migrate_postcopy_ram()) {
2693 /* We can do postcopy, and all the data is postcopiable */
Vladimir Sementsov-Ogievskiy47995022018-03-13 15:34:00 -04002694 *res_compatible += remaining_size;
Vladimir Sementsov-Ogievskiy86e11672017-07-10 19:30:15 +03002695 } else {
Vladimir Sementsov-Ogievskiy47995022018-03-13 15:34:00 -04002696 *res_precopy_only += remaining_size;
Vladimir Sementsov-Ogievskiy86e11672017-07-10 19:30:15 +03002697 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002698}
2699
2700static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
2701{
2702 unsigned int xh_len;
2703 int xh_flags;
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00002704 uint8_t *loaded_data;
Juan Quintela56e93d22015-05-07 19:33:31 +02002705
Juan Quintela56e93d22015-05-07 19:33:31 +02002706 /* extract RLE header */
2707 xh_flags = qemu_get_byte(f);
2708 xh_len = qemu_get_be16(f);
2709
2710 if (xh_flags != ENCODING_FLAG_XBZRLE) {
2711 error_report("Failed to load XBZRLE page - wrong compression!");
2712 return -1;
2713 }
2714
2715 if (xh_len > TARGET_PAGE_SIZE) {
2716 error_report("Failed to load XBZRLE page - len overflow!");
2717 return -1;
2718 }
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002719 loaded_data = XBZRLE.decoded_buf;
Juan Quintela56e93d22015-05-07 19:33:31 +02002720 /* load data and decode */
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002721 /* it can change loaded_data to point to an internal buffer */
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00002722 qemu_get_buffer_in_place(f, &loaded_data, xh_len);
Juan Quintela56e93d22015-05-07 19:33:31 +02002723
2724 /* decode RLE */
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00002725 if (xbzrle_decode_buffer(loaded_data, xh_len, host,
Juan Quintela56e93d22015-05-07 19:33:31 +02002726 TARGET_PAGE_SIZE) == -1) {
2727 error_report("Failed to load XBZRLE page - decode error!");
2728 return -1;
2729 }
2730
2731 return 0;
2732}
2733
Juan Quintela3d0684b2017-03-23 15:06:39 +01002734/**
2735 * ram_block_from_stream: read a RAMBlock id from the migration stream
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002736 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002737 * Must be called from within a rcu critical section.
2738 *
2739 * Returns a pointer from within the RCU-protected ram_list.
2740 *
2741 * @f: QEMUFile where to read the data from
2742 * @flags: Page flags (mostly to see if it's a continuation of previous block)
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002743 */
Juan Quintela3d0684b2017-03-23 15:06:39 +01002744static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
Juan Quintela56e93d22015-05-07 19:33:31 +02002745{
Bihong Yu49324e92020-10-20 11:10:46 +08002746 static RAMBlock *block;
Juan Quintela56e93d22015-05-07 19:33:31 +02002747 char id[256];
2748 uint8_t len;
2749
2750 if (flags & RAM_SAVE_FLAG_CONTINUE) {
zhanghailiang4c4bad42016-01-15 11:37:41 +08002751 if (!block) {
Juan Quintela56e93d22015-05-07 19:33:31 +02002752 error_report("Ack, bad migration stream!");
2753 return NULL;
2754 }
zhanghailiang4c4bad42016-01-15 11:37:41 +08002755 return block;
Juan Quintela56e93d22015-05-07 19:33:31 +02002756 }
2757
2758 len = qemu_get_byte(f);
2759 qemu_get_buffer(f, (uint8_t *)id, len);
2760 id[len] = 0;
2761
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00002762 block = qemu_ram_block_by_name(id);
zhanghailiang4c4bad42016-01-15 11:37:41 +08002763 if (!block) {
2764 error_report("Can't find block %s", id);
2765 return NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02002766 }
2767
Yury Kotovfbd162e2019-02-15 20:45:46 +03002768 if (ramblock_is_ignored(block)) {
Cédric Le Goaterb895de52018-05-14 08:57:00 +02002769 error_report("block %s should not be migrated !", id);
2770 return NULL;
2771 }
2772
zhanghailiang4c4bad42016-01-15 11:37:41 +08002773 return block;
2774}
2775
2776static inline void *host_from_ram_block_offset(RAMBlock *block,
2777 ram_addr_t offset)
2778{
2779 if (!offset_in_ramblock(block, offset)) {
2780 return NULL;
2781 }
2782
2783 return block->host + offset;
Juan Quintela56e93d22015-05-07 19:33:31 +02002784}
2785
Zhang Chen13af18f2018-09-03 12:38:48 +08002786static inline void *colo_cache_from_block_offset(RAMBlock *block,
zhanghailiang8af66372020-02-24 14:54:11 +08002787 ram_addr_t offset, bool record_bitmap)
Zhang Chen13af18f2018-09-03 12:38:48 +08002788{
2789 if (!offset_in_ramblock(block, offset)) {
2790 return NULL;
2791 }
2792 if (!block->colo_cache) {
2793 error_report("%s: colo_cache is NULL in block :%s",
2794 __func__, block->idstr);
2795 return NULL;
2796 }
Zhang Chen7d9acaf2018-09-03 12:38:49 +08002797
2798 /*
2799 * During colo checkpoint, we need bitmap of these migrated pages.
2800 * It help us to decide which pages in ram cache should be flushed
2801 * into VM's RAM later.
2802 */
zhanghailiang8af66372020-02-24 14:54:11 +08002803 if (record_bitmap &&
2804 !test_and_set_bit(offset >> TARGET_PAGE_BITS, block->bmap)) {
Zhang Chen7d9acaf2018-09-03 12:38:49 +08002805 ram_state->migration_dirty_pages++;
2806 }
Zhang Chen13af18f2018-09-03 12:38:48 +08002807 return block->colo_cache + offset;
2808}
2809
/**
 * ram_handle_compressed: fill a range with a single byte value
 *
 * If a page (or a whole RDMA chunk) has been determined to be filled
 * with @ch, write it out. A zero fill is skipped when the range is
 * already zero, so the memory is not touched needlessly.
 *
 * @host: host address of the range
 * @ch: byte value the range is filled with
 * @size: size of the range in bytes
 */
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
{
    /* Nothing to do for a zero fill of memory that is already zero */
    if (ch == 0 && is_zero_range(host, size)) {
        return;
    }

    memset(host, ch, size);
}
2826
Xiao Guangrong797ca152018-03-30 15:51:21 +08002827/* return the size after decompression, or negative value on error */
2828static int
2829qemu_uncompress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
2830 const uint8_t *source, size_t source_len)
2831{
2832 int err;
2833
2834 err = inflateReset(stream);
2835 if (err != Z_OK) {
2836 return -1;
2837 }
2838
2839 stream->avail_in = source_len;
2840 stream->next_in = (uint8_t *)source;
2841 stream->avail_out = dest_len;
2842 stream->next_out = dest;
2843
2844 err = inflate(stream, Z_NO_FLUSH);
2845 if (err != Z_STREAM_END) {
2846 return -1;
2847 }
2848
2849 return stream->total_out;
2850}
2851
Juan Quintela56e93d22015-05-07 19:33:31 +02002852static void *do_data_decompress(void *opaque)
2853{
2854 DecompressParam *param = opaque;
2855 unsigned long pagesize;
Liang Li33d151f2016-05-05 15:32:58 +08002856 uint8_t *des;
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08002857 int len, ret;
Juan Quintela56e93d22015-05-07 19:33:31 +02002858
Liang Li33d151f2016-05-05 15:32:58 +08002859 qemu_mutex_lock(&param->mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08002860 while (!param->quit) {
Liang Li33d151f2016-05-05 15:32:58 +08002861 if (param->des) {
2862 des = param->des;
2863 len = param->len;
2864 param->des = 0;
2865 qemu_mutex_unlock(&param->mutex);
2866
Liang Li73a89122016-05-05 15:32:51 +08002867 pagesize = TARGET_PAGE_SIZE;
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08002868
2869 ret = qemu_uncompress_data(&param->stream, des, pagesize,
2870 param->compbuf, len);
Xiao Guangrongf5482222018-05-03 16:06:11 +08002871 if (ret < 0 && migrate_get_current()->decompress_error_check) {
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08002872 error_report("decompress data failed");
2873 qemu_file_set_error(decomp_file, ret);
2874 }
Liang Li73a89122016-05-05 15:32:51 +08002875
Liang Li33d151f2016-05-05 15:32:58 +08002876 qemu_mutex_lock(&decomp_done_lock);
2877 param->done = true;
2878 qemu_cond_signal(&decomp_done_cond);
2879 qemu_mutex_unlock(&decomp_done_lock);
2880
2881 qemu_mutex_lock(&param->mutex);
2882 } else {
2883 qemu_cond_wait(&param->cond, &param->mutex);
2884 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002885 }
Liang Li33d151f2016-05-05 15:32:58 +08002886 qemu_mutex_unlock(&param->mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02002887
2888 return NULL;
2889}
2890
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08002891static int wait_for_decompress_done(void)
Liang Li5533b2e2016-05-05 15:32:52 +08002892{
2893 int idx, thread_count;
2894
2895 if (!migrate_use_compression()) {
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08002896 return 0;
Liang Li5533b2e2016-05-05 15:32:52 +08002897 }
2898
2899 thread_count = migrate_decompress_threads();
2900 qemu_mutex_lock(&decomp_done_lock);
2901 for (idx = 0; idx < thread_count; idx++) {
2902 while (!decomp_param[idx].done) {
2903 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2904 }
2905 }
2906 qemu_mutex_unlock(&decomp_done_lock);
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08002907 return qemu_file_get_error(decomp_file);
Liang Li5533b2e2016-05-05 15:32:52 +08002908}
2909
Juan Quintelaf0afa332017-06-28 11:52:28 +02002910static void compress_threads_load_cleanup(void)
Juan Quintela56e93d22015-05-07 19:33:31 +02002911{
2912 int i, thread_count;
2913
Juan Quintela3416ab52016-04-20 11:56:01 +02002914 if (!migrate_use_compression()) {
2915 return;
2916 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002917 thread_count = migrate_decompress_threads();
2918 for (i = 0; i < thread_count; i++) {
Xiao Guangrong797ca152018-03-30 15:51:21 +08002919 /*
2920 * we use it as a indicator which shows if the thread is
2921 * properly init'd or not
2922 */
2923 if (!decomp_param[i].compbuf) {
2924 break;
2925 }
2926
Juan Quintela56e93d22015-05-07 19:33:31 +02002927 qemu_mutex_lock(&decomp_param[i].mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08002928 decomp_param[i].quit = true;
Juan Quintela56e93d22015-05-07 19:33:31 +02002929 qemu_cond_signal(&decomp_param[i].cond);
2930 qemu_mutex_unlock(&decomp_param[i].mutex);
2931 }
2932 for (i = 0; i < thread_count; i++) {
Xiao Guangrong797ca152018-03-30 15:51:21 +08002933 if (!decomp_param[i].compbuf) {
2934 break;
2935 }
2936
Juan Quintela56e93d22015-05-07 19:33:31 +02002937 qemu_thread_join(decompress_threads + i);
2938 qemu_mutex_destroy(&decomp_param[i].mutex);
2939 qemu_cond_destroy(&decomp_param[i].cond);
Xiao Guangrong797ca152018-03-30 15:51:21 +08002940 inflateEnd(&decomp_param[i].stream);
Juan Quintela56e93d22015-05-07 19:33:31 +02002941 g_free(decomp_param[i].compbuf);
Xiao Guangrong797ca152018-03-30 15:51:21 +08002942 decomp_param[i].compbuf = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02002943 }
2944 g_free(decompress_threads);
2945 g_free(decomp_param);
Juan Quintela56e93d22015-05-07 19:33:31 +02002946 decompress_threads = NULL;
2947 decomp_param = NULL;
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08002948 decomp_file = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02002949}
2950
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08002951static int compress_threads_load_setup(QEMUFile *f)
Xiao Guangrong797ca152018-03-30 15:51:21 +08002952{
2953 int i, thread_count;
2954
2955 if (!migrate_use_compression()) {
2956 return 0;
2957 }
2958
2959 thread_count = migrate_decompress_threads();
2960 decompress_threads = g_new0(QemuThread, thread_count);
2961 decomp_param = g_new0(DecompressParam, thread_count);
2962 qemu_mutex_init(&decomp_done_lock);
2963 qemu_cond_init(&decomp_done_cond);
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08002964 decomp_file = f;
Xiao Guangrong797ca152018-03-30 15:51:21 +08002965 for (i = 0; i < thread_count; i++) {
2966 if (inflateInit(&decomp_param[i].stream) != Z_OK) {
2967 goto exit;
2968 }
2969
2970 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
2971 qemu_mutex_init(&decomp_param[i].mutex);
2972 qemu_cond_init(&decomp_param[i].cond);
2973 decomp_param[i].done = true;
2974 decomp_param[i].quit = false;
2975 qemu_thread_create(decompress_threads + i, "decompress",
2976 do_data_decompress, decomp_param + i,
2977 QEMU_THREAD_JOINABLE);
2978 }
2979 return 0;
2980exit:
2981 compress_threads_load_cleanup();
2982 return -1;
2983}
2984
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00002985static void decompress_data_with_multi_threads(QEMUFile *f,
Juan Quintela56e93d22015-05-07 19:33:31 +02002986 void *host, int len)
2987{
2988 int idx, thread_count;
2989
2990 thread_count = migrate_decompress_threads();
Liang Li73a89122016-05-05 15:32:51 +08002991 qemu_mutex_lock(&decomp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02002992 while (true) {
2993 for (idx = 0; idx < thread_count; idx++) {
Liang Li73a89122016-05-05 15:32:51 +08002994 if (decomp_param[idx].done) {
Liang Li33d151f2016-05-05 15:32:58 +08002995 decomp_param[idx].done = false;
2996 qemu_mutex_lock(&decomp_param[idx].mutex);
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00002997 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
Juan Quintela56e93d22015-05-07 19:33:31 +02002998 decomp_param[idx].des = host;
2999 decomp_param[idx].len = len;
Liang Li33d151f2016-05-05 15:32:58 +08003000 qemu_cond_signal(&decomp_param[idx].cond);
3001 qemu_mutex_unlock(&decomp_param[idx].mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02003002 break;
3003 }
3004 }
3005 if (idx < thread_count) {
3006 break;
Liang Li73a89122016-05-05 15:32:51 +08003007 } else {
3008 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02003009 }
3010 }
Liang Li73a89122016-05-05 15:32:51 +08003011 qemu_mutex_unlock(&decomp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02003012}
3013
Rao, Leib70cb3b2020-10-16 13:52:01 +08003014 /*
3015 * we must set ram_bulk_stage to false, otherwise in
3016 * migation_bitmap_find_dirty the bitmap will be unused and
3017 * all the pages in ram cache wil be flushed to the ram of
3018 * secondary VM.
3019 */
3020static void colo_init_ram_state(void)
3021{
3022 ram_state_init(&ram_state);
3023 ram_state->ram_bulk_stage = false;
3024}
3025
Zhang Chen13af18f2018-09-03 12:38:48 +08003026/*
3027 * colo cache: this is for secondary VM, we cache the whole
3028 * memory of the secondary VM, it is need to hold the global lock
3029 * to call this helper.
3030 */
3031int colo_init_ram_cache(void)
3032{
3033 RAMBlock *block;
3034
Paolo Bonzini44901b52019-12-13 15:07:22 +01003035 WITH_RCU_READ_LOCK_GUARD() {
3036 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
3037 block->colo_cache = qemu_anon_ram_alloc(block->used_length,
3038 NULL,
3039 false);
3040 if (!block->colo_cache) {
3041 error_report("%s: Can't alloc memory for COLO cache of block %s,"
3042 "size 0x" RAM_ADDR_FMT, __func__, block->idstr,
3043 block->used_length);
3044 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
3045 if (block->colo_cache) {
3046 qemu_anon_ram_free(block->colo_cache, block->used_length);
3047 block->colo_cache = NULL;
3048 }
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003049 }
Paolo Bonzini44901b52019-12-13 15:07:22 +01003050 return -errno;
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003051 }
Zhang Chen13af18f2018-09-03 12:38:48 +08003052 }
Zhang Chen13af18f2018-09-03 12:38:48 +08003053 }
Paolo Bonzini44901b52019-12-13 15:07:22 +01003054
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003055 /*
3056 * Record the dirty pages that sent by PVM, we use this dirty bitmap together
3057 * with to decide which page in cache should be flushed into SVM's RAM. Here
3058 * we use the same name 'ram_bitmap' as for migration.
3059 */
3060 if (ram_bytes_total()) {
3061 RAMBlock *block;
3062
Yury Kotovfbd162e2019-02-15 20:45:46 +03003063 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003064 unsigned long pages = block->max_length >> TARGET_PAGE_BITS;
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003065 block->bmap = bitmap_new(pages);
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003066 }
3067 }
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003068
Rao, Leib70cb3b2020-10-16 13:52:01 +08003069 colo_init_ram_state();
Zhang Chen13af18f2018-09-03 12:38:48 +08003070 return 0;
Zhang Chen13af18f2018-09-03 12:38:48 +08003071}
3072
zhanghailiang03930312020-02-24 14:54:10 +08003073/* TODO: duplicated with ram_init_bitmaps */
3074void colo_incoming_start_dirty_log(void)
3075{
3076 RAMBlock *block = NULL;
3077 /* For memory_global_dirty_log_start below. */
3078 qemu_mutex_lock_iothread();
3079 qemu_mutex_lock_ramlist();
3080
3081 memory_global_dirty_log_sync();
3082 WITH_RCU_READ_LOCK_GUARD() {
3083 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
3084 ramblock_sync_dirty_bitmap(ram_state, block);
3085 /* Discard this dirty bitmap record */
3086 bitmap_zero(block->bmap, block->max_length >> TARGET_PAGE_BITS);
3087 }
3088 memory_global_dirty_log_start();
3089 }
3090 ram_state->migration_dirty_pages = 0;
3091 qemu_mutex_unlock_ramlist();
3092 qemu_mutex_unlock_iothread();
3093}
3094
Zhang Chen13af18f2018-09-03 12:38:48 +08003095/* It is need to hold the global lock to call this helper */
3096void colo_release_ram_cache(void)
3097{
3098 RAMBlock *block;
3099
zhanghailiangd1955d22018-09-03 12:38:55 +08003100 memory_global_dirty_log_stop();
Yury Kotovfbd162e2019-02-15 20:45:46 +03003101 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003102 g_free(block->bmap);
3103 block->bmap = NULL;
3104 }
3105
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003106 WITH_RCU_READ_LOCK_GUARD() {
3107 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
3108 if (block->colo_cache) {
3109 qemu_anon_ram_free(block->colo_cache, block->used_length);
3110 block->colo_cache = NULL;
3111 }
Zhang Chen13af18f2018-09-03 12:38:48 +08003112 }
3113 }
zhanghailiang03930312020-02-24 14:54:10 +08003114 ram_state_cleanup(&ram_state);
Zhang Chen13af18f2018-09-03 12:38:48 +08003115}
3116
Juan Quintela3d0684b2017-03-23 15:06:39 +01003117/**
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003118 * ram_load_setup: Setup RAM for migration incoming side
3119 *
3120 * Returns zero to indicate success and negative for error
3121 *
3122 * @f: QEMUFile where to receive the data
3123 * @opaque: RAMState pointer
3124 */
3125static int ram_load_setup(QEMUFile *f, void *opaque)
3126{
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003127 if (compress_threads_load_setup(f)) {
Xiao Guangrong797ca152018-03-30 15:51:21 +08003128 return -1;
3129 }
3130
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003131 xbzrle_load_setup();
Alexey Perevalovf9494612017-10-05 14:13:20 +03003132 ramblock_recv_map_init();
Zhang Chen13af18f2018-09-03 12:38:48 +08003133
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003134 return 0;
3135}
3136
3137static int ram_load_cleanup(void *opaque)
3138{
Alexey Perevalovf9494612017-10-05 14:13:20 +03003139 RAMBlock *rb;
Junyan He56eb90a2018-07-18 15:48:03 +08003140
Yury Kotovfbd162e2019-02-15 20:45:46 +03003141 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
Beata Michalskabd108a42019-11-21 00:08:42 +00003142 qemu_ram_block_writeback(rb);
Junyan He56eb90a2018-07-18 15:48:03 +08003143 }
3144
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003145 xbzrle_load_cleanup();
Juan Quintelaf0afa332017-06-28 11:52:28 +02003146 compress_threads_load_cleanup();
Alexey Perevalovf9494612017-10-05 14:13:20 +03003147
Yury Kotovfbd162e2019-02-15 20:45:46 +03003148 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
Alexey Perevalovf9494612017-10-05 14:13:20 +03003149 g_free(rb->receivedmap);
3150 rb->receivedmap = NULL;
3151 }
Zhang Chen13af18f2018-09-03 12:38:48 +08003152
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003153 return 0;
3154}
3155
3156/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01003157 * ram_postcopy_incoming_init: allocate postcopy data structures
3158 *
3159 * Returns 0 for success and negative if there was one error
3160 *
3161 * @mis: current migration incoming state
3162 *
3163 * Allocate data structures etc needed by incoming migration with
3164 * postcopy-ram. postcopy-ram's similarly names
3165 * postcopy_ram_incoming_init does the work.
Dr. David Alan Gilbert1caddf82015-11-05 18:11:03 +00003166 */
3167int ram_postcopy_incoming_init(MigrationIncomingState *mis)
3168{
David Hildenbrandc1361802018-06-20 22:27:36 +02003169 return postcopy_ram_incoming_init(mis);
Dr. David Alan Gilbert1caddf82015-11-05 18:11:03 +00003170}
3171
Juan Quintela3d0684b2017-03-23 15:06:39 +01003172/**
3173 * ram_load_postcopy: load a page in postcopy case
3174 *
3175 * Returns 0 for success or -errno in case of error
3176 *
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003177 * Called in postcopy mode by ram_load().
3178 * rcu_read_lock is taken prior to this being called.
Juan Quintela3d0684b2017-03-23 15:06:39 +01003179 *
3180 * @f: QEMUFile where to send the data
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003181 */
3182static int ram_load_postcopy(QEMUFile *f)
3183{
3184 int flags = 0, ret = 0;
3185 bool place_needed = false;
Peter Xu1aa83672018-07-10 17:18:53 +08003186 bool matches_target_page_size = false;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003187 MigrationIncomingState *mis = migration_incoming_get_current();
3188 /* Temporary page that is later 'placed' */
Wei Yang34143222019-10-05 21:50:20 +08003189 void *postcopy_host_page = mis->postcopy_tmp_page;
Wei Yang91ba4422019-11-07 20:39:06 +08003190 void *this_host = NULL;
David Hildenbrandddf35bd2020-04-21 10:52:56 +02003191 bool all_zero = true;
Wei Yang4cbb3c62019-11-07 20:39:04 +08003192 int target_pages = 0;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003193
3194 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
3195 ram_addr_t addr;
3196 void *host = NULL;
3197 void *page_buffer = NULL;
3198 void *place_source = NULL;
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00003199 RAMBlock *block = NULL;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003200 uint8_t ch;
Wei Yang644acf92019-11-07 20:39:07 +08003201 int len;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003202
3203 addr = qemu_get_be64(f);
Peter Xu7a9ddfb2018-02-08 18:31:05 +08003204
3205 /*
3206 * If qemu file error, we should stop here, and then "addr"
3207 * may be invalid
3208 */
3209 ret = qemu_file_get_error(f);
3210 if (ret) {
3211 break;
3212 }
3213
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003214 flags = addr & ~TARGET_PAGE_MASK;
3215 addr &= TARGET_PAGE_MASK;
3216
3217 trace_ram_load_postcopy_loop((uint64_t)addr, flags);
Wei Yang644acf92019-11-07 20:39:07 +08003218 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
3219 RAM_SAVE_FLAG_COMPRESS_PAGE)) {
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00003220 block = ram_block_from_stream(f, flags);
zhanghailiang4c4bad42016-01-15 11:37:41 +08003221
3222 host = host_from_ram_block_offset(block, addr);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003223 if (!host) {
3224 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
3225 ret = -EINVAL;
3226 break;
3227 }
Wei Yang4cbb3c62019-11-07 20:39:04 +08003228 target_pages++;
Peter Xu1aa83672018-07-10 17:18:53 +08003229 matches_target_page_size = block->page_size == TARGET_PAGE_SIZE;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003230 /*
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00003231 * Postcopy requires that we place whole host pages atomically;
3232 * these may be huge pages for RAMBlocks that are backed by
3233 * hugetlbfs.
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003234 * To make it atomic, the data is read into a temporary page
3235 * that's moved into place later.
3236 * The migration protocol uses, possibly smaller, target-pages
3237 * however the source ensures it always sends all the components
Wei Yang91ba4422019-11-07 20:39:06 +08003238 * of a host page in one chunk.
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003239 */
3240 page_buffer = postcopy_host_page +
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00003241 ((uintptr_t)host & (block->page_size - 1));
Wei Yange5e73b02019-11-07 20:39:05 +08003242 if (target_pages == 1) {
Wei Yang91ba4422019-11-07 20:39:06 +08003243 this_host = (void *)QEMU_ALIGN_DOWN((uintptr_t)host,
3244 block->page_size);
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00003245 } else {
3246 /* not the 1st TP within the HP */
Wei Yang91ba4422019-11-07 20:39:06 +08003247 if (QEMU_ALIGN_DOWN((uintptr_t)host, block->page_size) !=
3248 (uintptr_t)this_host) {
3249 error_report("Non-same host page %p/%p",
3250 host, this_host);
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00003251 ret = -EINVAL;
3252 break;
3253 }
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003254 }
3255
3256 /*
3257 * If it's the last part of a host page then we place the host
3258 * page
3259 */
Wei Yang4cbb3c62019-11-07 20:39:04 +08003260 if (target_pages == (block->page_size / TARGET_PAGE_SIZE)) {
3261 place_needed = true;
Wei Yang4cbb3c62019-11-07 20:39:04 +08003262 }
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003263 place_source = postcopy_host_page;
3264 }
3265
3266 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
Juan Quintelabb890ed2017-04-28 09:39:55 +02003267 case RAM_SAVE_FLAG_ZERO:
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003268 ch = qemu_get_byte(f);
Wei Yang2e36bc12019-11-07 20:39:02 +08003269 /*
3270 * Can skip to set page_buffer when
3271 * this is a zero page and (block->page_size == TARGET_PAGE_SIZE).
3272 */
3273 if (ch || !matches_target_page_size) {
3274 memset(page_buffer, ch, TARGET_PAGE_SIZE);
3275 }
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003276 if (ch) {
3277 all_zero = false;
3278 }
3279 break;
3280
3281 case RAM_SAVE_FLAG_PAGE:
3282 all_zero = false;
Peter Xu1aa83672018-07-10 17:18:53 +08003283 if (!matches_target_page_size) {
3284 /* For huge pages, we always use temporary buffer */
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003285 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
3286 } else {
Peter Xu1aa83672018-07-10 17:18:53 +08003287 /*
3288 * For small pages that matches target page size, we
3289 * avoid the qemu_file copy. Instead we directly use
3290 * the buffer of QEMUFile to place the page. Note: we
3291 * cannot do any QEMUFile operation before using that
3292 * buffer to make sure the buffer is valid when
3293 * placing the page.
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003294 */
3295 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
3296 TARGET_PAGE_SIZE);
3297 }
3298 break;
Wei Yang644acf92019-11-07 20:39:07 +08003299 case RAM_SAVE_FLAG_COMPRESS_PAGE:
3300 all_zero = false;
3301 len = qemu_get_be32(f);
3302 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
3303 error_report("Invalid compressed data length: %d", len);
3304 ret = -EINVAL;
3305 break;
3306 }
3307 decompress_data_with_multi_threads(f, page_buffer, len);
3308 break;
3309
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003310 case RAM_SAVE_FLAG_EOS:
3311 /* normal exit */
Juan Quintela6df264a2018-02-28 09:10:07 +01003312 multifd_recv_sync_main();
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003313 break;
3314 default:
Bihong Yu29fccad2020-10-20 11:10:42 +08003315 error_report("Unknown combination of migration flags: 0x%x"
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003316 " (postcopy mode)", flags);
3317 ret = -EINVAL;
Peter Xu7a9ddfb2018-02-08 18:31:05 +08003318 break;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003319 }
3320
Wei Yang644acf92019-11-07 20:39:07 +08003321 /* Got the whole host page, wait for decompress before placing. */
3322 if (place_needed) {
3323 ret |= wait_for_decompress_done();
3324 }
3325
Peter Xu7a9ddfb2018-02-08 18:31:05 +08003326 /* Detect for any possible file errors */
3327 if (!ret && qemu_file_get_error(f)) {
3328 ret = qemu_file_get_error(f);
3329 }
3330
3331 if (!ret && place_needed) {
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003332 /* This gets called at the last target page in the host page */
Wei Yang91ba4422019-11-07 20:39:06 +08003333 void *place_dest = (void *)QEMU_ALIGN_DOWN((uintptr_t)host,
3334 block->page_size);
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00003335
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003336 if (all_zero) {
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00003337 ret = postcopy_place_page_zero(mis, place_dest,
Alexey Perevalov8be46202017-10-05 14:13:18 +03003338 block);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003339 } else {
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00003340 ret = postcopy_place_page(mis, place_dest,
Alexey Perevalov8be46202017-10-05 14:13:18 +03003341 place_source, block);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003342 }
David Hildenbrandddf35bd2020-04-21 10:52:56 +02003343 place_needed = false;
3344 target_pages = 0;
3345 /* Assume we have a zero page until we detect something different */
3346 all_zero = true;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003347 }
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003348 }
3349
3350 return ret;
3351}
3352
Daniel Henrique Barbozaacab30b2017-11-16 20:35:26 -02003353static bool postcopy_is_advised(void)
3354{
3355 PostcopyState ps = postcopy_state_get();
3356 return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END;
3357}
3358
3359static bool postcopy_is_running(void)
3360{
3361 PostcopyState ps = postcopy_state_get();
3362 return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END;
3363}
3364
Zhang Chene6f4aa12018-09-03 12:38:50 +08003365/*
3366 * Flush content of RAM cache into SVM's memory.
3367 * Only flush the pages that be dirtied by PVM or SVM or both.
3368 */
Lukas Straub24fa16f2020-05-11 13:10:51 +02003369void colo_flush_ram_cache(void)
Zhang Chene6f4aa12018-09-03 12:38:50 +08003370{
3371 RAMBlock *block = NULL;
3372 void *dst_host;
3373 void *src_host;
3374 unsigned long offset = 0;
3375
zhanghailiangd1955d22018-09-03 12:38:55 +08003376 memory_global_dirty_log_sync();
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003377 WITH_RCU_READ_LOCK_GUARD() {
3378 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
3379 ramblock_sync_dirty_bitmap(ram_state, block);
Zhang Chene6f4aa12018-09-03 12:38:50 +08003380 }
3381 }
3382
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003383 trace_colo_flush_ram_cache_begin(ram_state->migration_dirty_pages);
3384 WITH_RCU_READ_LOCK_GUARD() {
3385 block = QLIST_FIRST_RCU(&ram_list.blocks);
3386
3387 while (block) {
3388 offset = migration_bitmap_find_dirty(ram_state, block, offset);
3389
Alexey Romko8bba0042020-01-10 14:51:34 +01003390 if (((ram_addr_t)offset) << TARGET_PAGE_BITS
3391 >= block->used_length) {
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003392 offset = 0;
3393 block = QLIST_NEXT_RCU(block, next);
3394 } else {
3395 migration_bitmap_clear_dirty(ram_state, block, offset);
Alexey Romko8bba0042020-01-10 14:51:34 +01003396 dst_host = block->host
3397 + (((ram_addr_t)offset) << TARGET_PAGE_BITS);
3398 src_host = block->colo_cache
3399 + (((ram_addr_t)offset) << TARGET_PAGE_BITS);
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003400 memcpy(dst_host, src_host, TARGET_PAGE_SIZE);
3401 }
3402 }
3403 }
Zhang Chene6f4aa12018-09-03 12:38:50 +08003404 trace_colo_flush_ram_cache_end();
3405}
3406
Wei Yang10da4a32019-07-25 08:20:23 +08003407/**
3408 * ram_load_precopy: load pages in precopy case
3409 *
3410 * Returns 0 for success or -errno in case of error
3411 *
3412 * Called in precopy mode by ram_load().
3413 * rcu_read_lock is taken prior to this being called.
3414 *
3415 * @f: QEMUFile where to send the data
3416 */
3417static int ram_load_precopy(QEMUFile *f)
Juan Quintela56e93d22015-05-07 19:33:31 +02003418{
Yury Kotove65cec52019-11-25 16:36:32 +03003419 int flags = 0, ret = 0, invalid_flags = 0, len = 0, i = 0;
Dr. David Alan Gilbertef08fb32017-02-24 18:28:30 +00003420 /* ADVISE is earlier, it shows the source has the postcopy capability on */
Daniel Henrique Barbozaacab30b2017-11-16 20:35:26 -02003421 bool postcopy_advised = postcopy_is_advised();
Juan Quintelaedc60122016-11-02 12:40:46 +01003422 if (!migrate_use_compression()) {
3423 invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE;
3424 }
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003425
Wei Yang10da4a32019-07-25 08:20:23 +08003426 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02003427 ram_addr_t addr, total_ram_bytes;
zhanghailiang03930312020-02-24 14:54:10 +08003428 void *host = NULL, *host_bak = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02003429 uint8_t ch;
3430
Yury Kotove65cec52019-11-25 16:36:32 +03003431 /*
3432 * Yield periodically to let main loop run, but an iteration of
3433 * the main loop is expensive, so do it each some iterations
3434 */
3435 if ((i & 32767) == 0 && qemu_in_coroutine()) {
3436 aio_co_schedule(qemu_get_current_aio_context(),
3437 qemu_coroutine_self());
3438 qemu_coroutine_yield();
3439 }
3440 i++;
3441
Juan Quintela56e93d22015-05-07 19:33:31 +02003442 addr = qemu_get_be64(f);
3443 flags = addr & ~TARGET_PAGE_MASK;
3444 addr &= TARGET_PAGE_MASK;
3445
Juan Quintelaedc60122016-11-02 12:40:46 +01003446 if (flags & invalid_flags) {
3447 if (flags & invalid_flags & RAM_SAVE_FLAG_COMPRESS_PAGE) {
3448 error_report("Received an unexpected compressed page");
3449 }
3450
3451 ret = -EINVAL;
3452 break;
3453 }
3454
Juan Quintelabb890ed2017-04-28 09:39:55 +02003455 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00003456 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
zhanghailiang4c4bad42016-01-15 11:37:41 +08003457 RAMBlock *block = ram_block_from_stream(f, flags);
3458
zhanghailiang03930312020-02-24 14:54:10 +08003459 host = host_from_ram_block_offset(block, addr);
Zhang Chen13af18f2018-09-03 12:38:48 +08003460 /*
zhanghailiang03930312020-02-24 14:54:10 +08003461 * After going into COLO stage, we should not load the page
3462 * into SVM's memory directly, we put them into colo_cache firstly.
3463 * NOTE: We need to keep a copy of SVM's ram in colo_cache.
3464 * Previously, we copied all these memory in preparing stage of COLO
3465 * while we need to stop VM, which is a time-consuming process.
3466 * Here we optimize it by a trick, back-up every page while in
3467 * migration process while COLO is enabled, though it affects the
3468 * speed of the migration, but it obviously reduce the downtime of
3469 * back-up all SVM'S memory in COLO preparing stage.
Zhang Chen13af18f2018-09-03 12:38:48 +08003470 */
zhanghailiang03930312020-02-24 14:54:10 +08003471 if (migration_incoming_colo_enabled()) {
3472 if (migration_incoming_in_colo_state()) {
3473 /* In COLO stage, put all pages into cache temporarily */
zhanghailiang8af66372020-02-24 14:54:11 +08003474 host = colo_cache_from_block_offset(block, addr, true);
zhanghailiang03930312020-02-24 14:54:10 +08003475 } else {
3476 /*
3477 * In migration stage but before COLO stage,
3478 * Put all pages into both cache and SVM's memory.
3479 */
zhanghailiang8af66372020-02-24 14:54:11 +08003480 host_bak = colo_cache_from_block_offset(block, addr, false);
zhanghailiang03930312020-02-24 14:54:10 +08003481 }
Zhang Chen13af18f2018-09-03 12:38:48 +08003482 }
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00003483 if (!host) {
3484 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
3485 ret = -EINVAL;
3486 break;
3487 }
Zhang Chen13af18f2018-09-03 12:38:48 +08003488 if (!migration_incoming_in_colo_state()) {
3489 ramblock_recv_bitmap_set(block, host);
3490 }
3491
Dr. David Alan Gilbert1db9d8e2017-04-26 19:37:21 +01003492 trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00003493 }
3494
Juan Quintela56e93d22015-05-07 19:33:31 +02003495 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
3496 case RAM_SAVE_FLAG_MEM_SIZE:
3497 /* Synchronize RAM block list */
3498 total_ram_bytes = addr;
3499 while (!ret && total_ram_bytes) {
3500 RAMBlock *block;
Juan Quintela56e93d22015-05-07 19:33:31 +02003501 char id[256];
3502 ram_addr_t length;
3503
3504 len = qemu_get_byte(f);
3505 qemu_get_buffer(f, (uint8_t *)id, len);
3506 id[len] = 0;
3507 length = qemu_get_be64(f);
3508
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00003509 block = qemu_ram_block_by_name(id);
Cédric Le Goaterb895de52018-05-14 08:57:00 +02003510 if (block && !qemu_ram_is_migratable(block)) {
3511 error_report("block %s should not be migrated !", id);
3512 ret = -EINVAL;
3513 } else if (block) {
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00003514 if (length != block->used_length) {
3515 Error *local_err = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02003516
Gongleifa53a0e2016-05-10 10:04:59 +08003517 ret = qemu_ram_resize(block, length,
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00003518 &local_err);
3519 if (local_err) {
3520 error_report_err(local_err);
Juan Quintela56e93d22015-05-07 19:33:31 +02003521 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003522 }
Dr. David Alan Gilbertef08fb32017-02-24 18:28:30 +00003523 /* For postcopy we need to check hugepage sizes match */
3524 if (postcopy_advised &&
3525 block->page_size != qemu_host_page_size) {
3526 uint64_t remote_page_size = qemu_get_be64(f);
3527 if (remote_page_size != block->page_size) {
3528 error_report("Mismatched RAM page size %s "
3529 "(local) %zd != %" PRId64,
3530 id, block->page_size,
3531 remote_page_size);
3532 ret = -EINVAL;
3533 }
3534 }
Yury Kotovfbd162e2019-02-15 20:45:46 +03003535 if (migrate_ignore_shared()) {
3536 hwaddr addr = qemu_get_be64(f);
Yury Kotovfbd162e2019-02-15 20:45:46 +03003537 if (ramblock_is_ignored(block) &&
3538 block->mr->addr != addr) {
3539 error_report("Mismatched GPAs for block %s "
3540 "%" PRId64 "!= %" PRId64,
3541 id, (uint64_t)addr,
3542 (uint64_t)block->mr->addr);
3543 ret = -EINVAL;
3544 }
3545 }
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00003546 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
3547 block->idstr);
3548 } else {
Juan Quintela56e93d22015-05-07 19:33:31 +02003549 error_report("Unknown ramblock \"%s\", cannot "
3550 "accept migration", id);
3551 ret = -EINVAL;
3552 }
3553
3554 total_ram_bytes -= length;
3555 }
3556 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00003557
Juan Quintelabb890ed2017-04-28 09:39:55 +02003558 case RAM_SAVE_FLAG_ZERO:
Juan Quintela56e93d22015-05-07 19:33:31 +02003559 ch = qemu_get_byte(f);
3560 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
3561 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00003562
Juan Quintela56e93d22015-05-07 19:33:31 +02003563 case RAM_SAVE_FLAG_PAGE:
Juan Quintela56e93d22015-05-07 19:33:31 +02003564 qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
3565 break;
Juan Quintela56e93d22015-05-07 19:33:31 +02003566
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00003567 case RAM_SAVE_FLAG_COMPRESS_PAGE:
Juan Quintela56e93d22015-05-07 19:33:31 +02003568 len = qemu_get_be32(f);
3569 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
3570 error_report("Invalid compressed data length: %d", len);
3571 ret = -EINVAL;
3572 break;
3573 }
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00003574 decompress_data_with_multi_threads(f, host, len);
Juan Quintela56e93d22015-05-07 19:33:31 +02003575 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00003576
Juan Quintela56e93d22015-05-07 19:33:31 +02003577 case RAM_SAVE_FLAG_XBZRLE:
Juan Quintela56e93d22015-05-07 19:33:31 +02003578 if (load_xbzrle(f, addr, host) < 0) {
3579 error_report("Failed to decompress XBZRLE page at "
3580 RAM_ADDR_FMT, addr);
3581 ret = -EINVAL;
3582 break;
3583 }
3584 break;
3585 case RAM_SAVE_FLAG_EOS:
3586 /* normal exit */
Juan Quintela6df264a2018-02-28 09:10:07 +01003587 multifd_recv_sync_main();
Juan Quintela56e93d22015-05-07 19:33:31 +02003588 break;
3589 default:
3590 if (flags & RAM_SAVE_FLAG_HOOK) {
Dr. David Alan Gilbert632e3a52015-06-11 18:17:23 +01003591 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
Juan Quintela56e93d22015-05-07 19:33:31 +02003592 } else {
Bihong Yu29fccad2020-10-20 11:10:42 +08003593 error_report("Unknown combination of migration flags: 0x%x",
Juan Quintela56e93d22015-05-07 19:33:31 +02003594 flags);
3595 ret = -EINVAL;
3596 }
3597 }
3598 if (!ret) {
3599 ret = qemu_file_get_error(f);
3600 }
zhanghailiang03930312020-02-24 14:54:10 +08003601 if (!ret && host_bak) {
3602 memcpy(host_bak, host, TARGET_PAGE_SIZE);
3603 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003604 }
3605
Wei Yangca1a6b72019-11-07 20:39:03 +08003606 ret |= wait_for_decompress_done();
Wei Yang10da4a32019-07-25 08:20:23 +08003607 return ret;
3608}
3609
3610static int ram_load(QEMUFile *f, void *opaque, int version_id)
3611{
3612 int ret = 0;
3613 static uint64_t seq_iter;
3614 /*
3615 * If system is running in postcopy mode, page inserts to host memory must
3616 * be atomic
3617 */
3618 bool postcopy_running = postcopy_is_running();
3619
3620 seq_iter++;
3621
3622 if (version_id != 4) {
3623 return -EINVAL;
3624 }
3625
3626 /*
3627 * This RCU critical section can be very long running.
3628 * When RCU reclaims in the code start to become numerous,
3629 * it will be necessary to reduce the granularity of this
3630 * critical section.
3631 */
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003632 WITH_RCU_READ_LOCK_GUARD() {
3633 if (postcopy_running) {
3634 ret = ram_load_postcopy(f);
3635 } else {
3636 ret = ram_load_precopy(f);
3637 }
Wei Yang10da4a32019-07-25 08:20:23 +08003638 }
Juan Quintela55c44462017-01-23 22:32:05 +01003639 trace_ram_load_complete(ret, seq_iter);
Zhang Chene6f4aa12018-09-03 12:38:50 +08003640
Juan Quintela56e93d22015-05-07 19:33:31 +02003641 return ret;
3642}
3643
Vladimir Sementsov-Ogievskiyc6467622017-07-10 19:30:14 +03003644static bool ram_has_postcopy(void *opaque)
3645{
Junyan He469dd512018-07-18 15:48:02 +08003646 RAMBlock *rb;
Yury Kotovfbd162e2019-02-15 20:45:46 +03003647 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
Junyan He469dd512018-07-18 15:48:02 +08003648 if (ramblock_is_pmem(rb)) {
3649 info_report("Block: %s, host: %p is a nvdimm memory, postcopy"
3650 "is not supported now!", rb->idstr, rb->host);
3651 return false;
3652 }
3653 }
3654
Vladimir Sementsov-Ogievskiyc6467622017-07-10 19:30:14 +03003655 return migrate_postcopy_ram();
3656}
3657
Peter Xuedd090c2018-05-02 18:47:32 +08003658/* Sync all the dirty bitmap with destination VM. */
3659static int ram_dirty_bitmap_sync_all(MigrationState *s, RAMState *rs)
3660{
3661 RAMBlock *block;
3662 QEMUFile *file = s->to_dst_file;
3663 int ramblock_count = 0;
3664
3665 trace_ram_dirty_bitmap_sync_start();
3666
Yury Kotovfbd162e2019-02-15 20:45:46 +03003667 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Peter Xuedd090c2018-05-02 18:47:32 +08003668 qemu_savevm_send_recv_bitmap(file, block->idstr);
3669 trace_ram_dirty_bitmap_request(block->idstr);
3670 ramblock_count++;
3671 }
3672
3673 trace_ram_dirty_bitmap_sync_wait();
3674
3675 /* Wait until all the ramblocks' dirty bitmap synced */
3676 while (ramblock_count--) {
3677 qemu_sem_wait(&s->rp_state.rp_sem);
3678 }
3679
3680 trace_ram_dirty_bitmap_sync_complete();
3681
3682 return 0;
3683}
3684
3685static void ram_dirty_bitmap_reload_notify(MigrationState *s)
3686{
3687 qemu_sem_post(&s->rp_state.rp_sem);
3688}
3689
Peter Xua335deb2018-05-02 18:47:28 +08003690/*
3691 * Read the received bitmap, revert it as the initial dirty bitmap.
3692 * This is only used when the postcopy migration is paused but wants
3693 * to resume from a middle point.
3694 */
3695int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block)
3696{
3697 int ret = -EINVAL;
3698 QEMUFile *file = s->rp_state.from_dst_file;
3699 unsigned long *le_bitmap, nbits = block->used_length >> TARGET_PAGE_BITS;
Peter Xua725ef92018-07-10 17:18:55 +08003700 uint64_t local_size = DIV_ROUND_UP(nbits, 8);
Peter Xua335deb2018-05-02 18:47:28 +08003701 uint64_t size, end_mark;
3702
3703 trace_ram_dirty_bitmap_reload_begin(block->idstr);
3704
3705 if (s->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
3706 error_report("%s: incorrect state %s", __func__,
3707 MigrationStatus_str(s->state));
3708 return -EINVAL;
3709 }
3710
3711 /*
3712 * Note: see comments in ramblock_recv_bitmap_send() on why we
zhaolichang3a4452d2020-09-17 15:50:21 +08003713 * need the endianness conversion, and the paddings.
Peter Xua335deb2018-05-02 18:47:28 +08003714 */
3715 local_size = ROUND_UP(local_size, 8);
3716
3717 /* Add paddings */
3718 le_bitmap = bitmap_new(nbits + BITS_PER_LONG);
3719
3720 size = qemu_get_be64(file);
3721
3722 /* The size of the bitmap should match with our ramblock */
3723 if (size != local_size) {
3724 error_report("%s: ramblock '%s' bitmap size mismatch "
3725 "(0x%"PRIx64" != 0x%"PRIx64")", __func__,
3726 block->idstr, size, local_size);
3727 ret = -EINVAL;
3728 goto out;
3729 }
3730
3731 size = qemu_get_buffer(file, (uint8_t *)le_bitmap, local_size);
3732 end_mark = qemu_get_be64(file);
3733
3734 ret = qemu_file_get_error(file);
3735 if (ret || size != local_size) {
3736 error_report("%s: read bitmap failed for ramblock '%s': %d"
3737 " (size 0x%"PRIx64", got: 0x%"PRIx64")",
3738 __func__, block->idstr, ret, local_size, size);
3739 ret = -EIO;
3740 goto out;
3741 }
3742
3743 if (end_mark != RAMBLOCK_RECV_BITMAP_ENDING) {
Philippe Mathieu-Daudéaf3bbbe2020-11-03 12:25:58 +01003744 error_report("%s: ramblock '%s' end mark incorrect: 0x%"PRIx64,
Peter Xua335deb2018-05-02 18:47:28 +08003745 __func__, block->idstr, end_mark);
3746 ret = -EINVAL;
3747 goto out;
3748 }
3749
3750 /*
zhaolichang3a4452d2020-09-17 15:50:21 +08003751 * Endianness conversion. We are during postcopy (though paused).
Peter Xua335deb2018-05-02 18:47:28 +08003752 * The dirty bitmap won't change. We can directly modify it.
3753 */
3754 bitmap_from_le(block->bmap, le_bitmap, nbits);
3755
3756 /*
3757 * What we received is "received bitmap". Revert it as the initial
3758 * dirty bitmap for this ramblock.
3759 */
3760 bitmap_complement(block->bmap, block->bmap, nbits);
3761
3762 trace_ram_dirty_bitmap_reload_complete(block->idstr);
3763
Peter Xuedd090c2018-05-02 18:47:32 +08003764 /*
3765 * We succeeded to sync bitmap for current ramblock. If this is
3766 * the last one to sync, we need to notify the main send thread.
3767 */
3768 ram_dirty_bitmap_reload_notify(s);
3769
Peter Xua335deb2018-05-02 18:47:28 +08003770 ret = 0;
3771out:
Peter Xubf269902018-05-25 09:50:42 +08003772 g_free(le_bitmap);
Peter Xua335deb2018-05-02 18:47:28 +08003773 return ret;
3774}
3775
Peter Xuedd090c2018-05-02 18:47:32 +08003776static int ram_resume_prepare(MigrationState *s, void *opaque)
3777{
3778 RAMState *rs = *(RAMState **)opaque;
Peter Xu08614f32018-05-02 18:47:33 +08003779 int ret;
Peter Xuedd090c2018-05-02 18:47:32 +08003780
Peter Xu08614f32018-05-02 18:47:33 +08003781 ret = ram_dirty_bitmap_sync_all(s, rs);
3782 if (ret) {
3783 return ret;
3784 }
3785
3786 ram_state_resume_prepare(rs, s->to_dst_file);
3787
3788 return 0;
Peter Xuedd090c2018-05-02 18:47:32 +08003789}
3790
Juan Quintela56e93d22015-05-07 19:33:31 +02003791static SaveVMHandlers savevm_ram_handlers = {
Juan Quintela9907e842017-06-28 11:52:24 +02003792 .save_setup = ram_save_setup,
Juan Quintela56e93d22015-05-07 19:33:31 +02003793 .save_live_iterate = ram_save_iterate,
Dr. David Alan Gilbert763c9062015-11-05 18:11:00 +00003794 .save_live_complete_postcopy = ram_save_complete,
Dr. David Alan Gilberta3e06c32015-11-05 18:10:41 +00003795 .save_live_complete_precopy = ram_save_complete,
Vladimir Sementsov-Ogievskiyc6467622017-07-10 19:30:14 +03003796 .has_postcopy = ram_has_postcopy,
Juan Quintela56e93d22015-05-07 19:33:31 +02003797 .save_live_pending = ram_save_pending,
3798 .load_state = ram_load,
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003799 .save_cleanup = ram_save_cleanup,
3800 .load_setup = ram_load_setup,
3801 .load_cleanup = ram_load_cleanup,
Peter Xuedd090c2018-05-02 18:47:32 +08003802 .resume_prepare = ram_resume_prepare,
Juan Quintela56e93d22015-05-07 19:33:31 +02003803};
3804
3805void ram_mig_init(void)
3806{
3807 qemu_mutex_init(&XBZRLE.lock);
Dr. David Alan Gilbertce62df52019-08-22 12:54:33 +01003808 register_savevm_live("ram", 0, 4, &savevm_ram_handlers, &ram_state);
Juan Quintela56e93d22015-05-07 19:33:31 +02003809}