/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2011-2015 Red Hat Inc
 *
 * Authors:
 *  Juan Quintela <quintela@redhat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include <zlib.h>
#include "qemu/cutils.h"
#include "qemu/bitops.h"
#include "qemu/bitmap.h"
#include "qemu/main-loop.h"
#include "xbzrle.h"
#include "ram.h"
#include "migration.h"
#include "socket.h"
#include "migration/register.h"
#include "migration/misc.h"
#include "qemu-file.h"
#include "postcopy-ram.h"
#include "page_cache.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "qapi/qapi-events-migration.h"
#include "qapi/qmp/qerror.h"
#include "trace.h"
#include "exec/ram_addr.h"
#include "exec/target_page.h"
#include "qemu/rcu_queue.h"
#include "migration/colo.h"
#include "block.h"
#include "sysemu/sysemu.h"
#include "qemu/uuid.h"
#include "savevm.h"
#include "qemu/iov.h"

/***********************************************************/
/* ram save/restore */

/* RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS, it
 * worked for pages that were filled with the same char.  We switched
 * it to only search for the zero value.  And to avoid confusion with
 * RAM_SAVE_FLAG_COMPRESS_PAGE just rename it.
 */

#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
#define RAM_SAVE_FLAG_ZERO     0x02
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
#define RAM_SAVE_FLAG_PAGE     0x08
#define RAM_SAVE_FLAG_EOS      0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE   0x40
/* 0x80 is reserved in migration.h start with 0x100 next */
#define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100
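/*
 * Note: these flags are OR-ed into the page address field when a page
 * header is written to the stream (see save_page_header() later in this
 * file); addresses are target-page aligned, so the flag bits have to
 * stay below the smallest supported target page size.
 */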

static inline bool is_zero_range(uint8_t *p, uint64_t size)
{
    return buffer_is_zero(p, size);
}

XBZRLECacheStats xbzrle_counters;

/* struct contains XBZRLE cache and a static page
   used by the compression */
static struct {
    /* buffer used for XBZRLE encoding */
    uint8_t *encoded_buf;
    /* buffer for storing page content */
    uint8_t *current_buf;
    /* Cache for XBZRLE, Protected by lock. */
    PageCache *cache;
    QemuMutex lock;
    /* it will store a page full of zeros */
    uint8_t *zero_target_page;
    /* buffer used for XBZRLE decoding */
    uint8_t *decoded_buf;
} XBZRLE;

static void XBZRLE_cache_lock(void)
{
    if (migrate_use_xbzrle())
        qemu_mutex_lock(&XBZRLE.lock);
}

static void XBZRLE_cache_unlock(void)
{
    if (migrate_use_xbzrle())
        qemu_mutex_unlock(&XBZRLE.lock);
}

/**
 * xbzrle_cache_resize: resize the xbzrle cache
 *
 * This function is called from qmp_migrate_set_cache_size in the main
 * thread, possibly while a migration is in progress. A running
 * migration may be using the cache and might finish during this call,
 * hence changes to the cache are protected by XBZRLE.lock().
 *
 * Returns 0 for success or -1 for error
 *
 * @new_size: new cache size
 * @errp: set *errp if the check failed, with reason
 */
int xbzrle_cache_resize(int64_t new_size, Error **errp)
{
    PageCache *new_cache;
    int64_t ret = 0;

    /* Check for truncation */
    if (new_size != (size_t)new_size) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
                   "exceeding address space");
        return -1;
    }

    if (new_size == migrate_xbzrle_cache_size()) {
        /* nothing to do */
        return 0;
    }

    XBZRLE_cache_lock();

    if (XBZRLE.cache != NULL) {
        new_cache = cache_init(new_size, TARGET_PAGE_SIZE, errp);
        if (!new_cache) {
            ret = -1;
            goto out;
        }

        cache_fini(XBZRLE.cache);
        XBZRLE.cache = new_cache;
    }
out:
    XBZRLE_cache_unlock();
    return ret;
}
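
/*
 * For illustration only: the resize above is typically driven from the
 * monitor, e.g. the QMP command
 *   { "execute": "migrate-set-cache-size", "arguments": { "value": 536870912 } }
 * which (per the comment above) ends up calling this function from the
 * main thread.
 */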

static bool ramblock_is_ignored(RAMBlock *block)
{
    return !qemu_ram_is_migratable(block) ||
           (migrate_ignore_shared() && qemu_ram_is_shared(block));
}

/* Should be holding either ram_list.mutex, or the RCU lock. */
#define RAMBLOCK_FOREACH_NOT_IGNORED(block)            \
    INTERNAL_RAMBLOCK_FOREACH(block)                   \
        if (ramblock_is_ignored(block)) {} else

#define RAMBLOCK_FOREACH_MIGRATABLE(block)             \
    INTERNAL_RAMBLOCK_FOREACH(block)                   \
        if (!qemu_ram_is_migratable(block)) {} else

#undef RAMBLOCK_FOREACH

int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque)
{
    RAMBlock *block;
    int ret = 0;

    RCU_READ_LOCK_GUARD();

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        ret = func(block, opaque);
        if (ret) {
            break;
        }
    }
    return ret;
}

static void ramblock_recv_map_init(void)
{
    RAMBlock *rb;

    RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
        assert(!rb->receivedmap);
        rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits());
    }
}

int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr)
{
    return test_bit(ramblock_recv_bitmap_offset(host_addr, rb),
                    rb->receivedmap);
}

bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset)
{
    return test_bit(byte_offset >> TARGET_PAGE_BITS, rb->receivedmap);
}

void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr)
{
    set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb), rb->receivedmap);
}

void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr,
                                    size_t nr)
{
    bitmap_set_atomic(rb->receivedmap,
                      ramblock_recv_bitmap_offset(host_addr, rb),
                      nr);
}

#define  RAMBLOCK_RECV_BITMAP_ENDING  (0x0123456789abcdefULL)
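/*
 * The reader of a received bitmap checks this trailing marker to detect
 * a truncated or corrupted transfer; ramblock_recv_bitmap_send() below
 * appends it right after the bitmap data.
 */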

/*
 * Format: bitmap_size (8 bytes) + whole_bitmap (N bytes).
 *
 * Returns >0 if success with sent bytes, or <0 if error.
 */
int64_t ramblock_recv_bitmap_send(QEMUFile *file,
                                  const char *block_name)
{
    RAMBlock *block = qemu_ram_block_by_name(block_name);
    unsigned long *le_bitmap, nbits;
    uint64_t size;

    if (!block) {
        error_report("%s: invalid block name: %s", __func__, block_name);
        return -1;
    }

    nbits = block->used_length >> TARGET_PAGE_BITS;

    /*
     * Make sure the tmp bitmap buffer is big enough, e.g., on 32bit
     * machines we may need 4 more bytes for padding (see below
     * comment). So extend it a bit before hand.
     */
    le_bitmap = bitmap_new(nbits + BITS_PER_LONG);

    /*
     * Always use little endian when sending the bitmap. This is
     * required so that the bitmap can be decoded correctly even when
     * the source and destination VMs do not use the same endianness.
     * (Note: big endian won't work.)
     */
    bitmap_to_le(le_bitmap, block->receivedmap, nbits);

    /* Size of the bitmap, in bytes */
    size = DIV_ROUND_UP(nbits, 8);

    /*
     * size is always aligned to 8 bytes for 64bit machines, but it
     * may not be true for 32bit machines. We need this padding to
     * make sure the migration can survive even between 32bit and
     * 64bit machines.
     */
    size = ROUND_UP(size, 8);

    qemu_put_be64(file, size);
    qemu_put_buffer(file, (const uint8_t *)le_bitmap, size);
    /*
     * Mark the end, in case the middle part is screwed up due to
     * some "mysterious" reason.
     */
    qemu_put_be64(file, RAMBLOCK_RECV_BITMAP_ENDING);
    qemu_fflush(file);

    g_free(le_bitmap);

    if (qemu_file_get_error(file)) {
        return qemu_file_get_error(file);
    }

    return size + sizeof(size);
}

/*
 * An outstanding page request, on the source, having been received
 * and queued
 */
struct RAMSrcPageRequest {
    RAMBlock *rb;
    hwaddr    offset;
    hwaddr    len;

    QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
};

/* State of RAM for migration */
struct RAMState {
    /* QEMUFile used for this migration */
    QEMUFile *f;
    /* Last block that we have visited searching for dirty pages */
    RAMBlock *last_seen_block;
    /* Last block from where we have sent data */
    RAMBlock *last_sent_block;
    /* Last dirty target page we have sent */
    ram_addr_t last_page;
    /* last ram version we have seen */
    uint32_t last_version;
    /* We are in the first round */
    bool ram_bulk_stage;
    /* The free page optimization is enabled */
    bool fpo_enabled;
    /* How many times we have dirty too many pages */
    int dirty_rate_high_cnt;
    /* these variables are used for bitmap sync */
    /* last time we did a full bitmap_sync */
    int64_t time_last_bitmap_sync;
    /* bytes transferred at start_time */
    uint64_t bytes_xfer_prev;
    /* number of dirty pages since start_time */
    uint64_t num_dirty_pages_period;
    /* xbzrle misses since the beginning of the period */
    uint64_t xbzrle_cache_miss_prev;

    /* compression statistics since the beginning of the period */
    /* number of times no free thread was available to compress data */
    uint64_t compress_thread_busy_prev;
    /* number of bytes after compression */
    uint64_t compressed_size_prev;
    /* number of compressed pages */
    uint64_t compress_pages_prev;

    /* total handled target pages at the beginning of period */
    uint64_t target_page_count_prev;
    /* total handled target pages since start */
    uint64_t target_page_count;
    /* number of dirty bits in the bitmap */
    uint64_t migration_dirty_pages;
    /* Protects modification of the bitmap and migration dirty pages */
    QemuMutex bitmap_mutex;
    /* The RAMBlock used in the last src_page_requests */
    RAMBlock *last_req_rb;
    /* Queue of outstanding page requests from the destination */
    QemuMutex src_page_req_mutex;
    QSIMPLEQ_HEAD(, RAMSrcPageRequest) src_page_requests;
};
typedef struct RAMState RAMState;

static RAMState *ram_state;

static NotifierWithReturnList precopy_notifier_list;

void precopy_infrastructure_init(void)
{
    notifier_with_return_list_init(&precopy_notifier_list);
}

void precopy_add_notifier(NotifierWithReturn *n)
{
    notifier_with_return_list_add(&precopy_notifier_list, n);
}

void precopy_remove_notifier(NotifierWithReturn *n)
{
    notifier_with_return_remove(n);
}

int precopy_notify(PrecopyNotifyReason reason, Error **errp)
{
    PrecopyNotifyData pnd;
    pnd.reason = reason;
    pnd.errp = errp;

    return notifier_with_return_list_notify(&precopy_notifier_list, &pnd);
}

void precopy_enable_free_page_optimization(void)
{
    if (!ram_state) {
        return;
    }

    ram_state->fpo_enabled = true;
}

uint64_t ram_bytes_remaining(void)
{
    return ram_state ? (ram_state->migration_dirty_pages * TARGET_PAGE_SIZE) :
                       0;
}

MigrationStats ram_counters;

/* used by the search for pages to send */
struct PageSearchStatus {
    /* Current block being searched */
    RAMBlock    *block;
    /* Current page to search from */
    unsigned long page;
    /* Set once we wrap around */
    bool         complete_round;
};
typedef struct PageSearchStatus PageSearchStatus;

CompressionStats compression_counters;

struct CompressParam {
    bool done;
    bool quit;
    bool zero_page;
    QEMUFile *file;
    QemuMutex mutex;
    QemuCond cond;
    RAMBlock *block;
    ram_addr_t offset;

    /* internally used fields */
    z_stream stream;
    uint8_t *originbuf;
};
typedef struct CompressParam CompressParam;

struct DecompressParam {
    bool done;
    bool quit;
    QemuMutex mutex;
    QemuCond cond;
    void *des;
    uint8_t *compbuf;
    int len;
    z_stream stream;
};
typedef struct DecompressParam DecompressParam;

static CompressParam *comp_param;
static QemuThread *compress_threads;
/* comp_done_cond is used to wake up the migration thread when
 * one of the compression threads has finished the compression.
 * comp_done_lock is used to co-work with comp_done_cond.
 */
static QemuMutex comp_done_lock;
static QemuCond comp_done_cond;
/* The empty QEMUFileOps will be used by file in CompressParam */
static const QEMUFileOps empty_ops = { };

static QEMUFile *decomp_file;
static DecompressParam *decomp_param;
static QemuThread *decompress_threads;
static QemuMutex decomp_done_lock;
static QemuCond decomp_done_cond;

static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
                                 ram_addr_t offset, uint8_t *source_buf);

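/*
 * Worker body for each compression thread: wait until the migration
 * thread hands over a (block, offset) pair, compress that page into this
 * worker's private QEMUFile buffer, then mark the job done and signal
 * comp_done_cond so the migration thread can flush the result.
 */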
static void *do_data_compress(void *opaque)
{
    CompressParam *param = opaque;
    RAMBlock *block;
    ram_addr_t offset;
    bool zero_page;

    qemu_mutex_lock(&param->mutex);
    while (!param->quit) {
        if (param->block) {
            block = param->block;
            offset = param->offset;
            param->block = NULL;
            qemu_mutex_unlock(&param->mutex);

            zero_page = do_compress_ram_page(param->file, &param->stream,
                                             block, offset, param->originbuf);

            qemu_mutex_lock(&comp_done_lock);
            param->done = true;
            param->zero_page = zero_page;
            qemu_cond_signal(&comp_done_cond);
            qemu_mutex_unlock(&comp_done_lock);

            qemu_mutex_lock(&param->mutex);
        } else {
            qemu_cond_wait(&param->cond, &param->mutex);
        }
    }
    qemu_mutex_unlock(&param->mutex);

    return NULL;
}

static void compress_threads_save_cleanup(void)
{
    int i, thread_count;

    if (!migrate_use_compression() || !comp_param) {
        return;
    }

    thread_count = migrate_compress_threads();
    for (i = 0; i < thread_count; i++) {
        /*
         * we use it as an indicator to show whether the thread is
         * properly initialized or not
         */
        if (!comp_param[i].file) {
            break;
        }

        qemu_mutex_lock(&comp_param[i].mutex);
        comp_param[i].quit = true;
        qemu_cond_signal(&comp_param[i].cond);
        qemu_mutex_unlock(&comp_param[i].mutex);

        qemu_thread_join(compress_threads + i);
        qemu_mutex_destroy(&comp_param[i].mutex);
        qemu_cond_destroy(&comp_param[i].cond);
        deflateEnd(&comp_param[i].stream);
        g_free(comp_param[i].originbuf);
        qemu_fclose(comp_param[i].file);
        comp_param[i].file = NULL;
    }
    qemu_mutex_destroy(&comp_done_lock);
    qemu_cond_destroy(&comp_done_cond);
    g_free(compress_threads);
    g_free(comp_param);
    compress_threads = NULL;
    comp_param = NULL;
}

static int compress_threads_save_setup(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return 0;
    }
    thread_count = migrate_compress_threads();
    compress_threads = g_new0(QemuThread, thread_count);
    comp_param = g_new0(CompressParam, thread_count);
    qemu_cond_init(&comp_done_cond);
    qemu_mutex_init(&comp_done_lock);
    for (i = 0; i < thread_count; i++) {
        comp_param[i].originbuf = g_try_malloc(TARGET_PAGE_SIZE);
        if (!comp_param[i].originbuf) {
            goto exit;
        }

        if (deflateInit(&comp_param[i].stream,
                        migrate_compress_level()) != Z_OK) {
            g_free(comp_param[i].originbuf);
            goto exit;
        }

        /* comp_param[i].file is just used as a dummy buffer to save data,
         * set its ops to empty.
         */
        comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
        comp_param[i].done = true;
        comp_param[i].quit = false;
        qemu_mutex_init(&comp_param[i].mutex);
        qemu_cond_init(&comp_param[i].cond);
        qemu_thread_create(compress_threads + i, "compress",
                           do_data_compress, comp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
    return 0;

exit:
    compress_threads_save_cleanup();
    return -1;
}

/* Multiple fd's */

#define MULTIFD_MAGIC 0x11223344U
#define MULTIFD_VERSION 1

#define MULTIFD_FLAG_SYNC (1 << 0)

/* This value needs to be a multiple of qemu_target_page_size() */
#define MULTIFD_PACKET_SIZE (512 * 1024)
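/*
 * For illustration: with a 4 KiB target page size this means
 * 512 KiB / 4 KiB = 128 pages per multifd packet (the page_count used
 * in multifd_save_setup() below).
 */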

typedef struct {
    uint32_t magic;
    uint32_t version;
    unsigned char uuid[16]; /* QemuUUID */
    uint8_t id;
    uint8_t unused1[7];     /* Reserved for future use */
    uint64_t unused2[4];    /* Reserved for future use */
} __attribute__((packed)) MultiFDInit_t;
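/*
 * Handshake message sent exactly once over each multifd channel right
 * after it is created; it carries the source's UUID and the channel id
 * so the destination can associate the incoming connection with the
 * right migration and channel (see multifd_send_initial_packet() and
 * multifd_recv_initial_packet() below).
 */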

typedef struct {
    uint32_t magic;
    uint32_t version;
    uint32_t flags;
    /* maximum number of allocated pages */
    uint32_t pages_alloc;
    uint32_t pages_used;
    /* size of the next packet that contains pages */
    uint32_t next_packet_size;
    uint64_t packet_num;
    uint64_t unused[4];    /* Reserved for future use */
    char ramblock[256];
    uint64_t offset[];
} __attribute__((packed)) MultiFDPacket_t;
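/*
 * Per-transmission header for a multifd channel.  On the wire, each
 * transmission is this packed header (multi-byte fields in big endian)
 * followed by next_packet_size bytes of raw page data for the
 * pages_used pages listed in offset[]; see multifd_send_fill_packet()
 * and multifd_send_thread() below.
 */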

typedef struct {
    /* number of used pages */
    uint32_t used;
    /* number of allocated pages */
    uint32_t allocated;
    /* global number of generated multifd packets */
    uint64_t packet_num;
    /* offset of each page */
    ram_addr_t *offset;
    /* pointer to each page */
    struct iovec *iov;
    RAMBlock *block;
} MultiFDPages_t;

typedef struct {
    /* these fields are not changed once the thread is created */
    /* channel number */
    uint8_t id;
    /* channel thread name */
    char *name;
    /* channel thread id */
    QemuThread thread;
    /* communication channel */
    QIOChannel *c;
    /* sem where to wait for more work */
    QemuSemaphore sem;
    /* this mutex protects the following parameters */
    QemuMutex mutex;
    /* is this channel thread running */
    bool running;
    /* should this thread finish */
    bool quit;
    /* thread has work to do */
    int pending_job;
    /* array of pages to send */
    MultiFDPages_t *pages;
    /* packet allocated len */
    uint32_t packet_len;
    /* pointer to the packet */
    MultiFDPacket_t *packet;
    /* multifd flags for each packet */
    uint32_t flags;
    /* size of the next packet that contains pages */
    uint32_t next_packet_size;
    /* global number of generated multifd packets */
    uint64_t packet_num;
    /* thread local variables */
    /* packets sent through this channel */
    uint64_t num_packets;
    /* pages sent through this channel */
    uint64_t num_pages;
    /* syncs main thread and channels */
    QemuSemaphore sem_sync;
} MultiFDSendParams;

typedef struct {
    /* these fields are not changed once the thread is created */
    /* channel number */
    uint8_t id;
    /* channel thread name */
    char *name;
    /* channel thread id */
    QemuThread thread;
    /* communication channel */
    QIOChannel *c;
    /* this mutex protects the following parameters */
    QemuMutex mutex;
    /* is this channel thread running */
    bool running;
    /* should this thread finish */
    bool quit;
    /* array of pages to receive */
    MultiFDPages_t *pages;
    /* packet allocated len */
    uint32_t packet_len;
    /* pointer to the packet */
    MultiFDPacket_t *packet;
    /* multifd flags for each packet */
    uint32_t flags;
    /* global number of generated multifd packets */
    uint64_t packet_num;
    /* thread local variables */
    /* size of the next packet that contains pages */
    uint32_t next_packet_size;
    /* packets sent through this channel */
    uint64_t num_packets;
    /* pages sent through this channel */
    uint64_t num_pages;
    /* syncs main thread and channels */
    QemuSemaphore sem_sync;
} MultiFDRecvParams;

static int multifd_send_initial_packet(MultiFDSendParams *p, Error **errp)
{
    MultiFDInit_t msg = {};
    int ret;

    msg.magic = cpu_to_be32(MULTIFD_MAGIC);
    msg.version = cpu_to_be32(MULTIFD_VERSION);
    msg.id = p->id;
    memcpy(msg.uuid, &qemu_uuid.data, sizeof(msg.uuid));

    ret = qio_channel_write_all(p->c, (char *)&msg, sizeof(msg), errp);
    if (ret != 0) {
        return -1;
    }
    return 0;
}

static int multifd_recv_initial_packet(QIOChannel *c, Error **errp)
{
    MultiFDInit_t msg;
    int ret;

    ret = qio_channel_read_all(c, (char *)&msg, sizeof(msg), errp);
    if (ret != 0) {
        return -1;
    }

    msg.magic = be32_to_cpu(msg.magic);
    msg.version = be32_to_cpu(msg.version);

    if (msg.magic != MULTIFD_MAGIC) {
        error_setg(errp, "multifd: received packet magic %x "
                   "expected %x", msg.magic, MULTIFD_MAGIC);
        return -1;
    }

    if (msg.version != MULTIFD_VERSION) {
        error_setg(errp, "multifd: received packet version %d "
                   "expected %d", msg.version, MULTIFD_VERSION);
        return -1;
    }

    if (memcmp(msg.uuid, &qemu_uuid, sizeof(qemu_uuid))) {
        char *uuid = qemu_uuid_unparse_strdup(&qemu_uuid);
        char *msg_uuid = qemu_uuid_unparse_strdup((const QemuUUID *)msg.uuid);

        error_setg(errp, "multifd: received uuid '%s' and expected "
                   "uuid '%s' for channel %hhd", msg_uuid, uuid, msg.id);
        g_free(uuid);
        g_free(msg_uuid);
        return -1;
    }

    if (msg.id > migrate_multifd_channels()) {
        error_setg(errp, "multifd: received channel id %d is greater than "
                   "number of channels %d", msg.id, migrate_multifd_channels());
        return -1;
    }

    return msg.id;
}

static MultiFDPages_t *multifd_pages_init(size_t size)
{
    MultiFDPages_t *pages = g_new0(MultiFDPages_t, 1);

    pages->allocated = size;
    pages->iov = g_new0(struct iovec, size);
    pages->offset = g_new0(ram_addr_t, size);

    return pages;
}

static void multifd_pages_clear(MultiFDPages_t *pages)
{
    pages->used = 0;
    pages->allocated = 0;
    pages->packet_num = 0;
    pages->block = NULL;
    g_free(pages->iov);
    pages->iov = NULL;
    g_free(pages->offset);
    pages->offset = NULL;
    g_free(pages);
}

static void multifd_send_fill_packet(MultiFDSendParams *p)
{
    MultiFDPacket_t *packet = p->packet;
    int i;

    packet->flags = cpu_to_be32(p->flags);
    packet->pages_alloc = cpu_to_be32(p->pages->allocated);
    packet->pages_used = cpu_to_be32(p->pages->used);
    packet->next_packet_size = cpu_to_be32(p->next_packet_size);
    packet->packet_num = cpu_to_be64(p->packet_num);

    if (p->pages->block) {
        strncpy(packet->ramblock, p->pages->block->idstr, 256);
    }

    for (i = 0; i < p->pages->used; i++) {
        /* there are architectures where ram_addr_t is 32 bit */
        uint64_t temp = p->pages->offset[i];

        packet->offset[i] = cpu_to_be64(temp);
    }
}

static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
{
    MultiFDPacket_t *packet = p->packet;
    uint32_t pages_max = MULTIFD_PACKET_SIZE / qemu_target_page_size();
    RAMBlock *block;
    int i;

    packet->magic = be32_to_cpu(packet->magic);
    if (packet->magic != MULTIFD_MAGIC) {
        error_setg(errp, "multifd: received packet "
                   "magic %x and expected magic %x",
                   packet->magic, MULTIFD_MAGIC);
        return -1;
    }

    packet->version = be32_to_cpu(packet->version);
    if (packet->version != MULTIFD_VERSION) {
        error_setg(errp, "multifd: received packet "
                   "version %d and expected version %d",
                   packet->version, MULTIFD_VERSION);
        return -1;
    }

    p->flags = be32_to_cpu(packet->flags);

    packet->pages_alloc = be32_to_cpu(packet->pages_alloc);
    /*
     * If we received a packet that is 100 times bigger than expected
     * just stop migration.  It is a magic number.
     */
    if (packet->pages_alloc > pages_max * 100) {
        error_setg(errp, "multifd: received packet "
                   "with size %d and expected a maximum size of %d",
                   packet->pages_alloc, pages_max * 100);
        return -1;
    }
    /*
     * We received a packet that is bigger than expected but inside
     * reasonable limits (see previous comment).  Just reallocate.
     */
    if (packet->pages_alloc > p->pages->allocated) {
        multifd_pages_clear(p->pages);
        p->pages = multifd_pages_init(packet->pages_alloc);
    }

    p->pages->used = be32_to_cpu(packet->pages_used);
    if (p->pages->used > packet->pages_alloc) {
        error_setg(errp, "multifd: received packet "
                   "with %d pages and expected maximum pages are %d",
                   p->pages->used, packet->pages_alloc);
        return -1;
    }

    p->next_packet_size = be32_to_cpu(packet->next_packet_size);
    p->packet_num = be64_to_cpu(packet->packet_num);

    if (p->pages->used == 0) {
        return 0;
    }

    /* make sure that ramblock is 0 terminated */
    packet->ramblock[255] = 0;
    block = qemu_ram_block_by_name(packet->ramblock);
    if (!block) {
        error_setg(errp, "multifd: unknown ram block %s",
                   packet->ramblock);
        return -1;
    }

    for (i = 0; i < p->pages->used; i++) {
        uint64_t offset = be64_to_cpu(packet->offset[i]);

        if (offset > (block->used_length - TARGET_PAGE_SIZE)) {
            error_setg(errp, "multifd: offset too long %" PRIu64
                       " (max " RAM_ADDR_FMT ")",
                       offset, block->max_length);
            return -1;
        }
        p->pages->iov[i].iov_base = block->host + offset;
        p->pages->iov[i].iov_len = TARGET_PAGE_SIZE;
    }

    return 0;
}

struct {
    MultiFDSendParams *params;
    /* array of pages to send */
    MultiFDPages_t *pages;
    /* global number of generated multifd packets */
    uint64_t packet_num;
    /* send channels ready */
    QemuSemaphore channels_ready;
    /*
     * Have we already run the terminate-threads code?  There is a race
     * when an error happens while we are exiting.
     * We will use atomic operations.  Only valid values are 0 and 1.
     */
    int exiting;
} *multifd_send_state;

/*
 * How do we use multifd_send_state->pages and channel->pages?
 *
 * We create a pages array for each channel, and a main one.  Each time
 * that we need to send a batch of pages we interchange the one in
 * multifd_send_state with the one of the channel that is going to send
 * it.  There are two reasons for that:
 *    - to avoid doing so many mallocs during migration
 *    - to make it easier to know what to free at the end of migration
 *
 * This way we always know who is the owner of each "pages" struct,
 * and we don't need any locking.  It belongs either to the migration
 * thread or to the channel thread.  Switching is safe because the
 * migration thread is using the channel mutex when changing it, and
 * the channel has to have finished with its own, otherwise pending_job
 * can't be false.
 */

static int multifd_send_pages(RAMState *rs)
{
    int i;
    static int next_channel;
    MultiFDSendParams *p = NULL; /* make gcc happy */
    MultiFDPages_t *pages = multifd_send_state->pages;
    uint64_t transferred;

    if (atomic_read(&multifd_send_state->exiting)) {
        return -1;
    }

    qemu_sem_wait(&multifd_send_state->channels_ready);
    for (i = next_channel;; i = (i + 1) % migrate_multifd_channels()) {
        p = &multifd_send_state->params[i];

        qemu_mutex_lock(&p->mutex);
        if (p->quit) {
            error_report("%s: channel %d has already quit!", __func__, i);
            qemu_mutex_unlock(&p->mutex);
            return -1;
        }
        if (!p->pending_job) {
            p->pending_job++;
            next_channel = (i + 1) % migrate_multifd_channels();
            break;
        }
        qemu_mutex_unlock(&p->mutex);
    }
    assert(!p->pages->used);
    assert(!p->pages->block);

    p->packet_num = multifd_send_state->packet_num++;
    multifd_send_state->pages = p->pages;
    p->pages = pages;
    transferred = ((uint64_t) pages->used) * TARGET_PAGE_SIZE + p->packet_len;
    qemu_file_update_transfer(rs->f, transferred);
    ram_counters.multifd_bytes += transferred;
    ram_counters.transferred += transferred;
    qemu_mutex_unlock(&p->mutex);
    qemu_sem_post(&p->sem);

    return 1;
}

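/*
 * Queue one target page for multifd transmission.  Pages are staged in
 * the shared pages array; when the array fills up, or when the RAMBlock
 * changes, the staged batch is handed off with multifd_send_pages(),
 * and on a block change the page is then queued again against the fresh
 * array.  Returns 1 on success, -1 on error.
 */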
static int multifd_queue_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
{
    MultiFDPages_t *pages = multifd_send_state->pages;

    if (!pages->block) {
        pages->block = block;
    }

    if (pages->block == block) {
        pages->offset[pages->used] = offset;
        pages->iov[pages->used].iov_base = block->host + offset;
        pages->iov[pages->used].iov_len = TARGET_PAGE_SIZE;
        pages->used++;

        if (pages->used < pages->allocated) {
            return 1;
        }
    }

    if (multifd_send_pages(rs) < 0) {
        return -1;
    }

    if (pages->block != block) {
        return multifd_queue_page(rs, block, offset);
    }

    return 1;
}

static void multifd_send_terminate_threads(Error *err)
{
    int i;

    trace_multifd_send_terminate_threads(err != NULL);

    if (err) {
        MigrationState *s = migrate_get_current();
        migrate_set_error(s, err);
        if (s->state == MIGRATION_STATUS_SETUP ||
            s->state == MIGRATION_STATUS_PRE_SWITCHOVER ||
            s->state == MIGRATION_STATUS_DEVICE ||
            s->state == MIGRATION_STATUS_ACTIVE) {
            migrate_set_state(&s->state, s->state,
                              MIGRATION_STATUS_FAILED);
        }
    }

    /*
     * We don't want to exit each thread twice.  Depending on where
     * we get the error, or if there are two independent errors in two
     * threads at the same time, we can end up calling this function
     * twice.
     */
    if (atomic_xchg(&multifd_send_state->exiting, 1)) {
        return;
    }

    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        qemu_mutex_lock(&p->mutex);
        p->quit = true;
        qemu_sem_post(&p->sem);
        qemu_mutex_unlock(&p->mutex);
    }
}

void multifd_save_cleanup(void)
{
    int i;

    if (!migrate_use_multifd()) {
        return;
    }
    multifd_send_terminate_threads(NULL);
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        if (p->running) {
            qemu_thread_join(&p->thread);
        }
    }
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        socket_send_channel_destroy(p->c);
        p->c = NULL;
        qemu_mutex_destroy(&p->mutex);
        qemu_sem_destroy(&p->sem);
        qemu_sem_destroy(&p->sem_sync);
        g_free(p->name);
        p->name = NULL;
        multifd_pages_clear(p->pages);
        p->pages = NULL;
        p->packet_len = 0;
        g_free(p->packet);
        p->packet = NULL;
    }
    qemu_sem_destroy(&multifd_send_state->channels_ready);
    g_free(multifd_send_state->params);
    multifd_send_state->params = NULL;
    multifd_pages_clear(multifd_send_state->pages);
    multifd_send_state->pages = NULL;
    g_free(multifd_send_state);
    multifd_send_state = NULL;
}

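/*
 * Flush any pages still staged in multifd_send_state, then have every
 * channel send a packet flagged MULTIFD_FLAG_SYNC and wait on each
 * channel's sem_sync until all channels have drained their pending work.
 */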
static void multifd_send_sync_main(RAMState *rs)
{
    int i;

    if (!migrate_use_multifd()) {
        return;
    }
    if (multifd_send_state->pages->used) {
        if (multifd_send_pages(rs) < 0) {
            error_report("%s: multifd_send_pages fail", __func__);
            return;
        }
    }
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        trace_multifd_send_sync_main_signal(p->id);

        qemu_mutex_lock(&p->mutex);

        if (p->quit) {
            error_report("%s: channel %d has already quit", __func__, i);
            qemu_mutex_unlock(&p->mutex);
            return;
        }

        p->packet_num = multifd_send_state->packet_num++;
        p->flags |= MULTIFD_FLAG_SYNC;
        p->pending_job++;
        qemu_file_update_transfer(rs->f, p->packet_len);
        ram_counters.multifd_bytes += p->packet_len;
        ram_counters.transferred += p->packet_len;
        qemu_mutex_unlock(&p->mutex);
        qemu_sem_post(&p->sem);
    }
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        trace_multifd_send_sync_main_wait(p->id);
        qemu_sem_wait(&p->sem_sync);
    }
    trace_multifd_send_sync_main(multifd_send_state->packet_num);
}

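/*
 * Per-channel send thread: after the initial handshake packet it loops
 * waiting on p->sem; for each pending job it fills the packet header,
 * writes the header and then the page data over the channel, and posts
 * channels_ready (plus sem_sync when MULTIFD_FLAG_SYNC was set).
 */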
static void *multifd_send_thread(void *opaque)
{
    MultiFDSendParams *p = opaque;
    Error *local_err = NULL;
    int ret = 0;
    uint32_t flags = 0;

    trace_multifd_send_thread_start(p->id);
    rcu_register_thread();

    if (multifd_send_initial_packet(p, &local_err) < 0) {
        ret = -1;
        goto out;
    }
    /* initial packet */
    p->num_packets = 1;

    while (true) {
        qemu_sem_wait(&p->sem);

        if (atomic_read(&multifd_send_state->exiting)) {
            break;
        }
        qemu_mutex_lock(&p->mutex);

        if (p->pending_job) {
            uint32_t used = p->pages->used;
            uint64_t packet_num = p->packet_num;
            flags = p->flags;

            p->next_packet_size = used * qemu_target_page_size();
            multifd_send_fill_packet(p);
            p->flags = 0;
            p->num_packets++;
            p->num_pages += used;
            p->pages->used = 0;
            p->pages->block = NULL;
            qemu_mutex_unlock(&p->mutex);

            trace_multifd_send(p->id, packet_num, used, flags,
                               p->next_packet_size);

            ret = qio_channel_write_all(p->c, (void *)p->packet,
                                        p->packet_len, &local_err);
            if (ret != 0) {
                break;
            }

            if (used) {
                ret = qio_channel_writev_all(p->c, p->pages->iov,
                                             used, &local_err);
                if (ret != 0) {
                    break;
                }
            }

            qemu_mutex_lock(&p->mutex);
            p->pending_job--;
            qemu_mutex_unlock(&p->mutex);

            if (flags & MULTIFD_FLAG_SYNC) {
                qemu_sem_post(&p->sem_sync);
            }
            qemu_sem_post(&multifd_send_state->channels_ready);
        } else if (p->quit) {
            qemu_mutex_unlock(&p->mutex);
            break;
        } else {
            qemu_mutex_unlock(&p->mutex);
            /* sometimes there are spurious wakeups */
        }
    }

out:
    if (local_err) {
        trace_multifd_send_error(p->id);
        multifd_send_terminate_threads(local_err);
    }

    /*
     * An error happened; this thread will exit, but it can't just
     * leave: wake up whoever is waiting on it.
     */
1212 if (ret != 0) {
Ivan Ren2f4aefd2019-08-29 10:16:36 +08001213 qemu_sem_post(&p->sem_sync);
Ivan Rena3ec6b72019-06-25 21:18:18 +08001214 qemu_sem_post(&multifd_send_state->channels_ready);
1215 }
1216
Juan Quintela66770702018-02-19 19:01:45 +01001217 qemu_mutex_lock(&p->mutex);
1218 p->running = false;
1219 qemu_mutex_unlock(&p->mutex);
1220
Lidong Chen74637e62018-08-06 21:29:29 +08001221 rcu_unregister_thread();
Juan Quintela408ea6a2018-04-06 18:28:59 +02001222 trace_multifd_send_thread_end(p->id, p->num_packets, p->num_pages);
1223
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001224 return NULL;
1225}
1226
Juan Quintela60df2d42018-03-07 07:56:15 +01001227static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
1228{
1229 MultiFDSendParams *p = opaque;
1230 QIOChannel *sioc = QIO_CHANNEL(qio_task_get_source(task));
1231 Error *local_err = NULL;
1232
Juan Quintela7dd59d02019-08-14 04:02:17 +02001233 trace_multifd_new_send_channel_async(p->id);
Juan Quintela60df2d42018-03-07 07:56:15 +01001234 if (qio_task_propagate_error(task, &local_err)) {
Fei Li1398b2e2019-01-13 22:08:47 +08001235 migrate_set_error(migrate_get_current(), local_err);
1236 multifd_save_cleanup();
Juan Quintela60df2d42018-03-07 07:56:15 +01001237 } else {
1238 p->c = QIO_CHANNEL(sioc);
1239 qio_channel_set_delay(p->c, false);
1240 p->running = true;
1241 qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
1242 QEMU_THREAD_JOINABLE);
Juan Quintela60df2d42018-03-07 07:56:15 +01001243 }
1244}
1245
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001246int multifd_save_setup(void)
1247{
1248 int thread_count;
Juan Quintelaefd1a1d2019-02-20 12:06:03 +01001249 uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size();
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001250 uint8_t i;
1251
1252 if (!migrate_use_multifd()) {
1253 return 0;
1254 }
1255 thread_count = migrate_multifd_channels();
1256 multifd_send_state = g_malloc0(sizeof(*multifd_send_state));
1257 multifd_send_state->params = g_new0(MultiFDSendParams, thread_count);
Juan Quintela34c55a92018-04-10 23:35:15 +02001258 multifd_send_state->pages = multifd_pages_init(page_count);
Juan Quintelab9ee2f72016-01-15 11:40:13 +01001259 qemu_sem_init(&multifd_send_state->channels_ready, 0);
Juan Quintela4d65a622019-12-18 05:36:22 +01001260 atomic_set(&multifd_send_state->exiting, 0);
Juan Quintela34c55a92018-04-10 23:35:15 +02001261
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001262 for (i = 0; i < thread_count; i++) {
1263 MultiFDSendParams *p = &multifd_send_state->params[i];
1264
1265 qemu_mutex_init(&p->mutex);
1266 qemu_sem_init(&p->sem, 0);
Juan Quintela18cdcea2019-08-14 04:02:14 +02001267 qemu_sem_init(&p->sem_sync, 0);
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001268 p->quit = false;
Juan Quintela0beb5ed2018-04-11 03:02:10 +02001269 p->pending_job = 0;
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001270 p->id = i;
Juan Quintela34c55a92018-04-10 23:35:15 +02001271 p->pages = multifd_pages_init(page_count);
Juan Quintela2a26c972018-04-04 11:26:58 +02001272 p->packet_len = sizeof(MultiFDPacket_t)
Juan Quinteladdac5cb2020-01-14 12:24:09 +01001273 + sizeof(uint64_t) * page_count;
Juan Quintela2a26c972018-04-04 11:26:58 +02001274 p->packet = g_malloc0(p->packet_len);
Wei Yang9985e1f2019-10-11 16:50:49 +08001275 p->packet->magic = cpu_to_be32(MULTIFD_MAGIC);
1276 p->packet->version = cpu_to_be32(MULTIFD_VERSION);
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001277 p->name = g_strdup_printf("multifdsend_%d", i);
Juan Quintela60df2d42018-03-07 07:56:15 +01001278 socket_send_channel_create(multifd_new_send_channel_async, p);
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001279 }
1280 return 0;
1281}
1282
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001283struct {
1284 MultiFDRecvParams *params;
1285 /* number of created threads */
1286 int count;
Juan Quintela6df264a2018-02-28 09:10:07 +01001287 /* syncs main thread and channels */
1288 QemuSemaphore sem_sync;
1289 /* global number of generated multifd packets */
1290 uint64_t packet_num;
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001291} *multifd_recv_state;
1292
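/**
 * multifd_recv_terminate_threads: ask all receive channels to stop
 *
 * If @err is set it is recorded in the migration state and, while the
 * migration is still in SETUP or ACTIVE, the state moves to FAILED.
 * Each channel is flagged to quit and, if already connected, shut
 * down so a thread blocked in qio_channel_read_all_eof() returns.
 *
 * @err: error that triggered the termination, or NULL on a normal quit
 */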
Juan Quintela66770702018-02-19 19:01:45 +01001293static void multifd_recv_terminate_threads(Error *err)
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001294{
1295 int i;
1296
Juan Quintela5558c912019-08-14 04:02:13 +02001297 trace_multifd_recv_terminate_threads(err != NULL);
1298
Juan Quintela7a169d72018-02-19 19:01:15 +01001299 if (err) {
1300 MigrationState *s = migrate_get_current();
1301 migrate_set_error(s, err);
1302 if (s->state == MIGRATION_STATUS_SETUP ||
1303 s->state == MIGRATION_STATUS_ACTIVE) {
1304 migrate_set_state(&s->state, s->state,
1305 MIGRATION_STATUS_FAILED);
1306 }
1307 }
1308
Juan Quintela66770702018-02-19 19:01:45 +01001309 for (i = 0; i < migrate_multifd_channels(); i++) {
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001310 MultiFDRecvParams *p = &multifd_recv_state->params[i];
1311
1312 qemu_mutex_lock(&p->mutex);
Juan Quintela3c3ca252019-07-24 11:46:24 +02001313 p->quit = true;
Juan Quintela7a5cc332018-04-18 00:49:19 +02001314 /* We could arrive here for two reasons:
1315 - normal quit, i.e. everything went fine, just finished
1316 - error quit: We close the channels so the channel threads
1317 finish the qio_channel_read_all_eof() */
Jiahui Cenf76e32e2019-10-23 12:30:02 +08001318 if (p->c) {
1319 qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
1320 }
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001321 qemu_mutex_unlock(&p->mutex);
1322 }
1323}
1324
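/**
 * multifd_load_cleanup: tear down the incoming side of multifd
 *
 * Asks every receive thread to quit, wakes up threads that may be
 * waiting on their sem_sync, joins them and then frees the channels,
 * packet buffers, pages and the global receive state.
 */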
1325int multifd_load_cleanup(Error **errp)
1326{
1327 int i;
1328 int ret = 0;
1329
1330 if (!migrate_use_multifd()) {
1331 return 0;
1332 }
Juan Quintela66770702018-02-19 19:01:45 +01001333 multifd_recv_terminate_threads(NULL);
1334 for (i = 0; i < migrate_multifd_channels(); i++) {
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001335 MultiFDRecvParams *p = &multifd_recv_state->params[i];
1336
Juan Quintela66770702018-02-19 19:01:45 +01001337 if (p->running) {
Juan Quintela3c3ca252019-07-24 11:46:24 +02001338 p->quit = true;
Ivan Renf193bc02019-06-25 21:18:19 +08001339 /*
1340 * multifd_recv_thread may be stuck at the MULTIFD_FLAG_SYNC handling code,
1341 * but it is harmless to wake it up here during the cleanup phase.
1342 */
1343 qemu_sem_post(&p->sem_sync);
Juan Quintela66770702018-02-19 19:01:45 +01001344 qemu_thread_join(&p->thread);
1345 }
Jiahui Cen9560a482019-10-23 11:47:37 +08001346 }
1347 for (i = 0; i < migrate_multifd_channels(); i++) {
1348 MultiFDRecvParams *p = &multifd_recv_state->params[i];
1349
Juan Quintela60df2d42018-03-07 07:56:15 +01001350 object_unref(OBJECT(p->c));
1351 p->c = NULL;
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001352 qemu_mutex_destroy(&p->mutex);
Juan Quintela6df264a2018-02-28 09:10:07 +01001353 qemu_sem_destroy(&p->sem_sync);
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001354 g_free(p->name);
1355 p->name = NULL;
Juan Quintela34c55a92018-04-10 23:35:15 +02001356 multifd_pages_clear(p->pages);
1357 p->pages = NULL;
Juan Quintela2a26c972018-04-04 11:26:58 +02001358 p->packet_len = 0;
1359 g_free(p->packet);
1360 p->packet = NULL;
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001361 }
Juan Quintela6df264a2018-02-28 09:10:07 +01001362 qemu_sem_destroy(&multifd_recv_state->sem_sync);
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001363 g_free(multifd_recv_state->params);
1364 multifd_recv_state->params = NULL;
1365 g_free(multifd_recv_state);
1366 multifd_recv_state = NULL;
1367
1368 return ret;
1369}
1370
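/**
 * multifd_recv_sync_main: synchronize the main thread with all channels
 *
 * Waits until every channel has posted sem_sync for a packet carrying
 * MULTIFD_FLAG_SYNC, records the highest packet_num seen by any
 * channel as the global one, and then releases the channels again.
 */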
Juan Quintela6df264a2018-02-28 09:10:07 +01001371static void multifd_recv_sync_main(void)
1372{
1373 int i;
1374
1375 if (!migrate_use_multifd()) {
1376 return;
1377 }
1378 for (i = 0; i < migrate_multifd_channels(); i++) {
1379 MultiFDRecvParams *p = &multifd_recv_state->params[i];
1380
Juan Quintela6df264a2018-02-28 09:10:07 +01001381 trace_multifd_recv_sync_main_wait(p->id);
1382 qemu_sem_wait(&multifd_recv_state->sem_sync);
Wei Yang77568ea2019-06-04 10:35:40 +08001383 }
1384 for (i = 0; i < migrate_multifd_channels(); i++) {
1385 MultiFDRecvParams *p = &multifd_recv_state->params[i];
1386
Juan Quintela6df264a2018-02-28 09:10:07 +01001387 qemu_mutex_lock(&p->mutex);
1388 if (multifd_recv_state->packet_num < p->packet_num) {
1389 multifd_recv_state->packet_num = p->packet_num;
1390 }
1391 qemu_mutex_unlock(&p->mutex);
Juan Quintela6df264a2018-02-28 09:10:07 +01001392 trace_multifd_recv_sync_main_signal(p->id);
Juan Quintela6df264a2018-02-28 09:10:07 +01001393 qemu_sem_post(&p->sem_sync);
1394 }
1395 trace_multifd_recv_sync_main(multifd_recv_state->packet_num);
1396}
1397
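/**
 * multifd_recv_thread: per-channel receive loop
 *
 * Reads one packet header at a time, unpacks it with
 * multifd_recv_unfill_packet(), then reads the page data it describes.
 * On MULTIFD_FLAG_SYNC it rendezvouses with the main thread through
 * the sem_sync semaphores. The loop ends on EOF, error or quit.
 *
 * @opaque: MultiFDRecvParams of this channel
 */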
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001398static void *multifd_recv_thread(void *opaque)
1399{
1400 MultiFDRecvParams *p = opaque;
Juan Quintela2a26c972018-04-04 11:26:58 +02001401 Error *local_err = NULL;
1402 int ret;
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001403
Juan Quintela408ea6a2018-04-06 18:28:59 +02001404 trace_multifd_recv_thread_start(p->id);
Lidong Chen74637e62018-08-06 21:29:29 +08001405 rcu_register_thread();
Juan Quintela408ea6a2018-04-06 18:28:59 +02001406
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001407 while (true) {
Juan Quintela6df264a2018-02-28 09:10:07 +01001408 uint32_t used;
1409 uint32_t flags;
1410
Juan Quintela3c3ca252019-07-24 11:46:24 +02001411 if (p->quit) {
1412 break;
1413 }
1414
Juan Quintela8b2db7f2018-04-11 12:36:13 +02001415 ret = qio_channel_read_all_eof(p->c, (void *)p->packet,
1416 p->packet_len, &local_err);
1417 if (ret == 0) { /* EOF */
1418 break;
1419 }
1420 if (ret == -1) { /* Error */
1421 break;
1422 }
Juan Quintela6df264a2018-02-28 09:10:07 +01001423
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001424 qemu_mutex_lock(&p->mutex);
Juan Quintela6df264a2018-02-28 09:10:07 +01001425 ret = multifd_recv_unfill_packet(p, &local_err);
1426 if (ret) {
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001427 qemu_mutex_unlock(&p->mutex);
1428 break;
1429 }
Juan Quintela6df264a2018-02-28 09:10:07 +01001430
1431 used = p->pages->used;
1432 flags = p->flags;
Juan Quintela2a34ee52019-01-04 19:45:39 +01001433 trace_multifd_recv(p->id, p->packet_num, used, flags,
1434 p->next_packet_size);
Juan Quintela6df264a2018-02-28 09:10:07 +01001435 p->num_packets++;
1436 p->num_pages += used;
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001437 qemu_mutex_unlock(&p->mutex);
Juan Quintela6df264a2018-02-28 09:10:07 +01001438
Juan Quintelaad24c7c2019-01-04 19:12:35 +01001439 if (used) {
1440 ret = qio_channel_readv_all(p->c, p->pages->iov,
1441 used, &local_err);
1442 if (ret != 0) {
1443 break;
1444 }
Juan Quintela8b2db7f2018-04-11 12:36:13 +02001445 }
1446
Juan Quintela6df264a2018-02-28 09:10:07 +01001447 if (flags & MULTIFD_FLAG_SYNC) {
1448 qemu_sem_post(&multifd_recv_state->sem_sync);
1449 qemu_sem_wait(&p->sem_sync);
1450 }
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001451 }
1452
Juan Quintelad82628e2018-04-11 02:44:24 +02001453 if (local_err) {
1454 multifd_recv_terminate_threads(local_err);
1455 }
Juan Quintela66770702018-02-19 19:01:45 +01001456 qemu_mutex_lock(&p->mutex);
1457 p->running = false;
1458 qemu_mutex_unlock(&p->mutex);
1459
Lidong Chen74637e62018-08-06 21:29:29 +08001460 rcu_unregister_thread();
Juan Quintela408ea6a2018-04-06 18:28:59 +02001461 trace_multifd_recv_thread_end(p->id, p->num_packets, p->num_pages);
1462
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001463 return NULL;
1464}
1465
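/**
 * multifd_load_setup: allocate the incoming side of multifd migration
 *
 * Creates the global receive state plus one MultiFDRecvParams per
 * channel with its mutex, sem_sync, pages array and packet buffer.
 * The receive threads are started later, from multifd_recv_new_channel(),
 * as each connection arrives; does nothing if multifd is not in use.
 *
 * Returns zero
 */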
1466int multifd_load_setup(void)
1467{
1468 int thread_count;
Juan Quintelaefd1a1d2019-02-20 12:06:03 +01001469 uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size();
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001470 uint8_t i;
1471
1472 if (!migrate_use_multifd()) {
1473 return 0;
1474 }
1475 thread_count = migrate_multifd_channels();
1476 multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state));
1477 multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count);
Juan Quintela66770702018-02-19 19:01:45 +01001478 atomic_set(&multifd_recv_state->count, 0);
Juan Quintela6df264a2018-02-28 09:10:07 +01001479 qemu_sem_init(&multifd_recv_state->sem_sync, 0);
Juan Quintela34c55a92018-04-10 23:35:15 +02001480
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001481 for (i = 0; i < thread_count; i++) {
1482 MultiFDRecvParams *p = &multifd_recv_state->params[i];
1483
1484 qemu_mutex_init(&p->mutex);
Juan Quintela6df264a2018-02-28 09:10:07 +01001485 qemu_sem_init(&p->sem_sync, 0);
Juan Quintela3c3ca252019-07-24 11:46:24 +02001486 p->quit = false;
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001487 p->id = i;
Juan Quintela34c55a92018-04-10 23:35:15 +02001488 p->pages = multifd_pages_init(page_count);
Juan Quintela2a26c972018-04-04 11:26:58 +02001489 p->packet_len = sizeof(MultiFDPacket_t)
Juan Quinteladdac5cb2020-01-14 12:24:09 +01001490 + sizeof(uint64_t) * page_count;
Juan Quintela2a26c972018-04-04 11:26:58 +02001491 p->packet = g_malloc0(p->packet_len);
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001492 p->name = g_strdup_printf("multifdrecv_%d", i);
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001493 }
1494 return 0;
1495}
1496
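/**
 * multifd_recv_all_channels_created: have all channels connected?
 *
 * Returns true when the number of connected channels matches
 * migrate_multifd_channels(), or trivially when multifd is not in use.
 */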
Juan Quintela62c1e0c2018-02-19 18:59:02 +01001497bool multifd_recv_all_channels_created(void)
1498{
1499 int thread_count = migrate_multifd_channels();
1500
1501 if (!migrate_use_multifd()) {
1502 return true;
1503 }
1504
1505 return thread_count == atomic_read(&multifd_recv_state->count);
1506}
1507
Fei Li49ed0d22019-01-13 22:08:46 +08001508/*
1509 * Try to receive all multifd channels to get ready for the migration.
1510 * - Return true and do not set @errp when correctly receiving all channels;
1511 * - Return false and do not set @errp when correctly receiving the current one;
1512 * - Return false and set @errp when failing to receive the current channel.
1513 */
1514bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
Juan Quintela71bb07d2018-02-19 19:01:03 +01001515{
Juan Quintela60df2d42018-03-07 07:56:15 +01001516 MultiFDRecvParams *p;
Juan Quintelaaf8b7d22018-04-06 19:32:12 +02001517 Error *local_err = NULL;
1518 int id;
Juan Quintela60df2d42018-03-07 07:56:15 +01001519
Juan Quintelaaf8b7d22018-04-06 19:32:12 +02001520 id = multifd_recv_initial_packet(ioc, &local_err);
1521 if (id < 0) {
1522 multifd_recv_terminate_threads(local_err);
Fei Li49ed0d22019-01-13 22:08:46 +08001523 error_propagate_prepend(errp, local_err,
1524 "failed to receive packet"
1525 " via multifd channel %d: ",
1526 atomic_read(&multifd_recv_state->count));
Peter Xu81e62052018-06-27 21:22:44 +08001527 return false;
Juan Quintelaaf8b7d22018-04-06 19:32:12 +02001528 }
Juan Quintela7dd59d02019-08-14 04:02:17 +02001529 trace_multifd_recv_new_channel(id);
Juan Quintelaaf8b7d22018-04-06 19:32:12 +02001530
1531 p = &multifd_recv_state->params[id];
1532 if (p->c != NULL) {
1533 error_setg(&local_err, "multifd: received id '%d' already setup",
1534 id);
1535 multifd_recv_terminate_threads(local_err);
Fei Li49ed0d22019-01-13 22:08:46 +08001536 error_propagate(errp, local_err);
Peter Xu81e62052018-06-27 21:22:44 +08001537 return false;
Juan Quintelaaf8b7d22018-04-06 19:32:12 +02001538 }
Juan Quintela60df2d42018-03-07 07:56:15 +01001539 p->c = ioc;
1540 object_ref(OBJECT(ioc));
Juan Quintela408ea6a2018-04-06 18:28:59 +02001541 /* initial packet */
1542 p->num_packets = 1;
Juan Quintela60df2d42018-03-07 07:56:15 +01001543
1544 p->running = true;
1545 qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
1546 QEMU_THREAD_JOINABLE);
1547 atomic_inc(&multifd_recv_state->count);
Fei Li49ed0d22019-01-13 22:08:46 +08001548 return atomic_read(&multifd_recv_state->count) ==
1549 migrate_multifd_channels();
Juan Quintela71bb07d2018-02-19 19:01:03 +01001550}
1551
Juan Quintela56e93d22015-05-07 19:33:31 +02001552/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001553 * save_page_header: write page header to wire
Juan Quintela56e93d22015-05-07 19:33:31 +02001554 *
1555 * If this is the 1st block, it also writes the block identification
1556 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001557 * Returns the number of bytes written
Juan Quintela56e93d22015-05-07 19:33:31 +02001558 *
1559 * @f: QEMUFile where to send the data
1560 * @block: block that contains the page we want to send
1561 * @offset: offset inside the block for the page
1562 * in the lower bits, it contains flags
1563 */
Juan Quintela2bf3aa82017-05-10 13:28:13 +02001564static size_t save_page_header(RAMState *rs, QEMUFile *f, RAMBlock *block,
1565 ram_addr_t offset)
Juan Quintela56e93d22015-05-07 19:33:31 +02001566{
Liang Li9f5f3802015-07-13 17:34:10 +08001567 size_t size, len;
Juan Quintela56e93d22015-05-07 19:33:31 +02001568
Juan Quintela24795692017-03-21 11:45:01 +01001569 if (block == rs->last_sent_block) {
1570 offset |= RAM_SAVE_FLAG_CONTINUE;
1571 }
Juan Quintela2bf3aa82017-05-10 13:28:13 +02001572 qemu_put_be64(f, offset);
Juan Quintela56e93d22015-05-07 19:33:31 +02001573 size = 8;
1574
1575 if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
Liang Li9f5f3802015-07-13 17:34:10 +08001576 len = strlen(block->idstr);
Juan Quintela2bf3aa82017-05-10 13:28:13 +02001577 qemu_put_byte(f, len);
1578 qemu_put_buffer(f, (uint8_t *)block->idstr, len);
Liang Li9f5f3802015-07-13 17:34:10 +08001579 size += 1 + len;
Juan Quintela24795692017-03-21 11:45:01 +01001580 rs->last_sent_block = block;
Juan Quintela56e93d22015-05-07 19:33:31 +02001581 }
1582 return size;
1583}
1584
Juan Quintela3d0684b2017-03-23 15:06:39 +01001585/**
1586 * mig_throttle_guest_down: throttle down the guest
1587 *
1588 * Reduce amount of guest cpu execution to hopefully slow down memory
1589 * writes. If guest dirty memory rate is reduced below the rate at
1590 * which we can transfer pages to the destination then we should be
1591 * able to complete migration. Some workloads dirty memory way too
1592 * fast and will not effectively converge, even with auto-converge.
Jason J. Herne070afca2015-09-08 13:12:35 -04001593 */
1594static void mig_throttle_guest_down(void)
1595{
1596 MigrationState *s = migrate_get_current();
Daniel P. Berrange2594f562016-04-27 11:05:14 +01001597 uint64_t pct_initial = s->parameters.cpu_throttle_initial;
1598 uint64_t pct_increment = s->parameters.cpu_throttle_increment;
Li Qiang4cbc9c72018-08-01 06:00:20 -07001599 int pct_max = s->parameters.max_cpu_throttle;
Jason J. Herne070afca2015-09-08 13:12:35 -04001600
1601 /* We have not started throttling yet. Let's start it. */
1602 if (!cpu_throttle_active()) {
1603 cpu_throttle_set(pct_initial);
1604 } else {
1605 /* Throttling already on, just increase the rate */
Li Qiang4cbc9c72018-08-01 06:00:20 -07001606 cpu_throttle_set(MIN(cpu_throttle_get_percentage() + pct_increment,
1607 pct_max));
Jason J. Herne070afca2015-09-08 13:12:35 -04001608 }
1609}
1610
Juan Quintela3d0684b2017-03-23 15:06:39 +01001611/**
1612 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
1613 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001614 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001615 * @current_addr: address for the zero page
1616 *
1617 * Update the xbzrle cache to reflect a page that's been sent as all 0.
Juan Quintela56e93d22015-05-07 19:33:31 +02001618 * The important thing is that a stale (not-yet-0'd) page be replaced
1619 * by the new data.
1620 * As a bonus, if the page wasn't in the cache it gets added so that
Juan Quintela3d0684b2017-03-23 15:06:39 +01001621 * when a small write is made into the 0'd page it gets XBZRLE sent.
Juan Quintela56e93d22015-05-07 19:33:31 +02001622 */
Juan Quintela6f37bb82017-03-13 19:26:29 +01001623static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
Juan Quintela56e93d22015-05-07 19:33:31 +02001624{
Juan Quintela6f37bb82017-03-13 19:26:29 +01001625 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
Juan Quintela56e93d22015-05-07 19:33:31 +02001626 return;
1627 }
1628
1629 /* We don't care if this fails to allocate a new cache page
1630 * as long as it updates an old one */
Juan Quintelac00e0922017-05-09 16:22:01 +02001631 cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page,
Juan Quintela93604472017-06-06 19:49:03 +02001632 ram_counters.dirty_sync_count);
Juan Quintela56e93d22015-05-07 19:33:31 +02001633}
1634
1635#define ENCODING_FLAG_XBZRLE 0x1
1636
1637/**
1638 * save_xbzrle_page: compress and send current page
1639 *
1640 * Returns: 1 means that we wrote the page
1641 * 0 means that page is identical to the one already sent
1642 * -1 means that xbzrle would be longer than normal
1643 *
Juan Quintela5a987732017-03-13 19:39:02 +01001644 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001645 * @current_data: pointer to the address of the page contents
1646 * @current_addr: addr of the page
Juan Quintela56e93d22015-05-07 19:33:31 +02001647 * @block: block that contains the page we want to send
1648 * @offset: offset inside the block for the page
1649 * @last_stage: if we are at the completion stage
Juan Quintela56e93d22015-05-07 19:33:31 +02001650 */
Juan Quintela204b88b2017-03-15 09:16:57 +01001651static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
Juan Quintela56e93d22015-05-07 19:33:31 +02001652 ram_addr_t current_addr, RAMBlock *block,
Juan Quintela072c2512017-03-14 10:27:31 +01001653 ram_addr_t offset, bool last_stage)
Juan Quintela56e93d22015-05-07 19:33:31 +02001654{
1655 int encoded_len = 0, bytes_xbzrle;
1656 uint8_t *prev_cached_page;
1657
Juan Quintela93604472017-06-06 19:49:03 +02001658 if (!cache_is_cached(XBZRLE.cache, current_addr,
1659 ram_counters.dirty_sync_count)) {
1660 xbzrle_counters.cache_miss++;
Juan Quintela56e93d22015-05-07 19:33:31 +02001661 if (!last_stage) {
1662 if (cache_insert(XBZRLE.cache, current_addr, *current_data,
Juan Quintela93604472017-06-06 19:49:03 +02001663 ram_counters.dirty_sync_count) == -1) {
Juan Quintela56e93d22015-05-07 19:33:31 +02001664 return -1;
1665 } else {
1666 /* update *current_data when the page has been
1667 inserted into cache */
1668 *current_data = get_cached_data(XBZRLE.cache, current_addr);
1669 }
1670 }
1671 return -1;
1672 }
1673
1674 prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
1675
1676 /* save current buffer into memory */
1677 memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
1678
1679 /* XBZRLE encoding (if there is no overflow) */
1680 encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
1681 TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
1682 TARGET_PAGE_SIZE);
Wei Yangca353802019-06-10 08:41:59 +08001683
1684 /*
1685 * Update the cache contents, so that it corresponds to the data
1686 * sent, in all cases except where we skip the page.
1687 */
1688 if (!last_stage && encoded_len != 0) {
1689 memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
1690 /*
1691 * In the case where we couldn't compress, ensure that the caller
1692 * sends the data from the cache, since the guest might have
1693 * changed the RAM since we copied it.
1694 */
1695 *current_data = prev_cached_page;
1696 }
1697
Juan Quintela56e93d22015-05-07 19:33:31 +02001698 if (encoded_len == 0) {
Juan Quintela55c44462017-01-23 22:32:05 +01001699 trace_save_xbzrle_page_skipping();
Juan Quintela56e93d22015-05-07 19:33:31 +02001700 return 0;
1701 } else if (encoded_len == -1) {
Juan Quintela55c44462017-01-23 22:32:05 +01001702 trace_save_xbzrle_page_overflow();
Juan Quintela93604472017-06-06 19:49:03 +02001703 xbzrle_counters.overflow++;
Juan Quintela56e93d22015-05-07 19:33:31 +02001704 return -1;
1705 }
1706
Juan Quintela56e93d22015-05-07 19:33:31 +02001707 /* Send XBZRLE based compressed page */
Juan Quintela2bf3aa82017-05-10 13:28:13 +02001708 bytes_xbzrle = save_page_header(rs, rs->f, block,
Juan Quintela204b88b2017-03-15 09:16:57 +01001709 offset | RAM_SAVE_FLAG_XBZRLE);
1710 qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE);
1711 qemu_put_be16(rs->f, encoded_len);
1712 qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len);
Juan Quintela56e93d22015-05-07 19:33:31 +02001713 bytes_xbzrle += encoded_len + 1 + 2;
Juan Quintela93604472017-06-06 19:49:03 +02001714 xbzrle_counters.pages++;
1715 xbzrle_counters.bytes += bytes_xbzrle;
1716 ram_counters.transferred += bytes_xbzrle;
Juan Quintela56e93d22015-05-07 19:33:31 +02001717
1718 return 1;
1719}
1720
Juan Quintela3d0684b2017-03-23 15:06:39 +01001721/**
1722 * migration_bitmap_find_dirty: find the next dirty page from start
Dr. David Alan Gilbertf3f491f2015-11-05 18:11:01 +00001723 *
Wei Yanga5f7b1a2019-05-11 07:37:29 +08001724 * Returns the page offset within memory region of the start of a dirty page
Juan Quintela3d0684b2017-03-23 15:06:39 +01001725 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001726 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001727 * @rb: RAMBlock where to search for dirty pages
Juan Quintelaa935e302017-03-21 15:36:51 +01001728 * @start: page where we start the search
Dr. David Alan Gilbertf3f491f2015-11-05 18:11:01 +00001729 */
Juan Quintela56e93d22015-05-07 19:33:31 +02001730static inline
Juan Quintelaa935e302017-03-21 15:36:51 +01001731unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
Juan Quintelaf20e2862017-03-21 16:19:05 +01001732 unsigned long start)
Juan Quintela56e93d22015-05-07 19:33:31 +02001733{
Juan Quintela6b6712e2017-03-22 15:18:04 +01001734 unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
1735 unsigned long *bitmap = rb->bmap;
Juan Quintela56e93d22015-05-07 19:33:31 +02001736 unsigned long next;
1737
Yury Kotovfbd162e2019-02-15 20:45:46 +03001738 if (ramblock_is_ignored(rb)) {
Cédric Le Goaterb895de52018-05-14 08:57:00 +02001739 return size;
1740 }
1741
Wei Wang6eeb63f2018-12-11 16:24:52 +08001742 /*
1743 * When the free page optimization is enabled, we need to check the bitmap
1744 * to send the non-free pages rather than all the pages in the bulk stage.
1745 */
1746 if (!rs->fpo_enabled && rs->ram_bulk_stage && start > 0) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01001747 next = start + 1;
Juan Quintela56e93d22015-05-07 19:33:31 +02001748 } else {
Juan Quintela6b6712e2017-03-22 15:18:04 +01001749 next = find_next_bit(bitmap, size, start);
Juan Quintela56e93d22015-05-07 19:33:31 +02001750 }
1751
Juan Quintela6b6712e2017-03-22 15:18:04 +01001752 return next;
Juan Quintela56e93d22015-05-07 19:33:31 +02001753}
1754
Juan Quintela06b10682017-03-21 15:18:05 +01001755static inline bool migration_bitmap_clear_dirty(RAMState *rs,
Juan Quintelaf20e2862017-03-21 16:19:05 +01001756 RAMBlock *rb,
1757 unsigned long page)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001758{
1759 bool ret;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001760
Wei Wang386a9072018-12-11 16:24:49 +08001761 qemu_mutex_lock(&rs->bitmap_mutex);
Peter Xu002cad62019-06-03 14:50:56 +08001762
1763 /*
1764 * Clear dirty bitmap if needed. This _must_ be called before we
1765 * send any of the pages in the chunk because we need to make sure
1766 * we can capture further page content changes when we sync dirty
1767 * log the next time. So as long as we are going to send any of
1768 * the pages in the chunk we clear the remote dirty bitmap for all.
1769 * Clearing it earlier won't be a problem, but too late will.
1770 */
1771 if (rb->clear_bmap && clear_bmap_test_and_clear(rb, page)) {
1772 uint8_t shift = rb->clear_bmap_shift;
1773 hwaddr size = 1ULL << (TARGET_PAGE_BITS + shift);
Alexey Romko8bba0042020-01-10 14:51:34 +01001774 hwaddr start = (((ram_addr_t)page) << TARGET_PAGE_BITS) & (-size);
Peter Xu002cad62019-06-03 14:50:56 +08001775
1776 /*
1777 * CLEAR_BITMAP_SHIFT_MIN should always guarantee this... this
1778 * can make things easier sometimes since the start address
1779 * of the small chunk will then always be 64-page aligned so the
1780 * bitmap will always be aligned to unsigned long. We should
1781 * even be able to remove this restriction but I'm simply
1782 * keeping it.
1783 */
1784 assert(shift >= 6);
1785 trace_migration_bitmap_clear_dirty(rb->idstr, start, size, page);
1786 memory_region_clear_dirty_bitmap(rb->mr, start, size);
1787 }
1788
Juan Quintela6b6712e2017-03-22 15:18:04 +01001789 ret = test_and_clear_bit(page, rb->bmap);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001790
1791 if (ret) {
Juan Quintela0d8ec882017-03-13 21:21:41 +01001792 rs->migration_dirty_pages--;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001793 }
Wei Wang386a9072018-12-11 16:24:49 +08001794 qemu_mutex_unlock(&rs->bitmap_mutex);
1795
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001796 return ret;
1797}
1798
Peter Xu267691b2019-06-03 14:50:46 +08001799/* Called with RCU critical section */
Wei Yang7a3e9572019-08-08 11:31:55 +08001800static void ramblock_sync_dirty_bitmap(RAMState *rs, RAMBlock *rb)
Juan Quintela56e93d22015-05-07 19:33:31 +02001801{
Juan Quintela0d8ec882017-03-13 21:21:41 +01001802 rs->migration_dirty_pages +=
Wei Yang5d0980a2019-07-18 09:25:47 +08001803 cpu_physical_memory_sync_dirty_bitmap(rb, 0, rb->used_length,
Juan Quintela0d8ec882017-03-13 21:21:41 +01001804 &rs->num_dirty_pages_period);
Juan Quintela56e93d22015-05-07 19:33:31 +02001805}
1806
Juan Quintela3d0684b2017-03-23 15:06:39 +01001807/**
1808 * ram_pagesize_summary: calculate all the pagesizes of a VM
1809 *
1810 * Returns a summary bitmap of the page sizes of all RAMBlocks
1811 *
1812 * For VMs with just normal pages this is equivalent to the host page
1813 * size. If it's got some huge pages then it's the OR of all the
1814 * different page sizes.
Dr. David Alan Gilberte8ca1db2017-02-24 18:28:29 +00001815 */
1816uint64_t ram_pagesize_summary(void)
1817{
1818 RAMBlock *block;
1819 uint64_t summary = 0;
1820
Yury Kotovfbd162e2019-02-15 20:45:46 +03001821 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Dr. David Alan Gilberte8ca1db2017-02-24 18:28:29 +00001822 summary |= block->page_size;
1823 }
1824
1825 return summary;
1826}
1827
Xiao Guangrongaecbfe92019-01-11 14:37:30 +08001828uint64_t ram_get_total_transferred_pages(void)
1829{
1830 return ram_counters.normal + ram_counters.duplicate +
1831 compression_counters.pages + xbzrle_counters.pages;
1832}
1833
Xiao Guangrongb7340352018-06-04 17:55:12 +08001834static void migration_update_rates(RAMState *rs, int64_t end_time)
1835{
Xiao Guangrongbe8b02e2018-09-03 17:26:42 +08001836 uint64_t page_count = rs->target_page_count - rs->target_page_count_prev;
Xiao Guangrong76e03002018-09-06 15:01:00 +08001837 double compressed_size;
Xiao Guangrongb7340352018-06-04 17:55:12 +08001838
1839 /* calculate period counters */
1840 ram_counters.dirty_pages_rate = rs->num_dirty_pages_period * 1000
1841 / (end_time - rs->time_last_bitmap_sync);
1842
Xiao Guangrongbe8b02e2018-09-03 17:26:42 +08001843 if (!page_count) {
Xiao Guangrongb7340352018-06-04 17:55:12 +08001844 return;
1845 }
1846
1847 if (migrate_use_xbzrle()) {
1848 xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss -
Xiao Guangrongbe8b02e2018-09-03 17:26:42 +08001849 rs->xbzrle_cache_miss_prev) / page_count;
Xiao Guangrongb7340352018-06-04 17:55:12 +08001850 rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
1851 }
Xiao Guangrong76e03002018-09-06 15:01:00 +08001852
1853 if (migrate_use_compression()) {
1854 compression_counters.busy_rate = (double)(compression_counters.busy -
1855 rs->compress_thread_busy_prev) / page_count;
1856 rs->compress_thread_busy_prev = compression_counters.busy;
1857
1858 compressed_size = compression_counters.compressed_size -
1859 rs->compressed_size_prev;
1860 if (compressed_size) {
1861 double uncompressed_size = (compression_counters.pages -
1862 rs->compress_pages_prev) * TARGET_PAGE_SIZE;
1863
1864 /* Compression-Ratio = Uncompressed-size / Compressed-size */
1865 compression_counters.compression_rate =
1866 uncompressed_size / compressed_size;
1867
1868 rs->compress_pages_prev = compression_counters.pages;
1869 rs->compressed_size_prev = compression_counters.compressed_size;
1870 }
1871 }
Xiao Guangrongb7340352018-06-04 17:55:12 +08001872}
1873
Juan Quintela8d820d62017-03-13 19:35:50 +01001874static void migration_bitmap_sync(RAMState *rs)
Juan Quintela56e93d22015-05-07 19:33:31 +02001875{
1876 RAMBlock *block;
Juan Quintela56e93d22015-05-07 19:33:31 +02001877 int64_t end_time;
Juan Quintelac4bdf0c2017-03-28 14:59:54 +02001878 uint64_t bytes_xfer_now;
Juan Quintela56e93d22015-05-07 19:33:31 +02001879
Juan Quintela93604472017-06-06 19:49:03 +02001880 ram_counters.dirty_sync_count++;
Juan Quintela56e93d22015-05-07 19:33:31 +02001881
Juan Quintelaf664da82017-03-13 19:44:57 +01001882 if (!rs->time_last_bitmap_sync) {
1883 rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
Juan Quintela56e93d22015-05-07 19:33:31 +02001884 }
1885
1886 trace_migration_bitmap_sync_start();
Paolo Bonzini9c1f8f42016-09-22 16:08:31 +02001887 memory_global_dirty_log_sync();
Juan Quintela56e93d22015-05-07 19:33:31 +02001888
Juan Quintela108cfae2017-03-13 21:38:09 +01001889 qemu_mutex_lock(&rs->bitmap_mutex);
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01001890 WITH_RCU_READ_LOCK_GUARD() {
1891 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
1892 ramblock_sync_dirty_bitmap(rs, block);
1893 }
1894 ram_counters.remaining = ram_bytes_remaining();
Juan Quintela56e93d22015-05-07 19:33:31 +02001895 }
Juan Quintela108cfae2017-03-13 21:38:09 +01001896 qemu_mutex_unlock(&rs->bitmap_mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02001897
Paolo Bonzini9458a9a2018-02-06 18:37:39 +01001898 memory_global_after_dirty_log_sync();
Juan Quintelaa66cd902017-03-28 15:02:43 +02001899 trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);
Chao Fan1ffb5df2017-03-14 09:55:07 +08001900
Juan Quintela56e93d22015-05-07 19:33:31 +02001901 end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
1902
1903 /* more than 1 second = 1000 milliseconds */
Juan Quintelaf664da82017-03-13 19:44:57 +01001904 if (end_time > rs->time_last_bitmap_sync + 1000) {
Juan Quintela93604472017-06-06 19:49:03 +02001905 bytes_xfer_now = ram_counters.transferred;
Felipe Franciosid693c6f2017-05-24 17:10:01 +01001906
Peter Lieven9ac78b62017-09-26 12:33:16 +02001907 /* During block migration the auto-converge logic incorrectly detects
1908 * that ram migration makes no progress. Avoid this by disabling the
1909 * throttling logic during the bulk phase of block migration. */
1910 if (migrate_auto_converge() && !blk_mig_bulk_active()) {
Juan Quintela56e93d22015-05-07 19:33:31 +02001911 /* The following detection logic can be refined later. For now:
1912 Check to see if the dirtied bytes are 50% more than the approx.
1913 amount of bytes that just got transferred since the last time we
Jason J. Herne070afca2015-09-08 13:12:35 -04001914 were in this routine. If that happens twice, start or increase
1915 throttling */
Jason J. Herne070afca2015-09-08 13:12:35 -04001916
Felipe Franciosid693c6f2017-05-24 17:10:01 +01001917 if ((rs->num_dirty_pages_period * TARGET_PAGE_SIZE >
Juan Quintelaeac74152017-03-28 14:59:01 +02001918 (bytes_xfer_now - rs->bytes_xfer_prev) / 2) &&
Felipe Franciosib4a3c642017-05-24 17:10:03 +01001919 (++rs->dirty_rate_high_cnt >= 2)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02001920 trace_migration_throttle();
Juan Quintela8d820d62017-03-13 19:35:50 +01001921 rs->dirty_rate_high_cnt = 0;
Jason J. Herne070afca2015-09-08 13:12:35 -04001922 mig_throttle_guest_down();
Felipe Franciosid693c6f2017-05-24 17:10:01 +01001923 }
Juan Quintela56e93d22015-05-07 19:33:31 +02001924 }
Jason J. Herne070afca2015-09-08 13:12:35 -04001925
Xiao Guangrongb7340352018-06-04 17:55:12 +08001926 migration_update_rates(rs, end_time);
1927
Xiao Guangrongbe8b02e2018-09-03 17:26:42 +08001928 rs->target_page_count_prev = rs->target_page_count;
Felipe Franciosid693c6f2017-05-24 17:10:01 +01001929
1930 /* reset period counters */
Juan Quintelaf664da82017-03-13 19:44:57 +01001931 rs->time_last_bitmap_sync = end_time;
Juan Quintelaa66cd902017-03-28 15:02:43 +02001932 rs->num_dirty_pages_period = 0;
Felipe Franciosid2a4d852017-05-24 17:10:02 +01001933 rs->bytes_xfer_prev = bytes_xfer_now;
Juan Quintela56e93d22015-05-07 19:33:31 +02001934 }
Dr. David Alan Gilbert4addcd42015-12-16 11:47:36 +00001935 if (migrate_use_events()) {
Peter Xu3ab72382018-08-15 21:37:37 +08001936 qapi_event_send_migration_pass(ram_counters.dirty_sync_count);
Dr. David Alan Gilbert4addcd42015-12-16 11:47:36 +00001937 }
Juan Quintela56e93d22015-05-07 19:33:31 +02001938}
1939
Wei Wangbd227062018-12-11 16:24:51 +08001940static void migration_bitmap_sync_precopy(RAMState *rs)
1941{
1942 Error *local_err = NULL;
1943
1944 /*
1945 * The current notifier usage is just an optimization to migration, so we
1946 * don't stop the normal migration process in the error case.
1947 */
1948 if (precopy_notify(PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC, &local_err)) {
1949 error_report_err(local_err);
1950 }
1951
1952 migration_bitmap_sync(rs);
1953
1954 if (precopy_notify(PRECOPY_NOTIFY_AFTER_BITMAP_SYNC, &local_err)) {
1955 error_report_err(local_err);
1956 }
1957}
1958
Juan Quintela56e93d22015-05-07 19:33:31 +02001959/**
Xiao Guangrong6c97ec52018-08-21 16:10:22 +08001960 * save_zero_page_to_file: send the zero page to the file
1961 *
1962 * Returns the size of data written to the file, 0 means the page is not
1963 * a zero page
1964 *
1965 * @rs: current RAM state
1966 * @file: the file where the data is saved
1967 * @block: block that contains the page we want to send
1968 * @offset: offset inside the block for the page
1969 */
1970static int save_zero_page_to_file(RAMState *rs, QEMUFile *file,
1971 RAMBlock *block, ram_addr_t offset)
1972{
1973 uint8_t *p = block->host + offset;
1974 int len = 0;
1975
1976 if (is_zero_range(p, TARGET_PAGE_SIZE)) {
1977 len += save_page_header(rs, file, block, offset | RAM_SAVE_FLAG_ZERO);
1978 qemu_put_byte(file, 0);
1979 len += 1;
1980 }
1981 return len;
1982}
1983
1984/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001985 * save_zero_page: send the zero page to the stream
Juan Quintela56e93d22015-05-07 19:33:31 +02001986 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001987 * Returns the number of pages written.
Juan Quintela56e93d22015-05-07 19:33:31 +02001988 *
Juan Quintelaf7ccd612017-03-13 20:30:21 +01001989 * @rs: current RAM state
Juan Quintela56e93d22015-05-07 19:33:31 +02001990 * @block: block that contains the page we want to send
1991 * @offset: offset inside the block for the page
Juan Quintela56e93d22015-05-07 19:33:31 +02001992 */
Juan Quintela7faccdc2018-01-08 18:58:17 +01001993static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
Juan Quintela56e93d22015-05-07 19:33:31 +02001994{
Xiao Guangrong6c97ec52018-08-21 16:10:22 +08001995 int len = save_zero_page_to_file(rs, rs->f, block, offset);
Juan Quintela56e93d22015-05-07 19:33:31 +02001996
Xiao Guangrong6c97ec52018-08-21 16:10:22 +08001997 if (len) {
Juan Quintela93604472017-06-06 19:49:03 +02001998 ram_counters.duplicate++;
Xiao Guangrong6c97ec52018-08-21 16:10:22 +08001999 ram_counters.transferred += len;
2000 return 1;
Juan Quintela56e93d22015-05-07 19:33:31 +02002001 }
Xiao Guangrong6c97ec52018-08-21 16:10:22 +08002002 return -1;
Juan Quintela56e93d22015-05-07 19:33:31 +02002003}
2004
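/*
 * ram_release_pages: when release-ram is enabled and we are in
 * postcopy, discard the given range of already-sent source pages so
 * that their memory can be released early.
 */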
Juan Quintela57273092017-03-20 22:25:28 +01002005static void ram_release_pages(const char *rbname, uint64_t offset, int pages)
Pavel Butsykin53f09a12017-02-03 18:23:20 +03002006{
Juan Quintela57273092017-03-20 22:25:28 +01002007 if (!migrate_release_ram() || !migration_in_postcopy()) {
Pavel Butsykin53f09a12017-02-03 18:23:20 +03002008 return;
2009 }
2010
Alexey Romko8bba0042020-01-10 14:51:34 +01002011 ram_discard_range(rbname, offset, ((ram_addr_t)pages) << TARGET_PAGE_BITS);
Pavel Butsykin53f09a12017-02-03 18:23:20 +03002012}
2013
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08002014/*
2015 * @pages: the number of pages written by the control path,
2016 * < 0 - error
2017 * > 0 - number of pages written
2018 *
2019 * Return true if the page has been saved, otherwise false is returned.
2020 */
2021static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
2022 int *pages)
2023{
2024 uint64_t bytes_xmit = 0;
2025 int ret;
2026
2027 *pages = -1;
2028 ret = ram_control_save_page(rs->f, block->offset, offset, TARGET_PAGE_SIZE,
2029 &bytes_xmit);
2030 if (ret == RAM_SAVE_CONTROL_NOT_SUPP) {
2031 return false;
2032 }
2033
2034 if (bytes_xmit) {
2035 ram_counters.transferred += bytes_xmit;
2036 *pages = 1;
2037 }
2038
2039 if (ret == RAM_SAVE_CONTROL_DELAYED) {
2040 return true;
2041 }
2042
2043 if (bytes_xmit > 0) {
2044 ram_counters.normal++;
2045 } else if (bytes_xmit == 0) {
2046 ram_counters.duplicate++;
2047 }
2048
2049 return true;
2050}
2051
Xiao Guangrong65dacaa2018-03-30 15:51:27 +08002052/*
2053 * directly send the page to the stream
2054 *
2055 * Returns the number of pages written.
2056 *
2057 * @rs: current RAM state
2058 * @block: block that contains the page we want to send
2059 * @offset: offset inside the block for the page
2060 * @buf: the page to be sent
2061 * @async: send the page asynchronously
2062 */
2063static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
2064 uint8_t *buf, bool async)
2065{
2066 ram_counters.transferred += save_page_header(rs, rs->f, block,
2067 offset | RAM_SAVE_FLAG_PAGE);
2068 if (async) {
2069 qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE,
2070 migrate_release_ram() &&
2071 migration_in_postcopy());
2072 } else {
2073 qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE);
2074 }
2075 ram_counters.transferred += TARGET_PAGE_SIZE;
2076 ram_counters.normal++;
2077 return 1;
2078}
2079
Juan Quintela56e93d22015-05-07 19:33:31 +02002080/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002081 * ram_save_page: send the given page to the stream
Juan Quintela56e93d22015-05-07 19:33:31 +02002082 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002083 * Returns the number of pages written.
Dr. David Alan Gilbert3fd3c4b2015-12-10 16:31:46 +00002084 * < 0 - error
2085 * >=0 - Number of pages written - this might legally be 0
2086 * if xbzrle noticed the page was the same.
Juan Quintela56e93d22015-05-07 19:33:31 +02002087 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002088 * @rs: current RAM state
Juan Quintela56e93d22015-05-07 19:33:31 +02002089 * @block: block that contains the page we want to send
2090 * @offset: offset inside the block for the page
2091 * @last_stage: if we are at the completion stage
Juan Quintela56e93d22015-05-07 19:33:31 +02002092 */
Juan Quintelaa0a8aa12017-03-20 22:29:07 +01002093static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
Juan Quintela56e93d22015-05-07 19:33:31 +02002094{
2095 int pages = -1;
Juan Quintela56e93d22015-05-07 19:33:31 +02002096 uint8_t *p;
Juan Quintela56e93d22015-05-07 19:33:31 +02002097 bool send_async = true;
zhanghailianga08f6892016-01-15 11:37:44 +08002098 RAMBlock *block = pss->block;
Alexey Romko8bba0042020-01-10 14:51:34 +01002099 ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08002100 ram_addr_t current_addr = block->offset + offset;
Juan Quintela56e93d22015-05-07 19:33:31 +02002101
Dr. David Alan Gilbert2f68e392015-08-13 11:51:30 +01002102 p = block->host + offset;
Dr. David Alan Gilbert1db9d8e2017-04-26 19:37:21 +01002103 trace_ram_save_page(block->idstr, (uint64_t)offset, p);
Juan Quintela56e93d22015-05-07 19:33:31 +02002104
Juan Quintela56e93d22015-05-07 19:33:31 +02002105 XBZRLE_cache_lock();
Xiao Guangrongd7400a32018-03-30 15:51:26 +08002106 if (!rs->ram_bulk_stage && !migration_in_postcopy() &&
2107 migrate_use_xbzrle()) {
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08002108 pages = save_xbzrle_page(rs, &p, current_addr, block,
2109 offset, last_stage);
2110 if (!last_stage) {
2111 /* Can't send this cached data async, since the cache page
2112 * might get updated before it gets to the wire
Juan Quintela56e93d22015-05-07 19:33:31 +02002113 */
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08002114 send_async = false;
Juan Quintela56e93d22015-05-07 19:33:31 +02002115 }
2116 }
2117
2118 /* XBZRLE overflow or normal page */
2119 if (pages == -1) {
Xiao Guangrong65dacaa2018-03-30 15:51:27 +08002120 pages = save_normal_page(rs, block, offset, p, send_async);
Juan Quintela56e93d22015-05-07 19:33:31 +02002121 }
2122
2123 XBZRLE_cache_unlock();
2124
2125 return pages;
2126}
2127
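/*
 * ram_save_multifd_page: queue a normal page on one of the multifd
 * channels instead of writing it into the main migration stream.
 *
 * Returns 1 on success, -1 if the page could not be queued.
 */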
Juan Quintelab9ee2f72016-01-15 11:40:13 +01002128static int ram_save_multifd_page(RAMState *rs, RAMBlock *block,
2129 ram_addr_t offset)
2130{
Ivan Ren1b81c972019-07-30 13:33:35 +08002131 if (multifd_queue_page(rs, block, offset) < 0) {
Ivan Ren713f7622019-06-25 21:18:17 +08002132 return -1;
2133 }
Juan Quintelab9ee2f72016-01-15 11:40:13 +01002134 ram_counters.normal++;
2135
2136 return 1;
2137}
2138
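/*
 * do_compress_ram_page: compression-thread side of page compression
 *
 * Emits the page as a zero page when possible; otherwise writes the
 * page header, copies the page into @source_buf so the guest cannot
 * modify it mid-compression, and appends the zlib-compressed data.
 *
 * Returns true if the page turned out to be a zero page.
 */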
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002139static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
Xiao Guangrong6ef37712018-08-21 16:10:23 +08002140 ram_addr_t offset, uint8_t *source_buf)
Juan Quintela56e93d22015-05-07 19:33:31 +02002141{
Juan Quintela53518d92017-05-04 11:46:24 +02002142 RAMState *rs = ram_state;
Liang Lia7a9a882016-05-05 15:32:57 +08002143 uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002144 bool zero_page = false;
Xiao Guangrong6ef37712018-08-21 16:10:23 +08002145 int ret;
Juan Quintela56e93d22015-05-07 19:33:31 +02002146
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002147 if (save_zero_page_to_file(rs, f, block, offset)) {
2148 zero_page = true;
2149 goto exit;
2150 }
2151
Xiao Guangrong6ef37712018-08-21 16:10:23 +08002152 save_page_header(rs, f, block, offset | RAM_SAVE_FLAG_COMPRESS_PAGE);
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08002153
2154 /*
2155 * copy it to an internal buffer to avoid it being modified by the VM,
2156 * so that we can catch any error during compression and
2157 * decompression
2158 */
2159 memcpy(source_buf, p, TARGET_PAGE_SIZE);
Xiao Guangrong6ef37712018-08-21 16:10:23 +08002160 ret = qemu_put_compression_data(f, stream, source_buf, TARGET_PAGE_SIZE);
2161 if (ret < 0) {
2162 qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
Liang Lib3be2892016-05-05 15:32:54 +08002163 error_report("compressed data failed!");
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002164 return false;
Liang Lib3be2892016-05-05 15:32:54 +08002165 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002166
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002167exit:
Xiao Guangrong6ef37712018-08-21 16:10:23 +08002168 ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1);
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002169 return zero_page;
2170}
2171
2172static void
2173update_compress_thread_counts(const CompressParam *param, int bytes_xmit)
2174{
Xiao Guangrong76e03002018-09-06 15:01:00 +08002175 ram_counters.transferred += bytes_xmit;
2176
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002177 if (param->zero_page) {
2178 ram_counters.duplicate++;
Xiao Guangrong76e03002018-09-06 15:01:00 +08002179 return;
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002180 }
Xiao Guangrong76e03002018-09-06 15:01:00 +08002181
2182 /* 8 means a header with RAM_SAVE_FLAG_CONTINUE. */
2183 compression_counters.compressed_size += bytes_xmit - 8;
2184 compression_counters.pages++;
Juan Quintela56e93d22015-05-07 19:33:31 +02002185}
2186
Xiao Guangrong32b05492018-09-06 15:01:01 +08002187static bool save_page_use_compression(RAMState *rs);
2188
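/*
 * flush_compressed_data: wait for every compression thread to finish
 * its current page and push the produced data into the main migration
 * stream, updating the compression counters along the way.
 */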
Juan Quintelace25d332017-03-15 11:00:51 +01002189static void flush_compressed_data(RAMState *rs)
Juan Quintela56e93d22015-05-07 19:33:31 +02002190{
2191 int idx, len, thread_count;
2192
Xiao Guangrong32b05492018-09-06 15:01:01 +08002193 if (!save_page_use_compression(rs)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02002194 return;
2195 }
2196 thread_count = migrate_compress_threads();
Liang Lia7a9a882016-05-05 15:32:57 +08002197
Liang Li0d9f9a52016-05-05 15:32:59 +08002198 qemu_mutex_lock(&comp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02002199 for (idx = 0; idx < thread_count; idx++) {
Liang Lia7a9a882016-05-05 15:32:57 +08002200 while (!comp_param[idx].done) {
Liang Li0d9f9a52016-05-05 15:32:59 +08002201 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02002202 }
Liang Lia7a9a882016-05-05 15:32:57 +08002203 }
Liang Li0d9f9a52016-05-05 15:32:59 +08002204 qemu_mutex_unlock(&comp_done_lock);
Liang Lia7a9a882016-05-05 15:32:57 +08002205
2206 for (idx = 0; idx < thread_count; idx++) {
2207 qemu_mutex_lock(&comp_param[idx].mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08002208 if (!comp_param[idx].quit) {
Juan Quintelace25d332017-03-15 11:00:51 +01002209 len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002210 /*
2211 * it's safe to fetch zero_page without holding comp_done_lock
2212 * as there is no further request submitted to the thread,
2213 * i.e., the thread should be waiting for a request at this point.
2214 */
2215 update_compress_thread_counts(&comp_param[idx], len);
Juan Quintela56e93d22015-05-07 19:33:31 +02002216 }
Liang Lia7a9a882016-05-05 15:32:57 +08002217 qemu_mutex_unlock(&comp_param[idx].mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02002218 }
2219}
2220
2221static inline void set_compress_params(CompressParam *param, RAMBlock *block,
2222 ram_addr_t offset)
2223{
2224 param->block = block;
2225 param->offset = offset;
2226}
2227
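/*
 * compress_page_with_multi_thread: hand a page to an idle compression
 * thread. If none is idle and compress-wait-thread is set, wait for
 * one; otherwise return -1 so the caller sends the page as a normal
 * (uncompressed) page from the main thread.
 *
 * Returns 1 when the page was queued for compression, -1 otherwise.
 */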
Juan Quintelace25d332017-03-15 11:00:51 +01002228static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block,
2229 ram_addr_t offset)
Juan Quintela56e93d22015-05-07 19:33:31 +02002230{
2231 int idx, thread_count, bytes_xmit = -1, pages = -1;
Xiao Guangrong1d588722018-08-21 16:10:20 +08002232 bool wait = migrate_compress_wait_thread();
Juan Quintela56e93d22015-05-07 19:33:31 +02002233
2234 thread_count = migrate_compress_threads();
Liang Li0d9f9a52016-05-05 15:32:59 +08002235 qemu_mutex_lock(&comp_done_lock);
Xiao Guangrong1d588722018-08-21 16:10:20 +08002236retry:
2237 for (idx = 0; idx < thread_count; idx++) {
2238 if (comp_param[idx].done) {
2239 comp_param[idx].done = false;
2240 bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file);
2241 qemu_mutex_lock(&comp_param[idx].mutex);
2242 set_compress_params(&comp_param[idx], block, offset);
2243 qemu_cond_signal(&comp_param[idx].cond);
2244 qemu_mutex_unlock(&comp_param[idx].mutex);
2245 pages = 1;
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002246 update_compress_thread_counts(&comp_param[idx], bytes_xmit);
Juan Quintela56e93d22015-05-07 19:33:31 +02002247 break;
Juan Quintela56e93d22015-05-07 19:33:31 +02002248 }
2249 }
Xiao Guangrong1d588722018-08-21 16:10:20 +08002250
2251 /*
2252 * wait for the free thread if the user specifies 'compress-wait-thread',
2253 * otherwise we will post the page out in the main thread as normal page.
2254 */
2255 if (pages < 0 && wait) {
2256 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
2257 goto retry;
2258 }
Liang Li0d9f9a52016-05-05 15:32:59 +08002259 qemu_mutex_unlock(&comp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02002260
2261 return pages;
2262}
2263
2264/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002265 * find_dirty_block: find the next dirty page and update any state
2266 * associated with the search process.
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002267 *
Wei Yanga5f7b1a2019-05-11 07:37:29 +08002268 * Returns true if a page is found
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002269 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002270 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002271 * @pss: data about the state of the current dirty page scan
2272 * @again: set to false if the search has scanned the whole of RAM
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002273 */
Juan Quintelaf20e2862017-03-21 16:19:05 +01002274static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002275{
Juan Quintelaf20e2862017-03-21 16:19:05 +01002276 pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
Juan Quintela6f37bb82017-03-13 19:26:29 +01002277 if (pss->complete_round && pss->block == rs->last_seen_block &&
Juan Quintelaa935e302017-03-21 15:36:51 +01002278 pss->page >= rs->last_page) {
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002279 /*
2280 * We've been once around the RAM and haven't found anything.
2281 * Give up.
2282 */
2283 *again = false;
2284 return false;
2285 }
Alexey Romko8bba0042020-01-10 14:51:34 +01002286 if ((((ram_addr_t)pss->page) << TARGET_PAGE_BITS)
2287 >= pss->block->used_length) {
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002288 /* Didn't find anything in this RAM Block */
Juan Quintelaa935e302017-03-21 15:36:51 +01002289 pss->page = 0;
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002290 pss->block = QLIST_NEXT_RCU(pss->block, next);
2291 if (!pss->block) {
Xiao Guangrong48df9d82018-09-06 15:00:59 +08002292 /*
2293 * If memory migration starts over, we will meet a dirtied page
2294 * which may still exist in the compression threads' ring, so we
2295 * should flush the compressed data to make sure the new page
2296 * is not overwritten by the old one in the destination.
2297 *
2298 * Also, if xbzrle is on, stop using the data compression at this
2299 * point. In theory, xbzrle can do better than compression.
2300 */
2301 flush_compressed_data(rs);
2302
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002303 /* Hit the end of the list */
2304 pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
2305 /* Flag that we've looped */
2306 pss->complete_round = true;
Juan Quintela6f37bb82017-03-13 19:26:29 +01002307 rs->ram_bulk_stage = false;
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002308 }
2309 /* Didn't find anything this time, but try again on the new block */
2310 *again = true;
2311 return false;
2312 } else {
2313 /* Can go around again, but... */
2314 *again = true;
2315 /* We've found something so probably don't need to */
2316 return true;
2317 }
2318}
2319
Juan Quintela3d0684b2017-03-23 15:06:39 +01002320/**
2321 * unqueue_page: gets a page of the queue
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002322 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002323 * Helper for 'get_queued_page' - gets a page off the queue
2324 *
2325 * Returns the block of the page (or NULL if none available)
2326 *
Juan Quintelaec481c62017-03-20 22:12:40 +01002327 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002328 * @offset: used to return the offset within the RAMBlock
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002329 */
Juan Quintelaf20e2862017-03-21 16:19:05 +01002330static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002331{
2332 RAMBlock *block = NULL;
2333
Xiao Guangrongae526e32018-08-21 16:10:25 +08002334 if (QSIMPLEQ_EMPTY_ATOMIC(&rs->src_page_requests)) {
2335 return NULL;
2336 }
2337
Juan Quintelaec481c62017-03-20 22:12:40 +01002338 qemu_mutex_lock(&rs->src_page_req_mutex);
2339 if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
2340 struct RAMSrcPageRequest *entry =
2341 QSIMPLEQ_FIRST(&rs->src_page_requests);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002342 block = entry->rb;
2343 *offset = entry->offset;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002344
2345 if (entry->len > TARGET_PAGE_SIZE) {
2346 entry->len -= TARGET_PAGE_SIZE;
2347 entry->offset += TARGET_PAGE_SIZE;
2348 } else {
2349 memory_region_unref(block->mr);
Juan Quintelaec481c62017-03-20 22:12:40 +01002350 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002351 g_free(entry);
Dr. David Alan Gilberte03a34f2018-06-13 11:26:42 +01002352 migration_consume_urgent_request();
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002353 }
2354 }
Juan Quintelaec481c62017-03-20 22:12:40 +01002355 qemu_mutex_unlock(&rs->src_page_req_mutex);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002356
2357 return block;
2358}
2359
Juan Quintela3d0684b2017-03-23 15:06:39 +01002360/**
Li Qiangff1543a2019-05-24 23:28:32 -07002361 * get_queued_page: unqueue a page from the postcopy requests
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002362 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002363 * Skips pages that are already sent (!dirty)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002364 *
Wei Yanga5f7b1a2019-05-11 07:37:29 +08002365 * Returns true if a queued page is found
Juan Quintela3d0684b2017-03-23 15:06:39 +01002366 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002367 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002368 * @pss: data about the state of the current dirty page scan
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002369 */
Juan Quintelaf20e2862017-03-21 16:19:05 +01002370static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002371{
2372 RAMBlock *block;
2373 ram_addr_t offset;
2374 bool dirty;
2375
2376 do {
Juan Quintelaf20e2862017-03-21 16:19:05 +01002377 block = unqueue_page(rs, &offset);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002378 /*
2379 * We're sending this page, and since it's postcopy nothing else
2380 * will dirty it, and we must make sure it doesn't get sent again
2381 * even if this queue request was received after the background
2382 * search already sent it.
2383 */
2384 if (block) {
Juan Quintelaf20e2862017-03-21 16:19:05 +01002385 unsigned long page;
2386
Juan Quintela6b6712e2017-03-22 15:18:04 +01002387 page = offset >> TARGET_PAGE_BITS;
2388 dirty = test_bit(page, block->bmap);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002389 if (!dirty) {
Juan Quintela06b10682017-03-21 15:18:05 +01002390 trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
Wei Yang64737602019-08-19 14:18:43 +08002391 page);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002392 } else {
Juan Quintelaf20e2862017-03-21 16:19:05 +01002393 trace_get_queued_page(block->idstr, (uint64_t)offset, page);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002394 }
2395 }
2396
2397 } while (block && !dirty);
2398
2399 if (block) {
2400 /*
2401 * As soon as we start servicing pages out of order, we have
2402 * to kill the bulk stage, since the bulk stage assumes
2403 * (in migration_bitmap_find_and_reset_dirty) that every page is
2404 * dirty, which is no longer true.
2405 */
Juan Quintela6f37bb82017-03-13 19:26:29 +01002406 rs->ram_bulk_stage = false;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002407
2408 /*
2409 * We want the background search to continue from the queued page
2410 * since the guest is likely to want other pages near to the page
2411 * it just requested.
2412 */
2413 pss->block = block;
Juan Quintelaa935e302017-03-21 15:36:51 +01002414 pss->page = offset >> TARGET_PAGE_BITS;
Wei Yang422314e2019-06-05 09:08:28 +08002415
2416 /*
2417 * This unqueued page would break the "one round" check, even if it
2418 * is really rare.
2419 */
2420 pss->complete_round = false;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002421 }
2422
2423 return !!block;
2424}
2425
Juan Quintela56e93d22015-05-07 19:33:31 +02002426/**
Juan Quintela5e58f962017-04-03 22:06:54 +02002427 * migration_page_queue_free: drop any remaining pages in the ram
2428 * request queue
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002429 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002430 * It should be empty at the end anyway, but in error cases there may
2431 * be some left. In case any page is left, we drop it.
2432 *
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002433 */
Juan Quintela83c13382017-05-04 11:45:01 +02002434static void migration_page_queue_free(RAMState *rs)
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002435{
Juan Quintelaec481c62017-03-20 22:12:40 +01002436 struct RAMSrcPageRequest *mspr, *next_mspr;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002437 /* This queue generally should be empty - but in the case of a failed
2438 * migration it might have some droppings in it.
2439 */
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01002440 RCU_READ_LOCK_GUARD();
Juan Quintelaec481c62017-03-20 22:12:40 +01002441 QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002442 memory_region_unref(mspr->rb->mr);
Juan Quintelaec481c62017-03-20 22:12:40 +01002443 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002444 g_free(mspr);
2445 }
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002446}
2447
2448/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002449 * ram_save_queue_pages: queue the page for transmission
2450 *
2451 * A request from postcopy destination for example.
2452 *
2453 * Returns zero on success or negative on error
2454 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002455 * @rbname: Name of the RAMBlock of the request. NULL means the
2456 * same as the last one.
2457 * @start: starting address from the start of the RAMBlock
2458 * @len: length (in bytes) to send
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002459 */
Juan Quintela96506892017-03-14 18:41:03 +01002460int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002461{
2462 RAMBlock *ramblock;
Juan Quintela53518d92017-05-04 11:46:24 +02002463 RAMState *rs = ram_state;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002464
Juan Quintela93604472017-06-06 19:49:03 +02002465 ram_counters.postcopy_requests++;
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01002466 RCU_READ_LOCK_GUARD();
2467
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002468 if (!rbname) {
2469 /* Reuse last RAMBlock */
Juan Quintela68a098f2017-03-14 13:48:42 +01002470 ramblock = rs->last_req_rb;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002471
2472 if (!ramblock) {
2473 /*
2474 * Shouldn't happen, we can't reuse the last RAMBlock if
2475 * it's the 1st request.
2476 */
2477 error_report("ram_save_queue_pages no previous block");
Daniel Henrique Barboza03acb4e2020-01-06 15:23:31 -03002478 return -1;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002479 }
2480 } else {
2481 ramblock = qemu_ram_block_by_name(rbname);
2482
2483 if (!ramblock) {
2484 /* We shouldn't be asked for a non-existent RAMBlock */
2485 error_report("ram_save_queue_pages no block '%s'", rbname);
Daniel Henrique Barboza03acb4e2020-01-06 15:23:31 -03002486 return -1;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002487 }
Juan Quintela68a098f2017-03-14 13:48:42 +01002488 rs->last_req_rb = ramblock;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002489 }
2490 trace_ram_save_queue_pages(ramblock->idstr, start, len);
2491 if (start+len > ramblock->used_length) {
Juan Quintela9458ad62015-11-10 17:42:05 +01002492 error_report("%s request overrun start=" RAM_ADDR_FMT " len="
2493 RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002494 __func__, start, len, ramblock->used_length);
Daniel Henrique Barboza03acb4e2020-01-06 15:23:31 -03002495 return -1;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002496 }
2497
Juan Quintelaec481c62017-03-20 22:12:40 +01002498 struct RAMSrcPageRequest *new_entry =
2499 g_malloc0(sizeof(struct RAMSrcPageRequest));
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002500 new_entry->rb = ramblock;
2501 new_entry->offset = start;
2502 new_entry->len = len;
2503
2504 memory_region_ref(ramblock->mr);
Juan Quintelaec481c62017-03-20 22:12:40 +01002505 qemu_mutex_lock(&rs->src_page_req_mutex);
2506 QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
Dr. David Alan Gilberte03a34f2018-06-13 11:26:42 +01002507 migration_make_urgent_request();
Juan Quintelaec481c62017-03-20 22:12:40 +01002508 qemu_mutex_unlock(&rs->src_page_req_mutex);
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002509
2510 return 0;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002511}
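/*
 * Illustration: a postcopy fault on the destination typically asks for a
 * single target page, so a call might look roughly like
 *
 *     ram_save_queue_pages("pc.ram", 0x200000, TARGET_PAGE_SIZE);
 *
 * where the block name and offset are made-up example values.
 */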
2512
Xiao Guangrongd7400a32018-03-30 15:51:26 +08002513static bool save_page_use_compression(RAMState *rs)
2514{
2515 if (!migrate_use_compression()) {
2516 return false;
2517 }
2518
2519 /*
2520 * If xbzrle is on, stop using the data compression after the first
2521 * round of migration even if compression is enabled. In theory,
2522 * xbzrle can do better than compression.
2523 */
2524 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
2525 return true;
2526 }
2527
2528 return false;
2529}
2530
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002531/*
2532 * try to compress the page before posting it out, return true if the page
2533 * has been properly handled by compression, otherwise needs other
2534 * paths to handle it
2535 */
2536static bool save_compress_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
2537{
2538 if (!save_page_use_compression(rs)) {
2539 return false;
2540 }
2541
2542 /*
2543 * When starting the process of a new block, the first page of
2544 * the block should be sent out before other pages in the same
2545 * block, and all the pages in the last block should have been sent
2546 * out. Keeping this order is important, because the 'cont' flag
2547 * is used to avoid resending the block name.
2548 *
2549 * We post the first page as a normal page as compression will take
2550 * a lot of CPU resources.
2551 */
2552 if (block != rs->last_sent_block) {
2553 flush_compressed_data(rs);
2554 return false;
2555 }
2556
2557 if (compress_page_with_multi_thread(rs, block, offset) > 0) {
2558 return true;
2559 }
2560
Xiao Guangrong76e03002018-09-06 15:01:00 +08002561 compression_counters.busy++;
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002562 return false;
2563}
2564
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002565/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002566 * ram_save_target_page: save one target page
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002567 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002568 * Returns the number of pages written
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002569 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002570 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002571 * @pss: data about the page we want to send
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002572 * @last_stage: if we are at the completion stage
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002573 */
Juan Quintelaa0a8aa12017-03-20 22:29:07 +01002574static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
Juan Quintelaf20e2862017-03-21 16:19:05 +01002575 bool last_stage)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002576{
Xiao Guangronga8ec91f2018-03-30 15:51:25 +08002577 RAMBlock *block = pss->block;
Alexey Romko8bba0042020-01-10 14:51:34 +01002578 ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
Xiao Guangronga8ec91f2018-03-30 15:51:25 +08002579 int res;
2580
2581 if (control_save_page(rs, block, offset, &res)) {
2582 return res;
2583 }
2584
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002585 if (save_compress_page(rs, block, offset)) {
2586 return 1;
Xiao Guangrongd7400a32018-03-30 15:51:26 +08002587 }
2588
2589 res = save_zero_page(rs, block, offset);
2590 if (res > 0) {
2591 /* Must let xbzrle know, otherwise a previous (now 0'd) cached
2592 * page would be stale
2593 */
2594 if (!save_page_use_compression(rs)) {
2595 XBZRLE_cache_lock();
2596 xbzrle_cache_zero_page(rs, block->offset + offset);
2597 XBZRLE_cache_unlock();
2598 }
2599 ram_release_pages(block->idstr, offset, res);
2600 return res;
2601 }
2602
Xiao Guangrongda3f56c2018-03-30 15:51:28 +08002603 /*
Wei Yangc6b3a2e2019-10-26 07:20:00 +08002604 * Do not use multifd:
2605 * 1. With compression, as the first page in a new block should be posted
2606 * out before sending the compressed page
2607 * 2. In postcopy, as one whole host page should be placed
Xiao Guangrongda3f56c2018-03-30 15:51:28 +08002608 */
Wei Yangc6b3a2e2019-10-26 07:20:00 +08002609 if (!save_page_use_compression(rs) && migrate_use_multifd()
2610 && !migration_in_postcopy()) {
Juan Quintelab9ee2f72016-01-15 11:40:13 +01002611 return ram_save_multifd_page(rs, block, offset);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002612 }
2613
Xiao Guangrong1faa5662018-03-30 15:51:24 +08002614 return ram_save_page(rs, pss, last_stage);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002615}
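/*
 * Summary of the ordering implemented above: control_save_page() (e.g.
 * RDMA) gets first refusal, then multi-threaded compression via
 * save_compress_page(), then the zero-page check, then multifd (when
 * neither compression nor postcopy is in use), and finally the regular
 * ram_save_page() path, which may apply XBZRLE.
 */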
2616
2617/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002618 * ram_save_host_page: save a whole host page
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002619 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002620 * Starting at pss->page, send pages up to the end of the current host
2621 * page. It's valid for the initial offset to point into the middle of
2622 * a host page, in which case the remainder of the host page is sent.
2623 * Only dirty target pages are sent. Note that the host page size may
2624 * be a huge page for this block.
Dr. David Alan Gilbert1eb3fc02017-05-17 17:58:09 +01002625 * The saving stops at the boundary of the used_length of the block
2626 * if the RAMBlock isn't a multiple of the host page size.
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002627 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002628 * Returns the number of pages written or negative on error
2629 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002630 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002632 * @pss: data about the page we want to send
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002633 * @last_stage: if we are at the completion stage
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002634 */
Juan Quintelaa0a8aa12017-03-20 22:29:07 +01002635static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
Juan Quintelaf20e2862017-03-21 16:19:05 +01002636 bool last_stage)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002637{
2638 int tmppages, pages = 0;
Juan Quintelaa935e302017-03-21 15:36:51 +01002639 size_t pagesize_bits =
2640 qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
Dr. David Alan Gilbert4c011c32017-02-24 18:28:39 +00002641
Yury Kotovfbd162e2019-02-15 20:45:46 +03002642 if (ramblock_is_ignored(pss->block)) {
Cédric Le Goaterb895de52018-05-14 08:57:00 +02002643 error_report("block %s should not be migrated !", pss->block->idstr);
2644 return 0;
2645 }
2646
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002647 do {
Xiao Guangrong1faa5662018-03-30 15:51:24 +08002648 /* Check if the page is dirty and, if it is, send it */
2649 if (!migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
2650 pss->page++;
2651 continue;
2652 }
2653
Juan Quintelaf20e2862017-03-21 16:19:05 +01002654 tmppages = ram_save_target_page(rs, pss, last_stage);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002655 if (tmppages < 0) {
2656 return tmppages;
2657 }
2658
2659 pages += tmppages;
Juan Quintelaa935e302017-03-21 15:36:51 +01002660 pss->page++;
Dr. David Alan Gilbert97e1e062019-12-05 10:29:18 +00002661 /* Allow rate limiting to happen in the middle of huge pages */
2662 migration_rate_limit();
Dr. David Alan Gilbert1eb3fc02017-05-17 17:58:09 +01002663 } while ((pss->page & (pagesize_bits - 1)) &&
Alexey Romko8bba0042020-01-10 14:51:34 +01002664 offset_in_ramblock(pss->block,
2665 ((ram_addr_t)pss->page) << TARGET_PAGE_BITS));
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002666
2667 /* The offset we leave with is the last one we looked at */
Juan Quintelaa935e302017-03-21 15:36:51 +01002668 pss->page--;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002669 return pages;
2670}
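/*
 * Example of the arithmetic above: with 2 MiB host huge pages and 4 KiB
 * target pages, pagesize_bits == 512, so up to 512 dirty target pages are
 * sent before (pss->page & (pagesize_bits - 1)) becomes zero and the loop
 * stops at the host page boundary.
 */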
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002671
2672/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002673 * ram_find_and_save_block: finds a dirty page and sends it to f
Juan Quintela56e93d22015-05-07 19:33:31 +02002674 *
2675 * Called within an RCU critical section.
2676 *
Xiao Guangronge8f37352018-09-03 17:26:44 +08002677 * Returns the number of pages written where zero means no dirty pages,
2678 * or negative on error
Juan Quintela56e93d22015-05-07 19:33:31 +02002679 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002680 * @rs: current RAM state
Juan Quintela56e93d22015-05-07 19:33:31 +02002681 * @last_stage: if we are at the completion stage
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002682 *
2683 * On systems where host-page-size > target-page-size it will send all the
2684 * pages in a host page that are dirty.
Juan Quintela56e93d22015-05-07 19:33:31 +02002685 */
2686
Juan Quintelace25d332017-03-15 11:00:51 +01002687static int ram_find_and_save_block(RAMState *rs, bool last_stage)
Juan Quintela56e93d22015-05-07 19:33:31 +02002688{
Dr. David Alan Gilbertb8fb8cb2015-09-23 15:27:10 +01002689 PageSearchStatus pss;
Juan Quintela56e93d22015-05-07 19:33:31 +02002690 int pages = 0;
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002691 bool again, found;
Juan Quintela56e93d22015-05-07 19:33:31 +02002692
Ashijeet Acharya0827b9e2017-02-08 19:58:45 +05302693 /* No dirty page as there is zero RAM */
2694 if (!ram_bytes_total()) {
2695 return pages;
2696 }
2697
Juan Quintela6f37bb82017-03-13 19:26:29 +01002698 pss.block = rs->last_seen_block;
Juan Quintelaa935e302017-03-21 15:36:51 +01002699 pss.page = rs->last_page;
Dr. David Alan Gilbertb8fb8cb2015-09-23 15:27:10 +01002700 pss.complete_round = false;
2701
2702 if (!pss.block) {
2703 pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
2704 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002705
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002706 do {
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002707 again = true;
Juan Quintelaf20e2862017-03-21 16:19:05 +01002708 found = get_queued_page(rs, &pss);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002709
2710 if (!found) {
2711 /* priority queue empty, so just search for something dirty */
Juan Quintelaf20e2862017-03-21 16:19:05 +01002712 found = find_dirty_block(rs, &pss, &again);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002713 }
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002714
2715 if (found) {
Juan Quintelaf20e2862017-03-21 16:19:05 +01002716 pages = ram_save_host_page(rs, &pss, last_stage);
Juan Quintela56e93d22015-05-07 19:33:31 +02002717 }
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002718 } while (!pages && again);
Juan Quintela56e93d22015-05-07 19:33:31 +02002719
Juan Quintela6f37bb82017-03-13 19:26:29 +01002720 rs->last_seen_block = pss.block;
Juan Quintelaa935e302017-03-21 15:36:51 +01002721 rs->last_page = pss.page;
Juan Quintela56e93d22015-05-07 19:33:31 +02002722
2723 return pages;
2724}
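/*
 * Note on the loop above: queued (postcopy-requested) pages are serviced
 * before the background scan; only when the request queue is empty does
 * find_dirty_block() advance the linear search. The search position is
 * saved back into rs->last_seen_block/rs->last_page so the next call
 * resumes where this one stopped.
 */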
2725
2726void acct_update_position(QEMUFile *f, size_t size, bool zero)
2727{
2728 uint64_t pages = size / TARGET_PAGE_SIZE;
Juan Quintelaf7ccd612017-03-13 20:30:21 +01002729
Juan Quintela56e93d22015-05-07 19:33:31 +02002730 if (zero) {
Juan Quintela93604472017-06-06 19:49:03 +02002731 ram_counters.duplicate += pages;
Juan Quintela56e93d22015-05-07 19:33:31 +02002732 } else {
Juan Quintela93604472017-06-06 19:49:03 +02002733 ram_counters.normal += pages;
2734 ram_counters.transferred += size;
Juan Quintela56e93d22015-05-07 19:33:31 +02002735 qemu_update_position(f, size);
2736 }
2737}
2738
Yury Kotovfbd162e2019-02-15 20:45:46 +03002739static uint64_t ram_bytes_total_common(bool count_ignored)
Juan Quintela56e93d22015-05-07 19:33:31 +02002740{
2741 RAMBlock *block;
2742 uint64_t total = 0;
2743
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01002744 RCU_READ_LOCK_GUARD();
2745
Yury Kotovfbd162e2019-02-15 20:45:46 +03002746 if (count_ignored) {
2747 RAMBLOCK_FOREACH_MIGRATABLE(block) {
2748 total += block->used_length;
2749 }
2750 } else {
2751 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
2752 total += block->used_length;
2753 }
Peter Xu99e15582017-05-12 12:17:39 +08002754 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002755 return total;
2756}
2757
Yury Kotovfbd162e2019-02-15 20:45:46 +03002758uint64_t ram_bytes_total(void)
2759{
2760 return ram_bytes_total_common(false);
2761}
2762
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002763static void xbzrle_load_setup(void)
Juan Quintela56e93d22015-05-07 19:33:31 +02002764{
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002765 XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
Juan Quintela56e93d22015-05-07 19:33:31 +02002766}
2767
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002768static void xbzrle_load_cleanup(void)
2769{
2770 g_free(XBZRLE.decoded_buf);
2771 XBZRLE.decoded_buf = NULL;
2772}
2773
Peter Xu7d7c96b2017-10-19 14:31:58 +08002774static void ram_state_cleanup(RAMState **rsp)
2775{
Dr. David Alan Gilbertb9ccaf62018-02-12 16:03:39 +00002776 if (*rsp) {
2777 migration_page_queue_free(*rsp);
2778 qemu_mutex_destroy(&(*rsp)->bitmap_mutex);
2779 qemu_mutex_destroy(&(*rsp)->src_page_req_mutex);
2780 g_free(*rsp);
2781 *rsp = NULL;
2782 }
Peter Xu7d7c96b2017-10-19 14:31:58 +08002783}
2784
Peter Xu84593a02017-10-19 14:31:59 +08002785static void xbzrle_cleanup(void)
2786{
2787 XBZRLE_cache_lock();
2788 if (XBZRLE.cache) {
2789 cache_fini(XBZRLE.cache);
2790 g_free(XBZRLE.encoded_buf);
2791 g_free(XBZRLE.current_buf);
2792 g_free(XBZRLE.zero_target_page);
2793 XBZRLE.cache = NULL;
2794 XBZRLE.encoded_buf = NULL;
2795 XBZRLE.current_buf = NULL;
2796 XBZRLE.zero_target_page = NULL;
2797 }
2798 XBZRLE_cache_unlock();
2799}
2800
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002801static void ram_save_cleanup(void *opaque)
Juan Quintela56e93d22015-05-07 19:33:31 +02002802{
Juan Quintela53518d92017-05-04 11:46:24 +02002803 RAMState **rsp = opaque;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002804 RAMBlock *block;
Juan Quintelaeb859c52017-03-13 21:51:55 +01002805
Li Zhijian2ff64032015-07-02 20:18:05 +08002806 /* The caller must hold the iothread lock or be in a bh, so there is
Yi Wang46334562019-04-15 14:51:29 +08002807 * no writing race against the migration bitmap
Li Zhijian2ff64032015-07-02 20:18:05 +08002808 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002809 memory_global_dirty_log_stop();
2810
Yury Kotovfbd162e2019-02-15 20:45:46 +03002811 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Peter Xu002cad62019-06-03 14:50:56 +08002812 g_free(block->clear_bmap);
2813 block->clear_bmap = NULL;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002814 g_free(block->bmap);
2815 block->bmap = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02002816 }
2817
Peter Xu84593a02017-10-19 14:31:59 +08002818 xbzrle_cleanup();
Juan Quintelaf0afa332017-06-28 11:52:28 +02002819 compress_threads_save_cleanup();
Peter Xu7d7c96b2017-10-19 14:31:58 +08002820 ram_state_cleanup(rsp);
Juan Quintela56e93d22015-05-07 19:33:31 +02002821}
2822
Juan Quintela6f37bb82017-03-13 19:26:29 +01002823static void ram_state_reset(RAMState *rs)
Juan Quintela56e93d22015-05-07 19:33:31 +02002824{
Juan Quintela6f37bb82017-03-13 19:26:29 +01002825 rs->last_seen_block = NULL;
2826 rs->last_sent_block = NULL;
Juan Quintela269ace22017-03-21 15:23:31 +01002827 rs->last_page = 0;
Juan Quintela6f37bb82017-03-13 19:26:29 +01002828 rs->last_version = ram_list.version;
2829 rs->ram_bulk_stage = true;
Wei Wang6eeb63f2018-12-11 16:24:52 +08002830 rs->fpo_enabled = false;
Juan Quintela56e93d22015-05-07 19:33:31 +02002831}
2832
2833#define MAX_WAIT 50 /* ms, half buffered_file limit */
2834
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00002835/*
2836 * 'expected' is the value you expect the bitmap mostly to be full
2837 * of; it won't bother printing lines that are all this value.
2838 * If 'todump' is null the migration bitmap is dumped.
2839 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002840void ram_debug_dump_bitmap(unsigned long *todump, bool expected,
2841 unsigned long pages)
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00002842{
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00002843 int64_t cur;
2844 int64_t linelen = 128;
2845 char linebuf[129];
2846
Juan Quintela6b6712e2017-03-22 15:18:04 +01002847 for (cur = 0; cur < pages; cur += linelen) {
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00002848 int64_t curb;
2849 bool found = false;
2850 /*
2851 * Last line; catch the case where the line length
2852 * is longer than remaining ram
2853 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002854 if (cur + linelen > pages) {
2855 linelen = pages - cur;
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00002856 }
2857 for (curb = 0; curb < linelen; curb++) {
2858 bool thisbit = test_bit(cur + curb, todump);
2859 linebuf[curb] = thisbit ? '1' : '.';
2860 found = found || (thisbit != expected);
2861 }
2862 if (found) {
2863 linebuf[curb] = '\0';
2864 fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
2865 }
2866 }
2867}
2868
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002869/* **** functions for postcopy ***** */
2870
Pavel Butsykinced1c612017-02-03 18:23:21 +03002871void ram_postcopy_migrated_memory_release(MigrationState *ms)
2872{
2873 struct RAMBlock *block;
Pavel Butsykinced1c612017-02-03 18:23:21 +03002874
Yury Kotovfbd162e2019-02-15 20:45:46 +03002875 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01002876 unsigned long *bitmap = block->bmap;
2877 unsigned long range = block->used_length >> TARGET_PAGE_BITS;
2878 unsigned long run_start = find_next_zero_bit(bitmap, range, 0);
Pavel Butsykinced1c612017-02-03 18:23:21 +03002879
2880 while (run_start < range) {
2881 unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
Alexey Romko8bba0042020-01-10 14:51:34 +01002882 ram_discard_range(block->idstr,
2883 ((ram_addr_t)run_start) << TARGET_PAGE_BITS,
2884 ((ram_addr_t)(run_end - run_start))
2885 << TARGET_PAGE_BITS);
Pavel Butsykinced1c612017-02-03 18:23:21 +03002886 run_start = find_next_zero_bit(bitmap, range, run_end + 1);
2887 }
2888 }
2889}
2890
Juan Quintela3d0684b2017-03-23 15:06:39 +01002891/**
2892 * postcopy_send_discard_bm_ram: discard a RAMBlock
2893 *
2894 * Returns zero on success
2895 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002896 * Callback from postcopy_each_ram_send_discard for each RAMBlock
Juan Quintela3d0684b2017-03-23 15:06:39 +01002897 *
2898 * @ms: current migration state
Wei Yang89dab312019-07-15 10:05:49 +08002899 * @block: RAMBlock to discard
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002900 */
Wei Yang810cf2b2019-07-24 09:07:21 +08002901static int postcopy_send_discard_bm_ram(MigrationState *ms, RAMBlock *block)
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002902{
Juan Quintela6b6712e2017-03-22 15:18:04 +01002903 unsigned long end = block->used_length >> TARGET_PAGE_BITS;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002904 unsigned long current;
Wei Yang1e7cf8c2019-08-19 14:18:42 +08002905 unsigned long *bitmap = block->bmap;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002906
Juan Quintela6b6712e2017-03-22 15:18:04 +01002907 for (current = 0; current < end; ) {
Wei Yang1e7cf8c2019-08-19 14:18:42 +08002908 unsigned long one = find_next_bit(bitmap, end, current);
Wei Yang33a5cb622019-06-27 10:08:21 +08002909 unsigned long zero, discard_length;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002910
Wei Yang33a5cb622019-06-27 10:08:21 +08002911 if (one >= end) {
2912 break;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002913 }
Wei Yang33a5cb622019-06-27 10:08:21 +08002914
Wei Yang1e7cf8c2019-08-19 14:18:42 +08002915 zero = find_next_zero_bit(bitmap, end, one + 1);
Wei Yang33a5cb622019-06-27 10:08:21 +08002916
2917 if (zero >= end) {
2918 discard_length = end - one;
2919 } else {
2920 discard_length = zero - one;
2921 }
Wei Yang810cf2b2019-07-24 09:07:21 +08002922 postcopy_discard_send_range(ms, one, discard_length);
Wei Yang33a5cb622019-06-27 10:08:21 +08002923 current = one + discard_length;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002924 }
2925
2926 return 0;
2927}
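/*
 * Worked example of the run walk above: for a bitmap whose first bits are
 * 0,0,1,1,1,0,... find_next_bit() returns 2, find_next_zero_bit() returns
 * 5, and a discard range of (start=2, length=3) pages is sent before the
 * scan resumes at page 5.
 */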
2928
Juan Quintela3d0684b2017-03-23 15:06:39 +01002929/**
2930 * postcopy_each_ram_send_discard: discard all RAMBlocks
2931 *
2932 * Returns 0 for success or negative for error
2933 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002934 * Utility for the outgoing postcopy code.
2935 * Calls postcopy_send_discard_bm_ram for each RAMBlock
2936 * passing it bitmap indexes and name.
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002937 * (qemu_ram_foreach_block ends up passing unscaled lengths
2938 * which would mean postcopy code would have to deal with target page)
Juan Quintela3d0684b2017-03-23 15:06:39 +01002939 *
2940 * @ms: current migration state
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002941 */
2942static int postcopy_each_ram_send_discard(MigrationState *ms)
2943{
2944 struct RAMBlock *block;
2945 int ret;
2946
Yury Kotovfbd162e2019-02-15 20:45:46 +03002947 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Wei Yang810cf2b2019-07-24 09:07:21 +08002948 postcopy_discard_send_init(ms, block->idstr);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002949
2950 /*
2951 * Postcopy sends chunks of bitmap over the wire, but it
2952 * just needs indexes at this point, avoids it having
2953 * target page specific code.
2954 */
Wei Yang810cf2b2019-07-24 09:07:21 +08002955 ret = postcopy_send_discard_bm_ram(ms, block);
2956 postcopy_discard_send_finish(ms);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002957 if (ret) {
2958 return ret;
2959 }
2960 }
2961
2962 return 0;
2963}
2964
Juan Quintela3d0684b2017-03-23 15:06:39 +01002965/**
Wei Yang8324ef82019-08-19 14:18:41 +08002966 * postcopy_chunk_hostpages_pass: canonicalize bitmap in hostpages
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002967 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002968 * Helper for postcopy_chunk_hostpages; it canonicalizes the dirty
2969 * bitmap of a single RAMBlock.
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002971 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002972 * Postcopy requires that all target pages in a host page are dirty or
2973 * clean, not a mix. This function canonicalizes the bitmap.
2974 *
2975 * @ms: current migration state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002976 * @block: block that contains the page we want to canonicalize
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002977 */
Wei Yang1e7cf8c2019-08-19 14:18:42 +08002978static void postcopy_chunk_hostpages_pass(MigrationState *ms, RAMBlock *block)
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002979{
Juan Quintela53518d92017-05-04 11:46:24 +02002980 RAMState *rs = ram_state;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002981 unsigned long *bitmap = block->bmap;
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00002982 unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002983 unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002984 unsigned long run_start;
2985
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00002986 if (block->page_size == TARGET_PAGE_SIZE) {
2987 /* Easy case - TPS==HPS for a non-huge page RAMBlock */
2988 return;
2989 }
2990
Wei Yang1e7cf8c2019-08-19 14:18:42 +08002991 /* Find a dirty page */
2992 run_start = find_next_bit(bitmap, pages, 0);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002993
Juan Quintela6b6712e2017-03-22 15:18:04 +01002994 while (run_start < pages) {
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002995
2996 /*
2997 * If the start of this run of pages is in the middle of a host
2998 * page, then we need to fixup this host page.
2999 */
Wei Yang9dec3cc2019-08-06 08:46:48 +08003000 if (QEMU_IS_ALIGNED(run_start, host_ratio)) {
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00003001 /* Find the end of this run */
Wei Yang1e7cf8c2019-08-19 14:18:42 +08003002 run_start = find_next_zero_bit(bitmap, pages, run_start + 1);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00003003 /*
3004 * If the end isn't at the start of a host page, then the
3005 * run doesn't finish at the end of a host page
3006 * and we need to discard.
3007 */
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00003008 }
3009
Wei Yang9dec3cc2019-08-06 08:46:48 +08003010 if (!QEMU_IS_ALIGNED(run_start, host_ratio)) {
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00003011 unsigned long page;
Wei Yangdad45ab2019-08-06 08:46:47 +08003012 unsigned long fixup_start_addr = QEMU_ALIGN_DOWN(run_start,
3013 host_ratio);
3014 run_start = QEMU_ALIGN_UP(run_start, host_ratio);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00003015
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00003016 /* Clean up the bitmap */
3017 for (page = fixup_start_addr;
3018 page < fixup_start_addr + host_ratio; page++) {
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00003019 /*
3020 * Remark them as dirty, updating the count for any pages
3021 * that weren't previously dirty.
3022 */
Juan Quintela0d8ec882017-03-13 21:21:41 +01003023 rs->migration_dirty_pages += !test_and_set_bit(page, bitmap);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00003024 }
3025 }
3026
Wei Yang1e7cf8c2019-08-19 14:18:42 +08003027 /* Find the next dirty page for the next iteration */
3028 run_start = find_next_bit(bitmap, pages, run_start);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00003029 }
3030}
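/*
 * Worked example (2 MiB host page, 4 KiB target page, host_ratio == 512):
 * if a dirty run starts at target page 700, it is not host-page aligned,
 * so fixup_start_addr becomes 512, run_start is rounded up to 1024, and
 * target pages 512..1023 are all re-marked dirty; the whole host page is
 * then sent or discarded as a unit.
 */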
3031
Juan Quintela3d0684b2017-03-23 15:06:39 +01003032/**
Wei Yang89dab312019-07-15 10:05:49 +08003033 * postcopy_chunk_hostpages: discard any partially sent host page
Juan Quintela3d0684b2017-03-23 15:06:39 +01003034 *
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00003035 * Utility for the outgoing postcopy code.
3036 *
3037 * Discard any partially sent host-page size chunks, mark any partially
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00003038 * dirty host-page size chunks as all dirty. In this case the host-page
3039 * is the host-page for the particular RAMBlock, i.e. it might be a huge page
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00003040 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01003041 * Returns zero on success
3042 *
3043 * @ms: current migration state
Juan Quintela6b6712e2017-03-22 15:18:04 +01003044 * @block: block we want to work with
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00003045 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01003046static int postcopy_chunk_hostpages(MigrationState *ms, RAMBlock *block)
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00003047{
Wei Yang810cf2b2019-07-24 09:07:21 +08003048 postcopy_discard_send_init(ms, block->idstr);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00003049
Juan Quintela6b6712e2017-03-22 15:18:04 +01003050 /*
Wei Yang1e7cf8c2019-08-19 14:18:42 +08003051 * Ensure that all partially dirty host pages are made fully dirty.
Juan Quintela6b6712e2017-03-22 15:18:04 +01003052 */
Wei Yang1e7cf8c2019-08-19 14:18:42 +08003053 postcopy_chunk_hostpages_pass(ms, block);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00003054
Wei Yang810cf2b2019-07-24 09:07:21 +08003055 postcopy_discard_send_finish(ms);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00003056 return 0;
3057}
3058
Juan Quintela3d0684b2017-03-23 15:06:39 +01003059/**
3060 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
3061 *
3062 * Returns zero on success
3063 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003064 * Transmit the set of pages to be discarded after precopy to the target;
3065 * these are pages that:
3066 * a) Have been previously transmitted but are now dirty again
3067 * b) Pages that have never been transmitted, this ensures that
3068 * any pages on the destination that have been mapped by background
3069 * tasks get discarded (transparent huge pages is the specific concern)
3070 * Hopefully this is pretty sparse
Juan Quintela3d0684b2017-03-23 15:06:39 +01003071 *
3072 * @ms: current migration state
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003073 */
3074int ram_postcopy_send_discard_bitmap(MigrationState *ms)
3075{
Juan Quintela53518d92017-05-04 11:46:24 +02003076 RAMState *rs = ram_state;
Juan Quintela6b6712e2017-03-22 15:18:04 +01003077 RAMBlock *block;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003078 int ret;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003079
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003080 RCU_READ_LOCK_GUARD();
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003081
3082 /* This should be our last sync, the src is now paused */
Juan Quintelaeb859c52017-03-13 21:51:55 +01003083 migration_bitmap_sync(rs);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003084
Juan Quintela6b6712e2017-03-22 15:18:04 +01003085 /* Easiest way to make sure we don't resume in the middle of a host-page */
3086 rs->last_seen_block = NULL;
3087 rs->last_sent_block = NULL;
3088 rs->last_page = 0;
3089
Yury Kotovfbd162e2019-02-15 20:45:46 +03003090 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01003091 /* Deal with TPS != HPS and huge pages */
3092 ret = postcopy_chunk_hostpages(ms, block);
3093 if (ret) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01003094 return ret;
3095 }
3096
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003097#ifdef DEBUG_POSTCOPY
Wei Yang1e7cf8c2019-08-19 14:18:42 +08003098 ram_debug_dump_bitmap(block->bmap, true,
3099 block->used_length >> TARGET_PAGE_BITS);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003100#endif
Juan Quintela6b6712e2017-03-22 15:18:04 +01003101 }
3102 trace_ram_postcopy_send_discard_bitmap();
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003103
3104 ret = postcopy_each_ram_send_discard(ms);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003105
3106 return ret;
3107}
3108
Juan Quintela3d0684b2017-03-23 15:06:39 +01003109/**
3110 * ram_discard_range: discard dirtied pages at the beginning of postcopy
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003111 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01003112 * Returns zero on success
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003113 *
Juan Quintela36449152017-03-23 15:11:59 +01003114 * @rbname: name of the RAMBlock of the request. NULL means the
3115 * same that last one.
Juan Quintela3d0684b2017-03-23 15:06:39 +01003116 * @start: starting offset (in bytes) within the RAMBlock
3117 * @length: length (in bytes) of the range to discard
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003118 */
Juan Quintelaaaa20642017-03-21 11:35:24 +01003119int ram_discard_range(const char *rbname, uint64_t start, size_t length)
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003120{
Juan Quintela36449152017-03-23 15:11:59 +01003121 trace_ram_discard_range(rbname, start, length);
Dr. David Alan Gilbertd3a50382017-02-24 18:28:32 +00003122
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003123 RCU_READ_LOCK_GUARD();
Juan Quintela36449152017-03-23 15:11:59 +01003124 RAMBlock *rb = qemu_ram_block_by_name(rbname);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003125
3126 if (!rb) {
Juan Quintela36449152017-03-23 15:11:59 +01003127 error_report("ram_discard_range: Failed to find block '%s'", rbname);
Daniel Henrique Barboza03acb4e2020-01-06 15:23:31 -03003128 return -1;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003129 }
3130
Peter Xu814bb082018-07-23 20:33:02 +08003131 /*
3132 * On source VM, we don't need to update the received bitmap since
3133 * we don't even have one.
3134 */
3135 if (rb->receivedmap) {
3136 bitmap_clear(rb->receivedmap, start >> qemu_target_page_bits(),
3137 length >> qemu_target_page_bits());
3138 }
3139
Daniel Henrique Barboza03acb4e2020-01-06 15:23:31 -03003140 return ram_block_discard_range(rb, start, length);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003141}
3142
Peter Xu84593a02017-10-19 14:31:59 +08003143/*
3144 * For every allocation, we will try not to crash the VM if the
3145 * allocation fails.
3146 */
3147static int xbzrle_init(void)
3148{
3149 Error *local_err = NULL;
3150
3151 if (!migrate_use_xbzrle()) {
3152 return 0;
3153 }
3154
3155 XBZRLE_cache_lock();
3156
3157 XBZRLE.zero_target_page = g_try_malloc0(TARGET_PAGE_SIZE);
3158 if (!XBZRLE.zero_target_page) {
3159 error_report("%s: Error allocating zero page", __func__);
3160 goto err_out;
3161 }
3162
3163 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(),
3164 TARGET_PAGE_SIZE, &local_err);
3165 if (!XBZRLE.cache) {
3166 error_report_err(local_err);
3167 goto free_zero_page;
3168 }
3169
3170 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
3171 if (!XBZRLE.encoded_buf) {
3172 error_report("%s: Error allocating encoded_buf", __func__);
3173 goto free_cache;
3174 }
3175
3176 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
3177 if (!XBZRLE.current_buf) {
3178 error_report("%s: Error allocating current_buf", __func__);
3179 goto free_encoded_buf;
3180 }
3181
3182 /* We are all good */
3183 XBZRLE_cache_unlock();
3184 return 0;
3185
3186free_encoded_buf:
3187 g_free(XBZRLE.encoded_buf);
3188 XBZRLE.encoded_buf = NULL;
3189free_cache:
3190 cache_fini(XBZRLE.cache);
3191 XBZRLE.cache = NULL;
3192free_zero_page:
3193 g_free(XBZRLE.zero_target_page);
3194 XBZRLE.zero_target_page = NULL;
3195err_out:
3196 XBZRLE_cache_unlock();
3197 return -ENOMEM;
3198}
3199
Juan Quintela53518d92017-05-04 11:46:24 +02003200static int ram_state_init(RAMState **rsp)
Juan Quintela56e93d22015-05-07 19:33:31 +02003201{
Peter Xu7d00ee62017-10-19 14:31:57 +08003202 *rsp = g_try_new0(RAMState, 1);
3203
3204 if (!*rsp) {
3205 error_report("%s: Init ramstate fail", __func__);
3206 return -1;
3207 }
Juan Quintela53518d92017-05-04 11:46:24 +02003208
3209 qemu_mutex_init(&(*rsp)->bitmap_mutex);
3210 qemu_mutex_init(&(*rsp)->src_page_req_mutex);
3211 QSIMPLEQ_INIT(&(*rsp)->src_page_requests);
Juan Quintela56e93d22015-05-07 19:33:31 +02003212
Peter Xu7d00ee62017-10-19 14:31:57 +08003213 /*
Ivan Ren40c4d4a2019-07-14 22:51:19 +08003214 * Count the total number of pages used by ram blocks not including any
3215 * gaps due to alignment or unplugs.
Wei Yang03158512019-06-04 14:17:27 +08003216 * This must match the initial values of the dirty bitmap.
Peter Xu7d00ee62017-10-19 14:31:57 +08003217 */
Ivan Ren40c4d4a2019-07-14 22:51:19 +08003218 (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
Peter Xu7d00ee62017-10-19 14:31:57 +08003219 ram_state_reset(*rsp);
3220
3221 return 0;
3222}
3223
Peter Xud6eff5d2017-10-19 14:32:00 +08003224static void ram_list_init_bitmaps(void)
3225{
Peter Xu002cad62019-06-03 14:50:56 +08003226 MigrationState *ms = migrate_get_current();
Peter Xud6eff5d2017-10-19 14:32:00 +08003227 RAMBlock *block;
3228 unsigned long pages;
Peter Xu002cad62019-06-03 14:50:56 +08003229 uint8_t shift;
Peter Xud6eff5d2017-10-19 14:32:00 +08003230
3231 /* Skip setting bitmap if there is no RAM */
3232 if (ram_bytes_total()) {
Peter Xu002cad62019-06-03 14:50:56 +08003233 shift = ms->clear_bitmap_shift;
3234 if (shift > CLEAR_BITMAP_SHIFT_MAX) {
3235 error_report("clear_bitmap_shift (%u) too big, using "
3236 "max value (%u)", shift, CLEAR_BITMAP_SHIFT_MAX);
3237 shift = CLEAR_BITMAP_SHIFT_MAX;
3238 } else if (shift < CLEAR_BITMAP_SHIFT_MIN) {
3239 error_report("clear_bitmap_shift (%u) too small, using "
3240 "min value (%u)", shift, CLEAR_BITMAP_SHIFT_MIN);
3241 shift = CLEAR_BITMAP_SHIFT_MIN;
3242 }
3243
Yury Kotovfbd162e2019-02-15 20:45:46 +03003244 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Peter Xud6eff5d2017-10-19 14:32:00 +08003245 pages = block->max_length >> TARGET_PAGE_BITS;
Wei Yang03158512019-06-04 14:17:27 +08003246 /*
3247 * The initial dirty bitmap for migration must be set with all
3248 * ones to make sure we'll migrate every guest RAM page to
3249 * destination.
Ivan Ren40c4d4a2019-07-14 22:51:19 +08003250 * Here we set RAMBlock.bmap all to 1 because when restarting a
3251 * new migration after a failed migration, ram_list.
3252 * dirty_memory[DIRTY_MEMORY_MIGRATION] doesn't include the whole
3253 * guest memory.
Wei Yang03158512019-06-04 14:17:27 +08003254 */
Peter Xud6eff5d2017-10-19 14:32:00 +08003255 block->bmap = bitmap_new(pages);
Ivan Ren40c4d4a2019-07-14 22:51:19 +08003256 bitmap_set(block->bmap, 0, pages);
Peter Xu002cad62019-06-03 14:50:56 +08003257 block->clear_bmap_shift = shift;
3258 block->clear_bmap = bitmap_new(clear_bmap_size(pages, shift));
Peter Xud6eff5d2017-10-19 14:32:00 +08003259 }
3260 }
3261}
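/*
 * Sizing note: each clear_bmap bit covers 2^clear_bmap_shift target pages.
 * For example, with a shift of 18 and 4 KiB target pages one bit covers
 * 1 GiB of guest RAM, so clear_bmap_size() rounds up to one bit per GiB.
 */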
3262
3263static void ram_init_bitmaps(RAMState *rs)
3264{
3265 /* For memory_global_dirty_log_start below. */
3266 qemu_mutex_lock_iothread();
3267 qemu_mutex_lock_ramlist();
Peter Xud6eff5d2017-10-19 14:32:00 +08003268
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003269 WITH_RCU_READ_LOCK_GUARD() {
3270 ram_list_init_bitmaps();
3271 memory_global_dirty_log_start();
3272 migration_bitmap_sync_precopy(rs);
3273 }
Peter Xud6eff5d2017-10-19 14:32:00 +08003274 qemu_mutex_unlock_ramlist();
3275 qemu_mutex_unlock_iothread();
3276}
3277
Peter Xu7d00ee62017-10-19 14:31:57 +08003278static int ram_init_all(RAMState **rsp)
3279{
Peter Xu7d00ee62017-10-19 14:31:57 +08003280 if (ram_state_init(rsp)) {
3281 return -1;
3282 }
3283
Peter Xu84593a02017-10-19 14:31:59 +08003284 if (xbzrle_init()) {
3285 ram_state_cleanup(rsp);
3286 return -1;
Juan Quintela56e93d22015-05-07 19:33:31 +02003287 }
3288
Peter Xud6eff5d2017-10-19 14:32:00 +08003289 ram_init_bitmaps(*rsp);
zhanghailianga91246c2016-10-27 14:42:59 +08003290
3291 return 0;
3292}
3293
Peter Xu08614f32018-05-02 18:47:33 +08003294static void ram_state_resume_prepare(RAMState *rs, QEMUFile *out)
3295{
3296 RAMBlock *block;
3297 uint64_t pages = 0;
3298
3299 /*
3300 * Postcopy is not using xbzrle/compression, so no need for that.
3301 * Also, since the source is already halted, we don't need to care
3302 * about dirty page logging either.
3303 */
3304
Yury Kotovfbd162e2019-02-15 20:45:46 +03003305 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Peter Xu08614f32018-05-02 18:47:33 +08003306 pages += bitmap_count_one(block->bmap,
3307 block->used_length >> TARGET_PAGE_BITS);
3308 }
3309
3310 /* This may not be aligned with current bitmaps. Recalculate. */
3311 rs->migration_dirty_pages = pages;
3312
3313 rs->last_seen_block = NULL;
3314 rs->last_sent_block = NULL;
3315 rs->last_page = 0;
3316 rs->last_version = ram_list.version;
3317 /*
3318 * Disable the bulk stage, otherwise we'll resend the whole RAM no
3319 * matter what we have sent.
3320 */
3321 rs->ram_bulk_stage = false;
3322
3323 /* Update RAMState cache of output QEMUFile */
3324 rs->f = out;
3325
3326 trace_ram_state_resume_prepare(pages);
3327}
3328
Juan Quintela3d0684b2017-03-23 15:06:39 +01003329/*
Wei Wang6bcb05f2018-12-11 16:24:50 +08003330 * This function clears bits of the free pages reported by the caller from the
3331 * migration dirty bitmap. @addr is the host address corresponding to the
3332 * start of the contiguous guest free pages, and @len is the total bytes of
3333 * those pages.
3334 */
3335void qemu_guest_free_page_hint(void *addr, size_t len)
3336{
3337 RAMBlock *block;
3338 ram_addr_t offset;
3339 size_t used_len, start, npages;
3340 MigrationState *s = migrate_get_current();
3341
3342 /* This function is currently expected to be used during live migration */
3343 if (!migration_is_setup_or_active(s->state)) {
3344 return;
3345 }
3346
3347 for (; len > 0; len -= used_len, addr += used_len) {
3348 block = qemu_ram_block_from_host(addr, false, &offset);
3349 if (unlikely(!block || offset >= block->used_length)) {
3350 /*
3351 * The implementation might not support RAMBlock resize during
3352 * live migration, but it could happen in theory with future
3353 * updates. So we add a check here to capture that case.
3354 */
3355 error_report_once("%s unexpected error", __func__);
3356 return;
3357 }
3358
3359 if (len <= block->used_length - offset) {
3360 used_len = len;
3361 } else {
3362 used_len = block->used_length - offset;
3363 }
3364
3365 start = offset >> TARGET_PAGE_BITS;
3366 npages = used_len >> TARGET_PAGE_BITS;
3367
3368 qemu_mutex_lock(&ram_state->bitmap_mutex);
3369 ram_state->migration_dirty_pages -=
3370 bitmap_count_one_with_offset(block->bmap, start, npages);
3371 bitmap_clear(block->bmap, start, npages);
3372 qemu_mutex_unlock(&ram_state->bitmap_mutex);
3373 }
3374}
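/*
 * Example: a free-page hint covering 2 MiB that lies entirely inside one
 * RAMBlock clears 512 bits (with 4 KiB target pages) from block->bmap and
 * subtracts however many of them were still set from
 * ram_state->migration_dirty_pages, so those pages are skipped by the
 * sender.
 */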
3375
3376/*
Juan Quintela3d0684b2017-03-23 15:06:39 +01003377 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
zhanghailianga91246c2016-10-27 14:42:59 +08003378 * a long-running RCU critical section. When RCU reclaims in the code
3379 * start to become numerous it will be necessary to reduce the
3380 * granularity of these critical sections.
3381 */
3382
Juan Quintela3d0684b2017-03-23 15:06:39 +01003383/**
3384 * ram_save_setup: Setup RAM for migration
3385 *
3386 * Returns zero to indicate success and negative for error
3387 *
3388 * @f: QEMUFile where to send the data
3389 * @opaque: RAMState pointer
3390 */
zhanghailianga91246c2016-10-27 14:42:59 +08003391static int ram_save_setup(QEMUFile *f, void *opaque)
3392{
Juan Quintela53518d92017-05-04 11:46:24 +02003393 RAMState **rsp = opaque;
zhanghailianga91246c2016-10-27 14:42:59 +08003394 RAMBlock *block;
3395
Xiao Guangrongdcaf4462018-03-30 15:51:20 +08003396 if (compress_threads_save_setup()) {
3397 return -1;
3398 }
3399
zhanghailianga91246c2016-10-27 14:42:59 +08003400 /* migration has already setup the bitmap, reuse it. */
3401 if (!migration_in_colo_state()) {
Peter Xu7d00ee62017-10-19 14:31:57 +08003402 if (ram_init_all(rsp) != 0) {
Xiao Guangrongdcaf4462018-03-30 15:51:20 +08003403 compress_threads_save_cleanup();
zhanghailianga91246c2016-10-27 14:42:59 +08003404 return -1;
Juan Quintela53518d92017-05-04 11:46:24 +02003405 }
zhanghailianga91246c2016-10-27 14:42:59 +08003406 }
Juan Quintela53518d92017-05-04 11:46:24 +02003407 (*rsp)->f = f;
zhanghailianga91246c2016-10-27 14:42:59 +08003408
Dr. David Alan Gilbert0e6ebd42019-10-07 15:36:38 +01003409 WITH_RCU_READ_LOCK_GUARD() {
3410 qemu_put_be64(f, ram_bytes_total_common(true) | RAM_SAVE_FLAG_MEM_SIZE);
Juan Quintela56e93d22015-05-07 19:33:31 +02003411
Dr. David Alan Gilbert0e6ebd42019-10-07 15:36:38 +01003412 RAMBLOCK_FOREACH_MIGRATABLE(block) {
3413 qemu_put_byte(f, strlen(block->idstr));
3414 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
3415 qemu_put_be64(f, block->used_length);
3416 if (migrate_postcopy_ram() && block->page_size !=
3417 qemu_host_page_size) {
3418 qemu_put_be64(f, block->page_size);
3419 }
3420 if (migrate_ignore_shared()) {
3421 qemu_put_be64(f, block->mr->addr);
3422 }
Yury Kotovfbd162e2019-02-15 20:45:46 +03003423 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003424 }
3425
Juan Quintela56e93d22015-05-07 19:33:31 +02003426 ram_control_before_iterate(f, RAM_CONTROL_SETUP);
3427 ram_control_after_iterate(f, RAM_CONTROL_SETUP);
3428
Ivan Ren1b81c972019-07-30 13:33:35 +08003429 multifd_send_sync_main(*rsp);
Juan Quintela56e93d22015-05-07 19:33:31 +02003430 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
Juan Quintela35374cb2018-04-18 10:13:21 +02003431 qemu_fflush(f);
Juan Quintela56e93d22015-05-07 19:33:31 +02003432
3433 return 0;
3434}
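/*
 * Rough sketch of the setup section emitted above, ignoring any output
 * from the RDMA hooks (per-block fields are only present when the
 * corresponding feature is active):
 *
 *   be64  ram_bytes_total_common(true) | RAM_SAVE_FLAG_MEM_SIZE
 *   per migratable block:
 *     byte   strlen(idstr)
 *     bytes  idstr
 *     be64   used_length
 *     be64   page_size       (postcopy with non-standard page size only)
 *     be64   block->mr->addr (ignore-shared only)
 *   be64  RAM_SAVE_FLAG_EOS
 */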
3435
Juan Quintela3d0684b2017-03-23 15:06:39 +01003436/**
3437 * ram_save_iterate: iterative stage for migration
3438 *
3439 * Returns zero to indicate success and negative for error
3440 *
3441 * @f: QEMUFile where to send the data
3442 * @opaque: RAMState pointer
3443 */
Juan Quintela56e93d22015-05-07 19:33:31 +02003444static int ram_save_iterate(QEMUFile *f, void *opaque)
3445{
Juan Quintela53518d92017-05-04 11:46:24 +02003446 RAMState **temp = opaque;
3447 RAMState *rs = *temp;
Juan Quintela3d4095b2019-12-18 05:12:36 +01003448 int ret = 0;
Juan Quintela56e93d22015-05-07 19:33:31 +02003449 int i;
3450 int64_t t0;
Thomas Huth5c903082016-11-04 14:10:17 +01003451 int done = 0;
Juan Quintela56e93d22015-05-07 19:33:31 +02003452
Peter Lievenb2557342018-03-08 12:18:24 +01003453 if (blk_mig_bulk_active()) {
3454 /* Avoid transferring ram during bulk phase of block migration as
3455 * the bulk phase will usually take a long time and transferring
3456 * ram updates during that time is pointless. */
3457 goto out;
3458 }
3459
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003460 WITH_RCU_READ_LOCK_GUARD() {
3461 if (ram_list.version != rs->last_version) {
3462 ram_state_reset(rs);
Dr. David Alan Gilberte03a34f2018-06-13 11:26:42 +01003463 }
3464
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003465 /* Read version before ram_list.blocks */
3466 smp_rmb();
Xiao Guangronge8f37352018-09-03 17:26:44 +08003467
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003468 ram_control_before_iterate(f, RAM_CONTROL_ROUND);
Xiao Guangronge8f37352018-09-03 17:26:44 +08003469
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003470 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
3471 i = 0;
3472 while ((ret = qemu_file_rate_limit(f)) == 0 ||
3473 !QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
3474 int pages;
Jason J. Herne070afca2015-09-08 13:12:35 -04003475
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003476 if (qemu_file_get_error(f)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02003477 break;
3478 }
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003479
3480 pages = ram_find_and_save_block(rs, false);
3481 /* no more pages to send */
3482 if (pages == 0) {
3483 done = 1;
3484 break;
3485 }
3486
3487 if (pages < 0) {
3488 qemu_file_set_error(f, pages);
3489 break;
3490 }
3491
3492 rs->target_page_count += pages;
3493
3494 /*
Wei Yang644acf92019-11-07 20:39:07 +08003495 * During postcopy, it is necessary to make sure one whole host
3496 * page is sent in one chunk.
3497 */
3498 if (migrate_postcopy_ram()) {
3499 flush_compressed_data(rs);
3500 }
3501
3502 /*
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003503 * we want to check in the 1st loop, just in case it was the 1st
3504 * time and we had to sync the dirty bitmap.
3505 * qemu_clock_get_ns() is a bit expensive, so we only check every
3506 * few iterations
3507 */
3508 if ((i & 63) == 0) {
3509 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) /
3510 1000000;
3511 if (t1 > MAX_WAIT) {
3512 trace_ram_save_iterate_big_wait(t1, i);
3513 break;
3514 }
3515 }
3516 i++;
Juan Quintela56e93d22015-05-07 19:33:31 +02003517 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003518 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003519
3520 /*
3521 * Must occur before EOS (or any QEMUFile operation)
3522 * because of RDMA protocol.
3523 */
3524 ram_control_after_iterate(f, RAM_CONTROL_ROUND);
3525
Peter Lievenb2557342018-03-08 12:18:24 +01003526out:
Juan Quintela3d4095b2019-12-18 05:12:36 +01003527 if (ret >= 0) {
3528 multifd_send_sync_main(rs);
3529 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
3530 qemu_fflush(f);
3531 ram_counters.transferred += 8;
Juan Quintela56e93d22015-05-07 19:33:31 +02003532
Juan Quintela3d4095b2019-12-18 05:12:36 +01003533 ret = qemu_file_get_error(f);
3534 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003535 if (ret < 0) {
3536 return ret;
3537 }
3538
Thomas Huth5c903082016-11-04 14:10:17 +01003539 return done;
Juan Quintela56e93d22015-05-07 19:33:31 +02003540}
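/*
 * Note on the (i & 63) check above: qemu_clock_get_ns() is only consulted
 * once every 64 iterations, and the loop bails out once more than
 * MAX_WAIT (50 ms) has elapsed, so rate limiting and the main loop are
 * not starved while pages are streamed.
 */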
3541
Juan Quintela3d0684b2017-03-23 15:06:39 +01003542/**
3543 * ram_save_complete: function called to send the remaining amount of ram
3544 *
Xiao Guangronge8f37352018-09-03 17:26:44 +08003545 * Returns zero to indicate success or negative on error
Juan Quintela3d0684b2017-03-23 15:06:39 +01003546 *
3547 * Called with iothread lock
3548 *
3549 * @f: QEMUFile where to send the data
3550 * @opaque: RAMState pointer
3551 */
Juan Quintela56e93d22015-05-07 19:33:31 +02003552static int ram_save_complete(QEMUFile *f, void *opaque)
3553{
Juan Quintela53518d92017-05-04 11:46:24 +02003554 RAMState **temp = opaque;
3555 RAMState *rs = *temp;
Xiao Guangronge8f37352018-09-03 17:26:44 +08003556 int ret = 0;
Juan Quintela6f37bb82017-03-13 19:26:29 +01003557
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003558 WITH_RCU_READ_LOCK_GUARD() {
3559 if (!migration_in_postcopy()) {
3560 migration_bitmap_sync_precopy(rs);
Juan Quintela56e93d22015-05-07 19:33:31 +02003561 }
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003562
3563 ram_control_before_iterate(f, RAM_CONTROL_FINISH);
3564
3565 /* try transferring iterative blocks of memory */
3566
3567 /* flush all remaining blocks regardless of rate limiting */
3568 while (true) {
3569 int pages;
3570
3571 pages = ram_find_and_save_block(rs, !migration_in_colo_state());
3572 /* no more blocks to send */
3573 if (pages == 0) {
3574 break;
3575 }
3576 if (pages < 0) {
3577 ret = pages;
3578 break;
3579 }
Xiao Guangronge8f37352018-09-03 17:26:44 +08003580 }
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003581
3582 flush_compressed_data(rs);
3583 ram_control_after_iterate(f, RAM_CONTROL_FINISH);
Juan Quintela56e93d22015-05-07 19:33:31 +02003584 }
3585
Juan Quintela3d4095b2019-12-18 05:12:36 +01003586 if (ret >= 0) {
3587 multifd_send_sync_main(rs);
3588 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
3589 qemu_fflush(f);
3590 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003591
Xiao Guangronge8f37352018-09-03 17:26:44 +08003592 return ret;
Juan Quintela56e93d22015-05-07 19:33:31 +02003593}
3594
Dr. David Alan Gilbertc31b0982015-11-05 18:10:54 +00003595static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
Vladimir Sementsov-Ogievskiy47995022018-03-13 15:34:00 -04003596 uint64_t *res_precopy_only,
3597 uint64_t *res_compatible,
3598 uint64_t *res_postcopy_only)
Juan Quintela56e93d22015-05-07 19:33:31 +02003599{
Juan Quintela53518d92017-05-04 11:46:24 +02003600 RAMState **temp = opaque;
3601 RAMState *rs = *temp;
Juan Quintela56e93d22015-05-07 19:33:31 +02003602 uint64_t remaining_size;
3603
Juan Quintela9edabd42017-03-14 12:02:16 +01003604 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
Juan Quintela56e93d22015-05-07 19:33:31 +02003605
Juan Quintela57273092017-03-20 22:25:28 +01003606 if (!migration_in_postcopy() &&
Dr. David Alan Gilbert663e6c12015-11-05 18:11:13 +00003607 remaining_size < max_size) {
Juan Quintela56e93d22015-05-07 19:33:31 +02003608 qemu_mutex_lock_iothread();
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003609 WITH_RCU_READ_LOCK_GUARD() {
3610 migration_bitmap_sync_precopy(rs);
3611 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003612 qemu_mutex_unlock_iothread();
Juan Quintela9edabd42017-03-14 12:02:16 +01003613 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
Juan Quintela56e93d22015-05-07 19:33:31 +02003614 }
Dr. David Alan Gilbertc31b0982015-11-05 18:10:54 +00003615
Vladimir Sementsov-Ogievskiy86e11672017-07-10 19:30:15 +03003616 if (migrate_postcopy_ram()) {
3617 /* We can do postcopy, and all the data is postcopiable */
Vladimir Sementsov-Ogievskiy47995022018-03-13 15:34:00 -04003618 *res_compatible += remaining_size;
Vladimir Sementsov-Ogievskiy86e11672017-07-10 19:30:15 +03003619 } else {
Vladimir Sementsov-Ogievskiy47995022018-03-13 15:34:00 -04003620 *res_precopy_only += remaining_size;
Vladimir Sementsov-Ogievskiy86e11672017-07-10 19:30:15 +03003621 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003622}
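/*
 * In other words: when postcopy RAM is enabled, all remaining dirty RAM is
 * reported as "compatible" (it can be sent before or after the
 * switchover); otherwise it all counts as precopy-only.
 */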
3623
3624static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
3625{
3626 unsigned int xh_len;
3627 int xh_flags;
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00003628 uint8_t *loaded_data;
Juan Quintela56e93d22015-05-07 19:33:31 +02003629
Juan Quintela56e93d22015-05-07 19:33:31 +02003630 /* extract RLE header */
3631 xh_flags = qemu_get_byte(f);
3632 xh_len = qemu_get_be16(f);
3633
3634 if (xh_flags != ENCODING_FLAG_XBZRLE) {
3635 error_report("Failed to load XBZRLE page - wrong compression!");
3636 return -1;
3637 }
3638
3639 if (xh_len > TARGET_PAGE_SIZE) {
3640 error_report("Failed to load XBZRLE page - len overflow!");
3641 return -1;
3642 }
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003643 loaded_data = XBZRLE.decoded_buf;
Juan Quintela56e93d22015-05-07 19:33:31 +02003644 /* load data and decode */
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003645 /* it can change loaded_data to point to an internal buffer */
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00003646 qemu_get_buffer_in_place(f, &loaded_data, xh_len);
Juan Quintela56e93d22015-05-07 19:33:31 +02003647
3648 /* decode RLE */
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00003649 if (xbzrle_decode_buffer(loaded_data, xh_len, host,
Juan Quintela56e93d22015-05-07 19:33:31 +02003650 TARGET_PAGE_SIZE) == -1) {
3651 error_report("Failed to load XBZRLE page - decode error!");
3652 return -1;
3653 }
3654
3655 return 0;
3656}
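/*
 * Wire format consumed above: one byte of flags (must equal
 * ENCODING_FLAG_XBZRLE), a big-endian 16-bit encoded length, then the
 * XBZRLE-encoded delta, which is decoded against the current contents of
 * the target page at *host.
 */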
3657
Juan Quintela3d0684b2017-03-23 15:06:39 +01003658/**
3659 * ram_block_from_stream: read a RAMBlock id from the migration stream
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003660 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01003661 * Must be called from within a rcu critical section.
3662 *
3663 * Returns a pointer from within the RCU-protected ram_list.
3664 *
3665 * @f: QEMUFile where to read the data from
3666 * @flags: Page flags (mostly to see if it's a continuation of previous block)
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003667 */
Juan Quintela3d0684b2017-03-23 15:06:39 +01003668static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
Juan Quintela56e93d22015-05-07 19:33:31 +02003669{
3670 static RAMBlock *block = NULL;
3671 char id[256];
3672 uint8_t len;
3673
3674 if (flags & RAM_SAVE_FLAG_CONTINUE) {
zhanghailiang4c4bad42016-01-15 11:37:41 +08003675 if (!block) {
Juan Quintela56e93d22015-05-07 19:33:31 +02003676 error_report("Ack, bad migration stream!");
3677 return NULL;
3678 }
zhanghailiang4c4bad42016-01-15 11:37:41 +08003679 return block;
Juan Quintela56e93d22015-05-07 19:33:31 +02003680 }
3681
3682 len = qemu_get_byte(f);
3683 qemu_get_buffer(f, (uint8_t *)id, len);
3684 id[len] = 0;
3685
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00003686 block = qemu_ram_block_by_name(id);
zhanghailiang4c4bad42016-01-15 11:37:41 +08003687 if (!block) {
3688 error_report("Can't find block %s", id);
3689 return NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02003690 }
3691
Yury Kotovfbd162e2019-02-15 20:45:46 +03003692 if (ramblock_is_ignored(block)) {
Cédric Le Goaterb895de52018-05-14 08:57:00 +02003693 error_report("block %s should not be migrated !", id);
3694 return NULL;
3695 }
3696
zhanghailiang4c4bad42016-01-15 11:37:41 +08003697 return block;
3698}
3699
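/**
 * host_from_ram_block_offset: get the host address for an offset
 *
 * Returns block->host + offset, or NULL if the offset is outside the
 * used range of the block.
 *
 * @block: RAMBlock to look in
 * @offset: offset within the block
 */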
3700static inline void *host_from_ram_block_offset(RAMBlock *block,
3701 ram_addr_t offset)
3702{
3703 if (!offset_in_ramblock(block, offset)) {
3704 return NULL;
3705 }
3706
3707 return block->host + offset;
Juan Quintela56e93d22015-05-07 19:33:31 +02003708}
3709
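/**
 * colo_cache_from_block_offset: get the COLO cache address for an offset
 *
 * Returns the address of @offset within @block's colo_cache, or NULL if
 * the offset is out of range or the cache has not been allocated.  Also
 * marks the page in the block's dirty bitmap so a later checkpoint knows
 * it has to be flushed into the SVM's RAM.
 *
 * @block: RAMBlock to look in
 * @offset: offset within the block
 */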
Zhang Chen13af18f2018-09-03 12:38:48 +08003710static inline void *colo_cache_from_block_offset(RAMBlock *block,
3711 ram_addr_t offset)
3712{
3713 if (!offset_in_ramblock(block, offset)) {
3714 return NULL;
3715 }
3716 if (!block->colo_cache) {
3717 error_report("%s: colo_cache is NULL in block :%s",
3718 __func__, block->idstr);
3719 return NULL;
3720 }
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003721
3722 /*
3723 * During a colo checkpoint, we need a bitmap of these migrated pages.
3724 * It helps us decide which pages in the ram cache should be flushed
3725 * into the VM's RAM later.
3726 */
3727 if (!test_and_set_bit(offset >> TARGET_PAGE_BITS, block->bmap)) {
3728 ram_state->migration_dirty_pages++;
3729 }
Zhang Chen13af18f2018-09-03 12:38:48 +08003730 return block->colo_cache + offset;
3731}
3732
Juan Quintela3d0684b2017-03-23 15:06:39 +01003733/**
3734 * ram_handle_compressed: handle the zero page case
3735 *
Juan Quintela56e93d22015-05-07 19:33:31 +02003736 * If a page (or a whole RDMA chunk) has been
3737 * determined to be zero, then zap it.
Juan Quintela3d0684b2017-03-23 15:06:39 +01003738 *
3739 * @host: host address for the zero page
3740 * @ch: what the page is filled from. We only support zero
3741 * @size: size of the zero page
Juan Quintela56e93d22015-05-07 19:33:31 +02003742 */
3743void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
3744{
3745 if (ch != 0 || !is_zero_range(host, size)) {
3746 memset(host, ch, size);
3747 }
3748}
3749
Xiao Guangrong797ca152018-03-30 15:51:21 +08003750/* return the size after decompression, or negative value on error */
3751static int
3752qemu_uncompress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
3753 const uint8_t *source, size_t source_len)
3754{
3755 int err;
3756
3757 err = inflateReset(stream);
3758 if (err != Z_OK) {
3759 return -1;
3760 }
3761
3762 stream->avail_in = source_len;
3763 stream->next_in = (uint8_t *)source;
3764 stream->avail_out = dest_len;
3765 stream->next_out = dest;
3766
3767 err = inflate(stream, Z_NO_FLUSH);
3768 if (err != Z_STREAM_END) {
3769 return -1;
3770 }
3771
3772 return stream->total_out;
3773}
3774
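/*
 * Worker body of a decompression thread: wait on param->cond for a
 * compressed buffer, inflate it into its destination page, then mark
 * itself done and signal decomp_done_cond so the dispatcher can reuse
 * the slot.  Exits when param->quit is set.
 */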
Juan Quintela56e93d22015-05-07 19:33:31 +02003775static void *do_data_decompress(void *opaque)
3776{
3777 DecompressParam *param = opaque;
3778 unsigned long pagesize;
Liang Li33d151f2016-05-05 15:32:58 +08003779 uint8_t *des;
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003780 int len, ret;
Juan Quintela56e93d22015-05-07 19:33:31 +02003781
Liang Li33d151f2016-05-05 15:32:58 +08003782 qemu_mutex_lock(&param->mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08003783 while (!param->quit) {
Liang Li33d151f2016-05-05 15:32:58 +08003784 if (param->des) {
3785 des = param->des;
3786 len = param->len;
3787 param->des = 0;
3788 qemu_mutex_unlock(&param->mutex);
3789
Liang Li73a89122016-05-05 15:32:51 +08003790 pagesize = TARGET_PAGE_SIZE;
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003791
3792 ret = qemu_uncompress_data(&param->stream, des, pagesize,
3793 param->compbuf, len);
Xiao Guangrongf5482222018-05-03 16:06:11 +08003794 if (ret < 0 && migrate_get_current()->decompress_error_check) {
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003795 error_report("decompress data failed");
3796 qemu_file_set_error(decomp_file, ret);
3797 }
Liang Li73a89122016-05-05 15:32:51 +08003798
Liang Li33d151f2016-05-05 15:32:58 +08003799 qemu_mutex_lock(&decomp_done_lock);
3800 param->done = true;
3801 qemu_cond_signal(&decomp_done_cond);
3802 qemu_mutex_unlock(&decomp_done_lock);
3803
3804 qemu_mutex_lock(&param->mutex);
3805 } else {
3806 qemu_cond_wait(&param->cond, &param->mutex);
3807 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003808 }
Liang Li33d151f2016-05-05 15:32:58 +08003809 qemu_mutex_unlock(&param->mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02003810
3811 return NULL;
3812}
3813
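/*
 * Wait until every decompression thread has finished its outstanding
 * work, then return any error recorded on decomp_file.  A no-op when
 * compression is not in use.
 */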
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003814static int wait_for_decompress_done(void)
Liang Li5533b2e2016-05-05 15:32:52 +08003815{
3816 int idx, thread_count;
3817
3818 if (!migrate_use_compression()) {
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003819 return 0;
Liang Li5533b2e2016-05-05 15:32:52 +08003820 }
3821
3822 thread_count = migrate_decompress_threads();
3823 qemu_mutex_lock(&decomp_done_lock);
3824 for (idx = 0; idx < thread_count; idx++) {
3825 while (!decomp_param[idx].done) {
3826 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
3827 }
3828 }
3829 qemu_mutex_unlock(&decomp_done_lock);
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003830 return qemu_file_get_error(decomp_file);
Liang Li5533b2e2016-05-05 15:32:52 +08003831}
3832
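/*
 * Tear down the decompression threads created by
 * compress_threads_load_setup(): ask each thread to quit, join it and
 * release its zlib stream, buffer and synchronization primitives.
 */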
Juan Quintelaf0afa332017-06-28 11:52:28 +02003833static void compress_threads_load_cleanup(void)
Juan Quintela56e93d22015-05-07 19:33:31 +02003834{
3835 int i, thread_count;
3836
Juan Quintela3416ab52016-04-20 11:56:01 +02003837 if (!migrate_use_compression()) {
3838 return;
3839 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003840 thread_count = migrate_decompress_threads();
3841 for (i = 0; i < thread_count; i++) {
Xiao Guangrong797ca152018-03-30 15:51:21 +08003842 /*
3843 * we use it as an indicator which shows whether the thread is
3844 * properly init'd or not
3845 */
3846 if (!decomp_param[i].compbuf) {
3847 break;
3848 }
3849
Juan Quintela56e93d22015-05-07 19:33:31 +02003850 qemu_mutex_lock(&decomp_param[i].mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08003851 decomp_param[i].quit = true;
Juan Quintela56e93d22015-05-07 19:33:31 +02003852 qemu_cond_signal(&decomp_param[i].cond);
3853 qemu_mutex_unlock(&decomp_param[i].mutex);
3854 }
3855 for (i = 0; i < thread_count; i++) {
Xiao Guangrong797ca152018-03-30 15:51:21 +08003856 if (!decomp_param[i].compbuf) {
3857 break;
3858 }
3859
Juan Quintela56e93d22015-05-07 19:33:31 +02003860 qemu_thread_join(decompress_threads + i);
3861 qemu_mutex_destroy(&decomp_param[i].mutex);
3862 qemu_cond_destroy(&decomp_param[i].cond);
Xiao Guangrong797ca152018-03-30 15:51:21 +08003863 inflateEnd(&decomp_param[i].stream);
Juan Quintela56e93d22015-05-07 19:33:31 +02003864 g_free(decomp_param[i].compbuf);
Xiao Guangrong797ca152018-03-30 15:51:21 +08003865 decomp_param[i].compbuf = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02003866 }
3867 g_free(decompress_threads);
3868 g_free(decomp_param);
Juan Quintela56e93d22015-05-07 19:33:31 +02003869 decompress_threads = NULL;
3870 decomp_param = NULL;
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003871 decomp_file = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02003872}
3873
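/*
 * Set up the decompression threads for the incoming side.
 *
 * Returns 0 on success and -1 if a zlib stream could not be
 * initialized; in that case any partially created state is released
 * via compress_threads_load_cleanup().  A no-op when compression is
 * not in use.
 */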
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003874static int compress_threads_load_setup(QEMUFile *f)
Xiao Guangrong797ca152018-03-30 15:51:21 +08003875{
3876 int i, thread_count;
3877
3878 if (!migrate_use_compression()) {
3879 return 0;
3880 }
3881
3882 thread_count = migrate_decompress_threads();
3883 decompress_threads = g_new0(QemuThread, thread_count);
3884 decomp_param = g_new0(DecompressParam, thread_count);
3885 qemu_mutex_init(&decomp_done_lock);
3886 qemu_cond_init(&decomp_done_cond);
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003887 decomp_file = f;
Xiao Guangrong797ca152018-03-30 15:51:21 +08003888 for (i = 0; i < thread_count; i++) {
3889 if (inflateInit(&decomp_param[i].stream) != Z_OK) {
3890 goto exit;
3891 }
3892
3893 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
3894 qemu_mutex_init(&decomp_param[i].mutex);
3895 qemu_cond_init(&decomp_param[i].cond);
3896 decomp_param[i].done = true;
3897 decomp_param[i].quit = false;
3898 qemu_thread_create(decompress_threads + i, "decompress",
3899 do_data_decompress, decomp_param + i,
3900 QEMU_THREAD_JOINABLE);
3901 }
3902 return 0;
3903exit:
3904 compress_threads_load_cleanup();
3905 return -1;
3906}
3907
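/*
 * Hand a compressed page to an idle decompression thread.  The caller
 * blocks on decomp_done_cond until a thread slot becomes free, copies
 * the compressed data into that slot's buffer and wakes the worker.
 */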
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00003908static void decompress_data_with_multi_threads(QEMUFile *f,
Juan Quintela56e93d22015-05-07 19:33:31 +02003909 void *host, int len)
3910{
3911 int idx, thread_count;
3912
3913 thread_count = migrate_decompress_threads();
Liang Li73a89122016-05-05 15:32:51 +08003914 qemu_mutex_lock(&decomp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02003915 while (true) {
3916 for (idx = 0; idx < thread_count; idx++) {
Liang Li73a89122016-05-05 15:32:51 +08003917 if (decomp_param[idx].done) {
Liang Li33d151f2016-05-05 15:32:58 +08003918 decomp_param[idx].done = false;
3919 qemu_mutex_lock(&decomp_param[idx].mutex);
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00003920 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
Juan Quintela56e93d22015-05-07 19:33:31 +02003921 decomp_param[idx].des = host;
3922 decomp_param[idx].len = len;
Liang Li33d151f2016-05-05 15:32:58 +08003923 qemu_cond_signal(&decomp_param[idx].cond);
3924 qemu_mutex_unlock(&decomp_param[idx].mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02003925 break;
3926 }
3927 }
3928 if (idx < thread_count) {
3929 break;
Liang Li73a89122016-05-05 15:32:51 +08003930 } else {
3931 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02003932 }
3933 }
Liang Li73a89122016-05-05 15:32:51 +08003934 qemu_mutex_unlock(&decomp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02003935}
3936
Zhang Chen13af18f2018-09-03 12:38:48 +08003937/*
3938 * colo cache: this is for the secondary VM, where we cache the whole
3939 * memory of the secondary VM. The global lock must be held when
3940 * calling this helper.
3941 */
3942int colo_init_ram_cache(void)
3943{
3944 RAMBlock *block;
3945
Paolo Bonzini44901b52019-12-13 15:07:22 +01003946 WITH_RCU_READ_LOCK_GUARD() {
3947 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
3948 block->colo_cache = qemu_anon_ram_alloc(block->used_length,
3949 NULL,
3950 false);
3951 if (!block->colo_cache) {
3952 error_report("%s: Can't alloc memory for COLO cache of block %s,"
3953 "size 0x" RAM_ADDR_FMT, __func__, block->idstr,
3954 block->used_length);
3955 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
3956 if (block->colo_cache) {
3957 qemu_anon_ram_free(block->colo_cache, block->used_length);
3958 block->colo_cache = NULL;
3959 }
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003960 }
Paolo Bonzini44901b52019-12-13 15:07:22 +01003961 return -errno;
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003962 }
Paolo Bonzini44901b52019-12-13 15:07:22 +01003963 memcpy(block->colo_cache, block->host, block->used_length);
Zhang Chen13af18f2018-09-03 12:38:48 +08003964 }
Zhang Chen13af18f2018-09-03 12:38:48 +08003965 }
Paolo Bonzini44901b52019-12-13 15:07:22 +01003966
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003967 /*
3968 * Record the dirty pages sent by the PVM; we use this dirty bitmap to
3969 * decide which pages in the cache should be flushed into the SVM's RAM. Here
3970 * we use the same name 'ram_bitmap' as for migration.
3971 */
3972 if (ram_bytes_total()) {
3973 RAMBlock *block;
3974
Yury Kotovfbd162e2019-02-15 20:45:46 +03003975 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003976 unsigned long pages = block->max_length >> TARGET_PAGE_BITS;
3977
3978 block->bmap = bitmap_new(pages);
3979 bitmap_set(block->bmap, 0, pages);
3980 }
3981 }
3982 ram_state = g_new0(RAMState, 1);
3983 ram_state->migration_dirty_pages = 0;
Zhang Chenc6e5baf2019-03-30 06:29:51 +08003984 qemu_mutex_init(&ram_state->bitmap_mutex);
zhanghailiangd1955d22018-09-03 12:38:55 +08003985 memory_global_dirty_log_start();
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003986
Zhang Chen13af18f2018-09-03 12:38:48 +08003987 return 0;
Zhang Chen13af18f2018-09-03 12:38:48 +08003988}
3989
3990/* The global lock must be held when calling this helper */
3991void colo_release_ram_cache(void)
3992{
3993 RAMBlock *block;
3994
zhanghailiangd1955d22018-09-03 12:38:55 +08003995 memory_global_dirty_log_stop();
Yury Kotovfbd162e2019-02-15 20:45:46 +03003996 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003997 g_free(block->bmap);
3998 block->bmap = NULL;
3999 }
4000
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01004001 WITH_RCU_READ_LOCK_GUARD() {
4002 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
4003 if (block->colo_cache) {
4004 qemu_anon_ram_free(block->colo_cache, block->used_length);
4005 block->colo_cache = NULL;
4006 }
Zhang Chen13af18f2018-09-03 12:38:48 +08004007 }
4008 }
Zhang Chenc6e5baf2019-03-30 06:29:51 +08004009 qemu_mutex_destroy(&ram_state->bitmap_mutex);
Zhang Chen7d9acaf2018-09-03 12:38:49 +08004010 g_free(ram_state);
4011 ram_state = NULL;
Zhang Chen13af18f2018-09-03 12:38:48 +08004012}
4013
Juan Quintela3d0684b2017-03-23 15:06:39 +01004014/**
Juan Quintelaf265e0e2017-06-28 11:52:27 +02004015 * ram_load_setup: Setup RAM for migration incoming side
4016 *
4017 * Returns zero to indicate success and negative for error
4018 *
4019 * @f: QEMUFile where to receive the data
4020 * @opaque: RAMState pointer
4021 */
4022static int ram_load_setup(QEMUFile *f, void *opaque)
4023{
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08004024 if (compress_threads_load_setup(f)) {
Xiao Guangrong797ca152018-03-30 15:51:21 +08004025 return -1;
4026 }
4027
Juan Quintelaf265e0e2017-06-28 11:52:27 +02004028 xbzrle_load_setup();
Alexey Perevalovf9494612017-10-05 14:13:20 +03004029 ramblock_recv_map_init();
Zhang Chen13af18f2018-09-03 12:38:48 +08004030
Juan Quintelaf265e0e2017-06-28 11:52:27 +02004031 return 0;
4032}
4033
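/**
 * ram_load_cleanup: free the resources set up for the incoming side
 *
 * Returns zero to indicate success
 *
 * Writes back the RAM blocks, tears down the XBZRLE and decompression
 * state and frees the per-block receive bitmaps.
 *
 * @opaque: RAMState pointer
 */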
4034static int ram_load_cleanup(void *opaque)
4035{
Alexey Perevalovf9494612017-10-05 14:13:20 +03004036 RAMBlock *rb;
Junyan He56eb90a2018-07-18 15:48:03 +08004037
Yury Kotovfbd162e2019-02-15 20:45:46 +03004038 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
Beata Michalskabd108a42019-11-21 00:08:42 +00004039 qemu_ram_block_writeback(rb);
Junyan He56eb90a2018-07-18 15:48:03 +08004040 }
4041
Juan Quintelaf265e0e2017-06-28 11:52:27 +02004042 xbzrle_load_cleanup();
Juan Quintelaf0afa332017-06-28 11:52:28 +02004043 compress_threads_load_cleanup();
Alexey Perevalovf9494612017-10-05 14:13:20 +03004044
Yury Kotovfbd162e2019-02-15 20:45:46 +03004045 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
Alexey Perevalovf9494612017-10-05 14:13:20 +03004046 g_free(rb->receivedmap);
4047 rb->receivedmap = NULL;
4048 }
Zhang Chen13af18f2018-09-03 12:38:48 +08004049
Juan Quintelaf265e0e2017-06-28 11:52:27 +02004050 return 0;
4051}
4052
4053/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01004054 * ram_postcopy_incoming_init: allocate postcopy data structures
4055 *
4056 * Returns 0 for success and negative if there was one error
4057 *
4058 * @mis: current migration incoming state
4059 *
4060 * Allocate data structures etc needed by incoming migration with
4061 * postcopy-ram. postcopy-ram's similarly named
4062 * postcopy_ram_incoming_init does the work.
Dr. David Alan Gilbert1caddf82015-11-05 18:11:03 +00004063 */
4064int ram_postcopy_incoming_init(MigrationIncomingState *mis)
4065{
David Hildenbrandc1361802018-06-20 22:27:36 +02004066 return postcopy_ram_incoming_init(mis);
Dr. David Alan Gilbert1caddf82015-11-05 18:11:03 +00004067}
4068
Juan Quintela3d0684b2017-03-23 15:06:39 +01004069/**
4070 * ram_load_postcopy: load a page in postcopy case
4071 *
4072 * Returns 0 for success or -errno in case of error
4073 *
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004074 * Called in postcopy mode by ram_load().
4075 * rcu_read_lock is taken prior to this being called.
Juan Quintela3d0684b2017-03-23 15:06:39 +01004076 *
4077 * @f: QEMUFile where to read the data from
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004078 */
4079static int ram_load_postcopy(QEMUFile *f)
4080{
4081 int flags = 0, ret = 0;
4082 bool place_needed = false;
Peter Xu1aa83672018-07-10 17:18:53 +08004083 bool matches_target_page_size = false;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004084 MigrationIncomingState *mis = migration_incoming_get_current();
4085 /* Temporary page that is later 'placed' */
Wei Yang34143222019-10-05 21:50:20 +08004086 void *postcopy_host_page = mis->postcopy_tmp_page;
Wei Yang91ba4422019-11-07 20:39:06 +08004087 void *this_host = NULL;
Dr. David Alan Gilberta3b6ff62015-11-11 14:02:28 +00004088 bool all_zero = false;
Wei Yang4cbb3c62019-11-07 20:39:04 +08004089 int target_pages = 0;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004090
4091 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
4092 ram_addr_t addr;
4093 void *host = NULL;
4094 void *page_buffer = NULL;
4095 void *place_source = NULL;
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00004096 RAMBlock *block = NULL;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004097 uint8_t ch;
Wei Yang644acf92019-11-07 20:39:07 +08004098 int len;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004099
4100 addr = qemu_get_be64(f);
Peter Xu7a9ddfb2018-02-08 18:31:05 +08004101
4102 /*
4103 * If there is a qemu file error, we should stop here, as "addr"
4104 * may be invalid
4105 */
4106 ret = qemu_file_get_error(f);
4107 if (ret) {
4108 break;
4109 }
4110
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004111 flags = addr & ~TARGET_PAGE_MASK;
4112 addr &= TARGET_PAGE_MASK;
4113
4114 trace_ram_load_postcopy_loop((uint64_t)addr, flags);
4115 place_needed = false;
Wei Yang644acf92019-11-07 20:39:07 +08004116 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
4117 RAM_SAVE_FLAG_COMPRESS_PAGE)) {
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00004118 block = ram_block_from_stream(f, flags);
zhanghailiang4c4bad42016-01-15 11:37:41 +08004119
4120 host = host_from_ram_block_offset(block, addr);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004121 if (!host) {
4122 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
4123 ret = -EINVAL;
4124 break;
4125 }
Wei Yang4cbb3c62019-11-07 20:39:04 +08004126 target_pages++;
Peter Xu1aa83672018-07-10 17:18:53 +08004127 matches_target_page_size = block->page_size == TARGET_PAGE_SIZE;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004128 /*
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00004129 * Postcopy requires that we place whole host pages atomically;
4130 * these may be huge pages for RAMBlocks that are backed by
4131 * hugetlbfs.
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004132 * To make it atomic, the data is read into a temporary page
4133 * that's moved into place later.
4134 * The migration protocol uses, possibly smaller, target-pages;
4135 * however, the source ensures it always sends all the components
Wei Yang91ba4422019-11-07 20:39:06 +08004136 * of a host page in one chunk.
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004137 */
4138 page_buffer = postcopy_host_page +
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00004139 ((uintptr_t)host & (block->page_size - 1));
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004140 /* If all TP are zero then we can optimise the place */
Wei Yange5e73b02019-11-07 20:39:05 +08004141 if (target_pages == 1) {
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004142 all_zero = true;
Wei Yang91ba4422019-11-07 20:39:06 +08004143 this_host = (void *)QEMU_ALIGN_DOWN((uintptr_t)host,
4144 block->page_size);
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00004145 } else {
4146 /* not the 1st TP within the HP */
Wei Yang91ba4422019-11-07 20:39:06 +08004147 if (QEMU_ALIGN_DOWN((uintptr_t)host, block->page_size) !=
4148 (uintptr_t)this_host) {
4149 error_report("Non-same host page %p/%p",
4150 host, this_host);
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00004151 ret = -EINVAL;
4152 break;
4153 }
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004154 }
4155
4156 /*
4157 * If it's the last part of a host page then we place the host
4158 * page
4159 */
Wei Yang4cbb3c62019-11-07 20:39:04 +08004160 if (target_pages == (block->page_size / TARGET_PAGE_SIZE)) {
4161 place_needed = true;
4162 target_pages = 0;
4163 }
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004164 place_source = postcopy_host_page;
4165 }
4166
4167 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
Juan Quintelabb890ed2017-04-28 09:39:55 +02004168 case RAM_SAVE_FLAG_ZERO:
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004169 ch = qemu_get_byte(f);
Wei Yang2e36bc12019-11-07 20:39:02 +08004170 /*
4171 * We can skip setting page_buffer when
4172 * this is a zero page and (block->page_size == TARGET_PAGE_SIZE).
4173 */
4174 if (ch || !matches_target_page_size) {
4175 memset(page_buffer, ch, TARGET_PAGE_SIZE);
4176 }
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004177 if (ch) {
4178 all_zero = false;
4179 }
4180 break;
4181
4182 case RAM_SAVE_FLAG_PAGE:
4183 all_zero = false;
Peter Xu1aa83672018-07-10 17:18:53 +08004184 if (!matches_target_page_size) {
4185 /* For huge pages, we always use temporary buffer */
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004186 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
4187 } else {
Peter Xu1aa83672018-07-10 17:18:53 +08004188 /*
4189 * For small pages that matches target page size, we
4190 * avoid the qemu_file copy. Instead we directly use
4191 * the buffer of QEMUFile to place the page. Note: we
4192 * cannot do any QEMUFile operation before using that
4193 * buffer to make sure the buffer is valid when
4194 * placing the page.
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004195 */
4196 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
4197 TARGET_PAGE_SIZE);
4198 }
4199 break;
Wei Yang644acf92019-11-07 20:39:07 +08004200 case RAM_SAVE_FLAG_COMPRESS_PAGE:
4201 all_zero = false;
4202 len = qemu_get_be32(f);
4203 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
4204 error_report("Invalid compressed data length: %d", len);
4205 ret = -EINVAL;
4206 break;
4207 }
4208 decompress_data_with_multi_threads(f, page_buffer, len);
4209 break;
4210
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004211 case RAM_SAVE_FLAG_EOS:
4212 /* normal exit */
Juan Quintela6df264a2018-02-28 09:10:07 +01004213 multifd_recv_sync_main();
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004214 break;
4215 default:
4216 error_report("Unknown combination of migration flags: %#x"
4217 " (postcopy mode)", flags);
4218 ret = -EINVAL;
Peter Xu7a9ddfb2018-02-08 18:31:05 +08004219 break;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004220 }
4221
Wei Yang644acf92019-11-07 20:39:07 +08004222 /* Got the whole host page, wait for decompression before placing. */
4223 if (place_needed) {
4224 ret |= wait_for_decompress_done();
4225 }
4226
Peter Xu7a9ddfb2018-02-08 18:31:05 +08004227 /* Detect any possible file errors */
4228 if (!ret && qemu_file_get_error(f)) {
4229 ret = qemu_file_get_error(f);
4230 }
4231
4232 if (!ret && place_needed) {
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004233 /* This gets called at the last target page in the host page */
Wei Yang91ba4422019-11-07 20:39:06 +08004234 void *place_dest = (void *)QEMU_ALIGN_DOWN((uintptr_t)host,
4235 block->page_size);
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00004236
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004237 if (all_zero) {
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00004238 ret = postcopy_place_page_zero(mis, place_dest,
Alexey Perevalov8be46202017-10-05 14:13:18 +03004239 block);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004240 } else {
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00004241 ret = postcopy_place_page(mis, place_dest,
Alexey Perevalov8be46202017-10-05 14:13:18 +03004242 place_source, block);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004243 }
4244 }
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004245 }
4246
4247 return ret;
4248}
4249
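/*
 * Helpers to query the incoming postcopy state: "advised" means the
 * source announced the postcopy capability, "running" means we are
 * already in the listening/running postcopy phases.
 */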
Daniel Henrique Barbozaacab30b2017-11-16 20:35:26 -02004250static bool postcopy_is_advised(void)
4251{
4252 PostcopyState ps = postcopy_state_get();
4253 return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END;
4254}
4255
4256static bool postcopy_is_running(void)
4257{
4258 PostcopyState ps = postcopy_state_get();
4259 return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END;
4260}
4261
Zhang Chene6f4aa12018-09-03 12:38:50 +08004262/*
4263 * Flush content of RAM cache into SVM's memory.
4264 * Only flush the pages that have been dirtied by the PVM, the SVM, or both.
4265 */
4266static void colo_flush_ram_cache(void)
4267{
4268 RAMBlock *block = NULL;
4269 void *dst_host;
4270 void *src_host;
4271 unsigned long offset = 0;
4272
zhanghailiangd1955d22018-09-03 12:38:55 +08004273 memory_global_dirty_log_sync();
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01004274 WITH_RCU_READ_LOCK_GUARD() {
4275 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
4276 ramblock_sync_dirty_bitmap(ram_state, block);
Zhang Chene6f4aa12018-09-03 12:38:50 +08004277 }
4278 }
4279
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01004280 trace_colo_flush_ram_cache_begin(ram_state->migration_dirty_pages);
4281 WITH_RCU_READ_LOCK_GUARD() {
4282 block = QLIST_FIRST_RCU(&ram_list.blocks);
4283
4284 while (block) {
4285 offset = migration_bitmap_find_dirty(ram_state, block, offset);
4286
Alexey Romko8bba0042020-01-10 14:51:34 +01004287 if (((ram_addr_t)offset) << TARGET_PAGE_BITS
4288 >= block->used_length) {
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01004289 offset = 0;
4290 block = QLIST_NEXT_RCU(block, next);
4291 } else {
4292 migration_bitmap_clear_dirty(ram_state, block, offset);
Alexey Romko8bba0042020-01-10 14:51:34 +01004293 dst_host = block->host
4294 + (((ram_addr_t)offset) << TARGET_PAGE_BITS);
4295 src_host = block->colo_cache
4296 + (((ram_addr_t)offset) << TARGET_PAGE_BITS);
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01004297 memcpy(dst_host, src_host, TARGET_PAGE_SIZE);
4298 }
4299 }
4300 }
Zhang Chene6f4aa12018-09-03 12:38:50 +08004301 trace_colo_flush_ram_cache_end();
4302}
4303
Wei Yang10da4a32019-07-25 08:20:23 +08004304/**
4305 * ram_load_precopy: load pages in precopy case
4306 *
4307 * Returns 0 for success or -errno in case of error
4308 *
4309 * Called in precopy mode by ram_load().
4310 * rcu_read_lock is taken prior to this being called.
4311 *
4312 * @f: QEMUFile where to send the data
4313 * @f: QEMUFile where to read the data from
4314static int ram_load_precopy(QEMUFile *f)
Juan Quintela56e93d22015-05-07 19:33:31 +02004315{
Yury Kotove65cec52019-11-25 16:36:32 +03004316 int flags = 0, ret = 0, invalid_flags = 0, len = 0, i = 0;
Dr. David Alan Gilbertef08fb32017-02-24 18:28:30 +00004317 /* ADVISE is earlier, it shows the source has the postcopy capability on */
Daniel Henrique Barbozaacab30b2017-11-16 20:35:26 -02004318 bool postcopy_advised = postcopy_is_advised();
Juan Quintelaedc60122016-11-02 12:40:46 +01004319 if (!migrate_use_compression()) {
4320 invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE;
4321 }
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004322
Wei Yang10da4a32019-07-25 08:20:23 +08004323 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02004324 ram_addr_t addr, total_ram_bytes;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004325 void *host = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02004326 uint8_t ch;
4327
Yury Kotove65cec52019-11-25 16:36:32 +03004328 /*
4329 * Yield periodically to let the main loop run, but an iteration of
4330 * the main loop is expensive, so only do it every so many iterations
4331 */
4332 if ((i & 32767) == 0 && qemu_in_coroutine()) {
4333 aio_co_schedule(qemu_get_current_aio_context(),
4334 qemu_coroutine_self());
4335 qemu_coroutine_yield();
4336 }
4337 i++;
4338
Juan Quintela56e93d22015-05-07 19:33:31 +02004339 addr = qemu_get_be64(f);
4340 flags = addr & ~TARGET_PAGE_MASK;
4341 addr &= TARGET_PAGE_MASK;
4342
Juan Quintelaedc60122016-11-02 12:40:46 +01004343 if (flags & invalid_flags) {
4344 if (flags & invalid_flags & RAM_SAVE_FLAG_COMPRESS_PAGE) {
4345 error_report("Received an unexpected compressed page");
4346 }
4347
4348 ret = -EINVAL;
4349 break;
4350 }
4351
Juan Quintelabb890ed2017-04-28 09:39:55 +02004352 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004353 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
zhanghailiang4c4bad42016-01-15 11:37:41 +08004354 RAMBlock *block = ram_block_from_stream(f, flags);
4355
Zhang Chen13af18f2018-09-03 12:38:48 +08004356 /*
4357 * After going into COLO, we should load the page into colo_cache.
4358 */
4359 if (migration_incoming_in_colo_state()) {
4360 host = colo_cache_from_block_offset(block, addr);
4361 } else {
4362 host = host_from_ram_block_offset(block, addr);
4363 }
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004364 if (!host) {
4365 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
4366 ret = -EINVAL;
4367 break;
4368 }
Zhang Chen13af18f2018-09-03 12:38:48 +08004369
4370 if (!migration_incoming_in_colo_state()) {
4371 ramblock_recv_bitmap_set(block, host);
4372 }
4373
Dr. David Alan Gilbert1db9d8e2017-04-26 19:37:21 +01004374 trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004375 }
4376
Juan Quintela56e93d22015-05-07 19:33:31 +02004377 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
4378 case RAM_SAVE_FLAG_MEM_SIZE:
4379 /* Synchronize RAM block list */
4380 total_ram_bytes = addr;
4381 while (!ret && total_ram_bytes) {
4382 RAMBlock *block;
Juan Quintela56e93d22015-05-07 19:33:31 +02004383 char id[256];
4384 ram_addr_t length;
4385
4386 len = qemu_get_byte(f);
4387 qemu_get_buffer(f, (uint8_t *)id, len);
4388 id[len] = 0;
4389 length = qemu_get_be64(f);
4390
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00004391 block = qemu_ram_block_by_name(id);
Cédric Le Goaterb895de52018-05-14 08:57:00 +02004392 if (block && !qemu_ram_is_migratable(block)) {
4393 error_report("block %s should not be migrated !", id);
4394 ret = -EINVAL;
4395 } else if (block) {
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00004396 if (length != block->used_length) {
4397 Error *local_err = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02004398
Gongleifa53a0e2016-05-10 10:04:59 +08004399 ret = qemu_ram_resize(block, length,
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00004400 &local_err);
4401 if (local_err) {
4402 error_report_err(local_err);
Juan Quintela56e93d22015-05-07 19:33:31 +02004403 }
Juan Quintela56e93d22015-05-07 19:33:31 +02004404 }
Dr. David Alan Gilbertef08fb32017-02-24 18:28:30 +00004405 /* For postcopy we need to check hugepage sizes match */
4406 if (postcopy_advised &&
4407 block->page_size != qemu_host_page_size) {
4408 uint64_t remote_page_size = qemu_get_be64(f);
4409 if (remote_page_size != block->page_size) {
4410 error_report("Mismatched RAM page size %s "
4411 "(local) %zd != %" PRId64,
4412 id, block->page_size,
4413 remote_page_size);
4414 ret = -EINVAL;
4415 }
4416 }
Yury Kotovfbd162e2019-02-15 20:45:46 +03004417 if (migrate_ignore_shared()) {
4418 hwaddr addr = qemu_get_be64(f);
Yury Kotovfbd162e2019-02-15 20:45:46 +03004419 if (ramblock_is_ignored(block) &&
4420 block->mr->addr != addr) {
4421 error_report("Mismatched GPAs for block %s "
4422 "%" PRId64 "!= %" PRId64,
4423 id, (uint64_t)addr,
4424 (uint64_t)block->mr->addr);
4425 ret = -EINVAL;
4426 }
4427 }
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00004428 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
4429 block->idstr);
4430 } else {
Juan Quintela56e93d22015-05-07 19:33:31 +02004431 error_report("Unknown ramblock \"%s\", cannot "
4432 "accept migration", id);
4433 ret = -EINVAL;
4434 }
4435
4436 total_ram_bytes -= length;
4437 }
4438 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004439
Juan Quintelabb890ed2017-04-28 09:39:55 +02004440 case RAM_SAVE_FLAG_ZERO:
Juan Quintela56e93d22015-05-07 19:33:31 +02004441 ch = qemu_get_byte(f);
4442 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
4443 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004444
Juan Quintela56e93d22015-05-07 19:33:31 +02004445 case RAM_SAVE_FLAG_PAGE:
Juan Quintela56e93d22015-05-07 19:33:31 +02004446 qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
4447 break;
Juan Quintela56e93d22015-05-07 19:33:31 +02004448
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004449 case RAM_SAVE_FLAG_COMPRESS_PAGE:
Juan Quintela56e93d22015-05-07 19:33:31 +02004450 len = qemu_get_be32(f);
4451 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
4452 error_report("Invalid compressed data length: %d", len);
4453 ret = -EINVAL;
4454 break;
4455 }
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00004456 decompress_data_with_multi_threads(f, host, len);
Juan Quintela56e93d22015-05-07 19:33:31 +02004457 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004458
Juan Quintela56e93d22015-05-07 19:33:31 +02004459 case RAM_SAVE_FLAG_XBZRLE:
Juan Quintela56e93d22015-05-07 19:33:31 +02004460 if (load_xbzrle(f, addr, host) < 0) {
4461 error_report("Failed to decompress XBZRLE page at "
4462 RAM_ADDR_FMT, addr);
4463 ret = -EINVAL;
4464 break;
4465 }
4466 break;
4467 case RAM_SAVE_FLAG_EOS:
4468 /* normal exit */
Juan Quintela6df264a2018-02-28 09:10:07 +01004469 multifd_recv_sync_main();
Juan Quintela56e93d22015-05-07 19:33:31 +02004470 break;
4471 default:
4472 if (flags & RAM_SAVE_FLAG_HOOK) {
Dr. David Alan Gilbert632e3a52015-06-11 18:17:23 +01004473 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
Juan Quintela56e93d22015-05-07 19:33:31 +02004474 } else {
4475 error_report("Unknown combination of migration flags: %#x",
4476 flags);
4477 ret = -EINVAL;
4478 }
4479 }
4480 if (!ret) {
4481 ret = qemu_file_get_error(f);
4482 }
4483 }
4484
Wei Yangca1a6b72019-11-07 20:39:03 +08004485 ret |= wait_for_decompress_done();
Wei Yang10da4a32019-07-25 08:20:23 +08004486 return ret;
4487}
4488
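/**
 * ram_load: main entry point for loading RAM state on the incoming side
 *
 * Returns 0 for success or -errno in case of error
 *
 * Dispatches to ram_load_postcopy() or ram_load_precopy() depending on
 * whether postcopy is running, and flushes the COLO RAM cache when the
 * incoming side is in COLO state.
 *
 * @f: QEMUFile where to receive the data
 * @opaque: RAMState pointer
 * @version_id: version of the stream format, only 4 is accepted
 */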
4489static int ram_load(QEMUFile *f, void *opaque, int version_id)
4490{
4491 int ret = 0;
4492 static uint64_t seq_iter;
4493 /*
4494 * If the system is running in postcopy mode, page inserts to host memory must
4495 * be atomic
4496 */
4497 bool postcopy_running = postcopy_is_running();
4498
4499 seq_iter++;
4500
4501 if (version_id != 4) {
4502 return -EINVAL;
4503 }
4504
4505 /*
4506 * This RCU critical section can be very long running.
4507 * When RCU reclaims in the code start to become numerous,
4508 * it will be necessary to reduce the granularity of this
4509 * critical section.
4510 */
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01004511 WITH_RCU_READ_LOCK_GUARD() {
4512 if (postcopy_running) {
4513 ret = ram_load_postcopy(f);
4514 } else {
4515 ret = ram_load_precopy(f);
4516 }
Wei Yang10da4a32019-07-25 08:20:23 +08004517 }
Juan Quintela55c44462017-01-23 22:32:05 +01004518 trace_ram_load_complete(ret, seq_iter);
Zhang Chene6f4aa12018-09-03 12:38:50 +08004519
4520 if (!ret && migration_incoming_in_colo_state()) {
4521 colo_flush_ram_cache();
4522 }
Juan Quintela56e93d22015-05-07 19:33:31 +02004523 return ret;
4524}
4525
Vladimir Sementsov-Ogievskiyc6467622017-07-10 19:30:14 +03004526static bool ram_has_postcopy(void *opaque)
4527{
Junyan He469dd512018-07-18 15:48:02 +08004528 RAMBlock *rb;
Yury Kotovfbd162e2019-02-15 20:45:46 +03004529 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
Junyan He469dd512018-07-18 15:48:02 +08004530 if (ramblock_is_pmem(rb)) {
4531 info_report("Block: %s, host: %p is an nvdimm memory, postcopy"
4532 " is not supported now!", rb->idstr, rb->host);
4533 return false;
4534 }
4535 }
4536
Vladimir Sementsov-Ogievskiyc6467622017-07-10 19:30:14 +03004537 return migrate_postcopy_ram();
4538}
4539
Peter Xuedd090c2018-05-02 18:47:32 +08004540/* Sync all the dirty bitmaps with the destination VM. */
4541static int ram_dirty_bitmap_sync_all(MigrationState *s, RAMState *rs)
4542{
4543 RAMBlock *block;
4544 QEMUFile *file = s->to_dst_file;
4545 int ramblock_count = 0;
4546
4547 trace_ram_dirty_bitmap_sync_start();
4548
Yury Kotovfbd162e2019-02-15 20:45:46 +03004549 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Peter Xuedd090c2018-05-02 18:47:32 +08004550 qemu_savevm_send_recv_bitmap(file, block->idstr);
4551 trace_ram_dirty_bitmap_request(block->idstr);
4552 ramblock_count++;
4553 }
4554
4555 trace_ram_dirty_bitmap_sync_wait();
4556
4557 /* Wait until all the ramblocks' dirty bitmaps are synced */
4558 while (ramblock_count--) {
4559 qemu_sem_wait(&s->rp_state.rp_sem);
4560 }
4561
4562 trace_ram_dirty_bitmap_sync_complete();
4563
4564 return 0;
4565}
4566
4567static void ram_dirty_bitmap_reload_notify(MigrationState *s)
4568{
4569 qemu_sem_post(&s->rp_state.rp_sem);
4570}
4571
Peter Xua335deb2018-05-02 18:47:28 +08004572/*
4573 * Read the received bitmap, revert it as the initial dirty bitmap.
4574 * This is only used when the postcopy migration is paused but wants
4575 * to resume from a middle point.
4576 */
4577int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block)
4578{
4579 int ret = -EINVAL;
4580 QEMUFile *file = s->rp_state.from_dst_file;
4581 unsigned long *le_bitmap, nbits = block->used_length >> TARGET_PAGE_BITS;
Peter Xua725ef92018-07-10 17:18:55 +08004582 uint64_t local_size = DIV_ROUND_UP(nbits, 8);
Peter Xua335deb2018-05-02 18:47:28 +08004583 uint64_t size, end_mark;
4584
4585 trace_ram_dirty_bitmap_reload_begin(block->idstr);
4586
4587 if (s->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
4588 error_report("%s: incorrect state %s", __func__,
4589 MigrationStatus_str(s->state));
4590 return -EINVAL;
4591 }
4592
4593 /*
4594 * Note: see comments in ramblock_recv_bitmap_send() on why we
4595 * need the endianness conversion, and the padding.
4596 */
4597 local_size = ROUND_UP(local_size, 8);
4598
4599 /* Add padding */
4600 le_bitmap = bitmap_new(nbits + BITS_PER_LONG);
4601
4602 size = qemu_get_be64(file);
4603
4604 /* The size of the bitmap should match with our ramblock */
4605 if (size != local_size) {
4606 error_report("%s: ramblock '%s' bitmap size mismatch "
4607 "(0x%"PRIx64" != 0x%"PRIx64")", __func__,
4608 block->idstr, size, local_size);
4609 ret = -EINVAL;
4610 goto out;
4611 }
4612
4613 size = qemu_get_buffer(file, (uint8_t *)le_bitmap, local_size);
4614 end_mark = qemu_get_be64(file);
4615
4616 ret = qemu_file_get_error(file);
4617 if (ret || size != local_size) {
4618 error_report("%s: read bitmap failed for ramblock '%s': %d"
4619 " (size 0x%"PRIx64", got: 0x%"PRIx64")",
4620 __func__, block->idstr, ret, local_size, size);
4621 ret = -EIO;
4622 goto out;
4623 }
4624
4625 if (end_mark != RAMBLOCK_RECV_BITMAP_ENDING) {
4626 error_report("%s: ramblock '%s' end mark incorrect: 0x%"PRIu64,
4627 __func__, block->idstr, end_mark);
4628 ret = -EINVAL;
4629 goto out;
4630 }
4631
4632 /*
4633 * Endianness conversion. We are in postcopy (though paused).
4634 * The dirty bitmap won't change. We can directly modify it.
4635 */
4636 bitmap_from_le(block->bmap, le_bitmap, nbits);
4637
4638 /*
4639 * What we received is "received bitmap". Revert it as the initial
4640 * dirty bitmap for this ramblock.
4641 */
4642 bitmap_complement(block->bmap, block->bmap, nbits);
4643
4644 trace_ram_dirty_bitmap_reload_complete(block->idstr);
4645
Peter Xuedd090c2018-05-02 18:47:32 +08004646 /*
4647 * We have successfully synced the bitmap for the current ramblock. If this is
4648 * the last one to sync, we need to notify the main send thread.
4649 */
4650 ram_dirty_bitmap_reload_notify(s);
4651
Peter Xua335deb2018-05-02 18:47:28 +08004652 ret = 0;
4653out:
Peter Xubf269902018-05-25 09:50:42 +08004654 g_free(le_bitmap);
Peter Xua335deb2018-05-02 18:47:28 +08004655 return ret;
4656}
4657
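/*
 * Called when a paused postcopy migration is resumed: re-sync the dirty
 * bitmaps with the destination and prepare the RAM state for sending
 * again.
 */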
Peter Xuedd090c2018-05-02 18:47:32 +08004658static int ram_resume_prepare(MigrationState *s, void *opaque)
4659{
4660 RAMState *rs = *(RAMState **)opaque;
Peter Xu08614f32018-05-02 18:47:33 +08004661 int ret;
Peter Xuedd090c2018-05-02 18:47:32 +08004662
Peter Xu08614f32018-05-02 18:47:33 +08004663 ret = ram_dirty_bitmap_sync_all(s, rs);
4664 if (ret) {
4665 return ret;
4666 }
4667
4668 ram_state_resume_prepare(rs, s->to_dst_file);
4669
4670 return 0;
Peter Xuedd090c2018-05-02 18:47:32 +08004671}
4672
Juan Quintela56e93d22015-05-07 19:33:31 +02004673static SaveVMHandlers savevm_ram_handlers = {
Juan Quintela9907e842017-06-28 11:52:24 +02004674 .save_setup = ram_save_setup,
Juan Quintela56e93d22015-05-07 19:33:31 +02004675 .save_live_iterate = ram_save_iterate,
Dr. David Alan Gilbert763c9062015-11-05 18:11:00 +00004676 .save_live_complete_postcopy = ram_save_complete,
Dr. David Alan Gilberta3e06c32015-11-05 18:10:41 +00004677 .save_live_complete_precopy = ram_save_complete,
Vladimir Sementsov-Ogievskiyc6467622017-07-10 19:30:14 +03004678 .has_postcopy = ram_has_postcopy,
Juan Quintela56e93d22015-05-07 19:33:31 +02004679 .save_live_pending = ram_save_pending,
4680 .load_state = ram_load,
Juan Quintelaf265e0e2017-06-28 11:52:27 +02004681 .save_cleanup = ram_save_cleanup,
4682 .load_setup = ram_load_setup,
4683 .load_cleanup = ram_load_cleanup,
Peter Xuedd090c2018-05-02 18:47:32 +08004684 .resume_prepare = ram_resume_prepare,
Juan Quintela56e93d22015-05-07 19:33:31 +02004685};
4686
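/*
 * Register the "ram" section with the live migration framework, using
 * stream version 4 and the handlers defined above.
 */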
4687void ram_mig_init(void)
4688{
4689 qemu_mutex_init(&XBZRLE.lock);
Dr. David Alan Gilbertce62df52019-08-22 12:54:33 +01004690 register_savevm_live("ram", 0, 4, &savevm_ram_handlers, &ram_state);
Juan Quintela56e93d22015-05-07 19:33:31 +02004691}