/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2011-2015 Red Hat Inc
 *
 * Authors:
 *  Juan Quintela <quintela@redhat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "qemu/bitops.h"
#include "qemu/bitmap.h"
#include "qemu/main-loop.h"
#include "xbzrle.h"
#include "ram.h"
#include "migration.h"
#include "migration/register.h"
#include "migration/misc.h"
#include "qemu-file.h"
#include "postcopy-ram.h"
#include "page_cache.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "qapi/qapi-types-migration.h"
#include "qapi/qapi-events-migration.h"
#include "qapi/qmp/qerror.h"
#include "trace.h"
#include "exec/ram_addr.h"
#include "exec/target_page.h"
#include "qemu/rcu_queue.h"
#include "migration/colo.h"
#include "block.h"
#include "sysemu/cpu-throttle.h"
#include "savevm.h"
#include "qemu/iov.h"
#include "multifd.h"
#include "sysemu/runstate.h"

#if defined(__linux__)
#include "qemu/userfaultfd.h"
#endif /* defined(__linux__) */

/***********************************************************/
/* ram save/restore */

/* RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS, which
 * worked for pages that were filled with the same char.  We switched
 * it to only search for the zero value.  And to avoid confusion with
 * RAM_SAVE_FLAG_COMPRESS_PAGE it was renamed.
 */

#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
#define RAM_SAVE_FLAG_ZERO     0x02
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
#define RAM_SAVE_FLAG_PAGE     0x08
#define RAM_SAVE_FLAG_EOS      0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE   0x40
/* 0x80 is reserved in migration.h start with 0x100 next */
#define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100

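/*
 * Illustrative note: these flags travel in the low bits of the 64-bit page
 * offset word written by save_page_header() below.  For example, a raw page
 * at block offset 0x2000 that continues the previously sent block goes on
 * the wire as
 * be64(0x2000 | RAM_SAVE_FLAG_PAGE | RAM_SAVE_FLAG_CONTINUE) == be64(0x2028),
 * followed by the page payload.
 */
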
static inline bool is_zero_range(uint8_t *p, uint64_t size)
{
    return buffer_is_zero(p, size);
}

XBZRLECacheStats xbzrle_counters;

/* struct contains the XBZRLE cache and a static page
   used by the compression */
static struct {
    /* buffer used for XBZRLE encoding */
    uint8_t *encoded_buf;
    /* buffer for storing page content */
    uint8_t *current_buf;
    /* Cache for XBZRLE, Protected by lock. */
    PageCache *cache;
    QemuMutex lock;
    /* it will store a page full of zeros */
    uint8_t *zero_target_page;
    /* buffer used for XBZRLE decoding */
    uint8_t *decoded_buf;
} XBZRLE;

static void XBZRLE_cache_lock(void)
{
    if (migrate_use_xbzrle()) {
        qemu_mutex_lock(&XBZRLE.lock);
    }
}

static void XBZRLE_cache_unlock(void)
{
    if (migrate_use_xbzrle()) {
        qemu_mutex_unlock(&XBZRLE.lock);
    }
}

/**
 * xbzrle_cache_resize: resize the xbzrle cache
 *
 * This function is called from migrate_params_apply in the main
 * thread, possibly while a migration is in progress. A running
 * migration may be using the cache and might finish during this call,
 * hence changes to the cache are protected by XBZRLE.lock().
 *
 * Returns 0 for success or -1 for error
 *
 * @new_size: new cache size
 * @errp: set *errp if the check failed, with reason
 */
int xbzrle_cache_resize(uint64_t new_size, Error **errp)
{
    PageCache *new_cache;
    int64_t ret = 0;

    /* Check for truncation */
    if (new_size != (size_t)new_size) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
                   "exceeding address space");
        return -1;
    }

    if (new_size == migrate_xbzrle_cache_size()) {
        /* nothing to do */
        return 0;
    }

    XBZRLE_cache_lock();

    if (XBZRLE.cache != NULL) {
        new_cache = cache_init(new_size, TARGET_PAGE_SIZE, errp);
        if (!new_cache) {
            ret = -1;
            goto out;
        }

        cache_fini(XBZRLE.cache);
        XBZRLE.cache = new_cache;
    }
out:
    XBZRLE_cache_unlock();
    return ret;
}

bool ramblock_is_ignored(RAMBlock *block)
{
    return !qemu_ram_is_migratable(block) ||
           (migrate_ignore_shared() && qemu_ram_is_shared(block));
}

#undef RAMBLOCK_FOREACH

int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque)
{
    RAMBlock *block;
    int ret = 0;

    RCU_READ_LOCK_GUARD();

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        ret = func(block, opaque);
        if (ret) {
            break;
        }
    }
    return ret;
}

static void ramblock_recv_map_init(void)
{
    RAMBlock *rb;

    RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
        assert(!rb->receivedmap);
        rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits());
    }
}

int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr)
{
    return test_bit(ramblock_recv_bitmap_offset(host_addr, rb),
                    rb->receivedmap);
}

bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset)
{
    return test_bit(byte_offset >> TARGET_PAGE_BITS, rb->receivedmap);
}

void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr)
{
    set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb), rb->receivedmap);
}

void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr,
                                    size_t nr)
{
    bitmap_set_atomic(rb->receivedmap,
                      ramblock_recv_bitmap_offset(host_addr, rb),
                      nr);
}

#define RAMBLOCK_RECV_BITMAP_ENDING  (0x0123456789abcdefULL)

/*
 * Format: bitmap_size (8 bytes) + whole_bitmap (N bytes) +
 *         ending marker (8 bytes).
 *
 * Returns the number of bytes sent (>0) on success, or <0 on error.
 */
int64_t ramblock_recv_bitmap_send(QEMUFile *file,
                                  const char *block_name)
{
    RAMBlock *block = qemu_ram_block_by_name(block_name);
    unsigned long *le_bitmap, nbits;
    uint64_t size;

    if (!block) {
        error_report("%s: invalid block name: %s", __func__, block_name);
        return -1;
    }

    nbits = block->postcopy_length >> TARGET_PAGE_BITS;

    /*
     * Make sure the tmp bitmap buffer is big enough, e.g., on 32bit
     * machines we may need 4 more bytes for padding (see below
     * comment). So extend it a bit beforehand.
     */
    le_bitmap = bitmap_new(nbits + BITS_PER_LONG);

    /*
     * Always use little endian when sending the bitmap. This is
     * required when source and destination VMs are not using the
     * same endianness. (Note: big endian won't work.)
     */
    bitmap_to_le(le_bitmap, block->receivedmap, nbits);

    /* Size of the bitmap, in bytes */
    size = DIV_ROUND_UP(nbits, 8);

    /*
     * size is always aligned to 8 bytes for 64bit machines, but it
     * may not be true for 32bit machines. We need this padding to
     * make sure the migration can survive even between 32bit and
     * 64bit machines.
     */
    size = ROUND_UP(size, 8);

    qemu_put_be64(file, size);
    qemu_put_buffer(file, (const uint8_t *)le_bitmap, size);
    /*
     * Mark as an end, in case the middle part is screwed up due to
     * some "mysterious" reason.
     */
    qemu_put_be64(file, RAMBLOCK_RECV_BITMAP_ENDING);
    qemu_fflush(file);

    g_free(le_bitmap);

    if (qemu_file_get_error(file)) {
        return qemu_file_get_error(file);
    }

    return size + sizeof(size);
}

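/*
 * Illustrative sketch of the stream produced by ramblock_recv_bitmap_send()
 * above:
 *
 *   be64 size | size bytes of little-endian bitmap | be64 ENDING marker
 *
 * Note that the returned byte count covers the size field and the bitmap
 * but not the trailing RAMBLOCK_RECV_BITMAP_ENDING marker.
 */
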
/*
 * An outstanding page request, on the source, having been received
 * and queued
 */
struct RAMSrcPageRequest {
    RAMBlock *rb;
    hwaddr    offset;
    hwaddr    len;

    QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
};

/* State of RAM for migration */
struct RAMState {
    /* QEMUFile used for this migration */
    QEMUFile *f;
    /* UFFD file descriptor, used in 'write-tracking' migration */
    int uffdio_fd;
    /* Last block that we have visited searching for dirty pages */
    RAMBlock *last_seen_block;
    /* Last block from where we have sent data */
    RAMBlock *last_sent_block;
    /* Last dirty target page we have sent */
    ram_addr_t last_page;
    /* last ram version we have seen */
    uint32_t last_version;
    /* How many times we have dirtied too many pages */
    int dirty_rate_high_cnt;
    /* these variables are used for bitmap sync */
    /* last time we did a full bitmap_sync */
    int64_t time_last_bitmap_sync;
    /* bytes transferred at start_time */
    uint64_t bytes_xfer_prev;
    /* number of dirty pages since start_time */
    uint64_t num_dirty_pages_period;
    /* xbzrle misses since the beginning of the period */
    uint64_t xbzrle_cache_miss_prev;
    /* Amount of xbzrle pages since the beginning of the period */
    uint64_t xbzrle_pages_prev;
    /* Amount of xbzrle encoded bytes since the beginning of the period */
    uint64_t xbzrle_bytes_prev;
    /* Start using XBZRLE (e.g., after the first round). */
    bool xbzrle_enabled;

    /* compression statistics since the beginning of the period */
    /* number of times there was no free thread to compress data */
    uint64_t compress_thread_busy_prev;
    /* amount of bytes after compression */
    uint64_t compressed_size_prev;
    /* amount of compressed pages */
    uint64_t compress_pages_prev;

    /* total handled target pages at the beginning of period */
    uint64_t target_page_count_prev;
    /* total handled target pages since start */
    uint64_t target_page_count;
    /* number of dirty bits in the bitmap */
    uint64_t migration_dirty_pages;
    /* Protects modification of the bitmap and migration dirty pages */
    QemuMutex bitmap_mutex;
    /* The RAMBlock used in the last src_page_requests */
    RAMBlock *last_req_rb;
    /* Queue of outstanding page requests from the destination */
    QemuMutex src_page_req_mutex;
    QSIMPLEQ_HEAD(, RAMSrcPageRequest) src_page_requests;
};
typedef struct RAMState RAMState;

static RAMState *ram_state;

static NotifierWithReturnList precopy_notifier_list;

void precopy_infrastructure_init(void)
{
    notifier_with_return_list_init(&precopy_notifier_list);
}

void precopy_add_notifier(NotifierWithReturn *n)
{
    notifier_with_return_list_add(&precopy_notifier_list, n);
}

void precopy_remove_notifier(NotifierWithReturn *n)
{
    notifier_with_return_remove(n);
}

int precopy_notify(PrecopyNotifyReason reason, Error **errp)
{
    PrecopyNotifyData pnd;
    pnd.reason = reason;
    pnd.errp = errp;

    return notifier_with_return_list_notify(&precopy_notifier_list, &pnd);
}

uint64_t ram_bytes_remaining(void)
{
    return ram_state ? (ram_state->migration_dirty_pages * TARGET_PAGE_SIZE) :
                       0;
}

MigrationStats ram_counters;

/* used by the search for pages to send */
struct PageSearchStatus {
    /* Current block being searched */
    RAMBlock    *block;
    /* Current page to search from */
    unsigned long page;
    /* Set once we wrap around */
    bool          complete_round;
};
typedef struct PageSearchStatus PageSearchStatus;

CompressionStats compression_counters;

struct CompressParam {
    bool done;
    bool quit;
    bool zero_page;
    QEMUFile *file;
    QemuMutex mutex;
    QemuCond cond;
    RAMBlock *block;
    ram_addr_t offset;

    /* internally used fields */
    z_stream stream;
    uint8_t *originbuf;
};
typedef struct CompressParam CompressParam;

struct DecompressParam {
    bool done;
    bool quit;
    QemuMutex mutex;
    QemuCond cond;
    void *des;
    uint8_t *compbuf;
    int len;
    z_stream stream;
};
typedef struct DecompressParam DecompressParam;

static CompressParam *comp_param;
static QemuThread *compress_threads;
/* comp_done_cond is used to wake up the migration thread when
 * one of the compression threads has finished the compression.
 * comp_done_lock works together with comp_done_cond.
 */
static QemuMutex comp_done_lock;
static QemuCond comp_done_cond;
/* The empty QEMUFileOps will be used by file in CompressParam */
static const QEMUFileOps empty_ops = { };

static QEMUFile *decomp_file;
static DecompressParam *decomp_param;
static QemuThread *decompress_threads;
static QemuMutex decomp_done_lock;
static QemuCond decomp_done_cond;

static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
                                 ram_addr_t offset, uint8_t *source_buf);

static void *do_data_compress(void *opaque)
{
    CompressParam *param = opaque;
    RAMBlock *block;
    ram_addr_t offset;
    bool zero_page;

    qemu_mutex_lock(&param->mutex);
    while (!param->quit) {
        if (param->block) {
            block = param->block;
            offset = param->offset;
            param->block = NULL;
            qemu_mutex_unlock(&param->mutex);

            zero_page = do_compress_ram_page(param->file, &param->stream,
                                             block, offset, param->originbuf);

            qemu_mutex_lock(&comp_done_lock);
            param->done = true;
            param->zero_page = zero_page;
            qemu_cond_signal(&comp_done_cond);
            qemu_mutex_unlock(&comp_done_lock);

            qemu_mutex_lock(&param->mutex);
        } else {
            qemu_cond_wait(&param->cond, &param->mutex);
        }
    }
    qemu_mutex_unlock(&param->mutex);

    return NULL;
}

static void compress_threads_save_cleanup(void)
{
    int i, thread_count;

    if (!migrate_use_compression() || !comp_param) {
        return;
    }

    thread_count = migrate_compress_threads();
    for (i = 0; i < thread_count; i++) {
        /*
         * we use it as an indicator which shows if the thread is
         * properly init'd or not
         */
        if (!comp_param[i].file) {
            break;
        }

        qemu_mutex_lock(&comp_param[i].mutex);
        comp_param[i].quit = true;
        qemu_cond_signal(&comp_param[i].cond);
        qemu_mutex_unlock(&comp_param[i].mutex);

        qemu_thread_join(compress_threads + i);
        qemu_mutex_destroy(&comp_param[i].mutex);
        qemu_cond_destroy(&comp_param[i].cond);
        deflateEnd(&comp_param[i].stream);
        g_free(comp_param[i].originbuf);
        qemu_fclose(comp_param[i].file);
        comp_param[i].file = NULL;
    }
    qemu_mutex_destroy(&comp_done_lock);
    qemu_cond_destroy(&comp_done_cond);
    g_free(compress_threads);
    g_free(comp_param);
    compress_threads = NULL;
    comp_param = NULL;
}

static int compress_threads_save_setup(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return 0;
    }
    thread_count = migrate_compress_threads();
    compress_threads = g_new0(QemuThread, thread_count);
    comp_param = g_new0(CompressParam, thread_count);
    qemu_cond_init(&comp_done_cond);
    qemu_mutex_init(&comp_done_lock);
    for (i = 0; i < thread_count; i++) {
        comp_param[i].originbuf = g_try_malloc(TARGET_PAGE_SIZE);
        if (!comp_param[i].originbuf) {
            goto exit;
        }

        if (deflateInit(&comp_param[i].stream,
                        migrate_compress_level()) != Z_OK) {
            g_free(comp_param[i].originbuf);
            goto exit;
        }

        /* comp_param[i].file is just used as a dummy buffer to save data,
         * set its ops to empty.
         */
        comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops, false);
        comp_param[i].done = true;
        comp_param[i].quit = false;
        qemu_mutex_init(&comp_param[i].mutex);
        qemu_cond_init(&comp_param[i].cond);
        qemu_thread_create(compress_threads + i, "compress",
                           do_data_compress, comp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
    return 0;

exit:
    compress_threads_save_cleanup();
    return -1;
}

/**
 * save_page_header: write page header to wire
 *
 * If this is not a continuation of the last block sent, it also writes
 * the block identification
 *
 * Returns the number of bytes written
 *
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 *          in the lower bits, it contains flags
 */
static size_t save_page_header(RAMState *rs, QEMUFile *f, RAMBlock *block,
                               ram_addr_t offset)
{
    size_t size, len;

    if (block == rs->last_sent_block) {
        offset |= RAM_SAVE_FLAG_CONTINUE;
    }
    qemu_put_be64(f, offset);
    size = 8;

    if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
        len = strlen(block->idstr);
        qemu_put_byte(f, len);
        qemu_put_buffer(f, (uint8_t *)block->idstr, len);
        size += 1 + len;
        rs->last_sent_block = block;
    }
    return size;
}

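/*
 * Illustrative layout of the header written by save_page_header() above:
 *
 *   be64 (page offset | flags)                  always, 8 bytes
 *   u8 strlen(idstr) followed by idstr bytes    only when the block changes
 *                                               (RAM_SAVE_FLAG_CONTINUE clear)
 *
 * so the returned size is either 8 or 9 + strlen(idstr).
 */
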
/**
 * mig_throttle_guest_down: throttle down the guest
 *
 * Reduce amount of guest cpu execution to hopefully slow down memory
 * writes. If guest dirty memory rate is reduced below the rate at
 * which we can transfer pages to the destination then we should be
 * able to complete migration. Some workloads dirty memory way too
 * fast and will not effectively converge, even with auto-converge.
 */
static void mig_throttle_guest_down(uint64_t bytes_dirty_period,
                                    uint64_t bytes_dirty_threshold)
{
    MigrationState *s = migrate_get_current();
    uint64_t pct_initial = s->parameters.cpu_throttle_initial;
    uint64_t pct_increment = s->parameters.cpu_throttle_increment;
    bool pct_tailslow = s->parameters.cpu_throttle_tailslow;
    int pct_max = s->parameters.max_cpu_throttle;

    uint64_t throttle_now = cpu_throttle_get_percentage();
    uint64_t cpu_now, cpu_ideal, throttle_inc;

    /* We have not started throttling yet. Let's start it. */
    if (!cpu_throttle_active()) {
        cpu_throttle_set(pct_initial);
    } else {
        /* Throttling already on, just increase the rate */
        if (!pct_tailslow) {
            throttle_inc = pct_increment;
        } else {
            /* Compute the ideal CPU percentage used by Guest, which may
             * make the dirty rate match the dirty rate threshold. */
            cpu_now = 100 - throttle_now;
            cpu_ideal = cpu_now * (bytes_dirty_threshold * 1.0 /
                                   bytes_dirty_period);
            throttle_inc = MIN(cpu_now - cpu_ideal, pct_increment);
        }
        cpu_throttle_set(MIN(throttle_now + throttle_inc, pct_max));
    }
}

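/*
 * Worked example for the tailslow path above (illustrative numbers, not
 * defaults): with the throttle currently at 40% the guest keeps
 * cpu_now = 60%.  If the period dirtied twice the threshold
 * (bytes_dirty_threshold / bytes_dirty_period == 0.5), then
 * cpu_ideal = 60 * 0.5 = 30, the increment is MIN(60 - 30, pct_increment),
 * and the new throttle becomes MIN(40 + throttle_inc, pct_max).
 */
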
/**
 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
 *
 * @rs: current RAM state
 * @current_addr: address for the zero page
 *
 * Update the xbzrle cache to reflect a page that's been sent as all 0.
 * The important thing is that a stale (not-yet-0'd) page be replaced
 * by the new data.
 * As a bonus, if the page wasn't in the cache it gets added so that
 * when a small write is made into the 0'd page it gets XBZRLE sent.
 */
static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
{
    if (!rs->xbzrle_enabled) {
        return;
    }

    /* We don't care if this fails to allocate a new cache page
     * as long as it updated an old one */
    cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page,
                 ram_counters.dirty_sync_count);
}

#define ENCODING_FLAG_XBZRLE 0x1

/**
 * save_xbzrle_page: compress and send current page
 *
 * Returns: 1 means that we wrote the page
 *          0 means that page is identical to the one already sent
 *          -1 means that xbzrle would be longer than normal
 *
 * @rs: current RAM state
 * @current_data: pointer to the address of the page contents
 * @current_addr: addr of the page
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @last_stage: if we are at the completion stage
 */
static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
                            ram_addr_t current_addr, RAMBlock *block,
                            ram_addr_t offset, bool last_stage)
{
    int encoded_len = 0, bytes_xbzrle;
    uint8_t *prev_cached_page;

    if (!cache_is_cached(XBZRLE.cache, current_addr,
                         ram_counters.dirty_sync_count)) {
        xbzrle_counters.cache_miss++;
        if (!last_stage) {
            if (cache_insert(XBZRLE.cache, current_addr, *current_data,
                             ram_counters.dirty_sync_count) == -1) {
                return -1;
            } else {
                /* update *current_data when the page has been
                   inserted into cache */
                *current_data = get_cached_data(XBZRLE.cache, current_addr);
            }
        }
        return -1;
    }

    /*
     * Reaching here means the page has hit the xbzrle cache, no matter what
     * encoding result it is (normal encoding, overflow or skipping the page),
     * count the page as encoded. This is used to calculate the encoding rate.
     *
     * Example: 2 pages (8KB) being encoded, first page encoding generates 2KB,
     * 2nd page turns out to be skipped (i.e. no new bytes written to the
     * page), the overall encoding rate will be 8KB / 2KB = 4, which has the
     * skipped page included. In this way, the encoding rate can tell if the
     * guest page is good for xbzrle encoding.
     */
    xbzrle_counters.pages++;
    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);

    /* save current buffer into memory */
    memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);

    /* XBZRLE encoding (if there is no overflow) */
    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
                                       TARGET_PAGE_SIZE);

    /*
     * Update the cache contents, so that it corresponds to the data
     * sent, in all cases except where we skip the page.
     */
    if (!last_stage && encoded_len != 0) {
        memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
        /*
         * In the case where we couldn't compress, ensure that the caller
         * sends the data from the cache, since the guest might have
         * changed the RAM since we copied it.
         */
        *current_data = prev_cached_page;
    }

    if (encoded_len == 0) {
        trace_save_xbzrle_page_skipping();
        return 0;
    } else if (encoded_len == -1) {
        trace_save_xbzrle_page_overflow();
        xbzrle_counters.overflow++;
        xbzrle_counters.bytes += TARGET_PAGE_SIZE;
        return -1;
    }

    /* Send XBZRLE based compressed page */
    bytes_xbzrle = save_page_header(rs, rs->f, block,
                                    offset | RAM_SAVE_FLAG_XBZRLE);
    qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE);
    qemu_put_be16(rs->f, encoded_len);
    qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len);
    bytes_xbzrle += encoded_len + 1 + 2;
    /*
     * Like compressed_size (please see update_compress_thread_counts),
     * the xbzrle encoded bytes don't count the 8 byte header with
     * RAM_SAVE_FLAG_CONTINUE.
     */
    xbzrle_counters.bytes += bytes_xbzrle - 8;
    ram_counters.transferred += bytes_xbzrle;

    return 1;
}

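/*
 * Illustrative sketch of an XBZRLE page as emitted by save_xbzrle_page()
 * above: the usual page header with RAM_SAVE_FLAG_XBZRLE set, one byte
 * ENCODING_FLAG_XBZRLE, a be16 encoded length and then encoded_len bytes
 * of XBZRLE data -- hence the "encoded_len + 1 + 2" added to bytes_xbzrle.
 */
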
/**
 * migration_bitmap_find_dirty: find the next dirty page from start
 *
 * Returns the page offset within memory region of the start of a dirty page
 *
 * @rs: current RAM state
 * @rb: RAMBlock where to search for dirty pages
 * @start: page where we start the search
 */
static inline
unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
                                          unsigned long start)
{
    unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
    unsigned long *bitmap = rb->bmap;

    if (ramblock_is_ignored(rb)) {
        return size;
    }

    return find_next_bit(bitmap, size, start);
}

static void migration_clear_memory_region_dirty_bitmap(RAMBlock *rb,
                                                       unsigned long page)
{
    uint8_t shift;
    hwaddr size, start;

    if (!rb->clear_bmap || !clear_bmap_test_and_clear(rb, page)) {
        return;
    }

    shift = rb->clear_bmap_shift;
    /*
     * CLEAR_BITMAP_SHIFT_MIN should always guarantee this... this
     * can make things easier sometimes since then start address
     * of the small chunk will always be 64 pages aligned so the
     * bitmap will always be aligned to unsigned long. We should
     * even be able to remove this restriction but I'm simply
     * keeping it.
     */
    assert(shift >= 6);

    size = 1ULL << (TARGET_PAGE_BITS + shift);
    start = QEMU_ALIGN_DOWN((ram_addr_t)page << TARGET_PAGE_BITS, size);
    trace_migration_bitmap_clear_dirty(rb->idstr, start, size, page);
    memory_region_clear_dirty_bitmap(rb->mr, start, size);
}

static void
migration_clear_memory_region_dirty_bitmap_range(RAMBlock *rb,
                                                 unsigned long start,
                                                 unsigned long npages)
{
    unsigned long i, chunk_pages = 1UL << rb->clear_bmap_shift;
    unsigned long chunk_start = QEMU_ALIGN_DOWN(start, chunk_pages);
    unsigned long chunk_end = QEMU_ALIGN_UP(start + npages, chunk_pages);

    /*
     * Clear pages from start to start + npages - 1, so the end boundary is
     * exclusive.
     */
    for (i = chunk_start; i < chunk_end; i += chunk_pages) {
        migration_clear_memory_region_dirty_bitmap(rb, i);
    }
}

static inline bool migration_bitmap_clear_dirty(RAMState *rs,
                                                RAMBlock *rb,
                                                unsigned long page)
{
    bool ret;

    /*
     * Clear dirty bitmap if needed. This _must_ be called before we
     * send any of the page in the chunk because we need to make sure
     * we can capture further page content changes when we sync dirty
     * log the next time. So as long as we are going to send any of
     * the page in the chunk we clear the remote dirty bitmap for all.
     * Clearing it earlier won't be a problem, but too late will.
     */
    migration_clear_memory_region_dirty_bitmap(rb, page);

    ret = test_and_clear_bit(page, rb->bmap);
    if (ret) {
        rs->migration_dirty_pages--;
    }

    return ret;
}

static void dirty_bitmap_clear_section(MemoryRegionSection *section,
                                       void *opaque)
{
    const hwaddr offset = section->offset_within_region;
    const hwaddr size = int128_get64(section->size);
    const unsigned long start = offset >> TARGET_PAGE_BITS;
    const unsigned long npages = size >> TARGET_PAGE_BITS;
    RAMBlock *rb = section->mr->ram_block;
    uint64_t *cleared_bits = opaque;

    /*
     * We don't grab ram_state->bitmap_mutex because we expect to run
     * only when starting migration or during postcopy recovery where
     * we don't have concurrent access.
     */
    if (!migration_in_postcopy() && !migrate_background_snapshot()) {
        migration_clear_memory_region_dirty_bitmap_range(rb, start, npages);
    }
    *cleared_bits += bitmap_count_one_with_offset(rb->bmap, start, npages);
    bitmap_clear(rb->bmap, start, npages);
}

/*
 * Exclude all dirty pages from migration that fall into a discarded range as
 * managed by a RamDiscardManager responsible for the mapped memory region of
 * the RAMBlock. Clear the corresponding bits in the dirty bitmaps.
 *
 * Discarded pages ("logically unplugged") have undefined content and must
 * not get migrated, because even reading these pages for migration might
 * result in undesired behavior.
 *
 * Returns the number of cleared bits in the RAMBlock dirty bitmap.
 *
 * Note: The result is only stable while migrating (precopy/postcopy).
 */
static uint64_t ramblock_dirty_bitmap_clear_discarded_pages(RAMBlock *rb)
{
    uint64_t cleared_bits = 0;

    if (rb->mr && rb->bmap && memory_region_has_ram_discard_manager(rb->mr)) {
        RamDiscardManager *rdm = memory_region_get_ram_discard_manager(rb->mr);
        MemoryRegionSection section = {
            .mr = rb->mr,
            .offset_within_region = 0,
            .size = int128_make64(qemu_ram_get_used_length(rb)),
        };

        ram_discard_manager_replay_discarded(rdm, &section,
                                             dirty_bitmap_clear_section,
                                             &cleared_bits);
    }
    return cleared_bits;
}

/*
 * Check if a host-page aligned page falls into a discarded range as managed by
 * a RamDiscardManager responsible for the mapped memory region of the RAMBlock.
 *
 * Note: The result is only stable while migrating (precopy/postcopy).
 */
bool ramblock_page_is_discarded(RAMBlock *rb, ram_addr_t start)
{
    if (rb->mr && memory_region_has_ram_discard_manager(rb->mr)) {
        RamDiscardManager *rdm = memory_region_get_ram_discard_manager(rb->mr);
        MemoryRegionSection section = {
            .mr = rb->mr,
            .offset_within_region = start,
            .size = int128_make64(qemu_ram_pagesize(rb)),
        };

        return !ram_discard_manager_is_populated(rdm, &section);
    }
    return false;
}

/* Called with RCU critical section */
static void ramblock_sync_dirty_bitmap(RAMState *rs, RAMBlock *rb)
{
    uint64_t new_dirty_pages =
        cpu_physical_memory_sync_dirty_bitmap(rb, 0, rb->used_length);

    rs->migration_dirty_pages += new_dirty_pages;
    rs->num_dirty_pages_period += new_dirty_pages;
}

/**
 * ram_pagesize_summary: calculate all the pagesizes of a VM
 *
 * Returns a summary bitmap of the page sizes of all RAMBlocks
 *
 * For VMs with just normal pages this is equivalent to the host page
 * size. If it's got some huge pages then it's the OR of all the
 * different page sizes.
 */
uint64_t ram_pagesize_summary(void)
{
    RAMBlock *block;
    uint64_t summary = 0;

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        summary |= block->page_size;
    }

    return summary;
}

uint64_t ram_get_total_transferred_pages(void)
{
    return ram_counters.normal + ram_counters.duplicate +
           compression_counters.pages + xbzrle_counters.pages;
}

static void migration_update_rates(RAMState *rs, int64_t end_time)
{
    uint64_t page_count = rs->target_page_count - rs->target_page_count_prev;
    double compressed_size;

    /* calculate period counters */
    ram_counters.dirty_pages_rate = rs->num_dirty_pages_period * 1000
                / (end_time - rs->time_last_bitmap_sync);

    if (!page_count) {
        return;
    }

    if (migrate_use_xbzrle()) {
        double encoded_size, unencoded_size;

        xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss -
            rs->xbzrle_cache_miss_prev) / page_count;
        rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
        unencoded_size = (xbzrle_counters.pages - rs->xbzrle_pages_prev) *
                         TARGET_PAGE_SIZE;
        encoded_size = xbzrle_counters.bytes - rs->xbzrle_bytes_prev;
        if (xbzrle_counters.pages == rs->xbzrle_pages_prev || !encoded_size) {
            xbzrle_counters.encoding_rate = 0;
        } else {
            xbzrle_counters.encoding_rate = unencoded_size / encoded_size;
        }
        rs->xbzrle_pages_prev = xbzrle_counters.pages;
        rs->xbzrle_bytes_prev = xbzrle_counters.bytes;
    }

    if (migrate_use_compression()) {
        compression_counters.busy_rate = (double)(compression_counters.busy -
            rs->compress_thread_busy_prev) / page_count;
        rs->compress_thread_busy_prev = compression_counters.busy;

        compressed_size = compression_counters.compressed_size -
                          rs->compressed_size_prev;
        if (compressed_size) {
            double uncompressed_size = (compression_counters.pages -
                                    rs->compress_pages_prev) * TARGET_PAGE_SIZE;

            /* Compression-Ratio = Uncompressed-size / Compressed-size */
            compression_counters.compression_rate =
                                        uncompressed_size / compressed_size;

            rs->compress_pages_prev = compression_counters.pages;
            rs->compressed_size_prev = compression_counters.compressed_size;
        }
    }
}

static void migration_trigger_throttle(RAMState *rs)
{
    MigrationState *s = migrate_get_current();
    uint64_t threshold = s->parameters.throttle_trigger_threshold;

    uint64_t bytes_xfer_period = ram_counters.transferred - rs->bytes_xfer_prev;
    uint64_t bytes_dirty_period = rs->num_dirty_pages_period * TARGET_PAGE_SIZE;
    uint64_t bytes_dirty_threshold = bytes_xfer_period * threshold / 100;

    /* During block migration the auto-converge logic incorrectly detects
     * that ram migration makes no progress. Avoid this by disabling the
     * throttling logic during the bulk phase of block migration. */
    if (migrate_auto_converge() && !blk_mig_bulk_active()) {
        /* The following detection logic can be refined later. For now:
           Check to see if the ratio between dirtied bytes and the approx.
           amount of bytes that just got transferred since the last time
           we were in this routine reaches the threshold. If that happens
           twice, start or increase throttling. */

        if ((bytes_dirty_period > bytes_dirty_threshold) &&
            (++rs->dirty_rate_high_cnt >= 2)) {
            trace_migration_throttle();
            rs->dirty_rate_high_cnt = 0;
            mig_throttle_guest_down(bytes_dirty_period,
                                    bytes_dirty_threshold);
        }
    }
}

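/*
 * Worked example for migration_trigger_throttle() above (illustrative
 * threshold value): with throttle_trigger_threshold == 50, throttling is
 * started or increased once the guest dirties more than 50% of the bytes
 * transferred in the same sync period, in two periods since throttling was
 * last bumped.
 */
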
static void migration_bitmap_sync(RAMState *rs)
{
    RAMBlock *block;
    int64_t end_time;

    ram_counters.dirty_sync_count++;

    if (!rs->time_last_bitmap_sync) {
        rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    }

    trace_migration_bitmap_sync_start();
    memory_global_dirty_log_sync();

    qemu_mutex_lock(&rs->bitmap_mutex);
    WITH_RCU_READ_LOCK_GUARD() {
        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
            ramblock_sync_dirty_bitmap(rs, block);
        }
        ram_counters.remaining = ram_bytes_remaining();
    }
    qemu_mutex_unlock(&rs->bitmap_mutex);

    memory_global_after_dirty_log_sync();
    trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);

    end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

    /* more than 1 second = 1000 milliseconds */
    if (end_time > rs->time_last_bitmap_sync + 1000) {
        migration_trigger_throttle(rs);

        migration_update_rates(rs, end_time);

        rs->target_page_count_prev = rs->target_page_count;

        /* reset period counters */
        rs->time_last_bitmap_sync = end_time;
        rs->num_dirty_pages_period = 0;
        rs->bytes_xfer_prev = ram_counters.transferred;
    }
    if (migrate_use_events()) {
        qapi_event_send_migration_pass(ram_counters.dirty_sync_count);
    }
}

static void migration_bitmap_sync_precopy(RAMState *rs)
{
    Error *local_err = NULL;

    /*
     * The current notifier usage is just an optimization to migration, so we
     * don't stop the normal migration process in the error case.
     */
    if (precopy_notify(PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC, &local_err)) {
        error_report_err(local_err);
        local_err = NULL;
    }

    migration_bitmap_sync(rs);

    if (precopy_notify(PRECOPY_NOTIFY_AFTER_BITMAP_SYNC, &local_err)) {
        error_report_err(local_err);
    }
}

/**
 * save_zero_page_to_file: send the zero page to the file
 *
 * Returns the size of data written to the file, 0 means the page is not
 * a zero page
 *
 * @rs: current RAM state
 * @file: the file where the data is saved
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 */
static int save_zero_page_to_file(RAMState *rs, QEMUFile *file,
                                  RAMBlock *block, ram_addr_t offset)
{
    uint8_t *p = block->host + offset;
    int len = 0;

    if (is_zero_range(p, TARGET_PAGE_SIZE)) {
        len += save_page_header(rs, file, block, offset | RAM_SAVE_FLAG_ZERO);
        qemu_put_byte(file, 0);
        len += 1;
    }
    return len;
}

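/*
 * Illustrative wire format of a zero page as written by
 * save_zero_page_to_file() above: the page header with RAM_SAVE_FLAG_ZERO
 * set, followed by a single 0 byte -- no page payload.  The returned length
 * is therefore the header size + 1, or 0 when the page is not all zeroes.
 */
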
/**
 * save_zero_page: send the zero page to the stream
 *
 * Returns the number of pages written.
 *
 * @rs: current RAM state
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 */
static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
{
    int len = save_zero_page_to_file(rs, rs->f, block, offset);

    if (len) {
        ram_counters.duplicate++;
        ram_counters.transferred += len;
        return 1;
    }
    return -1;
}

static void ram_release_pages(const char *rbname, uint64_t offset, int pages)
{
    if (!migrate_release_ram() || !migration_in_postcopy()) {
        return;
    }

    ram_discard_range(rbname, offset, ((ram_addr_t)pages) << TARGET_PAGE_BITS);
}

/*
 * @pages: the number of pages written by the control path,
 *        < 0 - error
 *        > 0 - number of pages written
 *
 * Return true if the pages have been saved, otherwise false is returned.
 */
static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
                              int *pages)
{
    uint64_t bytes_xmit = 0;
    int ret;

    *pages = -1;
    ret = ram_control_save_page(rs->f, block->offset, offset, TARGET_PAGE_SIZE,
                                &bytes_xmit);
    if (ret == RAM_SAVE_CONTROL_NOT_SUPP) {
        return false;
    }

    if (bytes_xmit) {
        ram_counters.transferred += bytes_xmit;
        *pages = 1;
    }

    if (ret == RAM_SAVE_CONTROL_DELAYED) {
        return true;
    }

    if (bytes_xmit > 0) {
        ram_counters.normal++;
    } else if (bytes_xmit == 0) {
        ram_counters.duplicate++;
    }

    return true;
}

Xiao Guangrong65dacaa2018-03-30 15:51:27 +08001213/*
1214 * directly send the page to the stream
1215 *
1216 * Returns the number of pages written.
1217 *
1218 * @rs: current RAM state
1219 * @block: block that contains the page we want to send
1220 * @offset: offset inside the block for the page
1221 * @buf: the page to be sent
1222 * @async: send the page asynchronously
1223 */
1224static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
1225 uint8_t *buf, bool async)
1226{
1227 ram_counters.transferred += save_page_header(rs, rs->f, block,
1228 offset | RAM_SAVE_FLAG_PAGE);
1229 if (async) {
1230 qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE,
1231 migrate_release_ram() &
1232 migration_in_postcopy());
1233 } else {
1234 qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE);
1235 }
1236 ram_counters.transferred += TARGET_PAGE_SIZE;
1237 ram_counters.normal++;
1238 return 1;
1239}
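/*
 * Minimal sketch (not upstream code) of how the two helpers above combine:
 * try the cheap all-zero check first and only ship the full page contents
 * when it fails.  The wrapper name is hypothetical; real callers also go
 * through RDMA, compression, xbzrle and multifd first (see
 * ram_save_target_page() further down).
 */
static inline int example_save_zero_or_normal(RAMState *rs, RAMBlock *block,
                                              ram_addr_t offset)
{
    int pages = save_zero_page(rs, block, offset);

    if (pages > 0) {
        /* All-zero page: a header plus a single zero byte were sent. */
        return pages;
    }
    /* Not a zero page (-1): send the full TARGET_PAGE_SIZE bytes. */
    return save_normal_page(rs, block, offset, block->host + offset, false);
}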
1240
Juan Quintela56e93d22015-05-07 19:33:31 +02001241/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001242 * ram_save_page: send the given page to the stream
Juan Quintela56e93d22015-05-07 19:33:31 +02001243 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001244 * Returns the number of pages written.
Dr. David Alan Gilbert3fd3c4b2015-12-10 16:31:46 +00001245 * < 0 - error
1246 * >=0 - Number of pages written - this might legally be 0
1247 * if xbzrle noticed the page was the same.
Juan Quintela56e93d22015-05-07 19:33:31 +02001248 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001249 * @rs: current RAM state
Juan Quintela56e93d22015-05-07 19:33:31 +02001250 * @block: block that contains the page we want to send
1251 * @offset: offset inside the block for the page
1252 * @last_stage: if we are at the completion stage
Juan Quintela56e93d22015-05-07 19:33:31 +02001253 */
Juan Quintelaa0a8aa12017-03-20 22:29:07 +01001254static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
Juan Quintela56e93d22015-05-07 19:33:31 +02001255{
1256 int pages = -1;
Juan Quintela56e93d22015-05-07 19:33:31 +02001257 uint8_t *p;
Juan Quintela56e93d22015-05-07 19:33:31 +02001258 bool send_async = true;
zhanghailianga08f6892016-01-15 11:37:44 +08001259 RAMBlock *block = pss->block;
Alexey Romko8bba0042020-01-10 14:51:34 +01001260 ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08001261 ram_addr_t current_addr = block->offset + offset;
Juan Quintela56e93d22015-05-07 19:33:31 +02001262
Dr. David Alan Gilbert2f68e392015-08-13 11:51:30 +01001263 p = block->host + offset;
Dr. David Alan Gilbert1db9d8e2017-04-26 19:37:21 +01001264 trace_ram_save_page(block->idstr, (uint64_t)offset, p);
Juan Quintela56e93d22015-05-07 19:33:31 +02001265
Juan Quintela56e93d22015-05-07 19:33:31 +02001266 XBZRLE_cache_lock();
David Hildenbrand1a373522021-02-16 11:50:39 +01001267 if (rs->xbzrle_enabled && !migration_in_postcopy()) {
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08001268 pages = save_xbzrle_page(rs, &p, current_addr, block,
1269 offset, last_stage);
1270 if (!last_stage) {
1271 /* Can't send this cached data async, since the cache page
1272 * might get updated before it gets to the wire
Juan Quintela56e93d22015-05-07 19:33:31 +02001273 */
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08001274 send_async = false;
Juan Quintela56e93d22015-05-07 19:33:31 +02001275 }
1276 }
1277
1278 /* XBZRLE overflow or normal page */
1279 if (pages == -1) {
Xiao Guangrong65dacaa2018-03-30 15:51:27 +08001280 pages = save_normal_page(rs, block, offset, p, send_async);
Juan Quintela56e93d22015-05-07 19:33:31 +02001281 }
1282
1283 XBZRLE_cache_unlock();
1284
1285 return pages;
1286}
1287
Juan Quintelab9ee2f72016-01-15 11:40:13 +01001288static int ram_save_multifd_page(RAMState *rs, RAMBlock *block,
1289 ram_addr_t offset)
1290{
Juan Quintela67a4c892020-01-22 16:03:01 +01001291 if (multifd_queue_page(rs->f, block, offset) < 0) {
Ivan Ren713f7622019-06-25 21:18:17 +08001292 return -1;
1293 }
Juan Quintelab9ee2f72016-01-15 11:40:13 +01001294 ram_counters.normal++;
1295
1296 return 1;
1297}
1298
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001299static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
Xiao Guangrong6ef37712018-08-21 16:10:23 +08001300 ram_addr_t offset, uint8_t *source_buf)
Juan Quintela56e93d22015-05-07 19:33:31 +02001301{
Juan Quintela53518d92017-05-04 11:46:24 +02001302 RAMState *rs = ram_state;
Liang Lia7a9a882016-05-05 15:32:57 +08001303 uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001304 bool zero_page = false;
Xiao Guangrong6ef37712018-08-21 16:10:23 +08001305 int ret;
Juan Quintela56e93d22015-05-07 19:33:31 +02001306
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001307 if (save_zero_page_to_file(rs, f, block, offset)) {
1308 zero_page = true;
1309 goto exit;
1310 }
1311
Xiao Guangrong6ef37712018-08-21 16:10:23 +08001312 save_page_header(rs, f, block, offset | RAM_SAVE_FLAG_COMPRESS_PAGE);
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08001313
1314 /*
1315 * copy it to an internal buffer to avoid it being modified by the VM,
1316 * so that we can catch any error during compression and
1317 * decompression
1318 */
1319 memcpy(source_buf, p, TARGET_PAGE_SIZE);
Xiao Guangrong6ef37712018-08-21 16:10:23 +08001320 ret = qemu_put_compression_data(f, stream, source_buf, TARGET_PAGE_SIZE);
1321 if (ret < 0) {
1322 qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
Liang Lib3be2892016-05-05 15:32:54 +08001323 error_report("compressed data failed!");
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001324 return false;
Liang Lib3be2892016-05-05 15:32:54 +08001325 }
Juan Quintela56e93d22015-05-07 19:33:31 +02001326
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001327exit:
Xiao Guangrong6ef37712018-08-21 16:10:23 +08001328 ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1);
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001329 return zero_page;
1330}
1331
1332static void
1333update_compress_thread_counts(const CompressParam *param, int bytes_xmit)
1334{
Xiao Guangrong76e03002018-09-06 15:01:00 +08001335 ram_counters.transferred += bytes_xmit;
1336
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001337 if (param->zero_page) {
1338 ram_counters.duplicate++;
Xiao Guangrong76e03002018-09-06 15:01:00 +08001339 return;
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001340 }
Xiao Guangrong76e03002018-09-06 15:01:00 +08001341
1342 /* 8 is the size of the page header sent with RAM_SAVE_FLAG_CONTINUE. */
1343 compression_counters.compressed_size += bytes_xmit - 8;
1344 compression_counters.pages++;
Juan Quintela56e93d22015-05-07 19:33:31 +02001345}
1346
Xiao Guangrong32b05492018-09-06 15:01:01 +08001347static bool save_page_use_compression(RAMState *rs);
1348
Juan Quintelace25d332017-03-15 11:00:51 +01001349static void flush_compressed_data(RAMState *rs)
Juan Quintela56e93d22015-05-07 19:33:31 +02001350{
1351 int idx, len, thread_count;
1352
Xiao Guangrong32b05492018-09-06 15:01:01 +08001353 if (!save_page_use_compression(rs)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02001354 return;
1355 }
1356 thread_count = migrate_compress_threads();
Liang Lia7a9a882016-05-05 15:32:57 +08001357
Liang Li0d9f9a52016-05-05 15:32:59 +08001358 qemu_mutex_lock(&comp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02001359 for (idx = 0; idx < thread_count; idx++) {
Liang Lia7a9a882016-05-05 15:32:57 +08001360 while (!comp_param[idx].done) {
Liang Li0d9f9a52016-05-05 15:32:59 +08001361 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02001362 }
Liang Lia7a9a882016-05-05 15:32:57 +08001363 }
Liang Li0d9f9a52016-05-05 15:32:59 +08001364 qemu_mutex_unlock(&comp_done_lock);
Liang Lia7a9a882016-05-05 15:32:57 +08001365
1366 for (idx = 0; idx < thread_count; idx++) {
1367 qemu_mutex_lock(&comp_param[idx].mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08001368 if (!comp_param[idx].quit) {
Juan Quintelace25d332017-03-15 11:00:51 +01001369 len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001370 /*
1371 * it's safe to fetch zero_page without holding comp_done_lock
1372 * as there is no further request submitted to the thread,
1373 * i.e., the thread should be waiting for a request at this point.
1374 */
1375 update_compress_thread_counts(&comp_param[idx], len);
Juan Quintela56e93d22015-05-07 19:33:31 +02001376 }
Liang Lia7a9a882016-05-05 15:32:57 +08001377 qemu_mutex_unlock(&comp_param[idx].mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02001378 }
1379}
1380
1381static inline void set_compress_params(CompressParam *param, RAMBlock *block,
1382 ram_addr_t offset)
1383{
1384 param->block = block;
1385 param->offset = offset;
1386}
1387
Juan Quintelace25d332017-03-15 11:00:51 +01001388static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block,
1389 ram_addr_t offset)
Juan Quintela56e93d22015-05-07 19:33:31 +02001390{
1391 int idx, thread_count, bytes_xmit = -1, pages = -1;
Xiao Guangrong1d588722018-08-21 16:10:20 +08001392 bool wait = migrate_compress_wait_thread();
Juan Quintela56e93d22015-05-07 19:33:31 +02001393
1394 thread_count = migrate_compress_threads();
Liang Li0d9f9a52016-05-05 15:32:59 +08001395 qemu_mutex_lock(&comp_done_lock);
Xiao Guangrong1d588722018-08-21 16:10:20 +08001396retry:
1397 for (idx = 0; idx < thread_count; idx++) {
1398 if (comp_param[idx].done) {
1399 comp_param[idx].done = false;
1400 bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file);
1401 qemu_mutex_lock(&comp_param[idx].mutex);
1402 set_compress_params(&comp_param[idx], block, offset);
1403 qemu_cond_signal(&comp_param[idx].cond);
1404 qemu_mutex_unlock(&comp_param[idx].mutex);
1405 pages = 1;
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001406 update_compress_thread_counts(&comp_param[idx], bytes_xmit);
Juan Quintela56e93d22015-05-07 19:33:31 +02001407 break;
Juan Quintela56e93d22015-05-07 19:33:31 +02001408 }
1409 }
Xiao Guangrong1d588722018-08-21 16:10:20 +08001410
1411 /*
1412 * wait for a free thread if the user specifies 'compress-wait-thread',
1413 * otherwise we will post the page out in the main thread as a normal page.
1414 */
1415 if (pages < 0 && wait) {
1416 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
1417 goto retry;
1418 }
Liang Li0d9f9a52016-05-05 15:32:59 +08001419 qemu_mutex_unlock(&comp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02001420
1421 return pages;
1422}
1423
1424/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001425 * find_dirty_block: find the next dirty page and update any state
1426 * associated with the search process.
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001427 *
Wei Yanga5f7b1a2019-05-11 07:37:29 +08001428 * Returns true if a page is found
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001429 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001430 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001431 * @pss: data about the state of the current dirty page scan
1432 * @again: set to false if the search has scanned the whole of RAM
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001433 */
Juan Quintelaf20e2862017-03-21 16:19:05 +01001434static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001435{
Juan Quintelaf20e2862017-03-21 16:19:05 +01001436 pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
Juan Quintela6f37bb82017-03-13 19:26:29 +01001437 if (pss->complete_round && pss->block == rs->last_seen_block &&
Juan Quintelaa935e302017-03-21 15:36:51 +01001438 pss->page >= rs->last_page) {
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001439 /*
1440 * We've been once around the RAM and haven't found anything.
1441 * Give up.
1442 */
1443 *again = false;
1444 return false;
1445 }
David Hildenbrand542147f2021-04-29 13:27:08 +02001446 if (!offset_in_ramblock(pss->block,
1447 ((ram_addr_t)pss->page) << TARGET_PAGE_BITS)) {
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001448 /* Didn't find anything in this RAM Block */
Juan Quintelaa935e302017-03-21 15:36:51 +01001449 pss->page = 0;
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001450 pss->block = QLIST_NEXT_RCU(pss->block, next);
1451 if (!pss->block) {
Xiao Guangrong48df9d82018-09-06 15:00:59 +08001452 /*
1453 * If memory migration starts over, we will meet a dirtied page
1454 * which may still exist in the compression threads' ring, so we
1455 * should flush the compressed data to make sure the new page
1456 * is not overwritten by the old one in the destination.
1457 *
1458 * Also, if xbzrle is on, stop using the data compression at this
1459 * point. In theory, xbzrle can do better than compression.
1460 */
1461 flush_compressed_data(rs);
1462
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001463 /* Hit the end of the list */
1464 pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
1465 /* Flag that we've looped */
1466 pss->complete_round = true;
David Hildenbrand1a373522021-02-16 11:50:39 +01001467 /* After the first round, enable XBZRLE. */
1468 if (migrate_use_xbzrle()) {
1469 rs->xbzrle_enabled = true;
1470 }
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001471 }
1472 /* Didn't find anything this time, but try again on the new block */
1473 *again = true;
1474 return false;
1475 } else {
1476 /* Can go around again, but... */
1477 *again = true;
1478 /* We've found something so probably don't need to */
1479 return true;
1480 }
1481}
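/*
 * Illustrative recap (not upstream text) of the contract above, as it is
 * consumed by ram_find_and_save_block() further down:
 *   returns true                -> pss now points at a dirty page to save
 *   returns false, *again true  -> nothing left in this block, caller
 *                                  retries on the next block
 *   returns false, *again false -> the whole of RAM has been scanned
 */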
1482
Juan Quintela3d0684b2017-03-23 15:06:39 +01001483/**
1484 * unqueue_page: gets a page off the queue
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001485 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001486 * Helper for 'get_queued_page' - gets a page off the queue
1487 *
1488 * Returns the block of the page (or NULL if none available)
1489 *
Juan Quintelaec481c62017-03-20 22:12:40 +01001490 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001491 * @offset: used to return the offset within the RAMBlock
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001492 */
Juan Quintelaf20e2862017-03-21 16:19:05 +01001493static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001494{
1495 RAMBlock *block = NULL;
1496
Xiao Guangrongae526e32018-08-21 16:10:25 +08001497 if (QSIMPLEQ_EMPTY_ATOMIC(&rs->src_page_requests)) {
1498 return NULL;
1499 }
1500
Daniel Brodsky6e8a3552020-04-03 21:21:08 -07001501 QEMU_LOCK_GUARD(&rs->src_page_req_mutex);
Juan Quintelaec481c62017-03-20 22:12:40 +01001502 if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
1503 struct RAMSrcPageRequest *entry =
1504 QSIMPLEQ_FIRST(&rs->src_page_requests);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001505 block = entry->rb;
1506 *offset = entry->offset;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001507
1508 if (entry->len > TARGET_PAGE_SIZE) {
1509 entry->len -= TARGET_PAGE_SIZE;
1510 entry->offset += TARGET_PAGE_SIZE;
1511 } else {
1512 memory_region_unref(block->mr);
Juan Quintelaec481c62017-03-20 22:12:40 +01001513 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001514 g_free(entry);
Dr. David Alan Gilberte03a34f2018-06-13 11:26:42 +01001515 migration_consume_urgent_request();
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001516 }
1517 }
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001518
1519 return block;
1520}
1521
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001522#if defined(__linux__)
1523/**
1524 * poll_fault_page: try to get next UFFD write fault page and, if pending fault
1525 * is found, return RAM block pointer and page offset
1526 *
1527 * Returns pointer to the RAMBlock containing faulting page,
1528 * NULL if no write faults are pending
1529 *
1530 * @rs: current RAM state
1531 * @offset: page offset from the beginning of the block
1532 */
1533static RAMBlock *poll_fault_page(RAMState *rs, ram_addr_t *offset)
1534{
1535 struct uffd_msg uffd_msg;
1536 void *page_address;
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001537 RAMBlock *block;
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001538 int res;
1539
1540 if (!migrate_background_snapshot()) {
1541 return NULL;
1542 }
1543
1544 res = uffd_read_events(rs->uffdio_fd, &uffd_msg, 1);
1545 if (res <= 0) {
1546 return NULL;
1547 }
1548
1549 page_address = (void *)(uintptr_t) uffd_msg.arg.pagefault.address;
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001550 block = qemu_ram_block_from_host(page_address, false, offset);
1551 assert(block && (block->flags & RAM_UF_WRITEPROTECT) != 0);
1552 return block;
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001553}
1554
1555/**
1556 * ram_save_release_protection: release UFFD write protection after
1557 * a range of pages has been saved
1558 *
1559 * @rs: current RAM state
1560 * @pss: page-search-status structure
1561 * @start_page: index of the first page in the range relative to pss->block
1562 *
1563 * Returns 0 on success, negative value in case of an error
1564 */
1565static int ram_save_release_protection(RAMState *rs, PageSearchStatus *pss,
1566 unsigned long start_page)
1567{
1568 int res = 0;
1569
1570 /* Check if page is from UFFD-managed region. */
1571 if (pss->block->flags & RAM_UF_WRITEPROTECT) {
1572 void *page_address = pss->block->host + (start_page << TARGET_PAGE_BITS);
1573 uint64_t run_length = (pss->page - start_page + 1) << TARGET_PAGE_BITS;
1574
1575 /* Flush async buffers before un-protect. */
1576 qemu_fflush(rs->f);
1577 /* Un-protect memory range. */
1578 res = uffd_change_protection(rs->uffdio_fd, page_address, run_length,
1579 false, false);
1580 }
1581
1582 return res;
1583}
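/*
 * Worked example (illustration only): with 4 KiB target pages, if the
 * host-page walk started at start_page == 100 and finished with
 * pss->page == 103, the un-protected run starts at
 * pss->block->host + (100 << TARGET_PAGE_BITS) and covers
 * run_length = (103 - 100 + 1) << TARGET_PAGE_BITS = 16 KiB.
 */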
1584
1585/* ram_write_tracking_available: check if kernel supports required UFFD features
1586 *
1587 * Returns true if supports, false otherwise
1588 */
1589bool ram_write_tracking_available(void)
1590{
1591 uint64_t uffd_features;
1592 int res;
1593
1594 res = uffd_query_features(&uffd_features);
1595 return (res == 0 &&
1596 (uffd_features & UFFD_FEATURE_PAGEFAULT_FLAG_WP) != 0);
1597}
1598
1599/* ram_write_tracking_compatible: check if guest configuration is
1600 * compatible with 'write-tracking'
1601 *
1602 * Returns true if compatible, false otherwise
1603 */
1604bool ram_write_tracking_compatible(void)
1605{
1606 const uint64_t uffd_ioctls_mask = BIT(_UFFDIO_WRITEPROTECT);
1607 int uffd_fd;
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001608 RAMBlock *block;
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001609 bool ret = false;
1610
1611 /* Open UFFD file descriptor */
1612 uffd_fd = uffd_create_fd(UFFD_FEATURE_PAGEFAULT_FLAG_WP, false);
1613 if (uffd_fd < 0) {
1614 return false;
1615 }
1616
1617 RCU_READ_LOCK_GUARD();
1618
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001619 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001620 uint64_t uffd_ioctls;
1621
1622 /* Nothing to do with read-only and MMIO-writable regions */
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001623 if (block->mr->readonly || block->mr->rom_device) {
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001624 continue;
1625 }
1626 /* Try to register block memory via UFFD-IO to track writes */
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001627 if (uffd_register_memory(uffd_fd, block->host, block->max_length,
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001628 UFFDIO_REGISTER_MODE_WP, &uffd_ioctls)) {
1629 goto out;
1630 }
1631 if ((uffd_ioctls & uffd_ioctls_mask) != uffd_ioctls_mask) {
1632 goto out;
1633 }
1634 }
1635 ret = true;
1636
1637out:
1638 uffd_close_fd(uffd_fd);
1639 return ret;
1640}
1641
David Hildenbrandf7b9dcf2021-10-11 19:53:45 +02001642static inline void populate_read_range(RAMBlock *block, ram_addr_t offset,
1643 ram_addr_t size)
1644{
1645 /*
1646 * We read one byte of each page; this will preallocate page tables if
1647 * required and populate the shared zeropage on MAP_PRIVATE anonymous memory
1648 * where no page was populated yet. This might require adaptation when
1649 * supporting other mappings, like shmem.
1650 */
1651 for (; offset < size; offset += block->page_size) {
1652 char tmp = *((char *)block->host + offset);
1653
1654 /* Don't optimize the read out */
1655 asm volatile("" : "+r" (tmp));
1656 }
1657}
1658
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001659/*
David Hildenbrandf7b9dcf2021-10-11 19:53:45 +02001660 * ram_block_populate_read: preallocate page tables and populate pages in the
1661 * RAM block by reading a byte of each page.
Andrey Gruzdeveeccb992021-04-01 12:22:25 +03001662 *
1663 * Since it's solely used for userfault_fd WP feature, here we just
1664 * hardcode page size to qemu_real_host_page_size.
1665 *
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001666 * @block: RAM block to populate
Andrey Gruzdeveeccb992021-04-01 12:22:25 +03001667 */
David Hildenbrandf7b9dcf2021-10-11 19:53:45 +02001668static void ram_block_populate_read(RAMBlock *block)
Andrey Gruzdeveeccb992021-04-01 12:22:25 +03001669{
David Hildenbrandf7b9dcf2021-10-11 19:53:45 +02001670 populate_read_range(block, 0, block->used_length);
Andrey Gruzdeveeccb992021-04-01 12:22:25 +03001671}
1672
1673/*
1674 * ram_write_tracking_prepare: prepare for UFFD-WP memory tracking
1675 */
1676void ram_write_tracking_prepare(void)
1677{
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001678 RAMBlock *block;
Andrey Gruzdeveeccb992021-04-01 12:22:25 +03001679
1680 RCU_READ_LOCK_GUARD();
1681
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001682 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Andrey Gruzdeveeccb992021-04-01 12:22:25 +03001683 /* Nothing to do with read-only and MMIO-writable regions */
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001684 if (block->mr->readonly || block->mr->rom_device) {
Andrey Gruzdeveeccb992021-04-01 12:22:25 +03001685 continue;
1686 }
1687
1688 /*
1689 * Populate pages of the RAM block before enabling userfault_fd
1690 * write protection.
1691 *
1692 * This stage is required since ioctl(UFFDIO_WRITEPROTECT) with
1693 * UFFDIO_WRITEPROTECT_MODE_WP mode setting would silently skip
1694 * pages with pte_none() entries in page table.
1695 */
David Hildenbrandf7b9dcf2021-10-11 19:53:45 +02001696 ram_block_populate_read(block);
Andrey Gruzdeveeccb992021-04-01 12:22:25 +03001697 }
1698}
1699
1700/*
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001701 * ram_write_tracking_start: start UFFD-WP memory tracking
1702 *
1703 * Returns 0 for success or negative value in case of error
1704 */
1705int ram_write_tracking_start(void)
1706{
1707 int uffd_fd;
1708 RAMState *rs = ram_state;
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001709 RAMBlock *block;
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001710
1711 /* Open UFFD file descriptor */
1712 uffd_fd = uffd_create_fd(UFFD_FEATURE_PAGEFAULT_FLAG_WP, true);
1713 if (uffd_fd < 0) {
1714 return uffd_fd;
1715 }
1716 rs->uffdio_fd = uffd_fd;
1717
1718 RCU_READ_LOCK_GUARD();
1719
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001720 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001721 /* Nothing to do with read-only and MMIO-writable regions */
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001722 if (block->mr->readonly || block->mr->rom_device) {
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001723 continue;
1724 }
1725
1726 /* Register block memory with UFFD to track writes */
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001727 if (uffd_register_memory(rs->uffdio_fd, block->host,
1728 block->max_length, UFFDIO_REGISTER_MODE_WP, NULL)) {
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001729 goto fail;
1730 }
1731 /* Apply UFFD write protection to the block memory range */
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001732 if (uffd_change_protection(rs->uffdio_fd, block->host,
1733 block->max_length, true, false)) {
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001734 goto fail;
1735 }
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001736 block->flags |= RAM_UF_WRITEPROTECT;
1737 memory_region_ref(block->mr);
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001738
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001739 trace_ram_write_tracking_ramblock_start(block->idstr, block->page_size,
1740 block->host, block->max_length);
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001741 }
1742
1743 return 0;
1744
1745fail:
1746 error_report("ram_write_tracking_start() failed: restoring initial memory state");
1747
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001748 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
1749 if ((block->flags & RAM_UF_WRITEPROTECT) == 0) {
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001750 continue;
1751 }
1752 /*
1753 * In case some memory block failed to be write-protected,
1754 * remove protection and unregister all RAM blocks that succeeded
1755 */
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001756 uffd_change_protection(rs->uffdio_fd, block->host, block->max_length,
1757 false, false);
1758 uffd_unregister_memory(rs->uffdio_fd, block->host, block->max_length);
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001759 /* Cleanup flags and remove reference */
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001760 block->flags &= ~RAM_UF_WRITEPROTECT;
1761 memory_region_unref(block->mr);
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001762 }
1763
1764 uffd_close_fd(uffd_fd);
1765 rs->uffdio_fd = -1;
1766 return -1;
1767}
1768
1769/**
1770 * ram_write_tracking_stop: stop UFFD-WP memory tracking and remove protection
1771 */
1772void ram_write_tracking_stop(void)
1773{
1774 RAMState *rs = ram_state;
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001775 RAMBlock *block;
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001776
1777 RCU_READ_LOCK_GUARD();
1778
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001779 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
1780 if ((block->flags & RAM_UF_WRITEPROTECT) == 0) {
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001781 continue;
1782 }
1783 /* Remove protection and unregister all affected RAM blocks */
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001784 uffd_change_protection(rs->uffdio_fd, block->host, block->max_length,
1785 false, false);
1786 uffd_unregister_memory(rs->uffdio_fd, block->host, block->max_length);
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001787
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001788 trace_ram_write_tracking_ramblock_stop(block->idstr, block->page_size,
1789 block->host, block->max_length);
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001790
1791 /* Cleanup flags and remove reference */
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001792 block->flags &= ~RAM_UF_WRITEPROTECT;
1793 memory_region_unref(block->mr);
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001794 }
1795
1796 /* Finally close UFFD file descriptor */
1797 uffd_close_fd(rs->uffdio_fd);
1798 rs->uffdio_fd = -1;
1799}
1800
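/*
 * Hedged sketch (not part of the upstream file): the call order the
 * write-tracking helpers above are designed for when a background
 * snapshot is set up.  The wrapper is hypothetical; the real sequencing
 * lives in the migration core.
 */
static inline int example_write_tracking_setup(void)
{
    if (!ram_write_tracking_available() || !ram_write_tracking_compatible()) {
        return -1;
    }
    /* Fault in every page first: UFFD-WP cannot protect pte_none() entries. */
    ram_write_tracking_prepare();
    /* Write-protect guest RAM; faults are drained later via poll_fault_page(). */
    return ram_write_tracking_start();
}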
1801#else
1802/* No target OS support, stubs just fail or ignore */
1803
1804static RAMBlock *poll_fault_page(RAMState *rs, ram_addr_t *offset)
1805{
1806 (void) rs;
1807 (void) offset;
1808
1809 return NULL;
1810}
1811
1812static int ram_save_release_protection(RAMState *rs, PageSearchStatus *pss,
1813 unsigned long start_page)
1814{
1815 (void) rs;
1816 (void) pss;
1817 (void) start_page;
1818
1819 return 0;
1820}
1821
1822bool ram_write_tracking_available(void)
1823{
1824 return false;
1825}
1826
1827bool ram_write_tracking_compatible(void)
1828{
1829 assert(0);
1830 return false;
1831}
1832
1833int ram_write_tracking_start(void)
1834{
1835 assert(0);
1836 return -1;
1837}
1838
1839void ram_write_tracking_stop(void)
1840{
1841 assert(0);
1842}
1843#endif /* defined(__linux__) */
1844
Juan Quintela3d0684b2017-03-23 15:06:39 +01001845/**
Li Qiangff1543a2019-05-24 23:28:32 -07001846 * get_queued_page: unqueue a page from the postcopy requests
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001847 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001848 * Skips pages that are already sent (!dirty)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001849 *
Wei Yanga5f7b1a2019-05-11 07:37:29 +08001850 * Returns true if a queued page is found
Juan Quintela3d0684b2017-03-23 15:06:39 +01001851 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001852 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001853 * @pss: data about the state of the current dirty page scan
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001854 */
Juan Quintelaf20e2862017-03-21 16:19:05 +01001855static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001856{
1857 RAMBlock *block;
1858 ram_addr_t offset;
1859 bool dirty;
1860
1861 do {
Juan Quintelaf20e2862017-03-21 16:19:05 +01001862 block = unqueue_page(rs, &offset);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001863 /*
1864 * We're sending this page, and since it's postcopy nothing else
1865 * will dirty it, and we must make sure it doesn't get sent again
1866 * even if this queue request was received after the background
1867 * search already sent it.
1868 */
1869 if (block) {
Juan Quintelaf20e2862017-03-21 16:19:05 +01001870 unsigned long page;
1871
Juan Quintela6b6712e2017-03-22 15:18:04 +01001872 page = offset >> TARGET_PAGE_BITS;
1873 dirty = test_bit(page, block->bmap);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001874 if (!dirty) {
Juan Quintela06b10682017-03-21 15:18:05 +01001875 trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
Wei Yang64737602019-08-19 14:18:43 +08001876 page);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001877 } else {
Juan Quintelaf20e2862017-03-21 16:19:05 +01001878 trace_get_queued_page(block->idstr, (uint64_t)offset, page);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001879 }
1880 }
1881
1882 } while (block && !dirty);
1883
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001884 if (!block) {
1885 /*
1886 * Poll write faults too if background snapshot is enabled; that's
1887 * when we have vcpus got blocked by the write protected pages.
1888 */
1889 block = poll_fault_page(rs, &offset);
1890 }
1891
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001892 if (block) {
1893 /*
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001894 * We want the background search to continue from the queued page
1895 * since the guest is likely to want other pages near to the page
1896 * it just requested.
1897 */
1898 pss->block = block;
Juan Quintelaa935e302017-03-21 15:36:51 +01001899 pss->page = offset >> TARGET_PAGE_BITS;
Wei Yang422314e2019-06-05 09:08:28 +08001900
1901 /*
1902 * This unqueued page would break the "one round" check, even if it
1903 * is really rare.
1904 */
1905 pss->complete_round = false;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001906 }
1907
1908 return !!block;
1909}
1910
Juan Quintela56e93d22015-05-07 19:33:31 +02001911/**
Juan Quintela5e58f962017-04-03 22:06:54 +02001912 * migration_page_queue_free: drop any remaining pages in the ram
1913 * request queue
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001914 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001915 * It should be empty at the end anyway, but in error cases there may
1916 * be some left. In case any page is left, we drop it.
1917 *
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001918 */
Juan Quintela83c13382017-05-04 11:45:01 +02001919static void migration_page_queue_free(RAMState *rs)
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001920{
Juan Quintelaec481c62017-03-20 22:12:40 +01001921 struct RAMSrcPageRequest *mspr, *next_mspr;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001922 /* This queue generally should be empty - but in the case of a failed
1923 * migration it might have some droppings in it.
1924 */
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01001925 RCU_READ_LOCK_GUARD();
Juan Quintelaec481c62017-03-20 22:12:40 +01001926 QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001927 memory_region_unref(mspr->rb->mr);
Juan Quintelaec481c62017-03-20 22:12:40 +01001928 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001929 g_free(mspr);
1930 }
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001931}
1932
1933/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001934 * ram_save_queue_pages: queue the page for transmission
1935 *
1936 * A request from postcopy destination for example.
1937 *
1938 * Returns zero on success or negative on error
1939 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001940 * @rbname: Name of the RAMBlock of the request. NULL means the
1941 * same as the last one.
1942 * @start: starting address from the start of the RAMBlock
1943 * @len: length (in bytes) to send
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001944 */
Juan Quintela96506892017-03-14 18:41:03 +01001945int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001946{
1947 RAMBlock *ramblock;
Juan Quintela53518d92017-05-04 11:46:24 +02001948 RAMState *rs = ram_state;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001949
Juan Quintela93604472017-06-06 19:49:03 +02001950 ram_counters.postcopy_requests++;
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01001951 RCU_READ_LOCK_GUARD();
1952
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001953 if (!rbname) {
1954 /* Reuse last RAMBlock */
Juan Quintela68a098f2017-03-14 13:48:42 +01001955 ramblock = rs->last_req_rb;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001956
1957 if (!ramblock) {
1958 /*
1959 * Shouldn't happen, we can't reuse the last RAMBlock if
1960 * it's the 1st request.
1961 */
1962 error_report("ram_save_queue_pages no previous block");
Daniel Henrique Barboza03acb4e2020-01-06 15:23:31 -03001963 return -1;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001964 }
1965 } else {
1966 ramblock = qemu_ram_block_by_name(rbname);
1967
1968 if (!ramblock) {
1969 /* We shouldn't be asked for a non-existent RAMBlock */
1970 error_report("ram_save_queue_pages no block '%s'", rbname);
Daniel Henrique Barboza03acb4e2020-01-06 15:23:31 -03001971 return -1;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001972 }
Juan Quintela68a098f2017-03-14 13:48:42 +01001973 rs->last_req_rb = ramblock;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001974 }
1975 trace_ram_save_queue_pages(ramblock->idstr, start, len);
David Hildenbrand542147f2021-04-29 13:27:08 +02001976 if (!offset_in_ramblock(ramblock, start + len - 1)) {
Juan Quintela9458ad62015-11-10 17:42:05 +01001977 error_report("%s request overrun start=" RAM_ADDR_FMT " len="
1978 RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001979 __func__, start, len, ramblock->used_length);
Daniel Henrique Barboza03acb4e2020-01-06 15:23:31 -03001980 return -1;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001981 }
1982
Juan Quintelaec481c62017-03-20 22:12:40 +01001983 struct RAMSrcPageRequest *new_entry =
1984 g_malloc0(sizeof(struct RAMSrcPageRequest));
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001985 new_entry->rb = ramblock;
1986 new_entry->offset = start;
1987 new_entry->len = len;
1988
1989 memory_region_ref(ramblock->mr);
Juan Quintelaec481c62017-03-20 22:12:40 +01001990 qemu_mutex_lock(&rs->src_page_req_mutex);
1991 QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
Dr. David Alan Gilberte03a34f2018-06-13 11:26:42 +01001992 migration_make_urgent_request();
Juan Quintelaec481c62017-03-20 22:12:40 +01001993 qemu_mutex_unlock(&rs->src_page_req_mutex);
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001994
1995 return 0;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001996}
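/*
 * Hedged usage sketch (not upstream code): queueing a single faulting
 * target page.  In the real flow this is driven from the return path when
 * the postcopy destination reports a fault; the wrapper and its arguments
 * are purely illustrative.
 */
static inline int example_queue_one_page(const char *rbname,
                                         ram_addr_t fault_offset)
{
    /* Round down to a target page boundary and request exactly one page. */
    return ram_save_queue_pages(rbname, fault_offset & TARGET_PAGE_MASK,
                                TARGET_PAGE_SIZE);
}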
1997
Xiao Guangrongd7400a32018-03-30 15:51:26 +08001998static bool save_page_use_compression(RAMState *rs)
1999{
2000 if (!migrate_use_compression()) {
2001 return false;
2002 }
2003
2004 /*
David Hildenbrand1a373522021-02-16 11:50:39 +01002005 * If xbzrle is enabled (e.g., after first round of migration), stop
2006 * using the data compression. In theory, xbzrle can do better than
2007 * compression.
Xiao Guangrongd7400a32018-03-30 15:51:26 +08002008 */
David Hildenbrand1a373522021-02-16 11:50:39 +01002009 if (rs->xbzrle_enabled) {
2010 return false;
Xiao Guangrongd7400a32018-03-30 15:51:26 +08002011 }
2012
David Hildenbrand1a373522021-02-16 11:50:39 +01002013 return true;
Xiao Guangrongd7400a32018-03-30 15:51:26 +08002014}
2015
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002016/*
2017 * try to compress the page before posting it out, return true if the page
2018 * has been properly handled by compression, otherwise needs other
2019 * paths to handle it
2020 */
2021static bool save_compress_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
2022{
2023 if (!save_page_use_compression(rs)) {
2024 return false;
2025 }
2026
2027 /*
2028 * When starting the process of a new block, the first page of
2029 * the block should be sent out before other pages in the same
2030 * block, and all the pages in the last block should have been sent
2031 * out; keeping this order is important, because the 'cont' flag
2032 * is used to avoid resending the block name.
2033 *
2034 * We post the first page as a normal page because compression
2035 * takes a lot of CPU time.
2036 */
2037 if (block != rs->last_sent_block) {
2038 flush_compressed_data(rs);
2039 return false;
2040 }
2041
2042 if (compress_page_with_multi_thread(rs, block, offset) > 0) {
2043 return true;
2044 }
2045
Xiao Guangrong76e03002018-09-06 15:01:00 +08002046 compression_counters.busy++;
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002047 return false;
2048}
2049
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002050/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002051 * ram_save_target_page: save one target page
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002052 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002053 * Returns the number of pages written
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002054 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002055 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002056 * @pss: data about the page we want to send
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002057 * @last_stage: if we are at the completion stage
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002058 */
Juan Quintelaa0a8aa12017-03-20 22:29:07 +01002059static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
Juan Quintelaf20e2862017-03-21 16:19:05 +01002060 bool last_stage)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002061{
Xiao Guangronga8ec91f2018-03-30 15:51:25 +08002062 RAMBlock *block = pss->block;
Alexey Romko8bba0042020-01-10 14:51:34 +01002063 ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
Xiao Guangronga8ec91f2018-03-30 15:51:25 +08002064 int res;
2065
2066 if (control_save_page(rs, block, offset, &res)) {
2067 return res;
2068 }
2069
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002070 if (save_compress_page(rs, block, offset)) {
2071 return 1;
Xiao Guangrongd7400a32018-03-30 15:51:26 +08002072 }
2073
2074 res = save_zero_page(rs, block, offset);
2075 if (res > 0) {
2076 /* Must let xbzrle know, otherwise a previous (now 0'd) cached
2077 * page would be stale
2078 */
2079 if (!save_page_use_compression(rs)) {
2080 XBZRLE_cache_lock();
2081 xbzrle_cache_zero_page(rs, block->offset + offset);
2082 XBZRLE_cache_unlock();
2083 }
2084 ram_release_pages(block->idstr, offset, res);
2085 return res;
2086 }
2087
Xiao Guangrongda3f56c2018-03-30 15:51:28 +08002088 /*
Wei Yangc6b3a2e2019-10-26 07:20:00 +08002089 * Do not use multifd for:
2090 * 1. Compression as the first page in the new block should be posted out
2091 * before sending the compressed page
2092 * 2. In postcopy as one whole host page should be placed
Xiao Guangrongda3f56c2018-03-30 15:51:28 +08002093 */
Wei Yangc6b3a2e2019-10-26 07:20:00 +08002094 if (!save_page_use_compression(rs) && migrate_use_multifd()
2095 && !migration_in_postcopy()) {
Juan Quintelab9ee2f72016-01-15 11:40:13 +01002096 return ram_save_multifd_page(rs, block, offset);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002097 }
2098
Xiao Guangrong1faa5662018-03-30 15:51:24 +08002099 return ram_save_page(rs, pss, last_stage);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002100}
2101
2102/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002103 * ram_save_host_page: save a whole host page
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002104 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002105 * Starting at *offset send pages up to the end of the current host
2106 * page. It's valid for the initial offset to point into the middle of
2107 * a host page in which case the remainder of the hostpage is sent.
2108 * Only dirty target pages are sent. Note that the host page size may
2109 * be a huge page for this block.
Dr. David Alan Gilbert1eb3fc02017-05-17 17:58:09 +01002110 * The saving stops at the boundary of the used_length of the block
2111 * if the RAMBlock isn't a multiple of the host page size.
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002112 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002113 * Returns the number of pages written or negative on error
2114 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002115 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002117 * @pss: data about the page we want to send
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002118 * @last_stage: if we are at the completion stage
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002119 */
Juan Quintelaa0a8aa12017-03-20 22:29:07 +01002120static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
Juan Quintelaf20e2862017-03-21 16:19:05 +01002121 bool last_stage)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002122{
2123 int tmppages, pages = 0;
Juan Quintelaa935e302017-03-21 15:36:51 +01002124 size_t pagesize_bits =
2125 qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
Kunkun Jiangba1b7c82021-03-16 20:57:16 +08002126 unsigned long hostpage_boundary =
2127 QEMU_ALIGN_UP(pss->page + 1, pagesize_bits);
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03002128 unsigned long start_page = pss->page;
2129 int res;
Dr. David Alan Gilbert4c011c32017-02-24 18:28:39 +00002130
Yury Kotovfbd162e2019-02-15 20:45:46 +03002131 if (ramblock_is_ignored(pss->block)) {
CĂ©dric Le Goaterb895de52018-05-14 08:57:00 +02002132 error_report("block %s should not be migrated !", pss->block->idstr);
2133 return 0;
2134 }
2135
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002136 do {
Xiao Guangrong1faa5662018-03-30 15:51:24 +08002137 /* Check if the page is dirty, and send it if it is */
Kunkun Jiangba1b7c82021-03-16 20:57:16 +08002138 if (migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
2139 tmppages = ram_save_target_page(rs, pss, last_stage);
2140 if (tmppages < 0) {
2141 return tmppages;
2142 }
Xiao Guangrong1faa5662018-03-30 15:51:24 +08002143
Kunkun Jiangba1b7c82021-03-16 20:57:16 +08002144 pages += tmppages;
2145 /*
2146 * Allow rate limiting to happen in the middle of huge pages if
2147 * something is sent in the current iteration.
2148 */
2149 if (pagesize_bits > 1 && tmppages > 0) {
2150 migration_rate_limit();
2151 }
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002152 }
Kunkun Jiangba1b7c82021-03-16 20:57:16 +08002153 pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
2154 } while ((pss->page < hostpage_boundary) &&
Alexey Romko8bba0042020-01-10 14:51:34 +01002155 offset_in_ramblock(pss->block,
2156 ((ram_addr_t)pss->page) << TARGET_PAGE_BITS));
Kunkun Jiangba1b7c82021-03-16 20:57:16 +08002157 /* The offset we leave with is the min boundary of host page and block */
2158 pss->page = MIN(pss->page, hostpage_boundary) - 1;
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03002159
2160 res = ram_save_release_protection(rs, pss, start_page);
2161 return (res < 0 ? res : pages);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002162}
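/*
 * Worked example (illustration only): with 2 MiB host huge pages and
 * 4 KiB target pages, pagesize_bits is 512.  If the walk enters with
 * pss->page == 1000, then hostpage_boundary = QEMU_ALIGN_UP(1001, 512)
 * = 1024, so only the remaining target pages 1000..1023 of that host
 * page are considered before control returns to the caller.
 */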
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002163
2164/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002165 * ram_find_and_save_block: finds a dirty page and sends it to f
Juan Quintela56e93d22015-05-07 19:33:31 +02002166 *
2167 * Called within an RCU critical section.
2168 *
Xiao Guangronge8f37352018-09-03 17:26:44 +08002169 * Returns the number of pages written where zero means no dirty pages,
2170 * or negative on error
Juan Quintela56e93d22015-05-07 19:33:31 +02002171 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002172 * @rs: current RAM state
Juan Quintela56e93d22015-05-07 19:33:31 +02002173 * @last_stage: if we are at the completion stage
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002174 *
2175 * On systems where host-page-size > target-page-size it will send all the
2176 * pages in a host page that are dirty.
Juan Quintela56e93d22015-05-07 19:33:31 +02002177 */
2178
Juan Quintelace25d332017-03-15 11:00:51 +01002179static int ram_find_and_save_block(RAMState *rs, bool last_stage)
Juan Quintela56e93d22015-05-07 19:33:31 +02002180{
Dr. David Alan Gilbertb8fb8cb2015-09-23 15:27:10 +01002181 PageSearchStatus pss;
Juan Quintela56e93d22015-05-07 19:33:31 +02002182 int pages = 0;
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002183 bool again, found;
Juan Quintela56e93d22015-05-07 19:33:31 +02002184
Ashijeet Acharya0827b9e2017-02-08 19:58:45 +05302185 /* No dirty page as there is zero RAM */
2186 if (!ram_bytes_total()) {
2187 return pages;
2188 }
2189
Juan Quintela6f37bb82017-03-13 19:26:29 +01002190 pss.block = rs->last_seen_block;
Juan Quintelaa935e302017-03-21 15:36:51 +01002191 pss.page = rs->last_page;
Dr. David Alan Gilbertb8fb8cb2015-09-23 15:27:10 +01002192 pss.complete_round = false;
2193
2194 if (!pss.block) {
2195 pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
2196 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002197
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002198 do {
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002199 again = true;
Juan Quintelaf20e2862017-03-21 16:19:05 +01002200 found = get_queued_page(rs, &pss);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002201
2202 if (!found) {
2203 /* priority queue empty, so just search for something dirty */
Juan Quintelaf20e2862017-03-21 16:19:05 +01002204 found = find_dirty_block(rs, &pss, &again);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002205 }
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002206
2207 if (found) {
Juan Quintelaf20e2862017-03-21 16:19:05 +01002208 pages = ram_save_host_page(rs, &pss, last_stage);
Juan Quintela56e93d22015-05-07 19:33:31 +02002209 }
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002210 } while (!pages && again);
Juan Quintela56e93d22015-05-07 19:33:31 +02002211
Juan Quintela6f37bb82017-03-13 19:26:29 +01002212 rs->last_seen_block = pss.block;
Juan Quintelaa935e302017-03-21 15:36:51 +01002213 rs->last_page = pss.page;
Juan Quintela56e93d22015-05-07 19:33:31 +02002214
2215 return pages;
2216}
2217
2218void acct_update_position(QEMUFile *f, size_t size, bool zero)
2219{
2220 uint64_t pages = size / TARGET_PAGE_SIZE;
Juan Quintelaf7ccd612017-03-13 20:30:21 +01002221
Juan Quintela56e93d22015-05-07 19:33:31 +02002222 if (zero) {
Juan Quintela93604472017-06-06 19:49:03 +02002223 ram_counters.duplicate += pages;
Juan Quintela56e93d22015-05-07 19:33:31 +02002224 } else {
Juan Quintela93604472017-06-06 19:49:03 +02002225 ram_counters.normal += pages;
2226 ram_counters.transferred += size;
Juan Quintela56e93d22015-05-07 19:33:31 +02002227 qemu_update_position(f, size);
2228 }
2229}
2230
Yury Kotovfbd162e2019-02-15 20:45:46 +03002231static uint64_t ram_bytes_total_common(bool count_ignored)
Juan Quintela56e93d22015-05-07 19:33:31 +02002232{
2233 RAMBlock *block;
2234 uint64_t total = 0;
2235
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01002236 RCU_READ_LOCK_GUARD();
2237
Yury Kotovfbd162e2019-02-15 20:45:46 +03002238 if (count_ignored) {
2239 RAMBLOCK_FOREACH_MIGRATABLE(block) {
2240 total += block->used_length;
2241 }
2242 } else {
2243 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
2244 total += block->used_length;
2245 }
Peter Xu99e15582017-05-12 12:17:39 +08002246 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002247 return total;
2248}
2249
Yury Kotovfbd162e2019-02-15 20:45:46 +03002250uint64_t ram_bytes_total(void)
2251{
2252 return ram_bytes_total_common(false);
2253}
2254
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002255static void xbzrle_load_setup(void)
Juan Quintela56e93d22015-05-07 19:33:31 +02002256{
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002257 XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
Juan Quintela56e93d22015-05-07 19:33:31 +02002258}
2259
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002260static void xbzrle_load_cleanup(void)
2261{
2262 g_free(XBZRLE.decoded_buf);
2263 XBZRLE.decoded_buf = NULL;
2264}
2265
Peter Xu7d7c96b2017-10-19 14:31:58 +08002266static void ram_state_cleanup(RAMState **rsp)
2267{
Dr. David Alan Gilbertb9ccaf62018-02-12 16:03:39 +00002268 if (*rsp) {
2269 migration_page_queue_free(*rsp);
2270 qemu_mutex_destroy(&(*rsp)->bitmap_mutex);
2271 qemu_mutex_destroy(&(*rsp)->src_page_req_mutex);
2272 g_free(*rsp);
2273 *rsp = NULL;
2274 }
Peter Xu7d7c96b2017-10-19 14:31:58 +08002275}
2276
Peter Xu84593a02017-10-19 14:31:59 +08002277static void xbzrle_cleanup(void)
2278{
2279 XBZRLE_cache_lock();
2280 if (XBZRLE.cache) {
2281 cache_fini(XBZRLE.cache);
2282 g_free(XBZRLE.encoded_buf);
2283 g_free(XBZRLE.current_buf);
2284 g_free(XBZRLE.zero_target_page);
2285 XBZRLE.cache = NULL;
2286 XBZRLE.encoded_buf = NULL;
2287 XBZRLE.current_buf = NULL;
2288 XBZRLE.zero_target_page = NULL;
2289 }
2290 XBZRLE_cache_unlock();
2291}
2292
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002293static void ram_save_cleanup(void *opaque)
Juan Quintela56e93d22015-05-07 19:33:31 +02002294{
Juan Quintela53518d92017-05-04 11:46:24 +02002295 RAMState **rsp = opaque;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002296 RAMBlock *block;
Juan Quintelaeb859c52017-03-13 21:51:55 +01002297
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03002298 /* We don't use dirty log with background snapshots */
2299 if (!migrate_background_snapshot()) {
2300 /* The caller must hold the iothread lock or be in a BH, so there
2301 * is no write race against the migration bitmap
2302 */
Hyman Huang(黄勇)63b41db2021-06-29 16:01:19 +00002303 if (global_dirty_tracking & GLOBAL_DIRTY_MIGRATION) {
2304 /*
2305 * do not stop the dirty log without starting it, since
2306 * memory_global_dirty_log_stop will assert that
2307 * memory_global_dirty_log_start/stop are used in pairs
2308 */
2309 memory_global_dirty_log_stop(GLOBAL_DIRTY_MIGRATION);
2310 }
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03002311 }
Juan Quintela6b6712e2017-03-22 15:18:04 +01002312
Yury Kotovfbd162e2019-02-15 20:45:46 +03002313 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Peter Xu002cad62019-06-03 14:50:56 +08002314 g_free(block->clear_bmap);
2315 block->clear_bmap = NULL;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002316 g_free(block->bmap);
2317 block->bmap = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02002318 }
2319
Peter Xu84593a02017-10-19 14:31:59 +08002320 xbzrle_cleanup();
Juan Quintelaf0afa332017-06-28 11:52:28 +02002321 compress_threads_save_cleanup();
Peter Xu7d7c96b2017-10-19 14:31:58 +08002322 ram_state_cleanup(rsp);
Juan Quintela56e93d22015-05-07 19:33:31 +02002323}
2324
Juan Quintela6f37bb82017-03-13 19:26:29 +01002325static void ram_state_reset(RAMState *rs)
Juan Quintela56e93d22015-05-07 19:33:31 +02002326{
Juan Quintela6f37bb82017-03-13 19:26:29 +01002327 rs->last_seen_block = NULL;
2328 rs->last_sent_block = NULL;
Juan Quintela269ace22017-03-21 15:23:31 +01002329 rs->last_page = 0;
Juan Quintela6f37bb82017-03-13 19:26:29 +01002330 rs->last_version = ram_list.version;
David Hildenbrand1a373522021-02-16 11:50:39 +01002331 rs->xbzrle_enabled = false;
Juan Quintela56e93d22015-05-07 19:33:31 +02002332}
2333
2334#define MAX_WAIT 50 /* ms, half buffered_file limit */
2335
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00002336/*
2337 * 'expected' is the value you expect the bitmap mostly to be full
2338 * of; it won't bother printing lines that are all this value.
2340 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002341void ram_debug_dump_bitmap(unsigned long *todump, bool expected,
2342 unsigned long pages)
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00002343{
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00002344 int64_t cur;
2345 int64_t linelen = 128;
2346 char linebuf[129];
2347
Juan Quintela6b6712e2017-03-22 15:18:04 +01002348 for (cur = 0; cur < pages; cur += linelen) {
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00002349 int64_t curb;
2350 bool found = false;
2351 /*
2352 * Last line; catch the case where the line length
2353 * is longer than remaining ram
2354 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002355 if (cur + linelen > pages) {
2356 linelen = pages - cur;
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00002357 }
2358 for (curb = 0; curb < linelen; curb++) {
2359 bool thisbit = test_bit(cur + curb, todump);
2360 linebuf[curb] = thisbit ? '1' : '.';
2361 found = found || (thisbit != expected);
2362 }
2363 if (found) {
2364 linebuf[curb] = '\0';
2365 fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
2366 }
2367 }
2368}
2369
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002370/* **** functions for postcopy ***** */
2371
Pavel Butsykinced1c612017-02-03 18:23:21 +03002372void ram_postcopy_migrated_memory_release(MigrationState *ms)
2373{
2374 struct RAMBlock *block;
Pavel Butsykinced1c612017-02-03 18:23:21 +03002375
Yury Kotovfbd162e2019-02-15 20:45:46 +03002376 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01002377 unsigned long *bitmap = block->bmap;
2378 unsigned long range = block->used_length >> TARGET_PAGE_BITS;
2379 unsigned long run_start = find_next_zero_bit(bitmap, range, 0);
Pavel Butsykinced1c612017-02-03 18:23:21 +03002380
2381 while (run_start < range) {
2382 unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
Alexey Romko8bba0042020-01-10 14:51:34 +01002383 ram_discard_range(block->idstr,
2384 ((ram_addr_t)run_start) << TARGET_PAGE_BITS,
2385 ((ram_addr_t)(run_end - run_start))
2386 << TARGET_PAGE_BITS);
Pavel Butsykinced1c612017-02-03 18:23:21 +03002387 run_start = find_next_zero_bit(bitmap, range, run_end + 1);
2388 }
2389 }
2390}
2391
Juan Quintela3d0684b2017-03-23 15:06:39 +01002392/**
2393 * postcopy_send_discard_bm_ram: discard a RAMBlock
2394 *
2395 * Returns zero on success
2396 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002397 * Callback from postcopy_each_ram_send_discard for each RAMBlock
Juan Quintela3d0684b2017-03-23 15:06:39 +01002398 *
2399 * @ms: current migration state
Wei Yang89dab312019-07-15 10:05:49 +08002400 * @block: RAMBlock to discard
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002401 */
Wei Yang810cf2b2019-07-24 09:07:21 +08002402static int postcopy_send_discard_bm_ram(MigrationState *ms, RAMBlock *block)
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002403{
Juan Quintela6b6712e2017-03-22 15:18:04 +01002404 unsigned long end = block->used_length >> TARGET_PAGE_BITS;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002405 unsigned long current;
Wei Yang1e7cf8c2019-08-19 14:18:42 +08002406 unsigned long *bitmap = block->bmap;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002407
Juan Quintela6b6712e2017-03-22 15:18:04 +01002408 for (current = 0; current < end; ) {
Wei Yang1e7cf8c2019-08-19 14:18:42 +08002409 unsigned long one = find_next_bit(bitmap, end, current);
Wei Yang33a5cb622019-06-27 10:08:21 +08002410 unsigned long zero, discard_length;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002411
Wei Yang33a5cb622019-06-27 10:08:21 +08002412 if (one >= end) {
2413 break;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002414 }
Wei Yang33a5cb622019-06-27 10:08:21 +08002415
Wei Yang1e7cf8c2019-08-19 14:18:42 +08002416 zero = find_next_zero_bit(bitmap, end, one + 1);
Wei Yang33a5cb622019-06-27 10:08:21 +08002417
2418 if (zero >= end) {
2419 discard_length = end - one;
2420 } else {
2421 discard_length = zero - one;
2422 }
Wei Yang810cf2b2019-07-24 09:07:21 +08002423 postcopy_discard_send_range(ms, one, discard_length);
Wei Yang33a5cb622019-06-27 10:08:21 +08002424 current = one + discard_length;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002425 }
2426
2427 return 0;
2428}
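/*
 * Worked example (illustration only): if a block's dirty bitmap has bits
 * set for pages 8..11 and clear elsewhere, find_next_bit() yields
 * one == 8, find_next_zero_bit() yields zero == 12, so discard_length is
 * 4 and postcopy_discard_send_range(ms, 8, 4) advertises that run before
 * the scan resumes at page 12.
 */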
2429
Juan Quintela3d0684b2017-03-23 15:06:39 +01002430/**
2431 * postcopy_each_ram_send_discard: discard all RAMBlocks
2432 *
2433 * Returns 0 for success or negative for error
2434 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002435 * Utility for the outgoing postcopy code.
2436 * Calls postcopy_send_discard_bm_ram for each RAMBlock
2437 * passing it bitmap indexes and name.
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002438 * (qemu_ram_foreach_block ends up passing unscaled lengths
2439 * which would mean postcopy code would have to deal with target page)
Juan Quintela3d0684b2017-03-23 15:06:39 +01002440 *
2441 * @ms: current migration state
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002442 */
2443static int postcopy_each_ram_send_discard(MigrationState *ms)
2444{
2445 struct RAMBlock *block;
2446 int ret;
2447
Yury Kotovfbd162e2019-02-15 20:45:46 +03002448 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Wei Yang810cf2b2019-07-24 09:07:21 +08002449 postcopy_discard_send_init(ms, block->idstr);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002450
2451 /*
2452 * Postcopy sends chunks of bitmap over the wire, but it
2453 * just needs indexes at this point, which avoids it having
2454 * target-page-specific code.
2455 */
Wei Yang810cf2b2019-07-24 09:07:21 +08002456 ret = postcopy_send_discard_bm_ram(ms, block);
2457 postcopy_discard_send_finish(ms);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002458 if (ret) {
2459 return ret;
2460 }
2461 }
2462
2463 return 0;
2464}
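
/*
 * For each block, the sequence of calls above therefore amounts to:
 *   postcopy_discard_send_init(ms, idstr)        - names the RAMBlock
 *   postcopy_discard_send_range(ms, start, len)  - zero or more dirty runs,
 *                                                  in target-page units
 *   postcopy_discard_send_finish(ms)             - ends the block
 * (a sketch of the call order only; the wire encoding itself is done by
 * the postcopy_discard_send_* helpers)
 */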
2465
Juan Quintela3d0684b2017-03-23 15:06:39 +01002466/**
Wei Yang8324ef82019-08-19 14:18:41 +08002467 * postcopy_chunk_hostpages_pass: canonicalize bitmap in hostpages
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002468 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002469 * Helper for postcopy_chunk_hostpages; it canonicalizes the dirty
2470 * bitmap of a single RAMBlock so that host-page boundaries are
2471 * respected.
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002472 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002473 * Postcopy requires that all target pages in a hostpage are dirty or
2474 * clean, not a mix. This function canonicalizes the bitmap.
2475 *
2476 * @ms: current migration state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002477 * @block: block that contains the page we want to canonicalize
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002478 */
Wei Yang1e7cf8c2019-08-19 14:18:42 +08002479static void postcopy_chunk_hostpages_pass(MigrationState *ms, RAMBlock *block)
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002480{
Juan Quintela53518d92017-05-04 11:46:24 +02002481 RAMState *rs = ram_state;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002482 unsigned long *bitmap = block->bmap;
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00002483 unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002484 unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002485 unsigned long run_start;
2486
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00002487 if (block->page_size == TARGET_PAGE_SIZE) {
2488 /* Easy case - TPS==HPS for a non-huge page RAMBlock */
2489 return;
2490 }
2491
Wei Yang1e7cf8c2019-08-19 14:18:42 +08002492 /* Find a dirty page */
2493 run_start = find_next_bit(bitmap, pages, 0);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002494
Juan Quintela6b6712e2017-03-22 15:18:04 +01002495 while (run_start < pages) {
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002496
2497 /*
2498 * If the start of this run of pages is in the middle of a host
2499 * page, then we need to fixup this host page.
2500 */
Wei Yang9dec3cc2019-08-06 08:46:48 +08002501 if (QEMU_IS_ALIGNED(run_start, host_ratio)) {
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002502 /* Find the end of this run */
Wei Yang1e7cf8c2019-08-19 14:18:42 +08002503 run_start = find_next_zero_bit(bitmap, pages, run_start + 1);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002504 /*
2505 * If the end isn't at the start of a host page, then the
2506 * run doesn't finish at the end of a host page
2507 * and we need to discard.
2508 */
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002509 }
2510
Wei Yang9dec3cc2019-08-06 08:46:48 +08002511 if (!QEMU_IS_ALIGNED(run_start, host_ratio)) {
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002512 unsigned long page;
Wei Yangdad45ab2019-08-06 08:46:47 +08002513 unsigned long fixup_start_addr = QEMU_ALIGN_DOWN(run_start,
2514 host_ratio);
2515 run_start = QEMU_ALIGN_UP(run_start, host_ratio);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002516
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002517 /* Clean up the bitmap */
2518 for (page = fixup_start_addr;
2519 page < fixup_start_addr + host_ratio; page++) {
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002520 /*
2521 * Remark them as dirty, updating the count for any pages
2522 * that weren't previously dirty.
2523 */
Juan Quintela0d8ec882017-03-13 21:21:41 +01002524 rs->migration_dirty_pages += !test_and_set_bit(page, bitmap);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002525 }
2526 }
2527
Wei Yang1e7cf8c2019-08-19 14:18:42 +08002528 /* Find the next dirty page for the next iteration */
2529 run_start = find_next_bit(bitmap, pages, run_start);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002530 }
2531}
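
/*
 * Worked example (illustrative, assuming host_ratio == 4, e.g. a 16 KiB
 * host page made of 4 KiB target pages): if only target page 5 of a block
 * is dirty, the pass above re-marks pages 4..7 as dirty, so the whole
 * host page is later discarded/resent as a unit and never left half
 * dirty, half clean.
 */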
2532
Juan Quintela3d0684b2017-03-23 15:06:39 +01002533/**
Wei Yang89dab312019-07-15 10:05:49 +08002534 * postcopy_chunk_hostpages: discard any partially sent host page
Juan Quintela3d0684b2017-03-23 15:06:39 +01002535 *
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002536 * Utility for the outgoing postcopy code.
2537 *
2538 * Discard any partially sent host-page size chunks, mark any partially
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00002539 * dirty host-page size chunks as all dirty. In this case the host-page
2540 * is the host-page for the particular RAMBlock, i.e. it might be a huge page
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002541 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002542 * Returns zero on success
2543 *
2544 * @ms: current migration state
Juan Quintela6b6712e2017-03-22 15:18:04 +01002545 * @block: block we want to work with
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002546 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002547static int postcopy_chunk_hostpages(MigrationState *ms, RAMBlock *block)
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002548{
Wei Yang810cf2b2019-07-24 09:07:21 +08002549 postcopy_discard_send_init(ms, block->idstr);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002550
Juan Quintela6b6712e2017-03-22 15:18:04 +01002551 /*
Wei Yang1e7cf8c2019-08-19 14:18:42 +08002552 * Ensure that all partially dirty host pages are made fully dirty.
Juan Quintela6b6712e2017-03-22 15:18:04 +01002553 */
Wei Yang1e7cf8c2019-08-19 14:18:42 +08002554 postcopy_chunk_hostpages_pass(ms, block);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002555
Wei Yang810cf2b2019-07-24 09:07:21 +08002556 postcopy_discard_send_finish(ms);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002557 return 0;
2558}
2559
Juan Quintela3d0684b2017-03-23 15:06:39 +01002560/**
2561 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
2562 *
2563 * Returns zero on success
2564 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002565 * Transmit the set of pages to be discarded after precopy to the target;
2566 * these are pages that:
2567 * a) Have been previously transmitted but are now dirty again
2568 * b) Pages that have never been transmitted, this ensures that
2569 * any pages on the destination that have been mapped by background
2570 * tasks get discarded (transparent huge pages is the specific concern)
2571 * Hopefully this is pretty sparse
Juan Quintela3d0684b2017-03-23 15:06:39 +01002572 *
2573 * @ms: current migration state
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002574 */
2575int ram_postcopy_send_discard_bitmap(MigrationState *ms)
2576{
Juan Quintela53518d92017-05-04 11:46:24 +02002577 RAMState *rs = ram_state;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002578 RAMBlock *block;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002579 int ret;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002580
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01002581 RCU_READ_LOCK_GUARD();
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002582
2583 /* This should be our last sync, the src is now paused */
Juan Quintelaeb859c52017-03-13 21:51:55 +01002584 migration_bitmap_sync(rs);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002585
Juan Quintela6b6712e2017-03-22 15:18:04 +01002586 /* Easiest way to make sure we don't resume in the middle of a host-page */
2587 rs->last_seen_block = NULL;
2588 rs->last_sent_block = NULL;
2589 rs->last_page = 0;
2590
Yury Kotovfbd162e2019-02-15 20:45:46 +03002591 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01002592 /* Deal with TPS != HPS and huge pages */
2593 ret = postcopy_chunk_hostpages(ms, block);
2594 if (ret) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01002595 return ret;
2596 }
2597
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002598#ifdef DEBUG_POSTCOPY
Wei Yang1e7cf8c2019-08-19 14:18:42 +08002599 ram_debug_dump_bitmap(block->bmap, true,
2600 block->used_length >> TARGET_PAGE_BITS);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002601#endif
Juan Quintela6b6712e2017-03-22 15:18:04 +01002602 }
2603 trace_ram_postcopy_send_discard_bitmap();
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002604
Simran Singhalb3ac2b92020-04-01 22:23:14 +05302605 return postcopy_each_ram_send_discard(ms);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002606}
2607
Juan Quintela3d0684b2017-03-23 15:06:39 +01002608/**
2609 * ram_discard_range: discard dirtied pages at the beginning of postcopy
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002610 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002611 * Returns zero on success
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002612 *
Juan Quintela36449152017-03-23 15:11:59 +01002613 * @rbname: name of the RAMBlock of the request. NULL means the
2614 * same as the last one.
Juan Quintela3d0684b2017-03-23 15:06:39 +01002615 * @start: byte offset within the RAMBlock to start discarding at
2616 * @length: length in bytes to discard
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002617 */
Juan Quintelaaaa20642017-03-21 11:35:24 +01002618int ram_discard_range(const char *rbname, uint64_t start, size_t length)
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002619{
Juan Quintela36449152017-03-23 15:11:59 +01002620 trace_ram_discard_range(rbname, start, length);
Dr. David Alan Gilbertd3a50382017-02-24 18:28:32 +00002621
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01002622 RCU_READ_LOCK_GUARD();
Juan Quintela36449152017-03-23 15:11:59 +01002623 RAMBlock *rb = qemu_ram_block_by_name(rbname);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002624
2625 if (!rb) {
Juan Quintela36449152017-03-23 15:11:59 +01002626 error_report("ram_discard_range: Failed to find block '%s'", rbname);
Daniel Henrique Barboza03acb4e2020-01-06 15:23:31 -03002627 return -1;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002628 }
2629
Peter Xu814bb082018-07-23 20:33:02 +08002630 /*
2631 * On source VM, we don't need to update the received bitmap since
2632 * we don't even have one.
2633 */
2634 if (rb->receivedmap) {
2635 bitmap_clear(rb->receivedmap, start >> qemu_target_page_bits(),
2636 length >> qemu_target_page_bits());
2637 }
2638
Daniel Henrique Barboza03acb4e2020-01-06 15:23:31 -03002639 return ram_block_discard_range(rb, start, length);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002640}
2641
Peter Xu84593a02017-10-19 14:31:59 +08002642/*
2643 * For every allocation, we will try not to crash the VM if the
2644 * allocation fails.
2645 */
2646static int xbzrle_init(void)
2647{
2648 Error *local_err = NULL;
2649
2650 if (!migrate_use_xbzrle()) {
2651 return 0;
2652 }
2653
2654 XBZRLE_cache_lock();
2655
2656 XBZRLE.zero_target_page = g_try_malloc0(TARGET_PAGE_SIZE);
2657 if (!XBZRLE.zero_target_page) {
2658 error_report("%s: Error allocating zero page", __func__);
2659 goto err_out;
2660 }
2661
2662 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(),
2663 TARGET_PAGE_SIZE, &local_err);
2664 if (!XBZRLE.cache) {
2665 error_report_err(local_err);
2666 goto free_zero_page;
2667 }
2668
2669 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
2670 if (!XBZRLE.encoded_buf) {
2671 error_report("%s: Error allocating encoded_buf", __func__);
2672 goto free_cache;
2673 }
2674
2675 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
2676 if (!XBZRLE.current_buf) {
2677 error_report("%s: Error allocating current_buf", __func__);
2678 goto free_encoded_buf;
2679 }
2680
2681 /* We are all good */
2682 XBZRLE_cache_unlock();
2683 return 0;
2684
2685free_encoded_buf:
2686 g_free(XBZRLE.encoded_buf);
2687 XBZRLE.encoded_buf = NULL;
2688free_cache:
2689 cache_fini(XBZRLE.cache);
2690 XBZRLE.cache = NULL;
2691free_zero_page:
2692 g_free(XBZRLE.zero_target_page);
2693 XBZRLE.zero_target_page = NULL;
2694err_out:
2695 XBZRLE_cache_unlock();
2696 return -ENOMEM;
2697}
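
/*
 * Rough sizing note (illustrative): cache_init() above is handed
 * migrate_xbzrle_cache_size() bytes with TARGET_PAGE_SIZE per entry, so
 * approximately cache_size / TARGET_PAGE_SIZE previous page copies can be
 * cached - e.g. a 64 MiB cache with 4 KiB target pages keeps around 16384
 * pages available for delta encoding.
 */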
2698
Juan Quintela53518d92017-05-04 11:46:24 +02002699static int ram_state_init(RAMState **rsp)
Juan Quintela56e93d22015-05-07 19:33:31 +02002700{
Peter Xu7d00ee62017-10-19 14:31:57 +08002701 *rsp = g_try_new0(RAMState, 1);
2702
2703 if (!*rsp) {
2704 error_report("%s: Init ramstate fail", __func__);
2705 return -1;
2706 }
Juan Quintela53518d92017-05-04 11:46:24 +02002707
2708 qemu_mutex_init(&(*rsp)->bitmap_mutex);
2709 qemu_mutex_init(&(*rsp)->src_page_req_mutex);
2710 QSIMPLEQ_INIT(&(*rsp)->src_page_requests);
Juan Quintela56e93d22015-05-07 19:33:31 +02002711
Peter Xu7d00ee62017-10-19 14:31:57 +08002712 /*
Ivan Ren40c4d4a2019-07-14 22:51:19 +08002713 * Count the total number of pages used by ram blocks not including any
2714 * gaps due to alignment or unplugs.
Wei Yang03158512019-06-04 14:17:27 +08002715 * This must match the initial value of the dirty bitmap.
Peter Xu7d00ee62017-10-19 14:31:57 +08002716 */
Ivan Ren40c4d4a2019-07-14 22:51:19 +08002717 (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
Peter Xu7d00ee62017-10-19 14:31:57 +08002718 ram_state_reset(*rsp);
2719
2720 return 0;
2721}
2722
Peter Xud6eff5d2017-10-19 14:32:00 +08002723static void ram_list_init_bitmaps(void)
2724{
Peter Xu002cad62019-06-03 14:50:56 +08002725 MigrationState *ms = migrate_get_current();
Peter Xud6eff5d2017-10-19 14:32:00 +08002726 RAMBlock *block;
2727 unsigned long pages;
Peter Xu002cad62019-06-03 14:50:56 +08002728 uint8_t shift;
Peter Xud6eff5d2017-10-19 14:32:00 +08002729
2730 /* Skip setting bitmap if there is no RAM */
2731 if (ram_bytes_total()) {
Peter Xu002cad62019-06-03 14:50:56 +08002732 shift = ms->clear_bitmap_shift;
2733 if (shift > CLEAR_BITMAP_SHIFT_MAX) {
2734 error_report("clear_bitmap_shift (%u) too big, using "
2735 "max value (%u)", shift, CLEAR_BITMAP_SHIFT_MAX);
2736 shift = CLEAR_BITMAP_SHIFT_MAX;
2737 } else if (shift < CLEAR_BITMAP_SHIFT_MIN) {
2738 error_report("clear_bitmap_shift (%u) too small, using "
2739 "min value (%u)", shift, CLEAR_BITMAP_SHIFT_MIN);
2740 shift = CLEAR_BITMAP_SHIFT_MIN;
2741 }
2742
Yury Kotovfbd162e2019-02-15 20:45:46 +03002743 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Peter Xud6eff5d2017-10-19 14:32:00 +08002744 pages = block->max_length >> TARGET_PAGE_BITS;
Wei Yang03158512019-06-04 14:17:27 +08002745 /*
2746 * The initial dirty bitmap for migration must be set with all
2747 * ones to make sure we'll migrate every guest RAM page to
2748 * the destination.
Ivan Ren40c4d4a2019-07-14 22:51:19 +08002749 * Here we set RAMBlock.bmap all to 1 because when restarting a
2750 * new migration after a failed one, ram_list.
2751 * dirty_memory[DIRTY_MEMORY_MIGRATION] may not include the whole
2752 * guest memory.
Wei Yang03158512019-06-04 14:17:27 +08002753 */
Peter Xud6eff5d2017-10-19 14:32:00 +08002754 block->bmap = bitmap_new(pages);
Ivan Ren40c4d4a2019-07-14 22:51:19 +08002755 bitmap_set(block->bmap, 0, pages);
Peter Xu002cad62019-06-03 14:50:56 +08002756 block->clear_bmap_shift = shift;
2757 block->clear_bmap = bitmap_new(clear_bmap_size(pages, shift));
Peter Xud6eff5d2017-10-19 14:32:00 +08002758 }
2759 }
2760}
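
/*
 * Granularity note (illustrative): each clear_bmap bit covers
 * (1 << clear_bmap_shift) target pages, so the bitmap needs roughly
 * pages >> shift bits (rounded up). With 4 KiB target pages and a shift
 * of 18, one bit corresponds to 2^18 pages, i.e. 1 GiB of guest RAM whose
 * dirty log is cleared lazily in one go.
 */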
2761
David Hildenbrandbe39b4c2021-10-11 19:53:41 +02002762static void migration_bitmap_clear_discarded_pages(RAMState *rs)
2763{
2764 unsigned long pages;
2765 RAMBlock *rb;
2766
2767 RCU_READ_LOCK_GUARD();
2768
2769 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
2770 pages = ramblock_dirty_bitmap_clear_discarded_pages(rb);
2771 rs->migration_dirty_pages -= pages;
2772 }
2773}
2774
Peter Xud6eff5d2017-10-19 14:32:00 +08002775static void ram_init_bitmaps(RAMState *rs)
2776{
2777 /* For memory_global_dirty_log_start below. */
2778 qemu_mutex_lock_iothread();
2779 qemu_mutex_lock_ramlist();
Peter Xud6eff5d2017-10-19 14:32:00 +08002780
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01002781 WITH_RCU_READ_LOCK_GUARD() {
2782 ram_list_init_bitmaps();
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03002783 /* We don't use dirty log with background snapshots */
2784 if (!migrate_background_snapshot()) {
Hyman Huang(黄勇)63b41db2021-06-29 16:01:19 +00002785 memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION);
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03002786 migration_bitmap_sync_precopy(rs);
2787 }
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01002788 }
Peter Xud6eff5d2017-10-19 14:32:00 +08002789 qemu_mutex_unlock_ramlist();
2790 qemu_mutex_unlock_iothread();
David Hildenbrandbe39b4c2021-10-11 19:53:41 +02002791
2792 /*
2793 * After an eventual first bitmap sync, fixup the initial bitmap
2794 * containing all 1s to exclude any discarded pages from migration.
2795 */
2796 migration_bitmap_clear_discarded_pages(rs);
Peter Xud6eff5d2017-10-19 14:32:00 +08002797}
2798
Peter Xu7d00ee62017-10-19 14:31:57 +08002799static int ram_init_all(RAMState **rsp)
2800{
Peter Xu7d00ee62017-10-19 14:31:57 +08002801 if (ram_state_init(rsp)) {
2802 return -1;
2803 }
2804
Peter Xu84593a02017-10-19 14:31:59 +08002805 if (xbzrle_init()) {
2806 ram_state_cleanup(rsp);
2807 return -1;
Juan Quintela56e93d22015-05-07 19:33:31 +02002808 }
2809
Peter Xud6eff5d2017-10-19 14:32:00 +08002810 ram_init_bitmaps(*rsp);
zhanghailianga91246c2016-10-27 14:42:59 +08002811
2812 return 0;
2813}
2814
Peter Xu08614f32018-05-02 18:47:33 +08002815static void ram_state_resume_prepare(RAMState *rs, QEMUFile *out)
2816{
2817 RAMBlock *block;
2818 uint64_t pages = 0;
2819
2820 /*
2821 * Postcopy is not using xbzrle/compression, so no need for that.
2822 * Also, since the source is already halted, we don't need to care
2823 * about dirty page logging either.
2824 */
2825
Yury Kotovfbd162e2019-02-15 20:45:46 +03002826 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Peter Xu08614f32018-05-02 18:47:33 +08002827 pages += bitmap_count_one(block->bmap,
2828 block->used_length >> TARGET_PAGE_BITS);
2829 }
2830
2831 /* This may not be aligned with current bitmaps. Recalculate. */
2832 rs->migration_dirty_pages = pages;
2833
David Hildenbrand1a373522021-02-16 11:50:39 +01002834 ram_state_reset(rs);
Peter Xu08614f32018-05-02 18:47:33 +08002835
2836 /* Update RAMState cache of output QEMUFile */
2837 rs->f = out;
2838
2839 trace_ram_state_resume_prepare(pages);
2840}
2841
Juan Quintela3d0684b2017-03-23 15:06:39 +01002842/*
Wei Wang6bcb05f2018-12-11 16:24:50 +08002843 * This function clears bits of the free pages reported by the caller from the
2844 * migration dirty bitmap. @addr is the host address corresponding to the
2845 * start of the continuous guest free pages, and @len is the total bytes of
2846 * those pages.
2847 */
2848void qemu_guest_free_page_hint(void *addr, size_t len)
2849{
2850 RAMBlock *block;
2851 ram_addr_t offset;
2852 size_t used_len, start, npages;
2853 MigrationState *s = migrate_get_current();
2854
2855 /* This function is currently expected to be used during live migration */
2856 if (!migration_is_setup_or_active(s->state)) {
2857 return;
2858 }
2859
2860 for (; len > 0; len -= used_len, addr += used_len) {
2861 block = qemu_ram_block_from_host(addr, false, &offset);
2862 if (unlikely(!block || offset >= block->used_length)) {
2863 /*
2864 * The implementation might not support RAMBlock resize during
2865 * live migration, but it could happen in theory with future
2866 * updates. So we add a check here to capture that case.
2867 */
2868 error_report_once("%s unexpected error", __func__);
2869 return;
2870 }
2871
2872 if (len <= block->used_length - offset) {
2873 used_len = len;
2874 } else {
2875 used_len = block->used_length - offset;
2876 }
2877
2878 start = offset >> TARGET_PAGE_BITS;
2879 npages = used_len >> TARGET_PAGE_BITS;
2880
2881 qemu_mutex_lock(&ram_state->bitmap_mutex);
Wei Wang3143577d2021-07-22 04:30:55 -04002882 /*
2883 * The skipped free pages are equivalent to having been sent from clear_bmap's
2884 * perspective, so clear the bits from the memory region bitmap which
2885 * are initially set. Otherwise those skipped pages will be sent in
2886 * the next round after syncing from the memory region bitmap.
2887 */
David Hildenbrand1230a252021-09-04 18:09:07 +02002888 migration_clear_memory_region_dirty_bitmap_range(block, start, npages);
Wei Wang6bcb05f2018-12-11 16:24:50 +08002889 ram_state->migration_dirty_pages -=
2890 bitmap_count_one_with_offset(block->bmap, start, npages);
2891 bitmap_clear(block->bmap, start, npages);
2892 qemu_mutex_unlock(&ram_state->bitmap_mutex);
2893 }
2894}
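
/*
 * Worked example (illustrative): a single 2 MiB free-page hint that falls
 * entirely inside one RAMBlock with 4 KiB target pages gives
 * used_len == len and npages == 512, so bitmap_clear() drops 512 bits and
 * migration_dirty_pages is reduced by however many of them were still set,
 * all in one pass of the loop above.
 */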
2895
2896/*
Juan Quintela3d0684b2017-03-23 15:06:39 +01002897 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has a
zhanghailianga91246c2016-10-27 14:42:59 +08002898 * long-running RCU critical section. When RCU reclaims in the code
2899 * start to become numerous it will be necessary to reduce the
2900 * granularity of these critical sections.
2901 */
2902
Juan Quintela3d0684b2017-03-23 15:06:39 +01002903/**
2904 * ram_save_setup: Setup RAM for migration
2905 *
2906 * Returns zero to indicate success and negative for error
2907 *
2908 * @f: QEMUFile where to send the data
2909 * @opaque: RAMState pointer
2910 */
zhanghailianga91246c2016-10-27 14:42:59 +08002911static int ram_save_setup(QEMUFile *f, void *opaque)
2912{
Juan Quintela53518d92017-05-04 11:46:24 +02002913 RAMState **rsp = opaque;
zhanghailianga91246c2016-10-27 14:42:59 +08002914 RAMBlock *block;
2915
Xiao Guangrongdcaf4462018-03-30 15:51:20 +08002916 if (compress_threads_save_setup()) {
2917 return -1;
2918 }
2919
zhanghailianga91246c2016-10-27 14:42:59 +08002920 /* migration has already setup the bitmap, reuse it. */
2921 if (!migration_in_colo_state()) {
Peter Xu7d00ee62017-10-19 14:31:57 +08002922 if (ram_init_all(rsp) != 0) {
Xiao Guangrongdcaf4462018-03-30 15:51:20 +08002923 compress_threads_save_cleanup();
zhanghailianga91246c2016-10-27 14:42:59 +08002924 return -1;
Juan Quintela53518d92017-05-04 11:46:24 +02002925 }
zhanghailianga91246c2016-10-27 14:42:59 +08002926 }
Juan Quintela53518d92017-05-04 11:46:24 +02002927 (*rsp)->f = f;
zhanghailianga91246c2016-10-27 14:42:59 +08002928
Dr. David Alan Gilbert0e6ebd42019-10-07 15:36:38 +01002929 WITH_RCU_READ_LOCK_GUARD() {
2930 qemu_put_be64(f, ram_bytes_total_common(true) | RAM_SAVE_FLAG_MEM_SIZE);
Juan Quintela56e93d22015-05-07 19:33:31 +02002931
Dr. David Alan Gilbert0e6ebd42019-10-07 15:36:38 +01002932 RAMBLOCK_FOREACH_MIGRATABLE(block) {
2933 qemu_put_byte(f, strlen(block->idstr));
2934 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
2935 qemu_put_be64(f, block->used_length);
2936 if (migrate_postcopy_ram() && block->page_size !=
2937 qemu_host_page_size) {
2938 qemu_put_be64(f, block->page_size);
2939 }
2940 if (migrate_ignore_shared()) {
2941 qemu_put_be64(f, block->mr->addr);
2942 }
Yury Kotovfbd162e2019-02-15 20:45:46 +03002943 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002944 }
2945
Juan Quintela56e93d22015-05-07 19:33:31 +02002946 ram_control_before_iterate(f, RAM_CONTROL_SETUP);
2947 ram_control_after_iterate(f, RAM_CONTROL_SETUP);
2948
Juan Quintela99f2c6f2020-01-22 16:04:53 +01002949 multifd_send_sync_main(f);
Juan Quintela56e93d22015-05-07 19:33:31 +02002950 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
Juan Quintela35374cb2018-04-18 10:13:21 +02002951 qemu_fflush(f);
Juan Quintela56e93d22015-05-07 19:33:31 +02002952
2953 return 0;
2954}
2955
Juan Quintela3d0684b2017-03-23 15:06:39 +01002956/**
2957 * ram_save_iterate: iterative stage for migration
2958 *
2959 * Returns zero to indicate success and negative for error
2960 *
2961 * @f: QEMUFile where to send the data
2962 * @opaque: RAMState pointer
2963 */
Juan Quintela56e93d22015-05-07 19:33:31 +02002964static int ram_save_iterate(QEMUFile *f, void *opaque)
2965{
Juan Quintela53518d92017-05-04 11:46:24 +02002966 RAMState **temp = opaque;
2967 RAMState *rs = *temp;
Juan Quintela3d4095b2019-12-18 05:12:36 +01002968 int ret = 0;
Juan Quintela56e93d22015-05-07 19:33:31 +02002969 int i;
2970 int64_t t0;
Thomas Huth5c903082016-11-04 14:10:17 +01002971 int done = 0;
Juan Quintela56e93d22015-05-07 19:33:31 +02002972
Peter Lievenb2557342018-03-08 12:18:24 +01002973 if (blk_mig_bulk_active()) {
2974 /* Avoid transferring ram during bulk phase of block migration as
2975 * the bulk phase will usually take a long time and transferring
2976 * ram updates during that time is pointless. */
2977 goto out;
2978 }
2979
Peter Xu63268c42021-06-30 16:08:05 -04002980 /*
2981 * We'll take this lock a little bit long, but it's okay for two reasons.
2982 * Firstly, the only possible other thread to take it is the one that calls
2983 * qemu_guest_free_page_hint(), which should be rare; secondly, see
2984 * MAX_WAIT (if curious, further see commit 4508bd9ed8053ce) below, which
2985 * guarantees that we'll at least release it on a regular basis.
2986 */
2987 qemu_mutex_lock(&rs->bitmap_mutex);
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01002988 WITH_RCU_READ_LOCK_GUARD() {
2989 if (ram_list.version != rs->last_version) {
2990 ram_state_reset(rs);
Dr. David Alan Gilberte03a34f2018-06-13 11:26:42 +01002991 }
2992
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01002993 /* Read version before ram_list.blocks */
2994 smp_rmb();
Xiao Guangronge8f37352018-09-03 17:26:44 +08002995
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01002996 ram_control_before_iterate(f, RAM_CONTROL_ROUND);
Xiao Guangronge8f37352018-09-03 17:26:44 +08002997
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01002998 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2999 i = 0;
3000 while ((ret = qemu_file_rate_limit(f)) == 0 ||
3001 !QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
3002 int pages;
Jason J. Herne070afca2015-09-08 13:12:35 -04003003
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003004 if (qemu_file_get_error(f)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02003005 break;
3006 }
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003007
3008 pages = ram_find_and_save_block(rs, false);
3009 /* no more pages to send */
3010 if (pages == 0) {
3011 done = 1;
3012 break;
3013 }
3014
3015 if (pages < 0) {
3016 qemu_file_set_error(f, pages);
3017 break;
3018 }
3019
3020 rs->target_page_count += pages;
3021
3022 /*
Wei Yang644acf92019-11-07 20:39:07 +08003023 * During postcopy, it is necessary to make sure one whole host
3024 * page is sent in one chunk.
3025 */
3026 if (migrate_postcopy_ram()) {
3027 flush_compressed_data(rs);
3028 }
3029
3030 /*
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003031 * We want to check in the 1st loop, just in case it was the 1st
3032 * time and we had to sync the dirty bitmap.
3033 * qemu_clock_get_ns() is a bit expensive, so we only check once
3034 * every few iterations.
3035 */
3036 if ((i & 63) == 0) {
3037 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) /
3038 1000000;
3039 if (t1 > MAX_WAIT) {
3040 trace_ram_save_iterate_big_wait(t1, i);
3041 break;
3042 }
3043 }
3044 i++;
Juan Quintela56e93d22015-05-07 19:33:31 +02003045 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003046 }
Peter Xu63268c42021-06-30 16:08:05 -04003047 qemu_mutex_unlock(&rs->bitmap_mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02003048
3049 /*
3050 * Must occur before EOS (or any QEMUFile operation)
3051 * because of RDMA protocol.
3052 */
3053 ram_control_after_iterate(f, RAM_CONTROL_ROUND);
3054
Peter Lievenb2557342018-03-08 12:18:24 +01003055out:
Juan Quintelab69a0222020-01-22 11:36:12 +01003056 if (ret >= 0
3057 && migration_is_setup_or_active(migrate_get_current()->state)) {
Juan Quintela99f2c6f2020-01-22 16:04:53 +01003058 multifd_send_sync_main(rs->f);
Juan Quintela3d4095b2019-12-18 05:12:36 +01003059 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
3060 qemu_fflush(f);
3061 ram_counters.transferred += 8;
Juan Quintela56e93d22015-05-07 19:33:31 +02003062
Juan Quintela3d4095b2019-12-18 05:12:36 +01003063 ret = qemu_file_get_error(f);
3064 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003065 if (ret < 0) {
3066 return ret;
3067 }
3068
Thomas Huth5c903082016-11-04 14:10:17 +01003069 return done;
Juan Quintela56e93d22015-05-07 19:33:31 +02003070}
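
/*
 * Pacing sketch (illustrative): one call of the loop above keeps sending
 * pages until the rate limit trips, an error occurs, or - checked only
 * every 64 pages because qemu_clock_get_ns() is not free - more than
 * MAX_WAIT milliseconds have elapsed. Together with the rate limit, that
 * elapsed-time check bounds how long bitmap_mutex stays held in a single
 * call.
 */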
3071
Juan Quintela3d0684b2017-03-23 15:06:39 +01003072/**
3073 * ram_save_complete: function called to send the remaining amount of ram
3074 *
Xiao Guangronge8f37352018-09-03 17:26:44 +08003075 * Returns zero to indicate success or negative on error
Juan Quintela3d0684b2017-03-23 15:06:39 +01003076 *
3077 * Called with iothread lock
3078 *
3079 * @f: QEMUFile where to send the data
3080 * @opaque: RAMState pointer
3081 */
Juan Quintela56e93d22015-05-07 19:33:31 +02003082static int ram_save_complete(QEMUFile *f, void *opaque)
3083{
Juan Quintela53518d92017-05-04 11:46:24 +02003084 RAMState **temp = opaque;
3085 RAMState *rs = *temp;
Xiao Guangronge8f37352018-09-03 17:26:44 +08003086 int ret = 0;
Juan Quintela6f37bb82017-03-13 19:26:29 +01003087
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003088 WITH_RCU_READ_LOCK_GUARD() {
3089 if (!migration_in_postcopy()) {
3090 migration_bitmap_sync_precopy(rs);
Juan Quintela56e93d22015-05-07 19:33:31 +02003091 }
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003092
3093 ram_control_before_iterate(f, RAM_CONTROL_FINISH);
3094
3095 /* try transferring iterative blocks of memory */
3096
3097 /* flush all remaining blocks regardless of rate limiting */
3098 while (true) {
3099 int pages;
3100
3101 pages = ram_find_and_save_block(rs, !migration_in_colo_state());
3102 /* no more blocks to send */
3103 if (pages == 0) {
3104 break;
3105 }
3106 if (pages < 0) {
3107 ret = pages;
3108 break;
3109 }
Xiao Guangronge8f37352018-09-03 17:26:44 +08003110 }
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003111
3112 flush_compressed_data(rs);
3113 ram_control_after_iterate(f, RAM_CONTROL_FINISH);
Juan Quintela56e93d22015-05-07 19:33:31 +02003114 }
3115
Juan Quintela3d4095b2019-12-18 05:12:36 +01003116 if (ret >= 0) {
Juan Quintela99f2c6f2020-01-22 16:04:53 +01003117 multifd_send_sync_main(rs->f);
Juan Quintela3d4095b2019-12-18 05:12:36 +01003118 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
3119 qemu_fflush(f);
3120 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003121
Xiao Guangronge8f37352018-09-03 17:26:44 +08003122 return ret;
Juan Quintela56e93d22015-05-07 19:33:31 +02003123}
3124
Dr. David Alan Gilbertc31b0982015-11-05 18:10:54 +00003125static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
Vladimir Sementsov-Ogievskiy47995022018-03-13 15:34:00 -04003126 uint64_t *res_precopy_only,
3127 uint64_t *res_compatible,
3128 uint64_t *res_postcopy_only)
Juan Quintela56e93d22015-05-07 19:33:31 +02003129{
Juan Quintela53518d92017-05-04 11:46:24 +02003130 RAMState **temp = opaque;
3131 RAMState *rs = *temp;
Juan Quintela56e93d22015-05-07 19:33:31 +02003132 uint64_t remaining_size;
3133
Juan Quintela9edabd42017-03-14 12:02:16 +01003134 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
Juan Quintela56e93d22015-05-07 19:33:31 +02003135
Juan Quintela57273092017-03-20 22:25:28 +01003136 if (!migration_in_postcopy() &&
Dr. David Alan Gilbert663e6c12015-11-05 18:11:13 +00003137 remaining_size < max_size) {
Juan Quintela56e93d22015-05-07 19:33:31 +02003138 qemu_mutex_lock_iothread();
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003139 WITH_RCU_READ_LOCK_GUARD() {
3140 migration_bitmap_sync_precopy(rs);
3141 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003142 qemu_mutex_unlock_iothread();
Juan Quintela9edabd42017-03-14 12:02:16 +01003143 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
Juan Quintela56e93d22015-05-07 19:33:31 +02003144 }
Dr. David Alan Gilbertc31b0982015-11-05 18:10:54 +00003145
Vladimir Sementsov-Ogievskiy86e11672017-07-10 19:30:15 +03003146 if (migrate_postcopy_ram()) {
3147 /* We can do postcopy, and all the data is postcopiable */
Vladimir Sementsov-Ogievskiy47995022018-03-13 15:34:00 -04003148 *res_compatible += remaining_size;
Vladimir Sementsov-Ogievskiy86e11672017-07-10 19:30:15 +03003149 } else {
Vladimir Sementsov-Ogievskiy47995022018-03-13 15:34:00 -04003150 *res_precopy_only += remaining_size;
Vladimir Sementsov-Ogievskiy86e11672017-07-10 19:30:15 +03003151 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003152}
3153
3154static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
3155{
3156 unsigned int xh_len;
3157 int xh_flags;
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00003158 uint8_t *loaded_data;
Juan Quintela56e93d22015-05-07 19:33:31 +02003159
Juan Quintela56e93d22015-05-07 19:33:31 +02003160 /* extract RLE header */
3161 xh_flags = qemu_get_byte(f);
3162 xh_len = qemu_get_be16(f);
3163
3164 if (xh_flags != ENCODING_FLAG_XBZRLE) {
3165 error_report("Failed to load XBZRLE page - wrong compression!");
3166 return -1;
3167 }
3168
3169 if (xh_len > TARGET_PAGE_SIZE) {
3170 error_report("Failed to load XBZRLE page - len overflow!");
3171 return -1;
3172 }
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003173 loaded_data = XBZRLE.decoded_buf;
Juan Quintela56e93d22015-05-07 19:33:31 +02003174 /* load data and decode */
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003175 /* it can change loaded_data to point to an internal buffer */
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00003176 qemu_get_buffer_in_place(f, &loaded_data, xh_len);
Juan Quintela56e93d22015-05-07 19:33:31 +02003177
3178 /* decode RLE */
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00003179 if (xbzrle_decode_buffer(loaded_data, xh_len, host,
Juan Quintela56e93d22015-05-07 19:33:31 +02003180 TARGET_PAGE_SIZE) == -1) {
3181 error_report("Failed to load XBZRLE page - decode error!");
3182 return -1;
3183 }
3184
3185 return 0;
3186}
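
/*
 * Stream layout consumed above: one flag byte that must be
 * ENCODING_FLAG_XBZRLE, a big-endian 16-bit encoded length capped at
 * TARGET_PAGE_SIZE, then that many bytes of XBZRLE delta which
 * xbzrle_decode_buffer() applies on top of the previous contents of
 * @host to reconstruct the new page.
 */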
3187
Juan Quintela3d0684b2017-03-23 15:06:39 +01003188/**
3189 * ram_block_from_stream: read a RAMBlock id from the migration stream
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003190 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01003191 * Must be called from within an RCU critical section.
3192 *
3193 * Returns a pointer from within the RCU-protected ram_list.
3194 *
3195 * @f: QEMUFile where to read the data from
3196 * @flags: Page flags (mostly to see if it's a continuation of previous block)
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003197 */
Juan Quintela3d0684b2017-03-23 15:06:39 +01003198static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
Juan Quintela56e93d22015-05-07 19:33:31 +02003199{
Bihong Yu49324e92020-10-20 11:10:46 +08003200 static RAMBlock *block;
Juan Quintela56e93d22015-05-07 19:33:31 +02003201 char id[256];
3202 uint8_t len;
3203
3204 if (flags & RAM_SAVE_FLAG_CONTINUE) {
zhanghailiang4c4bad42016-01-15 11:37:41 +08003205 if (!block) {
Juan Quintela56e93d22015-05-07 19:33:31 +02003206 error_report("Ack, bad migration stream!");
3207 return NULL;
3208 }
zhanghailiang4c4bad42016-01-15 11:37:41 +08003209 return block;
Juan Quintela56e93d22015-05-07 19:33:31 +02003210 }
3211
3212 len = qemu_get_byte(f);
3213 qemu_get_buffer(f, (uint8_t *)id, len);
3214 id[len] = 0;
3215
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00003216 block = qemu_ram_block_by_name(id);
zhanghailiang4c4bad42016-01-15 11:37:41 +08003217 if (!block) {
3218 error_report("Can't find block %s", id);
3219 return NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02003220 }
3221
Yury Kotovfbd162e2019-02-15 20:45:46 +03003222 if (ramblock_is_ignored(block)) {
CĂ©dric Le Goaterb895de52018-05-14 08:57:00 +02003223 error_report("block %s should not be migrated !", id);
3224 return NULL;
3225 }
3226
zhanghailiang4c4bad42016-01-15 11:37:41 +08003227 return block;
3228}
3229
3230static inline void *host_from_ram_block_offset(RAMBlock *block,
3231 ram_addr_t offset)
3232{
3233 if (!offset_in_ramblock(block, offset)) {
3234 return NULL;
3235 }
3236
3237 return block->host + offset;
Juan Quintela56e93d22015-05-07 19:33:31 +02003238}
3239
David Hildenbrand6a23f632021-04-29 13:27:05 +02003240static void *host_page_from_ram_block_offset(RAMBlock *block,
3241 ram_addr_t offset)
3242{
3243 /* Note: Explicitly no check against offset_in_ramblock(). */
3244 return (void *)QEMU_ALIGN_DOWN((uintptr_t)(block->host + offset),
3245 block->page_size);
3246}
3247
3248static ram_addr_t host_page_offset_from_ram_block_offset(RAMBlock *block,
3249 ram_addr_t offset)
3250{
3251 return ((uintptr_t)block->host + offset) & (block->page_size - 1);
3252}
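
/*
 * Illustrative arithmetic: for a block backed by 2 MiB host pages,
 * host_page_from_ram_block_offset() rounds the address down to a 2 MiB
 * boundary, while the helper above keeps the low 21 bits
 * (addr & 0x1FFFFF), i.e. the byte offset of the target page inside its
 * host page; the mask form works because host page sizes are powers of
 * two.
 */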
3253
Zhang Chen13af18f2018-09-03 12:38:48 +08003254static inline void *colo_cache_from_block_offset(RAMBlock *block,
zhanghailiang8af66372020-02-24 14:54:11 +08003255 ram_addr_t offset, bool record_bitmap)
Zhang Chen13af18f2018-09-03 12:38:48 +08003256{
3257 if (!offset_in_ramblock(block, offset)) {
3258 return NULL;
3259 }
3260 if (!block->colo_cache) {
3261 error_report("%s: colo_cache is NULL in block :%s",
3262 __func__, block->idstr);
3263 return NULL;
3264 }
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003265
3266 /*
3267 * During a colo checkpoint, we need a bitmap of these migrated pages.
3268 * It helps us decide which pages in the ram cache should be flushed
3269 * into the VM's RAM later.
3270 */
zhanghailiang8af66372020-02-24 14:54:11 +08003271 if (record_bitmap &&
3272 !test_and_set_bit(offset >> TARGET_PAGE_BITS, block->bmap)) {
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003273 ram_state->migration_dirty_pages++;
3274 }
Zhang Chen13af18f2018-09-03 12:38:48 +08003275 return block->colo_cache + offset;
3276}
3277
Juan Quintela3d0684b2017-03-23 15:06:39 +01003278/**
3279 * ram_handle_compressed: handle the zero page case
3280 *
Juan Quintela56e93d22015-05-07 19:33:31 +02003281 * If a page (or a whole RDMA chunk) has been
3282 * determined to be zero, then zap it.
Juan Quintela3d0684b2017-03-23 15:06:39 +01003283 *
3284 * @host: host address for the zero page
3285 * @ch: what the page is filled from. We only support zero
3286 * @size: size of the zero page
Juan Quintela56e93d22015-05-07 19:33:31 +02003287 */
3288void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
3289{
3290 if (ch != 0 || !is_zero_range(host, size)) {
3291 memset(host, ch, size);
3292 }
3293}
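
/*
 * Usage note (illustrative): ram_handle_compressed(host, 0,
 * TARGET_PAGE_SIZE) leaves a page that is already all zeroes untouched,
 * which avoids writing to (and possibly allocating) it on the
 * destination; any non-zero fill byte always falls through to the
 * memset().
 */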
3294
Xiao Guangrong797ca152018-03-30 15:51:21 +08003295/* return the size after decompression, or negative value on error */
3296static int
3297qemu_uncompress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
3298 const uint8_t *source, size_t source_len)
3299{
3300 int err;
3301
3302 err = inflateReset(stream);
3303 if (err != Z_OK) {
3304 return -1;
3305 }
3306
3307 stream->avail_in = source_len;
3308 stream->next_in = (uint8_t *)source;
3309 stream->avail_out = dest_len;
3310 stream->next_out = dest;
3311
3312 err = inflate(stream, Z_NO_FLUSH);
3313 if (err != Z_STREAM_END) {
3314 return -1;
3315 }
3316
3317 return stream->total_out;
3318}
3319
Juan Quintela56e93d22015-05-07 19:33:31 +02003320static void *do_data_decompress(void *opaque)
3321{
3322 DecompressParam *param = opaque;
3323 unsigned long pagesize;
Liang Li33d151f2016-05-05 15:32:58 +08003324 uint8_t *des;
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003325 int len, ret;
Juan Quintela56e93d22015-05-07 19:33:31 +02003326
Liang Li33d151f2016-05-05 15:32:58 +08003327 qemu_mutex_lock(&param->mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08003328 while (!param->quit) {
Liang Li33d151f2016-05-05 15:32:58 +08003329 if (param->des) {
3330 des = param->des;
3331 len = param->len;
3332 param->des = 0;
3333 qemu_mutex_unlock(&param->mutex);
3334
Liang Li73a89122016-05-05 15:32:51 +08003335 pagesize = TARGET_PAGE_SIZE;
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003336
3337 ret = qemu_uncompress_data(&param->stream, des, pagesize,
3338 param->compbuf, len);
Xiao Guangrongf5482222018-05-03 16:06:11 +08003339 if (ret < 0 && migrate_get_current()->decompress_error_check) {
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003340 error_report("decompress data failed");
3341 qemu_file_set_error(decomp_file, ret);
3342 }
Liang Li73a89122016-05-05 15:32:51 +08003343
Liang Li33d151f2016-05-05 15:32:58 +08003344 qemu_mutex_lock(&decomp_done_lock);
3345 param->done = true;
3346 qemu_cond_signal(&decomp_done_cond);
3347 qemu_mutex_unlock(&decomp_done_lock);
3348
3349 qemu_mutex_lock(&param->mutex);
3350 } else {
3351 qemu_cond_wait(&param->cond, &param->mutex);
3352 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003353 }
Liang Li33d151f2016-05-05 15:32:58 +08003354 qemu_mutex_unlock(&param->mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02003355
3356 return NULL;
3357}
3358
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003359static int wait_for_decompress_done(void)
Liang Li5533b2e2016-05-05 15:32:52 +08003360{
3361 int idx, thread_count;
3362
3363 if (!migrate_use_compression()) {
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003364 return 0;
Liang Li5533b2e2016-05-05 15:32:52 +08003365 }
3366
3367 thread_count = migrate_decompress_threads();
3368 qemu_mutex_lock(&decomp_done_lock);
3369 for (idx = 0; idx < thread_count; idx++) {
3370 while (!decomp_param[idx].done) {
3371 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
3372 }
3373 }
3374 qemu_mutex_unlock(&decomp_done_lock);
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003375 return qemu_file_get_error(decomp_file);
Liang Li5533b2e2016-05-05 15:32:52 +08003376}
3377
Juan Quintelaf0afa332017-06-28 11:52:28 +02003378static void compress_threads_load_cleanup(void)
Juan Quintela56e93d22015-05-07 19:33:31 +02003379{
3380 int i, thread_count;
3381
Juan Quintela3416ab52016-04-20 11:56:01 +02003382 if (!migrate_use_compression()) {
3383 return;
3384 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003385 thread_count = migrate_decompress_threads();
3386 for (i = 0; i < thread_count; i++) {
Xiao Guangrong797ca152018-03-30 15:51:21 +08003387 /*
3388 * we use it as an indicator which shows if the thread is
3389 * properly init'd or not
3390 */
3391 if (!decomp_param[i].compbuf) {
3392 break;
3393 }
3394
Juan Quintela56e93d22015-05-07 19:33:31 +02003395 qemu_mutex_lock(&decomp_param[i].mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08003396 decomp_param[i].quit = true;
Juan Quintela56e93d22015-05-07 19:33:31 +02003397 qemu_cond_signal(&decomp_param[i].cond);
3398 qemu_mutex_unlock(&decomp_param[i].mutex);
3399 }
3400 for (i = 0; i < thread_count; i++) {
Xiao Guangrong797ca152018-03-30 15:51:21 +08003401 if (!decomp_param[i].compbuf) {
3402 break;
3403 }
3404
Juan Quintela56e93d22015-05-07 19:33:31 +02003405 qemu_thread_join(decompress_threads + i);
3406 qemu_mutex_destroy(&decomp_param[i].mutex);
3407 qemu_cond_destroy(&decomp_param[i].cond);
Xiao Guangrong797ca152018-03-30 15:51:21 +08003408 inflateEnd(&decomp_param[i].stream);
Juan Quintela56e93d22015-05-07 19:33:31 +02003409 g_free(decomp_param[i].compbuf);
Xiao Guangrong797ca152018-03-30 15:51:21 +08003410 decomp_param[i].compbuf = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02003411 }
3412 g_free(decompress_threads);
3413 g_free(decomp_param);
Juan Quintela56e93d22015-05-07 19:33:31 +02003414 decompress_threads = NULL;
3415 decomp_param = NULL;
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003416 decomp_file = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02003417}
3418
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003419static int compress_threads_load_setup(QEMUFile *f)
Xiao Guangrong797ca152018-03-30 15:51:21 +08003420{
3421 int i, thread_count;
3422
3423 if (!migrate_use_compression()) {
3424 return 0;
3425 }
3426
3427 thread_count = migrate_decompress_threads();
3428 decompress_threads = g_new0(QemuThread, thread_count);
3429 decomp_param = g_new0(DecompressParam, thread_count);
3430 qemu_mutex_init(&decomp_done_lock);
3431 qemu_cond_init(&decomp_done_cond);
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003432 decomp_file = f;
Xiao Guangrong797ca152018-03-30 15:51:21 +08003433 for (i = 0; i < thread_count; i++) {
3434 if (inflateInit(&decomp_param[i].stream) != Z_OK) {
3435 goto exit;
3436 }
3437
3438 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
3439 qemu_mutex_init(&decomp_param[i].mutex);
3440 qemu_cond_init(&decomp_param[i].cond);
3441 decomp_param[i].done = true;
3442 decomp_param[i].quit = false;
3443 qemu_thread_create(decompress_threads + i, "decompress",
3444 do_data_decompress, decomp_param + i,
3445 QEMU_THREAD_JOINABLE);
3446 }
3447 return 0;
3448exit:
3449 compress_threads_load_cleanup();
3450 return -1;
3451}
3452
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00003453static void decompress_data_with_multi_threads(QEMUFile *f,
Juan Quintela56e93d22015-05-07 19:33:31 +02003454 void *host, int len)
3455{
3456 int idx, thread_count;
3457
3458 thread_count = migrate_decompress_threads();
Mahmoud Mandour37396952021-03-11 05:15:35 +02003459 QEMU_LOCK_GUARD(&decomp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02003460 while (true) {
3461 for (idx = 0; idx < thread_count; idx++) {
Liang Li73a89122016-05-05 15:32:51 +08003462 if (decomp_param[idx].done) {
Liang Li33d151f2016-05-05 15:32:58 +08003463 decomp_param[idx].done = false;
3464 qemu_mutex_lock(&decomp_param[idx].mutex);
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00003465 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
Juan Quintela56e93d22015-05-07 19:33:31 +02003466 decomp_param[idx].des = host;
3467 decomp_param[idx].len = len;
Liang Li33d151f2016-05-05 15:32:58 +08003468 qemu_cond_signal(&decomp_param[idx].cond);
3469 qemu_mutex_unlock(&decomp_param[idx].mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02003470 break;
3471 }
3472 }
3473 if (idx < thread_count) {
3474 break;
Liang Li73a89122016-05-05 15:32:51 +08003475 } else {
3476 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02003477 }
3478 }
3479}
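
/*
 * Hand-off sketch (illustrative): the loop above scans for the first
 * decompress thread whose ->done flag is set, copies the compressed
 * payload into its compbuf, points ->des at the destination page and
 * signals its condition variable; if every worker is busy it waits on
 * decomp_done_cond until do_data_decompress() marks one done again.
 */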
3480
Rao, Leib70cb3b2020-10-16 13:52:01 +08003481static void colo_init_ram_state(void)
3482{
3483 ram_state_init(&ram_state);
Rao, Leib70cb3b2020-10-16 13:52:01 +08003484}
3485
Zhang Chen13af18f2018-09-03 12:38:48 +08003486/*
3487 * colo cache: this is for secondary VM, we cache the whole
3488 * memory of the secondary VM, it is need to hold the global lock
3489 * to call this helper.
3490 */
3491int colo_init_ram_cache(void)
3492{
3493 RAMBlock *block;
3494
Paolo Bonzini44901b52019-12-13 15:07:22 +01003495 WITH_RCU_READ_LOCK_GUARD() {
3496 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
3497 block->colo_cache = qemu_anon_ram_alloc(block->used_length,
David Hildenbrand8dbe22c2021-05-10 13:43:21 +02003498 NULL, false, false);
Paolo Bonzini44901b52019-12-13 15:07:22 +01003499 if (!block->colo_cache) {
3500 error_report("%s: Can't alloc memory for COLO cache of block %s,"
3501 "size 0x" RAM_ADDR_FMT, __func__, block->idstr,
3502 block->used_length);
3503 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
3504 if (block->colo_cache) {
3505 qemu_anon_ram_free(block->colo_cache, block->used_length);
3506 block->colo_cache = NULL;
3507 }
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003508 }
Paolo Bonzini44901b52019-12-13 15:07:22 +01003509 return -errno;
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003510 }
Zhang Chen13af18f2018-09-03 12:38:48 +08003511 }
Zhang Chen13af18f2018-09-03 12:38:48 +08003512 }
Paolo Bonzini44901b52019-12-13 15:07:22 +01003513
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003514 /*
3515 * Record the dirty pages that were sent by the PVM; we use this dirty bitmap
3516 * to decide which pages in the cache should be flushed into the SVM's RAM. Here
3517 * we use the same name 'ram_bitmap' as for migration.
3518 */
3519 if (ram_bytes_total()) {
3520 RAMBlock *block;
3521
Yury Kotovfbd162e2019-02-15 20:45:46 +03003522 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003523 unsigned long pages = block->max_length >> TARGET_PAGE_BITS;
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003524 block->bmap = bitmap_new(pages);
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003525 }
3526 }
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003527
Rao, Leib70cb3b2020-10-16 13:52:01 +08003528 colo_init_ram_state();
Zhang Chen13af18f2018-09-03 12:38:48 +08003529 return 0;
Zhang Chen13af18f2018-09-03 12:38:48 +08003530}
3531
zhanghailiang03930312020-02-24 14:54:10 +08003532/* TODO: duplicated with ram_init_bitmaps */
3533void colo_incoming_start_dirty_log(void)
3534{
3535 RAMBlock *block = NULL;
3536 /* For memory_global_dirty_log_start below. */
3537 qemu_mutex_lock_iothread();
3538 qemu_mutex_lock_ramlist();
3539
3540 memory_global_dirty_log_sync();
3541 WITH_RCU_READ_LOCK_GUARD() {
3542 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
3543 ramblock_sync_dirty_bitmap(ram_state, block);
3544 /* Discard this dirty bitmap record */
3545 bitmap_zero(block->bmap, block->max_length >> TARGET_PAGE_BITS);
3546 }
Hyman Huang(黄勇)63b41db2021-06-29 16:01:19 +00003547 memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION);
zhanghailiang03930312020-02-24 14:54:10 +08003548 }
3549 ram_state->migration_dirty_pages = 0;
3550 qemu_mutex_unlock_ramlist();
3551 qemu_mutex_unlock_iothread();
3552}
3553
Zhang Chen13af18f2018-09-03 12:38:48 +08003554/* The global lock must be held to call this helper */
3555void colo_release_ram_cache(void)
3556{
3557 RAMBlock *block;
3558
Hyman Huang(黄勇)63b41db2021-06-29 16:01:19 +00003559 memory_global_dirty_log_stop(GLOBAL_DIRTY_MIGRATION);
Yury Kotovfbd162e2019-02-15 20:45:46 +03003560 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003561 g_free(block->bmap);
3562 block->bmap = NULL;
3563 }
3564
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003565 WITH_RCU_READ_LOCK_GUARD() {
3566 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
3567 if (block->colo_cache) {
3568 qemu_anon_ram_free(block->colo_cache, block->used_length);
3569 block->colo_cache = NULL;
3570 }
Zhang Chen13af18f2018-09-03 12:38:48 +08003571 }
3572 }
zhanghailiang03930312020-02-24 14:54:10 +08003573 ram_state_cleanup(&ram_state);
Zhang Chen13af18f2018-09-03 12:38:48 +08003574}
3575
Juan Quintela3d0684b2017-03-23 15:06:39 +01003576/**
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003577 * ram_load_setup: Setup RAM for migration incoming side
3578 *
3579 * Returns zero to indicate success and negative for error
3580 *
3581 * @f: QEMUFile where to receive the data
3582 * @opaque: RAMState pointer
3583 */
3584static int ram_load_setup(QEMUFile *f, void *opaque)
3585{
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003586 if (compress_threads_load_setup(f)) {
Xiao Guangrong797ca152018-03-30 15:51:21 +08003587 return -1;
3588 }
3589
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003590 xbzrle_load_setup();
Alexey Perevalovf9494612017-10-05 14:13:20 +03003591 ramblock_recv_map_init();
Zhang Chen13af18f2018-09-03 12:38:48 +08003592
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003593 return 0;
3594}
3595
3596static int ram_load_cleanup(void *opaque)
3597{
Alexey Perevalovf9494612017-10-05 14:13:20 +03003598 RAMBlock *rb;
Junyan He56eb90a2018-07-18 15:48:03 +08003599
Yury Kotovfbd162e2019-02-15 20:45:46 +03003600 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
Beata Michalskabd108a42019-11-21 00:08:42 +00003601 qemu_ram_block_writeback(rb);
Junyan He56eb90a2018-07-18 15:48:03 +08003602 }
3603
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003604 xbzrle_load_cleanup();
Juan Quintelaf0afa332017-06-28 11:52:28 +02003605 compress_threads_load_cleanup();
Alexey Perevalovf9494612017-10-05 14:13:20 +03003606
Yury Kotovfbd162e2019-02-15 20:45:46 +03003607 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
Alexey Perevalovf9494612017-10-05 14:13:20 +03003608 g_free(rb->receivedmap);
3609 rb->receivedmap = NULL;
3610 }
Zhang Chen13af18f2018-09-03 12:38:48 +08003611
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003612 return 0;
3613}
3614
3615/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01003616 * ram_postcopy_incoming_init: allocate postcopy data structures
3617 *
3618 * Returns 0 for success and negative if there was one error
3619 *
3620 * @mis: current migration incoming state
3621 *
3622 * Allocate data structures etc needed by incoming migration with
3623 * postcopy-ram. postcopy-ram's similarly named
3624 * postcopy_ram_incoming_init does the work.
Dr. David Alan Gilbert1caddf82015-11-05 18:11:03 +00003625 */
3626int ram_postcopy_incoming_init(MigrationIncomingState *mis)
3627{
David Hildenbrandc1361802018-06-20 22:27:36 +02003628 return postcopy_ram_incoming_init(mis);
Dr. David Alan Gilbert1caddf82015-11-05 18:11:03 +00003629}
3630
Juan Quintela3d0684b2017-03-23 15:06:39 +01003631/**
3632 * ram_load_postcopy: load a page in postcopy case
3633 *
3634 * Returns 0 for success or -errno in case of error
3635 *
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003636 * Called in postcopy mode by ram_load().
3637 * rcu_read_lock is taken prior to this being called.
Juan Quintela3d0684b2017-03-23 15:06:39 +01003638 *
3639 * @f: QEMUFile to receive the data from
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003640 */
3641static int ram_load_postcopy(QEMUFile *f)
3642{
3643 int flags = 0, ret = 0;
3644 bool place_needed = false;
Peter Xu1aa83672018-07-10 17:18:53 +08003645 bool matches_target_page_size = false;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003646 MigrationIncomingState *mis = migration_incoming_get_current();
3647 /* Temporary page that is later 'placed' */
Wei Yang34143222019-10-05 21:50:20 +08003648 void *postcopy_host_page = mis->postcopy_tmp_page;
David Hildenbrand6a23f632021-04-29 13:27:05 +02003649 void *host_page = NULL;
David Hildenbrandddf35bd2020-04-21 10:52:56 +02003650 bool all_zero = true;
Wei Yang4cbb3c62019-11-07 20:39:04 +08003651 int target_pages = 0;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003652
3653 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
3654 ram_addr_t addr;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003655 void *page_buffer = NULL;
3656 void *place_source = NULL;
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00003657 RAMBlock *block = NULL;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003658 uint8_t ch;
Wei Yang644acf92019-11-07 20:39:07 +08003659 int len;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003660
3661 addr = qemu_get_be64(f);
Peter Xu7a9ddfb2018-02-08 18:31:05 +08003662
3663 /*
3664 * If qemu file error, we should stop here, and then "addr"
3665 * may be invalid
3666 */
3667 ret = qemu_file_get_error(f);
3668 if (ret) {
3669 break;
3670 }
3671
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003672 flags = addr & ~TARGET_PAGE_MASK;
3673 addr &= TARGET_PAGE_MASK;
3674
3675 trace_ram_load_postcopy_loop((uint64_t)addr, flags);
Wei Yang644acf92019-11-07 20:39:07 +08003676 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
3677 RAM_SAVE_FLAG_COMPRESS_PAGE)) {
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00003678 block = ram_block_from_stream(f, flags);
David Hildenbrand6a23f632021-04-29 13:27:05 +02003679 if (!block) {
3680 ret = -EINVAL;
3681 break;
3682 }
zhanghailiang4c4bad42016-01-15 11:37:41 +08003683
David Hildenbrand898ba902021-04-29 13:27:06 +02003684 /*
3685 * Relying on used_length is racy and can result in false positives.
3686 * We might place pages beyond used_length in case RAM was shrunk
3687 * while in postcopy, which is fine - trying to place via
3688 * UFFDIO_COPY/UFFDIO_ZEROPAGE will never segfault.
3689 */
3690 if (!block->host || addr >= block->postcopy_length) {
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003691 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
3692 ret = -EINVAL;
3693 break;
3694 }
Wei Yang4cbb3c62019-11-07 20:39:04 +08003695 target_pages++;
Peter Xu1aa83672018-07-10 17:18:53 +08003696 matches_target_page_size = block->page_size == TARGET_PAGE_SIZE;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003697 /*
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00003698 * Postcopy requires that we place whole host pages atomically;
3699 * these may be huge pages for RAMBlocks that are backed by
3700 * hugetlbfs.
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003701 * To make it atomic, the data is read into a temporary page
3702 * that's moved into place later.
3703 * The migration protocol uses, possibly smaller, target pages;
3704 * however, the source ensures it always sends all the components
Wei Yang91ba4422019-11-07 20:39:06 +08003705 * of a host page in one chunk.
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003706 */
3707 page_buffer = postcopy_host_page +
David Hildenbrand6a23f632021-04-29 13:27:05 +02003708 host_page_offset_from_ram_block_offset(block, addr);
3709 /* If all TP are zero then we can optimise the place */
Wei Yange5e73b02019-11-07 20:39:05 +08003710 if (target_pages == 1) {
David Hildenbrand6a23f632021-04-29 13:27:05 +02003711 host_page = host_page_from_ram_block_offset(block, addr);
3712 } else if (host_page != host_page_from_ram_block_offset(block,
3713 addr)) {
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00003714 /* not the 1st TP within the HP */
David Hildenbrand6a23f632021-04-29 13:27:05 +02003715 error_report("Non-same host page %p/%p", host_page,
3716 host_page_from_ram_block_offset(block, addr));
3717 ret = -EINVAL;
3718 break;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003719 }
3720
3721 /*
3722 * If it's the last part of a host page then we place the host
3723 * page
3724 */
Wei Yang4cbb3c62019-11-07 20:39:04 +08003725 if (target_pages == (block->page_size / TARGET_PAGE_SIZE)) {
3726 place_needed = true;
Wei Yang4cbb3c62019-11-07 20:39:04 +08003727 }
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003728 place_source = postcopy_host_page;
3729 }
3730
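        /*
         * Decode this chunk according to how the source encoded the target
         * page: a single fill byte (ZERO), a raw page (PAGE), or a
         * compressed page (COMPRESS_PAGE).
         */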
3731 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
Juan Quintelabb890ed2017-04-28 09:39:55 +02003732 case RAM_SAVE_FLAG_ZERO:
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003733 ch = qemu_get_byte(f);
Wei Yang2e36bc12019-11-07 20:39:02 +08003734 /*
             * We can skip setting page_buffer when this is a zero page
             * and (block->page_size == TARGET_PAGE_SIZE).
3737 */
3738 if (ch || !matches_target_page_size) {
3739 memset(page_buffer, ch, TARGET_PAGE_SIZE);
3740 }
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003741 if (ch) {
3742 all_zero = false;
3743 }
3744 break;
3745
3746 case RAM_SAVE_FLAG_PAGE:
3747 all_zero = false;
Peter Xu1aa83672018-07-10 17:18:53 +08003748 if (!matches_target_page_size) {
                /* For huge pages, we always use the temporary buffer */
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003750 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
3751 } else {
Peter Xu1aa83672018-07-10 17:18:53 +08003752 /*
                 * For small pages that match the target page size, we
                 * avoid the qemu_file copy.  Instead we directly use
                 * the buffer of QEMUFile to place the page.  Note: we
                 * cannot do any QEMUFile operation before using that
                 * buffer, to make sure the buffer is still valid when
                 * placing the page.
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003759 */
3760 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
3761 TARGET_PAGE_SIZE);
3762 }
3763 break;
Wei Yang644acf92019-11-07 20:39:07 +08003764 case RAM_SAVE_FLAG_COMPRESS_PAGE:
3765 all_zero = false;
3766 len = qemu_get_be32(f);
3767 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
3768 error_report("Invalid compressed data length: %d", len);
3769 ret = -EINVAL;
3770 break;
3771 }
3772 decompress_data_with_multi_threads(f, page_buffer, len);
3773 break;
3774
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003775 case RAM_SAVE_FLAG_EOS:
3776 /* normal exit */
Juan Quintela6df264a2018-02-28 09:10:07 +01003777 multifd_recv_sync_main();
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003778 break;
3779 default:
Bihong Yu29fccad2020-10-20 11:10:42 +08003780 error_report("Unknown combination of migration flags: 0x%x"
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003781 " (postcopy mode)", flags);
3782 ret = -EINVAL;
Peter Xu7a9ddfb2018-02-08 18:31:05 +08003783 break;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003784 }
3785
        /* Got the whole host page; wait for decompression before placing it. */
3787 if (place_needed) {
3788 ret |= wait_for_decompress_done();
3789 }
3790
        /* Check for any possible file errors */
3792 if (!ret && qemu_file_get_error(f)) {
3793 ret = qemu_file_get_error(f);
3794 }
3795
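        /*
         * Atomically place the completed host page into guest memory:
         * zero-fill placement for all-zero pages, otherwise copy from
         * place_source (the temporary buffer, or the QEMUFile buffer when
         * reading pages in place).
         */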
3796 if (!ret && place_needed) {
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003797 if (all_zero) {
David Hildenbrand6a23f632021-04-29 13:27:05 +02003798 ret = postcopy_place_page_zero(mis, host_page, block);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003799 } else {
David Hildenbrand6a23f632021-04-29 13:27:05 +02003800 ret = postcopy_place_page(mis, host_page, place_source,
3801 block);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003802 }
David Hildenbrandddf35bd2020-04-21 10:52:56 +02003803 place_needed = false;
3804 target_pages = 0;
3805 /* Assume we have a zero page until we detect something different */
3806 all_zero = true;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003807 }
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003808 }
3809
3810 return ret;
3811}
3812
Daniel Henrique Barbozaacab30b2017-11-16 20:35:26 -02003813static bool postcopy_is_advised(void)
3814{
3815 PostcopyState ps = postcopy_state_get();
3816 return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END;
3817}
3818
3819static bool postcopy_is_running(void)
3820{
3821 PostcopyState ps = postcopy_state_get();
3822 return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END;
3823}
3824
Zhang Chene6f4aa12018-09-03 12:38:50 +08003825/*
 * Flush the contents of the RAM cache into the SVM's memory.
 * Only flush the pages that have been dirtied by the PVM, the SVM, or both.
3828 */
Lukas Straub24fa16f2020-05-11 13:10:51 +02003829void colo_flush_ram_cache(void)
Zhang Chene6f4aa12018-09-03 12:38:50 +08003830{
3831 RAMBlock *block = NULL;
3832 void *dst_host;
3833 void *src_host;
3834 unsigned long offset = 0;
3835
zhanghailiangd1955d22018-09-03 12:38:55 +08003836 memory_global_dirty_log_sync();
Peter Xu63268c42021-06-30 16:08:05 -04003837 qemu_mutex_lock(&ram_state->bitmap_mutex);
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003838 WITH_RCU_READ_LOCK_GUARD() {
3839 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
3840 ramblock_sync_dirty_bitmap(ram_state, block);
Zhang Chene6f4aa12018-09-03 12:38:50 +08003841 }
3842 }
3843
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003844 trace_colo_flush_ram_cache_begin(ram_state->migration_dirty_pages);
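    /*
     * Walk the dirty bitmap of every block and copy each dirty page from
     * colo_cache back into the SVM's memory.
     */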
3845 WITH_RCU_READ_LOCK_GUARD() {
3846 block = QLIST_FIRST_RCU(&ram_list.blocks);
3847
3848 while (block) {
3849 offset = migration_bitmap_find_dirty(ram_state, block, offset);
3850
David Hildenbrand542147f2021-04-29 13:27:08 +02003851 if (!offset_in_ramblock(block,
3852 ((ram_addr_t)offset) << TARGET_PAGE_BITS)) {
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003853 offset = 0;
3854 block = QLIST_NEXT_RCU(block, next);
3855 } else {
3856 migration_bitmap_clear_dirty(ram_state, block, offset);
Alexey Romko8bba0042020-01-10 14:51:34 +01003857 dst_host = block->host
3858 + (((ram_addr_t)offset) << TARGET_PAGE_BITS);
3859 src_host = block->colo_cache
3860 + (((ram_addr_t)offset) << TARGET_PAGE_BITS);
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003861 memcpy(dst_host, src_host, TARGET_PAGE_SIZE);
3862 }
3863 }
3864 }
Zhang Chene6f4aa12018-09-03 12:38:50 +08003865 trace_colo_flush_ram_cache_end();
Peter Xu63268c42021-06-30 16:08:05 -04003866 qemu_mutex_unlock(&ram_state->bitmap_mutex);
Zhang Chene6f4aa12018-09-03 12:38:50 +08003867}
3868
Wei Yang10da4a32019-07-25 08:20:23 +08003869/**
3870 * ram_load_precopy: load pages in precopy case
3871 *
3872 * Returns 0 for success or -errno in case of error
3873 *
3874 * Called in precopy mode by ram_load().
3875 * rcu_read_lock is taken prior to this being called.
3876 *
 * @f: QEMUFile where to receive the data
3878 */
3879static int ram_load_precopy(QEMUFile *f)
Juan Quintela56e93d22015-05-07 19:33:31 +02003880{
Yury Kotove65cec52019-11-25 16:36:32 +03003881 int flags = 0, ret = 0, invalid_flags = 0, len = 0, i = 0;
    /* ADVISE is sent earlier; it shows that the source has the postcopy capability on */
Daniel Henrique Barbozaacab30b2017-11-16 20:35:26 -02003883 bool postcopy_advised = postcopy_is_advised();
Juan Quintelaedc60122016-11-02 12:40:46 +01003884 if (!migrate_use_compression()) {
3885 invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE;
3886 }
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003887
Wei Yang10da4a32019-07-25 08:20:23 +08003888 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02003889 ram_addr_t addr, total_ram_bytes;
zhanghailiang03930312020-02-24 14:54:10 +08003890 void *host = NULL, *host_bak = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02003891 uint8_t ch;
3892
Yury Kotove65cec52019-11-25 16:36:32 +03003893 /*
         * Yield periodically to let the main loop run, but an iteration of
         * the main loop is expensive, so only do it every so many iterations.
3896 */
3897 if ((i & 32767) == 0 && qemu_in_coroutine()) {
3898 aio_co_schedule(qemu_get_current_aio_context(),
3899 qemu_coroutine_self());
3900 qemu_coroutine_yield();
3901 }
3902 i++;
3903
Juan Quintela56e93d22015-05-07 19:33:31 +02003904 addr = qemu_get_be64(f);
3905 flags = addr & ~TARGET_PAGE_MASK;
3906 addr &= TARGET_PAGE_MASK;
3907
Juan Quintelaedc60122016-11-02 12:40:46 +01003908 if (flags & invalid_flags) {
3909 if (flags & invalid_flags & RAM_SAVE_FLAG_COMPRESS_PAGE) {
3910 error_report("Received an unexpected compressed page");
3911 }
3912
3913 ret = -EINVAL;
3914 break;
3915 }
3916
Juan Quintelabb890ed2017-04-28 09:39:55 +02003917 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00003918 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
zhanghailiang4c4bad42016-01-15 11:37:41 +08003919 RAMBlock *block = ram_block_from_stream(f, flags);
3920
zhanghailiang03930312020-02-24 14:54:10 +08003921 host = host_from_ram_block_offset(block, addr);
Zhang Chen13af18f2018-09-03 12:38:48 +08003922 /*
             * After entering the COLO stage, we should not load pages
             * into the SVM's memory directly; we put them into colo_cache
             * first.
             * NOTE: We need to keep a copy of the SVM's RAM in colo_cache.
             * Previously, we copied all this memory in the COLO preparation
             * stage, during which the VM had to be stopped, which is
             * time-consuming.  Here we optimize it by backing up every page
             * during migration while COLO is enabled.  This slightly slows
             * down the migration, but it clearly reduces the downtime of
             * backing up all of the SVM's memory in the COLO preparation
             * stage.
Zhang Chen13af18f2018-09-03 12:38:48 +08003932 */
zhanghailiang03930312020-02-24 14:54:10 +08003933 if (migration_incoming_colo_enabled()) {
3934 if (migration_incoming_in_colo_state()) {
3935 /* In COLO stage, put all pages into cache temporarily */
zhanghailiang8af66372020-02-24 14:54:11 +08003936 host = colo_cache_from_block_offset(block, addr, true);
zhanghailiang03930312020-02-24 14:54:10 +08003937 } else {
3938 /*
3939 * In migration stage but before COLO stage,
3940 * Put all pages into both cache and SVM's memory.
3941 */
zhanghailiang8af66372020-02-24 14:54:11 +08003942 host_bak = colo_cache_from_block_offset(block, addr, false);
zhanghailiang03930312020-02-24 14:54:10 +08003943 }
Zhang Chen13af18f2018-09-03 12:38:48 +08003944 }
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00003945 if (!host) {
3946 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
3947 ret = -EINVAL;
3948 break;
3949 }
Zhang Chen13af18f2018-09-03 12:38:48 +08003950 if (!migration_incoming_in_colo_state()) {
3951 ramblock_recv_bitmap_set(block, host);
3952 }
3953
Dr. David Alan Gilbert1db9d8e2017-04-26 19:37:21 +01003954 trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00003955 }
3956
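        /*
         * MEM_SIZE describes the RAM block layout; ZERO, PAGE, COMPRESS_PAGE
         * and XBZRLE carry page contents; EOS marks the normal end of the
         * loop.
         */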
Juan Quintela56e93d22015-05-07 19:33:31 +02003957 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
3958 case RAM_SAVE_FLAG_MEM_SIZE:
3959 /* Synchronize RAM block list */
3960 total_ram_bytes = addr;
3961 while (!ret && total_ram_bytes) {
3962 RAMBlock *block;
Juan Quintela56e93d22015-05-07 19:33:31 +02003963 char id[256];
3964 ram_addr_t length;
3965
3966 len = qemu_get_byte(f);
3967 qemu_get_buffer(f, (uint8_t *)id, len);
3968 id[len] = 0;
3969 length = qemu_get_be64(f);
3970
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00003971 block = qemu_ram_block_by_name(id);
Cédric Le Goaterb895de52018-05-14 08:57:00 +02003972 if (block && !qemu_ram_is_migratable(block)) {
3973 error_report("block %s should not be migrated !", id);
3974 ret = -EINVAL;
3975 } else if (block) {
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00003976 if (length != block->used_length) {
3977 Error *local_err = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02003978
Gongleifa53a0e2016-05-10 10:04:59 +08003979 ret = qemu_ram_resize(block, length,
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00003980 &local_err);
3981 if (local_err) {
3982 error_report_err(local_err);
Juan Quintela56e93d22015-05-07 19:33:31 +02003983 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003984 }
                    /* For postcopy we need to check that hugepage sizes match */
Stefan Reitere846b742021-02-04 17:35:22 +01003986 if (postcopy_advised && migrate_postcopy_ram() &&
Dr. David Alan Gilbertef08fb32017-02-24 18:28:30 +00003987 block->page_size != qemu_host_page_size) {
3988 uint64_t remote_page_size = qemu_get_be64(f);
3989 if (remote_page_size != block->page_size) {
3990 error_report("Mismatched RAM page size %s "
3991 "(local) %zd != %" PRId64,
3992 id, block->page_size,
3993 remote_page_size);
3994 ret = -EINVAL;
3995 }
3996 }
Yury Kotovfbd162e2019-02-15 20:45:46 +03003997 if (migrate_ignore_shared()) {
3998 hwaddr addr = qemu_get_be64(f);
Yury Kotovfbd162e2019-02-15 20:45:46 +03003999 if (ramblock_is_ignored(block) &&
4000 block->mr->addr != addr) {
4001 error_report("Mismatched GPAs for block %s "
4002 "%" PRId64 "!= %" PRId64,
4003 id, (uint64_t)addr,
4004 (uint64_t)block->mr->addr);
4005 ret = -EINVAL;
4006 }
4007 }
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00004008 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
4009 block->idstr);
4010 } else {
Juan Quintela56e93d22015-05-07 19:33:31 +02004011 error_report("Unknown ramblock \"%s\", cannot "
4012 "accept migration", id);
4013 ret = -EINVAL;
4014 }
4015
4016 total_ram_bytes -= length;
4017 }
4018 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004019
Juan Quintelabb890ed2017-04-28 09:39:55 +02004020 case RAM_SAVE_FLAG_ZERO:
Juan Quintela56e93d22015-05-07 19:33:31 +02004021 ch = qemu_get_byte(f);
4022 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
4023 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004024
Juan Quintela56e93d22015-05-07 19:33:31 +02004025 case RAM_SAVE_FLAG_PAGE:
Juan Quintela56e93d22015-05-07 19:33:31 +02004026 qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
4027 break;
Juan Quintela56e93d22015-05-07 19:33:31 +02004028
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004029 case RAM_SAVE_FLAG_COMPRESS_PAGE:
Juan Quintela56e93d22015-05-07 19:33:31 +02004030 len = qemu_get_be32(f);
4031 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
4032 error_report("Invalid compressed data length: %d", len);
4033 ret = -EINVAL;
4034 break;
4035 }
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00004036 decompress_data_with_multi_threads(f, host, len);
Juan Quintela56e93d22015-05-07 19:33:31 +02004037 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004038
Juan Quintela56e93d22015-05-07 19:33:31 +02004039 case RAM_SAVE_FLAG_XBZRLE:
Juan Quintela56e93d22015-05-07 19:33:31 +02004040 if (load_xbzrle(f, addr, host) < 0) {
4041 error_report("Failed to decompress XBZRLE page at "
4042 RAM_ADDR_FMT, addr);
4043 ret = -EINVAL;
4044 break;
4045 }
4046 break;
4047 case RAM_SAVE_FLAG_EOS:
4048 /* normal exit */
Juan Quintela6df264a2018-02-28 09:10:07 +01004049 multifd_recv_sync_main();
Juan Quintela56e93d22015-05-07 19:33:31 +02004050 break;
4051 default:
4052 if (flags & RAM_SAVE_FLAG_HOOK) {
Dr. David Alan Gilbert632e3a52015-06-11 18:17:23 +01004053 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
Juan Quintela56e93d22015-05-07 19:33:31 +02004054 } else {
Bihong Yu29fccad2020-10-20 11:10:42 +08004055 error_report("Unknown combination of migration flags: 0x%x",
Juan Quintela56e93d22015-05-07 19:33:31 +02004056 flags);
4057 ret = -EINVAL;
4058 }
4059 }
4060 if (!ret) {
4061 ret = qemu_file_get_error(f);
4062 }
zhanghailiang03930312020-02-24 14:54:10 +08004063 if (!ret && host_bak) {
4064 memcpy(host_bak, host, TARGET_PAGE_SIZE);
4065 }
Juan Quintela56e93d22015-05-07 19:33:31 +02004066 }
4067
Wei Yangca1a6b72019-11-07 20:39:03 +08004068 ret |= wait_for_decompress_done();
Wei Yang10da4a32019-07-25 08:20:23 +08004069 return ret;
4070}
4071
4072static int ram_load(QEMUFile *f, void *opaque, int version_id)
4073{
4074 int ret = 0;
4075 static uint64_t seq_iter;
4076 /*
     * If the system is running in postcopy mode, page inserts into host
     * memory must be atomic.
4079 */
4080 bool postcopy_running = postcopy_is_running();
4081
4082 seq_iter++;
4083
4084 if (version_id != 4) {
4085 return -EINVAL;
4086 }
4087
4088 /*
4089 * This RCU critical section can be very long running.
4090 * When RCU reclaims in the code start to become numerous,
4091 * it will be necessary to reduce the granularity of this
4092 * critical section.
4093 */
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01004094 WITH_RCU_READ_LOCK_GUARD() {
4095 if (postcopy_running) {
4096 ret = ram_load_postcopy(f);
4097 } else {
4098 ret = ram_load_precopy(f);
4099 }
Wei Yang10da4a32019-07-25 08:20:23 +08004100 }
Juan Quintela55c44462017-01-23 22:32:05 +01004101 trace_ram_load_complete(ret, seq_iter);
Zhang Chene6f4aa12018-09-03 12:38:50 +08004102
Juan Quintela56e93d22015-05-07 19:33:31 +02004103 return ret;
4104}
4105
Vladimir Sementsov-Ogievskiyc6467622017-07-10 19:30:14 +03004106static bool ram_has_postcopy(void *opaque)
4107{
Junyan He469dd512018-07-18 15:48:02 +08004108 RAMBlock *rb;
Yury Kotovfbd162e2019-02-15 20:45:46 +03004109 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
Junyan He469dd512018-07-18 15:48:02 +08004110 if (ramblock_is_pmem(rb)) {
4111 info_report("Block: %s, host: %p is a nvdimm memory, postcopy"
4112 "is not supported now!", rb->idstr, rb->host);
4113 return false;
4114 }
4115 }
4116
Vladimir Sementsov-Ogievskiyc6467622017-07-10 19:30:14 +03004117 return migrate_postcopy_ram();
4118}
4119
/* Sync all the dirty bitmaps with the destination VM. */
4121static int ram_dirty_bitmap_sync_all(MigrationState *s, RAMState *rs)
4122{
4123 RAMBlock *block;
4124 QEMUFile *file = s->to_dst_file;
4125 int ramblock_count = 0;
4126
4127 trace_ram_dirty_bitmap_sync_start();
4128
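    /*
     * Request the receive bitmap of every block; the return path thread
     * posts rp_sem once per reloaded block (see ram_dirty_bitmap_reload()).
     */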
Yury Kotovfbd162e2019-02-15 20:45:46 +03004129 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Peter Xuedd090c2018-05-02 18:47:32 +08004130 qemu_savevm_send_recv_bitmap(file, block->idstr);
4131 trace_ram_dirty_bitmap_request(block->idstr);
4132 ramblock_count++;
4133 }
4134
4135 trace_ram_dirty_bitmap_sync_wait();
4136
    /* Wait until all the ramblocks' dirty bitmaps are synced */
4138 while (ramblock_count--) {
4139 qemu_sem_wait(&s->rp_state.rp_sem);
4140 }
4141
4142 trace_ram_dirty_bitmap_sync_complete();
4143
4144 return 0;
4145}
4146
4147static void ram_dirty_bitmap_reload_notify(MigrationState *s)
4148{
4149 qemu_sem_post(&s->rp_state.rp_sem);
4150}
4151
Peter Xua335deb2018-05-02 18:47:28 +08004152/*
4153 * Read the received bitmap, revert it as the initial dirty bitmap.
4154 * This is only used when the postcopy migration is paused but wants
4155 * to resume from a middle point.
4156 */
4157int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block)
4158{
4159 int ret = -EINVAL;
Peter Xu43044ac2021-07-22 13:58:38 -04004160 /* from_dst_file is always valid because we're within rp_thread */
Peter Xua335deb2018-05-02 18:47:28 +08004161 QEMUFile *file = s->rp_state.from_dst_file;
4162 unsigned long *le_bitmap, nbits = block->used_length >> TARGET_PAGE_BITS;
Peter Xua725ef92018-07-10 17:18:55 +08004163 uint64_t local_size = DIV_ROUND_UP(nbits, 8);
Peter Xua335deb2018-05-02 18:47:28 +08004164 uint64_t size, end_mark;
4165
4166 trace_ram_dirty_bitmap_reload_begin(block->idstr);
4167
4168 if (s->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
4169 error_report("%s: incorrect state %s", __func__,
4170 MigrationStatus_str(s->state));
4171 return -EINVAL;
4172 }
4173
4174 /*
4175 * Note: see comments in ramblock_recv_bitmap_send() on why we
     * need the endianness conversion, and the padding.
Peter Xua335deb2018-05-02 18:47:28 +08004177 */
4178 local_size = ROUND_UP(local_size, 8);
4179
    /* Add padding */
4181 le_bitmap = bitmap_new(nbits + BITS_PER_LONG);
4182
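    /*
     * Wire format (see ramblock_recv_bitmap_send()): 8-byte size, the
     * little-endian bitmap itself, then an 8-byte end mark.
     */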
4183 size = qemu_get_be64(file);
4184
4185 /* The size of the bitmap should match with our ramblock */
4186 if (size != local_size) {
4187 error_report("%s: ramblock '%s' bitmap size mismatch "
4188 "(0x%"PRIx64" != 0x%"PRIx64")", __func__,
4189 block->idstr, size, local_size);
4190 ret = -EINVAL;
4191 goto out;
4192 }
4193
4194 size = qemu_get_buffer(file, (uint8_t *)le_bitmap, local_size);
4195 end_mark = qemu_get_be64(file);
4196
4197 ret = qemu_file_get_error(file);
4198 if (ret || size != local_size) {
4199 error_report("%s: read bitmap failed for ramblock '%s': %d"
4200 " (size 0x%"PRIx64", got: 0x%"PRIx64")",
4201 __func__, block->idstr, ret, local_size, size);
4202 ret = -EIO;
4203 goto out;
4204 }
4205
4206 if (end_mark != RAMBLOCK_RECV_BITMAP_ENDING) {
Philippe Mathieu-Daudéaf3bbbe2020-11-03 12:25:58 +01004207 error_report("%s: ramblock '%s' end mark incorrect: 0x%"PRIx64,
Peter Xua335deb2018-05-02 18:47:28 +08004208 __func__, block->idstr, end_mark);
4209 ret = -EINVAL;
4210 goto out;
4211 }
4212
4213 /*
zhaolichang3a4452d2020-09-17 15:50:21 +08004214 * Endianness conversion. We are during postcopy (though paused).
Peter Xua335deb2018-05-02 18:47:28 +08004215 * The dirty bitmap won't change. We can directly modify it.
4216 */
4217 bitmap_from_le(block->bmap, le_bitmap, nbits);
4218
4219 /*
4220 * What we received is "received bitmap". Revert it as the initial
4221 * dirty bitmap for this ramblock.
4222 */
4223 bitmap_complement(block->bmap, block->bmap, nbits);
4224
David Hildenbrandbe39b4c2021-10-11 19:53:41 +02004225 /* Clear dirty bits of discarded ranges that we don't want to migrate. */
4226 ramblock_dirty_bitmap_clear_discarded_pages(block);
4227
4228 /* We'll recalculate migration_dirty_pages in ram_state_resume_prepare(). */
Peter Xua335deb2018-05-02 18:47:28 +08004229 trace_ram_dirty_bitmap_reload_complete(block->idstr);
4230
Peter Xuedd090c2018-05-02 18:47:32 +08004231 /*
     * We succeeded in syncing the bitmap for the current ramblock.  If this
     * is the last one to sync, we need to notify the main send thread.
4234 */
4235 ram_dirty_bitmap_reload_notify(s);
4236
Peter Xua335deb2018-05-02 18:47:28 +08004237 ret = 0;
4238out:
Peter Xubf269902018-05-25 09:50:42 +08004239 g_free(le_bitmap);
Peter Xua335deb2018-05-02 18:47:28 +08004240 return ret;
4241}
4242
Peter Xuedd090c2018-05-02 18:47:32 +08004243static int ram_resume_prepare(MigrationState *s, void *opaque)
4244{
4245 RAMState *rs = *(RAMState **)opaque;
Peter Xu08614f32018-05-02 18:47:33 +08004246 int ret;
Peter Xuedd090c2018-05-02 18:47:32 +08004247
Peter Xu08614f32018-05-02 18:47:33 +08004248 ret = ram_dirty_bitmap_sync_all(s, rs);
4249 if (ret) {
4250 return ret;
4251 }
4252
4253 ram_state_resume_prepare(rs, s->to_dst_file);
4254
4255 return 0;
Peter Xuedd090c2018-05-02 18:47:32 +08004256}
4257
Juan Quintela56e93d22015-05-07 19:33:31 +02004258static SaveVMHandlers savevm_ram_handlers = {
Juan Quintela9907e842017-06-28 11:52:24 +02004259 .save_setup = ram_save_setup,
Juan Quintela56e93d22015-05-07 19:33:31 +02004260 .save_live_iterate = ram_save_iterate,
Dr. David Alan Gilbert763c9062015-11-05 18:11:00 +00004261 .save_live_complete_postcopy = ram_save_complete,
Dr. David Alan Gilberta3e06c32015-11-05 18:10:41 +00004262 .save_live_complete_precopy = ram_save_complete,
Vladimir Sementsov-Ogievskiyc6467622017-07-10 19:30:14 +03004263 .has_postcopy = ram_has_postcopy,
Juan Quintela56e93d22015-05-07 19:33:31 +02004264 .save_live_pending = ram_save_pending,
4265 .load_state = ram_load,
Juan Quintelaf265e0e2017-06-28 11:52:27 +02004266 .save_cleanup = ram_save_cleanup,
4267 .load_setup = ram_load_setup,
4268 .load_cleanup = ram_load_cleanup,
Peter Xuedd090c2018-05-02 18:47:32 +08004269 .resume_prepare = ram_resume_prepare,
Juan Quintela56e93d22015-05-07 19:33:31 +02004270};
4271
David Hildenbrandc7c0e722021-04-29 13:27:02 +02004272static void ram_mig_ram_block_resized(RAMBlockNotifier *n, void *host,
4273 size_t old_size, size_t new_size)
4274{
David Hildenbrandcc61c702021-04-29 13:27:04 +02004275 PostcopyState ps = postcopy_state_get();
David Hildenbrandc7c0e722021-04-29 13:27:02 +02004276 ram_addr_t offset;
4277 RAMBlock *rb = qemu_ram_block_from_host(host, false, &offset);
4278 Error *err = NULL;
4279
4280 if (ramblock_is_ignored(rb)) {
4281 return;
4282 }
4283
4284 if (!migration_is_idle()) {
4285 /*
4286 * Precopy code on the source cannot deal with the size of RAM blocks
4287 * changing at random points in time - especially after sending the
4288 * RAM block sizes in the migration stream, they must no longer change.
4289 * Abort and indicate a proper reason.
4290 */
4291 error_setg(&err, "RAM block '%s' resized during precopy.", rb->idstr);
4292 migrate_set_error(migrate_get_current(), err);
4293 error_free(err);
4294 migration_cancel();
4295 }
David Hildenbrandcc61c702021-04-29 13:27:04 +02004296
4297 switch (ps) {
4298 case POSTCOPY_INCOMING_ADVISE:
4299 /*
4300 * Update what ram_postcopy_incoming_init()->init_range() does at the
4301 * time postcopy was advised. Syncing RAM blocks with the source will
4302 * result in RAM resizes.
4303 */
4304 if (old_size < new_size) {
4305 if (ram_discard_range(rb->idstr, old_size, new_size - old_size)) {
4306 error_report("RAM block '%s' discard of resized RAM failed",
4307 rb->idstr);
4308 }
4309 }
David Hildenbrand898ba902021-04-29 13:27:06 +02004310 rb->postcopy_length = new_size;
David Hildenbrandcc61c702021-04-29 13:27:04 +02004311 break;
4312 case POSTCOPY_INCOMING_NONE:
4313 case POSTCOPY_INCOMING_RUNNING:
4314 case POSTCOPY_INCOMING_END:
4315 /*
         * Once our guest is running, postcopy no longer cares about
         * resizes. When growing, the new memory was not available on the
         * source, so no handler is needed.
4319 */
4320 break;
4321 default:
4322 error_report("RAM block '%s' resized during postcopy state: %d",
4323 rb->idstr, ps);
4324 exit(-1);
4325 }
David Hildenbrandc7c0e722021-04-29 13:27:02 +02004326}
4327
4328static RAMBlockNotifier ram_mig_ram_notifier = {
4329 .ram_block_resized = ram_mig_ram_block_resized,
4330};
4331
Juan Quintela56e93d22015-05-07 19:33:31 +02004332void ram_mig_init(void)
4333{
4334 qemu_mutex_init(&XBZRLE.lock);
Dr. David Alan Gilbertce62df52019-08-22 12:54:33 +01004335 register_savevm_live("ram", 0, 4, &savevm_ram_handlers, &ram_state);
David Hildenbrandc7c0e722021-04-29 13:27:02 +02004336 ram_block_notifier_add(&ram_mig_ram_notifier);
Juan Quintela56e93d22015-05-07 19:33:31 +02004337}