/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2011-2015 Red Hat Inc
 *
 * Authors:
 *  Juan Quintela <quintela@redhat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "qemu/bitops.h"
#include "qemu/bitmap.h"
#include "qemu/main-loop.h"
#include "xbzrle.h"
#include "ram.h"
#include "migration.h"
#include "migration/register.h"
#include "migration/misc.h"
#include "qemu-file.h"
#include "postcopy-ram.h"
#include "page_cache.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "qapi/qapi-types-migration.h"
#include "qapi/qapi-events-migration.h"
#include "qapi/qmp/qerror.h"
#include "trace.h"
#include "exec/ram_addr.h"
#include "exec/target_page.h"
#include "qemu/rcu_queue.h"
#include "migration/colo.h"
#include "block.h"
#include "sysemu/cpu-throttle.h"
#include "savevm.h"
#include "qemu/iov.h"
#include "multifd.h"
#include "sysemu/runstate.h"

#include "hw/boards.h" /* for machine_dump_guest_core() */

#if defined(__linux__)
#include "qemu/userfaultfd.h"
#endif /* defined(__linux__) */

/***********************************************************/
/* ram save/restore */

/* RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS; it
 * worked for pages that were filled with the same char.  We switched
 * it to only search for the zero value.  And to avoid confusion with
 * RAM_SAVE_FLAG_COMPRESS_PAGE it was renamed.
 */

#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
#define RAM_SAVE_FLAG_ZERO     0x02
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
#define RAM_SAVE_FLAG_PAGE     0x08
#define RAM_SAVE_FLAG_EOS      0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE   0x40
/* 0x80 is reserved in migration.h start with 0x100 next */
#define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100
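/*
 * Note: these flag bits travel in the low bits of the be64 word that
 * save_page_header() writes in front of each page, with the page's
 * offset inside its RAMBlock in the upper bits (e.g. a zero page at
 * offset 0x2000 is announced as 0x2000 | RAM_SAVE_FLAG_ZERO).
 */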

XBZRLECacheStats xbzrle_counters;

/* struct contains XBZRLE cache and a static page
   used by the compression */
static struct {
    /* buffer used for XBZRLE encoding */
    uint8_t *encoded_buf;
    /* buffer for storing page content */
    uint8_t *current_buf;
    /* Cache for XBZRLE, Protected by lock. */
    PageCache *cache;
    QemuMutex lock;
    /* it will store a page full of zeros */
    uint8_t *zero_target_page;
    /* buffer used for XBZRLE decoding */
    uint8_t *decoded_buf;
} XBZRLE;

static void XBZRLE_cache_lock(void)
{
    if (migrate_use_xbzrle()) {
        qemu_mutex_lock(&XBZRLE.lock);
    }
}

static void XBZRLE_cache_unlock(void)
{
    if (migrate_use_xbzrle()) {
        qemu_mutex_unlock(&XBZRLE.lock);
    }
}

/**
 * xbzrle_cache_resize: resize the xbzrle cache
 *
 * This function is called from migrate_params_apply in main
 * thread, possibly while a migration is in progress. A running
 * migration may be using the cache and might finish during this call,
 * hence changes to the cache are protected by XBZRLE.lock().
 *
 * Returns 0 for success or -1 for error
 *
 * @new_size: new cache size
 * @errp: set *errp if the check failed, with reason
 */
int xbzrle_cache_resize(uint64_t new_size, Error **errp)
{
    PageCache *new_cache;
    int64_t ret = 0;

    /* Check for truncation */
    if (new_size != (size_t)new_size) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
                   "exceeding address space");
        return -1;
    }

    if (new_size == migrate_xbzrle_cache_size()) {
        /* nothing to do */
        return 0;
    }

    XBZRLE_cache_lock();

    if (XBZRLE.cache != NULL) {
        new_cache = cache_init(new_size, TARGET_PAGE_SIZE, errp);
        if (!new_cache) {
            ret = -1;
            goto out;
        }

        cache_fini(XBZRLE.cache);
        XBZRLE.cache = new_cache;
    }
out:
    XBZRLE_cache_unlock();
    return ret;
}

bool ramblock_is_ignored(RAMBlock *block)
{
    return !qemu_ram_is_migratable(block) ||
           (migrate_ignore_shared() && qemu_ram_is_shared(block));
}

#undef RAMBLOCK_FOREACH

int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque)
{
    RAMBlock *block;
    int ret = 0;

    RCU_READ_LOCK_GUARD();

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        ret = func(block, opaque);
        if (ret) {
            break;
        }
    }
    return ret;
}

static void ramblock_recv_map_init(void)
{
    RAMBlock *rb;

    RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
        assert(!rb->receivedmap);
        rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits());
    }
}

int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr)
{
    return test_bit(ramblock_recv_bitmap_offset(host_addr, rb),
                    rb->receivedmap);
}

bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset)
{
    return test_bit(byte_offset >> TARGET_PAGE_BITS, rb->receivedmap);
}

void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr)
{
    set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb), rb->receivedmap);
}

void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr,
                                    size_t nr)
{
    bitmap_set_atomic(rb->receivedmap,
                      ramblock_recv_bitmap_offset(host_addr, rb),
                      nr);
}

#define RAMBLOCK_RECV_BITMAP_ENDING (0x0123456789abcdefULL)

/*
 * Format: bitmap_size (8 bytes) + whole_bitmap (N bytes).
 *
 * Returns >0 if success with sent bytes, or <0 if error.
 */
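/*
 * Concretely, the stream written below is: a be64 with the padded
 * bitmap size in bytes, the little-endian bitmap itself, and a
 * trailing be64 RAMBLOCK_RECV_BITMAP_ENDING marker used to detect a
 * corrupted middle part on the receiving side.
 */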
int64_t ramblock_recv_bitmap_send(QEMUFile *file,
                                  const char *block_name)
{
    RAMBlock *block = qemu_ram_block_by_name(block_name);
    unsigned long *le_bitmap, nbits;
    uint64_t size;

    if (!block) {
        error_report("%s: invalid block name: %s", __func__, block_name);
        return -1;
    }

    nbits = block->postcopy_length >> TARGET_PAGE_BITS;

    /*
     * Make sure the tmp bitmap buffer is big enough, e.g., on 32bit
     * machines we may need 4 more bytes for padding (see below
     * comment). So extend it a bit before hand.
     */
    le_bitmap = bitmap_new(nbits + BITS_PER_LONG);

    /*
     * Always use little endian when sending the bitmap. This is
     * required so that the bitmap can be parsed even when source and
     * destination VMs are not using the same endianness. (Note: big
     * endian won't work.)
     */
    bitmap_to_le(le_bitmap, block->receivedmap, nbits);

    /* Size of the bitmap, in bytes */
    size = DIV_ROUND_UP(nbits, 8);

    /*
     * size is always aligned to 8 bytes for 64bit machines, but it
     * may not be true for 32bit machines. We need this padding to
     * make sure the migration can survive even between 32bit and
     * 64bit machines.
     */
    size = ROUND_UP(size, 8);

    qemu_put_be64(file, size);
    qemu_put_buffer(file, (const uint8_t *)le_bitmap, size);
    /*
     * Mark as an end, in case the middle part is screwed up due to
     * some "mysterious" reason.
     */
    qemu_put_be64(file, RAMBLOCK_RECV_BITMAP_ENDING);
    qemu_fflush(file);

    g_free(le_bitmap);

    if (qemu_file_get_error(file)) {
        return qemu_file_get_error(file);
    }

    return size + sizeof(size);
}

/*
 * An outstanding page request, on the source, having been received
 * and queued
 */
struct RAMSrcPageRequest {
    RAMBlock *rb;
    hwaddr    offset;
    hwaddr    len;

    QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
};

/* State of RAM for migration */
struct RAMState {
    /* QEMUFile used for this migration */
    QEMUFile *f;
    /* UFFD file descriptor, used in 'write-tracking' migration */
    int uffdio_fd;
    /* Last block that we have visited searching for dirty pages */
    RAMBlock *last_seen_block;
    /* Last block from where we have sent data */
    RAMBlock *last_sent_block;
    /* Last dirty target page we have sent */
    ram_addr_t last_page;
    /* last ram version we have seen */
    uint32_t last_version;
    /* How many times we have found too many dirty pages */
    int dirty_rate_high_cnt;
    /* these variables are used for bitmap sync */
    /* last time we did a full bitmap_sync */
    int64_t time_last_bitmap_sync;
    /* bytes transferred at start_time */
    uint64_t bytes_xfer_prev;
    /* number of dirty pages since start_time */
    uint64_t num_dirty_pages_period;
    /* xbzrle misses since the beginning of the period */
    uint64_t xbzrle_cache_miss_prev;
    /* Amount of xbzrle pages since the beginning of the period */
    uint64_t xbzrle_pages_prev;
    /* Amount of xbzrle encoded bytes since the beginning of the period */
    uint64_t xbzrle_bytes_prev;
    /* Start using XBZRLE (e.g., after the first round). */
    bool xbzrle_enabled;
    /* Are we in the last stage of migration? */
    bool last_stage;
    /* compression statistics since the beginning of the period */
    /* number of times no free thread was available to compress data */
    uint64_t compress_thread_busy_prev;
    /* number of bytes after compression */
    uint64_t compressed_size_prev;
    /* number of compressed pages */
    uint64_t compress_pages_prev;

    /* total handled target pages at the beginning of period */
    uint64_t target_page_count_prev;
    /* total handled target pages since start */
    uint64_t target_page_count;
    /* number of dirty bits in the bitmap */
    uint64_t migration_dirty_pages;
    /* Protects modification of the bitmap and migration dirty pages */
    QemuMutex bitmap_mutex;
    /* The RAMBlock used in the last src_page_requests */
    RAMBlock *last_req_rb;
    /* Queue of outstanding page requests from the destination */
    QemuMutex src_page_req_mutex;
    QSIMPLEQ_HEAD(, RAMSrcPageRequest) src_page_requests;
};
typedef struct RAMState RAMState;

static RAMState *ram_state;

static NotifierWithReturnList precopy_notifier_list;

void precopy_infrastructure_init(void)
{
    notifier_with_return_list_init(&precopy_notifier_list);
}

void precopy_add_notifier(NotifierWithReturn *n)
{
    notifier_with_return_list_add(&precopy_notifier_list, n);
}

void precopy_remove_notifier(NotifierWithReturn *n)
{
    notifier_with_return_remove(n);
}

int precopy_notify(PrecopyNotifyReason reason, Error **errp)
{
    PrecopyNotifyData pnd;
    pnd.reason = reason;
    pnd.errp = errp;

    return notifier_with_return_list_notify(&precopy_notifier_list, &pnd);
}

uint64_t ram_bytes_remaining(void)
{
    return ram_state ? (ram_state->migration_dirty_pages * TARGET_PAGE_SIZE) :
                       0;
}

MigrationStats ram_counters;

/* used by the search for pages to send */
struct PageSearchStatus {
    /* Current block being searched */
    RAMBlock *block;
    /* Current page to search from */
    unsigned long page;
    /* Set once we wrap around */
    bool complete_round;
};
typedef struct PageSearchStatus PageSearchStatus;

CompressionStats compression_counters;

struct CompressParam {
    bool done;
    bool quit;
    bool zero_page;
    QEMUFile *file;
    QemuMutex mutex;
    QemuCond cond;
    RAMBlock *block;
    ram_addr_t offset;

    /* internally used fields */
    z_stream stream;
    uint8_t *originbuf;
};
typedef struct CompressParam CompressParam;

struct DecompressParam {
    bool done;
    bool quit;
    QemuMutex mutex;
    QemuCond cond;
    void *des;
    uint8_t *compbuf;
    int len;
    z_stream stream;
};
typedef struct DecompressParam DecompressParam;

static CompressParam *comp_param;
static QemuThread *compress_threads;
/* comp_done_cond is used to wake up the migration thread when
 * one of the compression threads has finished the compression.
 * comp_done_lock is used to co-work with comp_done_cond.
 */
static QemuMutex comp_done_lock;
static QemuCond comp_done_cond;
/* The empty QEMUFileOps will be used by file in CompressParam */
static const QEMUFileOps empty_ops = { };

static QEMUFile *decomp_file;
static DecompressParam *decomp_param;
static QemuThread *decompress_threads;
static QemuMutex decomp_done_lock;
static QemuCond decomp_done_cond;

static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
                                 ram_addr_t offset, uint8_t *source_buf);

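/*
 * Compression worker loop: it sleeps on param->cond until the
 * migration thread hands it a (block, offset) pair under param->mutex,
 * compresses that page into param->file, then marks param->done and
 * signals comp_done_cond so the migration thread can flush the
 * compressed data into the migration stream.
 */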
static void *do_data_compress(void *opaque)
{
    CompressParam *param = opaque;
    RAMBlock *block;
    ram_addr_t offset;
    bool zero_page;

    qemu_mutex_lock(&param->mutex);
    while (!param->quit) {
        if (param->block) {
            block = param->block;
            offset = param->offset;
            param->block = NULL;
            qemu_mutex_unlock(&param->mutex);

            zero_page = do_compress_ram_page(param->file, &param->stream,
                                             block, offset, param->originbuf);

            qemu_mutex_lock(&comp_done_lock);
            param->done = true;
            param->zero_page = zero_page;
            qemu_cond_signal(&comp_done_cond);
            qemu_mutex_unlock(&comp_done_lock);

            qemu_mutex_lock(&param->mutex);
        } else {
            qemu_cond_wait(&param->cond, &param->mutex);
        }
    }
    qemu_mutex_unlock(&param->mutex);

    return NULL;
}

static void compress_threads_save_cleanup(void)
{
    int i, thread_count;

    if (!migrate_use_compression() || !comp_param) {
        return;
    }

    thread_count = migrate_compress_threads();
    for (i = 0; i < thread_count; i++) {
        /*
         * we use it as an indicator which shows if the thread is
         * properly init'd or not
         */
        if (!comp_param[i].file) {
            break;
        }

        qemu_mutex_lock(&comp_param[i].mutex);
        comp_param[i].quit = true;
        qemu_cond_signal(&comp_param[i].cond);
        qemu_mutex_unlock(&comp_param[i].mutex);

        qemu_thread_join(compress_threads + i);
        qemu_mutex_destroy(&comp_param[i].mutex);
        qemu_cond_destroy(&comp_param[i].cond);
        deflateEnd(&comp_param[i].stream);
        g_free(comp_param[i].originbuf);
        qemu_fclose(comp_param[i].file);
        comp_param[i].file = NULL;
    }
    qemu_mutex_destroy(&comp_done_lock);
    qemu_cond_destroy(&comp_done_cond);
    g_free(compress_threads);
    g_free(comp_param);
    compress_threads = NULL;
    comp_param = NULL;
}

static int compress_threads_save_setup(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return 0;
    }
    thread_count = migrate_compress_threads();
    compress_threads = g_new0(QemuThread, thread_count);
    comp_param = g_new0(CompressParam, thread_count);
    qemu_cond_init(&comp_done_cond);
    qemu_mutex_init(&comp_done_lock);
    for (i = 0; i < thread_count; i++) {
        comp_param[i].originbuf = g_try_malloc(TARGET_PAGE_SIZE);
        if (!comp_param[i].originbuf) {
            goto exit;
        }

        if (deflateInit(&comp_param[i].stream,
                        migrate_compress_level()) != Z_OK) {
            g_free(comp_param[i].originbuf);
            goto exit;
        }

        /* comp_param[i].file is just used as a dummy buffer to save data,
         * set its ops to empty.
         */
        comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops, false);
        comp_param[i].done = true;
        comp_param[i].quit = false;
        qemu_mutex_init(&comp_param[i].mutex);
        qemu_cond_init(&comp_param[i].cond);
        qemu_thread_create(compress_threads + i, "compress",
                           do_data_compress, comp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
    return 0;

exit:
    compress_threads_save_cleanup();
    return -1;
}

/**
 * save_page_header: write page header to wire
 *
 * If this is the 1st block, it also writes the block identification
 *
 * Returns the number of bytes written
 *
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 *          in the lower bits, it contains flags
 */
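/*
 * For example (illustrative values), the first page sent from a block
 * named "pc.ram" at offset 0x1000 as a normal page yields:
 * be64(0x1000 | RAM_SAVE_FLAG_PAGE), one length byte, then the
 * "pc.ram" idstr; later pages of the same block carry
 * RAM_SAVE_FLAG_CONTINUE and omit the idstr.
 */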
static size_t save_page_header(RAMState *rs, QEMUFile *f, RAMBlock *block,
                               ram_addr_t offset)
{
    size_t size, len;

    if (block == rs->last_sent_block) {
        offset |= RAM_SAVE_FLAG_CONTINUE;
    }
    qemu_put_be64(f, offset);
    size = 8;

    if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
        len = strlen(block->idstr);
        qemu_put_byte(f, len);
        qemu_put_buffer(f, (uint8_t *)block->idstr, len);
        size += 1 + len;
        rs->last_sent_block = block;
    }
    return size;
}

/**
 * mig_throttle_guest_down: throttle down the guest
 *
 * Reduce amount of guest cpu execution to hopefully slow down memory
 * writes. If guest dirty memory rate is reduced below the rate at
 * which we can transfer pages to the destination then we should be
 * able to complete migration. Some workloads dirty memory way too
 * fast and will not effectively converge, even with auto-converge.
 */
static void mig_throttle_guest_down(uint64_t bytes_dirty_period,
                                    uint64_t bytes_dirty_threshold)
{
    MigrationState *s = migrate_get_current();
    uint64_t pct_initial = s->parameters.cpu_throttle_initial;
    uint64_t pct_increment = s->parameters.cpu_throttle_increment;
    bool pct_tailslow = s->parameters.cpu_throttle_tailslow;
    int pct_max = s->parameters.max_cpu_throttle;

    uint64_t throttle_now = cpu_throttle_get_percentage();
    uint64_t cpu_now, cpu_ideal, throttle_inc;

    /* We have not started throttling yet. Let's start it. */
    if (!cpu_throttle_active()) {
        cpu_throttle_set(pct_initial);
    } else {
        /* Throttling already on, just increase the rate */
        if (!pct_tailslow) {
            throttle_inc = pct_increment;
        } else {
            /* Compute the ideal CPU percentage used by Guest, which may
             * make the dirty rate match the dirty rate threshold. */
            cpu_now = 100 - throttle_now;
            cpu_ideal = cpu_now * (bytes_dirty_threshold * 1.0 /
                                   bytes_dirty_period);
            throttle_inc = MIN(cpu_now - cpu_ideal, pct_increment);
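            /*
             * Worked example (illustrative): if the guest is currently
             * throttled at 40%, cpu_now is 60; with 300MB dirtied in a
             * period whose threshold is 150MB, cpu_ideal = 60 * 0.5 =
             * 30, so the throttle is raised by MIN(30, pct_increment),
             * capped below at pct_max.
             */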
        }
        cpu_throttle_set(MIN(throttle_now + throttle_inc, pct_max));
    }
}

void mig_throttle_counter_reset(void)
{
    RAMState *rs = ram_state;

    rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    rs->num_dirty_pages_period = 0;
    rs->bytes_xfer_prev = ram_counters.transferred;
}

/**
 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
 *
 * @rs: current RAM state
 * @current_addr: address for the zero page
 *
 * Update the xbzrle cache to reflect a page that's been sent as all 0.
 * The important thing is that a stale (not-yet-0'd) page be replaced
 * by the new data.
 * As a bonus, if the page wasn't in the cache it gets added so that
 * when a small write is made into the 0'd page it gets XBZRLE sent.
 */
static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
{
    if (!rs->xbzrle_enabled) {
        return;
    }

    /* We don't care if this fails to allocate a new cache page
     * as long as it updated an old one */
    cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page,
                 ram_counters.dirty_sync_count);
}

#define ENCODING_FLAG_XBZRLE 0x1

/**
 * save_xbzrle_page: compress and send current page
 *
 * Returns: 1 means that we wrote the page
 *          0 means that page is identical to the one already sent
 *          -1 means that xbzrle would be longer than normal
 *
 * @rs: current RAM state
 * @current_data: pointer to the address of the page contents
 * @current_addr: addr of the page
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 */
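/*
 * On the wire an XBZRLE page is: the usual page header with
 * RAM_SAVE_FLAG_XBZRLE set, one ENCODING_FLAG_XBZRLE byte, a be16
 * encoded length, and then the encoded buffer itself.
 */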
static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
                            ram_addr_t current_addr, RAMBlock *block,
                            ram_addr_t offset)
{
    int encoded_len = 0, bytes_xbzrle;
    uint8_t *prev_cached_page;

    if (!cache_is_cached(XBZRLE.cache, current_addr,
                         ram_counters.dirty_sync_count)) {
        xbzrle_counters.cache_miss++;
        if (!rs->last_stage) {
            if (cache_insert(XBZRLE.cache, current_addr, *current_data,
                             ram_counters.dirty_sync_count) == -1) {
                return -1;
            } else {
                /* update *current_data when the page has been
                   inserted into cache */
                *current_data = get_cached_data(XBZRLE.cache, current_addr);
            }
        }
        return -1;
    }

    /*
     * Reaching here means the page has hit the xbzrle cache, no matter what
     * encoding result it is (normal encoding, overflow or skipping the page),
     * count the page as encoded. This is used to calculate the encoding rate.
     *
     * Example: 2 pages (8KB) being encoded, first page encoding generates 2KB,
     * 2nd page turns out to be skipped (i.e. no new bytes written to the
     * page), the overall encoding rate will be 8KB / 2KB = 4, which has the
     * skipped page included. In this way, the encoding rate can tell if the
     * guest page is good for xbzrle encoding.
     */
    xbzrle_counters.pages++;
    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);

    /* save current buffer into memory */
    memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);

    /* XBZRLE encoding (if there is no overflow) */
    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
                                       TARGET_PAGE_SIZE);

    /*
     * Update the cache contents, so that it corresponds to the data
     * sent, in all cases except where we skip the page.
     */
    if (!rs->last_stage && encoded_len != 0) {
        memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
        /*
         * In the case where we couldn't compress, ensure that the caller
         * sends the data from the cache, since the guest might have
         * changed the RAM since we copied it.
         */
        *current_data = prev_cached_page;
    }

    if (encoded_len == 0) {
        trace_save_xbzrle_page_skipping();
        return 0;
    } else if (encoded_len == -1) {
        trace_save_xbzrle_page_overflow();
        xbzrle_counters.overflow++;
        xbzrle_counters.bytes += TARGET_PAGE_SIZE;
        return -1;
    }

    /* Send XBZRLE based compressed page */
    bytes_xbzrle = save_page_header(rs, rs->f, block,
                                    offset | RAM_SAVE_FLAG_XBZRLE);
    qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE);
    qemu_put_be16(rs->f, encoded_len);
    qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len);
    bytes_xbzrle += encoded_len + 1 + 2;
    /*
     * Like compressed_size (please see update_compress_thread_counts),
     * the xbzrle encoded bytes don't count the 8 byte header with
     * RAM_SAVE_FLAG_CONTINUE.
     */
    xbzrle_counters.bytes += bytes_xbzrle - 8;
    ram_counters.transferred += bytes_xbzrle;

    return 1;
}

/**
 * migration_bitmap_find_dirty: find the next dirty page from start
 *
 * Returns the page offset within memory region of the start of a dirty page
 *
 * @rs: current RAM state
 * @rb: RAMBlock where to search for dirty pages
 * @start: page where we start the search
 */
static inline
unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
                                          unsigned long start)
{
    unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
    unsigned long *bitmap = rb->bmap;

    if (ramblock_is_ignored(rb)) {
        return size;
    }

    return find_next_bit(bitmap, size, start);
}

static void migration_clear_memory_region_dirty_bitmap(RAMBlock *rb,
                                                       unsigned long page)
{
    uint8_t shift;
    hwaddr size, start;

    if (!rb->clear_bmap || !clear_bmap_test_and_clear(rb, page)) {
        return;
    }

    shift = rb->clear_bmap_shift;
    /*
     * CLEAR_BITMAP_SHIFT_MIN should always guarantee this... this
     * can make things easier sometimes since then start address
     * of the small chunk will always be 64 pages aligned so the
     * bitmap will always be aligned to unsigned long. We should
     * even be able to remove this restriction but I'm simply
     * keeping it.
     */
    assert(shift >= 6);

    size = 1ULL << (TARGET_PAGE_BITS + shift);
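    /*
     * E.g. with a 4KiB target page and a clear_bmap_shift of 18, each
     * chunk cleared here covers 1GiB of guest memory.
     */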
    start = QEMU_ALIGN_DOWN((ram_addr_t)page << TARGET_PAGE_BITS, size);
    trace_migration_bitmap_clear_dirty(rb->idstr, start, size, page);
    memory_region_clear_dirty_bitmap(rb->mr, start, size);
}

static void
migration_clear_memory_region_dirty_bitmap_range(RAMBlock *rb,
                                                 unsigned long start,
                                                 unsigned long npages)
{
    unsigned long i, chunk_pages = 1UL << rb->clear_bmap_shift;
    unsigned long chunk_start = QEMU_ALIGN_DOWN(start, chunk_pages);
    unsigned long chunk_end = QEMU_ALIGN_UP(start + npages, chunk_pages);

    /*
     * Clear pages from start to start + npages - 1, so the end boundary is
     * exclusive.
     */
    for (i = chunk_start; i < chunk_end; i += chunk_pages) {
        migration_clear_memory_region_dirty_bitmap(rb, i);
    }
}

/*
 * colo_bitmap_find_dirty: find contiguous dirty pages from start
 *
 * Returns the page offset within memory region of the start of the
 * contiguous dirty page range
 *
 * @rs: current RAM state
 * @rb: RAMBlock where to search for dirty pages
 * @start: page where we start the search
 * @num: the number of contiguous dirty pages
 */
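/*
 * Example: if bits 5..8 of rb->bmap are the first set bits at or after
 * @start, the function returns 5 and stores 4 in *num.
 */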
static inline
unsigned long colo_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
                                     unsigned long start, unsigned long *num)
{
    unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
    unsigned long *bitmap = rb->bmap;
    unsigned long first, next;

    *num = 0;

    if (ramblock_is_ignored(rb)) {
        return size;
    }

    first = find_next_bit(bitmap, size, start);
    if (first >= size) {
        return first;
    }
    next = find_next_zero_bit(bitmap, size, first + 1);
    assert(next >= first);
    *num = next - first;
    return first;
}

static inline bool migration_bitmap_clear_dirty(RAMState *rs,
                                                RAMBlock *rb,
                                                unsigned long page)
{
    bool ret;

    /*
     * Clear dirty bitmap if needed. This _must_ be called before we
     * send any of the page in the chunk because we need to make sure
     * we can capture further page content changes when we sync dirty
     * log the next time. So as long as we are going to send any of
     * the page in the chunk we clear the remote dirty bitmap for all.
     * Clearing it earlier won't be a problem, but too late will.
     */
    migration_clear_memory_region_dirty_bitmap(rb, page);

    ret = test_and_clear_bit(page, rb->bmap);
    if (ret) {
        rs->migration_dirty_pages--;
    }

    return ret;
}

static void dirty_bitmap_clear_section(MemoryRegionSection *section,
                                       void *opaque)
{
    const hwaddr offset = section->offset_within_region;
    const hwaddr size = int128_get64(section->size);
    const unsigned long start = offset >> TARGET_PAGE_BITS;
    const unsigned long npages = size >> TARGET_PAGE_BITS;
    RAMBlock *rb = section->mr->ram_block;
    uint64_t *cleared_bits = opaque;

    /*
     * We don't grab ram_state->bitmap_mutex because we expect to run
     * only when starting migration or during postcopy recovery where
     * we don't have concurrent access.
     */
    if (!migration_in_postcopy() && !migrate_background_snapshot()) {
        migration_clear_memory_region_dirty_bitmap_range(rb, start, npages);
    }
    *cleared_bits += bitmap_count_one_with_offset(rb->bmap, start, npages);
    bitmap_clear(rb->bmap, start, npages);
}

/*
 * Exclude all dirty pages from migration that fall into a discarded range as
 * managed by a RamDiscardManager responsible for the mapped memory region of
 * the RAMBlock. Clear the corresponding bits in the dirty bitmaps.
 *
 * Discarded pages ("logically unplugged") have undefined content and must
 * not get migrated, because even reading these pages for migration might
 * result in undesired behavior.
 *
 * Returns the number of cleared bits in the RAMBlock dirty bitmap.
 *
 * Note: The result is only stable while migrating (precopy/postcopy).
 */
static uint64_t ramblock_dirty_bitmap_clear_discarded_pages(RAMBlock *rb)
{
    uint64_t cleared_bits = 0;

    if (rb->mr && rb->bmap && memory_region_has_ram_discard_manager(rb->mr)) {
        RamDiscardManager *rdm = memory_region_get_ram_discard_manager(rb->mr);
        MemoryRegionSection section = {
            .mr = rb->mr,
            .offset_within_region = 0,
            .size = int128_make64(qemu_ram_get_used_length(rb)),
        };

        ram_discard_manager_replay_discarded(rdm, &section,
                                             dirty_bitmap_clear_section,
                                             &cleared_bits);
    }
    return cleared_bits;
}

/*
 * Check if a host-page aligned page falls into a discarded range as managed by
 * a RamDiscardManager responsible for the mapped memory region of the RAMBlock.
 *
 * Note: The result is only stable while migrating (precopy/postcopy).
 */
bool ramblock_page_is_discarded(RAMBlock *rb, ram_addr_t start)
{
    if (rb->mr && memory_region_has_ram_discard_manager(rb->mr)) {
        RamDiscardManager *rdm = memory_region_get_ram_discard_manager(rb->mr);
        MemoryRegionSection section = {
            .mr = rb->mr,
            .offset_within_region = start,
            .size = int128_make64(qemu_ram_pagesize(rb)),
        };

        return !ram_discard_manager_is_populated(rdm, &section);
    }
    return false;
}

/* Called with RCU critical section */
static void ramblock_sync_dirty_bitmap(RAMState *rs, RAMBlock *rb)
{
    uint64_t new_dirty_pages =
        cpu_physical_memory_sync_dirty_bitmap(rb, 0, rb->used_length);

    rs->migration_dirty_pages += new_dirty_pages;
    rs->num_dirty_pages_period += new_dirty_pages;
}

/**
 * ram_pagesize_summary: calculate all the pagesizes of a VM
 *
 * Returns a summary bitmap of the page sizes of all RAMBlocks
 *
 * For VMs with just normal pages this is equivalent to the host page
 * size. If it's got some huge pages then it's the OR of all the
 * different page sizes.
 */
uint64_t ram_pagesize_summary(void)
{
    RAMBlock *block;
    uint64_t summary = 0;

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        summary |= block->page_size;
    }

    return summary;
}

uint64_t ram_get_total_transferred_pages(void)
{
    return ram_counters.normal + ram_counters.duplicate +
           compression_counters.pages + xbzrle_counters.pages;
}

static void migration_update_rates(RAMState *rs, int64_t end_time)
{
    uint64_t page_count = rs->target_page_count - rs->target_page_count_prev;
    double compressed_size;

    /* calculate period counters */
    ram_counters.dirty_pages_rate = rs->num_dirty_pages_period * 1000
        / (end_time - rs->time_last_bitmap_sync);

    if (!page_count) {
        return;
    }

    if (migrate_use_xbzrle()) {
        double encoded_size, unencoded_size;

        xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss -
            rs->xbzrle_cache_miss_prev) / page_count;
        rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
        unencoded_size = (xbzrle_counters.pages - rs->xbzrle_pages_prev) *
                         TARGET_PAGE_SIZE;
        encoded_size = xbzrle_counters.bytes - rs->xbzrle_bytes_prev;
        if (xbzrle_counters.pages == rs->xbzrle_pages_prev || !encoded_size) {
            xbzrle_counters.encoding_rate = 0;
        } else {
            xbzrle_counters.encoding_rate = unencoded_size / encoded_size;
        }
        rs->xbzrle_pages_prev = xbzrle_counters.pages;
        rs->xbzrle_bytes_prev = xbzrle_counters.bytes;
    }

    if (migrate_use_compression()) {
        compression_counters.busy_rate = (double)(compression_counters.busy -
            rs->compress_thread_busy_prev) / page_count;
        rs->compress_thread_busy_prev = compression_counters.busy;

        compressed_size = compression_counters.compressed_size -
                          rs->compressed_size_prev;
        if (compressed_size) {
            double uncompressed_size = (compression_counters.pages -
                                        rs->compress_pages_prev) * TARGET_PAGE_SIZE;

            /* Compression-Ratio = Uncompressed-size / Compressed-size */
            compression_counters.compression_rate =
                uncompressed_size / compressed_size;

            rs->compress_pages_prev = compression_counters.pages;
            rs->compressed_size_prev = compression_counters.compressed_size;
        }
    }
}

static void migration_trigger_throttle(RAMState *rs)
{
    MigrationState *s = migrate_get_current();
    uint64_t threshold = s->parameters.throttle_trigger_threshold;

    uint64_t bytes_xfer_period = ram_counters.transferred - rs->bytes_xfer_prev;
    uint64_t bytes_dirty_period = rs->num_dirty_pages_period * TARGET_PAGE_SIZE;
    uint64_t bytes_dirty_threshold = bytes_xfer_period * threshold / 100;
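    /*
     * E.g. with throttle_trigger_threshold set to 50, throttling is
     * considered once the guest dirties more than half as many bytes
     * as were transferred during the previous period.
     */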

    /* During block migration the auto-converge logic incorrectly detects
     * that ram migration makes no progress. Avoid this by disabling the
     * throttling logic during the bulk phase of block migration. */
    if (migrate_auto_converge() && !blk_mig_bulk_active()) {
        /* The following detection logic can be refined later. For now:
           Check to see if the ratio between dirtied bytes and the approx.
           amount of bytes that just got transferred since the last time
           we were in this routine reaches the threshold. If that happens
           twice, start or increase throttling. */

        if ((bytes_dirty_period > bytes_dirty_threshold) &&
            (++rs->dirty_rate_high_cnt >= 2)) {
            trace_migration_throttle();
            rs->dirty_rate_high_cnt = 0;
            mig_throttle_guest_down(bytes_dirty_period,
                                    bytes_dirty_threshold);
        }
    }
}

static void migration_bitmap_sync(RAMState *rs)
{
    RAMBlock *block;
    int64_t end_time;

    ram_counters.dirty_sync_count++;

    if (!rs->time_last_bitmap_sync) {
        rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    }

    trace_migration_bitmap_sync_start();
    memory_global_dirty_log_sync();

    qemu_mutex_lock(&rs->bitmap_mutex);
    WITH_RCU_READ_LOCK_GUARD() {
        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
            ramblock_sync_dirty_bitmap(rs, block);
        }
        ram_counters.remaining = ram_bytes_remaining();
    }
    qemu_mutex_unlock(&rs->bitmap_mutex);

    memory_global_after_dirty_log_sync();
    trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);

    end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

    /* more than 1 second = 1000 milliseconds */
    if (end_time > rs->time_last_bitmap_sync + 1000) {
        migration_trigger_throttle(rs);

        migration_update_rates(rs, end_time);

        rs->target_page_count_prev = rs->target_page_count;

        /* reset period counters */
        rs->time_last_bitmap_sync = end_time;
        rs->num_dirty_pages_period = 0;
        rs->bytes_xfer_prev = ram_counters.transferred;
    }
    if (migrate_use_events()) {
        qapi_event_send_migration_pass(ram_counters.dirty_sync_count);
    }
}

static void migration_bitmap_sync_precopy(RAMState *rs)
{
    Error *local_err = NULL;

    /*
     * The current notifier usage is just an optimization to migration, so we
     * don't stop the normal migration process in the error case.
     */
    if (precopy_notify(PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC, &local_err)) {
        error_report_err(local_err);
        local_err = NULL;
    }

    migration_bitmap_sync(rs);

    if (precopy_notify(PRECOPY_NOTIFY_AFTER_BITMAP_SYNC, &local_err)) {
        error_report_err(local_err);
    }
}

static void ram_release_page(const char *rbname, uint64_t offset)
{
    if (!migrate_release_ram() || !migration_in_postcopy()) {
        return;
    }

    ram_discard_range(rbname, offset, TARGET_PAGE_SIZE);
}

/**
 * save_zero_page_to_file: send the zero page to the file
 *
 * Returns the size of data written to the file, 0 means the page is not
 * a zero page
 *
 * @rs: current RAM state
 * @file: the file where the data is saved
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 */
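/*
 * On the wire a zero page is only the page header with
 * RAM_SAVE_FLAG_ZERO set followed by a single 0 byte (the fill byte),
 * so it costs a handful of bytes instead of TARGET_PAGE_SIZE.
 */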
static int save_zero_page_to_file(RAMState *rs, QEMUFile *file,
                                  RAMBlock *block, ram_addr_t offset)
{
    uint8_t *p = block->host + offset;
    int len = 0;

    if (buffer_is_zero(p, TARGET_PAGE_SIZE)) {
        len += save_page_header(rs, file, block, offset | RAM_SAVE_FLAG_ZERO);
        qemu_put_byte(file, 0);
        len += 1;
        ram_release_page(block->idstr, offset);
    }
    return len;
}

/**
 * save_zero_page: send the zero page to the stream
 *
 * Returns the number of pages written.
 *
 * @rs: current RAM state
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 */
static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
{
    int len = save_zero_page_to_file(rs, rs->f, block, offset);

    if (len) {
        ram_counters.duplicate++;
        ram_counters.transferred += len;
        return 1;
    }
    return -1;
}

Xiao Guangrong059ff0f2018-03-30 15:51:23 +08001217/*
1218 * @pages: the number of pages written by the control path,
1219 * < 0 - error
1220 * > 0 - number of pages written
1221 *
1222 * Returns true if the page has been saved, otherwise false is returned.
1223 */
1224static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
1225 int *pages)
1226{
1227 uint64_t bytes_xmit = 0;
1228 int ret;
1229
1230 *pages = -1;
1231 ret = ram_control_save_page(rs->f, block->offset, offset, TARGET_PAGE_SIZE,
1232 &bytes_xmit);
1233 if (ret == RAM_SAVE_CONTROL_NOT_SUPP) {
1234 return false;
1235 }
1236
1237 if (bytes_xmit) {
1238 ram_counters.transferred += bytes_xmit;
1239 *pages = 1;
1240 }
1241
1242 if (ret == RAM_SAVE_CONTROL_DELAYED) {
1243 return true;
1244 }
1245
1246 if (bytes_xmit > 0) {
1247 ram_counters.normal++;
1248 } else if (bytes_xmit == 0) {
1249 ram_counters.duplicate++;
1250 }
1251
1252 return true;
1253}
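/*
 * Usage sketch (mirrors the caller in ram_save_target_page() below): a
 * 'true' return means the control path (e.g. RDMA) already dealt with the
 * page and *pages should simply be propagated:
 *
 *     int res;
 *     if (control_save_page(rs, block, offset, &res)) {
 *         return res;
 *     }
 */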
1254
Xiao Guangrong65dacaa2018-03-30 15:51:27 +08001255/*
1256 * directly send the page to the stream
1257 *
1258 * Returns the number of pages written.
1259 *
1260 * @rs: current RAM state
1261 * @block: block that contains the page we want to send
1262 * @offset: offset inside the block for the page
1263 * @buf: the page to be sent
1264 * @async: send the page asynchronously
1265 */
1266static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
1267 uint8_t *buf, bool async)
1268{
1269 ram_counters.transferred += save_page_header(rs, rs->f, block,
1270 offset | RAM_SAVE_FLAG_PAGE);
1271 if (async) {
1272 qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE,
1273 migrate_release_ram() &
1274 migration_in_postcopy());
1275 } else {
1276 qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE);
1277 }
1278 ram_counters.transferred += TARGET_PAGE_SIZE;
1279 ram_counters.normal++;
1280 return 1;
1281}
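/*
 * Illustrative note (not from the original source): on the wire a normal
 * page is the page header with RAM_SAVE_FLAG_PAGE set followed by
 * TARGET_PAGE_SIZE raw bytes.  The @async variant queues the guest buffer
 * via qemu_put_buffer_async() so the data is not copied at this point,
 * while the synchronous variant copies it into the QEMUFile buffer
 * immediately.
 */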
1282
Juan Quintela56e93d22015-05-07 19:33:31 +02001283/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001284 * ram_save_page: send the given page to the stream
Juan Quintela56e93d22015-05-07 19:33:31 +02001285 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001286 * Returns the number of pages written.
Dr. David Alan Gilbert3fd3c4b2015-12-10 16:31:46 +00001287 * < 0 - error
1288 * >=0 - Number of pages written - this might legally be 0
1289 * if xbzrle noticed the page was the same.
Juan Quintela56e93d22015-05-07 19:33:31 +02001290 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001291 * @rs: current RAM state
Juan Quintela56e93d22015-05-07 19:33:31 +02001292 * @pss: data about the page we want to send
Juan Quintela56e93d22015-05-07 19:33:31 +02001294 */
Juan Quintela05931ec2021-12-15 19:01:21 +01001295static int ram_save_page(RAMState *rs, PageSearchStatus *pss)
Juan Quintela56e93d22015-05-07 19:33:31 +02001296{
1297 int pages = -1;
Juan Quintela56e93d22015-05-07 19:33:31 +02001298 uint8_t *p;
Juan Quintela56e93d22015-05-07 19:33:31 +02001299 bool send_async = true;
zhanghailianga08f6892016-01-15 11:37:44 +08001300 RAMBlock *block = pss->block;
Alexey Romko8bba0042020-01-10 14:51:34 +01001301 ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08001302 ram_addr_t current_addr = block->offset + offset;
Juan Quintela56e93d22015-05-07 19:33:31 +02001303
Dr. David Alan Gilbert2f68e392015-08-13 11:51:30 +01001304 p = block->host + offset;
Dr. David Alan Gilbert1db9d8e2017-04-26 19:37:21 +01001305 trace_ram_save_page(block->idstr, (uint64_t)offset, p);
Juan Quintela56e93d22015-05-07 19:33:31 +02001306
Juan Quintela56e93d22015-05-07 19:33:31 +02001307 XBZRLE_cache_lock();
David Hildenbrand1a373522021-02-16 11:50:39 +01001308 if (rs->xbzrle_enabled && !migration_in_postcopy()) {
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08001309 pages = save_xbzrle_page(rs, &p, current_addr, block,
Juan Quintela05931ec2021-12-15 19:01:21 +01001310 offset);
1311 if (!rs->last_stage) {
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08001312 /* Can't send this cached data async, since the cache page
1313 * might get updated before it gets to the wire
Juan Quintela56e93d22015-05-07 19:33:31 +02001314 */
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08001315 send_async = false;
Juan Quintela56e93d22015-05-07 19:33:31 +02001316 }
1317 }
1318
1319 /* XBZRLE overflow or normal page */
1320 if (pages == -1) {
Xiao Guangrong65dacaa2018-03-30 15:51:27 +08001321 pages = save_normal_page(rs, block, offset, p, send_async);
Juan Quintela56e93d22015-05-07 19:33:31 +02001322 }
1323
1324 XBZRLE_cache_unlock();
1325
1326 return pages;
1327}
1328
Juan Quintelab9ee2f72016-01-15 11:40:13 +01001329static int ram_save_multifd_page(RAMState *rs, RAMBlock *block,
1330 ram_addr_t offset)
1331{
Juan Quintela67a4c892020-01-22 16:03:01 +01001332 if (multifd_queue_page(rs->f, block, offset) < 0) {
Ivan Ren713f7622019-06-25 21:18:17 +08001333 return -1;
1334 }
Juan Quintelab9ee2f72016-01-15 11:40:13 +01001335 ram_counters.normal++;
1336
1337 return 1;
1338}
1339
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001340static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
Xiao Guangrong6ef37712018-08-21 16:10:23 +08001341 ram_addr_t offset, uint8_t *source_buf)
Juan Quintela56e93d22015-05-07 19:33:31 +02001342{
Juan Quintela53518d92017-05-04 11:46:24 +02001343 RAMState *rs = ram_state;
Juan Quintela20d549c2021-12-21 10:28:16 +01001344 uint8_t *p = block->host + offset;
Xiao Guangrong6ef37712018-08-21 16:10:23 +08001345 int ret;
Juan Quintela56e93d22015-05-07 19:33:31 +02001346
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001347 if (save_zero_page_to_file(rs, f, block, offset)) {
Juan Quintelae7f2e192021-12-16 09:39:49 +01001348 return true;
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001349 }
1350
Xiao Guangrong6ef37712018-08-21 16:10:23 +08001351 save_page_header(rs, f, block, offset | RAM_SAVE_FLAG_COMPRESS_PAGE);
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08001352
1353 /*
1354 * copy it to an internal buffer to avoid it being modified by the VM
1355 * so that we can catch any error during compression and
1356 * decompression
1357 */
1358 memcpy(source_buf, p, TARGET_PAGE_SIZE);
Xiao Guangrong6ef37712018-08-21 16:10:23 +08001359 ret = qemu_put_compression_data(f, stream, source_buf, TARGET_PAGE_SIZE);
1360 if (ret < 0) {
1361 qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
Liang Lib3be2892016-05-05 15:32:54 +08001362 error_report("compressed data failed!");
Liang Lib3be2892016-05-05 15:32:54 +08001363 }
Juan Quintelae7f2e192021-12-16 09:39:49 +01001364 return false;
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001365}
1366
1367static void
1368update_compress_thread_counts(const CompressParam *param, int bytes_xmit)
1369{
Xiao Guangrong76e03002018-09-06 15:01:00 +08001370 ram_counters.transferred += bytes_xmit;
1371
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001372 if (param->zero_page) {
1373 ram_counters.duplicate++;
Xiao Guangrong76e03002018-09-06 15:01:00 +08001374 return;
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001375 }
Xiao Guangrong76e03002018-09-06 15:01:00 +08001376
1377 /* 8 means a header with RAM_SAVE_FLAG_CONTINUE. */
1378 compression_counters.compressed_size += bytes_xmit - 8;
1379 compression_counters.pages++;
Juan Quintela56e93d22015-05-07 19:33:31 +02001380}
1381
Xiao Guangrong32b05492018-09-06 15:01:01 +08001382static bool save_page_use_compression(RAMState *rs);
1383
Juan Quintelace25d332017-03-15 11:00:51 +01001384static void flush_compressed_data(RAMState *rs)
Juan Quintela56e93d22015-05-07 19:33:31 +02001385{
1386 int idx, len, thread_count;
1387
Xiao Guangrong32b05492018-09-06 15:01:01 +08001388 if (!save_page_use_compression(rs)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02001389 return;
1390 }
1391 thread_count = migrate_compress_threads();
Liang Lia7a9a882016-05-05 15:32:57 +08001392
Liang Li0d9f9a52016-05-05 15:32:59 +08001393 qemu_mutex_lock(&comp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02001394 for (idx = 0; idx < thread_count; idx++) {
Liang Lia7a9a882016-05-05 15:32:57 +08001395 while (!comp_param[idx].done) {
Liang Li0d9f9a52016-05-05 15:32:59 +08001396 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02001397 }
Liang Lia7a9a882016-05-05 15:32:57 +08001398 }
Liang Li0d9f9a52016-05-05 15:32:59 +08001399 qemu_mutex_unlock(&comp_done_lock);
Liang Lia7a9a882016-05-05 15:32:57 +08001400
1401 for (idx = 0; idx < thread_count; idx++) {
1402 qemu_mutex_lock(&comp_param[idx].mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08001403 if (!comp_param[idx].quit) {
Juan Quintelace25d332017-03-15 11:00:51 +01001404 len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001405 /*
1406 * it's safe to fetch zero_page without holding comp_done_lock
1407 * as there is no further request submitted to the thread,
1408 * i.e., the thread should be waiting for a request at this point.
1409 */
1410 update_compress_thread_counts(&comp_param[idx], len);
Juan Quintela56e93d22015-05-07 19:33:31 +02001411 }
Liang Lia7a9a882016-05-05 15:32:57 +08001412 qemu_mutex_unlock(&comp_param[idx].mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02001413 }
1414}
1415
1416static inline void set_compress_params(CompressParam *param, RAMBlock *block,
1417 ram_addr_t offset)
1418{
1419 param->block = block;
1420 param->offset = offset;
1421}
1422
Juan Quintelace25d332017-03-15 11:00:51 +01001423static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block,
1424 ram_addr_t offset)
Juan Quintela56e93d22015-05-07 19:33:31 +02001425{
1426 int idx, thread_count, bytes_xmit = -1, pages = -1;
Xiao Guangrong1d588722018-08-21 16:10:20 +08001427 bool wait = migrate_compress_wait_thread();
Juan Quintela56e93d22015-05-07 19:33:31 +02001428
1429 thread_count = migrate_compress_threads();
Liang Li0d9f9a52016-05-05 15:32:59 +08001430 qemu_mutex_lock(&comp_done_lock);
Xiao Guangrong1d588722018-08-21 16:10:20 +08001431retry:
1432 for (idx = 0; idx < thread_count; idx++) {
1433 if (comp_param[idx].done) {
1434 comp_param[idx].done = false;
1435 bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file);
1436 qemu_mutex_lock(&comp_param[idx].mutex);
1437 set_compress_params(&comp_param[idx], block, offset);
1438 qemu_cond_signal(&comp_param[idx].cond);
1439 qemu_mutex_unlock(&comp_param[idx].mutex);
1440 pages = 1;
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001441 update_compress_thread_counts(&comp_param[idx], bytes_xmit);
Juan Quintela56e93d22015-05-07 19:33:31 +02001442 break;
Juan Quintela56e93d22015-05-07 19:33:31 +02001443 }
1444 }
Xiao Guangrong1d588722018-08-21 16:10:20 +08001445
1446 /*
1447 * wait for a free thread if the user specifies 'compress-wait-thread',
1448 * otherwise we will post the page out in the main thread as a normal page.
1449 */
1450 if (pages < 0 && wait) {
1451 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
1452 goto retry;
1453 }
Liang Li0d9f9a52016-05-05 15:32:59 +08001454 qemu_mutex_unlock(&comp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02001455
1456 return pages;
1457}
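/*
 * Flow sketch (summary of the function above, not a comment from the
 * original source): pick an idle compression thread, flush whatever it
 * produced last time into rs->f, hand it the new (block, offset) and wake
 * it up.  If every thread is busy, either block on comp_done_cond (when
 * 'compress-wait-thread' is set) or return -1 so the caller falls back to
 * sending the page uncompressed from the main thread.
 */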
1458
1459/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001460 * find_dirty_block: find the next dirty page and update any state
1461 * associated with the search process.
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001462 *
Wei Yanga5f7b1a2019-05-11 07:37:29 +08001463 * Returns true if a page is found
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001464 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001465 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001466 * @pss: data about the state of the current dirty page scan
1467 * @again: set to false if the search has scanned the whole of RAM
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001468 */
Juan Quintelaf20e2862017-03-21 16:19:05 +01001469static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001470{
Juan Quintelaf20e2862017-03-21 16:19:05 +01001471 pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
Juan Quintela6f37bb82017-03-13 19:26:29 +01001472 if (pss->complete_round && pss->block == rs->last_seen_block &&
Juan Quintelaa935e302017-03-21 15:36:51 +01001473 pss->page >= rs->last_page) {
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001474 /*
1475 * We've been once around the RAM and haven't found anything.
1476 * Give up.
1477 */
1478 *again = false;
1479 return false;
1480 }
David Hildenbrand542147f2021-04-29 13:27:08 +02001481 if (!offset_in_ramblock(pss->block,
1482 ((ram_addr_t)pss->page) << TARGET_PAGE_BITS)) {
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001483 /* Didn't find anything in this RAM Block */
Juan Quintelaa935e302017-03-21 15:36:51 +01001484 pss->page = 0;
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001485 pss->block = QLIST_NEXT_RCU(pss->block, next);
1486 if (!pss->block) {
Xiao Guangrong48df9d82018-09-06 15:00:59 +08001487 /*
1488 * If memory migration starts over, we will meet a dirtied page
1489 * which may still exist in the compression threads' ring, so we
1490 * should flush the compressed data to make sure the new page
1491 * is not overwritten by the old one in the destination.
1492 *
1493 * Also, if xbzrle is on, stop using data compression at this
1494 * point. In theory, xbzrle can do better than compression.
1495 */
1496 flush_compressed_data(rs);
1497
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001498 /* Hit the end of the list */
1499 pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
1500 /* Flag that we've looped */
1501 pss->complete_round = true;
David Hildenbrand1a373522021-02-16 11:50:39 +01001502 /* After the first round, enable XBZRLE. */
1503 if (migrate_use_xbzrle()) {
1504 rs->xbzrle_enabled = true;
1505 }
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001506 }
1507 /* Didn't find anything this time, but try again on the new block */
1508 *again = true;
1509 return false;
1510 } else {
1511 /* Can go around again, but... */
1512 *again = true;
1513 /* We've found something so probably don't need to */
1514 return true;
1515 }
1516}
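/*
 * Caller's view, roughly (taken from ram_find_and_save_block() further
 * down in this file):
 *
 *     do {
 *         found = get_queued_page(rs, &pss);
 *         if (!found) {
 *             found = find_dirty_block(rs, &pss, &again);
 *         }
 *         if (found) {
 *             pages = ram_save_host_page(rs, &pss);
 *         }
 *     } while (!pages && again);
 */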
1517
Juan Quintela3d0684b2017-03-23 15:06:39 +01001518/**
1519 * unqueue_page: gets a page off the queue
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001520 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001521 * Helper for 'get_queued_page' - gets a page off the queue
1522 *
1523 * Returns the block of the page (or NULL if none available)
1524 *
Juan Quintelaec481c62017-03-20 22:12:40 +01001525 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001526 * @offset: used to return the offset within the RAMBlock
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001527 */
Juan Quintelaf20e2862017-03-21 16:19:05 +01001528static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001529{
1530 RAMBlock *block = NULL;
1531
Xiao Guangrongae526e32018-08-21 16:10:25 +08001532 if (QSIMPLEQ_EMPTY_ATOMIC(&rs->src_page_requests)) {
1533 return NULL;
1534 }
1535
Daniel Brodsky6e8a3552020-04-03 21:21:08 -07001536 QEMU_LOCK_GUARD(&rs->src_page_req_mutex);
Juan Quintelaec481c62017-03-20 22:12:40 +01001537 if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
1538 struct RAMSrcPageRequest *entry =
1539 QSIMPLEQ_FIRST(&rs->src_page_requests);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001540 block = entry->rb;
1541 *offset = entry->offset;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001542
1543 if (entry->len > TARGET_PAGE_SIZE) {
1544 entry->len -= TARGET_PAGE_SIZE;
1545 entry->offset += TARGET_PAGE_SIZE;
1546 } else {
1547 memory_region_unref(block->mr);
Juan Quintelaec481c62017-03-20 22:12:40 +01001548 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001549 g_free(entry);
Dr. David Alan Gilberte03a34f2018-06-13 11:26:42 +01001550 migration_consume_urgent_request();
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001551 }
1552 }
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001553
1554 return block;
1555}
1556
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001557#if defined(__linux__)
1558/**
1559 * poll_fault_page: try to get the next UFFD write fault page and, if a pending
1560 * fault is found, return the RAM block pointer and page offset
1561 *
1562 * Returns pointer to the RAMBlock containing faulting page,
1563 * NULL if no write faults are pending
1564 *
1565 * @rs: current RAM state
1566 * @offset: page offset from the beginning of the block
1567 */
1568static RAMBlock *poll_fault_page(RAMState *rs, ram_addr_t *offset)
1569{
1570 struct uffd_msg uffd_msg;
1571 void *page_address;
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001572 RAMBlock *block;
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001573 int res;
1574
1575 if (!migrate_background_snapshot()) {
1576 return NULL;
1577 }
1578
1579 res = uffd_read_events(rs->uffdio_fd, &uffd_msg, 1);
1580 if (res <= 0) {
1581 return NULL;
1582 }
1583
1584 page_address = (void *)(uintptr_t) uffd_msg.arg.pagefault.address;
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001585 block = qemu_ram_block_from_host(page_address, false, offset);
1586 assert(block && (block->flags & RAM_UF_WRITEPROTECT) != 0);
1587 return block;
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001588}
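/*
 * Note (explanatory, not from the original source): when a fault is
 * reported here, get_queued_page() treats the returned block/offset just
 * like an explicit postcopy page request, so the faulting page is saved
 * next and later un-write-protected via ram_save_release_protection(),
 * unblocking the vCPU that touched it.
 */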
1589
1590/**
1591 * ram_save_release_protection: release UFFD write protection after
1592 * a range of pages has been saved
1593 *
1594 * @rs: current RAM state
1595 * @pss: page-search-status structure
1596 * @start_page: index of the first page in the range relative to pss->block
1597 *
1598 * Returns 0 on success, negative value in case of an error
1599*/
1600static int ram_save_release_protection(RAMState *rs, PageSearchStatus *pss,
1601 unsigned long start_page)
1602{
1603 int res = 0;
1604
1605 /* Check if page is from UFFD-managed region. */
1606 if (pss->block->flags & RAM_UF_WRITEPROTECT) {
1607 void *page_address = pss->block->host + (start_page << TARGET_PAGE_BITS);
1608 uint64_t run_length = (pss->page - start_page + 1) << TARGET_PAGE_BITS;
1609
1610 /* Flush async buffers before un-protect. */
1611 qemu_fflush(rs->f);
1612 /* Un-protect memory range. */
1613 res = uffd_change_protection(rs->uffdio_fd, page_address, run_length,
1614 false, false);
1615 }
1616
1617 return res;
1618}
1619
1620/* ram_write_tracking_available: check if the kernel supports the required
1621 * UFFD features
1622 * Returns true if it does, false otherwise
1623 */
1624bool ram_write_tracking_available(void)
1625{
1626 uint64_t uffd_features;
1627 int res;
1628
1629 res = uffd_query_features(&uffd_features);
1630 return (res == 0 &&
1631 (uffd_features & UFFD_FEATURE_PAGEFAULT_FLAG_WP) != 0);
1632}
1633
1634/* ram_write_tracking_compatible: check if guest configuration is
1635 * compatible with 'write-tracking'
1636 *
1637 * Returns true if compatible, false otherwise
1638 */
1639bool ram_write_tracking_compatible(void)
1640{
1641 const uint64_t uffd_ioctls_mask = BIT(_UFFDIO_WRITEPROTECT);
1642 int uffd_fd;
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001643 RAMBlock *block;
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001644 bool ret = false;
1645
1646 /* Open UFFD file descriptor */
1647 uffd_fd = uffd_create_fd(UFFD_FEATURE_PAGEFAULT_FLAG_WP, false);
1648 if (uffd_fd < 0) {
1649 return false;
1650 }
1651
1652 RCU_READ_LOCK_GUARD();
1653
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001654 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001655 uint64_t uffd_ioctls;
1656
1657 /* Nothing to do with read-only and MMIO-writable regions */
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001658 if (block->mr->readonly || block->mr->rom_device) {
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001659 continue;
1660 }
1661 /* Try to register block memory via UFFD-IO to track writes */
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001662 if (uffd_register_memory(uffd_fd, block->host, block->max_length,
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001663 UFFDIO_REGISTER_MODE_WP, &uffd_ioctls)) {
1664 goto out;
1665 }
1666 if ((uffd_ioctls & uffd_ioctls_mask) != uffd_ioctls_mask) {
1667 goto out;
1668 }
1669 }
1670 ret = true;
1671
1672out:
1673 uffd_close_fd(uffd_fd);
1674 return ret;
1675}
1676
David Hildenbrandf7b9dcf2021-10-11 19:53:45 +02001677static inline void populate_read_range(RAMBlock *block, ram_addr_t offset,
1678 ram_addr_t size)
1679{
1680 /*
1681 * We read one byte of each page; this will preallocate page tables if
1682 * required and populate the shared zeropage on MAP_PRIVATE anonymous memory
1683 * where no page was populated yet. This might require adaptation when
1684 * supporting other mappings, like shmem.
1685 */
1686 for (; offset < size; offset += block->page_size) {
1687 char tmp = *((char *)block->host + offset);
1688
1689 /* Don't optimize the read out */
1690 asm volatile("" : "+r" (tmp));
1691 }
1692}
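/*
 * Note on the asm above (explanatory, not from the original source): the
 * empty inline asm with the "+r"(tmp) constraint tells the compiler that
 * 'tmp' is read and possibly modified, so the load from the page cannot be
 * optimised away even though the value is never otherwise used.
 */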
1693
David Hildenbrand6fee3a12021-10-11 19:53:46 +02001694static inline int populate_read_section(MemoryRegionSection *section,
1695 void *opaque)
1696{
1697 const hwaddr size = int128_get64(section->size);
1698 hwaddr offset = section->offset_within_region;
1699 RAMBlock *block = section->mr->ram_block;
1700
1701 populate_read_range(block, offset, size);
1702 return 0;
1703}
1704
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001705/*
David Hildenbrandf7b9dcf2021-10-11 19:53:45 +02001706 * ram_block_populate_read: preallocate page tables and populate pages in the
1707 * RAM block by reading a byte of each page.
Andrey Gruzdeveeccb992021-04-01 12:22:25 +03001708 *
1709 * Since it's used solely for the userfault_fd WP feature, here we just
1710 * hardcode the page size to qemu_real_host_page_size.
1711 *
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001712 * @rb: RAM block to populate
Andrey Gruzdeveeccb992021-04-01 12:22:25 +03001713 */
David Hildenbrand6fee3a12021-10-11 19:53:46 +02001714static void ram_block_populate_read(RAMBlock *rb)
Andrey Gruzdeveeccb992021-04-01 12:22:25 +03001715{
David Hildenbrand6fee3a12021-10-11 19:53:46 +02001716 /*
1717 * Skip populating all pages that fall into a discarded range as managed by
1718 * a RamDiscardManager responsible for the mapped memory region of the
1719 * RAMBlock. Such discarded ("logically unplugged") parts of a RAMBlock
1720 * must not get populated automatically. We don't have to track
1721 * modifications via userfaultfd WP reliably, because these pages will
1722 * not be part of the migration stream either way -- see
1723 * ramblock_dirty_bitmap_exclude_discarded_pages().
1724 *
1725 * Note: The result is only stable while migrating (precopy/postcopy).
1726 */
1727 if (rb->mr && memory_region_has_ram_discard_manager(rb->mr)) {
1728 RamDiscardManager *rdm = memory_region_get_ram_discard_manager(rb->mr);
1729 MemoryRegionSection section = {
1730 .mr = rb->mr,
1731 .offset_within_region = 0,
1732 .size = rb->mr->size,
1733 };
1734
1735 ram_discard_manager_replay_populated(rdm, &section,
1736 populate_read_section, NULL);
1737 } else {
1738 populate_read_range(rb, 0, rb->used_length);
1739 }
Andrey Gruzdeveeccb992021-04-01 12:22:25 +03001740}
1741
1742/*
1743 * ram_write_tracking_prepare: prepare for UFFD-WP memory tracking
1744 */
1745void ram_write_tracking_prepare(void)
1746{
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001747 RAMBlock *block;
Andrey Gruzdeveeccb992021-04-01 12:22:25 +03001748
1749 RCU_READ_LOCK_GUARD();
1750
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001751 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Andrey Gruzdeveeccb992021-04-01 12:22:25 +03001752 /* Nothing to do with read-only and MMIO-writable regions */
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001753 if (block->mr->readonly || block->mr->rom_device) {
Andrey Gruzdeveeccb992021-04-01 12:22:25 +03001754 continue;
1755 }
1756
1757 /*
1758 * Populate pages of the RAM block before enabling userfault_fd
1759 * write protection.
1760 *
1761 * This stage is required since ioctl(UFFDIO_WRITEPROTECT) with
1762 * UFFDIO_WRITEPROTECT_MODE_WP mode setting would silently skip
1763 * pages with pte_none() entries in page table.
1764 */
David Hildenbrandf7b9dcf2021-10-11 19:53:45 +02001765 ram_block_populate_read(block);
Andrey Gruzdeveeccb992021-04-01 12:22:25 +03001766 }
1767}
1768
1769/*
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001770 * ram_write_tracking_start: start UFFD-WP memory tracking
1771 *
1772 * Returns 0 for success or negative value in case of error
1773 */
1774int ram_write_tracking_start(void)
1775{
1776 int uffd_fd;
1777 RAMState *rs = ram_state;
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001778 RAMBlock *block;
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001779
1780 /* Open UFFD file descriptor */
1781 uffd_fd = uffd_create_fd(UFFD_FEATURE_PAGEFAULT_FLAG_WP, true);
1782 if (uffd_fd < 0) {
1783 return uffd_fd;
1784 }
1785 rs->uffdio_fd = uffd_fd;
1786
1787 RCU_READ_LOCK_GUARD();
1788
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001789 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001790 /* Nothing to do with read-only and MMIO-writable regions */
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001791 if (block->mr->readonly || block->mr->rom_device) {
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001792 continue;
1793 }
1794
1795 /* Register block memory with UFFD to track writes */
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001796 if (uffd_register_memory(rs->uffdio_fd, block->host,
1797 block->max_length, UFFDIO_REGISTER_MODE_WP, NULL)) {
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001798 goto fail;
1799 }
1800 /* Apply UFFD write protection to the block memory range */
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001801 if (uffd_change_protection(rs->uffdio_fd, block->host,
1802 block->max_length, true, false)) {
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001803 goto fail;
1804 }
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001805 block->flags |= RAM_UF_WRITEPROTECT;
1806 memory_region_ref(block->mr);
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001807
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001808 trace_ram_write_tracking_ramblock_start(block->idstr, block->page_size,
1809 block->host, block->max_length);
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001810 }
1811
1812 return 0;
1813
1814fail:
1815 error_report("ram_write_tracking_start() failed: restoring initial memory state");
1816
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001817 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
1818 if ((block->flags & RAM_UF_WRITEPROTECT) == 0) {
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001819 continue;
1820 }
1821 /*
1822 * In case some memory block failed to be write-protected
1823 * remove protection and unregister all succeeded RAM blocks
1824 */
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001825 uffd_change_protection(rs->uffdio_fd, block->host, block->max_length,
1826 false, false);
1827 uffd_unregister_memory(rs->uffdio_fd, block->host, block->max_length);
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001828 /* Cleanup flags and remove reference */
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001829 block->flags &= ~RAM_UF_WRITEPROTECT;
1830 memory_region_unref(block->mr);
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001831 }
1832
1833 uffd_close_fd(uffd_fd);
1834 rs->uffdio_fd = -1;
1835 return -1;
1836}
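/*
 * Big-picture sketch of the background-snapshot write tracking (derived
 * from the functions in this file, not a comment from the original source):
 *
 *     ram_write_tracking_prepare();   // touch every page so PTEs exist
 *     ram_write_tracking_start();     // register + write-protect with UFFD
 *     ...                             // faults are picked up by poll_fault_page()
 *                                     // and pages are un-protected again by
 *                                     // ram_save_release_protection()
 *     ram_write_tracking_stop();      // drop protection, close the uffd fd
 */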
1837
1838/**
1839 * ram_write_tracking_stop: stop UFFD-WP memory tracking and remove protection
1840 */
1841void ram_write_tracking_stop(void)
1842{
1843 RAMState *rs = ram_state;
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001844 RAMBlock *block;
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001845
1846 RCU_READ_LOCK_GUARD();
1847
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001848 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
1849 if ((block->flags & RAM_UF_WRITEPROTECT) == 0) {
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001850 continue;
1851 }
1852 /* Remove protection and unregister all affected RAM blocks */
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001853 uffd_change_protection(rs->uffdio_fd, block->host, block->max_length,
1854 false, false);
1855 uffd_unregister_memory(rs->uffdio_fd, block->host, block->max_length);
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001856
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001857 trace_ram_write_tracking_ramblock_stop(block->idstr, block->page_size,
1858 block->host, block->max_length);
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001859
1860 /* Cleanup flags and remove reference */
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001861 block->flags &= ~RAM_UF_WRITEPROTECT;
1862 memory_region_unref(block->mr);
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001863 }
1864
1865 /* Finally close UFFD file descriptor */
1866 uffd_close_fd(rs->uffdio_fd);
1867 rs->uffdio_fd = -1;
1868}
1869
1870#else
1871/* No target OS support, stubs just fail or ignore */
1872
1873static RAMBlock *poll_fault_page(RAMState *rs, ram_addr_t *offset)
1874{
1875 (void) rs;
1876 (void) offset;
1877
1878 return NULL;
1879}
1880
1881static int ram_save_release_protection(RAMState *rs, PageSearchStatus *pss,
1882 unsigned long start_page)
1883{
1884 (void) rs;
1885 (void) pss;
1886 (void) start_page;
1887
1888 return 0;
1889}
1890
1891bool ram_write_tracking_available(void)
1892{
1893 return false;
1894}
1895
1896bool ram_write_tracking_compatible(void)
1897{
1898 assert(0);
1899 return false;
1900}
1901
1902int ram_write_tracking_start(void)
1903{
1904 assert(0);
1905 return -1;
1906}
1907
1908void ram_write_tracking_stop(void)
1909{
1910 assert(0);
1911}
1912#endif /* defined(__linux__) */
1913
Juan Quintela3d0684b2017-03-23 15:06:39 +01001914/**
Li Qiangff1543a2019-05-24 23:28:32 -07001915 * get_queued_page: unqueue a page from the postcopy requests
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001916 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001917 * Skips pages that are already sent (!dirty)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001918 *
Wei Yanga5f7b1a2019-05-11 07:37:29 +08001919 * Returns true if a queued page is found
Juan Quintela3d0684b2017-03-23 15:06:39 +01001920 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001921 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001922 * @pss: data about the state of the current dirty page scan
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001923 */
Juan Quintelaf20e2862017-03-21 16:19:05 +01001924static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001925{
1926 RAMBlock *block;
1927 ram_addr_t offset;
1928 bool dirty;
1929
1930 do {
Juan Quintelaf20e2862017-03-21 16:19:05 +01001931 block = unqueue_page(rs, &offset);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001932 /*
1933 * We're sending this page, and since it's postcopy nothing else
1934 * will dirty it, and we must make sure it doesn't get sent again
1935 * even if this queue request was received after the background
1936 * search already sent it.
1937 */
1938 if (block) {
Juan Quintelaf20e2862017-03-21 16:19:05 +01001939 unsigned long page;
1940
Juan Quintela6b6712e2017-03-22 15:18:04 +01001941 page = offset >> TARGET_PAGE_BITS;
1942 dirty = test_bit(page, block->bmap);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001943 if (!dirty) {
Juan Quintela06b10682017-03-21 15:18:05 +01001944 trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
Wei Yang64737602019-08-19 14:18:43 +08001945 page);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001946 } else {
Juan Quintelaf20e2862017-03-21 16:19:05 +01001947 trace_get_queued_page(block->idstr, (uint64_t)offset, page);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001948 }
1949 }
1950
1951 } while (block && !dirty);
1952
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001953 if (!block) {
1954 /*
1955 * Poll write faults too if background snapshot is enabled; that's
1956 * when we have vcpus got blocked by the write protected pages.
1957 */
1958 block = poll_fault_page(rs, &offset);
1959 }
1960
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001961 if (block) {
1962 /*
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001963 * We want the background search to continue from the queued page
1964 * since the guest is likely to want other pages near to the page
1965 * it just requested.
1966 */
1967 pss->block = block;
Juan Quintelaa935e302017-03-21 15:36:51 +01001968 pss->page = offset >> TARGET_PAGE_BITS;
Wei Yang422314e2019-06-05 09:08:28 +08001969
1970 /*
1971 * This unqueued page would break the "one round" check, even if it is
1972 * really rare.
1973 */
1974 pss->complete_round = false;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001975 }
1976
1977 return !!block;
1978}
1979
Juan Quintela56e93d22015-05-07 19:33:31 +02001980/**
Juan Quintela5e58f962017-04-03 22:06:54 +02001981 * migration_page_queue_free: drop any remaining pages in the ram
1982 * request queue
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001983 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001984 * It should be empty at the end anyway, but in error cases there may
1985 * be some left. In case any page is left, we drop it.
1986 *
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001987 */
Juan Quintela83c13382017-05-04 11:45:01 +02001988static void migration_page_queue_free(RAMState *rs)
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001989{
Juan Quintelaec481c62017-03-20 22:12:40 +01001990 struct RAMSrcPageRequest *mspr, *next_mspr;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001991 /* This queue should generally be empty - but in the case of a failed
1992 * migration it might have some droppings in it.
1993 */
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01001994 RCU_READ_LOCK_GUARD();
Juan Quintelaec481c62017-03-20 22:12:40 +01001995 QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001996 memory_region_unref(mspr->rb->mr);
Juan Quintelaec481c62017-03-20 22:12:40 +01001997 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001998 g_free(mspr);
1999 }
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002000}
2001
2002/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002003 * ram_save_queue_pages: queue the page for transmission
2004 *
2005 * A request from postcopy destination for example.
2006 *
2007 * Returns zero on success or negative on error
2008 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002009 * @rbname: Name of the RAMBlock of the request. NULL means the
2010 * same as the last one.
2011 * @start: starting address from the start of the RAMBlock
2012 * @len: length (in bytes) to send
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002013 */
Juan Quintela96506892017-03-14 18:41:03 +01002014int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002015{
2016 RAMBlock *ramblock;
Juan Quintela53518d92017-05-04 11:46:24 +02002017 RAMState *rs = ram_state;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002018
Juan Quintela93604472017-06-06 19:49:03 +02002019 ram_counters.postcopy_requests++;
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01002020 RCU_READ_LOCK_GUARD();
2021
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002022 if (!rbname) {
2023 /* Reuse last RAMBlock */
Juan Quintela68a098f2017-03-14 13:48:42 +01002024 ramblock = rs->last_req_rb;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002025
2026 if (!ramblock) {
2027 /*
2028 * Shouldn't happen, we can't reuse the last RAMBlock if
2029 * it's the 1st request.
2030 */
2031 error_report("ram_save_queue_pages no previous block");
Daniel Henrique Barboza03acb4e2020-01-06 15:23:31 -03002032 return -1;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002033 }
2034 } else {
2035 ramblock = qemu_ram_block_by_name(rbname);
2036
2037 if (!ramblock) {
2038 /* We shouldn't be asked for a non-existent RAMBlock */
2039 error_report("ram_save_queue_pages no block '%s'", rbname);
Daniel Henrique Barboza03acb4e2020-01-06 15:23:31 -03002040 return -1;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002041 }
Juan Quintela68a098f2017-03-14 13:48:42 +01002042 rs->last_req_rb = ramblock;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002043 }
2044 trace_ram_save_queue_pages(ramblock->idstr, start, len);
David Hildenbrand542147f2021-04-29 13:27:08 +02002045 if (!offset_in_ramblock(ramblock, start + len - 1)) {
Juan Quintela9458ad62015-11-10 17:42:05 +01002046 error_report("%s request overrun start=" RAM_ADDR_FMT " len="
2047 RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002048 __func__, start, len, ramblock->used_length);
Daniel Henrique Barboza03acb4e2020-01-06 15:23:31 -03002049 return -1;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002050 }
2051
Juan Quintelaec481c62017-03-20 22:12:40 +01002052 struct RAMSrcPageRequest *new_entry =
2053 g_malloc0(sizeof(struct RAMSrcPageRequest));
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002054 new_entry->rb = ramblock;
2055 new_entry->offset = start;
2056 new_entry->len = len;
2057
2058 memory_region_ref(ramblock->mr);
Juan Quintelaec481c62017-03-20 22:12:40 +01002059 qemu_mutex_lock(&rs->src_page_req_mutex);
2060 QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
Dr. David Alan Gilberte03a34f2018-06-13 11:26:42 +01002061 migration_make_urgent_request();
Juan Quintelaec481c62017-03-20 22:12:40 +01002062 qemu_mutex_unlock(&rs->src_page_req_mutex);
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002063
2064 return 0;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002065}
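/*
 * Usage sketch (hypothetical caller, for illustration only): a postcopy
 * page request received from the destination would be queued roughly as
 *
 *     if (ram_save_queue_pages(rbname, start, len) < 0) {
 *         // mark the migration as failed
 *     }
 *
 * where 'rbname' may be NULL to reuse the RAMBlock of the previous request.
 */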
2066
Xiao Guangrongd7400a32018-03-30 15:51:26 +08002067static bool save_page_use_compression(RAMState *rs)
2068{
2069 if (!migrate_use_compression()) {
2070 return false;
2071 }
2072
2073 /*
David Hildenbrand1a373522021-02-16 11:50:39 +01002074 * If xbzrle is enabled (e.g., after the first round of migration), stop
2075 * using the data compression. In theory, xbzrle can do better than
2076 * compression.
Xiao Guangrongd7400a32018-03-30 15:51:26 +08002077 */
David Hildenbrand1a373522021-02-16 11:50:39 +01002078 if (rs->xbzrle_enabled) {
2079 return false;
Xiao Guangrongd7400a32018-03-30 15:51:26 +08002080 }
2081
David Hildenbrand1a373522021-02-16 11:50:39 +01002082 return true;
Xiao Guangrongd7400a32018-03-30 15:51:26 +08002083}
2084
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002085/*
2086 * try to compress the page before posting it out, return true if the page
2087 * has been properly handled by compression, otherwise needs other
2088 * paths to handle it
2089 */
2090static bool save_compress_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
2091{
2092 if (!save_page_use_compression(rs)) {
2093 return false;
2094 }
2095
2096 /*
2097 * When starting the process of a new block, the first page of
2098 * the block should be sent out before other pages in the same
2099 * block, and all the pages in the last block should have been sent
2100 * out; keeping this order is important, because the 'cont' flag
2101 * is used to avoid resending the block name.
2102 *
2103 * We post the first page as a normal page as compression will take
2104 * much CPU resource.
2105 */
2106 if (block != rs->last_sent_block) {
2107 flush_compressed_data(rs);
2108 return false;
2109 }
2110
2111 if (compress_page_with_multi_thread(rs, block, offset) > 0) {
2112 return true;
2113 }
2114
Xiao Guangrong76e03002018-09-06 15:01:00 +08002115 compression_counters.busy++;
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002116 return false;
2117}
2118
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002119/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002120 * ram_save_target_page: save one target page
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002121 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002122 * Returns the number of pages written
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002123 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002124 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002125 * @pss: data about the page we want to send
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002126 */
Juan Quintela05931ec2021-12-15 19:01:21 +01002127static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002128{
Xiao Guangronga8ec91f2018-03-30 15:51:25 +08002129 RAMBlock *block = pss->block;
Alexey Romko8bba0042020-01-10 14:51:34 +01002130 ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
Xiao Guangronga8ec91f2018-03-30 15:51:25 +08002131 int res;
2132
2133 if (control_save_page(rs, block, offset, &res)) {
2134 return res;
2135 }
2136
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002137 if (save_compress_page(rs, block, offset)) {
2138 return 1;
Xiao Guangrongd7400a32018-03-30 15:51:26 +08002139 }
2140
2141 res = save_zero_page(rs, block, offset);
2142 if (res > 0) {
2143 /* Must let xbzrle know, otherwise a previous (now 0'd) cached
2144 * page would be stale
2145 */
2146 if (!save_page_use_compression(rs)) {
2147 XBZRLE_cache_lock();
2148 xbzrle_cache_zero_page(rs, block->offset + offset);
2149 XBZRLE_cache_unlock();
2150 }
Xiao Guangrongd7400a32018-03-30 15:51:26 +08002151 return res;
2152 }
2153
Xiao Guangrongda3f56c2018-03-30 15:51:28 +08002154 /*
Wei Yangc6b3a2e2019-10-26 07:20:00 +08002155 * Do not use multifd for:
2156 * 1. Compression as the first page in the new block should be posted out
2157 * before sending the compressed page
2158 * 2. In postcopy as one whole host page should be placed
Xiao Guangrongda3f56c2018-03-30 15:51:28 +08002159 */
Wei Yangc6b3a2e2019-10-26 07:20:00 +08002160 if (!save_page_use_compression(rs) && migrate_use_multifd()
2161 && !migration_in_postcopy()) {
Juan Quintelab9ee2f72016-01-15 11:40:13 +01002162 return ram_save_multifd_page(rs, block, offset);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002163 }
2164
Juan Quintela05931ec2021-12-15 19:01:21 +01002165 return ram_save_page(rs, pss);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002166}
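/*
 * Dispatch order above, summarised (not a comment from the original
 * source): control/RDMA path first, then the compression threads, then the
 * zero-page check, then multifd (when compression and postcopy are not in
 * use), and finally the plain/xbzrle path in ram_save_page().
 */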
2167
2168/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002169 * ram_save_host_page: save a whole host page
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002170 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002171 * Starting at pss->page, send pages up to the end of the current host
2172 * page. It's valid for the initial offset to point into the middle of
2173 * a host page in which case the remainder of the hostpage is sent.
2174 * Only dirty target pages are sent. Note that the host page size may
2175 * be a huge page for this block.
Dr. David Alan Gilbert1eb3fc02017-05-17 17:58:09 +01002176 * The saving stops at the boundary of the used_length of the block
2177 * if the RAMBlock isn't a multiple of the host page size.
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002178 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002179 * Returns the number of pages written or negative on error
2180 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002181 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002182 * @pss: data about the page we want to send
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002183 */
Juan Quintela05931ec2021-12-15 19:01:21 +01002184static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002185{
2186 int tmppages, pages = 0;
Juan Quintelaa935e302017-03-21 15:36:51 +01002187 size_t pagesize_bits =
2188 qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
Kunkun Jiangba1b7c82021-03-16 20:57:16 +08002189 unsigned long hostpage_boundary =
2190 QEMU_ALIGN_UP(pss->page + 1, pagesize_bits);
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03002191 unsigned long start_page = pss->page;
2192 int res;
Dr. David Alan Gilbert4c011c32017-02-24 18:28:39 +00002193
Yury Kotovfbd162e2019-02-15 20:45:46 +03002194 if (ramblock_is_ignored(pss->block)) {
CĂ©dric Le Goaterb895de52018-05-14 08:57:00 +02002195 error_report("block %s should not be migrated !", pss->block->idstr);
2196 return 0;
2197 }
2198
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002199 do {
Xiao Guangrong1faa5662018-03-30 15:51:24 +08002200 /* Check the pages is dirty and if it is send it */
Kunkun Jiangba1b7c82021-03-16 20:57:16 +08002201 if (migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
Juan Quintela05931ec2021-12-15 19:01:21 +01002202 tmppages = ram_save_target_page(rs, pss);
Kunkun Jiangba1b7c82021-03-16 20:57:16 +08002203 if (tmppages < 0) {
2204 return tmppages;
2205 }
Xiao Guangrong1faa5662018-03-30 15:51:24 +08002206
Kunkun Jiangba1b7c82021-03-16 20:57:16 +08002207 pages += tmppages;
2208 /*
2209 * Allow rate limiting to happen in the middle of huge pages if
2210 * something is sent in the current iteration.
2211 */
2212 if (pagesize_bits > 1 && tmppages > 0) {
2213 migration_rate_limit();
2214 }
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002215 }
Kunkun Jiangba1b7c82021-03-16 20:57:16 +08002216 pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
2217 } while ((pss->page < hostpage_boundary) &&
Alexey Romko8bba0042020-01-10 14:51:34 +01002218 offset_in_ramblock(pss->block,
2219 ((ram_addr_t)pss->page) << TARGET_PAGE_BITS));
Kunkun Jiangba1b7c82021-03-16 20:57:16 +08002220 /* The offset we leave with is the min boundary of host page and block */
2221 pss->page = MIN(pss->page, hostpage_boundary) - 1;
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03002222
2223 res = ram_save_release_protection(rs, pss, start_page);
2224 return (res < 0 ? res : pages);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002225}
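/*
 * Worked example (illustrative): with 2MiB hugepages backing the block and
 * 4KiB target pages, pagesize_bits is 512, so a single call may send up to
 * 512 target pages before stopping at the hostpage boundary; rate limiting
 * is still allowed between those pages because tmppages > 0.
 */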
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002226
2227/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002228 * ram_find_and_save_block: finds a dirty page and sends it to f
Juan Quintela56e93d22015-05-07 19:33:31 +02002229 *
2230 * Called within an RCU critical section.
2231 *
Xiao Guangronge8f37352018-09-03 17:26:44 +08002232 * Returns the number of pages written where zero means no dirty pages,
2233 * or negative on error
Juan Quintela56e93d22015-05-07 19:33:31 +02002234 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002235 * @rs: current RAM state
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002236 *
2237 * On systems where host-page-size > target-page-size it will send all the
2238 * pages in a host page that are dirty.
Juan Quintela56e93d22015-05-07 19:33:31 +02002239 */
Juan Quintela05931ec2021-12-15 19:01:21 +01002240static int ram_find_and_save_block(RAMState *rs)
Juan Quintela56e93d22015-05-07 19:33:31 +02002241{
Dr. David Alan Gilbertb8fb8cb2015-09-23 15:27:10 +01002242 PageSearchStatus pss;
Juan Quintela56e93d22015-05-07 19:33:31 +02002243 int pages = 0;
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002244 bool again, found;
Juan Quintela56e93d22015-05-07 19:33:31 +02002245
Ashijeet Acharya0827b9e2017-02-08 19:58:45 +05302246 /* No dirty page as there is zero RAM */
2247 if (!ram_bytes_total()) {
2248 return pages;
2249 }
2250
Juan Quintela6f37bb82017-03-13 19:26:29 +01002251 pss.block = rs->last_seen_block;
Juan Quintelaa935e302017-03-21 15:36:51 +01002252 pss.page = rs->last_page;
Dr. David Alan Gilbertb8fb8cb2015-09-23 15:27:10 +01002253 pss.complete_round = false;
2254
2255 if (!pss.block) {
2256 pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
2257 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002258
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002259 do {
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002260 again = true;
Juan Quintelaf20e2862017-03-21 16:19:05 +01002261 found = get_queued_page(rs, &pss);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002262
2263 if (!found) {
2264 /* priority queue empty, so just search for something dirty */
Juan Quintelaf20e2862017-03-21 16:19:05 +01002265 found = find_dirty_block(rs, &pss, &again);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002266 }
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002267
2268 if (found) {
Juan Quintela05931ec2021-12-15 19:01:21 +01002269 pages = ram_save_host_page(rs, &pss);
Juan Quintela56e93d22015-05-07 19:33:31 +02002270 }
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002271 } while (!pages && again);
Juan Quintela56e93d22015-05-07 19:33:31 +02002272
Juan Quintela6f37bb82017-03-13 19:26:29 +01002273 rs->last_seen_block = pss.block;
Juan Quintelaa935e302017-03-21 15:36:51 +01002274 rs->last_page = pss.page;
Juan Quintela56e93d22015-05-07 19:33:31 +02002275
2276 return pages;
2277}
2278
2279void acct_update_position(QEMUFile *f, size_t size, bool zero)
2280{
2281 uint64_t pages = size / TARGET_PAGE_SIZE;
Juan Quintelaf7ccd612017-03-13 20:30:21 +01002282
Juan Quintela56e93d22015-05-07 19:33:31 +02002283 if (zero) {
Juan Quintela93604472017-06-06 19:49:03 +02002284 ram_counters.duplicate += pages;
Juan Quintela56e93d22015-05-07 19:33:31 +02002285 } else {
Juan Quintela93604472017-06-06 19:49:03 +02002286 ram_counters.normal += pages;
2287 ram_counters.transferred += size;
Juan Quintela56e93d22015-05-07 19:33:31 +02002288 qemu_update_position(f, size);
2289 }
2290}
2291
Yury Kotovfbd162e2019-02-15 20:45:46 +03002292static uint64_t ram_bytes_total_common(bool count_ignored)
Juan Quintela56e93d22015-05-07 19:33:31 +02002293{
2294 RAMBlock *block;
2295 uint64_t total = 0;
2296
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01002297 RCU_READ_LOCK_GUARD();
2298
Yury Kotovfbd162e2019-02-15 20:45:46 +03002299 if (count_ignored) {
2300 RAMBLOCK_FOREACH_MIGRATABLE(block) {
2301 total += block->used_length;
2302 }
2303 } else {
2304 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
2305 total += block->used_length;
2306 }
Peter Xu99e15582017-05-12 12:17:39 +08002307 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002308 return total;
2309}
2310
Yury Kotovfbd162e2019-02-15 20:45:46 +03002311uint64_t ram_bytes_total(void)
2312{
2313 return ram_bytes_total_common(false);
2314}
2315
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002316static void xbzrle_load_setup(void)
Juan Quintela56e93d22015-05-07 19:33:31 +02002317{
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002318 XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
Juan Quintela56e93d22015-05-07 19:33:31 +02002319}
2320
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002321static void xbzrle_load_cleanup(void)
2322{
2323 g_free(XBZRLE.decoded_buf);
2324 XBZRLE.decoded_buf = NULL;
2325}
2326
Peter Xu7d7c96b2017-10-19 14:31:58 +08002327static void ram_state_cleanup(RAMState **rsp)
2328{
Dr. David Alan Gilbertb9ccaf62018-02-12 16:03:39 +00002329 if (*rsp) {
2330 migration_page_queue_free(*rsp);
2331 qemu_mutex_destroy(&(*rsp)->bitmap_mutex);
2332 qemu_mutex_destroy(&(*rsp)->src_page_req_mutex);
2333 g_free(*rsp);
2334 *rsp = NULL;
2335 }
Peter Xu7d7c96b2017-10-19 14:31:58 +08002336}
2337
Peter Xu84593a02017-10-19 14:31:59 +08002338static void xbzrle_cleanup(void)
2339{
2340 XBZRLE_cache_lock();
2341 if (XBZRLE.cache) {
2342 cache_fini(XBZRLE.cache);
2343 g_free(XBZRLE.encoded_buf);
2344 g_free(XBZRLE.current_buf);
2345 g_free(XBZRLE.zero_target_page);
2346 XBZRLE.cache = NULL;
2347 XBZRLE.encoded_buf = NULL;
2348 XBZRLE.current_buf = NULL;
2349 XBZRLE.zero_target_page = NULL;
2350 }
2351 XBZRLE_cache_unlock();
2352}
2353
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002354static void ram_save_cleanup(void *opaque)
Juan Quintela56e93d22015-05-07 19:33:31 +02002355{
Juan Quintela53518d92017-05-04 11:46:24 +02002356 RAMState **rsp = opaque;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002357 RAMBlock *block;
Juan Quintelaeb859c52017-03-13 21:51:55 +01002358
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03002359 /* We don't use dirty log with background snapshots */
2360 if (!migrate_background_snapshot()) {
2361 /* The caller must hold the iothread lock or be in a bh, so there is
2362 * no write race against the migration bitmap
2363 */
Hyman Huang(黄勇)63b41db2021-06-29 16:01:19 +00002364 if (global_dirty_tracking & GLOBAL_DIRTY_MIGRATION) {
2365 /*
2366 * do not stop dirty log without starting it, since
2367 * memory_global_dirty_log_stop will assert that
2368 * memory_global_dirty_log_start/stop are used in pairs
2369 */
2370 memory_global_dirty_log_stop(GLOBAL_DIRTY_MIGRATION);
2371 }
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03002372 }
Juan Quintela6b6712e2017-03-22 15:18:04 +01002373
Yury Kotovfbd162e2019-02-15 20:45:46 +03002374 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Peter Xu002cad62019-06-03 14:50:56 +08002375 g_free(block->clear_bmap);
2376 block->clear_bmap = NULL;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002377 g_free(block->bmap);
2378 block->bmap = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02002379 }
2380
Peter Xu84593a02017-10-19 14:31:59 +08002381 xbzrle_cleanup();
Juan Quintelaf0afa332017-06-28 11:52:28 +02002382 compress_threads_save_cleanup();
Peter Xu7d7c96b2017-10-19 14:31:58 +08002383 ram_state_cleanup(rsp);
Juan Quintela56e93d22015-05-07 19:33:31 +02002384}
2385
Juan Quintela6f37bb82017-03-13 19:26:29 +01002386static void ram_state_reset(RAMState *rs)
Juan Quintela56e93d22015-05-07 19:33:31 +02002387{
Juan Quintela6f37bb82017-03-13 19:26:29 +01002388 rs->last_seen_block = NULL;
2389 rs->last_sent_block = NULL;
Juan Quintela269ace22017-03-21 15:23:31 +01002390 rs->last_page = 0;
Juan Quintela6f37bb82017-03-13 19:26:29 +01002391 rs->last_version = ram_list.version;
David Hildenbrand1a373522021-02-16 11:50:39 +01002392 rs->xbzrle_enabled = false;
Juan Quintela56e93d22015-05-07 19:33:31 +02002393}
2394
2395#define MAX_WAIT 50 /* ms, half buffered_file limit */
2396
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002397/* **** functions for postcopy ***** */
2398
Pavel Butsykinced1c612017-02-03 18:23:21 +03002399void ram_postcopy_migrated_memory_release(MigrationState *ms)
2400{
2401 struct RAMBlock *block;
Pavel Butsykinced1c612017-02-03 18:23:21 +03002402
Yury Kotovfbd162e2019-02-15 20:45:46 +03002403 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01002404 unsigned long *bitmap = block->bmap;
2405 unsigned long range = block->used_length >> TARGET_PAGE_BITS;
2406 unsigned long run_start = find_next_zero_bit(bitmap, range, 0);
Pavel Butsykinced1c612017-02-03 18:23:21 +03002407
2408 while (run_start < range) {
2409 unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
Alexey Romko8bba0042020-01-10 14:51:34 +01002410 ram_discard_range(block->idstr,
2411 ((ram_addr_t)run_start) << TARGET_PAGE_BITS,
2412 ((ram_addr_t)(run_end - run_start))
2413 << TARGET_PAGE_BITS);
Pavel Butsykinced1c612017-02-03 18:23:21 +03002414 run_start = find_next_zero_bit(bitmap, range, run_end + 1);
2415 }
2416 }
2417}
2418
Juan Quintela3d0684b2017-03-23 15:06:39 +01002419/**
2420 * postcopy_send_discard_bm_ram: discard a RAMBlock
2421 *
2422 * Returns zero on success
2423 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002424 * Callback from postcopy_each_ram_send_discard for each RAMBlock
Juan Quintela3d0684b2017-03-23 15:06:39 +01002425 *
2426 * @ms: current migration state
Wei Yang89dab312019-07-15 10:05:49 +08002427 * @block: RAMBlock to discard
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002428 */
Wei Yang810cf2b2019-07-24 09:07:21 +08002429static int postcopy_send_discard_bm_ram(MigrationState *ms, RAMBlock *block)
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002430{
Juan Quintela6b6712e2017-03-22 15:18:04 +01002431 unsigned long end = block->used_length >> TARGET_PAGE_BITS;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002432 unsigned long current;
Wei Yang1e7cf8c2019-08-19 14:18:42 +08002433 unsigned long *bitmap = block->bmap;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002434
Juan Quintela6b6712e2017-03-22 15:18:04 +01002435 for (current = 0; current < end; ) {
Wei Yang1e7cf8c2019-08-19 14:18:42 +08002436 unsigned long one = find_next_bit(bitmap, end, current);
Wei Yang33a5cb622019-06-27 10:08:21 +08002437 unsigned long zero, discard_length;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002438
Wei Yang33a5cb622019-06-27 10:08:21 +08002439 if (one >= end) {
2440 break;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002441 }
Wei Yang33a5cb622019-06-27 10:08:21 +08002442
Wei Yang1e7cf8c2019-08-19 14:18:42 +08002443 zero = find_next_zero_bit(bitmap, end, one + 1);
Wei Yang33a5cb622019-06-27 10:08:21 +08002444
2445 if (zero >= end) {
2446 discard_length = end - one;
2447 } else {
2448 discard_length = zero - one;
2449 }
Wei Yang810cf2b2019-07-24 09:07:21 +08002450 postcopy_discard_send_range(ms, one, discard_length);
Wei Yang33a5cb622019-06-27 10:08:21 +08002451 current = one + discard_length;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002452 }
2453
2454 return 0;
2455}
2456
Peter Xuf30c2e52021-12-07 19:50:13 +08002457static void postcopy_chunk_hostpages_pass(MigrationState *ms, RAMBlock *block);
2458
Juan Quintela3d0684b2017-03-23 15:06:39 +01002459/**
2460 * postcopy_each_ram_send_discard: discard all RAMBlocks
2461 *
2462 * Returns 0 for success or negative for error
2463 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002464 * Utility for the outgoing postcopy code.
2465 * Calls postcopy_send_discard_bm_ram for each RAMBlock
2466 * passing it bitmap indexes and name.
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002467 * (qemu_ram_foreach_block ends up passing unscaled lengths
2468 * which would mean postcopy code would have to deal with target page)
Juan Quintela3d0684b2017-03-23 15:06:39 +01002469 *
2470 * @ms: current migration state
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002471 */
2472static int postcopy_each_ram_send_discard(MigrationState *ms)
2473{
2474 struct RAMBlock *block;
2475 int ret;
2476
Yury Kotovfbd162e2019-02-15 20:45:46 +03002477 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Wei Yang810cf2b2019-07-24 09:07:21 +08002478 postcopy_discard_send_init(ms, block->idstr);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002479
2480 /*
Peter Xuf30c2e52021-12-07 19:50:13 +08002481         * Deal with TPS != HPS and huge pages. It discards any partially sent
2482         * host-page size chunks and marks any partially dirty host-page size
2483 * chunks as all dirty. In this case the host-page is the host-page
2484 * for the particular RAMBlock, i.e. it might be a huge page.
2485 */
2486 postcopy_chunk_hostpages_pass(ms, block);
2487
2488 /*
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002489 * Postcopy sends chunks of bitmap over the wire, but it
2490         * just needs indexes at this point, which avoids it having
2491 * target page specific code.
2492 */
Wei Yang810cf2b2019-07-24 09:07:21 +08002493 ret = postcopy_send_discard_bm_ram(ms, block);
2494 postcopy_discard_send_finish(ms);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002495 if (ret) {
2496 return ret;
2497 }
2498 }
2499
2500 return 0;
2501}
2502
Juan Quintela3d0684b2017-03-23 15:06:39 +01002503/**
Wei Yang8324ef82019-08-19 14:18:41 +08002504 * postcopy_chunk_hostpages_pass: canonicalize bitmap in hostpages
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002505 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002506 * Helper for postcopy_each_ram_send_discard; it's called once per
2507 * RAMBlock to canonicalize that block's dirty bitmap so that dirty
2508 * runs cover whole host pages.
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002509 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002510 * Postcopy requires that all target pages in a hostpage are dirty or
2511 * clean, not a mix. This function canonicalizes the bitmap.
2512 *
2513 * @ms: current migration state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002514 * @block: block that contains the page we want to canonicalize
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002515 */
Wei Yang1e7cf8c2019-08-19 14:18:42 +08002516static void postcopy_chunk_hostpages_pass(MigrationState *ms, RAMBlock *block)
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002517{
Juan Quintela53518d92017-05-04 11:46:24 +02002518 RAMState *rs = ram_state;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002519 unsigned long *bitmap = block->bmap;
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00002520 unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002521 unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002522 unsigned long run_start;
2523
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00002524 if (block->page_size == TARGET_PAGE_SIZE) {
2525 /* Easy case - TPS==HPS for a non-huge page RAMBlock */
2526 return;
2527 }
2528
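    /*
     * Example: with 2MB hugepages and 4KB target pages, host_ratio is 512.
     * If any of the 512 target-page bits inside a host page is dirty, the
     * loop below marks all 512 as dirty, so postcopy only ever deals in
     * whole host pages.
     */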
Wei Yang1e7cf8c2019-08-19 14:18:42 +08002529 /* Find a dirty page */
2530 run_start = find_next_bit(bitmap, pages, 0);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002531
Juan Quintela6b6712e2017-03-22 15:18:04 +01002532 while (run_start < pages) {
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002533
2534 /*
2535 * If the start of this run of pages is in the middle of a host
2536 * page, then we need to fixup this host page.
2537 */
Wei Yang9dec3cc2019-08-06 08:46:48 +08002538 if (QEMU_IS_ALIGNED(run_start, host_ratio)) {
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002539 /* Find the end of this run */
Wei Yang1e7cf8c2019-08-19 14:18:42 +08002540 run_start = find_next_zero_bit(bitmap, pages, run_start + 1);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002541 /*
2542 * If the end isn't at the start of a host page, then the
2543 * run doesn't finish at the end of a host page
2544 * and we need to discard.
2545 */
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002546 }
2547
Wei Yang9dec3cc2019-08-06 08:46:48 +08002548 if (!QEMU_IS_ALIGNED(run_start, host_ratio)) {
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002549 unsigned long page;
Wei Yangdad45ab2019-08-06 08:46:47 +08002550 unsigned long fixup_start_addr = QEMU_ALIGN_DOWN(run_start,
2551 host_ratio);
2552 run_start = QEMU_ALIGN_UP(run_start, host_ratio);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002553
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002554 /* Clean up the bitmap */
2555 for (page = fixup_start_addr;
2556 page < fixup_start_addr + host_ratio; page++) {
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002557 /*
2558 * Remark them as dirty, updating the count for any pages
2559 * that weren't previously dirty.
2560 */
Juan Quintela0d8ec882017-03-13 21:21:41 +01002561 rs->migration_dirty_pages += !test_and_set_bit(page, bitmap);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002562 }
2563 }
2564
Wei Yang1e7cf8c2019-08-19 14:18:42 +08002565 /* Find the next dirty page for the next iteration */
2566 run_start = find_next_bit(bitmap, pages, run_start);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002567 }
2568}
2569
Juan Quintela3d0684b2017-03-23 15:06:39 +01002570/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002571 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
2572 *
2573 * Returns zero on success
2574 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002575 * Transmit the set of pages to be discarded after precopy to the target;
2576 * these are pages that:
2577 * a) Have been previously transmitted but are now dirty again
2578 * b) Have never been transmitted; this ensures that
2579 * any pages on the destination that have been mapped by background
2580 * tasks get discarded (transparent huge pages are the specific concern)
2581 * Hopefully this is pretty sparse
Juan Quintela3d0684b2017-03-23 15:06:39 +01002582 *
2583 * @ms: current migration state
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002584 */
2585int ram_postcopy_send_discard_bitmap(MigrationState *ms)
2586{
Juan Quintela53518d92017-05-04 11:46:24 +02002587 RAMState *rs = ram_state;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002588
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01002589 RCU_READ_LOCK_GUARD();
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002590
2591 /* This should be our last sync, the src is now paused */
Juan Quintelaeb859c52017-03-13 21:51:55 +01002592 migration_bitmap_sync(rs);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002593
Juan Quintela6b6712e2017-03-22 15:18:04 +01002594 /* Easiest way to make sure we don't resume in the middle of a host-page */
2595 rs->last_seen_block = NULL;
2596 rs->last_sent_block = NULL;
2597 rs->last_page = 0;
2598
Juan Quintela6b6712e2017-03-22 15:18:04 +01002599 trace_ram_postcopy_send_discard_bitmap();
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002600
Simran Singhalb3ac2b92020-04-01 22:23:14 +05302601 return postcopy_each_ram_send_discard(ms);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002602}
2603
Juan Quintela3d0684b2017-03-23 15:06:39 +01002604/**
2605 * ram_discard_range: discard dirtied pages at the beginning of postcopy
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002606 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002607 * Returns zero on success
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002608 *
Juan Quintela36449152017-03-23 15:11:59 +01002609 * @rbname: name of the RAMBlock of the request. NULL means the
2610 * same as the last one.
Juan Quintela3d0684b2017-03-23 15:06:39 +01002611 * @start: byte offset within the RAMBlock
2612 * @length: length in bytes of the range to discard
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002613 */
Juan Quintelaaaa20642017-03-21 11:35:24 +01002614int ram_discard_range(const char *rbname, uint64_t start, size_t length)
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002615{
Juan Quintela36449152017-03-23 15:11:59 +01002616 trace_ram_discard_range(rbname, start, length);
Dr. David Alan Gilbertd3a50382017-02-24 18:28:32 +00002617
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01002618 RCU_READ_LOCK_GUARD();
Juan Quintela36449152017-03-23 15:11:59 +01002619 RAMBlock *rb = qemu_ram_block_by_name(rbname);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002620
2621 if (!rb) {
Juan Quintela36449152017-03-23 15:11:59 +01002622 error_report("ram_discard_range: Failed to find block '%s'", rbname);
Daniel Henrique Barboza03acb4e2020-01-06 15:23:31 -03002623 return -1;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002624 }
2625
Peter Xu814bb082018-07-23 20:33:02 +08002626 /*
2627 * On source VM, we don't need to update the received bitmap since
2628 * we don't even have one.
2629 */
2630 if (rb->receivedmap) {
2631 bitmap_clear(rb->receivedmap, start >> qemu_target_page_bits(),
2632 length >> qemu_target_page_bits());
2633 }
2634
Daniel Henrique Barboza03acb4e2020-01-06 15:23:31 -03002635 return ram_block_discard_range(rb, start, length);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002636}
2637
Peter Xu84593a02017-10-19 14:31:59 +08002638/*
2639 * For every allocation, we will try not to crash the VM if the
2640 * allocation fails.
2641 */
2642static int xbzrle_init(void)
2643{
2644 Error *local_err = NULL;
2645
2646 if (!migrate_use_xbzrle()) {
2647 return 0;
2648 }
2649
2650 XBZRLE_cache_lock();
2651
2652 XBZRLE.zero_target_page = g_try_malloc0(TARGET_PAGE_SIZE);
2653 if (!XBZRLE.zero_target_page) {
2654 error_report("%s: Error allocating zero page", __func__);
2655 goto err_out;
2656 }
2657
2658 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(),
2659 TARGET_PAGE_SIZE, &local_err);
2660 if (!XBZRLE.cache) {
2661 error_report_err(local_err);
2662 goto free_zero_page;
2663 }
2664
2665 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
2666 if (!XBZRLE.encoded_buf) {
2667 error_report("%s: Error allocating encoded_buf", __func__);
2668 goto free_cache;
2669 }
2670
2671 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
2672 if (!XBZRLE.current_buf) {
2673 error_report("%s: Error allocating current_buf", __func__);
2674 goto free_encoded_buf;
2675 }
2676
2677 /* We are all good */
2678 XBZRLE_cache_unlock();
2679 return 0;
2680
2681free_encoded_buf:
2682 g_free(XBZRLE.encoded_buf);
2683 XBZRLE.encoded_buf = NULL;
2684free_cache:
2685 cache_fini(XBZRLE.cache);
2686 XBZRLE.cache = NULL;
2687free_zero_page:
2688 g_free(XBZRLE.zero_target_page);
2689 XBZRLE.zero_target_page = NULL;
2690err_out:
2691 XBZRLE_cache_unlock();
2692 return -ENOMEM;
2693}
2694
Juan Quintela53518d92017-05-04 11:46:24 +02002695static int ram_state_init(RAMState **rsp)
Juan Quintela56e93d22015-05-07 19:33:31 +02002696{
Peter Xu7d00ee62017-10-19 14:31:57 +08002697 *rsp = g_try_new0(RAMState, 1);
2698
2699 if (!*rsp) {
2700 error_report("%s: Init ramstate fail", __func__);
2701 return -1;
2702 }
Juan Quintela53518d92017-05-04 11:46:24 +02002703
2704 qemu_mutex_init(&(*rsp)->bitmap_mutex);
2705 qemu_mutex_init(&(*rsp)->src_page_req_mutex);
2706 QSIMPLEQ_INIT(&(*rsp)->src_page_requests);
Juan Quintela56e93d22015-05-07 19:33:31 +02002707
Peter Xu7d00ee62017-10-19 14:31:57 +08002708 /*
Ivan Ren40c4d4a2019-07-14 22:51:19 +08002709 * Count the total number of pages used by ram blocks not including any
2710 * gaps due to alignment or unplugs.
Wei Yang03158512019-06-04 14:17:27 +08002711     * This must match the initial value of the dirty bitmap.
Peter Xu7d00ee62017-10-19 14:31:57 +08002712 */
Ivan Ren40c4d4a2019-07-14 22:51:19 +08002713 (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
Peter Xu7d00ee62017-10-19 14:31:57 +08002714 ram_state_reset(*rsp);
2715
2716 return 0;
2717}
2718
Peter Xud6eff5d2017-10-19 14:32:00 +08002719static void ram_list_init_bitmaps(void)
2720{
Peter Xu002cad62019-06-03 14:50:56 +08002721 MigrationState *ms = migrate_get_current();
Peter Xud6eff5d2017-10-19 14:32:00 +08002722 RAMBlock *block;
2723 unsigned long pages;
Peter Xu002cad62019-06-03 14:50:56 +08002724 uint8_t shift;
Peter Xud6eff5d2017-10-19 14:32:00 +08002725
2726 /* Skip setting bitmap if there is no RAM */
2727 if (ram_bytes_total()) {
Peter Xu002cad62019-06-03 14:50:56 +08002728 shift = ms->clear_bitmap_shift;
2729 if (shift > CLEAR_BITMAP_SHIFT_MAX) {
2730 error_report("clear_bitmap_shift (%u) too big, using "
2731 "max value (%u)", shift, CLEAR_BITMAP_SHIFT_MAX);
2732 shift = CLEAR_BITMAP_SHIFT_MAX;
2733 } else if (shift < CLEAR_BITMAP_SHIFT_MIN) {
2734 error_report("clear_bitmap_shift (%u) too small, using "
2735 "min value (%u)", shift, CLEAR_BITMAP_SHIFT_MIN);
2736 shift = CLEAR_BITMAP_SHIFT_MIN;
2737 }
2738
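        /*
         * clear_bmap batches the (possibly expensive) dirty-log clear into
         * chunks of 1UL << shift pages, so clearing can be deferred until
         * pages in a chunk are actually about to be sent.
         */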
Yury Kotovfbd162e2019-02-15 20:45:46 +03002739 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Peter Xud6eff5d2017-10-19 14:32:00 +08002740 pages = block->max_length >> TARGET_PAGE_BITS;
Wei Yang03158512019-06-04 14:17:27 +08002741 /*
2742 * The initial dirty bitmap for migration must be set with all
2743 * ones to make sure we'll migrate every guest RAM page to
2744 * destination.
Ivan Ren40c4d4a2019-07-14 22:51:19 +08002745             * Here we set RAMBlock.bmap all to 1 because when restarting
2746             * migration after a failed attempt, ram_list.
2747             * dirty_memory[DIRTY_MEMORY_MIGRATION] doesn't include the whole
2748             * guest memory.
Wei Yang03158512019-06-04 14:17:27 +08002749 */
Peter Xud6eff5d2017-10-19 14:32:00 +08002750 block->bmap = bitmap_new(pages);
Ivan Ren40c4d4a2019-07-14 22:51:19 +08002751 bitmap_set(block->bmap, 0, pages);
Peter Xu002cad62019-06-03 14:50:56 +08002752 block->clear_bmap_shift = shift;
2753 block->clear_bmap = bitmap_new(clear_bmap_size(pages, shift));
Peter Xud6eff5d2017-10-19 14:32:00 +08002754 }
2755 }
2756}
2757
David Hildenbrandbe39b4c2021-10-11 19:53:41 +02002758static void migration_bitmap_clear_discarded_pages(RAMState *rs)
2759{
2760 unsigned long pages;
2761 RAMBlock *rb;
2762
2763 RCU_READ_LOCK_GUARD();
2764
2765 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
2766 pages = ramblock_dirty_bitmap_clear_discarded_pages(rb);
2767 rs->migration_dirty_pages -= pages;
2768 }
2769}
2770
Peter Xud6eff5d2017-10-19 14:32:00 +08002771static void ram_init_bitmaps(RAMState *rs)
2772{
2773 /* For memory_global_dirty_log_start below. */
2774 qemu_mutex_lock_iothread();
2775 qemu_mutex_lock_ramlist();
Peter Xud6eff5d2017-10-19 14:32:00 +08002776
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01002777 WITH_RCU_READ_LOCK_GUARD() {
2778 ram_list_init_bitmaps();
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03002779 /* We don't use dirty log with background snapshots */
2780 if (!migrate_background_snapshot()) {
Hyman Huang(黄勇)63b41db2021-06-29 16:01:19 +00002781 memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION);
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03002782 migration_bitmap_sync_precopy(rs);
2783 }
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01002784 }
Peter Xud6eff5d2017-10-19 14:32:00 +08002785 qemu_mutex_unlock_ramlist();
2786 qemu_mutex_unlock_iothread();
David Hildenbrandbe39b4c2021-10-11 19:53:41 +02002787
2788 /*
2789 * After an eventual first bitmap sync, fixup the initial bitmap
2790 * containing all 1s to exclude any discarded pages from migration.
2791 */
2792 migration_bitmap_clear_discarded_pages(rs);
Peter Xud6eff5d2017-10-19 14:32:00 +08002793}
2794
Peter Xu7d00ee62017-10-19 14:31:57 +08002795static int ram_init_all(RAMState **rsp)
2796{
Peter Xu7d00ee62017-10-19 14:31:57 +08002797 if (ram_state_init(rsp)) {
2798 return -1;
2799 }
2800
Peter Xu84593a02017-10-19 14:31:59 +08002801 if (xbzrle_init()) {
2802 ram_state_cleanup(rsp);
2803 return -1;
Juan Quintela56e93d22015-05-07 19:33:31 +02002804 }
2805
Peter Xud6eff5d2017-10-19 14:32:00 +08002806 ram_init_bitmaps(*rsp);
zhanghailianga91246c2016-10-27 14:42:59 +08002807
2808 return 0;
2809}
2810
Peter Xu08614f32018-05-02 18:47:33 +08002811static void ram_state_resume_prepare(RAMState *rs, QEMUFile *out)
2812{
2813 RAMBlock *block;
2814 uint64_t pages = 0;
2815
2816 /*
2817 * Postcopy is not using xbzrle/compression, so no need for that.
2818      * Also, since the source is already halted, we don't need to care
2819      * about dirty page logging either.
2820 */
2821
Yury Kotovfbd162e2019-02-15 20:45:46 +03002822 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Peter Xu08614f32018-05-02 18:47:33 +08002823 pages += bitmap_count_one(block->bmap,
2824 block->used_length >> TARGET_PAGE_BITS);
2825 }
2826
2827 /* This may not be aligned with current bitmaps. Recalculate. */
2828 rs->migration_dirty_pages = pages;
2829
David Hildenbrand1a373522021-02-16 11:50:39 +01002830 ram_state_reset(rs);
Peter Xu08614f32018-05-02 18:47:33 +08002831
2832 /* Update RAMState cache of output QEMUFile */
2833 rs->f = out;
2834
2835 trace_ram_state_resume_prepare(pages);
2836}
2837
Juan Quintela3d0684b2017-03-23 15:06:39 +01002838/*
Wei Wang6bcb05f2018-12-11 16:24:50 +08002839 * This function clears bits of the free pages reported by the caller from the
2840 * migration dirty bitmap. @addr is the host address corresponding to the
2841 * start of the continuous guest free pages, and @len is the total bytes of
2842 * those pages.
2843 */
2844void qemu_guest_free_page_hint(void *addr, size_t len)
2845{
2846 RAMBlock *block;
2847 ram_addr_t offset;
2848 size_t used_len, start, npages;
2849 MigrationState *s = migrate_get_current();
2850
2851 /* This function is currently expected to be used during live migration */
2852 if (!migration_is_setup_or_active(s->state)) {
2853 return;
2854 }
2855
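    /*
     * Walk the hinted range one RAMBlock at a time; used_len is the part of
     * the hint that falls inside the current block.
     */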
2856 for (; len > 0; len -= used_len, addr += used_len) {
2857 block = qemu_ram_block_from_host(addr, false, &offset);
2858 if (unlikely(!block || offset >= block->used_length)) {
2859 /*
2860 * The implementation might not support RAMBlock resize during
2861 * live migration, but it could happen in theory with future
2862 * updates. So we add a check here to capture that case.
2863 */
2864 error_report_once("%s unexpected error", __func__);
2865 return;
2866 }
2867
2868 if (len <= block->used_length - offset) {
2869 used_len = len;
2870 } else {
2871 used_len = block->used_length - offset;
2872 }
2873
2874 start = offset >> TARGET_PAGE_BITS;
2875 npages = used_len >> TARGET_PAGE_BITS;
2876
2877 qemu_mutex_lock(&ram_state->bitmap_mutex);
Wei Wang3143577d2021-07-22 04:30:55 -04002878 /*
2879          * The skipped free pages are equivalent to having been sent from clear_bmap's
2880 * perspective, so clear the bits from the memory region bitmap which
2881 * are initially set. Otherwise those skipped pages will be sent in
2882 * the next round after syncing from the memory region bitmap.
2883 */
David Hildenbrand1230a252021-09-04 18:09:07 +02002884 migration_clear_memory_region_dirty_bitmap_range(block, start, npages);
Wei Wang6bcb05f2018-12-11 16:24:50 +08002885 ram_state->migration_dirty_pages -=
2886 bitmap_count_one_with_offset(block->bmap, start, npages);
2887 bitmap_clear(block->bmap, start, npages);
2888 qemu_mutex_unlock(&ram_state->bitmap_mutex);
2889 }
2890}
2891
2892/*
Juan Quintela3d0684b2017-03-23 15:06:39 +01002893 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
zhanghailianga91246c2016-10-27 14:42:59 +08002894 * a long-running RCU critical section. When RCU reclaims in the code
2895 * start to become numerous it will be necessary to reduce the
2896 * granularity of these critical sections.
2897 */
2898
Juan Quintela3d0684b2017-03-23 15:06:39 +01002899/**
2900 * ram_save_setup: Setup RAM for migration
2901 *
2902 * Returns zero to indicate success and negative for error
2903 *
2904 * @f: QEMUFile where to send the data
2905 * @opaque: RAMState pointer
2906 */
zhanghailianga91246c2016-10-27 14:42:59 +08002907static int ram_save_setup(QEMUFile *f, void *opaque)
2908{
Juan Quintela53518d92017-05-04 11:46:24 +02002909 RAMState **rsp = opaque;
zhanghailianga91246c2016-10-27 14:42:59 +08002910 RAMBlock *block;
2911
Xiao Guangrongdcaf4462018-03-30 15:51:20 +08002912 if (compress_threads_save_setup()) {
2913 return -1;
2914 }
2915
zhanghailianga91246c2016-10-27 14:42:59 +08002916 /* migration has already setup the bitmap, reuse it. */
2917 if (!migration_in_colo_state()) {
Peter Xu7d00ee62017-10-19 14:31:57 +08002918 if (ram_init_all(rsp) != 0) {
Xiao Guangrongdcaf4462018-03-30 15:51:20 +08002919 compress_threads_save_cleanup();
zhanghailianga91246c2016-10-27 14:42:59 +08002920 return -1;
Juan Quintela53518d92017-05-04 11:46:24 +02002921 }
zhanghailianga91246c2016-10-27 14:42:59 +08002922 }
Juan Quintela53518d92017-05-04 11:46:24 +02002923 (*rsp)->f = f;
zhanghailianga91246c2016-10-27 14:42:59 +08002924
Dr. David Alan Gilbert0e6ebd42019-10-07 15:36:38 +01002925 WITH_RCU_READ_LOCK_GUARD() {
2926 qemu_put_be64(f, ram_bytes_total_common(true) | RAM_SAVE_FLAG_MEM_SIZE);
Juan Quintela56e93d22015-05-07 19:33:31 +02002927
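        /*
         * Per-block header: idstr length, idstr and used_length, plus the
         * page size (when postcopy is enabled and it differs from the host
         * page size) and the region address (when ignore-shared is enabled).
         */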
Dr. David Alan Gilbert0e6ebd42019-10-07 15:36:38 +01002928 RAMBLOCK_FOREACH_MIGRATABLE(block) {
2929 qemu_put_byte(f, strlen(block->idstr));
2930 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
2931 qemu_put_be64(f, block->used_length);
2932 if (migrate_postcopy_ram() && block->page_size !=
2933 qemu_host_page_size) {
2934 qemu_put_be64(f, block->page_size);
2935 }
2936 if (migrate_ignore_shared()) {
2937 qemu_put_be64(f, block->mr->addr);
2938 }
Yury Kotovfbd162e2019-02-15 20:45:46 +03002939 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002940 }
2941
Juan Quintela56e93d22015-05-07 19:33:31 +02002942 ram_control_before_iterate(f, RAM_CONTROL_SETUP);
2943 ram_control_after_iterate(f, RAM_CONTROL_SETUP);
2944
Juan Quintela99f2c6f2020-01-22 16:04:53 +01002945 multifd_send_sync_main(f);
Juan Quintela56e93d22015-05-07 19:33:31 +02002946 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
Juan Quintela35374cb2018-04-18 10:13:21 +02002947 qemu_fflush(f);
Juan Quintela56e93d22015-05-07 19:33:31 +02002948
2949 return 0;
2950}
2951
Juan Quintela3d0684b2017-03-23 15:06:39 +01002952/**
2953 * ram_save_iterate: iterative stage for migration
2954 *
2955 * Returns zero to indicate success and negative for error
2956 *
2957 * @f: QEMUFile where to send the data
2958 * @opaque: RAMState pointer
2959 */
Juan Quintela56e93d22015-05-07 19:33:31 +02002960static int ram_save_iterate(QEMUFile *f, void *opaque)
2961{
Juan Quintela53518d92017-05-04 11:46:24 +02002962 RAMState **temp = opaque;
2963 RAMState *rs = *temp;
Juan Quintela3d4095b2019-12-18 05:12:36 +01002964 int ret = 0;
Juan Quintela56e93d22015-05-07 19:33:31 +02002965 int i;
2966 int64_t t0;
Thomas Huth5c903082016-11-04 14:10:17 +01002967 int done = 0;
Juan Quintela56e93d22015-05-07 19:33:31 +02002968
Peter Lievenb2557342018-03-08 12:18:24 +01002969 if (blk_mig_bulk_active()) {
2970 /* Avoid transferring ram during bulk phase of block migration as
2971 * the bulk phase will usually take a long time and transferring
2972 * ram updates during that time is pointless. */
2973 goto out;
2974 }
2975
Peter Xu63268c42021-06-30 16:08:05 -04002976 /*
2977 * We'll take this lock a little bit long, but it's okay for two reasons.
2978      * Firstly, the only other thread that may possibly take it is the one that calls
2979 * qemu_guest_free_page_hint(), which should be rare; secondly, see
2980 * MAX_WAIT (if curious, further see commit 4508bd9ed8053ce) below, which
2981      * guarantees that we'll at least release it on a regular basis.
2982 */
2983 qemu_mutex_lock(&rs->bitmap_mutex);
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01002984 WITH_RCU_READ_LOCK_GUARD() {
2985 if (ram_list.version != rs->last_version) {
2986 ram_state_reset(rs);
Dr. David Alan Gilberte03a34f2018-06-13 11:26:42 +01002987 }
2988
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01002989 /* Read version before ram_list.blocks */
2990 smp_rmb();
Xiao Guangronge8f37352018-09-03 17:26:44 +08002991
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01002992 ram_control_before_iterate(f, RAM_CONTROL_ROUND);
Xiao Guangronge8f37352018-09-03 17:26:44 +08002993
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01002994 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2995 i = 0;
2996 while ((ret = qemu_file_rate_limit(f)) == 0 ||
2997 !QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
2998 int pages;
Jason J. Herne070afca2015-09-08 13:12:35 -04002999
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003000 if (qemu_file_get_error(f)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02003001 break;
3002 }
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003003
Juan Quintela05931ec2021-12-15 19:01:21 +01003004 pages = ram_find_and_save_block(rs);
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003005             /* no more pages to send */
3006 if (pages == 0) {
3007 done = 1;
3008 break;
3009 }
3010
3011 if (pages < 0) {
3012 qemu_file_set_error(f, pages);
3013 break;
3014 }
3015
3016 rs->target_page_count += pages;
3017
3018 /*
Wei Yang644acf92019-11-07 20:39:07 +08003019 * During postcopy, it is necessary to make sure one whole host
3020 * page is sent in one chunk.
3021 */
3022 if (migrate_postcopy_ram()) {
3023 flush_compressed_data(rs);
3024 }
3025
3026 /*
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003027 * we want to check in the 1st loop, just in case it was the 1st
3028 * time and we had to sync the dirty bitmap.
3029 * qemu_clock_get_ns() is a bit expensive, so we only check each
3030 * some iterations
3031 */
3032 if ((i & 63) == 0) {
3033 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) /
3034 1000000;
3035 if (t1 > MAX_WAIT) {
3036 trace_ram_save_iterate_big_wait(t1, i);
3037 break;
3038 }
3039 }
3040 i++;
Juan Quintela56e93d22015-05-07 19:33:31 +02003041 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003042 }
Peter Xu63268c42021-06-30 16:08:05 -04003043 qemu_mutex_unlock(&rs->bitmap_mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02003044
3045 /*
3046 * Must occur before EOS (or any QEMUFile operation)
3047 * because of RDMA protocol.
3048 */
3049 ram_control_after_iterate(f, RAM_CONTROL_ROUND);
3050
Peter Lievenb2557342018-03-08 12:18:24 +01003051out:
Juan Quintelab69a0222020-01-22 11:36:12 +01003052 if (ret >= 0
3053 && migration_is_setup_or_active(migrate_get_current()->state)) {
Juan Quintela99f2c6f2020-01-22 16:04:53 +01003054 multifd_send_sync_main(rs->f);
Juan Quintela3d4095b2019-12-18 05:12:36 +01003055 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
3056 qemu_fflush(f);
3057 ram_counters.transferred += 8;
Juan Quintela56e93d22015-05-07 19:33:31 +02003058
Juan Quintela3d4095b2019-12-18 05:12:36 +01003059 ret = qemu_file_get_error(f);
3060 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003061 if (ret < 0) {
3062 return ret;
3063 }
3064
Thomas Huth5c903082016-11-04 14:10:17 +01003065 return done;
Juan Quintela56e93d22015-05-07 19:33:31 +02003066}
3067
Juan Quintela3d0684b2017-03-23 15:06:39 +01003068/**
3069 * ram_save_complete: function called to send the remaining amount of ram
3070 *
Xiao Guangronge8f37352018-09-03 17:26:44 +08003071 * Returns zero to indicate success or negative on error
Juan Quintela3d0684b2017-03-23 15:06:39 +01003072 *
3073 * Called with iothread lock
3074 *
3075 * @f: QEMUFile where to send the data
3076 * @opaque: RAMState pointer
3077 */
Juan Quintela56e93d22015-05-07 19:33:31 +02003078static int ram_save_complete(QEMUFile *f, void *opaque)
3079{
Juan Quintela53518d92017-05-04 11:46:24 +02003080 RAMState **temp = opaque;
3081 RAMState *rs = *temp;
Xiao Guangronge8f37352018-09-03 17:26:44 +08003082 int ret = 0;
Juan Quintela6f37bb82017-03-13 19:26:29 +01003083
Juan Quintela05931ec2021-12-15 19:01:21 +01003084 rs->last_stage = !migration_in_colo_state();
3085
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003086 WITH_RCU_READ_LOCK_GUARD() {
3087 if (!migration_in_postcopy()) {
3088 migration_bitmap_sync_precopy(rs);
Juan Quintela56e93d22015-05-07 19:33:31 +02003089 }
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003090
3091 ram_control_before_iterate(f, RAM_CONTROL_FINISH);
3092
3093 /* try transferring iterative blocks of memory */
3094
3095 /* flush all remaining blocks regardless of rate limiting */
3096 while (true) {
3097 int pages;
3098
Juan Quintela05931ec2021-12-15 19:01:21 +01003099 pages = ram_find_and_save_block(rs);
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003100             /* no more blocks to send */
3101 if (pages == 0) {
3102 break;
3103 }
3104 if (pages < 0) {
3105 ret = pages;
3106 break;
3107 }
Xiao Guangronge8f37352018-09-03 17:26:44 +08003108 }
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003109
3110 flush_compressed_data(rs);
3111 ram_control_after_iterate(f, RAM_CONTROL_FINISH);
Juan Quintela56e93d22015-05-07 19:33:31 +02003112 }
3113
Juan Quintela3d4095b2019-12-18 05:12:36 +01003114 if (ret >= 0) {
Juan Quintela99f2c6f2020-01-22 16:04:53 +01003115 multifd_send_sync_main(rs->f);
Juan Quintela3d4095b2019-12-18 05:12:36 +01003116 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
3117 qemu_fflush(f);
3118 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003119
Xiao Guangronge8f37352018-09-03 17:26:44 +08003120 return ret;
Juan Quintela56e93d22015-05-07 19:33:31 +02003121}
3122
Dr. David Alan Gilbertc31b0982015-11-05 18:10:54 +00003123static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
Vladimir Sementsov-Ogievskiy47995022018-03-13 15:34:00 -04003124 uint64_t *res_precopy_only,
3125 uint64_t *res_compatible,
3126 uint64_t *res_postcopy_only)
Juan Quintela56e93d22015-05-07 19:33:31 +02003127{
Juan Quintela53518d92017-05-04 11:46:24 +02003128 RAMState **temp = opaque;
3129 RAMState *rs = *temp;
Juan Quintela56e93d22015-05-07 19:33:31 +02003130 uint64_t remaining_size;
3131
Juan Quintela9edabd42017-03-14 12:02:16 +01003132 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
Juan Quintela56e93d22015-05-07 19:33:31 +02003133
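    /*
     * If what is left might fit under the threshold, resync the dirty bitmap
     * (under the iothread lock) so the caller decides on a fresh estimate.
     */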
Juan Quintela57273092017-03-20 22:25:28 +01003134 if (!migration_in_postcopy() &&
Dr. David Alan Gilbert663e6c12015-11-05 18:11:13 +00003135 remaining_size < max_size) {
Juan Quintela56e93d22015-05-07 19:33:31 +02003136 qemu_mutex_lock_iothread();
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003137 WITH_RCU_READ_LOCK_GUARD() {
3138 migration_bitmap_sync_precopy(rs);
3139 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003140 qemu_mutex_unlock_iothread();
Juan Quintela9edabd42017-03-14 12:02:16 +01003141 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
Juan Quintela56e93d22015-05-07 19:33:31 +02003142 }
Dr. David Alan Gilbertc31b0982015-11-05 18:10:54 +00003143
Vladimir Sementsov-Ogievskiy86e11672017-07-10 19:30:15 +03003144 if (migrate_postcopy_ram()) {
3145 /* We can do postcopy, and all the data is postcopiable */
Vladimir Sementsov-Ogievskiy47995022018-03-13 15:34:00 -04003146 *res_compatible += remaining_size;
Vladimir Sementsov-Ogievskiy86e11672017-07-10 19:30:15 +03003147 } else {
Vladimir Sementsov-Ogievskiy47995022018-03-13 15:34:00 -04003148 *res_precopy_only += remaining_size;
Vladimir Sementsov-Ogievskiy86e11672017-07-10 19:30:15 +03003149 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003150}
3151
3152static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
3153{
3154 unsigned int xh_len;
3155 int xh_flags;
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00003156 uint8_t *loaded_data;
Juan Quintela56e93d22015-05-07 19:33:31 +02003157
Juan Quintela56e93d22015-05-07 19:33:31 +02003158 /* extract RLE header */
3159 xh_flags = qemu_get_byte(f);
3160 xh_len = qemu_get_be16(f);
3161
3162 if (xh_flags != ENCODING_FLAG_XBZRLE) {
3163 error_report("Failed to load XBZRLE page - wrong compression!");
3164 return -1;
3165 }
3166
3167 if (xh_len > TARGET_PAGE_SIZE) {
3168 error_report("Failed to load XBZRLE page - len overflow!");
3169 return -1;
3170 }
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003171 loaded_data = XBZRLE.decoded_buf;
Juan Quintela56e93d22015-05-07 19:33:31 +02003172 /* load data and decode */
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003173 /* it can change loaded_data to point to an internal buffer */
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00003174 qemu_get_buffer_in_place(f, &loaded_data, xh_len);
Juan Quintela56e93d22015-05-07 19:33:31 +02003175
3176 /* decode RLE */
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00003177 if (xbzrle_decode_buffer(loaded_data, xh_len, host,
Juan Quintela56e93d22015-05-07 19:33:31 +02003178 TARGET_PAGE_SIZE) == -1) {
3179 error_report("Failed to load XBZRLE page - decode error!");
3180 return -1;
3181 }
3182
3183 return 0;
3184}
3185
Juan Quintela3d0684b2017-03-23 15:06:39 +01003186/**
3187 * ram_block_from_stream: read a RAMBlock id from the migration stream
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003188 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01003189 * Must be called from within an RCU critical section.
3190 *
3191 * Returns a pointer from within the RCU-protected ram_list.
3192 *
3193 * @f: QEMUFile where to read the data from
3194 * @flags: Page flags (mostly to see if it's a continuation of previous block)
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003195 */
Juan Quintela3d0684b2017-03-23 15:06:39 +01003196static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
Juan Quintela56e93d22015-05-07 19:33:31 +02003197{
Bihong Yu49324e92020-10-20 11:10:46 +08003198 static RAMBlock *block;
Juan Quintela56e93d22015-05-07 19:33:31 +02003199 char id[256];
3200 uint8_t len;
3201
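    /*
     * RAM_SAVE_FLAG_CONTINUE means this page belongs to the same RAMBlock as
     * the previous one, so reuse the cached block instead of reading an id.
     */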
3202 if (flags & RAM_SAVE_FLAG_CONTINUE) {
zhanghailiang4c4bad42016-01-15 11:37:41 +08003203 if (!block) {
Juan Quintela56e93d22015-05-07 19:33:31 +02003204 error_report("Ack, bad migration stream!");
3205 return NULL;
3206 }
zhanghailiang4c4bad42016-01-15 11:37:41 +08003207 return block;
Juan Quintela56e93d22015-05-07 19:33:31 +02003208 }
3209
3210 len = qemu_get_byte(f);
3211 qemu_get_buffer(f, (uint8_t *)id, len);
3212 id[len] = 0;
3213
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00003214 block = qemu_ram_block_by_name(id);
zhanghailiang4c4bad42016-01-15 11:37:41 +08003215 if (!block) {
3216 error_report("Can't find block %s", id);
3217 return NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02003218 }
3219
Yury Kotovfbd162e2019-02-15 20:45:46 +03003220 if (ramblock_is_ignored(block)) {
CĂ©dric Le Goaterb895de52018-05-14 08:57:00 +02003221 error_report("block %s should not be migrated !", id);
3222 return NULL;
3223 }
3224
zhanghailiang4c4bad42016-01-15 11:37:41 +08003225 return block;
3226}
3227
3228static inline void *host_from_ram_block_offset(RAMBlock *block,
3229 ram_addr_t offset)
3230{
3231 if (!offset_in_ramblock(block, offset)) {
3232 return NULL;
3233 }
3234
3235 return block->host + offset;
Juan Quintela56e93d22015-05-07 19:33:31 +02003236}
3237
David Hildenbrand6a23f632021-04-29 13:27:05 +02003238static void *host_page_from_ram_block_offset(RAMBlock *block,
3239 ram_addr_t offset)
3240{
3241 /* Note: Explicitly no check against offset_in_ramblock(). */
3242 return (void *)QEMU_ALIGN_DOWN((uintptr_t)(block->host + offset),
3243 block->page_size);
3244}
3245
3246static ram_addr_t host_page_offset_from_ram_block_offset(RAMBlock *block,
3247 ram_addr_t offset)
3248{
3249 return ((uintptr_t)block->host + offset) & (block->page_size - 1);
3250}
3251
Zhang Chen13af18f2018-09-03 12:38:48 +08003252static inline void *colo_cache_from_block_offset(RAMBlock *block,
zhanghailiang8af66372020-02-24 14:54:11 +08003253 ram_addr_t offset, bool record_bitmap)
Zhang Chen13af18f2018-09-03 12:38:48 +08003254{
3255 if (!offset_in_ramblock(block, offset)) {
3256 return NULL;
3257 }
3258 if (!block->colo_cache) {
3259 error_report("%s: colo_cache is NULL in block :%s",
3260 __func__, block->idstr);
3261 return NULL;
3262 }
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003263
3264 /*
3265      * During a COLO checkpoint, we need the bitmap of these migrated pages.
3266      * It helps us decide which pages in the ram cache should be flushed
3267      * into the VM's RAM later.
3268 */
zhanghailiang8af66372020-02-24 14:54:11 +08003269 if (record_bitmap &&
3270 !test_and_set_bit(offset >> TARGET_PAGE_BITS, block->bmap)) {
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003271 ram_state->migration_dirty_pages++;
3272 }
Zhang Chen13af18f2018-09-03 12:38:48 +08003273 return block->colo_cache + offset;
3274}
3275
Juan Quintela3d0684b2017-03-23 15:06:39 +01003276/**
3277 * ram_handle_compressed: handle the zero page case
3278 *
Juan Quintela56e93d22015-05-07 19:33:31 +02003279 * If a page (or a whole RDMA chunk) has been
3280 * determined to be zero, then zap it.
Juan Quintela3d0684b2017-03-23 15:06:39 +01003281 *
3282 * @host: host address for the zero page
3283 * @ch: what the page is filled from. We only support zero
3284 * @size: size of the zero page
Juan Quintela56e93d22015-05-07 19:33:31 +02003285 */
3286void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
3287{
Juan Quintelabad452a2021-11-18 15:56:38 +01003288 if (ch != 0 || !buffer_is_zero(host, size)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02003289 memset(host, ch, size);
3290 }
3291}
3292
Xiao Guangrong797ca152018-03-30 15:51:21 +08003293/* return the size after decompression, or negative value on error */
3294static int
3295qemu_uncompress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
3296 const uint8_t *source, size_t source_len)
3297{
3298 int err;
3299
3300 err = inflateReset(stream);
3301 if (err != Z_OK) {
3302 return -1;
3303 }
3304
3305 stream->avail_in = source_len;
3306 stream->next_in = (uint8_t *)source;
3307 stream->avail_out = dest_len;
3308 stream->next_out = dest;
3309
3310 err = inflate(stream, Z_NO_FLUSH);
3311 if (err != Z_STREAM_END) {
3312 return -1;
3313 }
3314
3315 return stream->total_out;
3316}
3317
Juan Quintela56e93d22015-05-07 19:33:31 +02003318static void *do_data_decompress(void *opaque)
3319{
3320 DecompressParam *param = opaque;
3321 unsigned long pagesize;
Liang Li33d151f2016-05-05 15:32:58 +08003322 uint8_t *des;
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003323 int len, ret;
Juan Quintela56e93d22015-05-07 19:33:31 +02003324
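    /*
     * Worker loop: wait until a page is queued (destination in param->des,
     * compressed length in param->len), inflate param->compbuf into it, then
     * signal completion through decomp_done_cond.
     */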
Liang Li33d151f2016-05-05 15:32:58 +08003325 qemu_mutex_lock(&param->mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08003326 while (!param->quit) {
Liang Li33d151f2016-05-05 15:32:58 +08003327 if (param->des) {
3328 des = param->des;
3329 len = param->len;
3330 param->des = 0;
3331 qemu_mutex_unlock(&param->mutex);
3332
Liang Li73a89122016-05-05 15:32:51 +08003333 pagesize = TARGET_PAGE_SIZE;
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003334
3335 ret = qemu_uncompress_data(&param->stream, des, pagesize,
3336 param->compbuf, len);
Xiao Guangrongf5482222018-05-03 16:06:11 +08003337 if (ret < 0 && migrate_get_current()->decompress_error_check) {
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003338 error_report("decompress data failed");
3339 qemu_file_set_error(decomp_file, ret);
3340 }
Liang Li73a89122016-05-05 15:32:51 +08003341
Liang Li33d151f2016-05-05 15:32:58 +08003342 qemu_mutex_lock(&decomp_done_lock);
3343 param->done = true;
3344 qemu_cond_signal(&decomp_done_cond);
3345 qemu_mutex_unlock(&decomp_done_lock);
3346
3347 qemu_mutex_lock(&param->mutex);
3348 } else {
3349 qemu_cond_wait(&param->cond, &param->mutex);
3350 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003351 }
Liang Li33d151f2016-05-05 15:32:58 +08003352 qemu_mutex_unlock(&param->mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02003353
3354 return NULL;
3355}
3356
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003357static int wait_for_decompress_done(void)
Liang Li5533b2e2016-05-05 15:32:52 +08003358{
3359 int idx, thread_count;
3360
3361 if (!migrate_use_compression()) {
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003362 return 0;
Liang Li5533b2e2016-05-05 15:32:52 +08003363 }
3364
3365 thread_count = migrate_decompress_threads();
3366 qemu_mutex_lock(&decomp_done_lock);
3367 for (idx = 0; idx < thread_count; idx++) {
3368 while (!decomp_param[idx].done) {
3369 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
3370 }
3371 }
3372 qemu_mutex_unlock(&decomp_done_lock);
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003373 return qemu_file_get_error(decomp_file);
Liang Li5533b2e2016-05-05 15:32:52 +08003374}
3375
Juan Quintelaf0afa332017-06-28 11:52:28 +02003376static void compress_threads_load_cleanup(void)
Juan Quintela56e93d22015-05-07 19:33:31 +02003377{
3378 int i, thread_count;
3379
Juan Quintela3416ab52016-04-20 11:56:01 +02003380 if (!migrate_use_compression()) {
3381 return;
3382 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003383 thread_count = migrate_decompress_threads();
3384 for (i = 0; i < thread_count; i++) {
Xiao Guangrong797ca152018-03-30 15:51:21 +08003385 /*
3386 * we use it as a indicator which shows if the thread is
3387 * properly init'd or not
3388 */
3389 if (!decomp_param[i].compbuf) {
3390 break;
3391 }
3392
Juan Quintela56e93d22015-05-07 19:33:31 +02003393 qemu_mutex_lock(&decomp_param[i].mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08003394 decomp_param[i].quit = true;
Juan Quintela56e93d22015-05-07 19:33:31 +02003395 qemu_cond_signal(&decomp_param[i].cond);
3396 qemu_mutex_unlock(&decomp_param[i].mutex);
3397 }
3398 for (i = 0; i < thread_count; i++) {
Xiao Guangrong797ca152018-03-30 15:51:21 +08003399 if (!decomp_param[i].compbuf) {
3400 break;
3401 }
3402
Juan Quintela56e93d22015-05-07 19:33:31 +02003403 qemu_thread_join(decompress_threads + i);
3404 qemu_mutex_destroy(&decomp_param[i].mutex);
3405 qemu_cond_destroy(&decomp_param[i].cond);
Xiao Guangrong797ca152018-03-30 15:51:21 +08003406 inflateEnd(&decomp_param[i].stream);
Juan Quintela56e93d22015-05-07 19:33:31 +02003407 g_free(decomp_param[i].compbuf);
Xiao Guangrong797ca152018-03-30 15:51:21 +08003408 decomp_param[i].compbuf = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02003409 }
3410 g_free(decompress_threads);
3411 g_free(decomp_param);
Juan Quintela56e93d22015-05-07 19:33:31 +02003412 decompress_threads = NULL;
3413 decomp_param = NULL;
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003414 decomp_file = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02003415}
3416
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003417static int compress_threads_load_setup(QEMUFile *f)
Xiao Guangrong797ca152018-03-30 15:51:21 +08003418{
3419 int i, thread_count;
3420
3421 if (!migrate_use_compression()) {
3422 return 0;
3423 }
3424
3425 thread_count = migrate_decompress_threads();
3426 decompress_threads = g_new0(QemuThread, thread_count);
3427 decomp_param = g_new0(DecompressParam, thread_count);
3428 qemu_mutex_init(&decomp_done_lock);
3429 qemu_cond_init(&decomp_done_cond);
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003430 decomp_file = f;
Xiao Guangrong797ca152018-03-30 15:51:21 +08003431 for (i = 0; i < thread_count; i++) {
3432 if (inflateInit(&decomp_param[i].stream) != Z_OK) {
3433 goto exit;
3434 }
3435
3436 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
3437 qemu_mutex_init(&decomp_param[i].mutex);
3438 qemu_cond_init(&decomp_param[i].cond);
3439 decomp_param[i].done = true;
3440 decomp_param[i].quit = false;
3441 qemu_thread_create(decompress_threads + i, "decompress",
3442 do_data_decompress, decomp_param + i,
3443 QEMU_THREAD_JOINABLE);
3444 }
3445 return 0;
3446exit:
3447 compress_threads_load_cleanup();
3448 return -1;
3449}
3450
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00003451static void decompress_data_with_multi_threads(QEMUFile *f,
Juan Quintela56e93d22015-05-07 19:33:31 +02003452 void *host, int len)
3453{
3454 int idx, thread_count;
3455
3456 thread_count = migrate_decompress_threads();
Mahmoud Mandour37396952021-03-11 05:15:35 +02003457 QEMU_LOCK_GUARD(&decomp_done_lock);
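    /*
     * Hand the compressed page to an idle decompression thread; if all of
     * them are busy, wait on decomp_done_cond until one finishes.
     */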
Juan Quintela56e93d22015-05-07 19:33:31 +02003458 while (true) {
3459 for (idx = 0; idx < thread_count; idx++) {
Liang Li73a89122016-05-05 15:32:51 +08003460 if (decomp_param[idx].done) {
Liang Li33d151f2016-05-05 15:32:58 +08003461 decomp_param[idx].done = false;
3462 qemu_mutex_lock(&decomp_param[idx].mutex);
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00003463 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
Juan Quintela56e93d22015-05-07 19:33:31 +02003464 decomp_param[idx].des = host;
3465 decomp_param[idx].len = len;
Liang Li33d151f2016-05-05 15:32:58 +08003466 qemu_cond_signal(&decomp_param[idx].cond);
3467 qemu_mutex_unlock(&decomp_param[idx].mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02003468 break;
3469 }
3470 }
3471 if (idx < thread_count) {
3472 break;
Liang Li73a89122016-05-05 15:32:51 +08003473 } else {
3474 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02003475 }
3476 }
3477}
3478
Rao, Leib70cb3b2020-10-16 13:52:01 +08003479static void colo_init_ram_state(void)
3480{
3481 ram_state_init(&ram_state);
Rao, Leib70cb3b2020-10-16 13:52:01 +08003482}
3483
Zhang Chen13af18f2018-09-03 12:38:48 +08003484/*
3485 * COLO cache: this is for the secondary VM. We cache the whole
3486 * memory of the secondary VM; the global lock must be held
3487 * to call this helper.
3488 */
3489int colo_init_ram_cache(void)
3490{
3491 RAMBlock *block;
3492
Paolo Bonzini44901b52019-12-13 15:07:22 +01003493 WITH_RCU_READ_LOCK_GUARD() {
3494 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
3495 block->colo_cache = qemu_anon_ram_alloc(block->used_length,
David Hildenbrand8dbe22c2021-05-10 13:43:21 +02003496 NULL, false, false);
Paolo Bonzini44901b52019-12-13 15:07:22 +01003497 if (!block->colo_cache) {
3498 error_report("%s: Can't alloc memory for COLO cache of block %s,"
3499 "size 0x" RAM_ADDR_FMT, __func__, block->idstr,
3500 block->used_length);
3501 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
3502 if (block->colo_cache) {
3503 qemu_anon_ram_free(block->colo_cache, block->used_length);
3504 block->colo_cache = NULL;
3505 }
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003506 }
Paolo Bonzini44901b52019-12-13 15:07:22 +01003507 return -errno;
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003508 }
Lukas Straube5fdf922021-07-04 18:14:44 +02003509 if (!machine_dump_guest_core(current_machine)) {
3510 qemu_madvise(block->colo_cache, block->used_length,
3511 QEMU_MADV_DONTDUMP);
3512 }
Zhang Chen13af18f2018-09-03 12:38:48 +08003513 }
Zhang Chen13af18f2018-09-03 12:38:48 +08003514 }
Paolo Bonzini44901b52019-12-13 15:07:22 +01003515
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003516 /*
3517      * Record the dirty pages that were sent by the PVM; we use this dirty bitmap
3518      * to decide which pages in the cache should be flushed into the SVM's RAM. Here
3519 * we use the same name 'ram_bitmap' as for migration.
3520 */
3521 if (ram_bytes_total()) {
3522 RAMBlock *block;
3523
Yury Kotovfbd162e2019-02-15 20:45:46 +03003524 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003525 unsigned long pages = block->max_length >> TARGET_PAGE_BITS;
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003526 block->bmap = bitmap_new(pages);
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003527 }
3528 }
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003529
Rao, Leib70cb3b2020-10-16 13:52:01 +08003530 colo_init_ram_state();
Zhang Chen13af18f2018-09-03 12:38:48 +08003531 return 0;
Zhang Chen13af18f2018-09-03 12:38:48 +08003532}
3533
zhanghailiang03930312020-02-24 14:54:10 +08003534/* TODO: duplicated with ram_init_bitmaps */
3535void colo_incoming_start_dirty_log(void)
3536{
3537 RAMBlock *block = NULL;
3538 /* For memory_global_dirty_log_start below. */
3539 qemu_mutex_lock_iothread();
3540 qemu_mutex_lock_ramlist();
3541
3542 memory_global_dirty_log_sync();
3543 WITH_RCU_READ_LOCK_GUARD() {
3544 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
3545 ramblock_sync_dirty_bitmap(ram_state, block);
3546 /* Discard this dirty bitmap record */
3547 bitmap_zero(block->bmap, block->max_length >> TARGET_PAGE_BITS);
3548 }
Hyman Huang(黄勇)63b41db2021-06-29 16:01:19 +00003549 memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION);
zhanghailiang03930312020-02-24 14:54:10 +08003550 }
3551 ram_state->migration_dirty_pages = 0;
3552 qemu_mutex_unlock_ramlist();
3553 qemu_mutex_unlock_iothread();
3554}
3555
Zhang Chen13af18f2018-09-03 12:38:48 +08003556/* The global lock must be held to call this helper */
3557void colo_release_ram_cache(void)
3558{
3559 RAMBlock *block;
3560
Hyman Huang(黄勇)63b41db2021-06-29 16:01:19 +00003561 memory_global_dirty_log_stop(GLOBAL_DIRTY_MIGRATION);
Yury Kotovfbd162e2019-02-15 20:45:46 +03003562 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003563 g_free(block->bmap);
3564 block->bmap = NULL;
3565 }
3566
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003567 WITH_RCU_READ_LOCK_GUARD() {
3568 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
3569 if (block->colo_cache) {
3570 qemu_anon_ram_free(block->colo_cache, block->used_length);
3571 block->colo_cache = NULL;
3572 }
Zhang Chen13af18f2018-09-03 12:38:48 +08003573 }
3574 }
zhanghailiang03930312020-02-24 14:54:10 +08003575 ram_state_cleanup(&ram_state);
Zhang Chen13af18f2018-09-03 12:38:48 +08003576}
3577
Juan Quintela3d0684b2017-03-23 15:06:39 +01003578/**
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003579 * ram_load_setup: Setup RAM for migration incoming side
3580 *
3581 * Returns zero to indicate success and negative for error
3582 *
3583 * @f: QEMUFile where to receive the data
3584 * @opaque: RAMState pointer
3585 */
3586static int ram_load_setup(QEMUFile *f, void *opaque)
3587{
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003588 if (compress_threads_load_setup(f)) {
Xiao Guangrong797ca152018-03-30 15:51:21 +08003589 return -1;
3590 }
3591
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003592 xbzrle_load_setup();
Alexey Perevalovf9494612017-10-05 14:13:20 +03003593 ramblock_recv_map_init();
Zhang Chen13af18f2018-09-03 12:38:48 +08003594
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003595 return 0;
3596}
3597
3598static int ram_load_cleanup(void *opaque)
3599{
Alexey Perevalovf9494612017-10-05 14:13:20 +03003600 RAMBlock *rb;
Junyan He56eb90a2018-07-18 15:48:03 +08003601
Yury Kotovfbd162e2019-02-15 20:45:46 +03003602 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
Beata Michalskabd108a42019-11-21 00:08:42 +00003603 qemu_ram_block_writeback(rb);
Junyan He56eb90a2018-07-18 15:48:03 +08003604 }
3605
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003606 xbzrle_load_cleanup();
Juan Quintelaf0afa332017-06-28 11:52:28 +02003607 compress_threads_load_cleanup();
Alexey Perevalovf9494612017-10-05 14:13:20 +03003608
Yury Kotovfbd162e2019-02-15 20:45:46 +03003609 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
Alexey Perevalovf9494612017-10-05 14:13:20 +03003610 g_free(rb->receivedmap);
3611 rb->receivedmap = NULL;
3612 }
Zhang Chen13af18f2018-09-03 12:38:48 +08003613
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003614 return 0;
3615}
3616
3617/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01003618 * ram_postcopy_incoming_init: allocate postcopy data structures
3619 *
3620 * Returns 0 for success and negative if there was one error
3621 *
3622 * @mis: current migration incoming state
3623 *
3624 * Allocate data structures etc needed by incoming migration with
3625 * postcopy-ram. postcopy-ram's similarly named
3626 * postcopy_ram_incoming_init does the work.
Dr. David Alan Gilbert1caddf82015-11-05 18:11:03 +00003627 */
3628int ram_postcopy_incoming_init(MigrationIncomingState *mis)
3629{
David Hildenbrandc1361802018-06-20 22:27:36 +02003630 return postcopy_ram_incoming_init(mis);
Dr. David Alan Gilbert1caddf82015-11-05 18:11:03 +00003631}
3632
Juan Quintela3d0684b2017-03-23 15:06:39 +01003633/**
3634 * ram_load_postcopy: load a page in postcopy case
3635 *
3636 * Returns 0 for success or -errno in case of error
3637 *
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003638 * Called in postcopy mode by ram_load().
3639 * rcu_read_lock is taken prior to this being called.
Juan Quintela3d0684b2017-03-23 15:06:39 +01003640 *
3641 * @f: QEMUFile where to send the data
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003642 */
3643static int ram_load_postcopy(QEMUFile *f)
3644{
3645 int flags = 0, ret = 0;
3646 bool place_needed = false;
Peter Xu1aa83672018-07-10 17:18:53 +08003647 bool matches_target_page_size = false;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003648 MigrationIncomingState *mis = migration_incoming_get_current();
3649 /* Temporary page that is later 'placed' */
Wei Yang34143222019-10-05 21:50:20 +08003650 void *postcopy_host_page = mis->postcopy_tmp_page;
David Hildenbrand6a23f632021-04-29 13:27:05 +02003651 void *host_page = NULL;
David Hildenbrandddf35bd2020-04-21 10:52:56 +02003652 bool all_zero = true;
Wei Yang4cbb3c62019-11-07 20:39:04 +08003653 int target_pages = 0;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003654
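    /*
     * Each iteration reads one target page from the stream.  Target pages
     * are normally staged in the temporary postcopy_host_page and only
     * placed into guest memory once a whole host page has been received
     * (pages matching the target page size can be placed straight from the
     * stream buffer).
     */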
3655 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
3656 ram_addr_t addr;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003657 void *page_buffer = NULL;
3658 void *place_source = NULL;
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00003659 RAMBlock *block = NULL;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003660 uint8_t ch;
Wei Yang644acf92019-11-07 20:39:07 +08003661 int len;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003662
3663 addr = qemu_get_be64(f);
Peter Xu7a9ddfb2018-02-08 18:31:05 +08003664
3665 /*
3666 * If qemu file error, we should stop here, and then "addr"
3667 * may be invalid
3668 */
3669 ret = qemu_file_get_error(f);
3670 if (ret) {
3671 break;
3672 }
3673
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003674 flags = addr & ~TARGET_PAGE_MASK;
3675 addr &= TARGET_PAGE_MASK;
3676
3677 trace_ram_load_postcopy_loop((uint64_t)addr, flags);
Wei Yang644acf92019-11-07 20:39:07 +08003678 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
3679 RAM_SAVE_FLAG_COMPRESS_PAGE)) {
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00003680 block = ram_block_from_stream(f, flags);
David Hildenbrand6a23f632021-04-29 13:27:05 +02003681 if (!block) {
3682 ret = -EINVAL;
3683 break;
3684 }
zhanghailiang4c4bad42016-01-15 11:37:41 +08003685
David Hildenbrand898ba902021-04-29 13:27:06 +02003686 /*
3687 * Relying on used_length is racy and can result in false positives.
3688 * We might place pages beyond used_length in case RAM was shrunk
3689 * while in postcopy, which is fine - trying to place via
3690 * UFFDIO_COPY/UFFDIO_ZEROPAGE will never segfault.
3691 */
3692 if (!block->host || addr >= block->postcopy_length) {
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003693 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
3694 ret = -EINVAL;
3695 break;
3696 }
Wei Yang4cbb3c62019-11-07 20:39:04 +08003697 target_pages++;
Peter Xu1aa83672018-07-10 17:18:53 +08003698 matches_target_page_size = block->page_size == TARGET_PAGE_SIZE;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003699 /*
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00003700 * Postcopy requires that we place whole host pages atomically;
3701 * these may be huge pages for RAMBlocks that are backed by
3702 * hugetlbfs.
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003703 * To make it atomic, the data is read into a temporary page
3704 * that's moved into place later.
3705          * The migration protocol uses, possibly smaller, target pages;
3706          * however, the source ensures it always sends all the components
Wei Yang91ba4422019-11-07 20:39:06 +08003707 * of a host page in one chunk.
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003708 */
3709 page_buffer = postcopy_host_page +
David Hildenbrand6a23f632021-04-29 13:27:05 +02003710 host_page_offset_from_ram_block_offset(block, addr);
3711 /* If all TP are zero then we can optimise the place */
Wei Yange5e73b02019-11-07 20:39:05 +08003712 if (target_pages == 1) {
David Hildenbrand6a23f632021-04-29 13:27:05 +02003713 host_page = host_page_from_ram_block_offset(block, addr);
3714 } else if (host_page != host_page_from_ram_block_offset(block,
3715 addr)) {
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00003716 /* not the 1st TP within the HP */
David Hildenbrand6a23f632021-04-29 13:27:05 +02003717 error_report("Non-same host page %p/%p", host_page,
3718 host_page_from_ram_block_offset(block, addr));
3719 ret = -EINVAL;
3720 break;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003721 }
3722
3723 /*
3724 * If it's the last part of a host page then we place the host
3725 * page
3726 */
Wei Yang4cbb3c62019-11-07 20:39:04 +08003727 if (target_pages == (block->page_size / TARGET_PAGE_SIZE)) {
3728 place_needed = true;
Wei Yang4cbb3c62019-11-07 20:39:04 +08003729 }
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003730 place_source = postcopy_host_page;
3731 }
3732
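        /*
         * RAM_SAVE_FLAG_CONTINUE only means that this page belongs to the
         * same RAMBlock as the previous one, so mask it out when
         * dispatching on the page type.
         */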
3733 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
Juan Quintelabb890ed2017-04-28 09:39:55 +02003734 case RAM_SAVE_FLAG_ZERO:
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003735 ch = qemu_get_byte(f);
Wei Yang2e36bc12019-11-07 20:39:02 +08003736 /*
            3737             * We can skip setting page_buffer when this is a zero page
            3738             * and (block->page_size == TARGET_PAGE_SIZE).
3739 */
3740 if (ch || !matches_target_page_size) {
3741 memset(page_buffer, ch, TARGET_PAGE_SIZE);
3742 }
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003743 if (ch) {
3744 all_zero = false;
3745 }
3746 break;
3747
3748 case RAM_SAVE_FLAG_PAGE:
3749 all_zero = false;
Peter Xu1aa83672018-07-10 17:18:53 +08003750 if (!matches_target_page_size) {
            3751                /* For huge pages, we always use the temporary buffer */
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003752 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
3753 } else {
Peter Xu1aa83672018-07-10 17:18:53 +08003754 /*
3755 * For small pages that matches target page size, we
3756 * avoid the qemu_file copy. Instead we directly use
3757 * the buffer of QEMUFile to place the page. Note: we
3758 * cannot do any QEMUFile operation before using that
3759 * buffer to make sure the buffer is valid when
3760 * placing the page.
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003761 */
3762 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
3763 TARGET_PAGE_SIZE);
3764 }
3765 break;
Wei Yang644acf92019-11-07 20:39:07 +08003766 case RAM_SAVE_FLAG_COMPRESS_PAGE:
3767 all_zero = false;
3768 len = qemu_get_be32(f);
3769 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
3770 error_report("Invalid compressed data length: %d", len);
3771 ret = -EINVAL;
3772 break;
3773 }
3774 decompress_data_with_multi_threads(f, page_buffer, len);
3775 break;
3776
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003777 case RAM_SAVE_FLAG_EOS:
3778 /* normal exit */
Juan Quintela6df264a2018-02-28 09:10:07 +01003779 multifd_recv_sync_main();
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003780 break;
3781 default:
Bihong Yu29fccad2020-10-20 11:10:42 +08003782 error_report("Unknown combination of migration flags: 0x%x"
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003783 " (postcopy mode)", flags);
3784 ret = -EINVAL;
Peter Xu7a9ddfb2018-02-08 18:31:05 +08003785 break;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003786 }
3787
Wei Yang644acf92019-11-07 20:39:07 +08003788        /* Got the whole host page, wait for decompression before placing. */
3789 if (place_needed) {
3790 ret |= wait_for_decompress_done();
3791 }
3792
Peter Xu7a9ddfb2018-02-08 18:31:05 +08003793        /* Detect any possible file errors */
3794 if (!ret && qemu_file_get_error(f)) {
3795 ret = qemu_file_get_error(f);
3796 }
3797
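        /*
         * Place the completed host page atomically (see the comment above
         * about UFFDIO_COPY/UFFDIO_ZEROPAGE): an all-zero page goes through
         * postcopy_place_page_zero(), anything else through
         * postcopy_place_page().
         */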
3798 if (!ret && place_needed) {
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003799 if (all_zero) {
David Hildenbrand6a23f632021-04-29 13:27:05 +02003800 ret = postcopy_place_page_zero(mis, host_page, block);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003801 } else {
David Hildenbrand6a23f632021-04-29 13:27:05 +02003802 ret = postcopy_place_page(mis, host_page, place_source,
3803 block);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003804 }
David Hildenbrandddf35bd2020-04-21 10:52:56 +02003805 place_needed = false;
3806 target_pages = 0;
3807 /* Assume we have a zero page until we detect something different */
3808 all_zero = true;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003809 }
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003810 }
3811
3812 return ret;
3813}
3814
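/*
 * Returns true from the point the source advised postcopy until the
 * postcopy state reaches POSTCOPY_INCOMING_END.
 */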
Daniel Henrique Barbozaacab30b2017-11-16 20:35:26 -02003815static bool postcopy_is_advised(void)
3816{
3817 PostcopyState ps = postcopy_state_get();
3818 return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END;
3819}
3820
3821static bool postcopy_is_running(void)
3822{
3823 PostcopyState ps = postcopy_state_get();
3824 return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END;
3825}
3826
Zhang Chene6f4aa12018-09-03 12:38:50 +08003827/*
            3828 * Flush the contents of the RAM cache into the SVM's memory.
            3829 * Only flush the pages that have been dirtied by the PVM, the SVM, or both.
3830 */
Lukas Straub24fa16f2020-05-11 13:10:51 +02003831void colo_flush_ram_cache(void)
Zhang Chene6f4aa12018-09-03 12:38:50 +08003832{
3833 RAMBlock *block = NULL;
3834 void *dst_host;
3835 void *src_host;
3836 unsigned long offset = 0;
3837
zhanghailiangd1955d22018-09-03 12:38:55 +08003838 memory_global_dirty_log_sync();
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003839 WITH_RCU_READ_LOCK_GUARD() {
3840 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
3841 ramblock_sync_dirty_bitmap(ram_state, block);
Zhang Chene6f4aa12018-09-03 12:38:50 +08003842 }
3843 }
3844
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003845 trace_colo_flush_ram_cache_begin(ram_state->migration_dirty_pages);
3846 WITH_RCU_READ_LOCK_GUARD() {
3847 block = QLIST_FIRST_RCU(&ram_list.blocks);
3848
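        /*
         * Walk every block: colo_bitmap_find_dirty() returns a run of 'num'
         * dirty pages starting at 'offset'.  Clear their dirty bits and copy
         * them from the COLO cache into the SVM's memory; once the offset
         * runs past the end of the block, move on to the next block.
         */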
3849 while (block) {
Rao, Leia6a83ce2021-11-09 11:04:55 +08003850 unsigned long num = 0;
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003851
Rao, Leia6a83ce2021-11-09 11:04:55 +08003852 offset = colo_bitmap_find_dirty(ram_state, block, offset, &num);
David Hildenbrand542147f2021-04-29 13:27:08 +02003853 if (!offset_in_ramblock(block,
3854 ((ram_addr_t)offset) << TARGET_PAGE_BITS)) {
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003855 offset = 0;
Rao, Leia6a83ce2021-11-09 11:04:55 +08003856 num = 0;
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003857 block = QLIST_NEXT_RCU(block, next);
3858 } else {
Rao, Leia6a83ce2021-11-09 11:04:55 +08003859 unsigned long i = 0;
3860
3861 for (i = 0; i < num; i++) {
3862 migration_bitmap_clear_dirty(ram_state, block, offset + i);
3863 }
Alexey Romko8bba0042020-01-10 14:51:34 +01003864 dst_host = block->host
3865 + (((ram_addr_t)offset) << TARGET_PAGE_BITS);
3866 src_host = block->colo_cache
3867 + (((ram_addr_t)offset) << TARGET_PAGE_BITS);
Rao, Leia6a83ce2021-11-09 11:04:55 +08003868 memcpy(dst_host, src_host, TARGET_PAGE_SIZE * num);
3869 offset += num;
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003870 }
3871 }
3872 }
Zhang Chene6f4aa12018-09-03 12:38:50 +08003873 trace_colo_flush_ram_cache_end();
3874}
3875
Wei Yang10da4a32019-07-25 08:20:23 +08003876/**
3877 * ram_load_precopy: load pages in precopy case
3878 *
3879 * Returns 0 for success or -errno in case of error
3880 *
3881 * Called in precopy mode by ram_load().
3882 * rcu_read_lock is taken prior to this being called.
3883 *
            3884 * @f: QEMUFile to read the data from
3885 */
3886static int ram_load_precopy(QEMUFile *f)
Juan Quintela56e93d22015-05-07 19:33:31 +02003887{
Yury Kotove65cec52019-11-25 16:36:32 +03003888 int flags = 0, ret = 0, invalid_flags = 0, len = 0, i = 0;
Dr. David Alan Gilbertef08fb32017-02-24 18:28:30 +00003889    /* ADVISE comes earlier; it shows that the source has the postcopy capability enabled */
Daniel Henrique Barbozaacab30b2017-11-16 20:35:26 -02003890 bool postcopy_advised = postcopy_is_advised();
Juan Quintelaedc60122016-11-02 12:40:46 +01003891 if (!migrate_use_compression()) {
3892 invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE;
3893 }
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003894
Wei Yang10da4a32019-07-25 08:20:23 +08003895 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02003896 ram_addr_t addr, total_ram_bytes;
zhanghailiang03930312020-02-24 14:54:10 +08003897 void *host = NULL, *host_bak = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02003898 uint8_t ch;
3899
Yury Kotove65cec52019-11-25 16:36:32 +03003900 /*
            3901         * Yield periodically to let the main loop run, but an iteration of
            3902         * the main loop is expensive, so only do it once every 32768 iterations
3903 */
3904 if ((i & 32767) == 0 && qemu_in_coroutine()) {
3905 aio_co_schedule(qemu_get_current_aio_context(),
3906 qemu_coroutine_self());
3907 qemu_coroutine_yield();
3908 }
3909 i++;
3910
Juan Quintela56e93d22015-05-07 19:33:31 +02003911 addr = qemu_get_be64(f);
3912 flags = addr & ~TARGET_PAGE_MASK;
3913 addr &= TARGET_PAGE_MASK;
3914
Juan Quintelaedc60122016-11-02 12:40:46 +01003915 if (flags & invalid_flags) {
3916 if (flags & invalid_flags & RAM_SAVE_FLAG_COMPRESS_PAGE) {
3917 error_report("Received an unexpected compressed page");
3918 }
3919
3920 ret = -EINVAL;
3921 break;
3922 }
3923
Juan Quintelabb890ed2017-04-28 09:39:55 +02003924 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00003925 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
zhanghailiang4c4bad42016-01-15 11:37:41 +08003926 RAMBlock *block = ram_block_from_stream(f, flags);
3927
zhanghailiang03930312020-02-24 14:54:10 +08003928 host = host_from_ram_block_offset(block, addr);
Zhang Chen13af18f2018-09-03 12:38:48 +08003929 /*
zhanghailiang03930312020-02-24 14:54:10 +08003930             * After entering the COLO stage, we should not load pages
            3931             * into the SVM's memory directly; we put them into colo_cache first.
            3932             * NOTE: We need to keep a copy of the SVM's RAM in colo_cache.
            3933             * Previously, we copied all this memory in the COLO preparation
            3934             * stage, during which the VM had to be stopped, a time-consuming
            3935             * process.  Here we optimize it by backing up every page during the
            3936             * migration process while COLO is enabled.  Although this affects
            3937             * migration speed, it clearly reduces the downtime compared to
            3938             * backing up all of the SVM's memory in the COLO preparation stage.
Zhang Chen13af18f2018-09-03 12:38:48 +08003939 */
zhanghailiang03930312020-02-24 14:54:10 +08003940 if (migration_incoming_colo_enabled()) {
3941 if (migration_incoming_in_colo_state()) {
3942 /* In COLO stage, put all pages into cache temporarily */
zhanghailiang8af66372020-02-24 14:54:11 +08003943 host = colo_cache_from_block_offset(block, addr, true);
zhanghailiang03930312020-02-24 14:54:10 +08003944 } else {
3945 /*
            3946                     * In the migration stage but before the COLO stage,
            3947                     * put all pages into both the cache and the SVM's memory.
3948 */
zhanghailiang8af66372020-02-24 14:54:11 +08003949 host_bak = colo_cache_from_block_offset(block, addr, false);
zhanghailiang03930312020-02-24 14:54:10 +08003950 }
Zhang Chen13af18f2018-09-03 12:38:48 +08003951 }
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00003952 if (!host) {
3953 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
3954 ret = -EINVAL;
3955 break;
3956 }
Zhang Chen13af18f2018-09-03 12:38:48 +08003957 if (!migration_incoming_in_colo_state()) {
3958 ramblock_recv_bitmap_set(block, host);
3959 }
3960
Dr. David Alan Gilbert1db9d8e2017-04-26 19:37:21 +01003961 trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00003962 }
3963
Juan Quintela56e93d22015-05-07 19:33:31 +02003964 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
3965 case RAM_SAVE_FLAG_MEM_SIZE:
3966 /* Synchronize RAM block list */
3967 total_ram_bytes = addr;
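            /*
             * Each entry of the block list is: a one-byte idstr length, the
             * idstr itself and a be64 used_length.  When postcopy is in use
             * and the block's page size differs from the host page size, a
             * be64 remote page size follows; when ignore-shared is enabled,
             * a be64 GPA follows as well.
             */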
3968 while (!ret && total_ram_bytes) {
3969 RAMBlock *block;
Juan Quintela56e93d22015-05-07 19:33:31 +02003970 char id[256];
3971 ram_addr_t length;
3972
3973 len = qemu_get_byte(f);
3974 qemu_get_buffer(f, (uint8_t *)id, len);
3975 id[len] = 0;
3976 length = qemu_get_be64(f);
3977
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00003978 block = qemu_ram_block_by_name(id);
Cédric Le Goaterb895de52018-05-14 08:57:00 +02003979                if (block && !qemu_ram_is_migratable(block)) {
3980 error_report("block %s should not be migrated !", id);
3981 ret = -EINVAL;
3982 } else if (block) {
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00003983 if (length != block->used_length) {
3984 Error *local_err = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02003985
Gongleifa53a0e2016-05-10 10:04:59 +08003986 ret = qemu_ram_resize(block, length,
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00003987 &local_err);
3988 if (local_err) {
3989 error_report_err(local_err);
Juan Quintela56e93d22015-05-07 19:33:31 +02003990 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003991 }
Dr. David Alan Gilbertef08fb32017-02-24 18:28:30 +00003992 /* For postcopy we need to check hugepage sizes match */
Stefan Reitere846b742021-02-04 17:35:22 +01003993 if (postcopy_advised && migrate_postcopy_ram() &&
Dr. David Alan Gilbertef08fb32017-02-24 18:28:30 +00003994 block->page_size != qemu_host_page_size) {
3995 uint64_t remote_page_size = qemu_get_be64(f);
3996 if (remote_page_size != block->page_size) {
3997 error_report("Mismatched RAM page size %s "
3998 "(local) %zd != %" PRId64,
3999 id, block->page_size,
4000 remote_page_size);
4001 ret = -EINVAL;
4002 }
4003 }
Yury Kotovfbd162e2019-02-15 20:45:46 +03004004 if (migrate_ignore_shared()) {
4005 hwaddr addr = qemu_get_be64(f);
Yury Kotovfbd162e2019-02-15 20:45:46 +03004006 if (ramblock_is_ignored(block) &&
4007 block->mr->addr != addr) {
4008 error_report("Mismatched GPAs for block %s "
4009 "%" PRId64 "!= %" PRId64,
4010 id, (uint64_t)addr,
4011 (uint64_t)block->mr->addr);
4012 ret = -EINVAL;
4013 }
4014 }
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00004015 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
4016 block->idstr);
4017 } else {
Juan Quintela56e93d22015-05-07 19:33:31 +02004018 error_report("Unknown ramblock \"%s\", cannot "
4019 "accept migration", id);
4020 ret = -EINVAL;
4021 }
4022
4023 total_ram_bytes -= length;
4024 }
4025 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004026
Juan Quintelabb890ed2017-04-28 09:39:55 +02004027 case RAM_SAVE_FLAG_ZERO:
Juan Quintela56e93d22015-05-07 19:33:31 +02004028 ch = qemu_get_byte(f);
4029 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
4030 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004031
Juan Quintela56e93d22015-05-07 19:33:31 +02004032 case RAM_SAVE_FLAG_PAGE:
Juan Quintela56e93d22015-05-07 19:33:31 +02004033 qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
4034 break;
Juan Quintela56e93d22015-05-07 19:33:31 +02004035
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004036 case RAM_SAVE_FLAG_COMPRESS_PAGE:
Juan Quintela56e93d22015-05-07 19:33:31 +02004037 len = qemu_get_be32(f);
4038 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
4039 error_report("Invalid compressed data length: %d", len);
4040 ret = -EINVAL;
4041 break;
4042 }
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00004043 decompress_data_with_multi_threads(f, host, len);
Juan Quintela56e93d22015-05-07 19:33:31 +02004044 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004045
Juan Quintela56e93d22015-05-07 19:33:31 +02004046 case RAM_SAVE_FLAG_XBZRLE:
Juan Quintela56e93d22015-05-07 19:33:31 +02004047 if (load_xbzrle(f, addr, host) < 0) {
4048 error_report("Failed to decompress XBZRLE page at "
4049 RAM_ADDR_FMT, addr);
4050 ret = -EINVAL;
4051 break;
4052 }
4053 break;
4054 case RAM_SAVE_FLAG_EOS:
4055 /* normal exit */
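            /*
             * RAM_SAVE_FLAG_EOS terminates each chunk sent by the source;
             * synchronize the multifd receive channels before continuing.
             */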
Juan Quintela6df264a2018-02-28 09:10:07 +01004056 multifd_recv_sync_main();
Juan Quintela56e93d22015-05-07 19:33:31 +02004057 break;
4058 default:
4059 if (flags & RAM_SAVE_FLAG_HOOK) {
Dr. David Alan Gilbert632e3a52015-06-11 18:17:23 +01004060 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
Juan Quintela56e93d22015-05-07 19:33:31 +02004061 } else {
Bihong Yu29fccad2020-10-20 11:10:42 +08004062 error_report("Unknown combination of migration flags: 0x%x",
Juan Quintela56e93d22015-05-07 19:33:31 +02004063 flags);
4064 ret = -EINVAL;
4065 }
4066 }
4067 if (!ret) {
4068 ret = qemu_file_get_error(f);
4069 }
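        /*
         * In the COLO-enabled case before the COLO stage, the page was
         * loaded into the SVM's memory above; also back it up into the
         * colo_cache copy (host_bak).
         */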
zhanghailiang03930312020-02-24 14:54:10 +08004070 if (!ret && host_bak) {
4071 memcpy(host_bak, host, TARGET_PAGE_SIZE);
4072 }
Juan Quintela56e93d22015-05-07 19:33:31 +02004073 }
4074
Wei Yangca1a6b72019-11-07 20:39:03 +08004075 ret |= wait_for_decompress_done();
Wei Yang10da4a32019-07-25 08:20:23 +08004076 return ret;
4077}
4078
4079static int ram_load(QEMUFile *f, void *opaque, int version_id)
4080{
4081 int ret = 0;
4082 static uint64_t seq_iter;
4083 /*
4084 * If system is running in postcopy mode, page inserts to host memory must
4085 * be atomic
4086 */
4087 bool postcopy_running = postcopy_is_running();
4088
4089 seq_iter++;
4090
4091 if (version_id != 4) {
4092 return -EINVAL;
4093 }
4094
4095 /*
4096 * This RCU critical section can be very long running.
4097 * When RCU reclaims in the code start to become numerous,
4098 * it will be necessary to reduce the granularity of this
4099 * critical section.
4100 */
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01004101 WITH_RCU_READ_LOCK_GUARD() {
4102 if (postcopy_running) {
4103 ret = ram_load_postcopy(f);
4104 } else {
4105 ret = ram_load_precopy(f);
4106 }
Wei Yang10da4a32019-07-25 08:20:23 +08004107 }
Juan Quintela55c44462017-01-23 22:32:05 +01004108 trace_ram_load_complete(ret, seq_iter);
Zhang Chene6f4aa12018-09-03 12:38:50 +08004109
Juan Quintela56e93d22015-05-07 19:33:31 +02004110 return ret;
4111}
4112
Vladimir Sementsov-Ogievskiyc6467622017-07-10 19:30:14 +03004113static bool ram_has_postcopy(void *opaque)
4114{
Junyan He469dd512018-07-18 15:48:02 +08004115 RAMBlock *rb;
Yury Kotovfbd162e2019-02-15 20:45:46 +03004116 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
Junyan He469dd512018-07-18 15:48:02 +08004117 if (ramblock_is_pmem(rb)) {
            4118            info_report("Block: %s, host: %p is an nvdimm memory; postcopy "
            4119                        "is not supported yet!", rb->idstr, rb->host);
4120 return false;
4121 }
4122 }
4123
Vladimir Sementsov-Ogievskiyc6467622017-07-10 19:30:14 +03004124 return migrate_postcopy_ram();
4125}
4126
Peter Xuedd090c2018-05-02 18:47:32 +08004127/* Sync all the dirty bitmap with destination VM. */
4128static int ram_dirty_bitmap_sync_all(MigrationState *s, RAMState *rs)
4129{
4130 RAMBlock *block;
4131 QEMUFile *file = s->to_dst_file;
4132 int ramblock_count = 0;
4133
4134 trace_ram_dirty_bitmap_sync_start();
4135
Yury Kotovfbd162e2019-02-15 20:45:46 +03004136 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Peter Xuedd090c2018-05-02 18:47:32 +08004137 qemu_savevm_send_recv_bitmap(file, block->idstr);
4138 trace_ram_dirty_bitmap_request(block->idstr);
4139 ramblock_count++;
4140 }
4141
4142 trace_ram_dirty_bitmap_sync_wait();
4143
            4144    /* Wait until all the ramblocks' dirty bitmaps have been synced */
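    /*
     * Each qemu_sem_wait() below is matched by a qemu_sem_post() from
     * ram_dirty_bitmap_reload_notify() once a block's bitmap has been
     * received and reloaded.
     */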
4145 while (ramblock_count--) {
4146 qemu_sem_wait(&s->rp_state.rp_sem);
4147 }
4148
4149 trace_ram_dirty_bitmap_sync_complete();
4150
4151 return 0;
4152}
4153
4154static void ram_dirty_bitmap_reload_notify(MigrationState *s)
4155{
4156 qemu_sem_post(&s->rp_state.rp_sem);
4157}
4158
Peter Xua335deb2018-05-02 18:47:28 +08004159/*
            4160 * Read the received bitmap and invert it to form the initial dirty bitmap.
            4161 * This is only used when a postcopy migration is paused and we want
            4162 * to resume from a middle point.
4163 */
4164int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block)
4165{
4166 int ret = -EINVAL;
Peter Xu43044ac2021-07-22 13:58:38 -04004167 /* from_dst_file is always valid because we're within rp_thread */
Peter Xua335deb2018-05-02 18:47:28 +08004168 QEMUFile *file = s->rp_state.from_dst_file;
4169 unsigned long *le_bitmap, nbits = block->used_length >> TARGET_PAGE_BITS;
Peter Xua725ef92018-07-10 17:18:55 +08004170 uint64_t local_size = DIV_ROUND_UP(nbits, 8);
Peter Xua335deb2018-05-02 18:47:28 +08004171 uint64_t size, end_mark;
4172
4173 trace_ram_dirty_bitmap_reload_begin(block->idstr);
4174
4175 if (s->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
4176 error_report("%s: incorrect state %s", __func__,
4177 MigrationStatus_str(s->state));
4178 return -EINVAL;
4179 }
4180
4181 /*
4182 * Note: see comments in ramblock_recv_bitmap_send() on why we
zhaolichang3a4452d2020-09-17 15:50:21 +08004183     * need the endianness conversion and the padding.
Peter Xua335deb2018-05-02 18:47:28 +08004184 */
4185 local_size = ROUND_UP(local_size, 8);
4186
            4187    /* Add padding */
4188 le_bitmap = bitmap_new(nbits + BITS_PER_LONG);
4189
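    /*
     * Wire format: a be64 size, then 'size' bytes of little-endian bitmap
     * data, then a be64 end mark (RAMBLOCK_RECV_BITMAP_ENDING).
     */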
4190 size = qemu_get_be64(file);
4191
4192 /* The size of the bitmap should match with our ramblock */
4193 if (size != local_size) {
4194 error_report("%s: ramblock '%s' bitmap size mismatch "
4195 "(0x%"PRIx64" != 0x%"PRIx64")", __func__,
4196 block->idstr, size, local_size);
4197 ret = -EINVAL;
4198 goto out;
4199 }
4200
4201 size = qemu_get_buffer(file, (uint8_t *)le_bitmap, local_size);
4202 end_mark = qemu_get_be64(file);
4203
4204 ret = qemu_file_get_error(file);
4205 if (ret || size != local_size) {
4206 error_report("%s: read bitmap failed for ramblock '%s': %d"
4207 " (size 0x%"PRIx64", got: 0x%"PRIx64")",
4208 __func__, block->idstr, ret, local_size, size);
4209 ret = -EIO;
4210 goto out;
4211 }
4212
4213 if (end_mark != RAMBLOCK_RECV_BITMAP_ENDING) {
Philippe Mathieu-Daudéaf3bbbe2020-11-03 12:25:58 +01004214 error_report("%s: ramblock '%s' end mark incorrect: 0x%"PRIx64,
Peter Xua335deb2018-05-02 18:47:28 +08004215 __func__, block->idstr, end_mark);
4216 ret = -EINVAL;
4217 goto out;
4218 }
4219
4220 /*
zhaolichang3a4452d2020-09-17 15:50:21 +08004221 * Endianness conversion. We are during postcopy (though paused).
Peter Xua335deb2018-05-02 18:47:28 +08004222 * The dirty bitmap won't change. We can directly modify it.
4223 */
4224 bitmap_from_le(block->bmap, le_bitmap, nbits);
4225
4226 /*
            4227     * What we received is the "received bitmap". Invert it to form the initial
4228 * dirty bitmap for this ramblock.
4229 */
4230 bitmap_complement(block->bmap, block->bmap, nbits);
4231
David Hildenbrandbe39b4c2021-10-11 19:53:41 +02004232 /* Clear dirty bits of discarded ranges that we don't want to migrate. */
4233 ramblock_dirty_bitmap_clear_discarded_pages(block);
4234
4235 /* We'll recalculate migration_dirty_pages in ram_state_resume_prepare(). */
Peter Xua335deb2018-05-02 18:47:28 +08004236 trace_ram_dirty_bitmap_reload_complete(block->idstr);
4237
Peter Xuedd090c2018-05-02 18:47:32 +08004238 /*
            4239     * We succeeded in syncing the bitmap for the current ramblock. If this is
4240 * the last one to sync, we need to notify the main send thread.
4241 */
4242 ram_dirty_bitmap_reload_notify(s);
4243
Peter Xua335deb2018-05-02 18:47:28 +08004244 ret = 0;
4245out:
Peter Xubf269902018-05-25 09:50:42 +08004246 g_free(le_bitmap);
Peter Xua335deb2018-05-02 18:47:28 +08004247 return ret;
4248}
4249
Peter Xuedd090c2018-05-02 18:47:32 +08004250static int ram_resume_prepare(MigrationState *s, void *opaque)
4251{
4252 RAMState *rs = *(RAMState **)opaque;
Peter Xu08614f32018-05-02 18:47:33 +08004253 int ret;
Peter Xuedd090c2018-05-02 18:47:32 +08004254
Peter Xu08614f32018-05-02 18:47:33 +08004255 ret = ram_dirty_bitmap_sync_all(s, rs);
4256 if (ret) {
4257 return ret;
4258 }
4259
4260 ram_state_resume_prepare(rs, s->to_dst_file);
4261
4262 return 0;
Peter Xuedd090c2018-05-02 18:47:32 +08004263}
4264
Juan Quintela56e93d22015-05-07 19:33:31 +02004265static SaveVMHandlers savevm_ram_handlers = {
Juan Quintela9907e842017-06-28 11:52:24 +02004266 .save_setup = ram_save_setup,
Juan Quintela56e93d22015-05-07 19:33:31 +02004267 .save_live_iterate = ram_save_iterate,
Dr. David Alan Gilbert763c9062015-11-05 18:11:00 +00004268 .save_live_complete_postcopy = ram_save_complete,
Dr. David Alan Gilberta3e06c32015-11-05 18:10:41 +00004269 .save_live_complete_precopy = ram_save_complete,
Vladimir Sementsov-Ogievskiyc6467622017-07-10 19:30:14 +03004270 .has_postcopy = ram_has_postcopy,
Juan Quintela56e93d22015-05-07 19:33:31 +02004271 .save_live_pending = ram_save_pending,
4272 .load_state = ram_load,
Juan Quintelaf265e0e2017-06-28 11:52:27 +02004273 .save_cleanup = ram_save_cleanup,
4274 .load_setup = ram_load_setup,
4275 .load_cleanup = ram_load_cleanup,
Peter Xuedd090c2018-05-02 18:47:32 +08004276 .resume_prepare = ram_resume_prepare,
Juan Quintela56e93d22015-05-07 19:33:31 +02004277};
4278
David Hildenbrandc7c0e722021-04-29 13:27:02 +02004279static void ram_mig_ram_block_resized(RAMBlockNotifier *n, void *host,
4280 size_t old_size, size_t new_size)
4281{
David Hildenbrandcc61c702021-04-29 13:27:04 +02004282 PostcopyState ps = postcopy_state_get();
David Hildenbrandc7c0e722021-04-29 13:27:02 +02004283 ram_addr_t offset;
4284 RAMBlock *rb = qemu_ram_block_from_host(host, false, &offset);
4285 Error *err = NULL;
4286
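    /* Blocks whose RAM is not migrated (ignore-shared) need no handling. */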
4287 if (ramblock_is_ignored(rb)) {
4288 return;
4289 }
4290
4291 if (!migration_is_idle()) {
4292 /*
4293 * Precopy code on the source cannot deal with the size of RAM blocks
4294 * changing at random points in time - especially after sending the
4295 * RAM block sizes in the migration stream, they must no longer change.
4296 * Abort and indicate a proper reason.
4297 */
4298 error_setg(&err, "RAM block '%s' resized during precopy.", rb->idstr);
Laurent Vivier458fecc2021-09-29 16:43:10 +02004299 migration_cancel(err);
David Hildenbrandc7c0e722021-04-29 13:27:02 +02004300 error_free(err);
David Hildenbrandc7c0e722021-04-29 13:27:02 +02004301 }
David Hildenbrandcc61c702021-04-29 13:27:04 +02004302
4303 switch (ps) {
4304 case POSTCOPY_INCOMING_ADVISE:
4305 /*
4306 * Update what ram_postcopy_incoming_init()->init_range() does at the
4307 * time postcopy was advised. Syncing RAM blocks with the source will
4308 * result in RAM resizes.
4309 */
4310 if (old_size < new_size) {
4311 if (ram_discard_range(rb->idstr, old_size, new_size - old_size)) {
4312 error_report("RAM block '%s' discard of resized RAM failed",
4313 rb->idstr);
4314 }
4315 }
David Hildenbrand898ba902021-04-29 13:27:06 +02004316 rb->postcopy_length = new_size;
David Hildenbrandcc61c702021-04-29 13:27:04 +02004317 break;
4318 case POSTCOPY_INCOMING_NONE:
4319 case POSTCOPY_INCOMING_RUNNING:
4320 case POSTCOPY_INCOMING_END:
4321 /*
            4322     * Once our guest is running, postcopy no longer cares about
            4323     * resizes. When growing, the new memory was not available on the
            4324     * source, so no handling is needed.
4325 */
4326 break;
4327 default:
4328 error_report("RAM block '%s' resized during postcopy state: %d",
4329 rb->idstr, ps);
4330 exit(-1);
4331 }
David Hildenbrandc7c0e722021-04-29 13:27:02 +02004332}
4333
4334static RAMBlockNotifier ram_mig_ram_notifier = {
4335 .ram_block_resized = ram_mig_ram_block_resized,
4336};
4337
Juan Quintela56e93d22015-05-07 19:33:31 +02004338void ram_mig_init(void)
4339{
4340 qemu_mutex_init(&XBZRLE.lock);
Dr. David Alan Gilbertce62df52019-08-22 12:54:33 +01004341 register_savevm_live("ram", 0, 4, &savevm_ram_handlers, &ram_state);
David Hildenbrandc7c0e722021-04-29 13:27:02 +02004342 ram_block_notifier_add(&ram_mig_ram_notifier);
Juan Quintela56e93d22015-05-07 19:33:31 +02004343}