/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2011-2015 Red Hat Inc
 *
 * Authors:
 *  Juan Quintela <quintela@redhat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include <zlib.h>
#include "qemu/cutils.h"
#include "qemu/bitops.h"
#include "qemu/bitmap.h"
#include "qemu/main-loop.h"
#include "qemu/pmem.h"
#include "xbzrle.h"
#include "ram.h"
#include "migration.h"
#include "socket.h"
#include "migration/register.h"
#include "migration/misc.h"
#include "qemu-file.h"
#include "postcopy-ram.h"
#include "page_cache.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "qapi/qapi-events-migration.h"
#include "qapi/qmp/qerror.h"
#include "trace.h"
#include "exec/ram_addr.h"
#include "exec/target_page.h"
#include "qemu/rcu_queue.h"
#include "migration/colo.h"
#include "block.h"
#include "sysemu/sysemu.h"
#include "qemu/uuid.h"
#include "savevm.h"
#include "qemu/iov.h"

/***********************************************************/
/* ram save/restore */

/* RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS, it
 * worked for pages that were filled with the same char.  We switched
 * it to only search for the zero value.  And to avoid confusion with
 * RAM_SAVE_FLAG_COMPRESS_PAGE just rename it.
 */

#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
#define RAM_SAVE_FLAG_ZERO     0x02
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
#define RAM_SAVE_FLAG_PAGE     0x08
#define RAM_SAVE_FLAG_EOS      0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE   0x40
/* 0x80 is reserved in migration.h start with 0x100 next */
#define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100

static inline bool is_zero_range(uint8_t *p, uint64_t size)
{
    return buffer_is_zero(p, size);
}

XBZRLECacheStats xbzrle_counters;

/* struct contains XBZRLE cache and a static page
   used by the compression */
static struct {
    /* buffer used for XBZRLE encoding */
    uint8_t *encoded_buf;
    /* buffer for storing page content */
    uint8_t *current_buf;
    /* Cache for XBZRLE, Protected by lock. */
    PageCache *cache;
    QemuMutex lock;
    /* it will store a page full of zeros */
    uint8_t *zero_target_page;
    /* buffer used for XBZRLE decoding */
    uint8_t *decoded_buf;
} XBZRLE;

static void XBZRLE_cache_lock(void)
{
    if (migrate_use_xbzrle())
        qemu_mutex_lock(&XBZRLE.lock);
}

static void XBZRLE_cache_unlock(void)
{
    if (migrate_use_xbzrle())
        qemu_mutex_unlock(&XBZRLE.lock);
}

/**
 * xbzrle_cache_resize: resize the xbzrle cache
 *
 * This function is called from qmp_migrate_set_cache_size in main
 * thread, possibly while a migration is in progress. A running
 * migration may be using the cache and might finish during this call,
 * hence changes to the cache are protected by XBZRLE.lock().
 *
 * Returns 0 for success or -1 for error
 *
 * @new_size: new cache size
 * @errp: set *errp if the check failed, with reason
 */
int xbzrle_cache_resize(int64_t new_size, Error **errp)
{
    PageCache *new_cache;
    int64_t ret = 0;

    /* Check for truncation */
    if (new_size != (size_t)new_size) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
                   "exceeding address space");
        return -1;
    }

    if (new_size == migrate_xbzrle_cache_size()) {
        /* nothing to do */
        return 0;
    }

    XBZRLE_cache_lock();

    if (XBZRLE.cache != NULL) {
        new_cache = cache_init(new_size, TARGET_PAGE_SIZE, errp);
        if (!new_cache) {
            ret = -1;
            goto out;
        }

        cache_fini(XBZRLE.cache);
        XBZRLE.cache = new_cache;
    }
out:
    XBZRLE_cache_unlock();
    return ret;
}

static bool ramblock_is_ignored(RAMBlock *block)
{
    return !qemu_ram_is_migratable(block) ||
           (migrate_ignore_shared() && qemu_ram_is_shared(block));
}

/* Should be holding either ram_list.mutex, or the RCU lock. */
#define RAMBLOCK_FOREACH_NOT_IGNORED(block)            \
    INTERNAL_RAMBLOCK_FOREACH(block)                   \
        if (ramblock_is_ignored(block)) {} else

#define RAMBLOCK_FOREACH_MIGRATABLE(block)             \
    INTERNAL_RAMBLOCK_FOREACH(block)                   \
        if (!qemu_ram_is_migratable(block)) {} else

#undef RAMBLOCK_FOREACH

int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque)
{
    RAMBlock *block;
    int ret = 0;

    rcu_read_lock();
    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        ret = func(block, opaque);
        if (ret) {
            break;
        }
    }
    rcu_read_unlock();
    return ret;
}

static void ramblock_recv_map_init(void)
{
    RAMBlock *rb;

    RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
        assert(!rb->receivedmap);
        rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits());
    }
}

int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr)
{
    return test_bit(ramblock_recv_bitmap_offset(host_addr, rb),
                    rb->receivedmap);
}

bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset)
{
    return test_bit(byte_offset >> TARGET_PAGE_BITS, rb->receivedmap);
}

void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr)
{
    set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb), rb->receivedmap);
}

void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr,
                                    size_t nr)
{
    bitmap_set_atomic(rb->receivedmap,
                      ramblock_recv_bitmap_offset(host_addr, rb),
                      nr);
}

#define  RAMBLOCK_RECV_BITMAP_ENDING  (0x0123456789abcdefULL)

/*
 * Format: bitmap_size (8 bytes) + whole_bitmap (N bytes).
 *
 * Returns >0 if success with sent bytes, or <0 if error.
 */
int64_t ramblock_recv_bitmap_send(QEMUFile *file,
                                  const char *block_name)
{
    RAMBlock *block = qemu_ram_block_by_name(block_name);
    unsigned long *le_bitmap, nbits;
    uint64_t size;

    if (!block) {
        error_report("%s: invalid block name: %s", __func__, block_name);
        return -1;
    }

    nbits = block->used_length >> TARGET_PAGE_BITS;

    /*
     * Make sure the tmp bitmap buffer is big enough, e.g., on 32bit
     * machines we may need 4 more bytes for padding (see below
     * comment). So extend it a bit beforehand.
     */
    le_bitmap = bitmap_new(nbits + BITS_PER_LONG);

    /*
     * Always use little endian when sending the bitmap. This is
     * required when the source and destination VMs are not using the
     * same endianness. (Note: big endian won't work.)
     */
262 bitmap_to_le(le_bitmap, block->receivedmap, nbits);
263
264 /* Size of the bitmap, in bytes */
Peter Xua725ef92018-07-10 17:18:55 +0800265 size = DIV_ROUND_UP(nbits, 8);
Peter Xua335deb2018-05-02 18:47:28 +0800266
267 /*
268 * size is always aligned to 8 bytes for 64bit machines, but it
269 * may not be true for 32bit machines. We need this padding to
270 * make sure the migration can survive even between 32bit and
271 * 64bit machines.
272 */
273 size = ROUND_UP(size, 8);
274
275 qemu_put_be64(file, size);
276 qemu_put_buffer(file, (const uint8_t *)le_bitmap, size);
    /*
     * Mark as an end, in case the middle part is screwed up due to
     * some "mysterious" reason.
     */
    qemu_put_be64(file, RAMBLOCK_RECV_BITMAP_ENDING);
    qemu_fflush(file);

    g_free(le_bitmap);

    if (qemu_file_get_error(file)) {
        return qemu_file_get_error(file);
    }

    return size + sizeof(size);
}

/*
 * An outstanding page request, on the source, having been received
 * and queued
 */
struct RAMSrcPageRequest {
    RAMBlock *rb;
    hwaddr    offset;
    hwaddr    len;

    QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
};

/* State of RAM for migration */
struct RAMState {
    /* QEMUFile used for this migration */
    QEMUFile *f;
    /* Last block that we have visited searching for dirty pages */
    RAMBlock *last_seen_block;
    /* Last block from where we have sent data */
    RAMBlock *last_sent_block;
    /* Last dirty target page we have sent */
    ram_addr_t last_page;
    /* last ram version we have seen */
    uint32_t last_version;
    /* We are in the first round */
    bool ram_bulk_stage;
    /* The free page optimization is enabled */
    bool fpo_enabled;
    /* How many times we have dirty too many pages */
    int dirty_rate_high_cnt;
    /* these variables are used for bitmap sync */
    /* last time we did a full bitmap_sync */
    int64_t time_last_bitmap_sync;
    /* bytes transferred at start_time */
    uint64_t bytes_xfer_prev;
    /* number of dirty pages since start_time */
    uint64_t num_dirty_pages_period;
    /* xbzrle misses since the beginning of the period */
    uint64_t xbzrle_cache_miss_prev;

    /* compression statistics since the beginning of the period */
    /* number of times there was no free thread to compress data */
    uint64_t compress_thread_busy_prev;
    /* amount of bytes after compression */
    uint64_t compressed_size_prev;
    /* number of compressed pages */
    uint64_t compress_pages_prev;

    /* total handled target pages at the beginning of period */
    uint64_t target_page_count_prev;
    /* total handled target pages since start */
    uint64_t target_page_count;
    /* number of dirty bits in the bitmap */
    uint64_t migration_dirty_pages;
    /* Protects modification of the bitmap and migration dirty pages */
    QemuMutex bitmap_mutex;
    /* The RAMBlock used in the last src_page_requests */
    RAMBlock *last_req_rb;
    /* Queue of outstanding page requests from the destination */
    QemuMutex src_page_req_mutex;
    QSIMPLEQ_HEAD(, RAMSrcPageRequest) src_page_requests;
};
typedef struct RAMState RAMState;

static RAMState *ram_state;

static NotifierWithReturnList precopy_notifier_list;

void precopy_infrastructure_init(void)
{
    notifier_with_return_list_init(&precopy_notifier_list);
}

void precopy_add_notifier(NotifierWithReturn *n)
{
    notifier_with_return_list_add(&precopy_notifier_list, n);
}

void precopy_remove_notifier(NotifierWithReturn *n)
{
    notifier_with_return_remove(n);
}

int precopy_notify(PrecopyNotifyReason reason, Error **errp)
{
    PrecopyNotifyData pnd;
    pnd.reason = reason;
    pnd.errp = errp;

    return notifier_with_return_list_notify(&precopy_notifier_list, &pnd);
}

void precopy_enable_free_page_optimization(void)
{
    if (!ram_state) {
        return;
    }

    ram_state->fpo_enabled = true;
}

uint64_t ram_bytes_remaining(void)
{
    return ram_state ? (ram_state->migration_dirty_pages * TARGET_PAGE_SIZE) :
                       0;
}

MigrationStats ram_counters;

/* used by the search for pages to send */
struct PageSearchStatus {
    /* Current block being searched */
    RAMBlock    *block;
    /* Current page to search from */
    unsigned long page;
    /* Set once we wrap around */
    bool         complete_round;
};
typedef struct PageSearchStatus PageSearchStatus;

CompressionStats compression_counters;

struct CompressParam {
    bool done;
    bool quit;
    bool zero_page;
    QEMUFile *file;
    QemuMutex mutex;
    QemuCond cond;
    RAMBlock *block;
    ram_addr_t offset;

    /* internally used fields */
    z_stream stream;
    uint8_t *originbuf;
};
typedef struct CompressParam CompressParam;

struct DecompressParam {
    bool done;
    bool quit;
    QemuMutex mutex;
    QemuCond cond;
    void *des;
    uint8_t *compbuf;
    int len;
    z_stream stream;
};
typedef struct DecompressParam DecompressParam;

static CompressParam *comp_param;
static QemuThread *compress_threads;
/* comp_done_cond is used to wake up the migration thread when
 * one of the compression threads has finished the compression.
 * comp_done_lock is used to co-work with comp_done_cond.
 */
static QemuMutex comp_done_lock;
static QemuCond comp_done_cond;
/* The empty QEMUFileOps will be used by file in CompressParam */
static const QEMUFileOps empty_ops = { };

static QEMUFile *decomp_file;
static DecompressParam *decomp_param;
static QemuThread *decompress_threads;
static QemuMutex decomp_done_lock;
static QemuCond decomp_done_cond;

static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
                                 ram_addr_t offset, uint8_t *source_buf);

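/*
 * Worker body for each compression thread: wait until a (block, offset)
 * pair is handed over via its CompressParam, compress that page into
 * param->file, mark the job done and signal comp_done_cond, and loop
 * until param->quit is set.
 */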
static void *do_data_compress(void *opaque)
{
    CompressParam *param = opaque;
    RAMBlock *block;
    ram_addr_t offset;
    bool zero_page;

    qemu_mutex_lock(&param->mutex);
    while (!param->quit) {
        if (param->block) {
            block = param->block;
            offset = param->offset;
            param->block = NULL;
            qemu_mutex_unlock(&param->mutex);

            zero_page = do_compress_ram_page(param->file, &param->stream,
                                             block, offset, param->originbuf);

            qemu_mutex_lock(&comp_done_lock);
            param->done = true;
            param->zero_page = zero_page;
            qemu_cond_signal(&comp_done_cond);
            qemu_mutex_unlock(&comp_done_lock);

            qemu_mutex_lock(&param->mutex);
        } else {
            qemu_cond_wait(&param->cond, &param->mutex);
        }
    }
    qemu_mutex_unlock(&param->mutex);

    return NULL;
}

static void compress_threads_save_cleanup(void)
{
    int i, thread_count;

    if (!migrate_use_compression() || !comp_param) {
        return;
    }

    thread_count = migrate_compress_threads();
    for (i = 0; i < thread_count; i++) {
        /*
         * we use it as an indicator which shows if the thread is
         * properly init'd or not
         */
        if (!comp_param[i].file) {
            break;
        }

        qemu_mutex_lock(&comp_param[i].mutex);
        comp_param[i].quit = true;
        qemu_cond_signal(&comp_param[i].cond);
        qemu_mutex_unlock(&comp_param[i].mutex);

        qemu_thread_join(compress_threads + i);
        qemu_mutex_destroy(&comp_param[i].mutex);
        qemu_cond_destroy(&comp_param[i].cond);
        deflateEnd(&comp_param[i].stream);
        g_free(comp_param[i].originbuf);
        qemu_fclose(comp_param[i].file);
        comp_param[i].file = NULL;
    }
    qemu_mutex_destroy(&comp_done_lock);
    qemu_cond_destroy(&comp_done_cond);
    g_free(compress_threads);
    g_free(comp_param);
    compress_threads = NULL;
    comp_param = NULL;
}

static int compress_threads_save_setup(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return 0;
    }
    thread_count = migrate_compress_threads();
    compress_threads = g_new0(QemuThread, thread_count);
    comp_param = g_new0(CompressParam, thread_count);
    qemu_cond_init(&comp_done_cond);
    qemu_mutex_init(&comp_done_lock);
    for (i = 0; i < thread_count; i++) {
        comp_param[i].originbuf = g_try_malloc(TARGET_PAGE_SIZE);
        if (!comp_param[i].originbuf) {
            goto exit;
        }

        if (deflateInit(&comp_param[i].stream,
                        migrate_compress_level()) != Z_OK) {
            g_free(comp_param[i].originbuf);
            goto exit;
        }

        /* comp_param[i].file is just used as a dummy buffer to save data,
         * set its ops to empty.
         */
        comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
        comp_param[i].done = true;
        comp_param[i].quit = false;
        qemu_mutex_init(&comp_param[i].mutex);
        qemu_cond_init(&comp_param[i].cond);
        qemu_thread_create(compress_threads + i, "compress",
                           do_data_compress, comp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
    return 0;

exit:
    compress_threads_save_cleanup();
    return -1;
}

/* Multiple fd's */

#define MULTIFD_MAGIC 0x11223344U
#define MULTIFD_VERSION 1

#define MULTIFD_FLAG_SYNC (1 << 0)

/* This value needs to be a multiple of qemu_target_page_size() */
#define MULTIFD_PACKET_SIZE (512 * 1024)

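/*
 * Initial handshake message, sent exactly once on every multifd channel
 * right after it connects; it lets the destination check magic, version,
 * source UUID and channel id before any data packets flow.
 */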
typedef struct {
    uint32_t magic;
    uint32_t version;
    unsigned char uuid[16]; /* QemuUUID */
    uint8_t id;
    uint8_t unused1[7];     /* Reserved for future use */
    uint64_t unused2[4];    /* Reserved for future use */
} __attribute__((packed)) MultiFDInit_t;

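/*
 * On-wire header of every multifd data packet.  Multi-byte fields are
 * stored in big endian (see multifd_send_fill_packet() and
 * multifd_recv_unfill_packet()); the page data itself follows in a
 * separate write of next_packet_size bytes.
 */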
typedef struct {
    uint32_t magic;
    uint32_t version;
    uint32_t flags;
    /* maximum number of allocated pages */
    uint32_t pages_alloc;
    uint32_t pages_used;
    /* size of the next packet that contains pages */
    uint32_t next_packet_size;
    uint64_t packet_num;
    uint64_t unused[4];    /* Reserved for future use */
    char ramblock[256];
    uint64_t offset[];
} __attribute__((packed)) MultiFDPacket_t;

typedef struct {
    /* number of used pages */
    uint32_t used;
    /* number of allocated pages */
    uint32_t allocated;
    /* global number of generated multifd packets */
    uint64_t packet_num;
    /* offset of each page */
    ram_addr_t *offset;
    /* pointer to each page */
    struct iovec *iov;
    RAMBlock *block;
} MultiFDPages_t;

typedef struct {
    /* these fields are not changed once the thread is created */
    /* channel number */
    uint8_t id;
    /* channel thread name */
    char *name;
    /* channel thread id */
    QemuThread thread;
    /* communication channel */
    QIOChannel *c;
    /* sem where to wait for more work */
    QemuSemaphore sem;
    /* this mutex protects the following parameters */
    QemuMutex mutex;
    /* is this channel thread running */
    bool running;
    /* should this thread finish */
    bool quit;
    /* thread has work to do */
    int pending_job;
    /* array of pages to be sent */
    MultiFDPages_t *pages;
    /* packet allocated len */
    uint32_t packet_len;
    /* pointer to the packet */
    MultiFDPacket_t *packet;
    /* multifd flags for each packet */
    uint32_t flags;
    /* size of the next packet that contains pages */
    uint32_t next_packet_size;
    /* global number of generated multifd packets */
    uint64_t packet_num;
    /* thread local variables */
    /* packets sent through this channel */
    uint64_t num_packets;
    /* pages sent through this channel */
    uint64_t num_pages;
} MultiFDSendParams;

typedef struct {
    /* these fields are not changed once the thread is created */
    /* channel number */
    uint8_t id;
    /* channel thread name */
    char *name;
    /* channel thread id */
    QemuThread thread;
    /* communication channel */
    QIOChannel *c;
    /* this mutex protects the following parameters */
    QemuMutex mutex;
    /* is this channel thread running */
    bool running;
    /* array of pages to receive */
    MultiFDPages_t *pages;
    /* packet allocated len */
    uint32_t packet_len;
    /* pointer to the packet */
    MultiFDPacket_t *packet;
    /* multifd flags for each packet */
    uint32_t flags;
    /* global number of generated multifd packets */
    uint64_t packet_num;
    /* thread local variables */
    /* size of the next packet that contains pages */
    uint32_t next_packet_size;
    /* packets sent through this channel */
    uint64_t num_packets;
    /* pages sent through this channel */
    uint64_t num_pages;
    /* syncs main thread and channels */
    QemuSemaphore sem_sync;
} MultiFDRecvParams;

static int multifd_send_initial_packet(MultiFDSendParams *p, Error **errp)
{
    MultiFDInit_t msg;
    int ret;

    msg.magic = cpu_to_be32(MULTIFD_MAGIC);
    msg.version = cpu_to_be32(MULTIFD_VERSION);
    msg.id = p->id;
    memcpy(msg.uuid, &qemu_uuid.data, sizeof(msg.uuid));

    ret = qio_channel_write_all(p->c, (char *)&msg, sizeof(msg), errp);
    if (ret != 0) {
        return -1;
    }
    return 0;
}

static int multifd_recv_initial_packet(QIOChannel *c, Error **errp)
{
    MultiFDInit_t msg;
    int ret;

    ret = qio_channel_read_all(c, (char *)&msg, sizeof(msg), errp);
    if (ret != 0) {
        return -1;
    }

    msg.magic = be32_to_cpu(msg.magic);
    msg.version = be32_to_cpu(msg.version);

    if (msg.magic != MULTIFD_MAGIC) {
        error_setg(errp, "multifd: received packet magic %x "
                   "expected %x", msg.magic, MULTIFD_MAGIC);
        return -1;
    }

    if (msg.version != MULTIFD_VERSION) {
        error_setg(errp, "multifd: received packet version %d "
                   "expected %d", msg.version, MULTIFD_VERSION);
        return -1;
    }

    if (memcmp(msg.uuid, &qemu_uuid, sizeof(qemu_uuid))) {
        char *uuid = qemu_uuid_unparse_strdup(&qemu_uuid);
        char *msg_uuid = qemu_uuid_unparse_strdup((const QemuUUID *)msg.uuid);

        error_setg(errp, "multifd: received uuid '%s' and expected "
                   "uuid '%s' for channel %hhd", msg_uuid, uuid, msg.id);
        g_free(uuid);
        g_free(msg_uuid);
        return -1;
    }

    if (msg.id > migrate_multifd_channels()) {
        error_setg(errp, "multifd: received channel id %d "
                   "expected a maximum of %d", msg.id,
                   migrate_multifd_channels());
        return -1;
    }

    return msg.id;
}

static MultiFDPages_t *multifd_pages_init(size_t size)
{
    MultiFDPages_t *pages = g_new0(MultiFDPages_t, 1);

    pages->allocated = size;
    pages->iov = g_new0(struct iovec, size);
    pages->offset = g_new0(ram_addr_t, size);

    return pages;
}

static void multifd_pages_clear(MultiFDPages_t *pages)
{
    pages->used = 0;
    pages->allocated = 0;
    pages->packet_num = 0;
    pages->block = NULL;
    g_free(pages->iov);
    pages->iov = NULL;
    g_free(pages->offset);
    pages->offset = NULL;
    g_free(pages);
}

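/*
 * Fill p->packet from p->pages, converting every header field to big
 * endian so the packet can be written to the channel as-is.
 */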
static void multifd_send_fill_packet(MultiFDSendParams *p)
{
    MultiFDPacket_t *packet = p->packet;
    uint32_t page_max = MULTIFD_PACKET_SIZE / qemu_target_page_size();
    int i;

    packet->magic = cpu_to_be32(MULTIFD_MAGIC);
    packet->version = cpu_to_be32(MULTIFD_VERSION);
    packet->flags = cpu_to_be32(p->flags);
    packet->pages_alloc = cpu_to_be32(page_max);
    packet->pages_used = cpu_to_be32(p->pages->used);
    packet->next_packet_size = cpu_to_be32(p->next_packet_size);
    packet->packet_num = cpu_to_be64(p->packet_num);

    if (p->pages->block) {
        strncpy(packet->ramblock, p->pages->block->idstr, 256);
    }

    for (i = 0; i < p->pages->used; i++) {
        packet->offset[i] = cpu_to_be64(p->pages->offset[i]);
    }
}

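/*
 * Validate a received packet header (magic, version, page counts) and
 * set up p->pages->iov so the page data that follows can be read
 * straight into guest RAM.  Returns 0 on success, or -1 and sets @errp
 * on any malformed field.
 */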
static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
{
    MultiFDPacket_t *packet = p->packet;
    uint32_t pages_max = MULTIFD_PACKET_SIZE / qemu_target_page_size();
    RAMBlock *block;
    int i;

    packet->magic = be32_to_cpu(packet->magic);
    if (packet->magic != MULTIFD_MAGIC) {
        error_setg(errp, "multifd: received packet "
                   "magic %x and expected magic %x",
                   packet->magic, MULTIFD_MAGIC);
        return -1;
    }

    packet->version = be32_to_cpu(packet->version);
    if (packet->version != MULTIFD_VERSION) {
        error_setg(errp, "multifd: received packet "
                   "version %d and expected version %d",
                   packet->version, MULTIFD_VERSION);
        return -1;
    }

    p->flags = be32_to_cpu(packet->flags);

    packet->pages_alloc = be32_to_cpu(packet->pages_alloc);
    /*
     * If we received a packet that is 100 times bigger than expected
     * just stop migration.  It is a magic number.
     */
    if (packet->pages_alloc > pages_max * 100) {
        error_setg(errp, "multifd: received packet "
                   "with size %d and expected a maximum size of %d",
                   packet->pages_alloc, pages_max * 100);
        return -1;
    }
    /*
     * We received a packet that is bigger than expected but inside
     * reasonable limits (see previous comment).  Just reallocate.
     */
    if (packet->pages_alloc > p->pages->allocated) {
        multifd_pages_clear(p->pages);
        p->pages = multifd_pages_init(packet->pages_alloc);
    }

    p->pages->used = be32_to_cpu(packet->pages_used);
    if (p->pages->used > packet->pages_alloc) {
        error_setg(errp, "multifd: received packet "
                   "with %d pages and expected maximum pages are %d",
                   p->pages->used, packet->pages_alloc);
        return -1;
    }

    p->next_packet_size = be32_to_cpu(packet->next_packet_size);
    p->packet_num = be64_to_cpu(packet->packet_num);

    if (p->pages->used) {
        /* make sure that ramblock is 0 terminated */
        packet->ramblock[255] = 0;
        block = qemu_ram_block_by_name(packet->ramblock);
        if (!block) {
            error_setg(errp, "multifd: unknown ram block %s",
                       packet->ramblock);
            return -1;
        }
    }

    for (i = 0; i < p->pages->used; i++) {
        ram_addr_t offset = be64_to_cpu(packet->offset[i]);

        if (offset > (block->used_length - TARGET_PAGE_SIZE)) {
            error_setg(errp, "multifd: offset too long " RAM_ADDR_FMT
                       " (max " RAM_ADDR_FMT ")",
                       offset, block->max_length);
            return -1;
        }
        p->pages->iov[i].iov_base = block->host + offset;
        p->pages->iov[i].iov_len = TARGET_PAGE_SIZE;
    }

    return 0;
}

struct {
    MultiFDSendParams *params;
    /* array of pages to be sent */
    MultiFDPages_t *pages;
    /* syncs main thread and channels */
    QemuSemaphore sem_sync;
    /* global number of generated multifd packets */
    uint64_t packet_num;
    /* send channels ready */
    QemuSemaphore channels_ready;
} *multifd_send_state;

/*
 * How we use multifd_send_state->pages and channel->pages?
 *
 * We create a pages array for each channel, and a main one.  Each time
 * that we need to send a batch of pages we interchange the ones between
 * multifd_send_state and the channel that is sending it.  There are
 * two reasons for that:
 *    - to not have to do so many mallocs during migration
 *    - to make it easier to know what to free at the end of migration
 *
 * This way we always know who is the owner of each "pages" struct,
 * and we don't need any locking.  It belongs to the migration thread
 * or to the channel thread.  Switching is safe because the migration
 * thread is using the channel mutex when changing it, and the channel
 * has to have finished with its own, otherwise pending_job can't be
 * false.
 */

static void multifd_send_pages(void)
{
    int i;
    static int next_channel;
    MultiFDSendParams *p = NULL; /* make happy gcc */
    MultiFDPages_t *pages = multifd_send_state->pages;
    uint64_t transferred;

    qemu_sem_wait(&multifd_send_state->channels_ready);
    for (i = next_channel;; i = (i + 1) % migrate_multifd_channels()) {
        p = &multifd_send_state->params[i];

        qemu_mutex_lock(&p->mutex);
        if (!p->pending_job) {
            p->pending_job++;
            next_channel = (i + 1) % migrate_multifd_channels();
            break;
        }
        qemu_mutex_unlock(&p->mutex);
    }
    p->pages->used = 0;

    p->packet_num = multifd_send_state->packet_num++;
    p->pages->block = NULL;
    multifd_send_state->pages = p->pages;
    p->pages = pages;
    transferred = ((uint64_t) pages->used) * TARGET_PAGE_SIZE + p->packet_len;
    ram_counters.multifd_bytes += transferred;
    ram_counters.transferred += transferred;
    qemu_mutex_unlock(&p->mutex);
    qemu_sem_post(&p->sem);
}

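/*
 * Queue one page for multifd sending.  Pages are batched per RAMBlock;
 * the batch is flushed either when it is full or when a page from a
 * different block arrives (in which case the page is re-queued against
 * the new, empty batch).
 */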
static void multifd_queue_page(RAMBlock *block, ram_addr_t offset)
{
    MultiFDPages_t *pages = multifd_send_state->pages;

    if (!pages->block) {
        pages->block = block;
    }

    if (pages->block == block) {
        pages->offset[pages->used] = offset;
        pages->iov[pages->used].iov_base = block->host + offset;
        pages->iov[pages->used].iov_len = TARGET_PAGE_SIZE;
        pages->used++;

        if (pages->used < pages->allocated) {
            return;
        }
    }

    multifd_send_pages();

    if (pages->block != block) {
        multifd_queue_page(block, offset);
    }
}

static void multifd_send_terminate_threads(Error *err)
{
    int i;

    if (err) {
        MigrationState *s = migrate_get_current();
        migrate_set_error(s, err);
        if (s->state == MIGRATION_STATUS_SETUP ||
            s->state == MIGRATION_STATUS_PRE_SWITCHOVER ||
            s->state == MIGRATION_STATUS_DEVICE ||
            s->state == MIGRATION_STATUS_ACTIVE) {
            migrate_set_state(&s->state, s->state,
                              MIGRATION_STATUS_FAILED);
        }
    }

    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        qemu_mutex_lock(&p->mutex);
        p->quit = true;
        qemu_sem_post(&p->sem);
        qemu_mutex_unlock(&p->mutex);
    }
}

void multifd_save_cleanup(void)
{
    int i;

    if (!migrate_use_multifd()) {
        return;
    }
    multifd_send_terminate_threads(NULL);
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        if (p->running) {
            qemu_thread_join(&p->thread);
        }
        socket_send_channel_destroy(p->c);
        p->c = NULL;
        qemu_mutex_destroy(&p->mutex);
        qemu_sem_destroy(&p->sem);
        g_free(p->name);
        p->name = NULL;
        multifd_pages_clear(p->pages);
        p->pages = NULL;
        p->packet_len = 0;
        g_free(p->packet);
        p->packet = NULL;
    }
    qemu_sem_destroy(&multifd_send_state->channels_ready);
    qemu_sem_destroy(&multifd_send_state->sem_sync);
    g_free(multifd_send_state->params);
    multifd_send_state->params = NULL;
    multifd_pages_clear(multifd_send_state->pages);
    multifd_send_state->pages = NULL;
    g_free(multifd_send_state);
    multifd_send_state = NULL;
}

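/*
 * Synchronization point between the migration thread and the send
 * channels: flush any partially filled batch, ask every channel to emit
 * a MULTIFD_FLAG_SYNC packet, then wait until each of them has done so.
 */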
static void multifd_send_sync_main(void)
{
    int i;

    if (!migrate_use_multifd()) {
        return;
    }
    if (multifd_send_state->pages->used) {
        multifd_send_pages();
    }
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        trace_multifd_send_sync_main_signal(p->id);

        qemu_mutex_lock(&p->mutex);

        p->packet_num = multifd_send_state->packet_num++;
        p->flags |= MULTIFD_FLAG_SYNC;
        p->pending_job++;
        qemu_mutex_unlock(&p->mutex);
        qemu_sem_post(&p->sem);
    }
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        trace_multifd_send_sync_main_wait(p->id);
        qemu_sem_wait(&multifd_send_state->sem_sync);
    }
    trace_multifd_send_sync_main(multifd_send_state->packet_num);
}

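/*
 * Per-channel send thread: after the initial handshake packet, loop
 * waiting on p->sem for work, write the packet header followed by the
 * page data, and post channels_ready (plus sem_sync for SYNC packets)
 * once each job completes.
 */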
static void *multifd_send_thread(void *opaque)
{
    MultiFDSendParams *p = opaque;
    Error *local_err = NULL;
    int ret;

    trace_multifd_send_thread_start(p->id);
    rcu_register_thread();

    if (multifd_send_initial_packet(p, &local_err) < 0) {
        goto out;
    }
    /* initial packet */
    p->num_packets = 1;

    while (true) {
        qemu_sem_wait(&p->sem);
        qemu_mutex_lock(&p->mutex);

        if (p->pending_job) {
            uint32_t used = p->pages->used;
            uint64_t packet_num = p->packet_num;
            uint32_t flags = p->flags;

            p->next_packet_size = used * qemu_target_page_size();
            multifd_send_fill_packet(p);
            p->flags = 0;
            p->num_packets++;
            p->num_pages += used;
            p->pages->used = 0;
            qemu_mutex_unlock(&p->mutex);

            trace_multifd_send(p->id, packet_num, used, flags,
                               p->next_packet_size);

            ret = qio_channel_write_all(p->c, (void *)p->packet,
                                        p->packet_len, &local_err);
            if (ret != 0) {
                break;
            }

            if (used) {
                ret = qio_channel_writev_all(p->c, p->pages->iov,
                                             used, &local_err);
                if (ret != 0) {
                    break;
                }
            }

            qemu_mutex_lock(&p->mutex);
            p->pending_job--;
            qemu_mutex_unlock(&p->mutex);

            if (flags & MULTIFD_FLAG_SYNC) {
                qemu_sem_post(&multifd_send_state->sem_sync);
            }
            qemu_sem_post(&multifd_send_state->channels_ready);
        } else if (p->quit) {
            qemu_mutex_unlock(&p->mutex);
            break;
        } else {
            qemu_mutex_unlock(&p->mutex);
            /* sometimes there are spurious wakeups */
        }
    }

out:
    if (local_err) {
        multifd_send_terminate_threads(local_err);
    }

    qemu_mutex_lock(&p->mutex);
    p->running = false;
    qemu_mutex_unlock(&p->mutex);

    rcu_unregister_thread();
    trace_multifd_send_thread_end(p->id, p->num_packets, p->num_pages);

    return NULL;
}

static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
{
    MultiFDSendParams *p = opaque;
    QIOChannel *sioc = QIO_CHANNEL(qio_task_get_source(task));
    Error *local_err = NULL;

    if (qio_task_propagate_error(task, &local_err)) {
        migrate_set_error(migrate_get_current(), local_err);
        multifd_save_cleanup();
    } else {
        p->c = QIO_CHANNEL(sioc);
        qio_channel_set_delay(p->c, false);
        p->running = true;
        qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
                           QEMU_THREAD_JOINABLE);
    }
}

int multifd_save_setup(void)
{
    int thread_count;
    uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size();
    uint8_t i;

    if (!migrate_use_multifd()) {
        return 0;
    }
    thread_count = migrate_multifd_channels();
    multifd_send_state = g_malloc0(sizeof(*multifd_send_state));
    multifd_send_state->params = g_new0(MultiFDSendParams, thread_count);
    multifd_send_state->pages = multifd_pages_init(page_count);
    qemu_sem_init(&multifd_send_state->sem_sync, 0);
    qemu_sem_init(&multifd_send_state->channels_ready, 0);

    for (i = 0; i < thread_count; i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        qemu_mutex_init(&p->mutex);
        qemu_sem_init(&p->sem, 0);
        p->quit = false;
        p->pending_job = 0;
        p->id = i;
        p->pages = multifd_pages_init(page_count);
        p->packet_len = sizeof(MultiFDPacket_t)
                      + sizeof(ram_addr_t) * page_count;
        p->packet = g_malloc0(p->packet_len);
        p->name = g_strdup_printf("multifdsend_%d", i);
        socket_send_channel_create(multifd_new_send_channel_async, p);
    }
    return 0;
}

struct {
    MultiFDRecvParams *params;
    /* number of created threads */
    int count;
    /* syncs main thread and channels */
    QemuSemaphore sem_sync;
    /* global number of generated multifd packets */
    uint64_t packet_num;
} *multifd_recv_state;

static void multifd_recv_terminate_threads(Error *err)
{
    int i;

    if (err) {
        MigrationState *s = migrate_get_current();
        migrate_set_error(s, err);
        if (s->state == MIGRATION_STATUS_SETUP ||
            s->state == MIGRATION_STATUS_ACTIVE) {
            migrate_set_state(&s->state, s->state,
                              MIGRATION_STATUS_FAILED);
        }
    }

    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

        qemu_mutex_lock(&p->mutex);
        /* We could arrive here for two reasons:
           - normal quit, i.e. everything went fine, just finished
           - error quit: We close the channels so the channel threads
             finish the qio_channel_read_all_eof() */
        qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
        qemu_mutex_unlock(&p->mutex);
    }
}

int multifd_load_cleanup(Error **errp)
{
    int i;
    int ret = 0;

    if (!migrate_use_multifd()) {
        return 0;
    }
    multifd_recv_terminate_threads(NULL);
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

        if (p->running) {
            qemu_thread_join(&p->thread);
        }
        object_unref(OBJECT(p->c));
        p->c = NULL;
        qemu_mutex_destroy(&p->mutex);
        qemu_sem_destroy(&p->sem_sync);
        g_free(p->name);
        p->name = NULL;
        multifd_pages_clear(p->pages);
        p->pages = NULL;
        p->packet_len = 0;
        g_free(p->packet);
        p->packet = NULL;
    }
    qemu_sem_destroy(&multifd_recv_state->sem_sync);
    g_free(multifd_recv_state->params);
    multifd_recv_state->params = NULL;
    g_free(multifd_recv_state);
    multifd_recv_state = NULL;

    return ret;
}

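/*
 * Synchronization point on the destination: wait until every receive
 * channel has reached its MULTIFD_FLAG_SYNC packet, record the highest
 * packet number seen, then release all the channel threads again.
 */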
static void multifd_recv_sync_main(void)
{
    int i;

    if (!migrate_use_multifd()) {
        return;
    }
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

        trace_multifd_recv_sync_main_wait(p->id);
        qemu_sem_wait(&multifd_recv_state->sem_sync);
        qemu_mutex_lock(&p->mutex);
        if (multifd_recv_state->packet_num < p->packet_num) {
            multifd_recv_state->packet_num = p->packet_num;
        }
        qemu_mutex_unlock(&p->mutex);
    }
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

        trace_multifd_recv_sync_main_signal(p->id);
        qemu_sem_post(&p->sem_sync);
    }
    trace_multifd_recv_sync_main(multifd_recv_state->packet_num);
}

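/*
 * Per-channel receive thread: read a packet header, unfill and validate
 * it, read the page data straight into guest memory, and coordinate
 * with multifd_recv_sync_main() whenever a SYNC packet is seen, until
 * EOF or error.
 */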
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001309static void *multifd_recv_thread(void *opaque)
1310{
1311 MultiFDRecvParams *p = opaque;
Juan Quintela2a26c972018-04-04 11:26:58 +02001312 Error *local_err = NULL;
1313 int ret;
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001314
Juan Quintela408ea6a2018-04-06 18:28:59 +02001315 trace_multifd_recv_thread_start(p->id);
Lidong Chen74637e62018-08-06 21:29:29 +08001316 rcu_register_thread();
Juan Quintela408ea6a2018-04-06 18:28:59 +02001317
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001318 while (true) {
Juan Quintela6df264a2018-02-28 09:10:07 +01001319 uint32_t used;
1320 uint32_t flags;
1321
Juan Quintela8b2db7f2018-04-11 12:36:13 +02001322 ret = qio_channel_read_all_eof(p->c, (void *)p->packet,
1323 p->packet_len, &local_err);
1324 if (ret == 0) { /* EOF */
1325 break;
1326 }
1327 if (ret == -1) { /* Error */
1328 break;
1329 }
Juan Quintela6df264a2018-02-28 09:10:07 +01001330
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001331 qemu_mutex_lock(&p->mutex);
Juan Quintela6df264a2018-02-28 09:10:07 +01001332 ret = multifd_recv_unfill_packet(p, &local_err);
1333 if (ret) {
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001334 qemu_mutex_unlock(&p->mutex);
1335 break;
1336 }
Juan Quintela6df264a2018-02-28 09:10:07 +01001337
1338 used = p->pages->used;
1339 flags = p->flags;
Juan Quintela2a34ee52019-01-04 19:45:39 +01001340 trace_multifd_recv(p->id, p->packet_num, used, flags,
1341 p->next_packet_size);
Juan Quintela6df264a2018-02-28 09:10:07 +01001342 p->num_packets++;
1343 p->num_pages += used;
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001344 qemu_mutex_unlock(&p->mutex);
Juan Quintela6df264a2018-02-28 09:10:07 +01001345
Juan Quintelaad24c7c2019-01-04 19:12:35 +01001346 if (used) {
1347 ret = qio_channel_readv_all(p->c, p->pages->iov,
1348 used, &local_err);
1349 if (ret != 0) {
1350 break;
1351 }
Juan Quintela8b2db7f2018-04-11 12:36:13 +02001352 }
1353
Juan Quintela6df264a2018-02-28 09:10:07 +01001354 if (flags & MULTIFD_FLAG_SYNC) {
1355 qemu_sem_post(&multifd_recv_state->sem_sync);
1356 qemu_sem_wait(&p->sem_sync);
1357 }
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001358 }
1359
Juan Quintelad82628e2018-04-11 02:44:24 +02001360 if (local_err) {
1361 multifd_recv_terminate_threads(local_err);
1362 }
Juan Quintela66770702018-02-19 19:01:45 +01001363 qemu_mutex_lock(&p->mutex);
1364 p->running = false;
1365 qemu_mutex_unlock(&p->mutex);
1366
Lidong Chen74637e62018-08-06 21:29:29 +08001367 rcu_unregister_thread();
Juan Quintela408ea6a2018-04-06 18:28:59 +02001368 trace_multifd_recv_thread_end(p->id, p->num_packets, p->num_pages);
1369
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001370 return NULL;
1371}
1372
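/*
 * multifd_load_setup: allocate the multifd receive state
 *
 * Sets up multifd_recv_state and one MultiFDRecvParams per channel:
 * mutex, sync semaphore, pages array and a packet buffer large enough
 * for a packet header plus one address per page. The receive threads
 * themselves are started later, from multifd_recv_new_channel().
 */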
1373int multifd_load_setup(void)
1374{
1375 int thread_count;
Juan Quintelaefd1a1d2019-02-20 12:06:03 +01001376 uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size();
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001377 uint8_t i;
1378
1379 if (!migrate_use_multifd()) {
1380 return 0;
1381 }
1382 thread_count = migrate_multifd_channels();
1383 multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state));
1384 multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count);
Juan Quintela66770702018-02-19 19:01:45 +01001385 atomic_set(&multifd_recv_state->count, 0);
Juan Quintela6df264a2018-02-28 09:10:07 +01001386 qemu_sem_init(&multifd_recv_state->sem_sync, 0);
Juan Quintela34c55a92018-04-10 23:35:15 +02001387
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001388 for (i = 0; i < thread_count; i++) {
1389 MultiFDRecvParams *p = &multifd_recv_state->params[i];
1390
1391 qemu_mutex_init(&p->mutex);
Juan Quintela6df264a2018-02-28 09:10:07 +01001392 qemu_sem_init(&p->sem_sync, 0);
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001393 p->id = i;
Juan Quintela34c55a92018-04-10 23:35:15 +02001394 p->pages = multifd_pages_init(page_count);
Juan Quintela2a26c972018-04-04 11:26:58 +02001395 p->packet_len = sizeof(MultiFDPacket_t)
1396 + sizeof(ram_addr_t) * page_count;
1397 p->packet = g_malloc0(p->packet_len);
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001398 p->name = g_strdup_printf("multifdrecv_%d", i);
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001399 }
1400 return 0;
1401}
1402
Juan Quintela62c1e0c2018-02-19 18:59:02 +01001403bool multifd_recv_all_channels_created(void)
1404{
1405 int thread_count = migrate_multifd_channels();
1406
1407 if (!migrate_use_multifd()) {
1408 return true;
1409 }
1410
1411 return thread_count == atomic_read(&multifd_recv_state->count);
1412}
1413
Fei Li49ed0d22019-01-13 22:08:46 +08001414/*
1415 * Try to receive all multifd channels to get ready for the migration.
1416 * - Return true and do not set @errp when correctly receiving all channels;
1417 * - Return false and do not set @errp when correctly receiving the current one;
1418 * - Return false and set @errp when failing to receive the current channel.
1419 */
1420bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
Juan Quintela71bb07d2018-02-19 19:01:03 +01001421{
Juan Quintela60df2d42018-03-07 07:56:15 +01001422 MultiFDRecvParams *p;
Juan Quintelaaf8b7d22018-04-06 19:32:12 +02001423 Error *local_err = NULL;
1424 int id;
Juan Quintela60df2d42018-03-07 07:56:15 +01001425
Juan Quintelaaf8b7d22018-04-06 19:32:12 +02001426 id = multifd_recv_initial_packet(ioc, &local_err);
1427 if (id < 0) {
1428 multifd_recv_terminate_threads(local_err);
Fei Li49ed0d22019-01-13 22:08:46 +08001429 error_propagate_prepend(errp, local_err,
1430 "failed to receive packet"
1431 " via multifd channel %d: ",
1432 atomic_read(&multifd_recv_state->count));
Peter Xu81e62052018-06-27 21:22:44 +08001433 return false;
Juan Quintelaaf8b7d22018-04-06 19:32:12 +02001434 }
1435
1436 p = &multifd_recv_state->params[id];
1437 if (p->c != NULL) {
1438 error_setg(&local_err, "multifd: received id '%d' already setup'",
1439 id);
1440 multifd_recv_terminate_threads(local_err);
Fei Li49ed0d22019-01-13 22:08:46 +08001441 error_propagate(errp, local_err);
Peter Xu81e62052018-06-27 21:22:44 +08001442 return false;
Juan Quintelaaf8b7d22018-04-06 19:32:12 +02001443 }
Juan Quintela60df2d42018-03-07 07:56:15 +01001444 p->c = ioc;
1445 object_ref(OBJECT(ioc));
Juan Quintela408ea6a2018-04-06 18:28:59 +02001446 /* initial packet */
1447 p->num_packets = 1;
Juan Quintela60df2d42018-03-07 07:56:15 +01001448
1449 p->running = true;
1450 qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
1451 QEMU_THREAD_JOINABLE);
1452 atomic_inc(&multifd_recv_state->count);
Fei Li49ed0d22019-01-13 22:08:46 +08001453 return atomic_read(&multifd_recv_state->count) ==
1454 migrate_multifd_channels();
Juan Quintela71bb07d2018-02-19 19:01:03 +01001455}
1456
Juan Quintela56e93d22015-05-07 19:33:31 +02001457/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001458 * save_page_header: write page header to wire
Juan Quintela56e93d22015-05-07 19:33:31 +02001459 *
1460 * If this is the 1st block, it also writes the block identification
1461 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001462 * Returns the number of bytes written
Juan Quintela56e93d22015-05-07 19:33:31 +02001463 *
1464 * @f: QEMUFile where to send the data
1465 * @block: block that contains the page we want to send
1466 * @offset: offset inside the block for the page
1467 * in the lower bits, it contains flags
1468 */
Juan Quintela2bf3aa82017-05-10 13:28:13 +02001469static size_t save_page_header(RAMState *rs, QEMUFile *f, RAMBlock *block,
1470 ram_addr_t offset)
Juan Quintela56e93d22015-05-07 19:33:31 +02001471{
Liang Li9f5f3802015-07-13 17:34:10 +08001472 size_t size, len;
Juan Quintela56e93d22015-05-07 19:33:31 +02001473
Juan Quintela24795692017-03-21 11:45:01 +01001474 if (block == rs->last_sent_block) {
1475 offset |= RAM_SAVE_FLAG_CONTINUE;
1476 }
Juan Quintela2bf3aa82017-05-10 13:28:13 +02001477 qemu_put_be64(f, offset);
Juan Quintela56e93d22015-05-07 19:33:31 +02001478 size = 8;
1479
1480 if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
Liang Li9f5f3802015-07-13 17:34:10 +08001481 len = strlen(block->idstr);
Juan Quintela2bf3aa82017-05-10 13:28:13 +02001482 qemu_put_byte(f, len);
1483 qemu_put_buffer(f, (uint8_t *)block->idstr, len);
Liang Li9f5f3802015-07-13 17:34:10 +08001484 size += 1 + len;
Juan Quintela24795692017-03-21 11:45:01 +01001485 rs->last_sent_block = block;
Juan Quintela56e93d22015-05-07 19:33:31 +02001486 }
1487 return size;
1488}
1489
Juan Quintela3d0684b2017-03-23 15:06:39 +01001490/**
1491 * mig_throttle_guest_down: throttle down the guest
1492 *
1493 * Reduce amount of guest cpu execution to hopefully slow down memory
1494 * writes. If guest dirty memory rate is reduced below the rate at
1495 * which we can transfer pages to the destination then we should be
1496 * able to complete migration. Some workloads dirty memory way too
1497 * fast and will not effectively converge, even with auto-converge.
Jason J. Herne070afca2015-09-08 13:12:35 -04001498 */
1499static void mig_throttle_guest_down(void)
1500{
1501 MigrationState *s = migrate_get_current();
Daniel P. Berrange2594f562016-04-27 11:05:14 +01001502 uint64_t pct_initial = s->parameters.cpu_throttle_initial;
1503 uint64_t pct_icrement = s->parameters.cpu_throttle_increment;
Li Qiang4cbc9c72018-08-01 06:00:20 -07001504 int pct_max = s->parameters.max_cpu_throttle;
Jason J. Herne070afca2015-09-08 13:12:35 -04001505
1506 /* We have not started throttling yet. Let's start it. */
1507 if (!cpu_throttle_active()) {
1508 cpu_throttle_set(pct_initial);
1509 } else {
1510 /* Throttling already on, just increase the rate */
Li Qiang4cbc9c72018-08-01 06:00:20 -07001511 cpu_throttle_set(MIN(cpu_throttle_get_percentage() + pct_icrement,
1512 pct_max));
Jason J. Herne070afca2015-09-08 13:12:35 -04001513 }
1514}
1515
Juan Quintela3d0684b2017-03-23 15:06:39 +01001516/**
1517 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
1518 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001519 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001520 * @current_addr: address for the zero page
1521 *
1522 * Update the xbzrle cache to reflect a page that's been sent as all 0.
Juan Quintela56e93d22015-05-07 19:33:31 +02001523 * The important thing is that a stale (not-yet-0'd) page be replaced
1524 * by the new data.
1525 * As a bonus, if the page wasn't in the cache it gets added so that
Juan Quintela3d0684b2017-03-23 15:06:39 +01001526 * when a small write is made into the 0'd page it gets XBZRLE sent.
Juan Quintela56e93d22015-05-07 19:33:31 +02001527 */
Juan Quintela6f37bb82017-03-13 19:26:29 +01001528static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
Juan Quintela56e93d22015-05-07 19:33:31 +02001529{
Juan Quintela6f37bb82017-03-13 19:26:29 +01001530 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
Juan Quintela56e93d22015-05-07 19:33:31 +02001531 return;
1532 }
1533
1534 /* We don't care if this fails to allocate a new cache page
1535 * as long as it updates an old one */
Juan Quintelac00e0922017-05-09 16:22:01 +02001536 cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page,
Juan Quintela93604472017-06-06 19:49:03 +02001537 ram_counters.dirty_sync_count);
Juan Quintela56e93d22015-05-07 19:33:31 +02001538}
1539
1540#define ENCODING_FLAG_XBZRLE 0x1
1541
1542/**
1543 * save_xbzrle_page: compress and send current page
1544 *
1545 * Returns: 1 means that we wrote the page
1546 * 0 means that page is identical to the one already sent
1547 * -1 means that xbzrle would be longer than normal
1548 *
Juan Quintela5a987732017-03-13 19:39:02 +01001549 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001550 * @current_data: pointer to the address of the page contents
1551 * @current_addr: addr of the page
Juan Quintela56e93d22015-05-07 19:33:31 +02001552 * @block: block that contains the page we want to send
1553 * @offset: offset inside the block for the page
1554 * @last_stage: if we are at the completion stage
Juan Quintela56e93d22015-05-07 19:33:31 +02001555 */
Juan Quintela204b88b2017-03-15 09:16:57 +01001556static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
Juan Quintela56e93d22015-05-07 19:33:31 +02001557 ram_addr_t current_addr, RAMBlock *block,
Juan Quintela072c2512017-03-14 10:27:31 +01001558 ram_addr_t offset, bool last_stage)
Juan Quintela56e93d22015-05-07 19:33:31 +02001559{
1560 int encoded_len = 0, bytes_xbzrle;
1561 uint8_t *prev_cached_page;
1562
Juan Quintela93604472017-06-06 19:49:03 +02001563 if (!cache_is_cached(XBZRLE.cache, current_addr,
1564 ram_counters.dirty_sync_count)) {
1565 xbzrle_counters.cache_miss++;
Juan Quintela56e93d22015-05-07 19:33:31 +02001566 if (!last_stage) {
1567 if (cache_insert(XBZRLE.cache, current_addr, *current_data,
Juan Quintela93604472017-06-06 19:49:03 +02001568 ram_counters.dirty_sync_count) == -1) {
Juan Quintela56e93d22015-05-07 19:33:31 +02001569 return -1;
1570 } else {
1571 /* update *current_data when the page has been
1572 inserted into cache */
1573 *current_data = get_cached_data(XBZRLE.cache, current_addr);
1574 }
1575 }
1576 return -1;
1577 }
1578
1579 prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
1580
1581 /* save current buffer into memory */
1582 memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
1583
1584 /* XBZRLE encoding (if there is no overflow) */
1585 encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
1586 TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
1587 TARGET_PAGE_SIZE);
1588 if (encoded_len == 0) {
Juan Quintela55c44462017-01-23 22:32:05 +01001589 trace_save_xbzrle_page_skipping();
Juan Quintela56e93d22015-05-07 19:33:31 +02001590 return 0;
1591 } else if (encoded_len == -1) {
Juan Quintela55c44462017-01-23 22:32:05 +01001592 trace_save_xbzrle_page_overflow();
Juan Quintela93604472017-06-06 19:49:03 +02001593 xbzrle_counters.overflow++;
Juan Quintela56e93d22015-05-07 19:33:31 +02001594 /* update data in the cache */
1595 if (!last_stage) {
1596 memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
1597 *current_data = prev_cached_page;
1598 }
1599 return -1;
1600 }
1601
1602 /* we need to update the data in the cache, in order to get the same data */
1603 if (!last_stage) {
1604 memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
1605 }
1606
1607 /* Send XBZRLE based compressed page */
Juan Quintela2bf3aa82017-05-10 13:28:13 +02001608 bytes_xbzrle = save_page_header(rs, rs->f, block,
Juan Quintela204b88b2017-03-15 09:16:57 +01001609 offset | RAM_SAVE_FLAG_XBZRLE);
1610 qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE);
1611 qemu_put_be16(rs->f, encoded_len);
1612 qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len);
Juan Quintela56e93d22015-05-07 19:33:31 +02001613 bytes_xbzrle += encoded_len + 1 + 2;
Juan Quintela93604472017-06-06 19:49:03 +02001614 xbzrle_counters.pages++;
1615 xbzrle_counters.bytes += bytes_xbzrle;
1616 ram_counters.transferred += bytes_xbzrle;
Juan Quintela56e93d22015-05-07 19:33:31 +02001617
1618 return 1;
1619}
1620
Juan Quintela3d0684b2017-03-23 15:06:39 +01001621/**
1622 * migration_bitmap_find_dirty: find the next dirty page from start
Dr. David Alan Gilbertf3f491f2015-11-05 18:11:01 +00001623 *
Wei Yanga5f7b1a2019-05-11 07:37:29 +08001624 * Returns the page offset within memory region of the start of a dirty page
Juan Quintela3d0684b2017-03-23 15:06:39 +01001625 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001626 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001627 * @rb: RAMBlock where to search for dirty pages
Juan Quintelaa935e302017-03-21 15:36:51 +01001628 * @start: page where we start the search
Dr. David Alan Gilbertf3f491f2015-11-05 18:11:01 +00001629 */
Juan Quintela56e93d22015-05-07 19:33:31 +02001630static inline
Juan Quintelaa935e302017-03-21 15:36:51 +01001631unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
Juan Quintelaf20e2862017-03-21 16:19:05 +01001632 unsigned long start)
Juan Quintela56e93d22015-05-07 19:33:31 +02001633{
Juan Quintela6b6712e2017-03-22 15:18:04 +01001634 unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
1635 unsigned long *bitmap = rb->bmap;
Juan Quintela56e93d22015-05-07 19:33:31 +02001636 unsigned long next;
1637
Yury Kotovfbd162e2019-02-15 20:45:46 +03001638 if (ramblock_is_ignored(rb)) {
Cédric Le Goaterb895de52018-05-14 08:57:00 +02001639 return size;
1640 }
1641
Wei Wang6eeb63f2018-12-11 16:24:52 +08001642 /*
1643 * When the free page optimization is enabled, we need to check the bitmap
1644 * to send the non-free pages rather than all the pages in the bulk stage.
1645 */
1646 if (!rs->fpo_enabled && rs->ram_bulk_stage && start > 0) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01001647 next = start + 1;
Juan Quintela56e93d22015-05-07 19:33:31 +02001648 } else {
Juan Quintela6b6712e2017-03-22 15:18:04 +01001649 next = find_next_bit(bitmap, size, start);
Juan Quintela56e93d22015-05-07 19:33:31 +02001650 }
1651
Juan Quintela6b6712e2017-03-22 15:18:04 +01001652 return next;
Juan Quintela56e93d22015-05-07 19:33:31 +02001653}
1654
Juan Quintela06b10682017-03-21 15:18:05 +01001655static inline bool migration_bitmap_clear_dirty(RAMState *rs,
Juan Quintelaf20e2862017-03-21 16:19:05 +01001656 RAMBlock *rb,
1657 unsigned long page)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001658{
1659 bool ret;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001660
Wei Wang386a9072018-12-11 16:24:49 +08001661 qemu_mutex_lock(&rs->bitmap_mutex);
Juan Quintela6b6712e2017-03-22 15:18:04 +01001662 ret = test_and_clear_bit(page, rb->bmap);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001663
1664 if (ret) {
Juan Quintela0d8ec882017-03-13 21:21:41 +01001665 rs->migration_dirty_pages--;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001666 }
Wei Wang386a9072018-12-11 16:24:49 +08001667 qemu_mutex_unlock(&rs->bitmap_mutex);
1668
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001669 return ret;
1670}
1671
Juan Quintela15440dd2017-03-21 09:35:04 +01001672static void migration_bitmap_sync_range(RAMState *rs, RAMBlock *rb,
Wei Yangbf212972019-04-30 11:44:10 +08001673 ram_addr_t length)
Juan Quintela56e93d22015-05-07 19:33:31 +02001674{
Juan Quintela0d8ec882017-03-13 21:21:41 +01001675 rs->migration_dirty_pages +=
Wei Yangbf212972019-04-30 11:44:10 +08001676 cpu_physical_memory_sync_dirty_bitmap(rb, 0, length,
Juan Quintela0d8ec882017-03-13 21:21:41 +01001677 &rs->num_dirty_pages_period);
Juan Quintela56e93d22015-05-07 19:33:31 +02001678}
1679
Juan Quintela3d0684b2017-03-23 15:06:39 +01001680/**
1681 * ram_pagesize_summary: calculate all the pagesizes of a VM
1682 *
1683 * Returns a summary bitmap of the page sizes of all RAMBlocks
1684 *
1685 * For VMs with just normal pages this is equivalent to the host page
1686 * size. If it's got some huge pages then it's the OR of all the
1687 * different page sizes.
Dr. David Alan Gilberte8ca1db2017-02-24 18:28:29 +00001688 */
1689uint64_t ram_pagesize_summary(void)
1690{
1691 RAMBlock *block;
1692 uint64_t summary = 0;
1693
Yury Kotovfbd162e2019-02-15 20:45:46 +03001694 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Dr. David Alan Gilberte8ca1db2017-02-24 18:28:29 +00001695 summary |= block->page_size;
1696 }
1697
1698 return summary;
1699}
1700
Xiao Guangrongaecbfe92019-01-11 14:37:30 +08001701uint64_t ram_get_total_transferred_pages(void)
1702{
1703 return ram_counters.normal + ram_counters.duplicate +
1704 compression_counters.pages + xbzrle_counters.pages;
1705}
1706
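/*
 * migration_update_rates: recompute the per-period rate counters
 *
 * Updates dirty_pages_rate and, when the features are in use, the
 * xbzrle cache-miss rate and the compression busy rate and compression
 * ratio for the period ending at @end_time, using the *_prev snapshots
 * kept in @rs.
 */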
Xiao Guangrongb7340352018-06-04 17:55:12 +08001707static void migration_update_rates(RAMState *rs, int64_t end_time)
1708{
Xiao Guangrongbe8b02e2018-09-03 17:26:42 +08001709 uint64_t page_count = rs->target_page_count - rs->target_page_count_prev;
Xiao Guangrong76e03002018-09-06 15:01:00 +08001710 double compressed_size;
Xiao Guangrongb7340352018-06-04 17:55:12 +08001711
1712 /* calculate period counters */
1713 ram_counters.dirty_pages_rate = rs->num_dirty_pages_period * 1000
1714 / (end_time - rs->time_last_bitmap_sync);
1715
Xiao Guangrongbe8b02e2018-09-03 17:26:42 +08001716 if (!page_count) {
Xiao Guangrongb7340352018-06-04 17:55:12 +08001717 return;
1718 }
1719
1720 if (migrate_use_xbzrle()) {
1721 xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss -
Xiao Guangrongbe8b02e2018-09-03 17:26:42 +08001722 rs->xbzrle_cache_miss_prev) / page_count;
Xiao Guangrongb7340352018-06-04 17:55:12 +08001723 rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
1724 }
Xiao Guangrong76e03002018-09-06 15:01:00 +08001725
1726 if (migrate_use_compression()) {
1727 compression_counters.busy_rate = (double)(compression_counters.busy -
1728 rs->compress_thread_busy_prev) / page_count;
1729 rs->compress_thread_busy_prev = compression_counters.busy;
1730
1731 compressed_size = compression_counters.compressed_size -
1732 rs->compressed_size_prev;
1733 if (compressed_size) {
1734 double uncompressed_size = (compression_counters.pages -
1735 rs->compress_pages_prev) * TARGET_PAGE_SIZE;
1736
1737 /* Compression-Ratio = Uncompressed-size / Compressed-size */
1738 compression_counters.compression_rate =
1739 uncompressed_size / compressed_size;
1740
1741 rs->compress_pages_prev = compression_counters.pages;
1742 rs->compressed_size_prev = compression_counters.compressed_size;
1743 }
1744 }
Xiao Guangrongb7340352018-06-04 17:55:12 +08001745}
1746
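/*
 * migration_bitmap_sync: sync the dirty bitmap of all RAMBlocks
 *
 * Synchronizes the global dirty log into each RAMBlock's bitmap,
 * updates the period counters and, at most once per second, decides
 * whether auto-converge throttling needs to start or be increased.
 */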
Juan Quintela8d820d62017-03-13 19:35:50 +01001747static void migration_bitmap_sync(RAMState *rs)
Juan Quintela56e93d22015-05-07 19:33:31 +02001748{
1749 RAMBlock *block;
Juan Quintela56e93d22015-05-07 19:33:31 +02001750 int64_t end_time;
Juan Quintelac4bdf0c2017-03-28 14:59:54 +02001751 uint64_t bytes_xfer_now;
Juan Quintela56e93d22015-05-07 19:33:31 +02001752
Juan Quintela93604472017-06-06 19:49:03 +02001753 ram_counters.dirty_sync_count++;
Juan Quintela56e93d22015-05-07 19:33:31 +02001754
Juan Quintelaf664da82017-03-13 19:44:57 +01001755 if (!rs->time_last_bitmap_sync) {
1756 rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
Juan Quintela56e93d22015-05-07 19:33:31 +02001757 }
1758
1759 trace_migration_bitmap_sync_start();
Paolo Bonzini9c1f8f42016-09-22 16:08:31 +02001760 memory_global_dirty_log_sync();
Juan Quintela56e93d22015-05-07 19:33:31 +02001761
Juan Quintela108cfae2017-03-13 21:38:09 +01001762 qemu_mutex_lock(&rs->bitmap_mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02001763 rcu_read_lock();
Yury Kotovfbd162e2019-02-15 20:45:46 +03001764 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Wei Yangbf212972019-04-30 11:44:10 +08001765 migration_bitmap_sync_range(rs, block, block->used_length);
Juan Quintela56e93d22015-05-07 19:33:31 +02001766 }
Balamuruhan S650af892018-06-12 14:20:09 +05301767 ram_counters.remaining = ram_bytes_remaining();
Juan Quintela56e93d22015-05-07 19:33:31 +02001768 rcu_read_unlock();
Juan Quintela108cfae2017-03-13 21:38:09 +01001769 qemu_mutex_unlock(&rs->bitmap_mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02001770
Juan Quintelaa66cd902017-03-28 15:02:43 +02001771 trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);
Chao Fan1ffb5df2017-03-14 09:55:07 +08001772
Juan Quintela56e93d22015-05-07 19:33:31 +02001773 end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
1774
1775 /* more than 1 second = 1000 milliseconds */
Juan Quintelaf664da82017-03-13 19:44:57 +01001776 if (end_time > rs->time_last_bitmap_sync + 1000) {
Juan Quintela93604472017-06-06 19:49:03 +02001777 bytes_xfer_now = ram_counters.transferred;
Felipe Franciosid693c6f2017-05-24 17:10:01 +01001778
Peter Lieven9ac78b62017-09-26 12:33:16 +02001779 /* During block migration the auto-converge logic incorrectly detects
1780 * that ram migration makes no progress. Avoid this by disabling the
1781 * throttling logic during the bulk phase of block migration. */
1782 if (migrate_auto_converge() && !blk_mig_bulk_active()) {
Juan Quintela56e93d22015-05-07 19:33:31 +02001783 /* The following detection logic can be refined later. For now:
1784 Check to see if the dirtied bytes are 50% more than the approx.
1785 amount of bytes that just got transferred since the last time we
Jason J. Herne070afca2015-09-08 13:12:35 -04001786 were in this routine. If that happens twice, start or increase
1787 throttling */
Jason J. Herne070afca2015-09-08 13:12:35 -04001788
Felipe Franciosid693c6f2017-05-24 17:10:01 +01001789 if ((rs->num_dirty_pages_period * TARGET_PAGE_SIZE >
Juan Quintelaeac74152017-03-28 14:59:01 +02001790 (bytes_xfer_now - rs->bytes_xfer_prev) / 2) &&
Felipe Franciosib4a3c642017-05-24 17:10:03 +01001791 (++rs->dirty_rate_high_cnt >= 2)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02001792 trace_migration_throttle();
Juan Quintela8d820d62017-03-13 19:35:50 +01001793 rs->dirty_rate_high_cnt = 0;
Jason J. Herne070afca2015-09-08 13:12:35 -04001794 mig_throttle_guest_down();
Felipe Franciosid693c6f2017-05-24 17:10:01 +01001795 }
Juan Quintela56e93d22015-05-07 19:33:31 +02001796 }
Jason J. Herne070afca2015-09-08 13:12:35 -04001797
Xiao Guangrongb7340352018-06-04 17:55:12 +08001798 migration_update_rates(rs, end_time);
1799
Xiao Guangrongbe8b02e2018-09-03 17:26:42 +08001800 rs->target_page_count_prev = rs->target_page_count;
Felipe Franciosid693c6f2017-05-24 17:10:01 +01001801
1802 /* reset period counters */
Juan Quintelaf664da82017-03-13 19:44:57 +01001803 rs->time_last_bitmap_sync = end_time;
Juan Quintelaa66cd902017-03-28 15:02:43 +02001804 rs->num_dirty_pages_period = 0;
Felipe Franciosid2a4d852017-05-24 17:10:02 +01001805 rs->bytes_xfer_prev = bytes_xfer_now;
Juan Quintela56e93d22015-05-07 19:33:31 +02001806 }
Dr. David Alan Gilbert4addcd42015-12-16 11:47:36 +00001807 if (migrate_use_events()) {
Peter Xu3ab72382018-08-15 21:37:37 +08001808 qapi_event_send_migration_pass(ram_counters.dirty_sync_count);
Dr. David Alan Gilbert4addcd42015-12-16 11:47:36 +00001809 }
Juan Quintela56e93d22015-05-07 19:33:31 +02001810}
1811
Wei Wangbd227062018-12-11 16:24:51 +08001812static void migration_bitmap_sync_precopy(RAMState *rs)
1813{
1814 Error *local_err = NULL;
1815
1816 /*
1817 * The current notifier usage is just an optimization to migration, so we
1818 * don't stop the normal migration process in the error case.
1819 */
1820 if (precopy_notify(PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC, &local_err)) {
1821 error_report_err(local_err);
1822 }
1823
1824 migration_bitmap_sync(rs);
1825
1826 if (precopy_notify(PRECOPY_NOTIFY_AFTER_BITMAP_SYNC, &local_err)) {
1827 error_report_err(local_err);
1828 }
1829}
1830
Juan Quintela56e93d22015-05-07 19:33:31 +02001831/**
Xiao Guangrong6c97ec52018-08-21 16:10:22 +08001832 * save_zero_page_to_file: send the zero page to the file
1833 *
1834 * Returns the size of data written to the file, 0 means the page is not
1835 * a zero page
1836 *
1837 * @rs: current RAM state
1838 * @file: the file where the data is saved
1839 * @block: block that contains the page we want to send
1840 * @offset: offset inside the block for the page
1841 */
1842static int save_zero_page_to_file(RAMState *rs, QEMUFile *file,
1843 RAMBlock *block, ram_addr_t offset)
1844{
1845 uint8_t *p = block->host + offset;
1846 int len = 0;
1847
1848 if (is_zero_range(p, TARGET_PAGE_SIZE)) {
1849 len += save_page_header(rs, file, block, offset | RAM_SAVE_FLAG_ZERO);
1850 qemu_put_byte(file, 0);
1851 len += 1;
1852 }
1853 return len;
1854}
1855
1856/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001857 * save_zero_page: send the zero page to the stream
Juan Quintela56e93d22015-05-07 19:33:31 +02001858 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001859 * Returns the number of pages written.
Juan Quintela56e93d22015-05-07 19:33:31 +02001860 *
Juan Quintelaf7ccd612017-03-13 20:30:21 +01001861 * @rs: current RAM state
Juan Quintela56e93d22015-05-07 19:33:31 +02001862 * @block: block that contains the page we want to send
1863 * @offset: offset inside the block for the page
Juan Quintela56e93d22015-05-07 19:33:31 +02001864 */
Juan Quintela7faccdc2018-01-08 18:58:17 +01001865static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
Juan Quintela56e93d22015-05-07 19:33:31 +02001866{
Xiao Guangrong6c97ec52018-08-21 16:10:22 +08001867 int len = save_zero_page_to_file(rs, rs->f, block, offset);
Juan Quintela56e93d22015-05-07 19:33:31 +02001868
Xiao Guangrong6c97ec52018-08-21 16:10:22 +08001869 if (len) {
Juan Quintela93604472017-06-06 19:49:03 +02001870 ram_counters.duplicate++;
Xiao Guangrong6c97ec52018-08-21 16:10:22 +08001871 ram_counters.transferred += len;
1872 return 1;
Juan Quintela56e93d22015-05-07 19:33:31 +02001873 }
Xiao Guangrong6c97ec52018-08-21 16:10:22 +08001874 return -1;
Juan Quintela56e93d22015-05-07 19:33:31 +02001875}
1876
Juan Quintela57273092017-03-20 22:25:28 +01001877static void ram_release_pages(const char *rbname, uint64_t offset, int pages)
Pavel Butsykin53f09a12017-02-03 18:23:20 +03001878{
Juan Quintela57273092017-03-20 22:25:28 +01001879 if (!migrate_release_ram() || !migration_in_postcopy()) {
Pavel Butsykin53f09a12017-02-03 18:23:20 +03001880 return;
1881 }
1882
Juan Quintelaaaa20642017-03-21 11:35:24 +01001883 ram_discard_range(rbname, offset, pages << TARGET_PAGE_BITS);
Pavel Butsykin53f09a12017-02-03 18:23:20 +03001884}
1885
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08001886/*
1887 * @pages: the number of pages written by the control path,
1888 * < 0 - error
1889 * > 0 - number of pages written
1890 *
1891 * Return true if the page has been saved, otherwise false is returned.
1892 */
1893static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
1894 int *pages)
1895{
1896 uint64_t bytes_xmit = 0;
1897 int ret;
1898
1899 *pages = -1;
1900 ret = ram_control_save_page(rs->f, block->offset, offset, TARGET_PAGE_SIZE,
1901 &bytes_xmit);
1902 if (ret == RAM_SAVE_CONTROL_NOT_SUPP) {
1903 return false;
1904 }
1905
1906 if (bytes_xmit) {
1907 ram_counters.transferred += bytes_xmit;
1908 *pages = 1;
1909 }
1910
1911 if (ret == RAM_SAVE_CONTROL_DELAYED) {
1912 return true;
1913 }
1914
1915 if (bytes_xmit > 0) {
1916 ram_counters.normal++;
1917 } else if (bytes_xmit == 0) {
1918 ram_counters.duplicate++;
1919 }
1920
1921 return true;
1922}
1923
Xiao Guangrong65dacaa2018-03-30 15:51:27 +08001924/*
1925 * directly send the page to the stream
1926 *
1927 * Returns the number of pages written.
1928 *
1929 * @rs: current RAM state
1930 * @block: block that contains the page we want to send
1931 * @offset: offset inside the block for the page
1932 * @buf: the page to be sent
1933 * @async: send the page asynchronously
1934 */
1935static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
1936 uint8_t *buf, bool async)
1937{
1938 ram_counters.transferred += save_page_header(rs, rs->f, block,
1939 offset | RAM_SAVE_FLAG_PAGE);
1940 if (async) {
1941 qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE,
1942 migrate_release_ram() &
1943 migration_in_postcopy());
1944 } else {
1945 qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE);
1946 }
1947 ram_counters.transferred += TARGET_PAGE_SIZE;
1948 ram_counters.normal++;
1949 return 1;
1950}
1951
Juan Quintela56e93d22015-05-07 19:33:31 +02001952/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001953 * ram_save_page: send the given page to the stream
Juan Quintela56e93d22015-05-07 19:33:31 +02001954 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001955 * Returns the number of pages written.
Dr. David Alan Gilbert3fd3c4b2015-12-10 16:31:46 +00001956 * < 0 - error
1957 * >=0 - Number of pages written - this might legally be 0
1958 * if xbzrle noticed the page was the same.
Juan Quintela56e93d22015-05-07 19:33:31 +02001959 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001960 * @rs: current RAM state
Juan Quintela56e93d22015-05-07 19:33:31 +02001961 * @block: block that contains the page we want to send
1962 * @offset: offset inside the block for the page
1963 * @last_stage: if we are at the completion stage
Juan Quintela56e93d22015-05-07 19:33:31 +02001964 */
Juan Quintelaa0a8aa12017-03-20 22:29:07 +01001965static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
Juan Quintela56e93d22015-05-07 19:33:31 +02001966{
1967 int pages = -1;
Juan Quintela56e93d22015-05-07 19:33:31 +02001968 uint8_t *p;
Juan Quintela56e93d22015-05-07 19:33:31 +02001969 bool send_async = true;
zhanghailianga08f6892016-01-15 11:37:44 +08001970 RAMBlock *block = pss->block;
Juan Quintelaa935e302017-03-21 15:36:51 +01001971 ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08001972 ram_addr_t current_addr = block->offset + offset;
Juan Quintela56e93d22015-05-07 19:33:31 +02001973
Dr. David Alan Gilbert2f68e392015-08-13 11:51:30 +01001974 p = block->host + offset;
Dr. David Alan Gilbert1db9d8e2017-04-26 19:37:21 +01001975 trace_ram_save_page(block->idstr, (uint64_t)offset, p);
Juan Quintela56e93d22015-05-07 19:33:31 +02001976
Juan Quintela56e93d22015-05-07 19:33:31 +02001977 XBZRLE_cache_lock();
Xiao Guangrongd7400a32018-03-30 15:51:26 +08001978 if (!rs->ram_bulk_stage && !migration_in_postcopy() &&
1979 migrate_use_xbzrle()) {
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08001980 pages = save_xbzrle_page(rs, &p, current_addr, block,
1981 offset, last_stage);
1982 if (!last_stage) {
1983 /* Can't send this cached data async, since the cache page
1984 * might get updated before it gets to the wire
Juan Quintela56e93d22015-05-07 19:33:31 +02001985 */
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08001986 send_async = false;
Juan Quintela56e93d22015-05-07 19:33:31 +02001987 }
1988 }
1989
1990 /* XBZRLE overflow or normal page */
1991 if (pages == -1) {
Xiao Guangrong65dacaa2018-03-30 15:51:27 +08001992 pages = save_normal_page(rs, block, offset, p, send_async);
Juan Quintela56e93d22015-05-07 19:33:31 +02001993 }
1994
1995 XBZRLE_cache_unlock();
1996
1997 return pages;
1998}
1999
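/*
 * ram_save_multifd_page: queue one page for the multifd channels
 *
 * The page is not written to the main migration stream here; it is
 * handed to multifd_queue_page() and sent over one of the multifd
 * channels. Always accounted as one normal page.
 */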
Juan Quintelab9ee2f72016-01-15 11:40:13 +01002000static int ram_save_multifd_page(RAMState *rs, RAMBlock *block,
2001 ram_addr_t offset)
2002{
Juan Quintelab9ee2f72016-01-15 11:40:13 +01002003 multifd_queue_page(block, offset);
Juan Quintelab9ee2f72016-01-15 11:40:13 +01002004 ram_counters.normal++;
2005
2006 return 1;
2007}
2008
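/*
 * do_compress_ram_page: compress one page from a compression thread
 *
 * Returns true if the page turned out to be a zero page (only the
 * zero-page marker is written in that case), false otherwise. The page
 * is first copied into @source_buf so that concurrent guest writes
 * cannot change the data while it is being compressed.
 */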
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002009static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
Xiao Guangrong6ef37712018-08-21 16:10:23 +08002010 ram_addr_t offset, uint8_t *source_buf)
Juan Quintela56e93d22015-05-07 19:33:31 +02002011{
Juan Quintela53518d92017-05-04 11:46:24 +02002012 RAMState *rs = ram_state;
Liang Lia7a9a882016-05-05 15:32:57 +08002013 uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002014 bool zero_page = false;
Xiao Guangrong6ef37712018-08-21 16:10:23 +08002015 int ret;
Juan Quintela56e93d22015-05-07 19:33:31 +02002016
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002017 if (save_zero_page_to_file(rs, f, block, offset)) {
2018 zero_page = true;
2019 goto exit;
2020 }
2021
Xiao Guangrong6ef37712018-08-21 16:10:23 +08002022 save_page_header(rs, f, block, offset | RAM_SAVE_FLAG_COMPRESS_PAGE);
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08002023
2024 /*
2025 * copy it to an internal buffer to avoid it being modified by the VM
2026 * so that we can catch the error during compression and
2027 * decompression
2028 */
2029 memcpy(source_buf, p, TARGET_PAGE_SIZE);
Xiao Guangrong6ef37712018-08-21 16:10:23 +08002030 ret = qemu_put_compression_data(f, stream, source_buf, TARGET_PAGE_SIZE);
2031 if (ret < 0) {
2032 qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
Liang Lib3be2892016-05-05 15:32:54 +08002033 error_report("compressed data failed!");
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002034 return false;
Liang Lib3be2892016-05-05 15:32:54 +08002035 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002036
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002037exit:
Xiao Guangrong6ef37712018-08-21 16:10:23 +08002038 ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1);
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002039 return zero_page;
2040}
2041
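/*
 * update_compress_thread_counts: account for one completed compression
 *
 * @bytes_xmit (including the page header) is always added to the
 * transferred counter; zero pages additionally bump the duplicate
 * counter, while compressed pages update the compressed size (minus
 * the 8 byte RAM_SAVE_FLAG_CONTINUE header) and the page count.
 */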
2042static void
2043update_compress_thread_counts(const CompressParam *param, int bytes_xmit)
2044{
Xiao Guangrong76e03002018-09-06 15:01:00 +08002045 ram_counters.transferred += bytes_xmit;
2046
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002047 if (param->zero_page) {
2048 ram_counters.duplicate++;
Xiao Guangrong76e03002018-09-06 15:01:00 +08002049 return;
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002050 }
Xiao Guangrong76e03002018-09-06 15:01:00 +08002051
2052 /* 8 means a header with RAM_SAVE_FLAG_CONTINUE. */
2053 compression_counters.compressed_size += bytes_xmit - 8;
2054 compression_counters.pages++;
Juan Quintela56e93d22015-05-07 19:33:31 +02002055}
2056
Xiao Guangrong32b05492018-09-06 15:01:01 +08002057static bool save_page_use_compression(RAMState *rs);
2058
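/*
 * flush_compressed_data: drain the compression threads
 *
 * Waits for every compression thread to finish its current page and
 * then flushes each thread's buffered output into the migration
 * stream, updating the counters. Used where ordering matters, e.g.
 * before starting a new RAMBlock.
 */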
Juan Quintelace25d332017-03-15 11:00:51 +01002059static void flush_compressed_data(RAMState *rs)
Juan Quintela56e93d22015-05-07 19:33:31 +02002060{
2061 int idx, len, thread_count;
2062
Xiao Guangrong32b05492018-09-06 15:01:01 +08002063 if (!save_page_use_compression(rs)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02002064 return;
2065 }
2066 thread_count = migrate_compress_threads();
Liang Lia7a9a882016-05-05 15:32:57 +08002067
Liang Li0d9f9a52016-05-05 15:32:59 +08002068 qemu_mutex_lock(&comp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02002069 for (idx = 0; idx < thread_count; idx++) {
Liang Lia7a9a882016-05-05 15:32:57 +08002070 while (!comp_param[idx].done) {
Liang Li0d9f9a52016-05-05 15:32:59 +08002071 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02002072 }
Liang Lia7a9a882016-05-05 15:32:57 +08002073 }
Liang Li0d9f9a52016-05-05 15:32:59 +08002074 qemu_mutex_unlock(&comp_done_lock);
Liang Lia7a9a882016-05-05 15:32:57 +08002075
2076 for (idx = 0; idx < thread_count; idx++) {
2077 qemu_mutex_lock(&comp_param[idx].mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08002078 if (!comp_param[idx].quit) {
Juan Quintelace25d332017-03-15 11:00:51 +01002079 len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002080 /*
2081 * it's safe to fetch zero_page without holding comp_done_lock
2082 * as there is no further request submitted to the thread,
2083 * i.e., the thread should be waiting for a request at this point.
2084 */
2085 update_compress_thread_counts(&comp_param[idx], len);
Juan Quintela56e93d22015-05-07 19:33:31 +02002086 }
Liang Lia7a9a882016-05-05 15:32:57 +08002087 qemu_mutex_unlock(&comp_param[idx].mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02002088 }
2089}
2090
2091static inline void set_compress_params(CompressParam *param, RAMBlock *block,
2092 ram_addr_t offset)
2093{
2094 param->block = block;
2095 param->offset = offset;
2096}
2097
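/*
 * compress_page_with_multi_thread: hand one page to an idle compression thread
 *
 * Returns 1 if a thread accepted the page, or -1 if all threads were
 * busy and 'compress-wait-thread' is off; the caller then sends the
 * page uncompressed. With 'compress-wait-thread' on, the function
 * blocks until a thread becomes free.
 */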
Juan Quintelace25d332017-03-15 11:00:51 +01002098static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block,
2099 ram_addr_t offset)
Juan Quintela56e93d22015-05-07 19:33:31 +02002100{
2101 int idx, thread_count, bytes_xmit = -1, pages = -1;
Xiao Guangrong1d588722018-08-21 16:10:20 +08002102 bool wait = migrate_compress_wait_thread();
Juan Quintela56e93d22015-05-07 19:33:31 +02002103
2104 thread_count = migrate_compress_threads();
Liang Li0d9f9a52016-05-05 15:32:59 +08002105 qemu_mutex_lock(&comp_done_lock);
Xiao Guangrong1d588722018-08-21 16:10:20 +08002106retry:
2107 for (idx = 0; idx < thread_count; idx++) {
2108 if (comp_param[idx].done) {
2109 comp_param[idx].done = false;
2110 bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file);
2111 qemu_mutex_lock(&comp_param[idx].mutex);
2112 set_compress_params(&comp_param[idx], block, offset);
2113 qemu_cond_signal(&comp_param[idx].cond);
2114 qemu_mutex_unlock(&comp_param[idx].mutex);
2115 pages = 1;
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002116 update_compress_thread_counts(&comp_param[idx], bytes_xmit);
Juan Quintela56e93d22015-05-07 19:33:31 +02002117 break;
Juan Quintela56e93d22015-05-07 19:33:31 +02002118 }
2119 }
Xiao Guangrong1d588722018-08-21 16:10:20 +08002120
2121 /*
2122 * wait for the free thread if the user specifies 'compress-wait-thread',
2123 * otherwise we will post the page out in the main thread as normal page.
2124 */
2125 if (pages < 0 && wait) {
2126 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
2127 goto retry;
2128 }
Liang Li0d9f9a52016-05-05 15:32:59 +08002129 qemu_mutex_unlock(&comp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02002130
2131 return pages;
2132}
2133
2134/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002135 * find_dirty_block: find the next dirty page and update any state
2136 * associated with the search process.
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002137 *
Wei Yanga5f7b1a2019-05-11 07:37:29 +08002138 * Returns true if a page is found
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002139 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002140 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002141 * @pss: data about the state of the current dirty page scan
2142 * @again: set to false if the search has scanned the whole of RAM
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002143 */
Juan Quintelaf20e2862017-03-21 16:19:05 +01002144static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002145{
Juan Quintelaf20e2862017-03-21 16:19:05 +01002146 pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
Juan Quintela6f37bb82017-03-13 19:26:29 +01002147 if (pss->complete_round && pss->block == rs->last_seen_block &&
Juan Quintelaa935e302017-03-21 15:36:51 +01002148 pss->page >= rs->last_page) {
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002149 /*
2150 * We've been once around the RAM and haven't found anything.
2151 * Give up.
2152 */
2153 *again = false;
2154 return false;
2155 }
Juan Quintelaa935e302017-03-21 15:36:51 +01002156 if ((pss->page << TARGET_PAGE_BITS) >= pss->block->used_length) {
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002157 /* Didn't find anything in this RAM Block */
Juan Quintelaa935e302017-03-21 15:36:51 +01002158 pss->page = 0;
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002159 pss->block = QLIST_NEXT_RCU(pss->block, next);
2160 if (!pss->block) {
Xiao Guangrong48df9d82018-09-06 15:00:59 +08002161 /*
2162 * If memory migration starts over, we will meet a dirtied page
2163 * which may still exist in the compression threads' ring, so we
2164 * should flush the compressed data to make sure the new page
2165 * is not overwritten by the old one in the destination.
2166 *
2167 * Also, if xbzrle is on, stop using the data compression at this
2168 * point. In theory, xbzrle can do better than compression.
2169 */
2170 flush_compressed_data(rs);
2171
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002172 /* Hit the end of the list */
2173 pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
2174 /* Flag that we've looped */
2175 pss->complete_round = true;
Juan Quintela6f37bb82017-03-13 19:26:29 +01002176 rs->ram_bulk_stage = false;
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002177 }
2178 /* Didn't find anything this time, but try again on the new block */
2179 *again = true;
2180 return false;
2181 } else {
2182 /* Can go around again, but... */
2183 *again = true;
2184 /* We've found something so probably don't need to */
2185 return true;
2186 }
2187}
2188
Juan Quintela3d0684b2017-03-23 15:06:39 +01002189/**
2190 * unqueue_page: gets a page off the queue
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002191 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002192 * Helper for 'get_queued_page' - gets a page off the queue
2193 *
2194 * Returns the block of the page (or NULL if none available)
2195 *
Juan Quintelaec481c62017-03-20 22:12:40 +01002196 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002197 * @offset: used to return the offset within the RAMBlock
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002198 */
Juan Quintelaf20e2862017-03-21 16:19:05 +01002199static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002200{
2201 RAMBlock *block = NULL;
2202
Xiao Guangrongae526e32018-08-21 16:10:25 +08002203 if (QSIMPLEQ_EMPTY_ATOMIC(&rs->src_page_requests)) {
2204 return NULL;
2205 }
2206
Juan Quintelaec481c62017-03-20 22:12:40 +01002207 qemu_mutex_lock(&rs->src_page_req_mutex);
2208 if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
2209 struct RAMSrcPageRequest *entry =
2210 QSIMPLEQ_FIRST(&rs->src_page_requests);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002211 block = entry->rb;
2212 *offset = entry->offset;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002213
2214 if (entry->len > TARGET_PAGE_SIZE) {
2215 entry->len -= TARGET_PAGE_SIZE;
2216 entry->offset += TARGET_PAGE_SIZE;
2217 } else {
2218 memory_region_unref(block->mr);
Juan Quintelaec481c62017-03-20 22:12:40 +01002219 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002220 g_free(entry);
Dr. David Alan Gilberte03a34f2018-06-13 11:26:42 +01002221 migration_consume_urgent_request();
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002222 }
2223 }
Juan Quintelaec481c62017-03-20 22:12:40 +01002224 qemu_mutex_unlock(&rs->src_page_req_mutex);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002225
2226 return block;
2227}
2228
Juan Quintela3d0684b2017-03-23 15:06:39 +01002229/**
2230 * get_queued_page: unqueue a page from the postcopy requests
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002231 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002232 * Skips pages that are already sent (!dirty)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002233 *
Wei Yanga5f7b1a2019-05-11 07:37:29 +08002234 * Returns true if a queued page is found
Juan Quintela3d0684b2017-03-23 15:06:39 +01002235 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002236 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002237 * @pss: data about the state of the current dirty page scan
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002238 */
Juan Quintelaf20e2862017-03-21 16:19:05 +01002239static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002240{
2241 RAMBlock *block;
2242 ram_addr_t offset;
2243 bool dirty;
2244
2245 do {
Juan Quintelaf20e2862017-03-21 16:19:05 +01002246 block = unqueue_page(rs, &offset);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002247 /*
2248 * We're sending this page, and since it's postcopy nothing else
2249 * will dirty it, and we must make sure it doesn't get sent again
2250 * even if this queue request was received after the background
2251 * search already sent it.
2252 */
2253 if (block) {
Juan Quintelaf20e2862017-03-21 16:19:05 +01002254 unsigned long page;
2255
Juan Quintela6b6712e2017-03-22 15:18:04 +01002256 page = offset >> TARGET_PAGE_BITS;
2257 dirty = test_bit(page, block->bmap);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002258 if (!dirty) {
Juan Quintela06b10682017-03-21 15:18:05 +01002259 trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
Juan Quintela6b6712e2017-03-22 15:18:04 +01002260 page, test_bit(page, block->unsentmap));
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002261 } else {
Juan Quintelaf20e2862017-03-21 16:19:05 +01002262 trace_get_queued_page(block->idstr, (uint64_t)offset, page);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002263 }
2264 }
2265
2266 } while (block && !dirty);
2267
2268 if (block) {
2269 /*
2270 * As soon as we start servicing pages out of order, then we have
2271 * to kill the bulk stage, since the bulk stage assumes
2272 * in (migration_bitmap_find_and_reset_dirty) that every page is
2273 * dirty, that's no longer true.
2274 */
Juan Quintela6f37bb82017-03-13 19:26:29 +01002275 rs->ram_bulk_stage = false;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002276
2277 /*
2278 * We want the background search to continue from the queued page
2279 * since the guest is likely to want other pages near to the page
2280 * it just requested.
2281 */
2282 pss->block = block;
Juan Quintelaa935e302017-03-21 15:36:51 +01002283 pss->page = offset >> TARGET_PAGE_BITS;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002284 }
2285
2286 return !!block;
2287}
2288
Juan Quintela56e93d22015-05-07 19:33:31 +02002289/**
Juan Quintela5e58f962017-04-03 22:06:54 +02002290 * migration_page_queue_free: drop any remaining pages in the ram
2291 * request queue
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002292 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002293 * It should be empty at the end anyway, but in error cases there may
2294 * be some left. In case any page is left, we drop it.
2295 *
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002296 */
Juan Quintela83c13382017-05-04 11:45:01 +02002297static void migration_page_queue_free(RAMState *rs)
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002298{
Juan Quintelaec481c62017-03-20 22:12:40 +01002299 struct RAMSrcPageRequest *mspr, *next_mspr;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002300 /* This queue generally should be empty - but in the case of a failed
2301 * migration it might have some droppings left in it.
2302 */
2303 rcu_read_lock();
Juan Quintelaec481c62017-03-20 22:12:40 +01002304 QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002305 memory_region_unref(mspr->rb->mr);
Juan Quintelaec481c62017-03-20 22:12:40 +01002306 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002307 g_free(mspr);
2308 }
2309 rcu_read_unlock();
2310}
2311
2312/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002313 * ram_save_queue_pages: queue the page for transmission
2314 *
2315 * A request from postcopy destination for example.
2316 *
2317 * Returns zero on success or negative on error
2318 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002319 * @rbname: Name of the RAMBlock of the request. NULL means the
2320 * same as the last one.
2321 * @start: starting address from the start of the RAMBlock
2322 * @len: length (in bytes) to send
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002323 */
Juan Quintela96506892017-03-14 18:41:03 +01002324int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002325{
2326 RAMBlock *ramblock;
Juan Quintela53518d92017-05-04 11:46:24 +02002327 RAMState *rs = ram_state;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002328
Juan Quintela93604472017-06-06 19:49:03 +02002329 ram_counters.postcopy_requests++;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002330 rcu_read_lock();
2331 if (!rbname) {
2332 /* Reuse last RAMBlock */
Juan Quintela68a098f2017-03-14 13:48:42 +01002333 ramblock = rs->last_req_rb;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002334
2335 if (!ramblock) {
2336 /*
2337 * Shouldn't happen, we can't reuse the last RAMBlock if
2338 * it's the 1st request.
2339 */
2340 error_report("ram_save_queue_pages no previous block");
2341 goto err;
2342 }
2343 } else {
2344 ramblock = qemu_ram_block_by_name(rbname);
2345
2346 if (!ramblock) {
2347 /* We shouldn't be asked for a non-existent RAMBlock */
2348 error_report("ram_save_queue_pages no block '%s'", rbname);
2349 goto err;
2350 }
Juan Quintela68a098f2017-03-14 13:48:42 +01002351 rs->last_req_rb = ramblock;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002352 }
2353 trace_ram_save_queue_pages(ramblock->idstr, start, len);
2354 if (start+len > ramblock->used_length) {
Juan Quintela9458ad62015-11-10 17:42:05 +01002355 error_report("%s request overrun start=" RAM_ADDR_FMT " len="
2356 RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002357 __func__, start, len, ramblock->used_length);
2358 goto err;
2359 }
2360
Juan Quintelaec481c62017-03-20 22:12:40 +01002361 struct RAMSrcPageRequest *new_entry =
2362 g_malloc0(sizeof(struct RAMSrcPageRequest));
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002363 new_entry->rb = ramblock;
2364 new_entry->offset = start;
2365 new_entry->len = len;
2366
2367 memory_region_ref(ramblock->mr);
Juan Quintelaec481c62017-03-20 22:12:40 +01002368 qemu_mutex_lock(&rs->src_page_req_mutex);
2369 QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
Dr. David Alan Gilberte03a34f2018-06-13 11:26:42 +01002370 migration_make_urgent_request();
Juan Quintelaec481c62017-03-20 22:12:40 +01002371 qemu_mutex_unlock(&rs->src_page_req_mutex);
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002372 rcu_read_unlock();
2373
2374 return 0;
2375
2376err:
2377 rcu_read_unlock();
2378 return -1;
2379}
2380
Xiao Guangrongd7400a32018-03-30 15:51:26 +08002381static bool save_page_use_compression(RAMState *rs)
2382{
2383 if (!migrate_use_compression()) {
2384 return false;
2385 }
2386
2387 /*
2388 * If xbzrle is on, stop using the data compression after first
2389 * round of migration even if compression is enabled. In theory,
2390 * xbzrle can do better than compression.
2391 */
2392 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
2393 return true;
2394 }
2395
2396 return false;
2397}
2398
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002399/*
2400 * try to compress the page before posting it out, return true if the page
2401 * has been properly handled by compression, otherwise needs other
2402 * paths to handle it
2403 */
2404static bool save_compress_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
2405{
2406 if (!save_page_use_compression(rs)) {
2407 return false;
2408 }
2409
2410 /*
2411 * When starting the process of a new block, the first page of
2412 * the block should be sent out before other pages in the same
2413 * block, and all the pages in the last block should have been sent
2414 * out. Keeping this order is important, because the 'cont' flag
2415 * is used to avoid resending the block name.
2416 *
2417 * We post the first page as a normal page because compression takes
2418 * a lot of CPU resources.
2419 */
2420 if (block != rs->last_sent_block) {
2421 flush_compressed_data(rs);
2422 return false;
2423 }
2424
2425 if (compress_page_with_multi_thread(rs, block, offset) > 0) {
2426 return true;
2427 }
2428
Xiao Guangrong76e03002018-09-06 15:01:00 +08002429 compression_counters.busy++;
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002430 return false;
2431}
2432
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002433/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002434 * ram_save_target_page: save one target page
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002435 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002436 * Returns the number of pages written
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002437 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002438 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002439 * @pss: data about the page we want to send
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002440 * @last_stage: if we are at the completion stage
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002441 */
Juan Quintelaa0a8aa12017-03-20 22:29:07 +01002442static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
Juan Quintelaf20e2862017-03-21 16:19:05 +01002443 bool last_stage)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002444{
Xiao Guangronga8ec91f2018-03-30 15:51:25 +08002445 RAMBlock *block = pss->block;
2446 ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
2447 int res;
2448
2449 if (control_save_page(rs, block, offset, &res)) {
2450 return res;
2451 }
2452
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002453 if (save_compress_page(rs, block, offset)) {
2454 return 1;
Xiao Guangrongd7400a32018-03-30 15:51:26 +08002455 }
2456
2457 res = save_zero_page(rs, block, offset);
2458 if (res > 0) {
2459 /* Must let xbzrle know, otherwise a previous (now 0'd) cached
2460 * page would be stale
2461 */
2462 if (!save_page_use_compression(rs)) {
2463 XBZRLE_cache_lock();
2464 xbzrle_cache_zero_page(rs, block->offset + offset);
2465 XBZRLE_cache_unlock();
2466 }
2467 ram_release_pages(block->idstr, offset, res);
2468 return res;
2469 }
2470
Xiao Guangrongda3f56c2018-03-30 15:51:28 +08002471 /*
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002472 * do not use multifd for compression as the first page in the new
2473 * block should be posted out before sending the compressed page
Xiao Guangrongda3f56c2018-03-30 15:51:28 +08002474 */
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002475 if (!save_page_use_compression(rs) && migrate_use_multifd()) {
Juan Quintelab9ee2f72016-01-15 11:40:13 +01002476 return ram_save_multifd_page(rs, block, offset);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002477 }
2478
Xiao Guangrong1faa5662018-03-30 15:51:24 +08002479 return ram_save_page(rs, pss, last_stage);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002480}
2481
2482/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002483 * ram_save_host_page: save a whole host page
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002484 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002485 * Starting at @pss->page, send pages up to the end of the current host
2486 * page. It's valid for the initial offset to point into the middle of
2487 * a host page in which case the remainder of the hostpage is sent.
2488 * Only dirty target pages are sent. Note that the host page size may
2489 * be a huge page for this block.
Dr. David Alan Gilbert1eb3fc02017-05-17 17:58:09 +01002490 * The saving stops at the boundary of the used_length of the block
2491 * if the RAMBlock isn't a multiple of the host page size.
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002492 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002493 * Returns the number of pages written or negative on error
2494 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002495 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002497 * @pss: data about the page we want to send
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002498 * @last_stage: if we are at the completion stage
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002499 */
Juan Quintelaa0a8aa12017-03-20 22:29:07 +01002500static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
Juan Quintelaf20e2862017-03-21 16:19:05 +01002501 bool last_stage)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002502{
2503 int tmppages, pages = 0;
Juan Quintelaa935e302017-03-21 15:36:51 +01002504 size_t pagesize_bits =
2505 qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
Dr. David Alan Gilbert4c011c32017-02-24 18:28:39 +00002506
Yury Kotovfbd162e2019-02-15 20:45:46 +03002507 if (ramblock_is_ignored(pss->block)) {
Cédric Le Goaterb895de52018-05-14 08:57:00 +02002508 error_report("block %s should not be migrated !", pss->block->idstr);
2509 return 0;
2510 }
2511
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002512 do {
Xiao Guangrong1faa5662018-03-30 15:51:24 +08002513        /* Check if the page is dirty and, if it is, send it */
2514 if (!migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
2515 pss->page++;
2516 continue;
2517 }
2518
Juan Quintelaf20e2862017-03-21 16:19:05 +01002519 tmppages = ram_save_target_page(rs, pss, last_stage);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002520 if (tmppages < 0) {
2521 return tmppages;
2522 }
2523
2524 pages += tmppages;
Xiao Guangrong1faa5662018-03-30 15:51:24 +08002525 if (pss->block->unsentmap) {
2526 clear_bit(pss->page, pss->block->unsentmap);
2527 }
2528
Juan Quintelaa935e302017-03-21 15:36:51 +01002529 pss->page++;
Dr. David Alan Gilbert1eb3fc02017-05-17 17:58:09 +01002530 } while ((pss->page & (pagesize_bits - 1)) &&
2531 offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS));
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002532
2533 /* The offset we leave with is the last one we looked at */
Juan Quintelaa935e302017-03-21 15:36:51 +01002534 pss->page--;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002535 return pages;
2536}
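/*
 * Worked example for ram_save_host_page() (illustrative, assuming 2 MiB
 * host pages and 4 KiB target pages, so pagesize_bits == 512): if the
 * search enters with pss->page == 1234, the loop sends every dirty target
 * page from 1234 up to 1535 (the last target page of that host page),
 * stops once pss->page reaches 1536, and then backs up by one so the
 * caller resumes from the last page looked at.
 */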
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002537
2538/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002539 * ram_find_and_save_block: finds a dirty page and sends it to f
Juan Quintela56e93d22015-05-07 19:33:31 +02002540 *
2541 * Called within an RCU critical section.
2542 *
Xiao Guangronge8f37352018-09-03 17:26:44 +08002543 * Returns the number of pages written where zero means no dirty pages,
2544 * or negative on error
Juan Quintela56e93d22015-05-07 19:33:31 +02002545 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002546 * @rs: current RAM state
Juan Quintela56e93d22015-05-07 19:33:31 +02002547 * @last_stage: if we are at the completion stage
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002548 *
2549 * On systems where host-page-size > target-page-size it will send all the
2550 * pages in a host page that are dirty.
Juan Quintela56e93d22015-05-07 19:33:31 +02002551 */
2552
Juan Quintelace25d332017-03-15 11:00:51 +01002553static int ram_find_and_save_block(RAMState *rs, bool last_stage)
Juan Quintela56e93d22015-05-07 19:33:31 +02002554{
Dr. David Alan Gilbertb8fb8cb2015-09-23 15:27:10 +01002555 PageSearchStatus pss;
Juan Quintela56e93d22015-05-07 19:33:31 +02002556 int pages = 0;
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002557 bool again, found;
Juan Quintela56e93d22015-05-07 19:33:31 +02002558
Ashijeet Acharya0827b9e2017-02-08 19:58:45 +05302559 /* No dirty page as there is zero RAM */
2560 if (!ram_bytes_total()) {
2561 return pages;
2562 }
2563
Juan Quintela6f37bb82017-03-13 19:26:29 +01002564 pss.block = rs->last_seen_block;
Juan Quintelaa935e302017-03-21 15:36:51 +01002565 pss.page = rs->last_page;
Dr. David Alan Gilbertb8fb8cb2015-09-23 15:27:10 +01002566 pss.complete_round = false;
2567
2568 if (!pss.block) {
2569 pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
2570 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002571
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002572 do {
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002573 again = true;
Juan Quintelaf20e2862017-03-21 16:19:05 +01002574 found = get_queued_page(rs, &pss);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002575
2576 if (!found) {
2577 /* priority queue empty, so just search for something dirty */
Juan Quintelaf20e2862017-03-21 16:19:05 +01002578 found = find_dirty_block(rs, &pss, &again);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002579 }
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002580
2581 if (found) {
Juan Quintelaf20e2862017-03-21 16:19:05 +01002582 pages = ram_save_host_page(rs, &pss, last_stage);
Juan Quintela56e93d22015-05-07 19:33:31 +02002583 }
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002584 } while (!pages && again);
Juan Quintela56e93d22015-05-07 19:33:31 +02002585
Juan Quintela6f37bb82017-03-13 19:26:29 +01002586 rs->last_seen_block = pss.block;
Juan Quintelaa935e302017-03-21 15:36:51 +01002587 rs->last_page = pss.page;
Juan Quintela56e93d22015-05-07 19:33:31 +02002588
2589 return pages;
2590}
2591
2592void acct_update_position(QEMUFile *f, size_t size, bool zero)
2593{
2594 uint64_t pages = size / TARGET_PAGE_SIZE;
Juan Quintelaf7ccd612017-03-13 20:30:21 +01002595
Juan Quintela56e93d22015-05-07 19:33:31 +02002596 if (zero) {
Juan Quintela93604472017-06-06 19:49:03 +02002597 ram_counters.duplicate += pages;
Juan Quintela56e93d22015-05-07 19:33:31 +02002598 } else {
Juan Quintela93604472017-06-06 19:49:03 +02002599 ram_counters.normal += pages;
2600 ram_counters.transferred += size;
Juan Quintela56e93d22015-05-07 19:33:31 +02002601 qemu_update_position(f, size);
2602 }
2603}
2604
Yury Kotovfbd162e2019-02-15 20:45:46 +03002605static uint64_t ram_bytes_total_common(bool count_ignored)
Juan Quintela56e93d22015-05-07 19:33:31 +02002606{
2607 RAMBlock *block;
2608 uint64_t total = 0;
2609
2610 rcu_read_lock();
Yury Kotovfbd162e2019-02-15 20:45:46 +03002611 if (count_ignored) {
2612 RAMBLOCK_FOREACH_MIGRATABLE(block) {
2613 total += block->used_length;
2614 }
2615 } else {
2616 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
2617 total += block->used_length;
2618 }
Peter Xu99e15582017-05-12 12:17:39 +08002619 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002620 rcu_read_unlock();
2621 return total;
2622}
2623
Yury Kotovfbd162e2019-02-15 20:45:46 +03002624uint64_t ram_bytes_total(void)
2625{
2626 return ram_bytes_total_common(false);
2627}
2628
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002629static void xbzrle_load_setup(void)
Juan Quintela56e93d22015-05-07 19:33:31 +02002630{
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002631 XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
Juan Quintela56e93d22015-05-07 19:33:31 +02002632}
2633
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002634static void xbzrle_load_cleanup(void)
2635{
2636 g_free(XBZRLE.decoded_buf);
2637 XBZRLE.decoded_buf = NULL;
2638}
2639
Peter Xu7d7c96b2017-10-19 14:31:58 +08002640static void ram_state_cleanup(RAMState **rsp)
2641{
Dr. David Alan Gilbertb9ccaf62018-02-12 16:03:39 +00002642 if (*rsp) {
2643 migration_page_queue_free(*rsp);
2644 qemu_mutex_destroy(&(*rsp)->bitmap_mutex);
2645 qemu_mutex_destroy(&(*rsp)->src_page_req_mutex);
2646 g_free(*rsp);
2647 *rsp = NULL;
2648 }
Peter Xu7d7c96b2017-10-19 14:31:58 +08002649}
2650
Peter Xu84593a02017-10-19 14:31:59 +08002651static void xbzrle_cleanup(void)
2652{
2653 XBZRLE_cache_lock();
2654 if (XBZRLE.cache) {
2655 cache_fini(XBZRLE.cache);
2656 g_free(XBZRLE.encoded_buf);
2657 g_free(XBZRLE.current_buf);
2658 g_free(XBZRLE.zero_target_page);
2659 XBZRLE.cache = NULL;
2660 XBZRLE.encoded_buf = NULL;
2661 XBZRLE.current_buf = NULL;
2662 XBZRLE.zero_target_page = NULL;
2663 }
2664 XBZRLE_cache_unlock();
2665}
2666
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002667static void ram_save_cleanup(void *opaque)
Juan Quintela56e93d22015-05-07 19:33:31 +02002668{
Juan Quintela53518d92017-05-04 11:46:24 +02002669 RAMState **rsp = opaque;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002670 RAMBlock *block;
Juan Quintelaeb859c52017-03-13 21:51:55 +01002671
Li Zhijian2ff64032015-07-02 20:18:05 +08002672    /* the caller holds the iothread lock or is in a bh, so there is
Yi Wang46334562019-04-15 14:51:29 +08002673 * no writing race against the migration bitmap
Li Zhijian2ff64032015-07-02 20:18:05 +08002674 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002675 memory_global_dirty_log_stop();
2676
Yury Kotovfbd162e2019-02-15 20:45:46 +03002677 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01002678 g_free(block->bmap);
2679 block->bmap = NULL;
2680 g_free(block->unsentmap);
2681 block->unsentmap = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02002682 }
2683
Peter Xu84593a02017-10-19 14:31:59 +08002684 xbzrle_cleanup();
Juan Quintelaf0afa332017-06-28 11:52:28 +02002685 compress_threads_save_cleanup();
Peter Xu7d7c96b2017-10-19 14:31:58 +08002686 ram_state_cleanup(rsp);
Juan Quintela56e93d22015-05-07 19:33:31 +02002687}
2688
Juan Quintela6f37bb82017-03-13 19:26:29 +01002689static void ram_state_reset(RAMState *rs)
Juan Quintela56e93d22015-05-07 19:33:31 +02002690{
Juan Quintela6f37bb82017-03-13 19:26:29 +01002691 rs->last_seen_block = NULL;
2692 rs->last_sent_block = NULL;
Juan Quintela269ace22017-03-21 15:23:31 +01002693 rs->last_page = 0;
Juan Quintela6f37bb82017-03-13 19:26:29 +01002694 rs->last_version = ram_list.version;
2695 rs->ram_bulk_stage = true;
Wei Wang6eeb63f2018-12-11 16:24:52 +08002696 rs->fpo_enabled = false;
Juan Quintela56e93d22015-05-07 19:33:31 +02002697}
2698
2699#define MAX_WAIT 50 /* ms, half buffered_file limit */
2700
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00002701/*
2702 * 'expected' is the value you expect the bitmap mostly to be full
2703 * of; it won't bother printing lines that are all this value.
 2704 * 'todump' is the bitmap to dump and 'pages' is its length in bits.
2705 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002706void ram_debug_dump_bitmap(unsigned long *todump, bool expected,
2707 unsigned long pages)
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00002708{
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00002709 int64_t cur;
2710 int64_t linelen = 128;
2711 char linebuf[129];
2712
Juan Quintela6b6712e2017-03-22 15:18:04 +01002713 for (cur = 0; cur < pages; cur += linelen) {
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00002714 int64_t curb;
2715 bool found = false;
2716 /*
2717 * Last line; catch the case where the line length
2718 * is longer than remaining ram
2719 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002720 if (cur + linelen > pages) {
2721 linelen = pages - cur;
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00002722 }
2723 for (curb = 0; curb < linelen; curb++) {
2724 bool thisbit = test_bit(cur + curb, todump);
2725 linebuf[curb] = thisbit ? '1' : '.';
2726 found = found || (thisbit != expected);
2727 }
2728 if (found) {
2729 linebuf[curb] = '\0';
2730 fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
2731 }
2732 }
2733}
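/*
 * Example of the output produced above (illustrative): each line covers up
 * to 128 pages and is printed only when it contains a bit differing from
 * 'expected', e.g.
 *   0x00000080 : ........11111111................
 * where '1' is a set bit and '.' a clear one.
 */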
2734
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002735/* **** functions for postcopy ***** */
2736
Pavel Butsykinced1c612017-02-03 18:23:21 +03002737void ram_postcopy_migrated_memory_release(MigrationState *ms)
2738{
2739 struct RAMBlock *block;
Pavel Butsykinced1c612017-02-03 18:23:21 +03002740
Yury Kotovfbd162e2019-02-15 20:45:46 +03002741 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01002742 unsigned long *bitmap = block->bmap;
2743 unsigned long range = block->used_length >> TARGET_PAGE_BITS;
2744 unsigned long run_start = find_next_zero_bit(bitmap, range, 0);
Pavel Butsykinced1c612017-02-03 18:23:21 +03002745
2746 while (run_start < range) {
2747 unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
Juan Quintelaaaa20642017-03-21 11:35:24 +01002748 ram_discard_range(block->idstr, run_start << TARGET_PAGE_BITS,
Pavel Butsykinced1c612017-02-03 18:23:21 +03002749 (run_end - run_start) << TARGET_PAGE_BITS);
2750 run_start = find_next_zero_bit(bitmap, range, run_end + 1);
2751 }
2752 }
2753}
2754
Juan Quintela3d0684b2017-03-23 15:06:39 +01002755/**
2756 * postcopy_send_discard_bm_ram: discard a RAMBlock
2757 *
2758 * Returns zero on success
2759 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002760 * Callback from postcopy_each_ram_send_discard for each RAMBlock
2761 * Note: At this point the 'unsentmap' is the processed bitmap combined
2762 * with the dirtymap; so a '1' means it's either dirty or unsent.
Juan Quintela3d0684b2017-03-23 15:06:39 +01002763 *
2764 * @ms: current migration state
2765 * @pds: state for postcopy
 2766 * @block: RAMBlock being discarded
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002768 */
2769static int postcopy_send_discard_bm_ram(MigrationState *ms,
2770 PostcopyDiscardState *pds,
Juan Quintela6b6712e2017-03-22 15:18:04 +01002771 RAMBlock *block)
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002772{
Juan Quintela6b6712e2017-03-22 15:18:04 +01002773 unsigned long end = block->used_length >> TARGET_PAGE_BITS;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002774 unsigned long current;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002775 unsigned long *unsentmap = block->unsentmap;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002776
Juan Quintela6b6712e2017-03-22 15:18:04 +01002777 for (current = 0; current < end; ) {
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002778 unsigned long one = find_next_bit(unsentmap, end, current);
2779
2780 if (one <= end) {
2781 unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1);
2782 unsigned long discard_length;
2783
2784 if (zero >= end) {
2785 discard_length = end - one;
2786 } else {
2787 discard_length = zero - one;
2788 }
Dr. David Alan Gilbertd688c622016-06-13 12:16:40 +01002789 if (discard_length) {
2790 postcopy_discard_send_range(ms, pds, one, discard_length);
2791 }
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002792 current = one + discard_length;
2793 } else {
2794 current = one;
2795 }
2796 }
2797
2798 return 0;
2799}
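/*
 * Worked example for postcopy_send_discard_bm_ram() (illustrative): for a
 * block of 8 target pages whose unsentmap bits are 1 1 0 0 1 1 1 0, the
 * loop above emits two discard ranges: (start 0, length 2) and
 * (start 4, length 3).
 */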
2800
Juan Quintela3d0684b2017-03-23 15:06:39 +01002801/**
2802 * postcopy_each_ram_send_discard: discard all RAMBlocks
2803 *
2804 * Returns 0 for success or negative for error
2805 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002806 * Utility for the outgoing postcopy code.
2807 * Calls postcopy_send_discard_bm_ram for each RAMBlock
2808 * passing it bitmap indexes and name.
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002809 * (qemu_ram_foreach_block ends up passing unscaled lengths
2810 * which would mean postcopy code would have to deal with target page)
Juan Quintela3d0684b2017-03-23 15:06:39 +01002811 *
2812 * @ms: current migration state
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002813 */
2814static int postcopy_each_ram_send_discard(MigrationState *ms)
2815{
2816 struct RAMBlock *block;
2817 int ret;
2818
Yury Kotovfbd162e2019-02-15 20:45:46 +03002819 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01002820 PostcopyDiscardState *pds =
2821 postcopy_discard_send_init(ms, block->idstr);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002822
2823 /*
2824 * Postcopy sends chunks of bitmap over the wire, but it
 2825 * just needs indexes at this point, which avoids it having
2826 * target page specific code.
2827 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002828 ret = postcopy_send_discard_bm_ram(ms, pds, block);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002829 postcopy_discard_send_finish(ms, pds);
2830 if (ret) {
2831 return ret;
2832 }
2833 }
2834
2835 return 0;
2836}
2837
Juan Quintela3d0684b2017-03-23 15:06:39 +01002838/**
 2839 * postcopy_chunk_hostpages_pass: canonicalize bitmap in hostpages
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002840 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002841 * Helper for postcopy_chunk_hostpages; it's called twice to
2842 * canonicalize the two bitmaps, that are similar, but one is
2843 * inverted.
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002844 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002845 * Postcopy requires that all target pages in a hostpage are dirty or
2846 * clean, not a mix. This function canonicalizes the bitmaps.
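 *
 * Worked example (illustrative): with 2 MiB host pages and 4 KiB target
 * pages, host_ratio is 512. A dirty run starting at target page 1000 is
 * 1000 % 512 = 488 pages into its host page, so the whole host page
 * (target pages 512..1023) is treated as a unit: it is discarded on the
 * destination if any of it had been sent, and re-marked dirty/unsent on
 * the source.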
2847 *
2848 * @ms: current migration state
2849 * @unsent_pass: if true we need to canonicalize partially unsent host pages
2850 * otherwise we need to canonicalize partially dirty host pages
2851 * @block: block that contains the page we want to canonicalize
2852 * @pds: state for postcopy
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002853 */
2854static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
2855 RAMBlock *block,
2856 PostcopyDiscardState *pds)
2857{
Juan Quintela53518d92017-05-04 11:46:24 +02002858 RAMState *rs = ram_state;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002859 unsigned long *bitmap = block->bmap;
2860 unsigned long *unsentmap = block->unsentmap;
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00002861 unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002862 unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002863 unsigned long run_start;
2864
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00002865 if (block->page_size == TARGET_PAGE_SIZE) {
2866 /* Easy case - TPS==HPS for a non-huge page RAMBlock */
2867 return;
2868 }
2869
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002870 if (unsent_pass) {
2871 /* Find a sent page */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002872 run_start = find_next_zero_bit(unsentmap, pages, 0);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002873 } else {
2874 /* Find a dirty page */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002875 run_start = find_next_bit(bitmap, pages, 0);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002876 }
2877
Juan Quintela6b6712e2017-03-22 15:18:04 +01002878 while (run_start < pages) {
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002879 bool do_fixup = false;
2880 unsigned long fixup_start_addr;
2881 unsigned long host_offset;
2882
2883 /*
2884 * If the start of this run of pages is in the middle of a host
2885 * page, then we need to fixup this host page.
2886 */
2887 host_offset = run_start % host_ratio;
2888 if (host_offset) {
2889 do_fixup = true;
2890 run_start -= host_offset;
2891 fixup_start_addr = run_start;
2892 /* For the next pass */
2893 run_start = run_start + host_ratio;
2894 } else {
2895 /* Find the end of this run */
2896 unsigned long run_end;
2897 if (unsent_pass) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01002898 run_end = find_next_bit(unsentmap, pages, run_start + 1);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002899 } else {
Juan Quintela6b6712e2017-03-22 15:18:04 +01002900 run_end = find_next_zero_bit(bitmap, pages, run_start + 1);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002901 }
2902 /*
2903 * If the end isn't at the start of a host page, then the
2904 * run doesn't finish at the end of a host page
2905 * and we need to discard.
2906 */
2907 host_offset = run_end % host_ratio;
2908 if (host_offset) {
2909 do_fixup = true;
2910 fixup_start_addr = run_end - host_offset;
2911 /*
2912 * This host page has gone, the next loop iteration starts
2913 * from after the fixup
2914 */
2915 run_start = fixup_start_addr + host_ratio;
2916 } else {
2917 /*
2918 * No discards on this iteration, next loop starts from
2919 * next sent/dirty page
2920 */
2921 run_start = run_end + 1;
2922 }
2923 }
2924
2925 if (do_fixup) {
2926 unsigned long page;
2927
2928 /* Tell the destination to discard this page */
2929 if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
2930 /* For the unsent_pass we:
2931 * discard partially sent pages
2932 * For the !unsent_pass (dirty) we:
2933 * discard partially dirty pages that were sent
2934 * (any partially sent pages were already discarded
2935 * by the previous unsent_pass)
2936 */
2937 postcopy_discard_send_range(ms, pds, fixup_start_addr,
2938 host_ratio);
2939 }
2940
2941 /* Clean up the bitmap */
2942 for (page = fixup_start_addr;
2943 page < fixup_start_addr + host_ratio; page++) {
2944 /* All pages in this host page are now not sent */
2945 set_bit(page, unsentmap);
2946
2947 /*
2948 * Remark them as dirty, updating the count for any pages
2949 * that weren't previously dirty.
2950 */
Juan Quintela0d8ec882017-03-13 21:21:41 +01002951 rs->migration_dirty_pages += !test_and_set_bit(page, bitmap);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002952 }
2953 }
2954
2955 if (unsent_pass) {
2956 /* Find the next sent page for the next iteration */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002957 run_start = find_next_zero_bit(unsentmap, pages, run_start);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002958 } else {
2959 /* Find the next dirty page for the next iteration */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002960 run_start = find_next_bit(bitmap, pages, run_start);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002961 }
2962 }
2963}
2964
Juan Quintela3d0684b2017-03-23 15:06:39 +01002965/**
 2966 * postcopy_chunk_hostpages: discard any partially sent host page
2967 *
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002968 * Utility for the outgoing postcopy code.
2969 *
2970 * Discard any partially sent host-page size chunks, mark any partially
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00002971 * dirty host-page size chunks as all dirty. In this case the host-page
2972 * is the host-page for the particular RAMBlock, i.e. it might be a huge page
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002973 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002974 * Returns zero on success
2975 *
2976 * @ms: current migration state
Juan Quintela6b6712e2017-03-22 15:18:04 +01002977 * @block: block we want to work with
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002978 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002979static int postcopy_chunk_hostpages(MigrationState *ms, RAMBlock *block)
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002980{
Juan Quintela6b6712e2017-03-22 15:18:04 +01002981 PostcopyDiscardState *pds =
2982 postcopy_discard_send_init(ms, block->idstr);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002983
Juan Quintela6b6712e2017-03-22 15:18:04 +01002984 /* First pass: Discard all partially sent host pages */
2985 postcopy_chunk_hostpages_pass(ms, true, block, pds);
2986 /*
2987 * Second pass: Ensure that all partially dirty host pages are made
2988 * fully dirty.
2989 */
2990 postcopy_chunk_hostpages_pass(ms, false, block, pds);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002991
Juan Quintela6b6712e2017-03-22 15:18:04 +01002992 postcopy_discard_send_finish(ms, pds);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002993 return 0;
2994}
2995
Juan Quintela3d0684b2017-03-23 15:06:39 +01002996/**
2997 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
2998 *
2999 * Returns zero on success
3000 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003001 * Transmit the set of pages to be discarded after precopy to the target;
3002 * these are pages that:
3003 * a) Have been previously transmitted but are now dirty again
 3004 * b) Pages that have never been transmitted; this ensures that
3005 * any pages on the destination that have been mapped by background
3006 * tasks get discarded (transparent huge pages is the specific concern)
3007 * Hopefully this is pretty sparse
Juan Quintela3d0684b2017-03-23 15:06:39 +01003008 *
3009 * @ms: current migration state
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003010 */
3011int ram_postcopy_send_discard_bitmap(MigrationState *ms)
3012{
Juan Quintela53518d92017-05-04 11:46:24 +02003013 RAMState *rs = ram_state;
Juan Quintela6b6712e2017-03-22 15:18:04 +01003014 RAMBlock *block;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003015 int ret;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003016
3017 rcu_read_lock();
3018
3019 /* This should be our last sync, the src is now paused */
Juan Quintelaeb859c52017-03-13 21:51:55 +01003020 migration_bitmap_sync(rs);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003021
Juan Quintela6b6712e2017-03-22 15:18:04 +01003022 /* Easiest way to make sure we don't resume in the middle of a host-page */
3023 rs->last_seen_block = NULL;
3024 rs->last_sent_block = NULL;
3025 rs->last_page = 0;
3026
Yury Kotovfbd162e2019-02-15 20:45:46 +03003027 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01003028 unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
3029 unsigned long *bitmap = block->bmap;
3030 unsigned long *unsentmap = block->unsentmap;
3031
3032 if (!unsentmap) {
 3033 /* We don't have a safe way to resize the unsentmap, so
3034 * if the bitmap was resized it will be NULL at this
3035 * point.
3036 */
3037 error_report("migration ram resized during precopy phase");
3038 rcu_read_unlock();
3039 return -EINVAL;
3040 }
3041 /* Deal with TPS != HPS and huge pages */
3042 ret = postcopy_chunk_hostpages(ms, block);
3043 if (ret) {
3044 rcu_read_unlock();
3045 return ret;
3046 }
3047
3048 /*
3049 * Update the unsentmap to be unsentmap = unsentmap | dirty
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003050 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01003051 bitmap_or(unsentmap, unsentmap, bitmap, pages);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003052#ifdef DEBUG_POSTCOPY
Juan Quintela6b6712e2017-03-22 15:18:04 +01003053 ram_debug_dump_bitmap(unsentmap, true, pages);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003054#endif
Juan Quintela6b6712e2017-03-22 15:18:04 +01003055 }
3056 trace_ram_postcopy_send_discard_bitmap();
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003057
3058 ret = postcopy_each_ram_send_discard(ms);
3059 rcu_read_unlock();
3060
3061 return ret;
3062}
3063
Juan Quintela3d0684b2017-03-23 15:06:39 +01003064/**
3065 * ram_discard_range: discard dirtied pages at the beginning of postcopy
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003066 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01003067 * Returns zero on success
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003068 *
Juan Quintela36449152017-03-23 15:11:59 +01003069 * @rbname: name of the RAMBlock of the request. NULL means the
 3070 * same as the last one.
Juan Quintela3d0684b2017-03-23 15:06:39 +01003071 * @start: byte offset within the RAMBlock
 3072 * @length: number of bytes to discard
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003073 */
Juan Quintelaaaa20642017-03-21 11:35:24 +01003074int ram_discard_range(const char *rbname, uint64_t start, size_t length)
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003075{
3076 int ret = -1;
3077
Juan Quintela36449152017-03-23 15:11:59 +01003078 trace_ram_discard_range(rbname, start, length);
Dr. David Alan Gilbertd3a50382017-02-24 18:28:32 +00003079
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003080 rcu_read_lock();
Juan Quintela36449152017-03-23 15:11:59 +01003081 RAMBlock *rb = qemu_ram_block_by_name(rbname);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003082
3083 if (!rb) {
Juan Quintela36449152017-03-23 15:11:59 +01003084 error_report("ram_discard_range: Failed to find block '%s'", rbname);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003085 goto err;
3086 }
3087
Peter Xu814bb082018-07-23 20:33:02 +08003088 /*
3089 * On source VM, we don't need to update the received bitmap since
3090 * we don't even have one.
3091 */
3092 if (rb->receivedmap) {
3093 bitmap_clear(rb->receivedmap, start >> qemu_target_page_bits(),
3094 length >> qemu_target_page_bits());
3095 }
3096
Dr. David Alan Gilbertd3a50382017-02-24 18:28:32 +00003097 ret = ram_block_discard_range(rb, start, length);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003098
3099err:
3100 rcu_read_unlock();
3101
3102 return ret;
3103}
3104
Peter Xu84593a02017-10-19 14:31:59 +08003105/*
3106 * For every allocation, we will try not to crash the VM if the
 3107 * allocation fails.
3108 */
3109static int xbzrle_init(void)
3110{
3111 Error *local_err = NULL;
3112
3113 if (!migrate_use_xbzrle()) {
3114 return 0;
3115 }
3116
3117 XBZRLE_cache_lock();
3118
3119 XBZRLE.zero_target_page = g_try_malloc0(TARGET_PAGE_SIZE);
3120 if (!XBZRLE.zero_target_page) {
3121 error_report("%s: Error allocating zero page", __func__);
3122 goto err_out;
3123 }
3124
3125 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(),
3126 TARGET_PAGE_SIZE, &local_err);
3127 if (!XBZRLE.cache) {
3128 error_report_err(local_err);
3129 goto free_zero_page;
3130 }
3131
3132 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
3133 if (!XBZRLE.encoded_buf) {
3134 error_report("%s: Error allocating encoded_buf", __func__);
3135 goto free_cache;
3136 }
3137
3138 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
3139 if (!XBZRLE.current_buf) {
3140 error_report("%s: Error allocating current_buf", __func__);
3141 goto free_encoded_buf;
3142 }
3143
3144 /* We are all good */
3145 XBZRLE_cache_unlock();
3146 return 0;
3147
3148free_encoded_buf:
3149 g_free(XBZRLE.encoded_buf);
3150 XBZRLE.encoded_buf = NULL;
3151free_cache:
3152 cache_fini(XBZRLE.cache);
3153 XBZRLE.cache = NULL;
3154free_zero_page:
3155 g_free(XBZRLE.zero_target_page);
3156 XBZRLE.zero_target_page = NULL;
3157err_out:
3158 XBZRLE_cache_unlock();
3159 return -ENOMEM;
3160}
3161
Juan Quintela53518d92017-05-04 11:46:24 +02003162static int ram_state_init(RAMState **rsp)
Juan Quintela56e93d22015-05-07 19:33:31 +02003163{
Peter Xu7d00ee62017-10-19 14:31:57 +08003164 *rsp = g_try_new0(RAMState, 1);
3165
3166 if (!*rsp) {
3167 error_report("%s: Init ramstate fail", __func__);
3168 return -1;
3169 }
Juan Quintela53518d92017-05-04 11:46:24 +02003170
3171 qemu_mutex_init(&(*rsp)->bitmap_mutex);
3172 qemu_mutex_init(&(*rsp)->src_page_req_mutex);
3173 QSIMPLEQ_INIT(&(*rsp)->src_page_requests);
Juan Quintela56e93d22015-05-07 19:33:31 +02003174
Peter Xu7d00ee62017-10-19 14:31:57 +08003175 /*
Wei Yang03158512019-06-04 14:17:27 +08003176     * This must match the initial values of the dirty bitmap.
3177 * Currently we initialize the dirty bitmap to all zeros so
3178 * here the total dirty page count is zero.
Peter Xu7d00ee62017-10-19 14:31:57 +08003179 */
Wei Yang03158512019-06-04 14:17:27 +08003180 (*rsp)->migration_dirty_pages = 0;
Peter Xu7d00ee62017-10-19 14:31:57 +08003181 ram_state_reset(*rsp);
3182
3183 return 0;
3184}
3185
Peter Xud6eff5d2017-10-19 14:32:00 +08003186static void ram_list_init_bitmaps(void)
3187{
3188 RAMBlock *block;
3189 unsigned long pages;
3190
3191 /* Skip setting bitmap if there is no RAM */
3192 if (ram_bytes_total()) {
Yury Kotovfbd162e2019-02-15 20:45:46 +03003193 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Peter Xud6eff5d2017-10-19 14:32:00 +08003194 pages = block->max_length >> TARGET_PAGE_BITS;
Wei Yang03158512019-06-04 14:17:27 +08003195 /*
3196 * The initial dirty bitmap for migration must be set with all
3197 * ones to make sure we'll migrate every guest RAM page to
3198 * destination.
3199 * Here we didn't set RAMBlock.bmap simply because it is already
3200 * set in ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION] in
3201 * ram_block_add, and that's where we'll sync the dirty bitmaps.
3202 * Here setting RAMBlock.bmap would be fine too but not necessary.
3203 */
Peter Xud6eff5d2017-10-19 14:32:00 +08003204 block->bmap = bitmap_new(pages);
Peter Xud6eff5d2017-10-19 14:32:00 +08003205 if (migrate_postcopy_ram()) {
3206 block->unsentmap = bitmap_new(pages);
3207 bitmap_set(block->unsentmap, 0, pages);
3208 }
3209 }
3210 }
3211}
3212
3213static void ram_init_bitmaps(RAMState *rs)
3214{
3215 /* For memory_global_dirty_log_start below. */
3216 qemu_mutex_lock_iothread();
3217 qemu_mutex_lock_ramlist();
3218 rcu_read_lock();
3219
3220 ram_list_init_bitmaps();
3221 memory_global_dirty_log_start();
Wei Wangbd227062018-12-11 16:24:51 +08003222 migration_bitmap_sync_precopy(rs);
Peter Xud6eff5d2017-10-19 14:32:00 +08003223
3224 rcu_read_unlock();
3225 qemu_mutex_unlock_ramlist();
3226 qemu_mutex_unlock_iothread();
3227}
3228
Peter Xu7d00ee62017-10-19 14:31:57 +08003229static int ram_init_all(RAMState **rsp)
3230{
Peter Xu7d00ee62017-10-19 14:31:57 +08003231 if (ram_state_init(rsp)) {
3232 return -1;
3233 }
3234
Peter Xu84593a02017-10-19 14:31:59 +08003235 if (xbzrle_init()) {
3236 ram_state_cleanup(rsp);
3237 return -1;
Juan Quintela56e93d22015-05-07 19:33:31 +02003238 }
3239
Peter Xud6eff5d2017-10-19 14:32:00 +08003240 ram_init_bitmaps(*rsp);
zhanghailianga91246c2016-10-27 14:42:59 +08003241
3242 return 0;
3243}
3244
Peter Xu08614f32018-05-02 18:47:33 +08003245static void ram_state_resume_prepare(RAMState *rs, QEMUFile *out)
3246{
3247 RAMBlock *block;
3248 uint64_t pages = 0;
3249
3250 /*
3251 * Postcopy is not using xbzrle/compression, so no need for that.
 3252 * Also, since the source is already halted, we don't need to care
 3253 * about dirty page logging either.
3254 */
3255
Yury Kotovfbd162e2019-02-15 20:45:46 +03003256 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Peter Xu08614f32018-05-02 18:47:33 +08003257 pages += bitmap_count_one(block->bmap,
3258 block->used_length >> TARGET_PAGE_BITS);
3259 }
3260
3261 /* This may not be aligned with current bitmaps. Recalculate. */
3262 rs->migration_dirty_pages = pages;
3263
3264 rs->last_seen_block = NULL;
3265 rs->last_sent_block = NULL;
3266 rs->last_page = 0;
3267 rs->last_version = ram_list.version;
3268 /*
3269 * Disable the bulk stage, otherwise we'll resend the whole RAM no
3270 * matter what we have sent.
3271 */
3272 rs->ram_bulk_stage = false;
3273
3274 /* Update RAMState cache of output QEMUFile */
3275 rs->f = out;
3276
3277 trace_ram_state_resume_prepare(pages);
3278}
3279
Juan Quintela3d0684b2017-03-23 15:06:39 +01003280/*
Wei Wang6bcb05f2018-12-11 16:24:50 +08003281 * This function clears bits of the free pages reported by the caller from the
3282 * migration dirty bitmap. @addr is the host address corresponding to the
3283 * start of the continuous guest free pages, and @len is the total bytes of
3284 * those pages.
3285 */
3286void qemu_guest_free_page_hint(void *addr, size_t len)
3287{
3288 RAMBlock *block;
3289 ram_addr_t offset;
3290 size_t used_len, start, npages;
3291 MigrationState *s = migrate_get_current();
3292
3293 /* This function is currently expected to be used during live migration */
3294 if (!migration_is_setup_or_active(s->state)) {
3295 return;
3296 }
3297
3298 for (; len > 0; len -= used_len, addr += used_len) {
3299 block = qemu_ram_block_from_host(addr, false, &offset);
3300 if (unlikely(!block || offset >= block->used_length)) {
3301 /*
3302 * The implementation might not support RAMBlock resize during
3303 * live migration, but it could happen in theory with future
3304 * updates. So we add a check here to capture that case.
3305 */
3306 error_report_once("%s unexpected error", __func__);
3307 return;
3308 }
3309
3310 if (len <= block->used_length - offset) {
3311 used_len = len;
3312 } else {
3313 used_len = block->used_length - offset;
3314 }
3315
3316 start = offset >> TARGET_PAGE_BITS;
3317 npages = used_len >> TARGET_PAGE_BITS;
3318
3319 qemu_mutex_lock(&ram_state->bitmap_mutex);
3320 ram_state->migration_dirty_pages -=
3321 bitmap_count_one_with_offset(block->bmap, start, npages);
3322 bitmap_clear(block->bmap, start, npages);
3323 qemu_mutex_unlock(&ram_state->bitmap_mutex);
3324 }
3325}
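/*
 * Worked example for qemu_guest_free_page_hint() (illustrative): a hint
 * covering 1 MiB that starts 64 KiB into a RAMBlock clears, with 4 KiB
 * target pages, 256 bits starting at bit 16 of that block's bmap and
 * reduces migration_dirty_pages by however many of those bits were set.
 */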
3326
3327/*
Juan Quintela3d0684b2017-03-23 15:06:39 +01003328 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
zhanghailianga91246c2016-10-27 14:42:59 +08003329 * long-running RCU critical section. When rcu-reclaims in the code
3330 * start to become numerous it will be necessary to reduce the
3331 * granularity of these critical sections.
3332 */
3333
Juan Quintela3d0684b2017-03-23 15:06:39 +01003334/**
3335 * ram_save_setup: Setup RAM for migration
3336 *
3337 * Returns zero to indicate success and negative for error
3338 *
3339 * @f: QEMUFile where to send the data
3340 * @opaque: RAMState pointer
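 *
 * Sketch of the setup-stage wire format, as emitted below: a be64 of the
 * total RAM size tagged with RAM_SAVE_FLAG_MEM_SIZE, then for each
 * migratable block a length byte plus idstr, a be64 used_length,
 * optionally a be64 page size (postcopy with non-host-sized pages) and,
 * with ignore-shared enabled, the be64 mr address plus an "ignored" byte,
 * terminated by RAM_SAVE_FLAG_EOS.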
3341 */
zhanghailianga91246c2016-10-27 14:42:59 +08003342static int ram_save_setup(QEMUFile *f, void *opaque)
3343{
Juan Quintela53518d92017-05-04 11:46:24 +02003344 RAMState **rsp = opaque;
zhanghailianga91246c2016-10-27 14:42:59 +08003345 RAMBlock *block;
3346
Xiao Guangrongdcaf4462018-03-30 15:51:20 +08003347 if (compress_threads_save_setup()) {
3348 return -1;
3349 }
3350
zhanghailianga91246c2016-10-27 14:42:59 +08003351 /* migration has already setup the bitmap, reuse it. */
3352 if (!migration_in_colo_state()) {
Peter Xu7d00ee62017-10-19 14:31:57 +08003353 if (ram_init_all(rsp) != 0) {
Xiao Guangrongdcaf4462018-03-30 15:51:20 +08003354 compress_threads_save_cleanup();
zhanghailianga91246c2016-10-27 14:42:59 +08003355 return -1;
Juan Quintela53518d92017-05-04 11:46:24 +02003356 }
zhanghailianga91246c2016-10-27 14:42:59 +08003357 }
Juan Quintela53518d92017-05-04 11:46:24 +02003358 (*rsp)->f = f;
zhanghailianga91246c2016-10-27 14:42:59 +08003359
3360 rcu_read_lock();
Juan Quintela56e93d22015-05-07 19:33:31 +02003361
Yury Kotovfbd162e2019-02-15 20:45:46 +03003362 qemu_put_be64(f, ram_bytes_total_common(true) | RAM_SAVE_FLAG_MEM_SIZE);
Juan Quintela56e93d22015-05-07 19:33:31 +02003363
Cédric Le Goaterb895de52018-05-14 08:57:00 +02003364 RAMBLOCK_FOREACH_MIGRATABLE(block) {
Juan Quintela56e93d22015-05-07 19:33:31 +02003365 qemu_put_byte(f, strlen(block->idstr));
3366 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
3367 qemu_put_be64(f, block->used_length);
Dr. David Alan Gilbertef08fb32017-02-24 18:28:30 +00003368 if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
3369 qemu_put_be64(f, block->page_size);
3370 }
Yury Kotovfbd162e2019-02-15 20:45:46 +03003371 if (migrate_ignore_shared()) {
3372 qemu_put_be64(f, block->mr->addr);
3373 qemu_put_byte(f, ramblock_is_ignored(block) ? 1 : 0);
3374 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003375 }
3376
3377 rcu_read_unlock();
3378
3379 ram_control_before_iterate(f, RAM_CONTROL_SETUP);
3380 ram_control_after_iterate(f, RAM_CONTROL_SETUP);
3381
Juan Quintela6df264a2018-02-28 09:10:07 +01003382 multifd_send_sync_main();
Juan Quintela56e93d22015-05-07 19:33:31 +02003383 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
Juan Quintela35374cb2018-04-18 10:13:21 +02003384 qemu_fflush(f);
Juan Quintela56e93d22015-05-07 19:33:31 +02003385
3386 return 0;
3387}
3388
Juan Quintela3d0684b2017-03-23 15:06:39 +01003389/**
3390 * ram_save_iterate: iterative stage for migration
3391 *
3392 * Returns zero to indicate success and negative for error
3393 *
3394 * @f: QEMUFile where to send the data
3395 * @opaque: RAMState pointer
3396 */
Juan Quintela56e93d22015-05-07 19:33:31 +02003397static int ram_save_iterate(QEMUFile *f, void *opaque)
3398{
Juan Quintela53518d92017-05-04 11:46:24 +02003399 RAMState **temp = opaque;
3400 RAMState *rs = *temp;
Juan Quintela56e93d22015-05-07 19:33:31 +02003401 int ret;
3402 int i;
3403 int64_t t0;
Thomas Huth5c903082016-11-04 14:10:17 +01003404 int done = 0;
Juan Quintela56e93d22015-05-07 19:33:31 +02003405
Peter Lievenb2557342018-03-08 12:18:24 +01003406 if (blk_mig_bulk_active()) {
3407 /* Avoid transferring ram during bulk phase of block migration as
3408 * the bulk phase will usually take a long time and transferring
3409 * ram updates during that time is pointless. */
3410 goto out;
3411 }
3412
Juan Quintela56e93d22015-05-07 19:33:31 +02003413 rcu_read_lock();
Juan Quintela6f37bb82017-03-13 19:26:29 +01003414 if (ram_list.version != rs->last_version) {
3415 ram_state_reset(rs);
Juan Quintela56e93d22015-05-07 19:33:31 +02003416 }
3417
3418 /* Read version before ram_list.blocks */
3419 smp_rmb();
3420
3421 ram_control_before_iterate(f, RAM_CONTROL_ROUND);
3422
3423 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
3424 i = 0;
Dr. David Alan Gilberte03a34f2018-06-13 11:26:42 +01003425 while ((ret = qemu_file_rate_limit(f)) == 0 ||
3426 !QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02003427 int pages;
3428
Dr. David Alan Gilberte03a34f2018-06-13 11:26:42 +01003429 if (qemu_file_get_error(f)) {
3430 break;
3431 }
3432
Juan Quintelace25d332017-03-15 11:00:51 +01003433 pages = ram_find_and_save_block(rs, false);
Juan Quintela56e93d22015-05-07 19:33:31 +02003434        /* no more pages to send */
3435 if (pages == 0) {
Thomas Huth5c903082016-11-04 14:10:17 +01003436 done = 1;
Juan Quintela56e93d22015-05-07 19:33:31 +02003437 break;
3438 }
Xiao Guangronge8f37352018-09-03 17:26:44 +08003439
3440 if (pages < 0) {
3441 qemu_file_set_error(f, pages);
3442 break;
3443 }
3444
Xiao Guangrongbe8b02e2018-09-03 17:26:42 +08003445 rs->target_page_count += pages;
Jason J. Herne070afca2015-09-08 13:12:35 -04003446
Juan Quintela56e93d22015-05-07 19:33:31 +02003447 /* we want to check in the 1st loop, just in case it was the 1st time
3448 and we had to sync the dirty bitmap.
Wei Yanga5f7b1a2019-05-11 07:37:29 +08003449           qemu_clock_get_ns() is a bit expensive, so we only check once every
Juan Quintela56e93d22015-05-07 19:33:31 +02003450           few iterations
3451 */
3452 if ((i & 63) == 0) {
3453 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
3454 if (t1 > MAX_WAIT) {
Juan Quintela55c44462017-01-23 22:32:05 +01003455 trace_ram_save_iterate_big_wait(t1, i);
Juan Quintela56e93d22015-05-07 19:33:31 +02003456 break;
3457 }
3458 }
3459 i++;
3460 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003461 rcu_read_unlock();
3462
3463 /*
3464 * Must occur before EOS (or any QEMUFile operation)
3465 * because of RDMA protocol.
3466 */
3467 ram_control_after_iterate(f, RAM_CONTROL_ROUND);
3468
Juan Quintela6df264a2018-02-28 09:10:07 +01003469 multifd_send_sync_main();
Peter Lievenb2557342018-03-08 12:18:24 +01003470out:
Juan Quintela56e93d22015-05-07 19:33:31 +02003471 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
Juan Quintela35374cb2018-04-18 10:13:21 +02003472 qemu_fflush(f);
Juan Quintela93604472017-06-06 19:49:03 +02003473 ram_counters.transferred += 8;
Juan Quintela56e93d22015-05-07 19:33:31 +02003474
3475 ret = qemu_file_get_error(f);
3476 if (ret < 0) {
3477 return ret;
3478 }
3479
Thomas Huth5c903082016-11-04 14:10:17 +01003480 return done;
Juan Quintela56e93d22015-05-07 19:33:31 +02003481}
3482
Juan Quintela3d0684b2017-03-23 15:06:39 +01003483/**
3484 * ram_save_complete: function called to send the remaining amount of ram
3485 *
Xiao Guangronge8f37352018-09-03 17:26:44 +08003486 * Returns zero to indicate success or negative on error
Juan Quintela3d0684b2017-03-23 15:06:39 +01003487 *
3488 * Called with iothread lock
3489 *
3490 * @f: QEMUFile where to send the data
3491 * @opaque: RAMState pointer
3492 */
Juan Quintela56e93d22015-05-07 19:33:31 +02003493static int ram_save_complete(QEMUFile *f, void *opaque)
3494{
Juan Quintela53518d92017-05-04 11:46:24 +02003495 RAMState **temp = opaque;
3496 RAMState *rs = *temp;
Xiao Guangronge8f37352018-09-03 17:26:44 +08003497 int ret = 0;
Juan Quintela6f37bb82017-03-13 19:26:29 +01003498
Juan Quintela56e93d22015-05-07 19:33:31 +02003499 rcu_read_lock();
3500
Juan Quintela57273092017-03-20 22:25:28 +01003501 if (!migration_in_postcopy()) {
Wei Wangbd227062018-12-11 16:24:51 +08003502 migration_bitmap_sync_precopy(rs);
Dr. David Alan Gilbert663e6c12015-11-05 18:11:13 +00003503 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003504
3505 ram_control_before_iterate(f, RAM_CONTROL_FINISH);
3506
3507 /* try transferring iterative blocks of memory */
3508
3509 /* flush all remaining blocks regardless of rate limiting */
3510 while (true) {
3511 int pages;
3512
Juan Quintelace25d332017-03-15 11:00:51 +01003513 pages = ram_find_and_save_block(rs, !migration_in_colo_state());
Juan Quintela56e93d22015-05-07 19:33:31 +02003514        /* no more blocks to send */
3515 if (pages == 0) {
3516 break;
3517 }
Xiao Guangronge8f37352018-09-03 17:26:44 +08003518 if (pages < 0) {
3519 ret = pages;
3520 break;
3521 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003522 }
3523
Juan Quintelace25d332017-03-15 11:00:51 +01003524 flush_compressed_data(rs);
Juan Quintela56e93d22015-05-07 19:33:31 +02003525 ram_control_after_iterate(f, RAM_CONTROL_FINISH);
Juan Quintela56e93d22015-05-07 19:33:31 +02003526
3527 rcu_read_unlock();
Paolo Bonzinid09a6fd2015-07-09 08:47:58 +02003528
Juan Quintela6df264a2018-02-28 09:10:07 +01003529 multifd_send_sync_main();
Juan Quintela56e93d22015-05-07 19:33:31 +02003530 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
Juan Quintela35374cb2018-04-18 10:13:21 +02003531 qemu_fflush(f);
Juan Quintela56e93d22015-05-07 19:33:31 +02003532
Xiao Guangronge8f37352018-09-03 17:26:44 +08003533 return ret;
Juan Quintela56e93d22015-05-07 19:33:31 +02003534}
3535
Dr. David Alan Gilbertc31b0982015-11-05 18:10:54 +00003536static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
Vladimir Sementsov-Ogievskiy47995022018-03-13 15:34:00 -04003537 uint64_t *res_precopy_only,
3538 uint64_t *res_compatible,
3539 uint64_t *res_postcopy_only)
Juan Quintela56e93d22015-05-07 19:33:31 +02003540{
Juan Quintela53518d92017-05-04 11:46:24 +02003541 RAMState **temp = opaque;
3542 RAMState *rs = *temp;
Juan Quintela56e93d22015-05-07 19:33:31 +02003543 uint64_t remaining_size;
3544
Juan Quintela9edabd42017-03-14 12:02:16 +01003545 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
Juan Quintela56e93d22015-05-07 19:33:31 +02003546
Juan Quintela57273092017-03-20 22:25:28 +01003547 if (!migration_in_postcopy() &&
Dr. David Alan Gilbert663e6c12015-11-05 18:11:13 +00003548 remaining_size < max_size) {
Juan Quintela56e93d22015-05-07 19:33:31 +02003549 qemu_mutex_lock_iothread();
3550 rcu_read_lock();
Wei Wangbd227062018-12-11 16:24:51 +08003551 migration_bitmap_sync_precopy(rs);
Juan Quintela56e93d22015-05-07 19:33:31 +02003552 rcu_read_unlock();
3553 qemu_mutex_unlock_iothread();
Juan Quintela9edabd42017-03-14 12:02:16 +01003554 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
Juan Quintela56e93d22015-05-07 19:33:31 +02003555 }
Dr. David Alan Gilbertc31b0982015-11-05 18:10:54 +00003556
Vladimir Sementsov-Ogievskiy86e11672017-07-10 19:30:15 +03003557 if (migrate_postcopy_ram()) {
3558 /* We can do postcopy, and all the data is postcopiable */
Vladimir Sementsov-Ogievskiy47995022018-03-13 15:34:00 -04003559 *res_compatible += remaining_size;
Vladimir Sementsov-Ogievskiy86e11672017-07-10 19:30:15 +03003560 } else {
Vladimir Sementsov-Ogievskiy47995022018-03-13 15:34:00 -04003561 *res_precopy_only += remaining_size;
Vladimir Sementsov-Ogievskiy86e11672017-07-10 19:30:15 +03003562 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003563}
3564
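/*
 * Stream layout consumed by load_xbzrle() below: one byte of flags that
 * must be ENCODING_FLAG_XBZRLE, a big-endian 16-bit length no larger than
 * TARGET_PAGE_SIZE, then that many bytes of XBZRLE-encoded delta which is
 * decoded against the existing contents of 'host'.
 */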
3565static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
3566{
3567 unsigned int xh_len;
3568 int xh_flags;
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00003569 uint8_t *loaded_data;
Juan Quintela56e93d22015-05-07 19:33:31 +02003570
Juan Quintela56e93d22015-05-07 19:33:31 +02003571 /* extract RLE header */
3572 xh_flags = qemu_get_byte(f);
3573 xh_len = qemu_get_be16(f);
3574
3575 if (xh_flags != ENCODING_FLAG_XBZRLE) {
3576 error_report("Failed to load XBZRLE page - wrong compression!");
3577 return -1;
3578 }
3579
3580 if (xh_len > TARGET_PAGE_SIZE) {
3581 error_report("Failed to load XBZRLE page - len overflow!");
3582 return -1;
3583 }
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003584 loaded_data = XBZRLE.decoded_buf;
Juan Quintela56e93d22015-05-07 19:33:31 +02003585 /* load data and decode */
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003586 /* it can change loaded_data to point to an internal buffer */
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00003587 qemu_get_buffer_in_place(f, &loaded_data, xh_len);
Juan Quintela56e93d22015-05-07 19:33:31 +02003588
3589 /* decode RLE */
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00003590 if (xbzrle_decode_buffer(loaded_data, xh_len, host,
Juan Quintela56e93d22015-05-07 19:33:31 +02003591 TARGET_PAGE_SIZE) == -1) {
3592 error_report("Failed to load XBZRLE page - decode error!");
3593 return -1;
3594 }
3595
3596 return 0;
3597}
3598
Juan Quintela3d0684b2017-03-23 15:06:39 +01003599/**
3600 * ram_block_from_stream: read a RAMBlock id from the migration stream
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003601 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01003602 * Must be called from within a rcu critical section.
3603 *
3604 * Returns a pointer from within the RCU-protected ram_list.
3605 *
3606 * @f: QEMUFile where to read the data from
3607 * @flags: Page flags (mostly to see if it's a continuation of previous block)
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003608 */
Juan Quintela3d0684b2017-03-23 15:06:39 +01003609static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
Juan Quintela56e93d22015-05-07 19:33:31 +02003610{
3611 static RAMBlock *block = NULL;
3612 char id[256];
3613 uint8_t len;
3614
3615 if (flags & RAM_SAVE_FLAG_CONTINUE) {
zhanghailiang4c4bad42016-01-15 11:37:41 +08003616 if (!block) {
Juan Quintela56e93d22015-05-07 19:33:31 +02003617 error_report("Ack, bad migration stream!");
3618 return NULL;
3619 }
zhanghailiang4c4bad42016-01-15 11:37:41 +08003620 return block;
Juan Quintela56e93d22015-05-07 19:33:31 +02003621 }
3622
3623 len = qemu_get_byte(f);
3624 qemu_get_buffer(f, (uint8_t *)id, len);
3625 id[len] = 0;
3626
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00003627 block = qemu_ram_block_by_name(id);
zhanghailiang4c4bad42016-01-15 11:37:41 +08003628 if (!block) {
3629 error_report("Can't find block %s", id);
3630 return NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02003631 }
3632
Yury Kotovfbd162e2019-02-15 20:45:46 +03003633 if (ramblock_is_ignored(block)) {
Cédric Le Goaterb895de52018-05-14 08:57:00 +02003634 error_report("block %s should not be migrated !", id);
3635 return NULL;
3636 }
3637
zhanghailiang4c4bad42016-01-15 11:37:41 +08003638 return block;
3639}
3640
3641static inline void *host_from_ram_block_offset(RAMBlock *block,
3642 ram_addr_t offset)
3643{
3644 if (!offset_in_ramblock(block, offset)) {
3645 return NULL;
3646 }
3647
3648 return block->host + offset;
Juan Quintela56e93d22015-05-07 19:33:31 +02003649}
3650
Zhang Chen13af18f2018-09-03 12:38:48 +08003651static inline void *colo_cache_from_block_offset(RAMBlock *block,
3652 ram_addr_t offset)
3653{
3654 if (!offset_in_ramblock(block, offset)) {
3655 return NULL;
3656 }
3657 if (!block->colo_cache) {
3658 error_report("%s: colo_cache is NULL in block :%s",
3659 __func__, block->idstr);
3660 return NULL;
3661 }
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003662
3663 /*
 3664     * During colo checkpoint, we need a bitmap of these migrated pages.
 3665     * It helps us to decide which pages in the ram cache should be flushed
3666 * into VM's RAM later.
3667 */
3668 if (!test_and_set_bit(offset >> TARGET_PAGE_BITS, block->bmap)) {
3669 ram_state->migration_dirty_pages++;
3670 }
Zhang Chen13af18f2018-09-03 12:38:48 +08003671 return block->colo_cache + offset;
3672}
3673
Juan Quintela3d0684b2017-03-23 15:06:39 +01003674/**
3675 * ram_handle_compressed: handle the zero page case
3676 *
Juan Quintela56e93d22015-05-07 19:33:31 +02003677 * If a page (or a whole RDMA chunk) has been
3678 * determined to be zero, then zap it.
Juan Quintela3d0684b2017-03-23 15:06:39 +01003679 *
3680 * @host: host address for the zero page
3681 * @ch: what the page is filled from. We only support zero
3682 * @size: size of the zero page
Juan Quintela56e93d22015-05-07 19:33:31 +02003683 */
3684void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
3685{
3686 if (ch != 0 || !is_zero_range(host, size)) {
3687 memset(host, ch, size);
3688 }
3689}
3690
Xiao Guangrong797ca152018-03-30 15:51:21 +08003691/* return the size after decompression, or negative value on error */
3692static int
3693qemu_uncompress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
3694 const uint8_t *source, size_t source_len)
3695{
3696 int err;
3697
3698 err = inflateReset(stream);
3699 if (err != Z_OK) {
3700 return -1;
3701 }
3702
3703 stream->avail_in = source_len;
3704 stream->next_in = (uint8_t *)source;
3705 stream->avail_out = dest_len;
3706 stream->next_out = dest;
3707
3708 err = inflate(stream, Z_NO_FLUSH);
3709 if (err != Z_STREAM_END) {
3710 return -1;
3711 }
3712
3713 return stream->total_out;
3714}
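/*
 * For reference only, an illustrative sketch of the matching compression
 * step using plain zlib (the real sender side lives earlier in this file
 * and manages its own streams and buffering; nothing calls this sketch).
 * It assumes 'stream' was set up with deflateInit().
 */
static int __attribute__((unused))
example_compress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
                      const uint8_t *source, size_t source_len)
{
    int err;

    err = deflateReset(stream);
    if (err != Z_OK) {
        return -1;
    }

    stream->avail_in = source_len;
    stream->next_in = (uint8_t *)source;
    stream->avail_out = dest_len;
    stream->next_out = dest;

    /* Z_FINISH: either the page compresses into 'dest' in one go or we fail */
    err = deflate(stream, Z_FINISH);
    if (err != Z_STREAM_END) {
        return -1;
    }

    return stream->total_out;
}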
3715
Juan Quintela56e93d22015-05-07 19:33:31 +02003716static void *do_data_decompress(void *opaque)
3717{
3718 DecompressParam *param = opaque;
3719 unsigned long pagesize;
Liang Li33d151f2016-05-05 15:32:58 +08003720 uint8_t *des;
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003721 int len, ret;
Juan Quintela56e93d22015-05-07 19:33:31 +02003722
Liang Li33d151f2016-05-05 15:32:58 +08003723 qemu_mutex_lock(&param->mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08003724 while (!param->quit) {
Liang Li33d151f2016-05-05 15:32:58 +08003725 if (param->des) {
3726 des = param->des;
3727 len = param->len;
3728 param->des = 0;
3729 qemu_mutex_unlock(&param->mutex);
3730
Liang Li73a89122016-05-05 15:32:51 +08003731 pagesize = TARGET_PAGE_SIZE;
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003732
3733 ret = qemu_uncompress_data(&param->stream, des, pagesize,
3734 param->compbuf, len);
Xiao Guangrongf5482222018-05-03 16:06:11 +08003735 if (ret < 0 && migrate_get_current()->decompress_error_check) {
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003736 error_report("decompress data failed");
3737 qemu_file_set_error(decomp_file, ret);
3738 }
Liang Li73a89122016-05-05 15:32:51 +08003739
Liang Li33d151f2016-05-05 15:32:58 +08003740 qemu_mutex_lock(&decomp_done_lock);
3741 param->done = true;
3742 qemu_cond_signal(&decomp_done_cond);
3743 qemu_mutex_unlock(&decomp_done_lock);
3744
3745 qemu_mutex_lock(&param->mutex);
3746 } else {
3747 qemu_cond_wait(&param->cond, &param->mutex);
3748 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003749 }
Liang Li33d151f2016-05-05 15:32:58 +08003750 qemu_mutex_unlock(&param->mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02003751
3752 return NULL;
3753}
3754
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003755static int wait_for_decompress_done(void)
Liang Li5533b2e2016-05-05 15:32:52 +08003756{
3757 int idx, thread_count;
3758
3759 if (!migrate_use_compression()) {
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003760 return 0;
Liang Li5533b2e2016-05-05 15:32:52 +08003761 }
3762
3763 thread_count = migrate_decompress_threads();
3764 qemu_mutex_lock(&decomp_done_lock);
3765 for (idx = 0; idx < thread_count; idx++) {
3766 while (!decomp_param[idx].done) {
3767 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
3768 }
3769 }
3770 qemu_mutex_unlock(&decomp_done_lock);
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003771 return qemu_file_get_error(decomp_file);
Liang Li5533b2e2016-05-05 15:32:52 +08003772}
3773
Juan Quintelaf0afa332017-06-28 11:52:28 +02003774static void compress_threads_load_cleanup(void)
Juan Quintela56e93d22015-05-07 19:33:31 +02003775{
3776 int i, thread_count;
3777
Juan Quintela3416ab52016-04-20 11:56:01 +02003778 if (!migrate_use_compression()) {
3779 return;
3780 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003781 thread_count = migrate_decompress_threads();
3782 for (i = 0; i < thread_count; i++) {
Xiao Guangrong797ca152018-03-30 15:51:21 +08003783 /*
 3784         * we use it as an indicator which shows if the thread is
3785 * properly init'd or not
3786 */
3787 if (!decomp_param[i].compbuf) {
3788 break;
3789 }
3790
Juan Quintela56e93d22015-05-07 19:33:31 +02003791 qemu_mutex_lock(&decomp_param[i].mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08003792 decomp_param[i].quit = true;
Juan Quintela56e93d22015-05-07 19:33:31 +02003793 qemu_cond_signal(&decomp_param[i].cond);
3794 qemu_mutex_unlock(&decomp_param[i].mutex);
3795 }
3796 for (i = 0; i < thread_count; i++) {
Xiao Guangrong797ca152018-03-30 15:51:21 +08003797 if (!decomp_param[i].compbuf) {
3798 break;
3799 }
3800
Juan Quintela56e93d22015-05-07 19:33:31 +02003801 qemu_thread_join(decompress_threads + i);
3802 qemu_mutex_destroy(&decomp_param[i].mutex);
3803 qemu_cond_destroy(&decomp_param[i].cond);
Xiao Guangrong797ca152018-03-30 15:51:21 +08003804 inflateEnd(&decomp_param[i].stream);
Juan Quintela56e93d22015-05-07 19:33:31 +02003805 g_free(decomp_param[i].compbuf);
Xiao Guangrong797ca152018-03-30 15:51:21 +08003806 decomp_param[i].compbuf = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02003807 }
3808 g_free(decompress_threads);
3809 g_free(decomp_param);
Juan Quintela56e93d22015-05-07 19:33:31 +02003810 decompress_threads = NULL;
3811 decomp_param = NULL;
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003812 decomp_file = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02003813}
3814
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003815static int compress_threads_load_setup(QEMUFile *f)
Xiao Guangrong797ca152018-03-30 15:51:21 +08003816{
3817 int i, thread_count;
3818
3819 if (!migrate_use_compression()) {
3820 return 0;
3821 }
3822
3823 thread_count = migrate_decompress_threads();
3824 decompress_threads = g_new0(QemuThread, thread_count);
3825 decomp_param = g_new0(DecompressParam, thread_count);
3826 qemu_mutex_init(&decomp_done_lock);
3827 qemu_cond_init(&decomp_done_cond);
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003828 decomp_file = f;
Xiao Guangrong797ca152018-03-30 15:51:21 +08003829 for (i = 0; i < thread_count; i++) {
3830 if (inflateInit(&decomp_param[i].stream) != Z_OK) {
3831 goto exit;
3832 }
3833
3834 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
3835 qemu_mutex_init(&decomp_param[i].mutex);
3836 qemu_cond_init(&decomp_param[i].cond);
3837 decomp_param[i].done = true;
3838 decomp_param[i].quit = false;
3839 qemu_thread_create(decompress_threads + i, "decompress",
3840 do_data_decompress, decomp_param + i,
3841 QEMU_THREAD_JOINABLE);
3842 }
3843 return 0;
3844exit:
3845 compress_threads_load_cleanup();
3846 return -1;
3847}
3848
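/*
 * decompress_data_with_multi_threads: queue one compressed page
 *
 * Reads 'len' bytes of compressed data from 'f' and hands them to an
 * idle decompression thread; blocks on decomp_done_cond until one of
 * the threads becomes available.
 */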
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00003849static void decompress_data_with_multi_threads(QEMUFile *f,
Juan Quintela56e93d22015-05-07 19:33:31 +02003850 void *host, int len)
3851{
3852 int idx, thread_count;
3853
3854 thread_count = migrate_decompress_threads();
Liang Li73a89122016-05-05 15:32:51 +08003855 qemu_mutex_lock(&decomp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02003856 while (true) {
3857 for (idx = 0; idx < thread_count; idx++) {
Liang Li73a89122016-05-05 15:32:51 +08003858 if (decomp_param[idx].done) {
Liang Li33d151f2016-05-05 15:32:58 +08003859 decomp_param[idx].done = false;
3860 qemu_mutex_lock(&decomp_param[idx].mutex);
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00003861 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
Juan Quintela56e93d22015-05-07 19:33:31 +02003862 decomp_param[idx].des = host;
3863 decomp_param[idx].len = len;
Liang Li33d151f2016-05-05 15:32:58 +08003864 qemu_cond_signal(&decomp_param[idx].cond);
3865 qemu_mutex_unlock(&decomp_param[idx].mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02003866 break;
3867 }
3868 }
3869 if (idx < thread_count) {
3870 break;
Liang Li73a89122016-05-05 15:32:51 +08003871 } else {
3872 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02003873 }
3874 }
Liang Li73a89122016-05-05 15:32:51 +08003875 qemu_mutex_unlock(&decomp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02003876}
3877
Zhang Chen13af18f2018-09-03 12:38:48 +08003878/*
3879 * colo cache: this is for the secondary VM, where we cache the whole
3880 * memory of the secondary VM.  The global lock must be held when
3881 * calling this helper.
3882 */
3883int colo_init_ram_cache(void)
3884{
3885 RAMBlock *block;
3886
3887 rcu_read_lock();
Yury Kotovfbd162e2019-02-15 20:45:46 +03003888 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Zhang Chen13af18f2018-09-03 12:38:48 +08003889 block->colo_cache = qemu_anon_ram_alloc(block->used_length,
3890 NULL,
3891 false);
3892 if (!block->colo_cache) {
3893 error_report("%s: Can't alloc memory for COLO cache of block %s, "
3894 "size 0x" RAM_ADDR_FMT, __func__, block->idstr,
3895 block->used_length);
3896 goto out_locked;
3897 }
3898 memcpy(block->colo_cache, block->host, block->used_length);
3899 }
3900 rcu_read_unlock();
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003901 /*
3902 * Record the dirty pages sent by the PVM; this dirty bitmap is used
3903 * to decide which pages in the cache should be flushed into the SVM's
3904 * RAM.  Here we use the same name 'ram_bitmap' as for migration.
3905 */
3906 if (ram_bytes_total()) {
3907 RAMBlock *block;
3908
Yury Kotovfbd162e2019-02-15 20:45:46 +03003909 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003910 unsigned long pages = block->max_length >> TARGET_PAGE_BITS;
3911
3912 block->bmap = bitmap_new(pages);
3913 bitmap_set(block->bmap, 0, pages);
3914 }
3915 }
3916 ram_state = g_new0(RAMState, 1);
3917 ram_state->migration_dirty_pages = 0;
Zhang Chenc6e5baf2019-03-30 06:29:51 +08003918 qemu_mutex_init(&ram_state->bitmap_mutex);
zhanghailiangd1955d22018-09-03 12:38:55 +08003919 memory_global_dirty_log_start();
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003920
Zhang Chen13af18f2018-09-03 12:38:48 +08003921 return 0;
3922
3923out_locked:
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003924
Yury Kotovfbd162e2019-02-15 20:45:46 +03003925 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Zhang Chen13af18f2018-09-03 12:38:48 +08003926 if (block->colo_cache) {
3927 qemu_anon_ram_free(block->colo_cache, block->used_length);
3928 block->colo_cache = NULL;
3929 }
3930 }
3931
3932 rcu_read_unlock();
3933 return -errno;
3934}
3935
3936/* The global lock must be held when calling this helper */
3937void colo_release_ram_cache(void)
3938{
3939 RAMBlock *block;
3940
zhanghailiangd1955d22018-09-03 12:38:55 +08003941 memory_global_dirty_log_stop();
Yury Kotovfbd162e2019-02-15 20:45:46 +03003942 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003943 g_free(block->bmap);
3944 block->bmap = NULL;
3945 }
3946
Zhang Chen13af18f2018-09-03 12:38:48 +08003947 rcu_read_lock();
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003948
Yury Kotovfbd162e2019-02-15 20:45:46 +03003949 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Zhang Chen13af18f2018-09-03 12:38:48 +08003950 if (block->colo_cache) {
3951 qemu_anon_ram_free(block->colo_cache, block->used_length);
3952 block->colo_cache = NULL;
3953 }
3954 }
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003955
Zhang Chen13af18f2018-09-03 12:38:48 +08003956 rcu_read_unlock();
Zhang Chenc6e5baf2019-03-30 06:29:51 +08003957 qemu_mutex_destroy(&ram_state->bitmap_mutex);
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003958 g_free(ram_state);
3959 ram_state = NULL;
Zhang Chen13af18f2018-09-03 12:38:48 +08003960}
3961
Juan Quintela3d0684b2017-03-23 15:06:39 +01003962/**
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003963 * ram_load_setup: Setup RAM for migration incoming side
3964 *
3965 * Returns zero to indicate success and negative for error
3966 *
3967 * @f: QEMUFile where to receive the data
3968 * @opaque: RAMState pointer
3969 */
3970static int ram_load_setup(QEMUFile *f, void *opaque)
3971{
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003972 if (compress_threads_load_setup(f)) {
Xiao Guangrong797ca152018-03-30 15:51:21 +08003973 return -1;
3974 }
3975
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003976 xbzrle_load_setup();
Alexey Perevalovf9494612017-10-05 14:13:20 +03003977 ramblock_recv_map_init();
Zhang Chen13af18f2018-09-03 12:38:48 +08003978
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003979 return 0;
3980}
3981
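/*
 * ram_load_cleanup: free incoming-side RAM state
 *
 * Persists pmem-backed blocks, tears down the xbzrle and decompression
 * state, and releases each block's receivedmap.
 */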
3982static int ram_load_cleanup(void *opaque)
3983{
Alexey Perevalovf9494612017-10-05 14:13:20 +03003984 RAMBlock *rb;
Junyan He56eb90a2018-07-18 15:48:03 +08003985
Yury Kotovfbd162e2019-02-15 20:45:46 +03003986 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
Junyan He56eb90a2018-07-18 15:48:03 +08003987 if (ramblock_is_pmem(rb)) {
3988 pmem_persist(rb->host, rb->used_length);
3989 }
3990 }
3991
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003992 xbzrle_load_cleanup();
Juan Quintelaf0afa332017-06-28 11:52:28 +02003993 compress_threads_load_cleanup();
Alexey Perevalovf9494612017-10-05 14:13:20 +03003994
Yury Kotovfbd162e2019-02-15 20:45:46 +03003995 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
Alexey Perevalovf9494612017-10-05 14:13:20 +03003996 g_free(rb->receivedmap);
3997 rb->receivedmap = NULL;
3998 }
Zhang Chen13af18f2018-09-03 12:38:48 +08003999
Juan Quintelaf265e0e2017-06-28 11:52:27 +02004000 return 0;
4001}
4002
4003/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01004004 * ram_postcopy_incoming_init: allocate postcopy data structures
4005 *
4006 * Returns 0 for success and negative on error
4007 *
4008 * @mis: current migration incoming state
4009 *
4010 * Allocate data structures etc needed by incoming migration with
4011 * postcopy-ram. postcopy-ram's similarly named
4012 * postcopy_ram_incoming_init does the work.
Dr. David Alan Gilbert1caddf82015-11-05 18:11:03 +00004013 */
4014int ram_postcopy_incoming_init(MigrationIncomingState *mis)
4015{
David Hildenbrandc1361802018-06-20 22:27:36 +02004016 return postcopy_ram_incoming_init(mis);
Dr. David Alan Gilbert1caddf82015-11-05 18:11:03 +00004017}
4018
Juan Quintela3d0684b2017-03-23 15:06:39 +01004019/**
4020 * ram_load_postcopy: load a page in postcopy case
4021 *
4022 * Returns 0 for success or -errno in case of error
4023 *
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004024 * Called in postcopy mode by ram_load().
4025 * rcu_read_lock is taken prior to this being called.
Juan Quintela3d0684b2017-03-23 15:06:39 +01004026 *
4027 * @f: QEMUFile to receive the data from
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004028 */
4029static int ram_load_postcopy(QEMUFile *f)
4030{
4031 int flags = 0, ret = 0;
4032 bool place_needed = false;
Peter Xu1aa83672018-07-10 17:18:53 +08004033 bool matches_target_page_size = false;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004034 MigrationIncomingState *mis = migration_incoming_get_current();
4035 /* Temporary page that is later 'placed' */
4036 void *postcopy_host_page = postcopy_get_tmp_page(mis);
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00004037 void *last_host = NULL;
Dr. David Alan Gilberta3b6ff62015-11-11 14:02:28 +00004038 bool all_zero = false;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004039
4040 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
4041 ram_addr_t addr;
4042 void *host = NULL;
4043 void *page_buffer = NULL;
4044 void *place_source = NULL;
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00004045 RAMBlock *block = NULL;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004046 uint8_t ch;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004047
4048 addr = qemu_get_be64(f);
Peter Xu7a9ddfb2018-02-08 18:31:05 +08004049
4050 /*
4051 * If there is a qemu file error, we should stop here; "addr"
4052 * may then be invalid
4053 */
4054 ret = qemu_file_get_error(f);
4055 if (ret) {
4056 break;
4057 }
4058
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004059 flags = addr & ~TARGET_PAGE_MASK;
4060 addr &= TARGET_PAGE_MASK;
4061
4062 trace_ram_load_postcopy_loop((uint64_t)addr, flags);
4063 place_needed = false;
Juan Quintelabb890ed2017-04-28 09:39:55 +02004064 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE)) {
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00004065 block = ram_block_from_stream(f, flags);
zhanghailiang4c4bad42016-01-15 11:37:41 +08004066
4067 host = host_from_ram_block_offset(block, addr);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004068 if (!host) {
4069 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
4070 ret = -EINVAL;
4071 break;
4072 }
Peter Xu1aa83672018-07-10 17:18:53 +08004073 matches_target_page_size = block->page_size == TARGET_PAGE_SIZE;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004074 /*
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00004075 * Postcopy requires that we place whole host pages atomically;
4076 * these may be huge pages for RAMBlocks that are backed by
4077 * hugetlbfs.
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004078 * To make it atomic, the data is read into a temporary page
4079 * that's moved into place later.
4080 * The migration protocol uses, possibly smaller, target-pages
4081 * however the source ensures it always sends all the components
4082 * of a host page in order.
4083 */
4084 page_buffer = postcopy_host_page +
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00004085 ((uintptr_t)host & (block->page_size - 1));
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004086 /* If all target pages are zero then we can optimise the place */
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00004087 if (!((uintptr_t)host & (block->page_size - 1))) {
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004088 all_zero = true;
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00004089 } else {
4090 /* not the first target page within the host page */
4091 if (host != (last_host + TARGET_PAGE_SIZE)) {
Markus Armbruster9af9e0f2015-12-18 16:35:19 +01004092 error_report("Non-sequential target page %p/%p",
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00004093 host, last_host);
4094 ret = -EINVAL;
4095 break;
4096 }
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004097 }
4098
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00004099
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004100 /*
4101 * If it's the last part of a host page then we place the host
4102 * page
4103 */
4104 place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00004105 (block->page_size - 1)) == 0;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004106 place_source = postcopy_host_page;
4107 }
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00004108 last_host = host;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004109
4110 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
Juan Quintelabb890ed2017-04-28 09:39:55 +02004111 case RAM_SAVE_FLAG_ZERO:
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004112 ch = qemu_get_byte(f);
4113 memset(page_buffer, ch, TARGET_PAGE_SIZE);
4114 if (ch) {
4115 all_zero = false;
4116 }
4117 break;
4118
4119 case RAM_SAVE_FLAG_PAGE:
4120 all_zero = false;
Peter Xu1aa83672018-07-10 17:18:53 +08004121 if (!matches_target_page_size) {
4122 /* For huge pages, we always use temporary buffer */
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004123 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
4124 } else {
Peter Xu1aa83672018-07-10 17:18:53 +08004125 /*
4126 * For small pages that match the target page size, we
4127 * avoid the qemu_file copy. Instead we directly use
4128 * the buffer of QEMUFile to place the page. Note: we
4129 * must not perform any other QEMUFile operation before
4130 * using that buffer, to make sure it is still valid when
4131 * placing the page.
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004132 */
4133 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
4134 TARGET_PAGE_SIZE);
4135 }
4136 break;
4137 case RAM_SAVE_FLAG_EOS:
4138 /* normal exit */
Juan Quintela6df264a2018-02-28 09:10:07 +01004139 multifd_recv_sync_main();
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004140 break;
4141 default:
4142 error_report("Unknown combination of migration flags: %#x"
4143 " (postcopy mode)", flags);
4144 ret = -EINVAL;
Peter Xu7a9ddfb2018-02-08 18:31:05 +08004145 break;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004146 }
4147
Peter Xu7a9ddfb2018-02-08 18:31:05 +08004148 /* Detect for any possible file errors */
4149 if (!ret && qemu_file_get_error(f)) {
4150 ret = qemu_file_get_error(f);
4151 }
4152
4153 if (!ret && place_needed) {
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004154 /* This gets called at the last target page in the host page */
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00004155 void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;
4156
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004157 if (all_zero) {
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00004158 ret = postcopy_place_page_zero(mis, place_dest,
Alexey Perevalov8be46202017-10-05 14:13:18 +03004159 block);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004160 } else {
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00004161 ret = postcopy_place_page(mis, place_dest,
Alexey Perevalov8be46202017-10-05 14:13:18 +03004162 place_source, block);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004163 }
4164 }
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004165 }
4166
4167 return ret;
4168}
4169
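/*
 * Helpers that query the incoming postcopy state machine: "advised"
 * means the source has announced the postcopy capability, "running"
 * means we have already reached the listening phase or later.
 */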
Daniel Henrique Barbozaacab30b2017-11-16 20:35:26 -02004170static bool postcopy_is_advised(void)
4171{
4172 PostcopyState ps = postcopy_state_get();
4173 return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END;
4174}
4175
4176static bool postcopy_is_running(void)
4177{
4178 PostcopyState ps = postcopy_state_get();
4179 return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END;
4180}
4181
Zhang Chene6f4aa12018-09-03 12:38:50 +08004182/*
4183 * Flush the content of the RAM cache into the SVM's memory.
4184 * Only flush the pages that have been dirtied by the PVM, the SVM, or both.
4185 */
4186static void colo_flush_ram_cache(void)
4187{
4188 RAMBlock *block = NULL;
4189 void *dst_host;
4190 void *src_host;
4191 unsigned long offset = 0;
4192
zhanghailiangd1955d22018-09-03 12:38:55 +08004193 memory_global_dirty_log_sync();
4194 rcu_read_lock();
Yury Kotovfbd162e2019-02-15 20:45:46 +03004195 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Wei Yangbf212972019-04-30 11:44:10 +08004196 migration_bitmap_sync_range(ram_state, block, block->used_length);
zhanghailiangd1955d22018-09-03 12:38:55 +08004197 }
4198 rcu_read_unlock();
4199
Zhang Chene6f4aa12018-09-03 12:38:50 +08004200 trace_colo_flush_ram_cache_begin(ram_state->migration_dirty_pages);
4201 rcu_read_lock();
4202 block = QLIST_FIRST_RCU(&ram_list.blocks);
4203
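    /*
     * Walk the dirty bitmap of every block and copy each dirty page
     * from the colo_cache back into the SVM's RAM.
     */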
4204 while (block) {
4205 offset = migration_bitmap_find_dirty(ram_state, block, offset);
4206
4207 if (offset << TARGET_PAGE_BITS >= block->used_length) {
4208 offset = 0;
4209 block = QLIST_NEXT_RCU(block, next);
4210 } else {
4211 migration_bitmap_clear_dirty(ram_state, block, offset);
4212 dst_host = block->host + (offset << TARGET_PAGE_BITS);
4213 src_host = block->colo_cache + (offset << TARGET_PAGE_BITS);
4214 memcpy(dst_host, src_host, TARGET_PAGE_SIZE);
4215 }
4216 }
4217
4218 rcu_read_unlock();
4219 trace_colo_flush_ram_cache_end();
4220}
4221
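/**
 * ram_load: load RAM pages from the migration stream
 *
 * Returns 0 for success or a negative error code
 *
 * The whole loading loop runs inside one RCU critical section; in
 * postcopy mode the per-page work is delegated to ram_load_postcopy().
 *
 * @f: QEMUFile to receive the data from
 * @opaque: RAMState pointer
 * @version_id: stream version, only 4 is accepted
 */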
Juan Quintela56e93d22015-05-07 19:33:31 +02004222static int ram_load(QEMUFile *f, void *opaque, int version_id)
4223{
Juan Quintelaedc60122016-11-02 12:40:46 +01004224 int flags = 0, ret = 0, invalid_flags = 0;
Juan Quintela56e93d22015-05-07 19:33:31 +02004225 static uint64_t seq_iter;
4226 int len = 0;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004227 /*
4228 * If system is running in postcopy mode, page inserts to host memory must
4229 * be atomic
4230 */
Daniel Henrique Barbozaacab30b2017-11-16 20:35:26 -02004231 bool postcopy_running = postcopy_is_running();
Dr. David Alan Gilbertef08fb32017-02-24 18:28:30 +00004232 /* ADVISE is earlier, it shows the source has the postcopy capability on */
Daniel Henrique Barbozaacab30b2017-11-16 20:35:26 -02004233 bool postcopy_advised = postcopy_is_advised();
Juan Quintela56e93d22015-05-07 19:33:31 +02004234
4235 seq_iter++;
4236
4237 if (version_id != 4) {
4238 ret = -EINVAL;
4239 }
4240
Juan Quintelaedc60122016-11-02 12:40:46 +01004241 if (!migrate_use_compression()) {
4242 invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE;
4243 }
Juan Quintela56e93d22015-05-07 19:33:31 +02004244 /* This RCU critical section can be very long running.
4245 * If RCU reclamations in this code become numerous,
4246 * it will be necessary to reduce the granularity of this
4247 * critical section.
4248 */
4249 rcu_read_lock();
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004250
4251 if (postcopy_running) {
4252 ret = ram_load_postcopy(f);
4253 }
4254
4255 while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02004256 ram_addr_t addr, total_ram_bytes;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004257 void *host = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02004258 uint8_t ch;
4259
4260 addr = qemu_get_be64(f);
4261 flags = addr & ~TARGET_PAGE_MASK;
4262 addr &= TARGET_PAGE_MASK;
4263
Juan Quintelaedc60122016-11-02 12:40:46 +01004264 if (flags & invalid_flags) {
4265 if (flags & invalid_flags & RAM_SAVE_FLAG_COMPRESS_PAGE) {
4266 error_report("Received an unexpected compressed page");
4267 }
4268
4269 ret = -EINVAL;
4270 break;
4271 }
4272
Juan Quintelabb890ed2017-04-28 09:39:55 +02004273 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004274 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
zhanghailiang4c4bad42016-01-15 11:37:41 +08004275 RAMBlock *block = ram_block_from_stream(f, flags);
4276
Zhang Chen13af18f2018-09-03 12:38:48 +08004277 /*
4278 * After going into COLO, we should load the page into colo_cache.
4279 */
4280 if (migration_incoming_in_colo_state()) {
4281 host = colo_cache_from_block_offset(block, addr);
4282 } else {
4283 host = host_from_ram_block_offset(block, addr);
4284 }
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004285 if (!host) {
4286 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
4287 ret = -EINVAL;
4288 break;
4289 }
Zhang Chen13af18f2018-09-03 12:38:48 +08004290
4291 if (!migration_incoming_in_colo_state()) {
4292 ramblock_recv_bitmap_set(block, host);
4293 }
4294
Dr. David Alan Gilbert1db9d8e2017-04-26 19:37:21 +01004295 trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004296 }
4297
Juan Quintela56e93d22015-05-07 19:33:31 +02004298 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
4299 case RAM_SAVE_FLAG_MEM_SIZE:
4300 /* Synchronize RAM block list */
4301 total_ram_bytes = addr;
4302 while (!ret && total_ram_bytes) {
4303 RAMBlock *block;
Juan Quintela56e93d22015-05-07 19:33:31 +02004304 char id[256];
4305 ram_addr_t length;
4306
4307 len = qemu_get_byte(f);
4308 qemu_get_buffer(f, (uint8_t *)id, len);
4309 id[len] = 0;
4310 length = qemu_get_be64(f);
4311
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00004312 block = qemu_ram_block_by_name(id);
Cédric Le Goaterb895de52018-05-14 08:57:00 +02004313 if (block && !qemu_ram_is_migratable(block)) {
4314 error_report("block %s should not be migrated !", id);
4315 ret = -EINVAL;
4316 } else if (block) {
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00004317 if (length != block->used_length) {
4318 Error *local_err = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02004319
Gongleifa53a0e2016-05-10 10:04:59 +08004320 ret = qemu_ram_resize(block, length,
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00004321 &local_err);
4322 if (local_err) {
4323 error_report_err(local_err);
Juan Quintela56e93d22015-05-07 19:33:31 +02004324 }
Juan Quintela56e93d22015-05-07 19:33:31 +02004325 }
Dr. David Alan Gilbertef08fb32017-02-24 18:28:30 +00004326 /* For postcopy we need to check hugepage sizes match */
4327 if (postcopy_advised &&
4328 block->page_size != qemu_host_page_size) {
4329 uint64_t remote_page_size = qemu_get_be64(f);
4330 if (remote_page_size != block->page_size) {
4331 error_report("Mismatched RAM page size %s "
4332 "(local) %zd != %" PRId64,
4333 id, block->page_size,
4334 remote_page_size);
4335 ret = -EINVAL;
4336 }
4337 }
Yury Kotovfbd162e2019-02-15 20:45:46 +03004338 if (migrate_ignore_shared()) {
4339 hwaddr addr = qemu_get_be64(f);
4340 bool ignored = qemu_get_byte(f);
4341 if (ignored != ramblock_is_ignored(block)) {
4342 error_report("RAM block %s should %s be migrated",
4343 id, ignored ? "" : "not");
4344 ret = -EINVAL;
4345 }
4346 if (ramblock_is_ignored(block) &&
4347 block->mr->addr != addr) {
4348 error_report("Mismatched GPAs for block %s "
4349 "%" PRId64 "!= %" PRId64,
4350 id, (uint64_t)addr,
4351 (uint64_t)block->mr->addr);
4352 ret = -EINVAL;
4353 }
4354 }
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00004355 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
4356 block->idstr);
4357 } else {
Juan Quintela56e93d22015-05-07 19:33:31 +02004358 error_report("Unknown ramblock \"%s\", cannot "
4359 "accept migration", id);
4360 ret = -EINVAL;
4361 }
4362
4363 total_ram_bytes -= length;
4364 }
4365 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004366
Juan Quintelabb890ed2017-04-28 09:39:55 +02004367 case RAM_SAVE_FLAG_ZERO:
Juan Quintela56e93d22015-05-07 19:33:31 +02004368 ch = qemu_get_byte(f);
4369 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
4370 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004371
Juan Quintela56e93d22015-05-07 19:33:31 +02004372 case RAM_SAVE_FLAG_PAGE:
Juan Quintela56e93d22015-05-07 19:33:31 +02004373 qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
4374 break;
Juan Quintela56e93d22015-05-07 19:33:31 +02004375
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004376 case RAM_SAVE_FLAG_COMPRESS_PAGE:
Juan Quintela56e93d22015-05-07 19:33:31 +02004377 len = qemu_get_be32(f);
4378 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
4379 error_report("Invalid compressed data length: %d", len);
4380 ret = -EINVAL;
4381 break;
4382 }
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00004383 decompress_data_with_multi_threads(f, host, len);
Juan Quintela56e93d22015-05-07 19:33:31 +02004384 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004385
Juan Quintela56e93d22015-05-07 19:33:31 +02004386 case RAM_SAVE_FLAG_XBZRLE:
Juan Quintela56e93d22015-05-07 19:33:31 +02004387 if (load_xbzrle(f, addr, host) < 0) {
4388 error_report("Failed to decompress XBZRLE page at "
4389 RAM_ADDR_FMT, addr);
4390 ret = -EINVAL;
4391 break;
4392 }
4393 break;
4394 case RAM_SAVE_FLAG_EOS:
4395 /* normal exit */
Juan Quintela6df264a2018-02-28 09:10:07 +01004396 multifd_recv_sync_main();
Juan Quintela56e93d22015-05-07 19:33:31 +02004397 break;
4398 default:
4399 if (flags & RAM_SAVE_FLAG_HOOK) {
Dr. David Alan Gilbert632e3a52015-06-11 18:17:23 +01004400 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
Juan Quintela56e93d22015-05-07 19:33:31 +02004401 } else {
4402 error_report("Unknown combination of migration flags: %#x",
4403 flags);
4404 ret = -EINVAL;
4405 }
4406 }
4407 if (!ret) {
4408 ret = qemu_file_get_error(f);
4409 }
4410 }
4411
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08004412 ret |= wait_for_decompress_done();
Juan Quintela56e93d22015-05-07 19:33:31 +02004413 rcu_read_unlock();
Juan Quintela55c44462017-01-23 22:32:05 +01004414 trace_ram_load_complete(ret, seq_iter);
Zhang Chene6f4aa12018-09-03 12:38:50 +08004415
4416 if (!ret && migration_incoming_in_colo_state()) {
4417 colo_flush_ram_cache();
4418 }
Juan Quintela56e93d22015-05-07 19:33:31 +02004419 return ret;
4420}
4421
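/*
 * ram_has_postcopy: whether postcopy can be used for RAM
 *
 * Refuses postcopy if any non-ignored RAMBlock is backed by pmem,
 * otherwise follows the postcopy-ram capability.
 */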
Vladimir Sementsov-Ogievskiyc6467622017-07-10 19:30:14 +03004422static bool ram_has_postcopy(void *opaque)
4423{
Junyan He469dd512018-07-18 15:48:02 +08004424 RAMBlock *rb;
Yury Kotovfbd162e2019-02-15 20:45:46 +03004425 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
Junyan He469dd512018-07-18 15:48:02 +08004426 if (ramblock_is_pmem(rb)) {
4427 info_report("Block: %s, host: %p is a nvdimm memory, postcopy"
4428 "is not supported now!", rb->idstr, rb->host);
4429 return false;
4430 }
4431 }
4432
Vladimir Sementsov-Ogievskiyc6467622017-07-10 19:30:14 +03004433 return migrate_postcopy_ram();
4434}
4435
Peter Xuedd090c2018-05-02 18:47:32 +08004436/* Sync all the dirty bitmaps with the destination VM. */
4437static int ram_dirty_bitmap_sync_all(MigrationState *s, RAMState *rs)
4438{
4439 RAMBlock *block;
4440 QEMUFile *file = s->to_dst_file;
4441 int ramblock_count = 0;
4442
4443 trace_ram_dirty_bitmap_sync_start();
4444
Yury Kotovfbd162e2019-02-15 20:45:46 +03004445 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Peter Xuedd090c2018-05-02 18:47:32 +08004446 qemu_savevm_send_recv_bitmap(file, block->idstr);
4447 trace_ram_dirty_bitmap_request(block->idstr);
4448 ramblock_count++;
4449 }
4450
4451 trace_ram_dirty_bitmap_sync_wait();
4452
4453 /* Wait until all the ramblocks' dirty bitmap synced */
4454 while (ramblock_count--) {
4455 qemu_sem_wait(&s->rp_state.rp_sem);
4456 }
4457
4458 trace_ram_dirty_bitmap_sync_complete();
4459
4460 return 0;
4461}
4462
4463static void ram_dirty_bitmap_reload_notify(MigrationState *s)
4464{
4465 qemu_sem_post(&s->rp_state.rp_sem);
4466}
4467
Peter Xua335deb2018-05-02 18:47:28 +08004468/*
4469 * Read the received bitmap, revert it as the initial dirty bitmap.
4470 * This is only used when the postcopy migration is paused but wants
4471 * to resume from a middle point.
4472 */
4473int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block)
4474{
4475 int ret = -EINVAL;
4476 QEMUFile *file = s->rp_state.from_dst_file;
4477 unsigned long *le_bitmap, nbits = block->used_length >> TARGET_PAGE_BITS;
Peter Xua725ef92018-07-10 17:18:55 +08004478 uint64_t local_size = DIV_ROUND_UP(nbits, 8);
Peter Xua335deb2018-05-02 18:47:28 +08004479 uint64_t size, end_mark;
4480
4481 trace_ram_dirty_bitmap_reload_begin(block->idstr);
4482
4483 if (s->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
4484 error_report("%s: incorrect state %s", __func__,
4485 MigrationStatus_str(s->state));
4486 return -EINVAL;
4487 }
4488
4489 /*
4490 * Note: see comments in ramblock_recv_bitmap_send() on why we
4491 * need the endianness conversion, and the paddings.
4492 */
4493 local_size = ROUND_UP(local_size, 8);
4494
4495 /* Add paddings */
4496 le_bitmap = bitmap_new(nbits + BITS_PER_LONG);
4497
4498 size = qemu_get_be64(file);
4499
4500 /* The size of the bitmap should match with our ramblock */
4501 if (size != local_size) {
4502 error_report("%s: ramblock '%s' bitmap size mismatch "
4503 "(0x%"PRIx64" != 0x%"PRIx64")", __func__,
4504 block->idstr, size, local_size);
4505 ret = -EINVAL;
4506 goto out;
4507 }
4508
4509 size = qemu_get_buffer(file, (uint8_t *)le_bitmap, local_size);
4510 end_mark = qemu_get_be64(file);
4511
4512 ret = qemu_file_get_error(file);
4513 if (ret || size != local_size) {
4514 error_report("%s: read bitmap failed for ramblock '%s': %d"
4515 " (size 0x%"PRIx64", got: 0x%"PRIx64")",
4516 __func__, block->idstr, ret, local_size, size);
4517 ret = -EIO;
4518 goto out;
4519 }
4520
4521 if (end_mark != RAMBLOCK_RECV_BITMAP_ENDING) {
4522 error_report("%s: ramblock '%s' end mark incorrect: 0x%"PRIu64,
4523 __func__, block->idstr, end_mark);
4524 ret = -EINVAL;
4525 goto out;
4526 }
4527
4528 /*
4529 * Endianness conversion. We are in postcopy (though paused).
4530 * The dirty bitmap won't change. We can directly modify it.
4531 */
4532 bitmap_from_le(block->bmap, le_bitmap, nbits);
4533
4534 /*
4535 * What we received is "received bitmap". Revert it as the initial
4536 * dirty bitmap for this ramblock.
4537 */
4538 bitmap_complement(block->bmap, block->bmap, nbits);
4539
4540 trace_ram_dirty_bitmap_reload_complete(block->idstr);
4541
Peter Xuedd090c2018-05-02 18:47:32 +08004542 /*
4543 * We succeeded in syncing the bitmap for the current ramblock. If this is
4544 * the last one to sync, we need to notify the main send thread.
4545 */
4546 ram_dirty_bitmap_reload_notify(s);
4547
Peter Xua335deb2018-05-02 18:47:28 +08004548 ret = 0;
4549out:
Peter Xubf269902018-05-25 09:50:42 +08004550 g_free(le_bitmap);
Peter Xua335deb2018-05-02 18:47:28 +08004551 return ret;
4552}
4553
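/*
 * ram_resume_prepare: prepare for a postcopy recovery
 *
 * Re-syncs the dirty bitmaps with the destination and then prepares
 * the RAMState for resuming the interrupted migration.
 */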
Peter Xuedd090c2018-05-02 18:47:32 +08004554static int ram_resume_prepare(MigrationState *s, void *opaque)
4555{
4556 RAMState *rs = *(RAMState **)opaque;
Peter Xu08614f32018-05-02 18:47:33 +08004557 int ret;
Peter Xuedd090c2018-05-02 18:47:32 +08004558
Peter Xu08614f32018-05-02 18:47:33 +08004559 ret = ram_dirty_bitmap_sync_all(s, rs);
4560 if (ret) {
4561 return ret;
4562 }
4563
4564 ram_state_resume_prepare(rs, s->to_dst_file);
4565
4566 return 0;
Peter Xuedd090c2018-05-02 18:47:32 +08004567}
4568
Juan Quintela56e93d22015-05-07 19:33:31 +02004569static SaveVMHandlers savevm_ram_handlers = {
Juan Quintela9907e842017-06-28 11:52:24 +02004570 .save_setup = ram_save_setup,
Juan Quintela56e93d22015-05-07 19:33:31 +02004571 .save_live_iterate = ram_save_iterate,
Dr. David Alan Gilbert763c9062015-11-05 18:11:00 +00004572 .save_live_complete_postcopy = ram_save_complete,
Dr. David Alan Gilberta3e06c32015-11-05 18:10:41 +00004573 .save_live_complete_precopy = ram_save_complete,
Vladimir Sementsov-Ogievskiyc6467622017-07-10 19:30:14 +03004574 .has_postcopy = ram_has_postcopy,
Juan Quintela56e93d22015-05-07 19:33:31 +02004575 .save_live_pending = ram_save_pending,
4576 .load_state = ram_load,
Juan Quintelaf265e0e2017-06-28 11:52:27 +02004577 .save_cleanup = ram_save_cleanup,
4578 .load_setup = ram_load_setup,
4579 .load_cleanup = ram_load_cleanup,
Peter Xuedd090c2018-05-02 18:47:32 +08004580 .resume_prepare = ram_resume_prepare,
Juan Quintela56e93d22015-05-07 19:33:31 +02004581};
4582
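/*
 * ram_mig_init: register the "ram" live migration section (version 4)
 * with the handlers defined above.
 */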
4583void ram_mig_init(void)
4584{
4585 qemu_mutex_init(&XBZRLE.lock);
Juan Quintela6f37bb82017-03-13 19:26:29 +01004586 register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state);
Juan Quintela56e93d22015-05-07 19:33:31 +02004587}