/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2011-2015 Red Hat Inc
 *
 * Authors:
 *  Juan Quintela <quintela@redhat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include <zlib.h>
#include "qemu/cutils.h"
#include "qemu/bitops.h"
#include "qemu/bitmap.h"
#include "qemu/main-loop.h"
#include "qemu/pmem.h"
#include "xbzrle.h"
#include "ram.h"
#include "migration.h"
#include "socket.h"
#include "migration/register.h"
#include "migration/misc.h"
#include "qemu-file.h"
#include "postcopy-ram.h"
#include "page_cache.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "qapi/qapi-events-migration.h"
#include "qapi/qmp/qerror.h"
#include "trace.h"
#include "exec/ram_addr.h"
#include "exec/target_page.h"
#include "qemu/rcu_queue.h"
#include "migration/colo.h"
#include "block.h"
#include "sysemu/sysemu.h"
#include "qemu/uuid.h"
#include "savevm.h"
#include "qemu/iov.h"

/***********************************************************/
/* ram save/restore */

/* RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS, it
 * worked for pages that were filled with the same char.  We switched
 * it to only search for the zero value.  And to avoid confusion with
 * RAM_SAVE_FLAG_COMPRESS_PAGE we just renamed it.
 */

#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
#define RAM_SAVE_FLAG_ZERO     0x02
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
#define RAM_SAVE_FLAG_PAGE     0x08
#define RAM_SAVE_FLAG_EOS      0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE   0x40
/* 0x80 is reserved in migration.h; start with 0x100 next */
#define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100
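/*
 * Note: these flags are not sent on the wire by themselves; they are
 * OR'ed into the low (sub-page) bits of the page offset that
 * save_page_header() writes, see the @offset documentation of
 * save_page_header() further down.  That is why new flags have to
 * stay below the target page size.
 */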

static inline bool is_zero_range(uint8_t *p, uint64_t size)
{
    return buffer_is_zero(p, size);
}

XBZRLECacheStats xbzrle_counters;

/* struct contains XBZRLE cache and a static page
   used by the compression */
static struct {
    /* buffer used for XBZRLE encoding */
    uint8_t *encoded_buf;
    /* buffer for storing page content */
    uint8_t *current_buf;
    /* Cache for XBZRLE, Protected by lock. */
    PageCache *cache;
    QemuMutex lock;
    /* it will store a page full of zeros */
    uint8_t *zero_target_page;
    /* buffer used for XBZRLE decoding */
    uint8_t *decoded_buf;
} XBZRLE;

static void XBZRLE_cache_lock(void)
{
    if (migrate_use_xbzrle())
        qemu_mutex_lock(&XBZRLE.lock);
}

static void XBZRLE_cache_unlock(void)
{
    if (migrate_use_xbzrle())
        qemu_mutex_unlock(&XBZRLE.lock);
}

/**
 * xbzrle_cache_resize: resize the xbzrle cache
 *
 * This function is called from qmp_migrate_set_cache_size in the main
 * thread, possibly while a migration is in progress.  A running
 * migration may be using the cache and might finish during this call,
 * hence changes to the cache are protected by XBZRLE.lock.
 *
 * Returns 0 for success or -1 for error
 *
 * @new_size: new cache size
 * @errp: set *errp if the check failed, with reason
 */
int xbzrle_cache_resize(int64_t new_size, Error **errp)
{
    PageCache *new_cache;
    int64_t ret = 0;

    /* Check for truncation */
    if (new_size != (size_t)new_size) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
                   "exceeding address space");
        return -1;
    }

    if (new_size == migrate_xbzrle_cache_size()) {
        /* nothing to do */
        return 0;
    }

    XBZRLE_cache_lock();

    if (XBZRLE.cache != NULL) {
        new_cache = cache_init(new_size, TARGET_PAGE_SIZE, errp);
        if (!new_cache) {
            ret = -1;
            goto out;
        }

        cache_fini(XBZRLE.cache);
        XBZRLE.cache = new_cache;
    }
out:
    XBZRLE_cache_unlock();
    return ret;
}

static bool ramblock_is_ignored(RAMBlock *block)
{
    return !qemu_ram_is_migratable(block) ||
           (migrate_ignore_shared() && qemu_ram_is_shared(block));
}

/* Should be holding either ram_list.mutex, or the RCU lock. */
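/*
 * The "if (...) {} else" trick below turns the plain RAMBlock iterator
 * into a filtering one: ignored blocks fall into the empty branch and
 * the caller's loop body becomes the "else", so the macro still behaves
 * like a single statement in front of a "{ ... }" block.
 */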
#define RAMBLOCK_FOREACH_NOT_IGNORED(block)            \
    INTERNAL_RAMBLOCK_FOREACH(block)                   \
        if (ramblock_is_ignored(block)) {} else

#define RAMBLOCK_FOREACH_MIGRATABLE(block)             \
    INTERNAL_RAMBLOCK_FOREACH(block)                   \
        if (!qemu_ram_is_migratable(block)) {} else

#undef RAMBLOCK_FOREACH

int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque)
{
    RAMBlock *block;
    int ret = 0;

    rcu_read_lock();
    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        ret = func(block, opaque);
        if (ret) {
            break;
        }
    }
    rcu_read_unlock();
    return ret;
}

static void ramblock_recv_map_init(void)
{
    RAMBlock *rb;

    RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
        assert(!rb->receivedmap);
        rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits());
    }
}

int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr)
{
    return test_bit(ramblock_recv_bitmap_offset(host_addr, rb),
                    rb->receivedmap);
}

bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset)
{
    return test_bit(byte_offset >> TARGET_PAGE_BITS, rb->receivedmap);
}

void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr)
{
    set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb), rb->receivedmap);
}

void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr,
                                    size_t nr)
{
    bitmap_set_atomic(rb->receivedmap,
                      ramblock_recv_bitmap_offset(host_addr, rb),
                      nr);
}

#define  RAMBLOCK_RECV_BITMAP_ENDING  (0x0123456789abcdefULL)

/*
 * Format: bitmap_size (8 bytes) + whole_bitmap (N bytes).
 *
 * Returns >0 if success with sent bytes, or <0 if error.
 */
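/*
 * Roughly, the stream produced below looks like this:
 *
 *   8 bytes    bitmap size in bytes (big endian), rounded up to a
 *              multiple of 8 so that 32bit and 64bit hosts agree
 *   N bytes    the receivedmap bitmap, converted to little endian
 *   8 bytes    RAMBLOCK_RECV_BITMAP_ENDING marker (big endian)
 */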
int64_t ramblock_recv_bitmap_send(QEMUFile *file,
                                  const char *block_name)
{
    RAMBlock *block = qemu_ram_block_by_name(block_name);
    unsigned long *le_bitmap, nbits;
    uint64_t size;

    if (!block) {
        error_report("%s: invalid block name: %s", __func__, block_name);
        return -1;
    }

    nbits = block->used_length >> TARGET_PAGE_BITS;

    /*
     * Make sure the tmp bitmap buffer is big enough, e.g., on 32bit
     * machines we may need 4 more bytes for padding (see below
     * comment).  So extend it a bit beforehand.
     */
    le_bitmap = bitmap_new(nbits + BITS_PER_LONG);

    /*
     * Always use little endian when sending the bitmap.  This is
     * required so that it works even when source and destination VMs
     * are not using the same endianness.  (Note: big endian won't work.)
     */
    bitmap_to_le(le_bitmap, block->receivedmap, nbits);

    /* Size of the bitmap, in bytes */
    size = DIV_ROUND_UP(nbits, 8);

    /*
     * size is always aligned to 8 bytes for 64bit machines, but it
     * may not be true for 32bit machines. We need this padding to
     * make sure the migration can survive even between 32bit and
     * 64bit machines.
     */
    size = ROUND_UP(size, 8);

    qemu_put_be64(file, size);
    qemu_put_buffer(file, (const uint8_t *)le_bitmap, size);
    /*
     * Mark as an end, in case the middle part is screwed up due to
     * some "mysterious" reason.
     */
    qemu_put_be64(file, RAMBLOCK_RECV_BITMAP_ENDING);
    qemu_fflush(file);

    g_free(le_bitmap);

    if (qemu_file_get_error(file)) {
        return qemu_file_get_error(file);
    }

    return size + sizeof(size);
}

/*
 * An outstanding page request, on the source, having been received
 * and queued
 */
struct RAMSrcPageRequest {
    RAMBlock *rb;
    hwaddr    offset;
    hwaddr    len;

    QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
};

/* State of RAM for migration */
struct RAMState {
    /* QEMUFile used for this migration */
    QEMUFile *f;
    /* Last block that we have visited searching for dirty pages */
    RAMBlock *last_seen_block;
    /* Last block from where we have sent data */
    RAMBlock *last_sent_block;
    /* Last dirty target page we have sent */
    ram_addr_t last_page;
    /* last ram version we have seen */
    uint32_t last_version;
    /* We are in the first round */
    bool ram_bulk_stage;
    /* The free page optimization is enabled */
    bool fpo_enabled;
    /* How many times we have dirty too many pages */
    int dirty_rate_high_cnt;
    /* these variables are used for bitmap sync */
    /* last time we did a full bitmap_sync */
    int64_t time_last_bitmap_sync;
    /* bytes transferred at start_time */
    uint64_t bytes_xfer_prev;
    /* number of dirty pages since start_time */
    uint64_t num_dirty_pages_period;
    /* xbzrle misses since the beginning of the period */
    uint64_t xbzrle_cache_miss_prev;

    /* compression statistics since the beginning of the period */
    /* number of times no free thread was available to compress data */
    uint64_t compress_thread_busy_prev;
    /* amount of bytes after compression */
    uint64_t compressed_size_prev;
    /* amount of compressed pages */
    uint64_t compress_pages_prev;

    /* total handled target pages at the beginning of period */
    uint64_t target_page_count_prev;
    /* total handled target pages since start */
    uint64_t target_page_count;
    /* number of dirty bits in the bitmap */
    uint64_t migration_dirty_pages;
    /* Protects modification of the bitmap and migration dirty pages */
    QemuMutex bitmap_mutex;
    /* The RAMBlock used in the last src_page_requests */
    RAMBlock *last_req_rb;
    /* Queue of outstanding page requests from the destination */
    QemuMutex src_page_req_mutex;
    QSIMPLEQ_HEAD(, RAMSrcPageRequest) src_page_requests;
};
typedef struct RAMState RAMState;

static RAMState *ram_state;

static NotifierWithReturnList precopy_notifier_list;

void precopy_infrastructure_init(void)
{
    notifier_with_return_list_init(&precopy_notifier_list);
}

void precopy_add_notifier(NotifierWithReturn *n)
{
    notifier_with_return_list_add(&precopy_notifier_list, n);
}

void precopy_remove_notifier(NotifierWithReturn *n)
{
    notifier_with_return_remove(n);
}

int precopy_notify(PrecopyNotifyReason reason, Error **errp)
{
    PrecopyNotifyData pnd;
    pnd.reason = reason;
    pnd.errp = errp;

    return notifier_with_return_list_notify(&precopy_notifier_list, &pnd);
}

void precopy_enable_free_page_optimization(void)
{
    if (!ram_state) {
        return;
    }

    ram_state->fpo_enabled = true;
}

uint64_t ram_bytes_remaining(void)
{
    return ram_state ? (ram_state->migration_dirty_pages * TARGET_PAGE_SIZE) :
                       0;
}

MigrationStats ram_counters;

/* used by the search for pages to send */
struct PageSearchStatus {
    /* Current block being searched */
    RAMBlock    *block;
    /* Current page to search from */
    unsigned long page;
    /* Set once we wrap around */
    bool         complete_round;
};
typedef struct PageSearchStatus PageSearchStatus;

CompressionStats compression_counters;

struct CompressParam {
    bool done;
    bool quit;
    bool zero_page;
    QEMUFile *file;
    QemuMutex mutex;
    QemuCond cond;
    RAMBlock *block;
    ram_addr_t offset;

    /* internally used fields */
    z_stream stream;
    uint8_t *originbuf;
};
typedef struct CompressParam CompressParam;

struct DecompressParam {
    bool done;
    bool quit;
    QemuMutex mutex;
    QemuCond cond;
    void *des;
    uint8_t *compbuf;
    int len;
    z_stream stream;
};
typedef struct DecompressParam DecompressParam;

static CompressParam *comp_param;
static QemuThread *compress_threads;
/* comp_done_cond is used to wake up the migration thread when
 * one of the compression threads has finished the compression.
 * comp_done_lock is used to co-work with comp_done_cond.
 */
static QemuMutex comp_done_lock;
static QemuCond comp_done_cond;
/* The empty QEMUFileOps will be used by file in CompressParam */
static const QEMUFileOps empty_ops = { };

static QEMUFile *decomp_file;
static DecompressParam *decomp_param;
static QemuThread *decompress_threads;
static QemuMutex decomp_done_lock;
static QemuCond decomp_done_cond;

static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
                                 ram_addr_t offset, uint8_t *source_buf);

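/*
 * Handshake between the migration thread and a compression worker:
 * the migration thread is expected to fill param->block/offset and
 * signal param->cond; the worker then compresses the page into
 * param->file, marks param->done under comp_done_lock and signals
 * comp_done_cond so the migration thread can collect the result.
 */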
static void *do_data_compress(void *opaque)
{
    CompressParam *param = opaque;
    RAMBlock *block;
    ram_addr_t offset;
    bool zero_page;

    qemu_mutex_lock(&param->mutex);
    while (!param->quit) {
        if (param->block) {
            block = param->block;
            offset = param->offset;
            param->block = NULL;
            qemu_mutex_unlock(&param->mutex);

            zero_page = do_compress_ram_page(param->file, &param->stream,
                                             block, offset, param->originbuf);

            qemu_mutex_lock(&comp_done_lock);
            param->done = true;
            param->zero_page = zero_page;
            qemu_cond_signal(&comp_done_cond);
            qemu_mutex_unlock(&comp_done_lock);

            qemu_mutex_lock(&param->mutex);
        } else {
            qemu_cond_wait(&param->cond, &param->mutex);
        }
    }
    qemu_mutex_unlock(&param->mutex);

    return NULL;
}

static void compress_threads_save_cleanup(void)
{
    int i, thread_count;

    if (!migrate_use_compression() || !comp_param) {
        return;
    }

    thread_count = migrate_compress_threads();
    for (i = 0; i < thread_count; i++) {
        /*
         * we use it as an indicator to show whether the thread was
         * properly initialized or not
         */
        if (!comp_param[i].file) {
            break;
        }

        qemu_mutex_lock(&comp_param[i].mutex);
        comp_param[i].quit = true;
        qemu_cond_signal(&comp_param[i].cond);
        qemu_mutex_unlock(&comp_param[i].mutex);

        qemu_thread_join(compress_threads + i);
        qemu_mutex_destroy(&comp_param[i].mutex);
        qemu_cond_destroy(&comp_param[i].cond);
        deflateEnd(&comp_param[i].stream);
        g_free(comp_param[i].originbuf);
        qemu_fclose(comp_param[i].file);
        comp_param[i].file = NULL;
    }
    qemu_mutex_destroy(&comp_done_lock);
    qemu_cond_destroy(&comp_done_cond);
    g_free(compress_threads);
    g_free(comp_param);
    compress_threads = NULL;
    comp_param = NULL;
}

static int compress_threads_save_setup(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return 0;
    }
    thread_count = migrate_compress_threads();
    compress_threads = g_new0(QemuThread, thread_count);
    comp_param = g_new0(CompressParam, thread_count);
    qemu_cond_init(&comp_done_cond);
    qemu_mutex_init(&comp_done_lock);
    for (i = 0; i < thread_count; i++) {
        comp_param[i].originbuf = g_try_malloc(TARGET_PAGE_SIZE);
        if (!comp_param[i].originbuf) {
            goto exit;
        }

        if (deflateInit(&comp_param[i].stream,
                        migrate_compress_level()) != Z_OK) {
            g_free(comp_param[i].originbuf);
            goto exit;
        }

        /* comp_param[i].file is just used as a dummy buffer to save data,
         * set its ops to empty.
         */
        comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
        comp_param[i].done = true;
        comp_param[i].quit = false;
        qemu_mutex_init(&comp_param[i].mutex);
        qemu_cond_init(&comp_param[i].cond);
        qemu_thread_create(compress_threads + i, "compress",
                           do_data_compress, comp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
    return 0;

exit:
    compress_threads_save_cleanup();
    return -1;
}

/* Multiple fd's */

#define MULTIFD_MAGIC 0x11223344U
#define MULTIFD_VERSION 1

#define MULTIFD_FLAG_SYNC (1 << 0)

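/*
 * Every multifd channel starts with a one-off MultiFDInit_t handshake
 * message, see multifd_send_initial_packet()/multifd_recv_initial_packet()
 * below: the destination uses the uuid to check that the connection
 * belongs to this migration and the id to tie the socket to a channel.
 */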
Juan Quintelaaf8b7d22018-04-06 19:32:12 +0200586typedef struct {
587 uint32_t magic;
588 uint32_t version;
589 unsigned char uuid[16]; /* QemuUUID */
590 uint8_t id;
591} __attribute__((packed)) MultiFDInit_t;
592
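/*
 * After the handshake, each channel carries a sequence of
 * MultiFDPacket_t headers (all fields big endian on the wire), each
 * followed by pages_used raw target pages sent as an iovec; see
 * multifd_send_thread()/multifd_recv_thread() below.
 */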
typedef struct {
    uint32_t magic;
    uint32_t version;
    uint32_t flags;
    /* maximum number of allocated pages */
    uint32_t pages_alloc;
    uint32_t pages_used;
    uint64_t packet_num;
    char ramblock[256];
    uint64_t offset[];
} __attribute__((packed)) MultiFDPacket_t;

typedef struct {
    /* number of used pages */
    uint32_t used;
    /* number of allocated pages */
    uint32_t allocated;
    /* global number of generated multifd packets */
    uint64_t packet_num;
    /* offset of each page */
    ram_addr_t *offset;
    /* pointer to each page */
    struct iovec *iov;
    RAMBlock *block;
} MultiFDPages_t;

typedef struct {
    /* these fields are not changed once the thread is created */
    /* channel number */
    uint8_t id;
    /* channel thread name */
    char *name;
    /* channel thread id */
    QemuThread thread;
    /* communication channel */
    QIOChannel *c;
    /* sem where to wait for more work */
    QemuSemaphore sem;
    /* this mutex protects the following parameters */
    QemuMutex mutex;
    /* is this channel thread running */
    bool running;
    /* should this thread finish */
    bool quit;
    /* thread has work to do */
    int pending_job;
    /* array of pages to be sent */
    MultiFDPages_t *pages;
    /* packet allocated len */
    uint32_t packet_len;
    /* pointer to the packet */
    MultiFDPacket_t *packet;
    /* multifd flags for each packet */
    uint32_t flags;
    /* global number of generated multifd packets */
    uint64_t packet_num;
    /* thread local variables */
    /* packets sent through this channel */
    uint64_t num_packets;
    /* pages sent through this channel */
    uint64_t num_pages;
    /* syncs main thread and channels */
    QemuSemaphore sem_sync;
} MultiFDSendParams;

typedef struct {
    /* these fields are not changed once the thread is created */
    /* channel number */
    uint8_t id;
    /* channel thread name */
    char *name;
    /* channel thread id */
    QemuThread thread;
    /* communication channel */
    QIOChannel *c;
    /* this mutex protects the following parameters */
    QemuMutex mutex;
    /* is this channel thread running */
    bool running;
    /* array of pages to receive */
    MultiFDPages_t *pages;
    /* packet allocated len */
    uint32_t packet_len;
    /* pointer to the packet */
    MultiFDPacket_t *packet;
    /* multifd flags for each packet */
    uint32_t flags;
    /* global number of generated multifd packets */
    uint64_t packet_num;
    /* thread local variables */
    /* packets received through this channel */
    uint64_t num_packets;
    /* pages received through this channel */
    uint64_t num_pages;
    /* syncs main thread and channels */
    QemuSemaphore sem_sync;
} MultiFDRecvParams;

static int multifd_send_initial_packet(MultiFDSendParams *p, Error **errp)
{
    MultiFDInit_t msg;
    int ret;

    msg.magic = cpu_to_be32(MULTIFD_MAGIC);
    msg.version = cpu_to_be32(MULTIFD_VERSION);
    msg.id = p->id;
    memcpy(msg.uuid, &qemu_uuid.data, sizeof(msg.uuid));

    ret = qio_channel_write_all(p->c, (char *)&msg, sizeof(msg), errp);
    if (ret != 0) {
        return -1;
    }
    return 0;
}

static int multifd_recv_initial_packet(QIOChannel *c, Error **errp)
{
    MultiFDInit_t msg;
    int ret;

    ret = qio_channel_read_all(c, (char *)&msg, sizeof(msg), errp);
    if (ret != 0) {
        return -1;
    }

    msg.magic = be32_to_cpu(msg.magic);
    msg.version = be32_to_cpu(msg.version);

    if (msg.magic != MULTIFD_MAGIC) {
        error_setg(errp, "multifd: received packet magic %x "
                   "expected %x", msg.magic, MULTIFD_MAGIC);
        return -1;
    }

    if (msg.version != MULTIFD_VERSION) {
        error_setg(errp, "multifd: received packet version %d "
                   "expected %d", msg.version, MULTIFD_VERSION);
        return -1;
    }

    if (memcmp(msg.uuid, &qemu_uuid, sizeof(qemu_uuid))) {
        char *uuid = qemu_uuid_unparse_strdup(&qemu_uuid);
        char *msg_uuid = qemu_uuid_unparse_strdup((const QemuUUID *)msg.uuid);

        error_setg(errp, "multifd: received uuid '%s' and expected "
                   "uuid '%s' for channel %hhd", msg_uuid, uuid, msg.id);
        g_free(uuid);
        g_free(msg_uuid);
        return -1;
    }

    if (msg.id > migrate_multifd_channels()) {
        error_setg(errp, "multifd: received channel id %d is greater than "
                   "number of channels %d", msg.id, migrate_multifd_channels());
        return -1;
    }

    return msg.id;
}

static MultiFDPages_t *multifd_pages_init(size_t size)
{
    MultiFDPages_t *pages = g_new0(MultiFDPages_t, 1);

    pages->allocated = size;
    pages->iov = g_new0(struct iovec, size);
    pages->offset = g_new0(ram_addr_t, size);

    return pages;
}

static void multifd_pages_clear(MultiFDPages_t *pages)
{
    pages->used = 0;
    pages->allocated = 0;
    pages->packet_num = 0;
    pages->block = NULL;
    g_free(pages->iov);
    pages->iov = NULL;
    g_free(pages->offset);
    pages->offset = NULL;
    g_free(pages);
}

static void multifd_send_fill_packet(MultiFDSendParams *p)
{
    MultiFDPacket_t *packet = p->packet;
    int i;

    packet->magic = cpu_to_be32(MULTIFD_MAGIC);
    packet->version = cpu_to_be32(MULTIFD_VERSION);
    packet->flags = cpu_to_be32(p->flags);
    packet->pages_alloc = cpu_to_be32(migrate_multifd_page_count());
    packet->pages_used = cpu_to_be32(p->pages->used);
    packet->packet_num = cpu_to_be64(p->packet_num);

    if (p->pages->block) {
        strncpy(packet->ramblock, p->pages->block->idstr, 256);
    }

    for (i = 0; i < p->pages->used; i++) {
        packet->offset[i] = cpu_to_be64(p->pages->offset[i]);
    }
}

static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
{
    MultiFDPacket_t *packet = p->packet;
    RAMBlock *block;
    int i;

    packet->magic = be32_to_cpu(packet->magic);
    if (packet->magic != MULTIFD_MAGIC) {
        error_setg(errp, "multifd: received packet "
                   "magic %x and expected magic %x",
                   packet->magic, MULTIFD_MAGIC);
        return -1;
    }

    packet->version = be32_to_cpu(packet->version);
    if (packet->version != MULTIFD_VERSION) {
        error_setg(errp, "multifd: received packet "
                   "version %d and expected version %d",
                   packet->version, MULTIFD_VERSION);
        return -1;
    }

    p->flags = be32_to_cpu(packet->flags);

    packet->pages_alloc = be32_to_cpu(packet->pages_alloc);
    if (packet->pages_alloc > migrate_multifd_page_count()) {
        error_setg(errp, "multifd: received packet "
                   "with size %d and expected maximum size %d",
                   packet->pages_alloc, migrate_multifd_page_count());
        return -1;
    }

    p->pages->used = be32_to_cpu(packet->pages_used);
    if (p->pages->used > packet->pages_alloc) {
        error_setg(errp, "multifd: received packet "
                   "with %d pages and expected maximum pages are %d",
                   p->pages->used, packet->pages_alloc);
        return -1;
    }

    p->packet_num = be64_to_cpu(packet->packet_num);

    if (p->pages->used) {
        /* make sure that ramblock is 0 terminated */
        packet->ramblock[255] = 0;
        block = qemu_ram_block_by_name(packet->ramblock);
        if (!block) {
            error_setg(errp, "multifd: unknown ram block %s",
                       packet->ramblock);
            return -1;
        }
    }

    for (i = 0; i < p->pages->used; i++) {
        ram_addr_t offset = be64_to_cpu(packet->offset[i]);

        if (offset > (block->used_length - TARGET_PAGE_SIZE)) {
            error_setg(errp, "multifd: offset too long " RAM_ADDR_FMT
                       " (max " RAM_ADDR_FMT ")",
                       offset, block->max_length);
            return -1;
        }
        p->pages->iov[i].iov_base = block->host + offset;
        p->pages->iov[i].iov_len = TARGET_PAGE_SIZE;
    }

    return 0;
}

struct {
    MultiFDSendParams *params;
    /* number of created threads */
    int count;
    /* array of pages to be sent */
    MultiFDPages_t *pages;
    /* syncs main thread and channels */
    QemuSemaphore sem_sync;
    /* global number of generated multifd packets */
    uint64_t packet_num;
    /* send channels ready */
    QemuSemaphore channels_ready;
} *multifd_send_state;

/*
 * How do we use multifd_send_state->pages and channel->pages?
 *
 * We create a pages structure for each channel, plus a main one.  Each
 * time we need to send a batch of pages we interchange the main one
 * with the one of the channel that is going to send it.  There are two
 * reasons for that:
 *    - to not have to do so many mallocs during migration
 *    - to make it easier to know what to free at the end of migration
 *
 * This way we always know who is the owner of each "pages" struct, and
 * we don't need any locking.  It belongs either to the migration thread
 * or to the channel thread.  Switching is safe because the migration
 * thread is using the channel mutex when changing it, and the channel
 * thread has to have finished with its own, otherwise pending_job can't
 * be false.
 */

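/*
 * Hand the currently filled multifd_send_state->pages to an idle
 * channel: wait until some channel is ready, pick the next one
 * round-robin that has no pending_job, swap its pages array with the
 * main one and kick its thread.  The channel's (now empty) array
 * becomes the new staging area for multifd_queue_page().
 */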
static void multifd_send_pages(void)
{
    int i;
    static int next_channel;
    MultiFDSendParams *p = NULL; /* make gcc happy */
    MultiFDPages_t *pages = multifd_send_state->pages;
    uint64_t transferred;

    qemu_sem_wait(&multifd_send_state->channels_ready);
    for (i = next_channel;; i = (i + 1) % migrate_multifd_channels()) {
        p = &multifd_send_state->params[i];

        qemu_mutex_lock(&p->mutex);
        if (!p->pending_job) {
            p->pending_job++;
            next_channel = (i + 1) % migrate_multifd_channels();
            break;
        }
        qemu_mutex_unlock(&p->mutex);
    }
    p->pages->used = 0;

    p->packet_num = multifd_send_state->packet_num++;
    p->pages->block = NULL;
    multifd_send_state->pages = p->pages;
    p->pages = pages;
    transferred = ((uint64_t) pages->used) * TARGET_PAGE_SIZE + p->packet_len;
    ram_counters.multifd_bytes += transferred;
    ram_counters.transferred += transferred;
    qemu_mutex_unlock(&p->mutex);
    qemu_sem_post(&p->sem);
}

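/*
 * Queue one page into the staging pages array.  The array is flushed
 * with multifd_send_pages() when it is full or when the page belongs
 * to a different RAMBlock than the pages already queued; in the latter
 * case the page is re-queued into the fresh array.
 */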
static void multifd_queue_page(RAMBlock *block, ram_addr_t offset)
{
    MultiFDPages_t *pages = multifd_send_state->pages;

    if (!pages->block) {
        pages->block = block;
    }

    if (pages->block == block) {
        pages->offset[pages->used] = offset;
        pages->iov[pages->used].iov_base = block->host + offset;
        pages->iov[pages->used].iov_len = TARGET_PAGE_SIZE;
        pages->used++;

        if (pages->used < pages->allocated) {
            return;
        }
    }

    multifd_send_pages();

    if (pages->block != block) {
        multifd_queue_page(block, offset);
    }
}

static void multifd_send_terminate_threads(Error *err)
{
    int i;

    if (err) {
        MigrationState *s = migrate_get_current();
        migrate_set_error(s, err);
        if (s->state == MIGRATION_STATUS_SETUP ||
            s->state == MIGRATION_STATUS_PRE_SWITCHOVER ||
            s->state == MIGRATION_STATUS_DEVICE ||
            s->state == MIGRATION_STATUS_ACTIVE) {
            migrate_set_state(&s->state, s->state,
                              MIGRATION_STATUS_FAILED);
        }
    }

    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        qemu_mutex_lock(&p->mutex);
        p->quit = true;
        qemu_sem_post(&p->sem);
        qemu_mutex_unlock(&p->mutex);
    }
}

void multifd_save_cleanup(void)
{
    int i;

    if (!migrate_use_multifd()) {
        return;
    }
    multifd_send_terminate_threads(NULL);
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        if (p->running) {
            qemu_thread_join(&p->thread);
        }
        socket_send_channel_destroy(p->c);
        p->c = NULL;
        qemu_mutex_destroy(&p->mutex);
        qemu_sem_destroy(&p->sem);
        qemu_sem_destroy(&p->sem_sync);
        g_free(p->name);
        p->name = NULL;
        multifd_pages_clear(p->pages);
        p->pages = NULL;
        p->packet_len = 0;
        g_free(p->packet);
        p->packet = NULL;
    }
    qemu_sem_destroy(&multifd_send_state->channels_ready);
    qemu_sem_destroy(&multifd_send_state->sem_sync);
    g_free(multifd_send_state->params);
    multifd_send_state->params = NULL;
    multifd_pages_clear(multifd_send_state->pages);
    multifd_send_state->pages = NULL;
    g_free(multifd_send_state);
    multifd_send_state = NULL;
}

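/*
 * Synchronization point between the migration thread and all send
 * channels: flush any pending pages, then make every channel emit one
 * packet with MULTIFD_FLAG_SYNC set and wait on sem_sync until each of
 * them has done so.  The destination pairs this with
 * multifd_recv_sync_main().
 */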
static void multifd_send_sync_main(void)
{
    int i;

    if (!migrate_use_multifd()) {
        return;
    }
    if (multifd_send_state->pages->used) {
        multifd_send_pages();
    }
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        trace_multifd_send_sync_main_signal(p->id);

        qemu_mutex_lock(&p->mutex);

        p->packet_num = multifd_send_state->packet_num++;
        p->flags |= MULTIFD_FLAG_SYNC;
        p->pending_job++;
        qemu_mutex_unlock(&p->mutex);
        qemu_sem_post(&p->sem);
    }
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        trace_multifd_send_sync_main_wait(p->id);
        qemu_sem_wait(&multifd_send_state->sem_sync);
    }
    trace_multifd_send_sync_main(multifd_send_state->packet_num);
}

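/*
 * Per-channel send loop: after the MultiFDInit_t handshake, each
 * pending_job is turned into one packet header written with
 * qio_channel_write_all(), followed by the queued pages written as an
 * iovec with qio_channel_writev_all().
 */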
static void *multifd_send_thread(void *opaque)
{
    MultiFDSendParams *p = opaque;
    Error *local_err = NULL;
    int ret;

    trace_multifd_send_thread_start(p->id);
    rcu_register_thread();

    if (multifd_send_initial_packet(p, &local_err) < 0) {
        goto out;
    }
    /* initial packet */
    p->num_packets = 1;

    while (true) {
        qemu_sem_wait(&p->sem);
        qemu_mutex_lock(&p->mutex);

        if (p->pending_job) {
            uint32_t used = p->pages->used;
            uint64_t packet_num = p->packet_num;
            uint32_t flags = p->flags;

            multifd_send_fill_packet(p);
            p->flags = 0;
            p->num_packets++;
            p->num_pages += used;
            p->pages->used = 0;
            qemu_mutex_unlock(&p->mutex);

            trace_multifd_send(p->id, packet_num, used, flags);

            ret = qio_channel_write_all(p->c, (void *)p->packet,
                                        p->packet_len, &local_err);
            if (ret != 0) {
                break;
            }

            if (used) {
                ret = qio_channel_writev_all(p->c, p->pages->iov,
                                             used, &local_err);
                if (ret != 0) {
                    break;
                }
            }

            qemu_mutex_lock(&p->mutex);
            p->pending_job--;
            qemu_mutex_unlock(&p->mutex);

            if (flags & MULTIFD_FLAG_SYNC) {
                qemu_sem_post(&multifd_send_state->sem_sync);
            }
            qemu_sem_post(&multifd_send_state->channels_ready);
        } else if (p->quit) {
            qemu_mutex_unlock(&p->mutex);
            break;
        } else {
            qemu_mutex_unlock(&p->mutex);
            /* sometimes there are spurious wakeups */
        }
    }

out:
    if (local_err) {
        multifd_send_terminate_threads(local_err);
    }

    qemu_mutex_lock(&p->mutex);
    p->running = false;
    qemu_mutex_unlock(&p->mutex);

    rcu_unregister_thread();
    trace_multifd_send_thread_end(p->id, p->num_packets, p->num_pages);

    return NULL;
}

static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
{
    MultiFDSendParams *p = opaque;
    QIOChannel *sioc = QIO_CHANNEL(qio_task_get_source(task));
    Error *local_err = NULL;

    if (qio_task_propagate_error(task, &local_err)) {
        migrate_set_error(migrate_get_current(), local_err);
        multifd_save_cleanup();
    } else {
        p->c = QIO_CHANNEL(sioc);
        qio_channel_set_delay(p->c, false);
        p->running = true;
        qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
                           QEMU_THREAD_JOINABLE);

        atomic_inc(&multifd_send_state->count);
    }
}

int multifd_save_setup(void)
{
    int thread_count;
    uint32_t page_count = migrate_multifd_page_count();
    uint8_t i;

    if (!migrate_use_multifd()) {
        return 0;
    }
    thread_count = migrate_multifd_channels();
    multifd_send_state = g_malloc0(sizeof(*multifd_send_state));
    multifd_send_state->params = g_new0(MultiFDSendParams, thread_count);
    atomic_set(&multifd_send_state->count, 0);
    multifd_send_state->pages = multifd_pages_init(page_count);
    qemu_sem_init(&multifd_send_state->sem_sync, 0);
    qemu_sem_init(&multifd_send_state->channels_ready, 0);

    for (i = 0; i < thread_count; i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        qemu_mutex_init(&p->mutex);
        qemu_sem_init(&p->sem, 0);
        qemu_sem_init(&p->sem_sync, 0);
        p->quit = false;
        p->pending_job = 0;
        p->id = i;
        p->pages = multifd_pages_init(page_count);
        p->packet_len = sizeof(MultiFDPacket_t)
                      + sizeof(ram_addr_t) * page_count;
        p->packet = g_malloc0(p->packet_len);
        p->name = g_strdup_printf("multifdsend_%d", i);
        socket_send_channel_create(multifd_new_send_channel_async, p);
    }
    return 0;
}

struct {
    MultiFDRecvParams *params;
    /* number of created threads */
    int count;
    /* syncs main thread and channels */
    QemuSemaphore sem_sync;
    /* global number of generated multifd packets */
    uint64_t packet_num;
} *multifd_recv_state;

static void multifd_recv_terminate_threads(Error *err)
{
    int i;

    if (err) {
        MigrationState *s = migrate_get_current();
        migrate_set_error(s, err);
        if (s->state == MIGRATION_STATUS_SETUP ||
            s->state == MIGRATION_STATUS_ACTIVE) {
            migrate_set_state(&s->state, s->state,
                              MIGRATION_STATUS_FAILED);
        }
    }

    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

        qemu_mutex_lock(&p->mutex);
        /* We could arrive here for two reasons:
           - normal quit, i.e. everything went fine, just finished
           - error quit: We close the channels so the channel threads
             finish the qio_channel_read_all_eof() */
        qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
        qemu_mutex_unlock(&p->mutex);
    }
}

int multifd_load_cleanup(Error **errp)
{
    int i;
    int ret = 0;

    if (!migrate_use_multifd()) {
        return 0;
    }
    multifd_recv_terminate_threads(NULL);
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

        if (p->running) {
            qemu_thread_join(&p->thread);
        }
        object_unref(OBJECT(p->c));
        p->c = NULL;
        qemu_mutex_destroy(&p->mutex);
        qemu_sem_destroy(&p->sem_sync);
        g_free(p->name);
        p->name = NULL;
        multifd_pages_clear(p->pages);
        p->pages = NULL;
        p->packet_len = 0;
        g_free(p->packet);
        p->packet = NULL;
    }
    qemu_sem_destroy(&multifd_recv_state->sem_sync);
    g_free(multifd_recv_state->params);
    multifd_recv_state->params = NULL;
    g_free(multifd_recv_state);
    multifd_recv_state = NULL;

    return ret;
}

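/*
 * Receive-side counterpart of multifd_send_sync_main(): wait until
 * every channel has seen a packet with MULTIFD_FLAG_SYNC (signalled
 * through multifd_recv_state->sem_sync), record the highest packet
 * number, then let all channel threads continue via their per-channel
 * sem_sync.
 */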
static void multifd_recv_sync_main(void)
{
    int i;

    if (!migrate_use_multifd()) {
        return;
    }
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

        trace_multifd_recv_sync_main_wait(p->id);
        qemu_sem_wait(&multifd_recv_state->sem_sync);
        qemu_mutex_lock(&p->mutex);
        if (multifd_recv_state->packet_num < p->packet_num) {
            multifd_recv_state->packet_num = p->packet_num;
        }
        qemu_mutex_unlock(&p->mutex);
    }
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

        trace_multifd_recv_sync_main_signal(p->id);
        qemu_sem_post(&p->sem_sync);
    }
    trace_multifd_recv_sync_main(multifd_recv_state->packet_num);
}

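/*
 * Per-channel receive loop: read one packet header, unfill it into
 * p->pages (which also validates it against the local RAMBlocks), then
 * read the page data straight into guest memory with
 * qio_channel_readv_all().
 */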
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001288static void *multifd_recv_thread(void *opaque)
1289{
1290 MultiFDRecvParams *p = opaque;
Juan Quintela2a26c972018-04-04 11:26:58 +02001291 Error *local_err = NULL;
1292 int ret;
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001293
Juan Quintela408ea6a2018-04-06 18:28:59 +02001294 trace_multifd_recv_thread_start(p->id);
Lidong Chen74637e62018-08-06 21:29:29 +08001295 rcu_register_thread();
Juan Quintela408ea6a2018-04-06 18:28:59 +02001296
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001297 while (true) {
Juan Quintela6df264a2018-02-28 09:10:07 +01001298 uint32_t used;
1299 uint32_t flags;
1300
Juan Quintela8b2db7f2018-04-11 12:36:13 +02001301 ret = qio_channel_read_all_eof(p->c, (void *)p->packet,
1302 p->packet_len, &local_err);
1303 if (ret == 0) { /* EOF */
1304 break;
1305 }
1306 if (ret == -1) { /* Error */
1307 break;
1308 }
Juan Quintela6df264a2018-02-28 09:10:07 +01001309
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001310 qemu_mutex_lock(&p->mutex);
Juan Quintela6df264a2018-02-28 09:10:07 +01001311 ret = multifd_recv_unfill_packet(p, &local_err);
1312 if (ret) {
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001313 qemu_mutex_unlock(&p->mutex);
1314 break;
1315 }
Juan Quintela6df264a2018-02-28 09:10:07 +01001316
1317 used = p->pages->used;
1318 flags = p->flags;
1319 trace_multifd_recv(p->id, p->packet_num, used, flags);
Juan Quintela6df264a2018-02-28 09:10:07 +01001320 p->num_packets++;
1321 p->num_pages += used;
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001322 qemu_mutex_unlock(&p->mutex);
Juan Quintela6df264a2018-02-28 09:10:07 +01001323
Juan Quintelaad24c7c2019-01-04 19:12:35 +01001324 if (used) {
1325 ret = qio_channel_readv_all(p->c, p->pages->iov,
1326 used, &local_err);
1327 if (ret != 0) {
1328 break;
1329 }
Juan Quintela8b2db7f2018-04-11 12:36:13 +02001330 }
1331
Juan Quintela6df264a2018-02-28 09:10:07 +01001332 if (flags & MULTIFD_FLAG_SYNC) {
1333 qemu_sem_post(&multifd_recv_state->sem_sync);
1334 qemu_sem_wait(&p->sem_sync);
1335 }
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001336 }
1337
Juan Quintelad82628e2018-04-11 02:44:24 +02001338 if (local_err) {
1339 multifd_recv_terminate_threads(local_err);
1340 }
Juan Quintela66770702018-02-19 19:01:45 +01001341 qemu_mutex_lock(&p->mutex);
1342 p->running = false;
1343 qemu_mutex_unlock(&p->mutex);
1344
Lidong Chen74637e62018-08-06 21:29:29 +08001345 rcu_unregister_thread();
Juan Quintela408ea6a2018-04-06 18:28:59 +02001346 trace_multifd_recv_thread_end(p->id, p->num_packets, p->num_pages);
1347
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001348 return NULL;
1349}
1350
1351int multifd_load_setup(void)
1352{
1353 int thread_count;
Juan Quintela34c55a92018-04-10 23:35:15 +02001354 uint32_t page_count = migrate_multifd_page_count();
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001355 uint8_t i;
1356
1357 if (!migrate_use_multifd()) {
1358 return 0;
1359 }
1360 thread_count = migrate_multifd_channels();
1361 multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state));
1362 multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count);
Juan Quintela66770702018-02-19 19:01:45 +01001363 atomic_set(&multifd_recv_state->count, 0);
Juan Quintela6df264a2018-02-28 09:10:07 +01001364 qemu_sem_init(&multifd_recv_state->sem_sync, 0);
Juan Quintela34c55a92018-04-10 23:35:15 +02001365
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001366 for (i = 0; i < thread_count; i++) {
1367 MultiFDRecvParams *p = &multifd_recv_state->params[i];
1368
1369 qemu_mutex_init(&p->mutex);
Juan Quintela6df264a2018-02-28 09:10:07 +01001370 qemu_sem_init(&p->sem_sync, 0);
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001371 p->id = i;
Juan Quintela34c55a92018-04-10 23:35:15 +02001372 p->pages = multifd_pages_init(page_count);
Juan Quintela2a26c972018-04-04 11:26:58 +02001373 p->packet_len = sizeof(MultiFDPacket_t)
1374 + sizeof(ram_addr_t) * page_count;
1375 p->packet = g_malloc0(p->packet_len);
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001376 p->name = g_strdup_printf("multifdrecv_%d", i);
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001377 }
1378 return 0;
1379}
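
/*
 * For illustration only: a minimal, self-contained sketch of the kind of
 * packet layout multifd_load_setup() sizes above, a fixed header followed
 * by one address per page.  ExPacketHeader and its field names are
 * hypothetical; the real layout is MultiFDPacket_t, which is not shown here.
 *
 *     #include <stdint.h>
 *     #include <stdlib.h>
 *
 *     typedef struct {
 *         uint32_t magic;
 *         uint32_t version;
 *         uint32_t flags;
 *         uint32_t pages_used;      // pages actually carried this round
 *         uint64_t packet_num;
 *     } ExPacketHeader;
 *
 *     int main(void)
 *     {
 *         uint32_t page_count = 128;
 *         size_t packet_len = sizeof(ExPacketHeader)
 *                             + sizeof(uint64_t) * page_count;
 *         uint8_t *packet = calloc(1, packet_len);
 *         ExPacketHeader *hdr = (ExPacketHeader *)packet;
 *         uint64_t *offsets = (uint64_t *)(packet + sizeof(ExPacketHeader));
 *
 *         hdr->pages_used = 2;
 *         offsets[0] = 0x1000;      // page offsets inside the RAMBlock
 *         offsets[1] = 0x7000;
 *
 *         free(packet);
 *         return 0;
 *     }
 */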
1380
Juan Quintela62c1e0c2018-02-19 18:59:02 +01001381bool multifd_recv_all_channels_created(void)
1382{
1383 int thread_count = migrate_multifd_channels();
1384
1385 if (!migrate_use_multifd()) {
1386 return true;
1387 }
1388
1389 return thread_count == atomic_read(&multifd_recv_state->count);
1390}
1391
Fei Li49ed0d22019-01-13 22:08:46 +08001392/*
1393 * Try to receive all multifd channels to get ready for the migration.
1394 * - Return true and do not set @errp when correctly receiving all channels;
1395 * - Return false and do not set @errp when correctly receiving the current one;
1396 * - Return false and set @errp when failing to receive the current channel.
1397 */
1398bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
Juan Quintela71bb07d2018-02-19 19:01:03 +01001399{
Juan Quintela60df2d42018-03-07 07:56:15 +01001400 MultiFDRecvParams *p;
Juan Quintelaaf8b7d22018-04-06 19:32:12 +02001401 Error *local_err = NULL;
1402 int id;
Juan Quintela60df2d42018-03-07 07:56:15 +01001403
Juan Quintelaaf8b7d22018-04-06 19:32:12 +02001404 id = multifd_recv_initial_packet(ioc, &local_err);
1405 if (id < 0) {
1406 multifd_recv_terminate_threads(local_err);
Fei Li49ed0d22019-01-13 22:08:46 +08001407 error_propagate_prepend(errp, local_err,
1408 "failed to receive packet"
1409 " via multifd channel %d: ",
1410 atomic_read(&multifd_recv_state->count));
Peter Xu81e62052018-06-27 21:22:44 +08001411 return false;
Juan Quintelaaf8b7d22018-04-06 19:32:12 +02001412 }
1413
1414 p = &multifd_recv_state->params[id];
1415 if (p->c != NULL) {
1416 error_setg(&local_err, "multifd: received id '%d' already setup'",
1417 id);
1418 multifd_recv_terminate_threads(local_err);
Fei Li49ed0d22019-01-13 22:08:46 +08001419 error_propagate(errp, local_err);
Peter Xu81e62052018-06-27 21:22:44 +08001420 return false;
Juan Quintelaaf8b7d22018-04-06 19:32:12 +02001421 }
Juan Quintela60df2d42018-03-07 07:56:15 +01001422 p->c = ioc;
1423 object_ref(OBJECT(ioc));
Juan Quintela408ea6a2018-04-06 18:28:59 +02001424 /* initial packet */
1425 p->num_packets = 1;
Juan Quintela60df2d42018-03-07 07:56:15 +01001426
1427 p->running = true;
1428 qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
1429 QEMU_THREAD_JOINABLE);
1430 atomic_inc(&multifd_recv_state->count);
Fei Li49ed0d22019-01-13 22:08:46 +08001431 return atomic_read(&multifd_recv_state->count) ==
1432 migrate_multifd_channels();
Juan Quintela71bb07d2018-02-19 19:01:03 +01001433}
1434
Juan Quintela56e93d22015-05-07 19:33:31 +02001435/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001436 * save_page_header: write page header to wire
Juan Quintela56e93d22015-05-07 19:33:31 +02001437 *
1438 * If this is the 1st block, it also writes the block identification
1439 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001440 * Returns the number of bytes written
Juan Quintela56e93d22015-05-07 19:33:31 +02001441 *
1442 * @f: QEMUFile where to send the data
1443 * @block: block that contains the page we want to send
1444 * @offset: offset inside the block for the page
1445 * in the lower bits, it contains flags
1446 */
Juan Quintela2bf3aa82017-05-10 13:28:13 +02001447static size_t save_page_header(RAMState *rs, QEMUFile *f, RAMBlock *block,
1448 ram_addr_t offset)
Juan Quintela56e93d22015-05-07 19:33:31 +02001449{
Liang Li9f5f3802015-07-13 17:34:10 +08001450 size_t size, len;
Juan Quintela56e93d22015-05-07 19:33:31 +02001451
Juan Quintela24795692017-03-21 11:45:01 +01001452 if (block == rs->last_sent_block) {
1453 offset |= RAM_SAVE_FLAG_CONTINUE;
1454 }
Juan Quintela2bf3aa82017-05-10 13:28:13 +02001455 qemu_put_be64(f, offset);
Juan Quintela56e93d22015-05-07 19:33:31 +02001456 size = 8;
1457
1458 if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
Liang Li9f5f3802015-07-13 17:34:10 +08001459 len = strlen(block->idstr);
Juan Quintela2bf3aa82017-05-10 13:28:13 +02001460 qemu_put_byte(f, len);
1461 qemu_put_buffer(f, (uint8_t *)block->idstr, len);
Liang Li9f5f3802015-07-13 17:34:10 +08001462 size += 1 + len;
Juan Quintela24795692017-03-21 11:45:01 +01001463 rs->last_sent_block = block;
Juan Quintela56e93d22015-05-07 19:33:31 +02001464 }
1465 return size;
1466}
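
/*
 * For illustration only: a sketch of how a receiving side can walk the
 * header save_page_header() emits, a big-endian 64-bit word carrying the
 * page offset with flags in its low bits, optionally followed by a
 * length-prefixed block id string.  EX_FLAG_CONTINUE and the 12-bit flag
 * mask are assumptions standing in for the real RAM_SAVE_FLAG_* values and
 * TARGET_PAGE_MASK.
 *
 *     #include <stdint.h>
 *     #include <string.h>
 *
 *     #define EX_FLAG_CONTINUE 0x20
 *
 *     static uint64_t ex_get_be64(const uint8_t *p)
 *     {
 *         uint64_t v = 0;
 *         for (int i = 0; i < 8; i++) {
 *             v = (v << 8) | p[i];
 *         }
 *         return v;
 *     }
 *
 *     // Parse one page header from buf; returns the bytes consumed.
 *     static size_t ex_parse_page_header(const uint8_t *buf, uint64_t *offset,
 *                                        char *idstr, size_t idstr_size)
 *     {
 *         size_t pos = 8;
 *         uint64_t header = ex_get_be64(buf);
 *
 *         *offset = header & ~0xfffULL;      // low bits carry the flags
 *         if (!(header & EX_FLAG_CONTINUE)) {
 *             uint8_t len = buf[pos++];
 *             if (len < idstr_size) {
 *                 memcpy(idstr, buf + pos, len);
 *                 idstr[len] = '\0';
 *             }
 *             pos += len;
 *         }
 *         return pos;
 *     }
 */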
1467
Juan Quintela3d0684b2017-03-23 15:06:39 +01001468/**
1469 * mig_throttle_guest_down: throttle down the guest
1470 *
1471 * Reduce amount of guest cpu execution to hopefully slow down memory
1472 * writes. If guest dirty memory rate is reduced below the rate at
1473 * which we can transfer pages to the destination then we should be
1474 * able to complete migration. Some workloads dirty memory way too
1475 * fast and will not effectively converge, even with auto-converge.
Jason J. Herne070afca2015-09-08 13:12:35 -04001476 */
1477static void mig_throttle_guest_down(void)
1478{
1479 MigrationState *s = migrate_get_current();
Daniel P. Berrange2594f562016-04-27 11:05:14 +01001480 uint64_t pct_initial = s->parameters.cpu_throttle_initial;
1481 uint64_t pct_icrement = s->parameters.cpu_throttle_increment;
Li Qiang4cbc9c72018-08-01 06:00:20 -07001482 int pct_max = s->parameters.max_cpu_throttle;
Jason J. Herne070afca2015-09-08 13:12:35 -04001483
1484 /* We have not started throttling yet. Let's start it. */
1485 if (!cpu_throttle_active()) {
1486 cpu_throttle_set(pct_initial);
1487 } else {
1488 /* Throttling already on, just increase the rate */
Li Qiang4cbc9c72018-08-01 06:00:20 -07001489 cpu_throttle_set(MIN(cpu_throttle_get_percentage() + pct_icrement,
1490 pct_max));
Jason J. Herne070afca2015-09-08 13:12:35 -04001491 }
1492}
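
/*
 * For illustration only: how the throttle percentage grows across repeated
 * calls to mig_throttle_guest_down().  The initial/increment/max values are
 * sample numbers standing in for the cpu_throttle_* migration parameters.
 *
 *     #include <stdio.h>
 *
 *     #define EX_MIN(a, b) ((a) < (b) ? (a) : (b))
 *
 *     int main(void)
 *     {
 *         int pct_initial = 20, pct_increment = 10, pct_max = 99;
 *         int pct = 0;                  // 0 == throttling not active yet
 *
 *         for (int call = 1; call <= 10; call++) {
 *             pct = pct ? EX_MIN(pct + pct_increment, pct_max) : pct_initial;
 *             printf("call %d: throttle guest to %d%%\n", call, pct);
 *         }
 *         return 0;
 *     }
 */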
1493
Juan Quintela3d0684b2017-03-23 15:06:39 +01001494/**
1495 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
1496 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001497 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001498 * @current_addr: address for the zero page
1499 *
1500 * Update the xbzrle cache to reflect a page that's been sent as all 0.
Juan Quintela56e93d22015-05-07 19:33:31 +02001501 * The important thing is that a stale (not-yet-0'd) page be replaced
1502 * by the new data.
1503 * As a bonus, if the page wasn't in the cache it gets added so that
Juan Quintela3d0684b2017-03-23 15:06:39 +01001504 * when a small write is made into the 0'd page it gets XBZRLE sent.
Juan Quintela56e93d22015-05-07 19:33:31 +02001505 */
Juan Quintela6f37bb82017-03-13 19:26:29 +01001506static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
Juan Quintela56e93d22015-05-07 19:33:31 +02001507{
Juan Quintela6f37bb82017-03-13 19:26:29 +01001508 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
Juan Quintela56e93d22015-05-07 19:33:31 +02001509 return;
1510 }
1511
1512 /* We don't care if this fails to allocate a new cache page
1513 * as long as it updated an old one */
Juan Quintelac00e0922017-05-09 16:22:01 +02001514 cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page,
Juan Quintela93604472017-06-06 19:49:03 +02001515 ram_counters.dirty_sync_count);
Juan Quintela56e93d22015-05-07 19:33:31 +02001516}
1517
1518#define ENCODING_FLAG_XBZRLE 0x1
1519
1520/**
1521 * save_xbzrle_page: compress and send current page
1522 *
1523 * Returns: 1 means that we wrote the page
1524 * 0 means that page is identical to the one already sent
1525 * -1 means that xbzrle would be longer than normal
1526 *
Juan Quintela5a987732017-03-13 19:39:02 +01001527 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001528 * @current_data: pointer to the address of the page contents
1529 * @current_addr: addr of the page
Juan Quintela56e93d22015-05-07 19:33:31 +02001530 * @block: block that contains the page we want to send
1531 * @offset: offset inside the block for the page
1532 * @last_stage: if we are at the completion stage
Juan Quintela56e93d22015-05-07 19:33:31 +02001533 */
Juan Quintela204b88b2017-03-15 09:16:57 +01001534static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
Juan Quintela56e93d22015-05-07 19:33:31 +02001535 ram_addr_t current_addr, RAMBlock *block,
Juan Quintela072c2512017-03-14 10:27:31 +01001536 ram_addr_t offset, bool last_stage)
Juan Quintela56e93d22015-05-07 19:33:31 +02001537{
1538 int encoded_len = 0, bytes_xbzrle;
1539 uint8_t *prev_cached_page;
1540
Juan Quintela93604472017-06-06 19:49:03 +02001541 if (!cache_is_cached(XBZRLE.cache, current_addr,
1542 ram_counters.dirty_sync_count)) {
1543 xbzrle_counters.cache_miss++;
Juan Quintela56e93d22015-05-07 19:33:31 +02001544 if (!last_stage) {
1545 if (cache_insert(XBZRLE.cache, current_addr, *current_data,
Juan Quintela93604472017-06-06 19:49:03 +02001546 ram_counters.dirty_sync_count) == -1) {
Juan Quintela56e93d22015-05-07 19:33:31 +02001547 return -1;
1548 } else {
1549 /* update *current_data when the page has been
1550 inserted into cache */
1551 *current_data = get_cached_data(XBZRLE.cache, current_addr);
1552 }
1553 }
1554 return -1;
1555 }
1556
1557 prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
1558
1559 /* save current buffer into memory */
1560 memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
1561
1562 /* XBZRLE encoding (if there is no overflow) */
1563 encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
1564 TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
1565 TARGET_PAGE_SIZE);
1566 if (encoded_len == 0) {
Juan Quintela55c44462017-01-23 22:32:05 +01001567 trace_save_xbzrle_page_skipping();
Juan Quintela56e93d22015-05-07 19:33:31 +02001568 return 0;
1569 } else if (encoded_len == -1) {
Juan Quintela55c44462017-01-23 22:32:05 +01001570 trace_save_xbzrle_page_overflow();
Juan Quintela93604472017-06-06 19:49:03 +02001571 xbzrle_counters.overflow++;
Juan Quintela56e93d22015-05-07 19:33:31 +02001572 /* update data in the cache */
1573 if (!last_stage) {
1574 memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
1575 *current_data = prev_cached_page;
1576 }
1577 return -1;
1578 }
1579
1580 /* we need to update the data in the cache, in order to get the same data */
1581 if (!last_stage) {
1582 memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
1583 }
1584
1585 /* Send XBZRLE based compressed page */
Juan Quintela2bf3aa82017-05-10 13:28:13 +02001586 bytes_xbzrle = save_page_header(rs, rs->f, block,
Juan Quintela204b88b2017-03-15 09:16:57 +01001587 offset | RAM_SAVE_FLAG_XBZRLE);
1588 qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE);
1589 qemu_put_be16(rs->f, encoded_len);
1590 qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len);
Juan Quintela56e93d22015-05-07 19:33:31 +02001591 bytes_xbzrle += encoded_len + 1 + 2;
Juan Quintela93604472017-06-06 19:49:03 +02001592 xbzrle_counters.pages++;
1593 xbzrle_counters.bytes += bytes_xbzrle;
1594 ram_counters.transferred += bytes_xbzrle;
Juan Quintela56e93d22015-05-07 19:33:31 +02001595
1596 return 1;
1597}
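
/*
 * For illustration only: a toy delta encoder capturing the idea behind
 * save_xbzrle_page(), compare the new page against the cached copy and
 * emit only the changed byte runs.  The record format here (16-bit
 * little-endian skip/copy lengths, valid for page-sized buffers) is
 * invented for the example and is NOT the XBZRLE wire format produced by
 * xbzrle_encode_buffer().
 *
 *     #include <stdint.h>
 *     #include <stddef.h>
 *
 *     // Returns the encoded size, or 0 if it would not fit in out_size.
 *     static size_t ex_delta_encode(const uint8_t *old, const uint8_t *cur,
 *                                   size_t len, uint8_t *out, size_t out_size)
 *     {
 *         size_t i = 0, used = 0;
 *
 *         while (i < len) {
 *             size_t skip = 0, copy = 0;
 *
 *             while (i + skip < len && old[i + skip] == cur[i + skip]) {
 *                 skip++;
 *             }
 *             i += skip;
 *             while (i + copy < len && old[i + copy] != cur[i + copy]) {
 *                 copy++;
 *             }
 *             if (used + 4 + copy > out_size) {
 *                 return 0;             // longer than sending the raw page
 *             }
 *             out[used++] = skip & 0xff;
 *             out[used++] = skip >> 8;
 *             out[used++] = copy & 0xff;
 *             out[used++] = copy >> 8;
 *             for (size_t j = 0; j < copy; j++) {
 *                 out[used++] = cur[i + j];
 *             }
 *             i += copy;
 *         }
 *         return used;
 *     }
 */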
1598
Juan Quintela3d0684b2017-03-23 15:06:39 +01001599/**
1600 * migration_bitmap_find_dirty: find the next dirty page from start
Dr. David Alan Gilbertf3f491f2015-11-05 18:11:01 +00001601 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001602 * Called with rcu_read_lock() to protect migration_bitmap
1603 *
1604 * Returns the offset (in pages) within the RAMBlock of the next dirty page
1605 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001606 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001607 * @rb: RAMBlock where to search for dirty pages
Juan Quintelaa935e302017-03-21 15:36:51 +01001608 * @start: page where we start the search
Dr. David Alan Gilbertf3f491f2015-11-05 18:11:01 +00001609 */
Juan Quintela56e93d22015-05-07 19:33:31 +02001610static inline
Juan Quintelaa935e302017-03-21 15:36:51 +01001611unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
Juan Quintelaf20e2862017-03-21 16:19:05 +01001612 unsigned long start)
Juan Quintela56e93d22015-05-07 19:33:31 +02001613{
Juan Quintela6b6712e2017-03-22 15:18:04 +01001614 unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
1615 unsigned long *bitmap = rb->bmap;
Juan Quintela56e93d22015-05-07 19:33:31 +02001616 unsigned long next;
1617
Yury Kotovfbd162e2019-02-15 20:45:46 +03001618 if (ramblock_is_ignored(rb)) {
Cédric Le Goaterb895de52018-05-14 08:57:00 +02001619 return size;
1620 }
1621
Wei Wang6eeb63f2018-12-11 16:24:52 +08001622 /*
1623 * When the free page optimization is enabled, we need to check the bitmap
1624 * to send the non-free pages rather than all the pages in the bulk stage.
1625 */
1626 if (!rs->fpo_enabled && rs->ram_bulk_stage && start > 0) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01001627 next = start + 1;
Juan Quintela56e93d22015-05-07 19:33:31 +02001628 } else {
Juan Quintela6b6712e2017-03-22 15:18:04 +01001629 next = find_next_bit(bitmap, size, start);
Juan Quintela56e93d22015-05-07 19:33:31 +02001630 }
1631
Juan Quintela6b6712e2017-03-22 15:18:04 +01001632 return next;
Juan Quintela56e93d22015-05-07 19:33:31 +02001633}
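
/*
 * For illustration only: scanning a per-block dirty bitmap the way
 * migration_bitmap_find_dirty() does, with a simple stand-in for
 * find_next_bit().  Page numbers and bitmap size are made-up sample data.
 *
 *     #include <stdio.h>
 *     #include <limits.h>
 *
 *     #define EX_BITS_PER_LONG (sizeof(unsigned long) * CHAR_BIT)
 *
 *     // Returns the first set bit at or after start, or size if none.
 *     static unsigned long ex_find_next_bit(const unsigned long *bmap,
 *                                           unsigned long size,
 *                                           unsigned long start)
 *     {
 *         for (unsigned long i = start; i < size; i++) {
 *             if (bmap[i / EX_BITS_PER_LONG] &
 *                 (1UL << (i % EX_BITS_PER_LONG))) {
 *                 return i;
 *             }
 *         }
 *         return size;
 *     }
 *
 *     int main(void)
 *     {
 *         unsigned long npages = 128;
 *         unsigned long bmap[128 / EX_BITS_PER_LONG + 1] = { 0 };
 *
 *         bmap[5 / EX_BITS_PER_LONG]  |= 1UL << (5 % EX_BITS_PER_LONG);
 *         bmap[70 / EX_BITS_PER_LONG] |= 1UL << (70 % EX_BITS_PER_LONG);
 *
 *         for (unsigned long p = ex_find_next_bit(bmap, npages, 0);
 *              p < npages;
 *              p = ex_find_next_bit(bmap, npages, p + 1)) {
 *             printf("dirty page %lu\n", p);
 *         }
 *         return 0;
 *     }
 */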
1634
Juan Quintela06b10682017-03-21 15:18:05 +01001635static inline bool migration_bitmap_clear_dirty(RAMState *rs,
Juan Quintelaf20e2862017-03-21 16:19:05 +01001636 RAMBlock *rb,
1637 unsigned long page)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001638{
1639 bool ret;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001640
Wei Wang386a9072018-12-11 16:24:49 +08001641 qemu_mutex_lock(&rs->bitmap_mutex);
Juan Quintela6b6712e2017-03-22 15:18:04 +01001642 ret = test_and_clear_bit(page, rb->bmap);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001643
1644 if (ret) {
Juan Quintela0d8ec882017-03-13 21:21:41 +01001645 rs->migration_dirty_pages--;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001646 }
Wei Wang386a9072018-12-11 16:24:49 +08001647 qemu_mutex_unlock(&rs->bitmap_mutex);
1648
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001649 return ret;
1650}
1651
Juan Quintela15440dd2017-03-21 09:35:04 +01001652static void migration_bitmap_sync_range(RAMState *rs, RAMBlock *rb,
1653 ram_addr_t start, ram_addr_t length)
Juan Quintela56e93d22015-05-07 19:33:31 +02001654{
Juan Quintela0d8ec882017-03-13 21:21:41 +01001655 rs->migration_dirty_pages +=
Juan Quintela6b6712e2017-03-22 15:18:04 +01001656 cpu_physical_memory_sync_dirty_bitmap(rb, start, length,
Juan Quintela0d8ec882017-03-13 21:21:41 +01001657 &rs->num_dirty_pages_period);
Juan Quintela56e93d22015-05-07 19:33:31 +02001658}
1659
Juan Quintela3d0684b2017-03-23 15:06:39 +01001660/**
1661 * ram_pagesize_summary: calculate all the pagesizes of a VM
1662 *
1663 * Returns a summary bitmap of the page sizes of all RAMBlocks
1664 *
1665 * For VMs with just normal pages this is equivalent to the host page
1666 * size. If it's got some huge pages then it's the OR of all the
1667 * different page sizes.
Dr. David Alan Gilberte8ca1db2017-02-24 18:28:29 +00001668 */
1669uint64_t ram_pagesize_summary(void)
1670{
1671 RAMBlock *block;
1672 uint64_t summary = 0;
1673
Yury Kotovfbd162e2019-02-15 20:45:46 +03001674 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Dr. David Alan Gilberte8ca1db2017-02-24 18:28:29 +00001675 summary |= block->page_size;
1676 }
1677
1678 return summary;
1679}
1680
Xiao Guangrongaecbfe92019-01-11 14:37:30 +08001681uint64_t ram_get_total_transferred_pages(void)
1682{
1683 return ram_counters.normal + ram_counters.duplicate +
1684 compression_counters.pages + xbzrle_counters.pages;
1685}
1686
Xiao Guangrongb7340352018-06-04 17:55:12 +08001687static void migration_update_rates(RAMState *rs, int64_t end_time)
1688{
Xiao Guangrongbe8b02e2018-09-03 17:26:42 +08001689 uint64_t page_count = rs->target_page_count - rs->target_page_count_prev;
Xiao Guangrong76e03002018-09-06 15:01:00 +08001690 double compressed_size;
Xiao Guangrongb7340352018-06-04 17:55:12 +08001691
1692 /* calculate period counters */
1693 ram_counters.dirty_pages_rate = rs->num_dirty_pages_period * 1000
1694 / (end_time - rs->time_last_bitmap_sync);
1695
Xiao Guangrongbe8b02e2018-09-03 17:26:42 +08001696 if (!page_count) {
Xiao Guangrongb7340352018-06-04 17:55:12 +08001697 return;
1698 }
1699
1700 if (migrate_use_xbzrle()) {
1701 xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss -
Xiao Guangrongbe8b02e2018-09-03 17:26:42 +08001702 rs->xbzrle_cache_miss_prev) / page_count;
Xiao Guangrongb7340352018-06-04 17:55:12 +08001703 rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
1704 }
Xiao Guangrong76e03002018-09-06 15:01:00 +08001705
1706 if (migrate_use_compression()) {
1707 compression_counters.busy_rate = (double)(compression_counters.busy -
1708 rs->compress_thread_busy_prev) / page_count;
1709 rs->compress_thread_busy_prev = compression_counters.busy;
1710
1711 compressed_size = compression_counters.compressed_size -
1712 rs->compressed_size_prev;
1713 if (compressed_size) {
1714 double uncompressed_size = (compression_counters.pages -
1715 rs->compress_pages_prev) * TARGET_PAGE_SIZE;
1716
1717 /* Compression-Ratio = Uncompressed-size / Compressed-size */
1718 compression_counters.compression_rate =
1719 uncompressed_size / compressed_size;
1720
1721 rs->compress_pages_prev = compression_counters.pages;
1722 rs->compressed_size_prev = compression_counters.compressed_size;
1723 }
1724 }
Xiao Guangrongb7340352018-06-04 17:55:12 +08001725}
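
/*
 * For illustration only: the period-rate arithmetic migration_update_rates()
 * performs, run on made-up sample counters.  The factor of 1000 converts a
 * per-period page count into pages per second given a period measured in
 * milliseconds.
 *
 *     #include <stdint.h>
 *     #include <inttypes.h>
 *     #include <stdio.h>
 *
 *     int main(void)
 *     {
 *         uint64_t dirty_pages_period = 25000;   // pages dirtied this period
 *         int64_t  period_ms = 1200;             // time since last sync
 *         uint64_t cache_miss = 4200, cache_miss_prev = 1700;
 *         uint64_t page_count = 10000;           // target pages sent
 *
 *         uint64_t dirty_pages_rate = dirty_pages_period * 1000 / period_ms;
 *         double cache_miss_rate =
 *             (double)(cache_miss - cache_miss_prev) / page_count;
 *
 *         printf("dirty pages/s: %" PRIu64 "\n", dirty_pages_rate);
 *         printf("xbzrle cache miss rate: %.2f\n", cache_miss_rate);
 *         return 0;
 *     }
 */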
1726
Juan Quintela8d820d62017-03-13 19:35:50 +01001727static void migration_bitmap_sync(RAMState *rs)
Juan Quintela56e93d22015-05-07 19:33:31 +02001728{
1729 RAMBlock *block;
Juan Quintela56e93d22015-05-07 19:33:31 +02001730 int64_t end_time;
Juan Quintelac4bdf0c2017-03-28 14:59:54 +02001731 uint64_t bytes_xfer_now;
Juan Quintela56e93d22015-05-07 19:33:31 +02001732
Juan Quintela93604472017-06-06 19:49:03 +02001733 ram_counters.dirty_sync_count++;
Juan Quintela56e93d22015-05-07 19:33:31 +02001734
Juan Quintelaf664da82017-03-13 19:44:57 +01001735 if (!rs->time_last_bitmap_sync) {
1736 rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
Juan Quintela56e93d22015-05-07 19:33:31 +02001737 }
1738
1739 trace_migration_bitmap_sync_start();
Paolo Bonzini9c1f8f42016-09-22 16:08:31 +02001740 memory_global_dirty_log_sync();
Juan Quintela56e93d22015-05-07 19:33:31 +02001741
Juan Quintela108cfae2017-03-13 21:38:09 +01001742 qemu_mutex_lock(&rs->bitmap_mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02001743 rcu_read_lock();
Yury Kotovfbd162e2019-02-15 20:45:46 +03001744 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Juan Quintela15440dd2017-03-21 09:35:04 +01001745 migration_bitmap_sync_range(rs, block, 0, block->used_length);
Juan Quintela56e93d22015-05-07 19:33:31 +02001746 }
Balamuruhan S650af892018-06-12 14:20:09 +05301747 ram_counters.remaining = ram_bytes_remaining();
Juan Quintela56e93d22015-05-07 19:33:31 +02001748 rcu_read_unlock();
Juan Quintela108cfae2017-03-13 21:38:09 +01001749 qemu_mutex_unlock(&rs->bitmap_mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02001750
Juan Quintelaa66cd902017-03-28 15:02:43 +02001751 trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);
Chao Fan1ffb5df2017-03-14 09:55:07 +08001752
Juan Quintela56e93d22015-05-07 19:33:31 +02001753 end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
1754
1755 /* more than 1 second = 1000 milliseconds */
Juan Quintelaf664da82017-03-13 19:44:57 +01001756 if (end_time > rs->time_last_bitmap_sync + 1000) {
Juan Quintela93604472017-06-06 19:49:03 +02001757 bytes_xfer_now = ram_counters.transferred;
Felipe Franciosid693c6f2017-05-24 17:10:01 +01001758
Peter Lieven9ac78b62017-09-26 12:33:16 +02001759 /* During block migration the auto-converge logic incorrectly detects
1760 * that ram migration makes no progress. Avoid this by disabling the
1761 * throttling logic during the bulk phase of block migration. */
1762 if (migrate_auto_converge() && !blk_mig_bulk_active()) {
Juan Quintela56e93d22015-05-07 19:33:31 +02001763 /* The following detection logic can be refined later. For now:
1764 Check to see if the dirtied bytes are 50% more than the approx.
1765 amount of bytes that just got transferred since the last time we
Jason J. Herne070afca2015-09-08 13:12:35 -04001766 were in this routine. If that happens twice, start or increase
1767 throttling */
Jason J. Herne070afca2015-09-08 13:12:35 -04001768
Felipe Franciosid693c6f2017-05-24 17:10:01 +01001769 if ((rs->num_dirty_pages_period * TARGET_PAGE_SIZE >
Juan Quintelaeac74152017-03-28 14:59:01 +02001770 (bytes_xfer_now - rs->bytes_xfer_prev) / 2) &&
Felipe Franciosib4a3c642017-05-24 17:10:03 +01001771 (++rs->dirty_rate_high_cnt >= 2)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02001772 trace_migration_throttle();
Juan Quintela8d820d62017-03-13 19:35:50 +01001773 rs->dirty_rate_high_cnt = 0;
Jason J. Herne070afca2015-09-08 13:12:35 -04001774 mig_throttle_guest_down();
Felipe Franciosid693c6f2017-05-24 17:10:01 +01001775 }
Juan Quintela56e93d22015-05-07 19:33:31 +02001776 }
Jason J. Herne070afca2015-09-08 13:12:35 -04001777
Xiao Guangrongb7340352018-06-04 17:55:12 +08001778 migration_update_rates(rs, end_time);
1779
Xiao Guangrongbe8b02e2018-09-03 17:26:42 +08001780 rs->target_page_count_prev = rs->target_page_count;
Felipe Franciosid693c6f2017-05-24 17:10:01 +01001781
1782 /* reset period counters */
Juan Quintelaf664da82017-03-13 19:44:57 +01001783 rs->time_last_bitmap_sync = end_time;
Juan Quintelaa66cd902017-03-28 15:02:43 +02001784 rs->num_dirty_pages_period = 0;
Felipe Franciosid2a4d852017-05-24 17:10:02 +01001785 rs->bytes_xfer_prev = bytes_xfer_now;
Juan Quintela56e93d22015-05-07 19:33:31 +02001786 }
Dr. David Alan Gilbert4addcd42015-12-16 11:47:36 +00001787 if (migrate_use_events()) {
Peter Xu3ab72382018-08-15 21:37:37 +08001788 qapi_event_send_migration_pass(ram_counters.dirty_sync_count);
Dr. David Alan Gilbert4addcd42015-12-16 11:47:36 +00001789 }
Juan Quintela56e93d22015-05-07 19:33:31 +02001790}
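
/*
 * For illustration only: the auto-converge trigger evaluated at the end of
 * migration_bitmap_sync() above, run over two made-up sync periods.  The
 * 4 KiB page size and the byte counts are sample values.
 *
 *     #include <stdint.h>
 *     #include <stdio.h>
 *
 *     #define EX_PAGE_SIZE 4096
 *
 *     int main(void)
 *     {
 *         uint64_t dirty_pages_period[2] = { 60000, 65000 };
 *         uint64_t bytes_xfer_delta[2]   = { 200ULL << 20, 180ULL << 20 };
 *         int dirty_rate_high_cnt = 0;
 *
 *         for (int i = 0; i < 2; i++) {
 *             // Throttle when the guest dirtied more than ~50% of what was
 *             // transferred in the same period, twice in a row.
 *             if (dirty_pages_period[i] * EX_PAGE_SIZE >
 *                 bytes_xfer_delta[i] / 2 &&
 *                 ++dirty_rate_high_cnt >= 2) {
 *                 dirty_rate_high_cnt = 0;
 *                 printf("period %d: throttle the guest down\n", i);
 *             }
 *         }
 *         return 0;
 *     }
 */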
1791
Wei Wangbd227062018-12-11 16:24:51 +08001792static void migration_bitmap_sync_precopy(RAMState *rs)
1793{
1794 Error *local_err = NULL;
1795
1796 /*
1797 * The current notifier usage is just an optimization to migration, so we
1798 * don't stop the normal migration process in the error case.
1799 */
1800 if (precopy_notify(PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC, &local_err)) {
1801 error_report_err(local_err);
1802 }
1803
1804 migration_bitmap_sync(rs);
1805
1806 if (precopy_notify(PRECOPY_NOTIFY_AFTER_BITMAP_SYNC, &local_err)) {
1807 error_report_err(local_err);
1808 }
1809}
1810
Juan Quintela56e93d22015-05-07 19:33:31 +02001811/**
Xiao Guangrong6c97ec52018-08-21 16:10:22 +08001812 * save_zero_page_to_file: send the zero page to the file
1813 *
1814 * Returns the size of data written to the file, 0 means the page is not
1815 * a zero page
1816 *
1817 * @rs: current RAM state
1818 * @file: the file where the data is saved
1819 * @block: block that contains the page we want to send
1820 * @offset: offset inside the block for the page
1821 */
1822static int save_zero_page_to_file(RAMState *rs, QEMUFile *file,
1823 RAMBlock *block, ram_addr_t offset)
1824{
1825 uint8_t *p = block->host + offset;
1826 int len = 0;
1827
1828 if (is_zero_range(p, TARGET_PAGE_SIZE)) {
1829 len += save_page_header(rs, file, block, offset | RAM_SAVE_FLAG_ZERO);
1830 qemu_put_byte(file, 0);
1831 len += 1;
1832 }
1833 return len;
1834}
1835
1836/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001837 * save_zero_page: send the zero page to the stream
Juan Quintela56e93d22015-05-07 19:33:31 +02001838 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001839 * Returns the number of pages written.
Juan Quintela56e93d22015-05-07 19:33:31 +02001840 *
Juan Quintelaf7ccd612017-03-13 20:30:21 +01001841 * @rs: current RAM state
Juan Quintela56e93d22015-05-07 19:33:31 +02001842 * @block: block that contains the page we want to send
1843 * @offset: offset inside the block for the page
Juan Quintela56e93d22015-05-07 19:33:31 +02001844 */
Juan Quintela7faccdc2018-01-08 18:58:17 +01001845static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
Juan Quintela56e93d22015-05-07 19:33:31 +02001846{
Xiao Guangrong6c97ec52018-08-21 16:10:22 +08001847 int len = save_zero_page_to_file(rs, rs->f, block, offset);
Juan Quintela56e93d22015-05-07 19:33:31 +02001848
Xiao Guangrong6c97ec52018-08-21 16:10:22 +08001849 if (len) {
Juan Quintela93604472017-06-06 19:49:03 +02001850 ram_counters.duplicate++;
Xiao Guangrong6c97ec52018-08-21 16:10:22 +08001851 ram_counters.transferred += len;
1852 return 1;
Juan Quintela56e93d22015-05-07 19:33:31 +02001853 }
Xiao Guangrong6c97ec52018-08-21 16:10:22 +08001854 return -1;
Juan Quintela56e93d22015-05-07 19:33:31 +02001855}
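
/*
 * For illustration only: one well-known way to test for an all-zero page,
 * which is the decision is_zero_range() makes for save_zero_page() above.
 * The memcmp-against-itself trick is just an example; QEMU's own
 * buffer_is_zero() implementation is separate and more heavily optimised.
 *
 *     #include <stdbool.h>
 *     #include <stdint.h>
 *     #include <string.h>
 *
 *     #define EX_PAGE_SIZE 4096
 *
 *     static bool ex_buffer_is_zero(const uint8_t *buf, size_t len)
 *     {
 *         // buf[0] == 0 and buf[0..len-2] == buf[1..len-1] implies all zero.
 *         return len == 0 ||
 *                (buf[0] == 0 && memcmp(buf, buf + 1, len - 1) == 0);
 *     }
 *
 *     int main(void)
 *     {
 *         static uint8_t page[EX_PAGE_SIZE];    // zero-initialised
 *
 *         return ex_buffer_is_zero(page, EX_PAGE_SIZE) ? 0 : 1;
 *     }
 */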
1856
Juan Quintela57273092017-03-20 22:25:28 +01001857static void ram_release_pages(const char *rbname, uint64_t offset, int pages)
Pavel Butsykin53f09a12017-02-03 18:23:20 +03001858{
Juan Quintela57273092017-03-20 22:25:28 +01001859 if (!migrate_release_ram() || !migration_in_postcopy()) {
Pavel Butsykin53f09a12017-02-03 18:23:20 +03001860 return;
1861 }
1862
Juan Quintelaaaa20642017-03-21 11:35:24 +01001863 ram_discard_range(rbname, offset, pages << TARGET_PAGE_BITS);
Pavel Butsykin53f09a12017-02-03 18:23:20 +03001864}
1865
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08001866/*
1867 * @pages: the number of pages written by the control path,
1868 * < 0 - error
1869 * > 0 - number of pages written
1870 *
1871 * Return true if the page has been saved, otherwise false is returned.
1872 */
1873static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
1874 int *pages)
1875{
1876 uint64_t bytes_xmit = 0;
1877 int ret;
1878
1879 *pages = -1;
1880 ret = ram_control_save_page(rs->f, block->offset, offset, TARGET_PAGE_SIZE,
1881 &bytes_xmit);
1882 if (ret == RAM_SAVE_CONTROL_NOT_SUPP) {
1883 return false;
1884 }
1885
1886 if (bytes_xmit) {
1887 ram_counters.transferred += bytes_xmit;
1888 *pages = 1;
1889 }
1890
1891 if (ret == RAM_SAVE_CONTROL_DELAYED) {
1892 return true;
1893 }
1894
1895 if (bytes_xmit > 0) {
1896 ram_counters.normal++;
1897 } else if (bytes_xmit == 0) {
1898 ram_counters.duplicate++;
1899 }
1900
1901 return true;
1902}
1903
Xiao Guangrong65dacaa2018-03-30 15:51:27 +08001904/*
1905 * directly send the page to the stream
1906 *
1907 * Returns the number of pages written.
1908 *
1909 * @rs: current RAM state
1910 * @block: block that contains the page we want to send
1911 * @offset: offset inside the block for the page
1912 * @buf: the page to be sent
1913 * @async: send the page asynchronously
1914 */
1915static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
1916 uint8_t *buf, bool async)
1917{
1918 ram_counters.transferred += save_page_header(rs, rs->f, block,
1919 offset | RAM_SAVE_FLAG_PAGE);
1920 if (async) {
1921 qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE,
1922 migrate_release_ram() &
1923 migration_in_postcopy());
1924 } else {
1925 qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE);
1926 }
1927 ram_counters.transferred += TARGET_PAGE_SIZE;
1928 ram_counters.normal++;
1929 return 1;
1930}
1931
Juan Quintela56e93d22015-05-07 19:33:31 +02001932/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001933 * ram_save_page: send the given page to the stream
Juan Quintela56e93d22015-05-07 19:33:31 +02001934 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001935 * Returns the number of pages written.
Dr. David Alan Gilbert3fd3c4b2015-12-10 16:31:46 +00001936 * < 0 - error
1937 * >=0 - Number of pages written - this might legally be 0
1938 * if xbzrle noticed the page was the same.
Juan Quintela56e93d22015-05-07 19:33:31 +02001939 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001940 * @rs: current RAM state
Juan Quintela56e93d22015-05-07 19:33:31 +02001941 * @block: block that contains the page we want to send
1942 * @offset: offset inside the block for the page
1943 * @last_stage: if we are at the completion stage
Juan Quintela56e93d22015-05-07 19:33:31 +02001944 */
Juan Quintelaa0a8aa12017-03-20 22:29:07 +01001945static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
Juan Quintela56e93d22015-05-07 19:33:31 +02001946{
1947 int pages = -1;
Juan Quintela56e93d22015-05-07 19:33:31 +02001948 uint8_t *p;
Juan Quintela56e93d22015-05-07 19:33:31 +02001949 bool send_async = true;
zhanghailianga08f6892016-01-15 11:37:44 +08001950 RAMBlock *block = pss->block;
Juan Quintelaa935e302017-03-21 15:36:51 +01001951 ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08001952 ram_addr_t current_addr = block->offset + offset;
Juan Quintela56e93d22015-05-07 19:33:31 +02001953
Dr. David Alan Gilbert2f68e392015-08-13 11:51:30 +01001954 p = block->host + offset;
Dr. David Alan Gilbert1db9d8e2017-04-26 19:37:21 +01001955 trace_ram_save_page(block->idstr, (uint64_t)offset, p);
Juan Quintela56e93d22015-05-07 19:33:31 +02001956
Juan Quintela56e93d22015-05-07 19:33:31 +02001957 XBZRLE_cache_lock();
Xiao Guangrongd7400a32018-03-30 15:51:26 +08001958 if (!rs->ram_bulk_stage && !migration_in_postcopy() &&
1959 migrate_use_xbzrle()) {
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08001960 pages = save_xbzrle_page(rs, &p, current_addr, block,
1961 offset, last_stage);
1962 if (!last_stage) {
1963 /* Can't send this cached data async, since the cache page
1964 * might get updated before it gets to the wire
Juan Quintela56e93d22015-05-07 19:33:31 +02001965 */
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08001966 send_async = false;
Juan Quintela56e93d22015-05-07 19:33:31 +02001967 }
1968 }
1969
1970 /* XBZRLE overflow or normal page */
1971 if (pages == -1) {
Xiao Guangrong65dacaa2018-03-30 15:51:27 +08001972 pages = save_normal_page(rs, block, offset, p, send_async);
Juan Quintela56e93d22015-05-07 19:33:31 +02001973 }
1974
1975 XBZRLE_cache_unlock();
1976
1977 return pages;
1978}
1979
Juan Quintelab9ee2f72016-01-15 11:40:13 +01001980static int ram_save_multifd_page(RAMState *rs, RAMBlock *block,
1981 ram_addr_t offset)
1982{
Juan Quintelab9ee2f72016-01-15 11:40:13 +01001983 multifd_queue_page(block, offset);
Juan Quintelab9ee2f72016-01-15 11:40:13 +01001984 ram_counters.normal++;
1985
1986 return 1;
1987}
1988
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001989static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
Xiao Guangrong6ef37712018-08-21 16:10:23 +08001990 ram_addr_t offset, uint8_t *source_buf)
Juan Quintela56e93d22015-05-07 19:33:31 +02001991{
Juan Quintela53518d92017-05-04 11:46:24 +02001992 RAMState *rs = ram_state;
Liang Lia7a9a882016-05-05 15:32:57 +08001993 uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001994 bool zero_page = false;
Xiao Guangrong6ef37712018-08-21 16:10:23 +08001995 int ret;
Juan Quintela56e93d22015-05-07 19:33:31 +02001996
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001997 if (save_zero_page_to_file(rs, f, block, offset)) {
1998 zero_page = true;
1999 goto exit;
2000 }
2001
Xiao Guangrong6ef37712018-08-21 16:10:23 +08002002 save_page_header(rs, f, block, offset | RAM_SAVE_FLAG_COMPRESS_PAGE);
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08002003
2004 /*
2005 * copy it to an internal buffer to avoid it being modified by the VM
2006 * so that we can catch any error during compression and
2007 * decompression
2008 */
2009 memcpy(source_buf, p, TARGET_PAGE_SIZE);
Xiao Guangrong6ef37712018-08-21 16:10:23 +08002010 ret = qemu_put_compression_data(f, stream, source_buf, TARGET_PAGE_SIZE);
2011 if (ret < 0) {
2012 qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
Liang Lib3be2892016-05-05 15:32:54 +08002013 error_report("compressed data failed!");
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002014 return false;
Liang Lib3be2892016-05-05 15:32:54 +08002015 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002016
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002017exit:
Xiao Guangrong6ef37712018-08-21 16:10:23 +08002018 ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1);
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002019 return zero_page;
2020}
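
/*
 * For illustration only: compressing a single page with zlib's one-shot
 * compress2() helper.  The real path above keeps a per-thread z_stream and
 * goes through qemu_put_compression_data(); this sketch only shows the
 * underlying zlib step.
 *
 *     #include <stdint.h>
 *     #include <stdio.h>
 *     #include <string.h>
 *     #include <zlib.h>
 *
 *     #define EX_PAGE_SIZE 4096
 *
 *     int main(void)
 *     {
 *         static uint8_t page[EX_PAGE_SIZE];
 *         static uint8_t out[EX_PAGE_SIZE + 128];
 *         uLongf out_len = sizeof(out);
 *
 *         memset(page, 0xab, 1024);          // some non-zero guest content
 *
 *         if (compress2(out, &out_len, page, EX_PAGE_SIZE,
 *                       Z_DEFAULT_COMPRESSION) != Z_OK) {
 *             fprintf(stderr, "compression failed\n");
 *             return 1;
 *         }
 *         printf("page of %d bytes compressed to %lu bytes\n",
 *                EX_PAGE_SIZE, (unsigned long)out_len);
 *         return 0;
 *     }
 */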
2021
2022static void
2023update_compress_thread_counts(const CompressParam *param, int bytes_xmit)
2024{
Xiao Guangrong76e03002018-09-06 15:01:00 +08002025 ram_counters.transferred += bytes_xmit;
2026
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002027 if (param->zero_page) {
2028 ram_counters.duplicate++;
Xiao Guangrong76e03002018-09-06 15:01:00 +08002029 return;
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002030 }
Xiao Guangrong76e03002018-09-06 15:01:00 +08002031
2032 /* 8 means a header with RAM_SAVE_FLAG_CONTINUE. */
2033 compression_counters.compressed_size += bytes_xmit - 8;
2034 compression_counters.pages++;
Juan Quintela56e93d22015-05-07 19:33:31 +02002035}
2036
Xiao Guangrong32b05492018-09-06 15:01:01 +08002037static bool save_page_use_compression(RAMState *rs);
2038
Juan Quintelace25d332017-03-15 11:00:51 +01002039static void flush_compressed_data(RAMState *rs)
Juan Quintela56e93d22015-05-07 19:33:31 +02002040{
2041 int idx, len, thread_count;
2042
Xiao Guangrong32b05492018-09-06 15:01:01 +08002043 if (!save_page_use_compression(rs)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02002044 return;
2045 }
2046 thread_count = migrate_compress_threads();
Liang Lia7a9a882016-05-05 15:32:57 +08002047
Liang Li0d9f9a52016-05-05 15:32:59 +08002048 qemu_mutex_lock(&comp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02002049 for (idx = 0; idx < thread_count; idx++) {
Liang Lia7a9a882016-05-05 15:32:57 +08002050 while (!comp_param[idx].done) {
Liang Li0d9f9a52016-05-05 15:32:59 +08002051 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02002052 }
Liang Lia7a9a882016-05-05 15:32:57 +08002053 }
Liang Li0d9f9a52016-05-05 15:32:59 +08002054 qemu_mutex_unlock(&comp_done_lock);
Liang Lia7a9a882016-05-05 15:32:57 +08002055
2056 for (idx = 0; idx < thread_count; idx++) {
2057 qemu_mutex_lock(&comp_param[idx].mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08002058 if (!comp_param[idx].quit) {
Juan Quintelace25d332017-03-15 11:00:51 +01002059 len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002060 /*
2061 * it's safe to fetch zero_page without holding comp_done_lock
2062 * as there is no further request submitted to the thread,
2063 * i.e, the thread should be waiting for a request at this point.
2064 */
2065 update_compress_thread_counts(&comp_param[idx], len);
Juan Quintela56e93d22015-05-07 19:33:31 +02002066 }
Liang Lia7a9a882016-05-05 15:32:57 +08002067 qemu_mutex_unlock(&comp_param[idx].mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02002068 }
2069}
2070
2071static inline void set_compress_params(CompressParam *param, RAMBlock *block,
2072 ram_addr_t offset)
2073{
2074 param->block = block;
2075 param->offset = offset;
2076}
2077
Juan Quintelace25d332017-03-15 11:00:51 +01002078static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block,
2079 ram_addr_t offset)
Juan Quintela56e93d22015-05-07 19:33:31 +02002080{
2081 int idx, thread_count, bytes_xmit = -1, pages = -1;
Xiao Guangrong1d588722018-08-21 16:10:20 +08002082 bool wait = migrate_compress_wait_thread();
Juan Quintela56e93d22015-05-07 19:33:31 +02002083
2084 thread_count = migrate_compress_threads();
Liang Li0d9f9a52016-05-05 15:32:59 +08002085 qemu_mutex_lock(&comp_done_lock);
Xiao Guangrong1d588722018-08-21 16:10:20 +08002086retry:
2087 for (idx = 0; idx < thread_count; idx++) {
2088 if (comp_param[idx].done) {
2089 comp_param[idx].done = false;
2090 bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file);
2091 qemu_mutex_lock(&comp_param[idx].mutex);
2092 set_compress_params(&comp_param[idx], block, offset);
2093 qemu_cond_signal(&comp_param[idx].cond);
2094 qemu_mutex_unlock(&comp_param[idx].mutex);
2095 pages = 1;
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002096 update_compress_thread_counts(&comp_param[idx], bytes_xmit);
Juan Quintela56e93d22015-05-07 19:33:31 +02002097 break;
Juan Quintela56e93d22015-05-07 19:33:31 +02002098 }
2099 }
Xiao Guangrong1d588722018-08-21 16:10:20 +08002100
2101 /*
2102 * wait for the free thread if the user specifies 'compress-wait-thread',
2103 * otherwise we will post the page out in the main thread as normal page.
2104 */
2105 if (pages < 0 && wait) {
2106 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
2107 goto retry;
2108 }
Liang Li0d9f9a52016-05-05 15:32:59 +08002109 qemu_mutex_unlock(&comp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02002110
2111 return pages;
2112}
2113
2114/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002115 * find_dirty_block: find the next dirty page and update any state
2116 * associated with the search process.
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002117 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002118 * Returns whether a page is found
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002119 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002120 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002121 * @pss: data about the state of the current dirty page scan
2122 * @again: set to false if the search has scanned the whole of RAM
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002123 */
Juan Quintelaf20e2862017-03-21 16:19:05 +01002124static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002125{
Juan Quintelaf20e2862017-03-21 16:19:05 +01002126 pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
Juan Quintela6f37bb82017-03-13 19:26:29 +01002127 if (pss->complete_round && pss->block == rs->last_seen_block &&
Juan Quintelaa935e302017-03-21 15:36:51 +01002128 pss->page >= rs->last_page) {
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002129 /*
2130 * We've been once around the RAM and haven't found anything.
2131 * Give up.
2132 */
2133 *again = false;
2134 return false;
2135 }
Juan Quintelaa935e302017-03-21 15:36:51 +01002136 if ((pss->page << TARGET_PAGE_BITS) >= pss->block->used_length) {
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002137 /* Didn't find anything in this RAM Block */
Juan Quintelaa935e302017-03-21 15:36:51 +01002138 pss->page = 0;
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002139 pss->block = QLIST_NEXT_RCU(pss->block, next);
2140 if (!pss->block) {
Xiao Guangrong48df9d82018-09-06 15:00:59 +08002141 /*
2142 * If memory migration starts over, we will meet a dirtied page
2143 * which may still exist in the compression threads' ring, so we
2144 * should flush the compressed data to make sure the new page
2145 * is not overwritten by the old one in the destination.
2146 *
2147 * Also, if xbzrle is on, stop using the data compression at this
2148 * point. In theory, xbzrle can do better than compression.
2149 */
2150 flush_compressed_data(rs);
2151
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002152 /* Hit the end of the list */
2153 pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
2154 /* Flag that we've looped */
2155 pss->complete_round = true;
Juan Quintela6f37bb82017-03-13 19:26:29 +01002156 rs->ram_bulk_stage = false;
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002157 }
2158 /* Didn't find anything this time, but try again on the new block */
2159 *again = true;
2160 return false;
2161 } else {
2162 /* Can go around again, but... */
2163 *again = true;
2164 /* We've found something so probably don't need to */
2165 return true;
2166 }
2167}
2168
Juan Quintela3d0684b2017-03-23 15:06:39 +01002169/**
2170 * unqueue_page: gets a page off the queue
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002171 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002172 * Helper for 'get_queued_page' - gets a page off the queue
2173 *
2174 * Returns the block of the page (or NULL if none available)
2175 *
Juan Quintelaec481c62017-03-20 22:12:40 +01002176 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002177 * @offset: used to return the offset within the RAMBlock
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002178 */
Juan Quintelaf20e2862017-03-21 16:19:05 +01002179static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002180{
2181 RAMBlock *block = NULL;
2182
Xiao Guangrongae526e32018-08-21 16:10:25 +08002183 if (QSIMPLEQ_EMPTY_ATOMIC(&rs->src_page_requests)) {
2184 return NULL;
2185 }
2186
Juan Quintelaec481c62017-03-20 22:12:40 +01002187 qemu_mutex_lock(&rs->src_page_req_mutex);
2188 if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
2189 struct RAMSrcPageRequest *entry =
2190 QSIMPLEQ_FIRST(&rs->src_page_requests);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002191 block = entry->rb;
2192 *offset = entry->offset;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002193
2194 if (entry->len > TARGET_PAGE_SIZE) {
2195 entry->len -= TARGET_PAGE_SIZE;
2196 entry->offset += TARGET_PAGE_SIZE;
2197 } else {
2198 memory_region_unref(block->mr);
Juan Quintelaec481c62017-03-20 22:12:40 +01002199 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002200 g_free(entry);
Dr. David Alan Gilberte03a34f2018-06-13 11:26:42 +01002201 migration_consume_urgent_request();
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002202 }
2203 }
Juan Quintelaec481c62017-03-20 22:12:40 +01002204 qemu_mutex_unlock(&rs->src_page_req_mutex);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002205
2206 return block;
2207}
2208
Juan Quintela3d0684b2017-03-23 15:06:39 +01002209/**
2210 * get_queued_page: unqueue a page from the postcopy requests
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002211 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002212 * Skips pages that are already sent (!dirty)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002213 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002214 * Returns whether a queued page is found
2215 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002216 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002217 * @pss: data about the state of the current dirty page scan
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002218 */
Juan Quintelaf20e2862017-03-21 16:19:05 +01002219static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002220{
2221 RAMBlock *block;
2222 ram_addr_t offset;
2223 bool dirty;
2224
2225 do {
Juan Quintelaf20e2862017-03-21 16:19:05 +01002226 block = unqueue_page(rs, &offset);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002227 /*
2228 * We're sending this page, and since it's postcopy nothing else
2229 * will dirty it, and we must make sure it doesn't get sent again
2230 * even if this queue request was received after the background
2231 * search already sent it.
2232 */
2233 if (block) {
Juan Quintelaf20e2862017-03-21 16:19:05 +01002234 unsigned long page;
2235
Juan Quintela6b6712e2017-03-22 15:18:04 +01002236 page = offset >> TARGET_PAGE_BITS;
2237 dirty = test_bit(page, block->bmap);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002238 if (!dirty) {
Juan Quintela06b10682017-03-21 15:18:05 +01002239 trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
Juan Quintela6b6712e2017-03-22 15:18:04 +01002240 page, test_bit(page, block->unsentmap));
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002241 } else {
Juan Quintelaf20e2862017-03-21 16:19:05 +01002242 trace_get_queued_page(block->idstr, (uint64_t)offset, page);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002243 }
2244 }
2245
2246 } while (block && !dirty);
2247
2248 if (block) {
2249 /*
2250 * As soon as we start servicing pages out of order, we have
2251 * to kill the bulk stage, since the bulk stage assumes
2252 * (in migration_bitmap_find_and_reset_dirty) that every page is
2253 * dirty; that's no longer true.
2254 */
Juan Quintela6f37bb82017-03-13 19:26:29 +01002255 rs->ram_bulk_stage = false;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002256
2257 /*
2258 * We want the background search to continue from the queued page
2259 * since the guest is likely to want other pages near to the page
2260 * it just requested.
2261 */
2262 pss->block = block;
Juan Quintelaa935e302017-03-21 15:36:51 +01002263 pss->page = offset >> TARGET_PAGE_BITS;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002264 }
2265
2266 return !!block;
2267}
2268
Juan Quintela56e93d22015-05-07 19:33:31 +02002269/**
Juan Quintela5e58f962017-04-03 22:06:54 +02002270 * migration_page_queue_free: drop any remaining pages in the ram
2271 * request queue
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002272 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002273 * It should be empty at the end anyway, but in error cases there may
2274 * be some left. In case there are any pages left, we drop them.
2275 *
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002276 */
Juan Quintela83c13382017-05-04 11:45:01 +02002277static void migration_page_queue_free(RAMState *rs)
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002278{
Juan Quintelaec481c62017-03-20 22:12:40 +01002279 struct RAMSrcPageRequest *mspr, *next_mspr;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002280 /* This queue generally should be empty - but in the case of a failed
2281 * migration it might have some entries left in it.
2282 */
2283 rcu_read_lock();
Juan Quintelaec481c62017-03-20 22:12:40 +01002284 QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002285 memory_region_unref(mspr->rb->mr);
Juan Quintelaec481c62017-03-20 22:12:40 +01002286 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002287 g_free(mspr);
2288 }
2289 rcu_read_unlock();
2290}
2291
2292/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002293 * ram_save_queue_pages: queue the page for transmission
2294 *
2295 * A request from postcopy destination for example.
2296 *
2297 * Returns zero on success or negative on error
2298 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002299 * @rbname: Name of the RAMBlock of the request. NULL means the
2300 * same as the last one.
2301 * @start: starting address from the start of the RAMBlock
2302 * @len: length (in bytes) to send
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002303 */
Juan Quintela96506892017-03-14 18:41:03 +01002304int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002305{
2306 RAMBlock *ramblock;
Juan Quintela53518d92017-05-04 11:46:24 +02002307 RAMState *rs = ram_state;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002308
Juan Quintela93604472017-06-06 19:49:03 +02002309 ram_counters.postcopy_requests++;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002310 rcu_read_lock();
2311 if (!rbname) {
2312 /* Reuse last RAMBlock */
Juan Quintela68a098f2017-03-14 13:48:42 +01002313 ramblock = rs->last_req_rb;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002314
2315 if (!ramblock) {
2316 /*
2317 * Shouldn't happen, we can't reuse the last RAMBlock if
2318 * it's the 1st request.
2319 */
2320 error_report("ram_save_queue_pages no previous block");
2321 goto err;
2322 }
2323 } else {
2324 ramblock = qemu_ram_block_by_name(rbname);
2325
2326 if (!ramblock) {
2327 /* We shouldn't be asked for a non-existent RAMBlock */
2328 error_report("ram_save_queue_pages no block '%s'", rbname);
2329 goto err;
2330 }
Juan Quintela68a098f2017-03-14 13:48:42 +01002331 rs->last_req_rb = ramblock;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002332 }
2333 trace_ram_save_queue_pages(ramblock->idstr, start, len);
2334 if (start+len > ramblock->used_length) {
Juan Quintela9458ad62015-11-10 17:42:05 +01002335 error_report("%s request overrun start=" RAM_ADDR_FMT " len="
2336 RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002337 __func__, start, len, ramblock->used_length);
2338 goto err;
2339 }
2340
Juan Quintelaec481c62017-03-20 22:12:40 +01002341 struct RAMSrcPageRequest *new_entry =
2342 g_malloc0(sizeof(struct RAMSrcPageRequest));
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002343 new_entry->rb = ramblock;
2344 new_entry->offset = start;
2345 new_entry->len = len;
2346
2347 memory_region_ref(ramblock->mr);
Juan Quintelaec481c62017-03-20 22:12:40 +01002348 qemu_mutex_lock(&rs->src_page_req_mutex);
2349 QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
Dr. David Alan Gilberte03a34f2018-06-13 11:26:42 +01002350 migration_make_urgent_request();
Juan Quintelaec481c62017-03-20 22:12:40 +01002351 qemu_mutex_unlock(&rs->src_page_req_mutex);
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002352 rcu_read_unlock();
2353
2354 return 0;
2355
2356err:
2357 rcu_read_unlock();
2358 return -1;
2359}
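
/*
 * For illustration only: a minimal FIFO of page requests in the spirit of
 * rs->src_page_requests.  ram_save_queue_pages() appends (block, offset,
 * len) entries and unqueue_page() later pops them.  No locking or RAMBlock
 * refcounting here; the real queue holds src_page_req_mutex and takes a
 * memory_region_ref() per entry.
 *
 *     #include <stdint.h>
 *     #include <stdio.h>
 *     #include <stdlib.h>
 *
 *     typedef struct ExPageRequest {
 *         const char *rbname;
 *         uint64_t offset;
 *         uint64_t len;
 *         struct ExPageRequest *next;
 *     } ExPageRequest;
 *
 *     static ExPageRequest *ex_head;
 *     static ExPageRequest **ex_tail = &ex_head;
 *
 *     static void ex_queue_page(const char *rbname, uint64_t offset,
 *                               uint64_t len)
 *     {
 *         ExPageRequest *req = calloc(1, sizeof(*req));
 *
 *         req->rbname = rbname;
 *         req->offset = offset;
 *         req->len = len;
 *         *ex_tail = req;
 *         ex_tail = &req->next;
 *     }
 *
 *     static ExPageRequest *ex_unqueue_page(void)
 *     {
 *         ExPageRequest *req = ex_head;
 *
 *         if (req) {
 *             ex_head = req->next;
 *             if (!ex_head) {
 *                 ex_tail = &ex_head;
 *             }
 *         }
 *         return req;
 *     }
 *
 *     int main(void)
 *     {
 *         ex_queue_page("pc.ram", 0x200000, 4096);
 *         ex_queue_page("pc.ram", 0x7f0000, 4096);
 *
 *         for (ExPageRequest *r = ex_unqueue_page(); r; r = ex_unqueue_page()) {
 *             printf("send %s offset=0x%llx len=%llu\n", r->rbname,
 *                    (unsigned long long)r->offset,
 *                    (unsigned long long)r->len);
 *             free(r);
 *         }
 *         return 0;
 *     }
 */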
2360
Xiao Guangrongd7400a32018-03-30 15:51:26 +08002361static bool save_page_use_compression(RAMState *rs)
2362{
2363 if (!migrate_use_compression()) {
2364 return false;
2365 }
2366
2367 /*
2368 * If xbzrle is on, stop using the data compression after first
2369 * round of migration even if compression is enabled. In theory,
2370 * xbzrle can do better than compression.
2371 */
2372 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
2373 return true;
2374 }
2375
2376 return false;
2377}
2378
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002379/*
2380 * try to compress the page before posting it out, return true if the page
2381 * has been properly handled by compression, otherwise other paths
2382 * need to handle it
2383 */
2384static bool save_compress_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
2385{
2386 if (!save_page_use_compression(rs)) {
2387 return false;
2388 }
2389
2390 /*
2391 * When starting the process of a new block, the first page of
2392 * the block should be sent out before other pages in the same
2393 * block, and all the pages in the last block should have been sent
2394 * out. Keeping this order is important, because the 'cont' flag
2395 * is used to avoid resending the block name.
2396 *
2397 * We post the first page as a normal page because compression takes
2398 * a lot of CPU time.
2399 */
2400 if (block != rs->last_sent_block) {
2401 flush_compressed_data(rs);
2402 return false;
2403 }
2404
2405 if (compress_page_with_multi_thread(rs, block, offset) > 0) {
2406 return true;
2407 }
2408
Xiao Guangrong76e03002018-09-06 15:01:00 +08002409 compression_counters.busy++;
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002410 return false;
2411}
2412
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002413/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002414 * ram_save_target_page: save one target page
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002415 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002416 * Returns the number of pages written
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002417 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002418 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002419 * @pss: data about the page we want to send
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002420 * @last_stage: if we are at the completion stage
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002421 */
Juan Quintelaa0a8aa12017-03-20 22:29:07 +01002422static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
Juan Quintelaf20e2862017-03-21 16:19:05 +01002423 bool last_stage)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002424{
Xiao Guangronga8ec91f2018-03-30 15:51:25 +08002425 RAMBlock *block = pss->block;
2426 ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
2427 int res;
2428
2429 if (control_save_page(rs, block, offset, &res)) {
2430 return res;
2431 }
2432
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002433 if (save_compress_page(rs, block, offset)) {
2434 return 1;
Xiao Guangrongd7400a32018-03-30 15:51:26 +08002435 }
2436
2437 res = save_zero_page(rs, block, offset);
2438 if (res > 0) {
2439 /* Must let xbzrle know, otherwise a previous (now 0'd) cached
2440 * page would be stale
2441 */
2442 if (!save_page_use_compression(rs)) {
2443 XBZRLE_cache_lock();
2444 xbzrle_cache_zero_page(rs, block->offset + offset);
2445 XBZRLE_cache_unlock();
2446 }
2447 ram_release_pages(block->idstr, offset, res);
2448 return res;
2449 }
2450
Xiao Guangrongda3f56c2018-03-30 15:51:28 +08002451 /*
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002452 * do not use multifd for compression as the first page in the new
2453 * block should be posted out before sending the compressed page
Xiao Guangrongda3f56c2018-03-30 15:51:28 +08002454 */
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002455 if (!save_page_use_compression(rs) && migrate_use_multifd()) {
Juan Quintelab9ee2f72016-01-15 11:40:13 +01002456 return ram_save_multifd_page(rs, block, offset);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002457 }
2458
Xiao Guangrong1faa5662018-03-30 15:51:24 +08002459 return ram_save_page(rs, pss, last_stage);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002460}
2461
2462/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002463 * ram_save_host_page: save a whole host page
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002464 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002465 * Starting at *offset send pages up to the end of the current host
2466 * page. It's valid for the initial offset to point into the middle of
2467 * a host page, in which case the remainder of the host page is sent.
2468 * Only dirty target pages are sent. Note that the host page size may
2469 * be a huge page for this block.
Dr. David Alan Gilbert1eb3fc02017-05-17 17:58:09 +01002470 * The saving stops at the boundary of the used_length of the block
2471 * if the RAMBlock isn't a multiple of the host page size.
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002472 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002473 * Returns the number of pages written or negative on error
2474 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002475 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002476 * @ms: current migration state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002477 * @pss: data about the page we want to send
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002478 * @last_stage: if we are at the completion stage
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002479 */
Juan Quintelaa0a8aa12017-03-20 22:29:07 +01002480static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
Juan Quintelaf20e2862017-03-21 16:19:05 +01002481 bool last_stage)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002482{
2483 int tmppages, pages = 0;
Juan Quintelaa935e302017-03-21 15:36:51 +01002484 size_t pagesize_bits =
2485 qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
Dr. David Alan Gilbert4c011c32017-02-24 18:28:39 +00002486
Yury Kotovfbd162e2019-02-15 20:45:46 +03002487 if (ramblock_is_ignored(pss->block)) {
Cédric Le Goaterb895de52018-05-14 08:57:00 +02002488 error_report("block %s should not be migrated !", pss->block->idstr);
2489 return 0;
2490 }
2491
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002492 do {
Xiao Guangrong1faa5662018-03-30 15:51:24 +08002493 /* Check if the page is dirty and if it is send it */
2494 if (!migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
2495 pss->page++;
2496 continue;
2497 }
2498
Juan Quintelaf20e2862017-03-21 16:19:05 +01002499 tmppages = ram_save_target_page(rs, pss, last_stage);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002500 if (tmppages < 0) {
2501 return tmppages;
2502 }
2503
2504 pages += tmppages;
Xiao Guangrong1faa5662018-03-30 15:51:24 +08002505 if (pss->block->unsentmap) {
2506 clear_bit(pss->page, pss->block->unsentmap);
2507 }
2508
Juan Quintelaa935e302017-03-21 15:36:51 +01002509 pss->page++;
Dr. David Alan Gilbert1eb3fc02017-05-17 17:58:09 +01002510 } while ((pss->page & (pagesize_bits - 1)) &&
2511 offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS));
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002512
2513 /* The offset we leave with is the last one we looked at */
Juan Quintelaa935e302017-03-21 15:36:51 +01002514 pss->page--;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002515 return pages;
2516}
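
/*
 * For illustration only: the host-page/target-page arithmetic driving the
 * loop in ram_save_host_page() above, using 4 KiB target pages inside a
 * 2 MiB host huge page as a sample configuration.
 *
 *     #include <stdio.h>
 *
 *     int main(void)
 *     {
 *         unsigned long target_page_bits = 12;          // 4 KiB pages
 *         unsigned long host_page_size = 2UL << 20;     // 2 MiB huge page
 *         unsigned long pagesize_bits = host_page_size >> target_page_bits;
 *         unsigned long page = 130;       // arbitrary starting target page
 *         int sent = 0;
 *
 *         // Keep "sending" target pages until a host-page boundary is hit,
 *         // mirroring the (pss->page & (pagesize_bits - 1)) condition.
 *         do {
 *             sent++;                     // stand-in for ram_save_target_page()
 *             page++;
 *         } while (page & (pagesize_bits - 1));
 *
 *         printf("sent %d target pages, stopped at target page %lu\n",
 *                sent, page);
 *         return 0;
 *     }
 */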
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002517
2518/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002519 * ram_find_and_save_block: finds a dirty page and sends it to f
Juan Quintela56e93d22015-05-07 19:33:31 +02002520 *
2521 * Called within an RCU critical section.
2522 *
Xiao Guangronge8f37352018-09-03 17:26:44 +08002523 * Returns the number of pages written where zero means no dirty pages,
2524 * or negative on error
Juan Quintela56e93d22015-05-07 19:33:31 +02002525 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002526 * @rs: current RAM state
Juan Quintela56e93d22015-05-07 19:33:31 +02002527 * @last_stage: if we are at the completion stage
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002528 *
2529 * On systems where host-page-size > target-page-size it will send all the
2530 * pages in a host page that are dirty.
Juan Quintela56e93d22015-05-07 19:33:31 +02002531 */
2532
Juan Quintelace25d332017-03-15 11:00:51 +01002533static int ram_find_and_save_block(RAMState *rs, bool last_stage)
Juan Quintela56e93d22015-05-07 19:33:31 +02002534{
Dr. David Alan Gilbertb8fb8cb2015-09-23 15:27:10 +01002535 PageSearchStatus pss;
Juan Quintela56e93d22015-05-07 19:33:31 +02002536 int pages = 0;
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002537 bool again, found;
Juan Quintela56e93d22015-05-07 19:33:31 +02002538
Ashijeet Acharya0827b9e2017-02-08 19:58:45 +05302539 /* No dirty page as there is zero RAM */
2540 if (!ram_bytes_total()) {
2541 return pages;
2542 }
2543
Juan Quintela6f37bb82017-03-13 19:26:29 +01002544 pss.block = rs->last_seen_block;
Juan Quintelaa935e302017-03-21 15:36:51 +01002545 pss.page = rs->last_page;
Dr. David Alan Gilbertb8fb8cb2015-09-23 15:27:10 +01002546 pss.complete_round = false;
2547
2548 if (!pss.block) {
2549 pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
2550 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002551
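    /*
     * Each iteration first tries to serve a page queued by a postcopy
     * request; if the queue is empty it falls back to a linear scan of
     * the dirty bitmap. The loop ends as soon as ram_save_host_page()
     * returned a non-zero result or the whole RAM has been scanned
     * without finding anything to send.
     */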
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002552 do {
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002553 again = true;
Juan Quintelaf20e2862017-03-21 16:19:05 +01002554 found = get_queued_page(rs, &pss);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002555
2556 if (!found) {
2557 /* priority queue empty, so just search for something dirty */
Juan Quintelaf20e2862017-03-21 16:19:05 +01002558 found = find_dirty_block(rs, &pss, &again);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002559 }
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002560
2561 if (found) {
Juan Quintelaf20e2862017-03-21 16:19:05 +01002562 pages = ram_save_host_page(rs, &pss, last_stage);
Juan Quintela56e93d22015-05-07 19:33:31 +02002563 }
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002564 } while (!pages && again);
Juan Quintela56e93d22015-05-07 19:33:31 +02002565
Juan Quintela6f37bb82017-03-13 19:26:29 +01002566 rs->last_seen_block = pss.block;
Juan Quintelaa935e302017-03-21 15:36:51 +01002567 rs->last_page = pss.page;
Juan Quintela56e93d22015-05-07 19:33:31 +02002568
2569 return pages;
2570}
2571
2572void acct_update_position(QEMUFile *f, size_t size, bool zero)
2573{
2574 uint64_t pages = size / TARGET_PAGE_SIZE;
Juan Quintelaf7ccd612017-03-13 20:30:21 +01002575
Juan Quintela56e93d22015-05-07 19:33:31 +02002576 if (zero) {
Juan Quintela93604472017-06-06 19:49:03 +02002577 ram_counters.duplicate += pages;
Juan Quintela56e93d22015-05-07 19:33:31 +02002578 } else {
Juan Quintela93604472017-06-06 19:49:03 +02002579 ram_counters.normal += pages;
2580 ram_counters.transferred += size;
Juan Quintela56e93d22015-05-07 19:33:31 +02002581 qemu_update_position(f, size);
2582 }
2583}
2584
Yury Kotovfbd162e2019-02-15 20:45:46 +03002585static uint64_t ram_bytes_total_common(bool count_ignored)
Juan Quintela56e93d22015-05-07 19:33:31 +02002586{
2587 RAMBlock *block;
2588 uint64_t total = 0;
2589
2590 rcu_read_lock();
Yury Kotovfbd162e2019-02-15 20:45:46 +03002591 if (count_ignored) {
2592 RAMBLOCK_FOREACH_MIGRATABLE(block) {
2593 total += block->used_length;
2594 }
2595 } else {
2596 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
2597 total += block->used_length;
2598 }
Peter Xu99e15582017-05-12 12:17:39 +08002599 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002600 rcu_read_unlock();
2601 return total;
2602}
2603
Yury Kotovfbd162e2019-02-15 20:45:46 +03002604uint64_t ram_bytes_total(void)
2605{
2606 return ram_bytes_total_common(false);
2607}
2608
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002609static void xbzrle_load_setup(void)
Juan Quintela56e93d22015-05-07 19:33:31 +02002610{
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002611 XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
Juan Quintela56e93d22015-05-07 19:33:31 +02002612}
2613
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002614static void xbzrle_load_cleanup(void)
2615{
2616 g_free(XBZRLE.decoded_buf);
2617 XBZRLE.decoded_buf = NULL;
2618}
2619
Peter Xu7d7c96b2017-10-19 14:31:58 +08002620static void ram_state_cleanup(RAMState **rsp)
2621{
Dr. David Alan Gilbertb9ccaf62018-02-12 16:03:39 +00002622 if (*rsp) {
2623 migration_page_queue_free(*rsp);
2624 qemu_mutex_destroy(&(*rsp)->bitmap_mutex);
2625 qemu_mutex_destroy(&(*rsp)->src_page_req_mutex);
2626 g_free(*rsp);
2627 *rsp = NULL;
2628 }
Peter Xu7d7c96b2017-10-19 14:31:58 +08002629}
2630
Peter Xu84593a02017-10-19 14:31:59 +08002631static void xbzrle_cleanup(void)
2632{
2633 XBZRLE_cache_lock();
2634 if (XBZRLE.cache) {
2635 cache_fini(XBZRLE.cache);
2636 g_free(XBZRLE.encoded_buf);
2637 g_free(XBZRLE.current_buf);
2638 g_free(XBZRLE.zero_target_page);
2639 XBZRLE.cache = NULL;
2640 XBZRLE.encoded_buf = NULL;
2641 XBZRLE.current_buf = NULL;
2642 XBZRLE.zero_target_page = NULL;
2643 }
2644 XBZRLE_cache_unlock();
2645}
2646
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002647static void ram_save_cleanup(void *opaque)
Juan Quintela56e93d22015-05-07 19:33:31 +02002648{
Juan Quintela53518d92017-05-04 11:46:24 +02002649 RAMState **rsp = opaque;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002650 RAMBlock *block;
Juan Quintelaeb859c52017-03-13 21:51:55 +01002651
Li Zhijian2ff64032015-07-02 20:18:05 +08002652 /* the caller holds the iothread lock or is in a bh, so there is
2653 * no write race against this migration_bitmap
2654 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002655 memory_global_dirty_log_stop();
2656
Yury Kotovfbd162e2019-02-15 20:45:46 +03002657 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01002658 g_free(block->bmap);
2659 block->bmap = NULL;
2660 g_free(block->unsentmap);
2661 block->unsentmap = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02002662 }
2663
Peter Xu84593a02017-10-19 14:31:59 +08002664 xbzrle_cleanup();
Juan Quintelaf0afa332017-06-28 11:52:28 +02002665 compress_threads_save_cleanup();
Peter Xu7d7c96b2017-10-19 14:31:58 +08002666 ram_state_cleanup(rsp);
Juan Quintela56e93d22015-05-07 19:33:31 +02002667}
2668
Juan Quintela6f37bb82017-03-13 19:26:29 +01002669static void ram_state_reset(RAMState *rs)
Juan Quintela56e93d22015-05-07 19:33:31 +02002670{
Juan Quintela6f37bb82017-03-13 19:26:29 +01002671 rs->last_seen_block = NULL;
2672 rs->last_sent_block = NULL;
Juan Quintela269ace22017-03-21 15:23:31 +01002673 rs->last_page = 0;
Juan Quintela6f37bb82017-03-13 19:26:29 +01002674 rs->last_version = ram_list.version;
2675 rs->ram_bulk_stage = true;
Wei Wang6eeb63f2018-12-11 16:24:52 +08002676 rs->fpo_enabled = false;
Juan Quintela56e93d22015-05-07 19:33:31 +02002677}
2678
2679#define MAX_WAIT 50 /* ms, half buffered_file limit */
2680
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00002681/*
2682 * 'expected' is the value you expect the bitmap mostly to be full
2683 * of; it won't bother printing lines that are all this value.
2685 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002686void ram_debug_dump_bitmap(unsigned long *todump, bool expected,
2687 unsigned long pages)
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00002688{
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00002689 int64_t cur;
2690 int64_t linelen = 128;
2691 char linebuf[129];
2692
Juan Quintela6b6712e2017-03-22 15:18:04 +01002693 for (cur = 0; cur < pages; cur += linelen) {
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00002694 int64_t curb;
2695 bool found = false;
2696 /*
2697 * Last line; catch the case where the line length
2698 * is longer than remaining ram
2699 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002700 if (cur + linelen > pages) {
2701 linelen = pages - cur;
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00002702 }
2703 for (curb = 0; curb < linelen; curb++) {
2704 bool thisbit = test_bit(cur + curb, todump);
2705 linebuf[curb] = thisbit ? '1' : '.';
2706 found = found || (thisbit != expected);
2707 }
2708 if (found) {
2709 linebuf[curb] = '\0';
2710 fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
2711 }
2712 }
2713}
2714
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002715/* **** functions for postcopy ***** */
2716
Pavel Butsykinced1c612017-02-03 18:23:21 +03002717void ram_postcopy_migrated_memory_release(MigrationState *ms)
2718{
2719 struct RAMBlock *block;
Pavel Butsykinced1c612017-02-03 18:23:21 +03002720
Yury Kotovfbd162e2019-02-15 20:45:46 +03002721 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01002722 unsigned long *bitmap = block->bmap;
2723 unsigned long range = block->used_length >> TARGET_PAGE_BITS;
2724 unsigned long run_start = find_next_zero_bit(bitmap, range, 0);
Pavel Butsykinced1c612017-02-03 18:23:21 +03002725
2726 while (run_start < range) {
2727 unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
Juan Quintelaaaa20642017-03-21 11:35:24 +01002728 ram_discard_range(block->idstr, run_start << TARGET_PAGE_BITS,
Pavel Butsykinced1c612017-02-03 18:23:21 +03002729 (run_end - run_start) << TARGET_PAGE_BITS);
2730 run_start = find_next_zero_bit(bitmap, range, run_end + 1);
2731 }
2732 }
2733}
2734
Juan Quintela3d0684b2017-03-23 15:06:39 +01002735/**
2736 * postcopy_send_discard_bm_ram: discard a RAMBlock
2737 *
2738 * Returns zero on success
2739 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002740 * Callback from postcopy_each_ram_send_discard for each RAMBlock
2741 * Note: At this point the 'unsentmap' is the processed bitmap combined
2742 * with the dirtymap; so a '1' means it's either dirty or unsent.
Juan Quintela3d0684b2017-03-23 15:06:39 +01002743 *
2744 * @ms: current migration state
2745 * @pds: state for postcopy
2746 * @block: RAMBlock being discarded
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002748 */
2749static int postcopy_send_discard_bm_ram(MigrationState *ms,
2750 PostcopyDiscardState *pds,
Juan Quintela6b6712e2017-03-22 15:18:04 +01002751 RAMBlock *block)
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002752{
Juan Quintela6b6712e2017-03-22 15:18:04 +01002753 unsigned long end = block->used_length >> TARGET_PAGE_BITS;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002754 unsigned long current;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002755 unsigned long *unsentmap = block->unsentmap;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002756
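    /*
     * Walk runs of set bits in the unsentmap and turn each run into a
     * single discard command; e.g. if pages 3..5 are still marked, one
     * discard of length 3 starting at page 3 is sent.
     */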
Juan Quintela6b6712e2017-03-22 15:18:04 +01002757 for (current = 0; current < end; ) {
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002758 unsigned long one = find_next_bit(unsentmap, end, current);
2759
2760 if (one <= end) {
2761 unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1);
2762 unsigned long discard_length;
2763
2764 if (zero >= end) {
2765 discard_length = end - one;
2766 } else {
2767 discard_length = zero - one;
2768 }
Dr. David Alan Gilbertd688c622016-06-13 12:16:40 +01002769 if (discard_length) {
2770 postcopy_discard_send_range(ms, pds, one, discard_length);
2771 }
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002772 current = one + discard_length;
2773 } else {
2774 current = one;
2775 }
2776 }
2777
2778 return 0;
2779}
2780
Juan Quintela3d0684b2017-03-23 15:06:39 +01002781/**
2782 * postcopy_each_ram_send_discard: discard all RAMBlocks
2783 *
2784 * Returns 0 for success or negative for error
2785 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002786 * Utility for the outgoing postcopy code.
2787 * Calls postcopy_send_discard_bm_ram for each RAMBlock
2788 * passing it bitmap indexes and name.
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002789 * (qemu_ram_foreach_block ends up passing unscaled lengths
2790 * which would mean postcopy code would have to deal with target page)
Juan Quintela3d0684b2017-03-23 15:06:39 +01002791 *
2792 * @ms: current migration state
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002793 */
2794static int postcopy_each_ram_send_discard(MigrationState *ms)
2795{
2796 struct RAMBlock *block;
2797 int ret;
2798
Yury Kotovfbd162e2019-02-15 20:45:46 +03002799 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01002800 PostcopyDiscardState *pds =
2801 postcopy_discard_send_init(ms, block->idstr);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002802
2803 /*
2804 * Postcopy sends chunks of bitmap over the wire, but it
2805 * just needs indexes at this point, which avoids it having
2806 * target page specific code.
2807 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002808 ret = postcopy_send_discard_bm_ram(ms, pds, block);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002809 postcopy_discard_send_finish(ms, pds);
2810 if (ret) {
2811 return ret;
2812 }
2813 }
2814
2815 return 0;
2816}
2817
Juan Quintela3d0684b2017-03-23 15:06:39 +01002818/**
2819 * postcopy_chunk_hostpages_pass: canocalize bitmap in hostpages
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002820 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002821 * Helper for postcopy_chunk_hostpages; it's called twice to
2822 * canonicalize the two bitmaps, which are similar, but one is
2823 * inverted.
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002824 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002825 * Postcopy requires that all target pages in a hostpage are dirty or
2826 * clean, not a mix. This function canonicalizes the bitmaps.
2827 *
2828 * @ms: current migration state
2829 * @unsent_pass: if true we need to canonicalize partially unsent host pages
2830 * otherwise we need to canonicalize partially dirty host pages
2831 * @block: block that contains the page we want to canonicalize
2832 * @pds: state for postcopy
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002833 */
2834static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
2835 RAMBlock *block,
2836 PostcopyDiscardState *pds)
2837{
Juan Quintela53518d92017-05-04 11:46:24 +02002838 RAMState *rs = ram_state;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002839 unsigned long *bitmap = block->bmap;
2840 unsigned long *unsentmap = block->unsentmap;
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00002841 unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002842 unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002843 unsigned long run_start;
2844
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00002845 if (block->page_size == TARGET_PAGE_SIZE) {
2846 /* Easy case - TPS==HPS for a non-huge page RAMBlock */
2847 return;
2848 }
2849
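    /*
     * Illustrative example: with 2 MiB host pages and 4 KiB target pages,
     * host_ratio is 512. A run starting at target page 700 begins in the
     * middle of the host page covering pages 512..1023, so run_start is
     * rounded down to 512 and that whole host page is discarded and
     * re-marked (unsent or dirty, depending on the pass).
     */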
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002850 if (unsent_pass) {
2851 /* Find a sent page */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002852 run_start = find_next_zero_bit(unsentmap, pages, 0);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002853 } else {
2854 /* Find a dirty page */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002855 run_start = find_next_bit(bitmap, pages, 0);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002856 }
2857
Juan Quintela6b6712e2017-03-22 15:18:04 +01002858 while (run_start < pages) {
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002859 bool do_fixup = false;
2860 unsigned long fixup_start_addr;
2861 unsigned long host_offset;
2862
2863 /*
2864 * If the start of this run of pages is in the middle of a host
2865 * page, then we need to fixup this host page.
2866 */
2867 host_offset = run_start % host_ratio;
2868 if (host_offset) {
2869 do_fixup = true;
2870 run_start -= host_offset;
2871 fixup_start_addr = run_start;
2872 /* For the next pass */
2873 run_start = run_start + host_ratio;
2874 } else {
2875 /* Find the end of this run */
2876 unsigned long run_end;
2877 if (unsent_pass) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01002878 run_end = find_next_bit(unsentmap, pages, run_start + 1);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002879 } else {
Juan Quintela6b6712e2017-03-22 15:18:04 +01002880 run_end = find_next_zero_bit(bitmap, pages, run_start + 1);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002881 }
2882 /*
2883 * If the end isn't at the start of a host page, then the
2884 * run doesn't finish at the end of a host page
2885 * and we need to discard.
2886 */
2887 host_offset = run_end % host_ratio;
2888 if (host_offset) {
2889 do_fixup = true;
2890 fixup_start_addr = run_end - host_offset;
2891 /*
2892 * This host page has gone, the next loop iteration starts
2893 * from after the fixup
2894 */
2895 run_start = fixup_start_addr + host_ratio;
2896 } else {
2897 /*
2898 * No discards on this iteration, next loop starts from
2899 * next sent/dirty page
2900 */
2901 run_start = run_end + 1;
2902 }
2903 }
2904
2905 if (do_fixup) {
2906 unsigned long page;
2907
2908 /* Tell the destination to discard this page */
2909 if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
2910 /* For the unsent_pass we:
2911 * discard partially sent pages
2912 * For the !unsent_pass (dirty) we:
2913 * discard partially dirty pages that were sent
2914 * (any partially sent pages were already discarded
2915 * by the previous unsent_pass)
2916 */
2917 postcopy_discard_send_range(ms, pds, fixup_start_addr,
2918 host_ratio);
2919 }
2920
2921 /* Clean up the bitmap */
2922 for (page = fixup_start_addr;
2923 page < fixup_start_addr + host_ratio; page++) {
2924 /* All pages in this host page are now not sent */
2925 set_bit(page, unsentmap);
2926
2927 /*
2928 * Remark them as dirty, updating the count for any pages
2929 * that weren't previously dirty.
2930 */
Juan Quintela0d8ec882017-03-13 21:21:41 +01002931 rs->migration_dirty_pages += !test_and_set_bit(page, bitmap);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002932 }
2933 }
2934
2935 if (unsent_pass) {
2936 /* Find the next sent page for the next iteration */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002937 run_start = find_next_zero_bit(unsentmap, pages, run_start);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002938 } else {
2939 /* Find the next dirty page for the next iteration */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002940 run_start = find_next_bit(bitmap, pages, run_start);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002941 }
2942 }
2943}
2944
Juan Quintela3d0684b2017-03-23 15:06:39 +01002945/**
2946 * postcopy_chunk_hostpages: discard any partially sent host page
2947 *
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002948 * Utility for the outgoing postcopy code.
2949 *
2950 * Discard any partially sent host-page size chunks, mark any partially
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00002951 * dirty host-page size chunks as all dirty. Here the host page size
2952 * is that of the particular RAMBlock, i.e. it might be a huge page
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002953 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002954 * Returns zero on success
2955 *
2956 * @ms: current migration state
Juan Quintela6b6712e2017-03-22 15:18:04 +01002957 * @block: block we want to work with
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002958 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002959static int postcopy_chunk_hostpages(MigrationState *ms, RAMBlock *block)
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002960{
Juan Quintela6b6712e2017-03-22 15:18:04 +01002961 PostcopyDiscardState *pds =
2962 postcopy_discard_send_init(ms, block->idstr);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002963
Juan Quintela6b6712e2017-03-22 15:18:04 +01002964 /* First pass: Discard all partially sent host pages */
2965 postcopy_chunk_hostpages_pass(ms, true, block, pds);
2966 /*
2967 * Second pass: Ensure that all partially dirty host pages are made
2968 * fully dirty.
2969 */
2970 postcopy_chunk_hostpages_pass(ms, false, block, pds);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002971
Juan Quintela6b6712e2017-03-22 15:18:04 +01002972 postcopy_discard_send_finish(ms, pds);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002973 return 0;
2974}
2975
Juan Quintela3d0684b2017-03-23 15:06:39 +01002976/**
2977 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
2978 *
2979 * Returns zero on success
2980 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002981 * Transmit the set of pages to be discarded after precopy to the target
2982 * these are pages that:
2983 * a) Have been previously transmitted but are now dirty again
2984 * b) Pages that have never been transmitted, this ensures that
2985 * any pages on the destination that have been mapped by background
2986 * tasks get discarded (transparent huge pages is the specific concern)
2987 * Hopefully this is pretty sparse
Juan Quintela3d0684b2017-03-23 15:06:39 +01002988 *
2989 * @ms: current migration state
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002990 */
2991int ram_postcopy_send_discard_bitmap(MigrationState *ms)
2992{
Juan Quintela53518d92017-05-04 11:46:24 +02002993 RAMState *rs = ram_state;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002994 RAMBlock *block;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002995 int ret;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002996
2997 rcu_read_lock();
2998
2999 /* This should be our last sync, the src is now paused */
Juan Quintelaeb859c52017-03-13 21:51:55 +01003000 migration_bitmap_sync(rs);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003001
Juan Quintela6b6712e2017-03-22 15:18:04 +01003002 /* Easiest way to make sure we don't resume in the middle of a host-page */
3003 rs->last_seen_block = NULL;
3004 rs->last_sent_block = NULL;
3005 rs->last_page = 0;
3006
Yury Kotovfbd162e2019-02-15 20:45:46 +03003007 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01003008 unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
3009 unsigned long *bitmap = block->bmap;
3010 unsigned long *unsentmap = block->unsentmap;
3011
3012 if (!unsentmap) {
3013 /* We don't have a safe way to resize the sentmap, so
3014 * if the bitmap was resized it will be NULL at this
3015 * point.
3016 */
3017 error_report("migration ram resized during precopy phase");
3018 rcu_read_unlock();
3019 return -EINVAL;
3020 }
3021 /* Deal with TPS != HPS and huge pages */
3022 ret = postcopy_chunk_hostpages(ms, block);
3023 if (ret) {
3024 rcu_read_unlock();
3025 return ret;
3026 }
3027
3028 /*
3029 * Update the unsentmap to be unsentmap = unsentmap | dirty
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003030 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01003031 bitmap_or(unsentmap, unsentmap, bitmap, pages);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003032#ifdef DEBUG_POSTCOPY
Juan Quintela6b6712e2017-03-22 15:18:04 +01003033 ram_debug_dump_bitmap(unsentmap, true, pages);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003034#endif
Juan Quintela6b6712e2017-03-22 15:18:04 +01003035 }
3036 trace_ram_postcopy_send_discard_bitmap();
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003037
3038 ret = postcopy_each_ram_send_discard(ms);
3039 rcu_read_unlock();
3040
3041 return ret;
3042}
3043
Juan Quintela3d0684b2017-03-23 15:06:39 +01003044/**
3045 * ram_discard_range: discard dirtied pages at the beginning of postcopy
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003046 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01003047 * Returns zero on success
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003048 *
Juan Quintela36449152017-03-23 15:11:59 +01003049 * @rbname: name of the RAMBlock of the request. NULL means the
3050 * same as the last one.
Juan Quintela3d0684b2017-03-23 15:06:39 +01003051 * @start: RAMBlock starting page
3052 * @length: RAMBlock size
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003053 */
Juan Quintelaaaa20642017-03-21 11:35:24 +01003054int ram_discard_range(const char *rbname, uint64_t start, size_t length)
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003055{
3056 int ret = -1;
3057
Juan Quintela36449152017-03-23 15:11:59 +01003058 trace_ram_discard_range(rbname, start, length);
Dr. David Alan Gilbertd3a50382017-02-24 18:28:32 +00003059
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003060 rcu_read_lock();
Juan Quintela36449152017-03-23 15:11:59 +01003061 RAMBlock *rb = qemu_ram_block_by_name(rbname);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003062
3063 if (!rb) {
Juan Quintela36449152017-03-23 15:11:59 +01003064 error_report("ram_discard_range: Failed to find block '%s'", rbname);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003065 goto err;
3066 }
3067
Peter Xu814bb082018-07-23 20:33:02 +08003068 /*
3069 * On source VM, we don't need to update the received bitmap since
3070 * we don't even have one.
3071 */
3072 if (rb->receivedmap) {
3073 bitmap_clear(rb->receivedmap, start >> qemu_target_page_bits(),
3074 length >> qemu_target_page_bits());
3075 }
3076
Dr. David Alan Gilbertd3a50382017-02-24 18:28:32 +00003077 ret = ram_block_discard_range(rb, start, length);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003078
3079err:
3080 rcu_read_unlock();
3081
3082 return ret;
3083}
3084
Peter Xu84593a02017-10-19 14:31:59 +08003085/*
3086 * For every allocation, we will try not to crash the VM if the
3087 * allocation failed.
3088 */
3089static int xbzrle_init(void)
3090{
3091 Error *local_err = NULL;
3092
3093 if (!migrate_use_xbzrle()) {
3094 return 0;
3095 }
3096
3097 XBZRLE_cache_lock();
3098
3099 XBZRLE.zero_target_page = g_try_malloc0(TARGET_PAGE_SIZE);
3100 if (!XBZRLE.zero_target_page) {
3101 error_report("%s: Error allocating zero page", __func__);
3102 goto err_out;
3103 }
3104
3105 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(),
3106 TARGET_PAGE_SIZE, &local_err);
3107 if (!XBZRLE.cache) {
3108 error_report_err(local_err);
3109 goto free_zero_page;
3110 }
3111
3112 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
3113 if (!XBZRLE.encoded_buf) {
3114 error_report("%s: Error allocating encoded_buf", __func__);
3115 goto free_cache;
3116 }
3117
3118 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
3119 if (!XBZRLE.current_buf) {
3120 error_report("%s: Error allocating current_buf", __func__);
3121 goto free_encoded_buf;
3122 }
3123
3124 /* We are all good */
3125 XBZRLE_cache_unlock();
3126 return 0;
3127
3128free_encoded_buf:
3129 g_free(XBZRLE.encoded_buf);
3130 XBZRLE.encoded_buf = NULL;
3131free_cache:
3132 cache_fini(XBZRLE.cache);
3133 XBZRLE.cache = NULL;
3134free_zero_page:
3135 g_free(XBZRLE.zero_target_page);
3136 XBZRLE.zero_target_page = NULL;
3137err_out:
3138 XBZRLE_cache_unlock();
3139 return -ENOMEM;
3140}
3141
Juan Quintela53518d92017-05-04 11:46:24 +02003142static int ram_state_init(RAMState **rsp)
Juan Quintela56e93d22015-05-07 19:33:31 +02003143{
Peter Xu7d00ee62017-10-19 14:31:57 +08003144 *rsp = g_try_new0(RAMState, 1);
3145
3146 if (!*rsp) {
3147 error_report("%s: Init ramstate fail", __func__);
3148 return -1;
3149 }
Juan Quintela53518d92017-05-04 11:46:24 +02003150
3151 qemu_mutex_init(&(*rsp)->bitmap_mutex);
3152 qemu_mutex_init(&(*rsp)->src_page_req_mutex);
3153 QSIMPLEQ_INIT(&(*rsp)->src_page_requests);
Juan Quintela56e93d22015-05-07 19:33:31 +02003154
Peter Xu7d00ee62017-10-19 14:31:57 +08003155 /*
3156 * Count the total number of pages used by ram blocks not including any
3157 * gaps due to alignment or unplugs.
3158 */
3159 (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
3160
3161 ram_state_reset(*rsp);
3162
3163 return 0;
3164}
3165
Peter Xud6eff5d2017-10-19 14:32:00 +08003166static void ram_list_init_bitmaps(void)
3167{
3168 RAMBlock *block;
3169 unsigned long pages;
3170
3171 /* Skip setting bitmap if there is no RAM */
3172 if (ram_bytes_total()) {
Yury Kotovfbd162e2019-02-15 20:45:46 +03003173 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Peter Xud6eff5d2017-10-19 14:32:00 +08003174 pages = block->max_length >> TARGET_PAGE_BITS;
3175 block->bmap = bitmap_new(pages);
3176 bitmap_set(block->bmap, 0, pages);
3177 if (migrate_postcopy_ram()) {
3178 block->unsentmap = bitmap_new(pages);
3179 bitmap_set(block->unsentmap, 0, pages);
3180 }
3181 }
3182 }
3183}
3184
3185static void ram_init_bitmaps(RAMState *rs)
3186{
3187 /* For memory_global_dirty_log_start below. */
3188 qemu_mutex_lock_iothread();
3189 qemu_mutex_lock_ramlist();
3190 rcu_read_lock();
3191
3192 ram_list_init_bitmaps();
3193 memory_global_dirty_log_start();
Wei Wangbd227062018-12-11 16:24:51 +08003194 migration_bitmap_sync_precopy(rs);
Peter Xud6eff5d2017-10-19 14:32:00 +08003195
3196 rcu_read_unlock();
3197 qemu_mutex_unlock_ramlist();
3198 qemu_mutex_unlock_iothread();
3199}
3200
Peter Xu7d00ee62017-10-19 14:31:57 +08003201static int ram_init_all(RAMState **rsp)
3202{
Peter Xu7d00ee62017-10-19 14:31:57 +08003203 if (ram_state_init(rsp)) {
3204 return -1;
3205 }
3206
Peter Xu84593a02017-10-19 14:31:59 +08003207 if (xbzrle_init()) {
3208 ram_state_cleanup(rsp);
3209 return -1;
Juan Quintela56e93d22015-05-07 19:33:31 +02003210 }
3211
Peter Xud6eff5d2017-10-19 14:32:00 +08003212 ram_init_bitmaps(*rsp);
zhanghailianga91246c2016-10-27 14:42:59 +08003213
3214 return 0;
3215}
3216
Peter Xu08614f32018-05-02 18:47:33 +08003217static void ram_state_resume_prepare(RAMState *rs, QEMUFile *out)
3218{
3219 RAMBlock *block;
3220 uint64_t pages = 0;
3221
3222 /*
3223 * Postcopy is not using xbzrle/compression, so no need for that.
3224 * Also, since the source is already halted, we don't need to care
3225 * about dirty page logging either.
3226 */
3227
Yury Kotovfbd162e2019-02-15 20:45:46 +03003228 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Peter Xu08614f32018-05-02 18:47:33 +08003229 pages += bitmap_count_one(block->bmap,
3230 block->used_length >> TARGET_PAGE_BITS);
3231 }
3232
3233 /* This may not be aligned with current bitmaps. Recalculate. */
3234 rs->migration_dirty_pages = pages;
3235
3236 rs->last_seen_block = NULL;
3237 rs->last_sent_block = NULL;
3238 rs->last_page = 0;
3239 rs->last_version = ram_list.version;
3240 /*
3241 * Disable the bulk stage, otherwise we'll resend the whole RAM no
3242 * matter what we have sent.
3243 */
3244 rs->ram_bulk_stage = false;
3245
3246 /* Update RAMState cache of output QEMUFile */
3247 rs->f = out;
3248
3249 trace_ram_state_resume_prepare(pages);
3250}
3251
Juan Quintela3d0684b2017-03-23 15:06:39 +01003252/*
Wei Wang6bcb05f2018-12-11 16:24:50 +08003253 * This function clears bits of the free pages reported by the caller from the
3254 * migration dirty bitmap. @addr is the host address corresponding to the
3255 * start of the continuous guest free pages, and @len is the total bytes of
3256 * those pages.
3257 */
3258void qemu_guest_free_page_hint(void *addr, size_t len)
3259{
3260 RAMBlock *block;
3261 ram_addr_t offset;
3262 size_t used_len, start, npages;
3263 MigrationState *s = migrate_get_current();
3264
3265 /* This function is currently expected to be used during live migration */
3266 if (!migration_is_setup_or_active(s->state)) {
3267 return;
3268 }
3269
3270 for (; len > 0; len -= used_len, addr += used_len) {
3271 block = qemu_ram_block_from_host(addr, false, &offset);
3272 if (unlikely(!block || offset >= block->used_length)) {
3273 /*
3274 * The implementation might not support RAMBlock resize during
3275 * live migration, but it could happen in theory with future
3276 * updates. So we add a check here to capture that case.
3277 */
3278 error_report_once("%s unexpected error", __func__);
3279 return;
3280 }
3281
3282 if (len <= block->used_length - offset) {
3283 used_len = len;
3284 } else {
3285 used_len = block->used_length - offset;
3286 }
3287
3288 start = offset >> TARGET_PAGE_BITS;
3289 npages = used_len >> TARGET_PAGE_BITS;
3290
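        /*
         * Drop these pages from the dirty bitmap under bitmap_mutex and
         * adjust the dirty page count, so the sender skips them instead
         * of transferring free guest memory.
         */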
3291 qemu_mutex_lock(&ram_state->bitmap_mutex);
3292 ram_state->migration_dirty_pages -=
3293 bitmap_count_one_with_offset(block->bmap, start, npages);
3294 bitmap_clear(block->bmap, start, npages);
3295 qemu_mutex_unlock(&ram_state->bitmap_mutex);
3296 }
3297}
3298
3299/*
Juan Quintela3d0684b2017-03-23 15:06:39 +01003300 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
zhanghailianga91246c2016-10-27 14:42:59 +08003301 * a long-running RCU critical section. When RCU reclaims in the code
3302 * start to become numerous it will be necessary to reduce the
3303 * granularity of these critical sections.
3304 */
3305
Juan Quintela3d0684b2017-03-23 15:06:39 +01003306/**
3307 * ram_save_setup: Setup RAM for migration
3308 *
3309 * Returns zero to indicate success and negative for error
3310 *
3311 * @f: QEMUFile where to send the data
3312 * @opaque: RAMState pointer
3313 */
zhanghailianga91246c2016-10-27 14:42:59 +08003314static int ram_save_setup(QEMUFile *f, void *opaque)
3315{
Juan Quintela53518d92017-05-04 11:46:24 +02003316 RAMState **rsp = opaque;
zhanghailianga91246c2016-10-27 14:42:59 +08003317 RAMBlock *block;
3318
Xiao Guangrongdcaf4462018-03-30 15:51:20 +08003319 if (compress_threads_save_setup()) {
3320 return -1;
3321 }
3322
zhanghailianga91246c2016-10-27 14:42:59 +08003323 /* migration has already setup the bitmap, reuse it. */
3324 if (!migration_in_colo_state()) {
Peter Xu7d00ee62017-10-19 14:31:57 +08003325 if (ram_init_all(rsp) != 0) {
Xiao Guangrongdcaf4462018-03-30 15:51:20 +08003326 compress_threads_save_cleanup();
zhanghailianga91246c2016-10-27 14:42:59 +08003327 return -1;
Juan Quintela53518d92017-05-04 11:46:24 +02003328 }
zhanghailianga91246c2016-10-27 14:42:59 +08003329 }
Juan Quintela53518d92017-05-04 11:46:24 +02003330 (*rsp)->f = f;
zhanghailianga91246c2016-10-27 14:42:59 +08003331
3332 rcu_read_lock();
Juan Quintela56e93d22015-05-07 19:33:31 +02003333
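    /*
     * Setup stream layout emitted below: the total RAM size tagged with
     * RAM_SAVE_FLAG_MEM_SIZE, then for each migratable block its idstr
     * length, idstr and used_length, optionally the block's page size
     * (postcopy with a non-standard host page size) and, when shared RAM
     * is ignored, the block's address plus an "ignored" byte.
     */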
Yury Kotovfbd162e2019-02-15 20:45:46 +03003334 qemu_put_be64(f, ram_bytes_total_common(true) | RAM_SAVE_FLAG_MEM_SIZE);
Juan Quintela56e93d22015-05-07 19:33:31 +02003335
Cédric Le Goaterb895de52018-05-14 08:57:00 +02003336 RAMBLOCK_FOREACH_MIGRATABLE(block) {
Juan Quintela56e93d22015-05-07 19:33:31 +02003337 qemu_put_byte(f, strlen(block->idstr));
3338 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
3339 qemu_put_be64(f, block->used_length);
Dr. David Alan Gilbertef08fb32017-02-24 18:28:30 +00003340 if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
3341 qemu_put_be64(f, block->page_size);
3342 }
Yury Kotovfbd162e2019-02-15 20:45:46 +03003343 if (migrate_ignore_shared()) {
3344 qemu_put_be64(f, block->mr->addr);
3345 qemu_put_byte(f, ramblock_is_ignored(block) ? 1 : 0);
3346 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003347 }
3348
3349 rcu_read_unlock();
3350
3351 ram_control_before_iterate(f, RAM_CONTROL_SETUP);
3352 ram_control_after_iterate(f, RAM_CONTROL_SETUP);
3353
Juan Quintela6df264a2018-02-28 09:10:07 +01003354 multifd_send_sync_main();
Juan Quintela56e93d22015-05-07 19:33:31 +02003355 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
Juan Quintela35374cb2018-04-18 10:13:21 +02003356 qemu_fflush(f);
Juan Quintela56e93d22015-05-07 19:33:31 +02003357
3358 return 0;
3359}
3360
Juan Quintela3d0684b2017-03-23 15:06:39 +01003361/**
3362 * ram_save_iterate: iterative stage for migration
3363 *
3364 * Returns zero to indicate success and negative for error
3365 *
3366 * @f: QEMUFile where to send the data
3367 * @opaque: RAMState pointer
3368 */
Juan Quintela56e93d22015-05-07 19:33:31 +02003369static int ram_save_iterate(QEMUFile *f, void *opaque)
3370{
Juan Quintela53518d92017-05-04 11:46:24 +02003371 RAMState **temp = opaque;
3372 RAMState *rs = *temp;
Juan Quintela56e93d22015-05-07 19:33:31 +02003373 int ret;
3374 int i;
3375 int64_t t0;
Thomas Huth5c903082016-11-04 14:10:17 +01003376 int done = 0;
Juan Quintela56e93d22015-05-07 19:33:31 +02003377
Peter Lievenb2557342018-03-08 12:18:24 +01003378 if (blk_mig_bulk_active()) {
3379 /* Avoid transferring ram during bulk phase of block migration as
3380 * the bulk phase will usually take a long time and transferring
3381 * ram updates during that time is pointless. */
3382 goto out;
3383 }
3384
Juan Quintela56e93d22015-05-07 19:33:31 +02003385 rcu_read_lock();
Juan Quintela6f37bb82017-03-13 19:26:29 +01003386 if (ram_list.version != rs->last_version) {
3387 ram_state_reset(rs);
Juan Quintela56e93d22015-05-07 19:33:31 +02003388 }
3389
3390 /* Read version before ram_list.blocks */
3391 smp_rmb();
3392
3393 ram_control_before_iterate(f, RAM_CONTROL_ROUND);
3394
3395 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
3396 i = 0;
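    /*
     * Keep sending pages while the rate limiter allows it; outstanding
     * postcopy page requests are still drained even once the limit has
     * been hit.
     */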
Dr. David Alan Gilberte03a34f2018-06-13 11:26:42 +01003397 while ((ret = qemu_file_rate_limit(f)) == 0 ||
3398 !QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02003399 int pages;
3400
Dr. David Alan Gilberte03a34f2018-06-13 11:26:42 +01003401 if (qemu_file_get_error(f)) {
3402 break;
3403 }
3404
Juan Quintelace25d332017-03-15 11:00:51 +01003405 pages = ram_find_and_save_block(rs, false);
Juan Quintela56e93d22015-05-07 19:33:31 +02003406 /* no more pages to send */
3407 if (pages == 0) {
Thomas Huth5c903082016-11-04 14:10:17 +01003408 done = 1;
Juan Quintela56e93d22015-05-07 19:33:31 +02003409 break;
3410 }
Xiao Guangronge8f37352018-09-03 17:26:44 +08003411
3412 if (pages < 0) {
3413 qemu_file_set_error(f, pages);
3414 break;
3415 }
3416
Xiao Guangrongbe8b02e2018-09-03 17:26:42 +08003417 rs->target_page_count += pages;
Jason J. Herne070afca2015-09-08 13:12:35 -04003418
Juan Quintela56e93d22015-05-07 19:33:31 +02003419 /* we want to check in the 1st loop, just in case it was the 1st time
3420 and we had to sync the dirty bitmap.
3421 qemu_get_clock_ns() is a bit expensive, so we only check once every
3422 few iterations
3423 */
3424 if ((i & 63) == 0) {
3425 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
3426 if (t1 > MAX_WAIT) {
Juan Quintela55c44462017-01-23 22:32:05 +01003427 trace_ram_save_iterate_big_wait(t1, i);
Juan Quintela56e93d22015-05-07 19:33:31 +02003428 break;
3429 }
3430 }
3431 i++;
3432 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003433 rcu_read_unlock();
3434
3435 /*
3436 * Must occur before EOS (or any QEMUFile operation)
3437 * because of RDMA protocol.
3438 */
3439 ram_control_after_iterate(f, RAM_CONTROL_ROUND);
3440
Juan Quintela6df264a2018-02-28 09:10:07 +01003441 multifd_send_sync_main();
Peter Lievenb2557342018-03-08 12:18:24 +01003442out:
Juan Quintela56e93d22015-05-07 19:33:31 +02003443 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
Juan Quintela35374cb2018-04-18 10:13:21 +02003444 qemu_fflush(f);
Juan Quintela93604472017-06-06 19:49:03 +02003445 ram_counters.transferred += 8;
Juan Quintela56e93d22015-05-07 19:33:31 +02003446
3447 ret = qemu_file_get_error(f);
3448 if (ret < 0) {
3449 return ret;
3450 }
3451
Thomas Huth5c903082016-11-04 14:10:17 +01003452 return done;
Juan Quintela56e93d22015-05-07 19:33:31 +02003453}
3454
Juan Quintela3d0684b2017-03-23 15:06:39 +01003455/**
3456 * ram_save_complete: function called to send the remaining amount of ram
3457 *
Xiao Guangronge8f37352018-09-03 17:26:44 +08003458 * Returns zero to indicate success or negative on error
Juan Quintela3d0684b2017-03-23 15:06:39 +01003459 *
3460 * Called with iothread lock
3461 *
3462 * @f: QEMUFile where to send the data
3463 * @opaque: RAMState pointer
3464 */
Juan Quintela56e93d22015-05-07 19:33:31 +02003465static int ram_save_complete(QEMUFile *f, void *opaque)
3466{
Juan Quintela53518d92017-05-04 11:46:24 +02003467 RAMState **temp = opaque;
3468 RAMState *rs = *temp;
Xiao Guangronge8f37352018-09-03 17:26:44 +08003469 int ret = 0;
Juan Quintela6f37bb82017-03-13 19:26:29 +01003470
Juan Quintela56e93d22015-05-07 19:33:31 +02003471 rcu_read_lock();
3472
Juan Quintela57273092017-03-20 22:25:28 +01003473 if (!migration_in_postcopy()) {
Wei Wangbd227062018-12-11 16:24:51 +08003474 migration_bitmap_sync_precopy(rs);
Dr. David Alan Gilbert663e6c12015-11-05 18:11:13 +00003475 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003476
3477 ram_control_before_iterate(f, RAM_CONTROL_FINISH);
3478
3479 /* try transferring iterative blocks of memory */
3480
3481 /* flush all remaining blocks regardless of rate limiting */
3482 while (true) {
3483 int pages;
3484
Juan Quintelace25d332017-03-15 11:00:51 +01003485 pages = ram_find_and_save_block(rs, !migration_in_colo_state());
Juan Quintela56e93d22015-05-07 19:33:31 +02003486 /* no more blocks to send */
3487 if (pages == 0) {
3488 break;
3489 }
Xiao Guangronge8f37352018-09-03 17:26:44 +08003490 if (pages < 0) {
3491 ret = pages;
3492 break;
3493 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003494 }
3495
Juan Quintelace25d332017-03-15 11:00:51 +01003496 flush_compressed_data(rs);
Juan Quintela56e93d22015-05-07 19:33:31 +02003497 ram_control_after_iterate(f, RAM_CONTROL_FINISH);
Juan Quintela56e93d22015-05-07 19:33:31 +02003498
3499 rcu_read_unlock();
Paolo Bonzinid09a6fd2015-07-09 08:47:58 +02003500
Juan Quintela6df264a2018-02-28 09:10:07 +01003501 multifd_send_sync_main();
Juan Quintela56e93d22015-05-07 19:33:31 +02003502 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
Juan Quintela35374cb2018-04-18 10:13:21 +02003503 qemu_fflush(f);
Juan Quintela56e93d22015-05-07 19:33:31 +02003504
Xiao Guangronge8f37352018-09-03 17:26:44 +08003505 return ret;
Juan Quintela56e93d22015-05-07 19:33:31 +02003506}
3507
Dr. David Alan Gilbertc31b0982015-11-05 18:10:54 +00003508static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
Vladimir Sementsov-Ogievskiy47995022018-03-13 15:34:00 -04003509 uint64_t *res_precopy_only,
3510 uint64_t *res_compatible,
3511 uint64_t *res_postcopy_only)
Juan Quintela56e93d22015-05-07 19:33:31 +02003512{
Juan Quintela53518d92017-05-04 11:46:24 +02003513 RAMState **temp = opaque;
3514 RAMState *rs = *temp;
Juan Quintela56e93d22015-05-07 19:33:31 +02003515 uint64_t remaining_size;
3516
Juan Quintela9edabd42017-03-14 12:02:16 +01003517 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
Juan Quintela56e93d22015-05-07 19:33:31 +02003518
Juan Quintela57273092017-03-20 22:25:28 +01003519 if (!migration_in_postcopy() &&
Dr. David Alan Gilbert663e6c12015-11-05 18:11:13 +00003520 remaining_size < max_size) {
Juan Quintela56e93d22015-05-07 19:33:31 +02003521 qemu_mutex_lock_iothread();
3522 rcu_read_lock();
Wei Wangbd227062018-12-11 16:24:51 +08003523 migration_bitmap_sync_precopy(rs);
Juan Quintela56e93d22015-05-07 19:33:31 +02003524 rcu_read_unlock();
3525 qemu_mutex_unlock_iothread();
Juan Quintela9edabd42017-03-14 12:02:16 +01003526 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
Juan Quintela56e93d22015-05-07 19:33:31 +02003527 }
Dr. David Alan Gilbertc31b0982015-11-05 18:10:54 +00003528
Vladimir Sementsov-Ogievskiy86e11672017-07-10 19:30:15 +03003529 if (migrate_postcopy_ram()) {
3530 /* We can do postcopy, and all the data is postcopiable */
Vladimir Sementsov-Ogievskiy47995022018-03-13 15:34:00 -04003531 *res_compatible += remaining_size;
Vladimir Sementsov-Ogievskiy86e11672017-07-10 19:30:15 +03003532 } else {
Vladimir Sementsov-Ogievskiy47995022018-03-13 15:34:00 -04003533 *res_precopy_only += remaining_size;
Vladimir Sementsov-Ogievskiy86e11672017-07-10 19:30:15 +03003534 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003535}
3536
3537static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
3538{
3539 unsigned int xh_len;
3540 int xh_flags;
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00003541 uint8_t *loaded_data;
Juan Quintela56e93d22015-05-07 19:33:31 +02003542
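    /*
     * Wire format handled here: a one-byte encoding flag
     * (ENCODING_FLAG_XBZRLE), a big-endian 16-bit encoded length, then
     * the encoded data, which is applied as a delta to the current
     * contents of @host.
     */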
Juan Quintela56e93d22015-05-07 19:33:31 +02003543 /* extract RLE header */
3544 xh_flags = qemu_get_byte(f);
3545 xh_len = qemu_get_be16(f);
3546
3547 if (xh_flags != ENCODING_FLAG_XBZRLE) {
3548 error_report("Failed to load XBZRLE page - wrong compression!");
3549 return -1;
3550 }
3551
3552 if (xh_len > TARGET_PAGE_SIZE) {
3553 error_report("Failed to load XBZRLE page - len overflow!");
3554 return -1;
3555 }
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003556 loaded_data = XBZRLE.decoded_buf;
Juan Quintela56e93d22015-05-07 19:33:31 +02003557 /* load data and decode */
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003558 /* it can change loaded_data to point to an internal buffer */
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00003559 qemu_get_buffer_in_place(f, &loaded_data, xh_len);
Juan Quintela56e93d22015-05-07 19:33:31 +02003560
3561 /* decode RLE */
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00003562 if (xbzrle_decode_buffer(loaded_data, xh_len, host,
Juan Quintela56e93d22015-05-07 19:33:31 +02003563 TARGET_PAGE_SIZE) == -1) {
3564 error_report("Failed to load XBZRLE page - decode error!");
3565 return -1;
3566 }
3567
3568 return 0;
3569}
3570
Juan Quintela3d0684b2017-03-23 15:06:39 +01003571/**
3572 * ram_block_from_stream: read a RAMBlock id from the migration stream
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003573 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01003574 * Must be called from within a rcu critical section.
3575 *
3576 * Returns a pointer from within the RCU-protected ram_list.
3577 *
3578 * @f: QEMUFile where to read the data from
3579 * @flags: Page flags (mostly to see if it's a continuation of previous block)
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003580 */
Juan Quintela3d0684b2017-03-23 15:06:39 +01003581static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
Juan Quintela56e93d22015-05-07 19:33:31 +02003582{
3583 static RAMBlock *block = NULL;
3584 char id[256];
3585 uint8_t len;
3586
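    /*
     * RAM_SAVE_FLAG_CONTINUE means "same block as the previous page", so
     * the cached pointer is reused; otherwise the stream carries a
     * one-byte id length followed by the block's idstr.
     */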
3587 if (flags & RAM_SAVE_FLAG_CONTINUE) {
zhanghailiang4c4bad42016-01-15 11:37:41 +08003588 if (!block) {
Juan Quintela56e93d22015-05-07 19:33:31 +02003589 error_report("Ack, bad migration stream!");
3590 return NULL;
3591 }
zhanghailiang4c4bad42016-01-15 11:37:41 +08003592 return block;
Juan Quintela56e93d22015-05-07 19:33:31 +02003593 }
3594
3595 len = qemu_get_byte(f);
3596 qemu_get_buffer(f, (uint8_t *)id, len);
3597 id[len] = 0;
3598
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00003599 block = qemu_ram_block_by_name(id);
zhanghailiang4c4bad42016-01-15 11:37:41 +08003600 if (!block) {
3601 error_report("Can't find block %s", id);
3602 return NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02003603 }
3604
Yury Kotovfbd162e2019-02-15 20:45:46 +03003605 if (ramblock_is_ignored(block)) {
Cédric Le Goaterb895de52018-05-14 08:57:00 +02003606 error_report("block %s should not be migrated !", id);
3607 return NULL;
3608 }
3609
zhanghailiang4c4bad42016-01-15 11:37:41 +08003610 return block;
3611}
3612
3613static inline void *host_from_ram_block_offset(RAMBlock *block,
3614 ram_addr_t offset)
3615{
3616 if (!offset_in_ramblock(block, offset)) {
3617 return NULL;
3618 }
3619
3620 return block->host + offset;
Juan Quintela56e93d22015-05-07 19:33:31 +02003621}
3622
Zhang Chen13af18f2018-09-03 12:38:48 +08003623static inline void *colo_cache_from_block_offset(RAMBlock *block,
3624 ram_addr_t offset)
3625{
3626 if (!offset_in_ramblock(block, offset)) {
3627 return NULL;
3628 }
3629 if (!block->colo_cache) {
3630 error_report("%s: colo_cache is NULL in block :%s",
3631 __func__, block->idstr);
3632 return NULL;
3633 }
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003634
3635 /*
3636 * During a COLO checkpoint, we need a bitmap of these migrated pages.
3637 * It helps us decide which pages in the ram cache should be flushed
3638 * into the VM's RAM later.
3639 */
3640 if (!test_and_set_bit(offset >> TARGET_PAGE_BITS, block->bmap)) {
3641 ram_state->migration_dirty_pages++;
3642 }
Zhang Chen13af18f2018-09-03 12:38:48 +08003643 return block->colo_cache + offset;
3644}
3645
Juan Quintela3d0684b2017-03-23 15:06:39 +01003646/**
3647 * ram_handle_compressed: handle the zero page case
3648 *
Juan Quintela56e93d22015-05-07 19:33:31 +02003649 * If a page (or a whole RDMA chunk) has been
3650 * determined to be zero, then zap it.
Juan Quintela3d0684b2017-03-23 15:06:39 +01003651 *
3652 * @host: host address for the zero page
3653 * @ch: what the page is filled from. We only support zero
3654 * @size: size of the zero page
Juan Quintela56e93d22015-05-07 19:33:31 +02003655 */
3656void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
3657{
3658 if (ch != 0 || !is_zero_range(host, size)) {
3659 memset(host, ch, size);
3660 }
3661}
3662
Xiao Guangrong797ca152018-03-30 15:51:21 +08003663/* return the size after decompression, or negative value on error */
3664static int
3665qemu_uncompress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
3666 const uint8_t *source, size_t source_len)
3667{
3668 int err;
3669
3670 err = inflateReset(stream);
3671 if (err != Z_OK) {
3672 return -1;
3673 }
3674
3675 stream->avail_in = source_len;
3676 stream->next_in = (uint8_t *)source;
3677 stream->avail_out = dest_len;
3678 stream->next_out = dest;
3679
3680 err = inflate(stream, Z_NO_FLUSH);
3681 if (err != Z_STREAM_END) {
3682 return -1;
3683 }
3684
3685 return stream->total_out;
3686}
3687
Juan Quintela56e93d22015-05-07 19:33:31 +02003688static void *do_data_decompress(void *opaque)
3689{
3690 DecompressParam *param = opaque;
3691 unsigned long pagesize;
Liang Li33d151f2016-05-05 15:32:58 +08003692 uint8_t *des;
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003693 int len, ret;
Juan Quintela56e93d22015-05-07 19:33:31 +02003694
Liang Li33d151f2016-05-05 15:32:58 +08003695 qemu_mutex_lock(&param->mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08003696 while (!param->quit) {
Liang Li33d151f2016-05-05 15:32:58 +08003697 if (param->des) {
3698 des = param->des;
3699 len = param->len;
3700 param->des = 0;
3701 qemu_mutex_unlock(&param->mutex);
3702
Liang Li73a89122016-05-05 15:32:51 +08003703 pagesize = TARGET_PAGE_SIZE;
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003704
3705 ret = qemu_uncompress_data(&param->stream, des, pagesize,
3706 param->compbuf, len);
Xiao Guangrongf5482222018-05-03 16:06:11 +08003707 if (ret < 0 && migrate_get_current()->decompress_error_check) {
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003708 error_report("decompress data failed");
3709 qemu_file_set_error(decomp_file, ret);
3710 }
Liang Li73a89122016-05-05 15:32:51 +08003711
Liang Li33d151f2016-05-05 15:32:58 +08003712 qemu_mutex_lock(&decomp_done_lock);
3713 param->done = true;
3714 qemu_cond_signal(&decomp_done_cond);
3715 qemu_mutex_unlock(&decomp_done_lock);
3716
3717 qemu_mutex_lock(&param->mutex);
3718 } else {
3719 qemu_cond_wait(&param->cond, &param->mutex);
3720 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003721 }
Liang Li33d151f2016-05-05 15:32:58 +08003722 qemu_mutex_unlock(&param->mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02003723
3724 return NULL;
3725}
3726
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003727static int wait_for_decompress_done(void)
Liang Li5533b2e2016-05-05 15:32:52 +08003728{
3729 int idx, thread_count;
3730
3731 if (!migrate_use_compression()) {
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003732 return 0;
Liang Li5533b2e2016-05-05 15:32:52 +08003733 }
3734
3735 thread_count = migrate_decompress_threads();
3736 qemu_mutex_lock(&decomp_done_lock);
3737 for (idx = 0; idx < thread_count; idx++) {
3738 while (!decomp_param[idx].done) {
3739 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
3740 }
3741 }
3742 qemu_mutex_unlock(&decomp_done_lock);
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003743 return qemu_file_get_error(decomp_file);
Liang Li5533b2e2016-05-05 15:32:52 +08003744}
3745
Juan Quintelaf0afa332017-06-28 11:52:28 +02003746static void compress_threads_load_cleanup(void)
Juan Quintela56e93d22015-05-07 19:33:31 +02003747{
3748 int i, thread_count;
3749
Juan Quintela3416ab52016-04-20 11:56:01 +02003750 if (!migrate_use_compression()) {
3751 return;
3752 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003753 thread_count = migrate_decompress_threads();
3754 for (i = 0; i < thread_count; i++) {
Xiao Guangrong797ca152018-03-30 15:51:21 +08003755 /*
3756 * we use it as an indicator of whether the thread is
3757 * properly init'd or not
3758 */
3759 if (!decomp_param[i].compbuf) {
3760 break;
3761 }
3762
Juan Quintela56e93d22015-05-07 19:33:31 +02003763 qemu_mutex_lock(&decomp_param[i].mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08003764 decomp_param[i].quit = true;
Juan Quintela56e93d22015-05-07 19:33:31 +02003765 qemu_cond_signal(&decomp_param[i].cond);
3766 qemu_mutex_unlock(&decomp_param[i].mutex);
3767 }
3768 for (i = 0; i < thread_count; i++) {
Xiao Guangrong797ca152018-03-30 15:51:21 +08003769 if (!decomp_param[i].compbuf) {
3770 break;
3771 }
3772
Juan Quintela56e93d22015-05-07 19:33:31 +02003773 qemu_thread_join(decompress_threads + i);
3774 qemu_mutex_destroy(&decomp_param[i].mutex);
3775 qemu_cond_destroy(&decomp_param[i].cond);
Xiao Guangrong797ca152018-03-30 15:51:21 +08003776 inflateEnd(&decomp_param[i].stream);
Juan Quintela56e93d22015-05-07 19:33:31 +02003777 g_free(decomp_param[i].compbuf);
Xiao Guangrong797ca152018-03-30 15:51:21 +08003778 decomp_param[i].compbuf = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02003779 }
3780 g_free(decompress_threads);
3781 g_free(decomp_param);
Juan Quintela56e93d22015-05-07 19:33:31 +02003782 decompress_threads = NULL;
3783 decomp_param = NULL;
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003784 decomp_file = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02003785}
3786
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003787static int compress_threads_load_setup(QEMUFile *f)
Xiao Guangrong797ca152018-03-30 15:51:21 +08003788{
3789 int i, thread_count;
3790
3791 if (!migrate_use_compression()) {
3792 return 0;
3793 }
3794
3795 thread_count = migrate_decompress_threads();
3796 decompress_threads = g_new0(QemuThread, thread_count);
3797 decomp_param = g_new0(DecompressParam, thread_count);
3798 qemu_mutex_init(&decomp_done_lock);
3799 qemu_cond_init(&decomp_done_cond);
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003800 decomp_file = f;
Xiao Guangrong797ca152018-03-30 15:51:21 +08003801 for (i = 0; i < thread_count; i++) {
3802 if (inflateInit(&decomp_param[i].stream) != Z_OK) {
3803 goto exit;
3804 }
3805
3806 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
3807 qemu_mutex_init(&decomp_param[i].mutex);
3808 qemu_cond_init(&decomp_param[i].cond);
3809 decomp_param[i].done = true;
3810 decomp_param[i].quit = false;
3811 qemu_thread_create(decompress_threads + i, "decompress",
3812 do_data_decompress, decomp_param + i,
3813 QEMU_THREAD_JOINABLE);
3814 }
3815 return 0;
3816exit:
3817 compress_threads_load_cleanup();
3818 return -1;
3819}
3820
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00003821static void decompress_data_with_multi_threads(QEMUFile *f,
Juan Quintela56e93d22015-05-07 19:33:31 +02003822 void *host, int len)
3823{
3824 int idx, thread_count;
3825
3826 thread_count = migrate_decompress_threads();
Liang Li73a89122016-05-05 15:32:51 +08003827 qemu_mutex_lock(&decomp_done_lock);
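    /*
     * Hand the compressed page to the first idle worker (done == true);
     * if every worker is busy, wait on decomp_done_cond until one of
     * them signals completion, then retry.
     */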
Juan Quintela56e93d22015-05-07 19:33:31 +02003828 while (true) {
3829 for (idx = 0; idx < thread_count; idx++) {
Liang Li73a89122016-05-05 15:32:51 +08003830 if (decomp_param[idx].done) {
Liang Li33d151f2016-05-05 15:32:58 +08003831 decomp_param[idx].done = false;
3832 qemu_mutex_lock(&decomp_param[idx].mutex);
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00003833 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
Juan Quintela56e93d22015-05-07 19:33:31 +02003834 decomp_param[idx].des = host;
3835 decomp_param[idx].len = len;
Liang Li33d151f2016-05-05 15:32:58 +08003836 qemu_cond_signal(&decomp_param[idx].cond);
3837 qemu_mutex_unlock(&decomp_param[idx].mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02003838 break;
3839 }
3840 }
3841 if (idx < thread_count) {
3842 break;
Liang Li73a89122016-05-05 15:32:51 +08003843 } else {
3844 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02003845 }
3846 }
Liang Li73a89122016-05-05 15:32:51 +08003847 qemu_mutex_unlock(&decomp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02003848}
3849
Zhang Chen13af18f2018-09-03 12:38:48 +08003850/*
3851 * colo cache: this is for the secondary VM; we cache the whole
3852 * memory of the secondary VM. The global lock must be held
3853 * to call this helper.
3854 */
3855int colo_init_ram_cache(void)
3856{
3857 RAMBlock *block;
3858
3859 rcu_read_lock();
Yury Kotovfbd162e2019-02-15 20:45:46 +03003860 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Zhang Chen13af18f2018-09-03 12:38:48 +08003861 block->colo_cache = qemu_anon_ram_alloc(block->used_length,
3862 NULL,
3863 false);
3864 if (!block->colo_cache) {
3865 error_report("%s: Can't alloc memory for COLO cache of block %s,"
3866 "size 0x" RAM_ADDR_FMT, __func__, block->idstr,
3867 block->used_length);
3868 goto out_locked;
3869 }
3870 memcpy(block->colo_cache, block->host, block->used_length);
3871 }
3872 rcu_read_unlock();
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003873 /*
3874 * Record the dirty pages sent by the PVM; we use this dirty bitmap
3875 * to decide which pages in the cache should be flushed into the SVM's RAM. Here
3876 * we use the same name 'ram_bitmap' as for migration.
3877 */
3878 if (ram_bytes_total()) {
3879 RAMBlock *block;
3880
Yury Kotovfbd162e2019-02-15 20:45:46 +03003881 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003882 unsigned long pages = block->max_length >> TARGET_PAGE_BITS;
3883
3884 block->bmap = bitmap_new(pages);
3885 bitmap_set(block->bmap, 0, pages);
3886 }
3887 }
3888 ram_state = g_new0(RAMState, 1);
3889 ram_state->migration_dirty_pages = 0;
zhanghailiangd1955d22018-09-03 12:38:55 +08003890 memory_global_dirty_log_start();
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003891
Zhang Chen13af18f2018-09-03 12:38:48 +08003892 return 0;
3893
3894out_locked:
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003895
Yury Kotovfbd162e2019-02-15 20:45:46 +03003896 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Zhang Chen13af18f2018-09-03 12:38:48 +08003897 if (block->colo_cache) {
3898 qemu_anon_ram_free(block->colo_cache, block->used_length);
3899 block->colo_cache = NULL;
3900 }
3901 }
3902
3903 rcu_read_unlock();
3904 return -errno;
3905}
3906
3907/* It is necessary to hold the global lock to call this helper */
3908void colo_release_ram_cache(void)
3909{
3910 RAMBlock *block;
3911
zhanghailiangd1955d22018-09-03 12:38:55 +08003912 memory_global_dirty_log_stop();
Yury Kotovfbd162e2019-02-15 20:45:46 +03003913 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003914 g_free(block->bmap);
3915 block->bmap = NULL;
3916 }
3917
Zhang Chen13af18f2018-09-03 12:38:48 +08003918 rcu_read_lock();
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003919
Yury Kotovfbd162e2019-02-15 20:45:46 +03003920 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Zhang Chen13af18f2018-09-03 12:38:48 +08003921 if (block->colo_cache) {
3922 qemu_anon_ram_free(block->colo_cache, block->used_length);
3923 block->colo_cache = NULL;
3924 }
3925 }
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003926
Zhang Chen13af18f2018-09-03 12:38:48 +08003927 rcu_read_unlock();
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003928 g_free(ram_state);
3929 ram_state = NULL;
Zhang Chen13af18f2018-09-03 12:38:48 +08003930}
3931
Juan Quintela3d0684b2017-03-23 15:06:39 +01003932/**
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003933 * ram_load_setup: Setup RAM for migration incoming side
3934 *
3935 * Returns zero to indicate success and negative for error
3936 *
3937 * @f: QEMUFile where to receive the data
3938 * @opaque: RAMState pointer
3939 */
3940static int ram_load_setup(QEMUFile *f, void *opaque)
3941{
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003942 if (compress_threads_load_setup(f)) {
Xiao Guangrong797ca152018-03-30 15:51:21 +08003943 return -1;
3944 }
3945
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003946 xbzrle_load_setup();
Alexey Perevalovf9494612017-10-05 14:13:20 +03003947 ramblock_recv_map_init();
Zhang Chen13af18f2018-09-03 12:38:48 +08003948
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003949 return 0;
3950}
3951
3952static int ram_load_cleanup(void *opaque)
3953{
Alexey Perevalovf9494612017-10-05 14:13:20 +03003954 RAMBlock *rb;
Junyan He56eb90a2018-07-18 15:48:03 +08003955
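    /* Make sure loaded pages hit persistent memory before tearing down. */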
Yury Kotovfbd162e2019-02-15 20:45:46 +03003956 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
Junyan He56eb90a2018-07-18 15:48:03 +08003957 if (ramblock_is_pmem(rb)) {
3958 pmem_persist(rb->host, rb->used_length);
3959 }
3960 }
3961
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003962 xbzrle_load_cleanup();
Juan Quintelaf0afa332017-06-28 11:52:28 +02003963 compress_threads_load_cleanup();
Alexey Perevalovf9494612017-10-05 14:13:20 +03003964
Yury Kotovfbd162e2019-02-15 20:45:46 +03003965 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
Alexey Perevalovf9494612017-10-05 14:13:20 +03003966 g_free(rb->receivedmap);
3967 rb->receivedmap = NULL;
3968 }
Zhang Chen13af18f2018-09-03 12:38:48 +08003969
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003970 return 0;
3971}
3972
3973/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01003974 * ram_postcopy_incoming_init: allocate postcopy data structures
3975 *
3976 * Returns 0 for success and negative if there was one error
3977 *
3978 * @mis: current migration incoming state
3979 *
3980 * Allocate data structures etc needed by incoming migration with
3981 * postcopy-ram. postcopy-ram's similarly named
3982 * postcopy_ram_incoming_init does the work.
Dr. David Alan Gilbert1caddf82015-11-05 18:11:03 +00003983 */
3984int ram_postcopy_incoming_init(MigrationIncomingState *mis)
3985{
David Hildenbrandc1361802018-06-20 22:27:36 +02003986 return postcopy_ram_incoming_init(mis);
Dr. David Alan Gilbert1caddf82015-11-05 18:11:03 +00003987}
3988
Juan Quintela3d0684b2017-03-23 15:06:39 +01003989/**
3990 * ram_load_postcopy: load a page in postcopy case
3991 *
3992 * Returns 0 for success or -errno in case of error
3993 *
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003994 * Called in postcopy mode by ram_load().
3995 * rcu_read_lock is taken prior to this being called.
Juan Quintela3d0684b2017-03-23 15:06:39 +01003996 *
3997 * @f: QEMUFile to receive the data from
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003998 */
3999static int ram_load_postcopy(QEMUFile *f)
4000{
4001 int flags = 0, ret = 0;
4002 bool place_needed = false;
Peter Xu1aa83672018-07-10 17:18:53 +08004003 bool matches_target_page_size = false;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004004 MigrationIncomingState *mis = migration_incoming_get_current();
4005 /* Temporary page that is later 'placed' */
4006 void *postcopy_host_page = postcopy_get_tmp_page(mis);
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00004007 void *last_host = NULL;
Dr. David Alan Gilberta3b6ff62015-11-11 14:02:28 +00004008 bool all_zero = false;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004009
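    /*
     * Each iteration handles one target-page-sized record. Target pages are
     * accumulated in postcopy_host_page and the whole host page is placed
     * atomically once its last target page has been received.
     */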
4010 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
4011 ram_addr_t addr;
4012 void *host = NULL;
4013 void *page_buffer = NULL;
4014 void *place_source = NULL;
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00004015 RAMBlock *block = NULL;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004016 uint8_t ch;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004017
4018 addr = qemu_get_be64(f);
Peter Xu7a9ddfb2018-02-08 18:31:05 +08004019
4020 /*
4021 * If qemu file error, we should stop here, and then "addr"
4022 * may be invalid
4023 */
4024 ret = qemu_file_get_error(f);
4025 if (ret) {
4026 break;
4027 }
4028
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004029 flags = addr & ~TARGET_PAGE_MASK;
4030 addr &= TARGET_PAGE_MASK;
4031
4032 trace_ram_load_postcopy_loop((uint64_t)addr, flags);
4033 place_needed = false;
Juan Quintelabb890ed2017-04-28 09:39:55 +02004034 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE)) {
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00004035 block = ram_block_from_stream(f, flags);
zhanghailiang4c4bad42016-01-15 11:37:41 +08004036
4037 host = host_from_ram_block_offset(block, addr);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004038 if (!host) {
4039 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
4040 ret = -EINVAL;
4041 break;
4042 }
Peter Xu1aa83672018-07-10 17:18:53 +08004043 matches_target_page_size = block->page_size == TARGET_PAGE_SIZE;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004044 /*
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00004045 * Postcopy requires that we place whole host pages atomically;
4046 * these may be huge pages for RAMBlocks that are backed by
4047 * hugetlbfs.
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004048 * To make it atomic, the data is read into a temporary page
4049 * that's moved into place later.
4050 * The migration protocol uses, possibly smaller, target pages;
4051 * however, the source ensures it always sends all the components
4052 * of a host page in order.
4053 */
4054 page_buffer = postcopy_host_page +
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00004055 ((uintptr_t)host & (block->page_size - 1));
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004056 /* If all target pages are zero then we can optimise the placement */
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00004057 if (!((uintptr_t)host & (block->page_size - 1))) {
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004058 all_zero = true;
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00004059 } else {
4060 /* not the first target page within the host page */
4061 if (host != (last_host + TARGET_PAGE_SIZE)) {
Markus Armbruster9af9e0f2015-12-18 16:35:19 +01004062 error_report("Non-sequential target page %p/%p",
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00004063 host, last_host);
4064 ret = -EINVAL;
4065 break;
4066 }
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004067 }
4068
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00004069
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004070 /*
4071 * If it's the last part of a host page then we place the host
4072 * page
4073 */
4074 place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00004075 (block->page_size - 1)) == 0;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004076 place_source = postcopy_host_page;
4077 }
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00004078 last_host = host;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004079
4080 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
Juan Quintelabb890ed2017-04-28 09:39:55 +02004081 case RAM_SAVE_FLAG_ZERO:
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004082 ch = qemu_get_byte(f);
4083 memset(page_buffer, ch, TARGET_PAGE_SIZE);
4084 if (ch) {
4085 all_zero = false;
4086 }
4087 break;
4088
4089 case RAM_SAVE_FLAG_PAGE:
4090 all_zero = false;
Peter Xu1aa83672018-07-10 17:18:53 +08004091 if (!matches_target_page_size) {
4092 /* For huge pages, we always use a temporary buffer */
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004093 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
4094 } else {
Peter Xu1aa83672018-07-10 17:18:53 +08004095 /*
4096 * For small pages that match the target page size, we
4097 * avoid the qemu_file copy. Instead we directly use
4098 * the buffer of QEMUFile to place the page. Note: we
4099 * cannot do any QEMUFile operation before using that
4100 * buffer to make sure the buffer is valid when
4101 * placing the page.
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004102 */
4103 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
4104 TARGET_PAGE_SIZE);
4105 }
4106 break;
4107 case RAM_SAVE_FLAG_EOS:
4108 /* normal exit */
Juan Quintela6df264a2018-02-28 09:10:07 +01004109 multifd_recv_sync_main();
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004110 break;
4111 default:
4112 error_report("Unknown combination of migration flags: %#x"
4113 " (postcopy mode)", flags);
4114 ret = -EINVAL;
Peter Xu7a9ddfb2018-02-08 18:31:05 +08004115 break;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004116 }
4117
Peter Xu7a9ddfb2018-02-08 18:31:05 +08004118 /* Detect for any possible file errors */
4119 if (!ret && qemu_file_get_error(f)) {
4120 ret = qemu_file_get_error(f);
4121 }
4122
4123 if (!ret && place_needed) {
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004124 /* This gets called at the last target page in the host page */
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00004125 void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;
4126
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004127 if (all_zero) {
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00004128 ret = postcopy_place_page_zero(mis, place_dest,
Alexey Perevalov8be46202017-10-05 14:13:18 +03004129 block);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004130 } else {
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00004131 ret = postcopy_place_page(mis, place_dest,
Alexey Perevalov8be46202017-10-05 14:13:18 +03004132 place_source, block);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004133 }
4134 }
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004135 }
4136
4137 return ret;
4138}
4139
Daniel Henrique Barbozaacab30b2017-11-16 20:35:26 -02004140static bool postcopy_is_advised(void)
4141{
4142 PostcopyState ps = postcopy_state_get();
4143 return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END;
4144}
4145
4146static bool postcopy_is_running(void)
4147{
4148 PostcopyState ps = postcopy_state_get();
4149 return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END;
4150}
4151
Zhang Chene6f4aa12018-09-03 12:38:50 +08004152/*
4153 * Flush the content of the RAM cache into the SVM's memory.
4154 * Only flush the pages that have been dirtied by the PVM, the SVM, or both.
4155 */
4156static void colo_flush_ram_cache(void)
4157{
4158 RAMBlock *block = NULL;
4159 void *dst_host;
4160 void *src_host;
4161 unsigned long offset = 0;
4162
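    /*
     * Fold the global dirty log into each block's dirty bitmap before
     * walking the bitmap below.
     */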
zhanghailiangd1955d22018-09-03 12:38:55 +08004163 memory_global_dirty_log_sync();
4164 rcu_read_lock();
Yury Kotovfbd162e2019-02-15 20:45:46 +03004165 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
zhanghailiangd1955d22018-09-03 12:38:55 +08004166 migration_bitmap_sync_range(ram_state, block, 0, block->used_length);
4167 }
4168 rcu_read_unlock();
4169
Zhang Chene6f4aa12018-09-03 12:38:50 +08004170 trace_colo_flush_ram_cache_begin(ram_state->migration_dirty_pages);
4171 rcu_read_lock();
4172 block = QLIST_FIRST_RCU(&ram_list.blocks);
4173
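    /*
     * Walk every block's dirty bitmap: clear each dirty bit and copy the
     * corresponding page from the colo_cache into the SVM's RAM.
     */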
4174 while (block) {
4175 offset = migration_bitmap_find_dirty(ram_state, block, offset);
4176
4177 if (offset << TARGET_PAGE_BITS >= block->used_length) {
4178 offset = 0;
4179 block = QLIST_NEXT_RCU(block, next);
4180 } else {
4181 migration_bitmap_clear_dirty(ram_state, block, offset);
4182 dst_host = block->host + (offset << TARGET_PAGE_BITS);
4183 src_host = block->colo_cache + (offset << TARGET_PAGE_BITS);
4184 memcpy(dst_host, src_host, TARGET_PAGE_SIZE);
4185 }
4186 }
4187
4188 rcu_read_unlock();
4189 trace_colo_flush_ram_cache_end();
4190}
4191
Juan Quintela56e93d22015-05-07 19:33:31 +02004192static int ram_load(QEMUFile *f, void *opaque, int version_id)
4193{
Juan Quintelaedc60122016-11-02 12:40:46 +01004194 int flags = 0, ret = 0, invalid_flags = 0;
Juan Quintela56e93d22015-05-07 19:33:31 +02004195 static uint64_t seq_iter;
4196 int len = 0;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004197 /*
4198 * If the system is running in postcopy mode, page inserts into host memory must
4199 * be atomic
4200 */
Daniel Henrique Barbozaacab30b2017-11-16 20:35:26 -02004201 bool postcopy_running = postcopy_is_running();
Dr. David Alan Gilbertef08fb32017-02-24 18:28:30 +00004202 /* ADVISE comes earlier; it shows the source has the postcopy capability on */
Daniel Henrique Barbozaacab30b2017-11-16 20:35:26 -02004203 bool postcopy_advised = postcopy_is_advised();
Juan Quintela56e93d22015-05-07 19:33:31 +02004204
4205 seq_iter++;
4206
4207 if (version_id != 4) {
4208 ret = -EINVAL;
4209 }
4210
Juan Quintelaedc60122016-11-02 12:40:46 +01004211 if (!migrate_use_compression()) {
4212 invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE;
4213 }
Juan Quintela56e93d22015-05-07 19:33:31 +02004214 /* This RCU critical section can be very long running.
4215 * When RCU reclaims in the code start to become numerous,
4216 * it will be necessary to reduce the granularity of this
4217 * critical section.
4218 */
4219 rcu_read_lock();
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004220
4221 if (postcopy_running) {
4222 ret = ram_load_postcopy(f);
4223 }
4224
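    /*
     * Main precopy load loop: each record starts with a 64-bit value that
     * packs the page address and the RAM_SAVE_FLAG_* bits in its low bits.
     */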
4225 while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02004226 ram_addr_t addr, total_ram_bytes;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004227 void *host = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02004228 uint8_t ch;
4229
4230 addr = qemu_get_be64(f);
4231 flags = addr & ~TARGET_PAGE_MASK;
4232 addr &= TARGET_PAGE_MASK;
4233
Juan Quintelaedc60122016-11-02 12:40:46 +01004234 if (flags & invalid_flags) {
4235 if (flags & invalid_flags & RAM_SAVE_FLAG_COMPRESS_PAGE) {
4236 error_report("Received an unexpected compressed page");
4237 }
4238
4239 ret = -EINVAL;
4240 break;
4241 }
4242
Juan Quintelabb890ed2017-04-28 09:39:55 +02004243 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004244 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
zhanghailiang4c4bad42016-01-15 11:37:41 +08004245 RAMBlock *block = ram_block_from_stream(f, flags);
4246
Zhang Chen13af18f2018-09-03 12:38:48 +08004247 /*
4248 * After going into COLO, we should load the page into the colo_cache.
4249 */
4250 if (migration_incoming_in_colo_state()) {
4251 host = colo_cache_from_block_offset(block, addr);
4252 } else {
4253 host = host_from_ram_block_offset(block, addr);
4254 }
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004255 if (!host) {
4256 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
4257 ret = -EINVAL;
4258 break;
4259 }
Zhang Chen13af18f2018-09-03 12:38:48 +08004260
4261 if (!migration_incoming_in_colo_state()) {
4262 ramblock_recv_bitmap_set(block, host);
4263 }
4264
Dr. David Alan Gilbert1db9d8e2017-04-26 19:37:21 +01004265 trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004266 }
4267
Juan Quintela56e93d22015-05-07 19:33:31 +02004268 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
4269 case RAM_SAVE_FLAG_MEM_SIZE:
4270 /* Synchronize RAM block list */
4271 total_ram_bytes = addr;
4272 while (!ret && total_ram_bytes) {
4273 RAMBlock *block;
Juan Quintela56e93d22015-05-07 19:33:31 +02004274 char id[256];
4275 ram_addr_t length;
4276
4277 len = qemu_get_byte(f);
4278 qemu_get_buffer(f, (uint8_t *)id, len);
4279 id[len] = 0;
4280 length = qemu_get_be64(f);
4281
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00004282 block = qemu_ram_block_by_name(id);
Cédric Le Goaterb895de52018-05-14 08:57:00 +02004283 if (block && !qemu_ram_is_migratable(block)) {
4284 error_report("block %s should not be migrated!", id);
4285 ret = -EINVAL;
4286 } else if (block) {
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00004287 if (length != block->used_length) {
4288 Error *local_err = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02004289
Gongleifa53a0e2016-05-10 10:04:59 +08004290 ret = qemu_ram_resize(block, length,
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00004291 &local_err);
4292 if (local_err) {
4293 error_report_err(local_err);
Juan Quintela56e93d22015-05-07 19:33:31 +02004294 }
Juan Quintela56e93d22015-05-07 19:33:31 +02004295 }
Dr. David Alan Gilbertef08fb32017-02-24 18:28:30 +00004296 /* For postcopy we need to check hugepage sizes match */
4297 if (postcopy_advised &&
4298 block->page_size != qemu_host_page_size) {
4299 uint64_t remote_page_size = qemu_get_be64(f);
4300 if (remote_page_size != block->page_size) {
4301 error_report("Mismatched RAM page size %s "
4302 "(local) %zd != %" PRId64,
4303 id, block->page_size,
4304 remote_page_size);
4305 ret = -EINVAL;
4306 }
4307 }
Yury Kotovfbd162e2019-02-15 20:45:46 +03004308 if (migrate_ignore_shared()) {
4309 hwaddr addr = qemu_get_be64(f);
4310 bool ignored = qemu_get_byte(f);
4311 if (ignored != ramblock_is_ignored(block)) {
4312 error_report("RAM block %s should %s be migrated",
4313 id, ignored ? "" : "not");
4314 ret = -EINVAL;
4315 }
4316 if (ramblock_is_ignored(block) &&
4317 block->mr->addr != addr) {
4318 error_report("Mismatched GPAs for block %s "
4319 "%" PRId64 " != %" PRId64,
4320 id, (uint64_t)addr,
4321 (uint64_t)block->mr->addr);
4322 ret = -EINVAL;
4323 }
4324 }
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00004325 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
4326 block->idstr);
4327 } else {
Juan Quintela56e93d22015-05-07 19:33:31 +02004328 error_report("Unknown ramblock \"%s\", cannot "
4329 "accept migration", id);
4330 ret = -EINVAL;
4331 }
4332
4333 total_ram_bytes -= length;
4334 }
4335 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004336
Juan Quintelabb890ed2017-04-28 09:39:55 +02004337 case RAM_SAVE_FLAG_ZERO:
Juan Quintela56e93d22015-05-07 19:33:31 +02004338 ch = qemu_get_byte(f);
4339 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
4340 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004341
Juan Quintela56e93d22015-05-07 19:33:31 +02004342 case RAM_SAVE_FLAG_PAGE:
Juan Quintela56e93d22015-05-07 19:33:31 +02004343 qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
4344 break;
Juan Quintela56e93d22015-05-07 19:33:31 +02004345
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004346 case RAM_SAVE_FLAG_COMPRESS_PAGE:
Juan Quintela56e93d22015-05-07 19:33:31 +02004347 len = qemu_get_be32(f);
4348 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
4349 error_report("Invalid compressed data length: %d", len);
4350 ret = -EINVAL;
4351 break;
4352 }
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00004353 decompress_data_with_multi_threads(f, host, len);
Juan Quintela56e93d22015-05-07 19:33:31 +02004354 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004355
Juan Quintela56e93d22015-05-07 19:33:31 +02004356 case RAM_SAVE_FLAG_XBZRLE:
Juan Quintela56e93d22015-05-07 19:33:31 +02004357 if (load_xbzrle(f, addr, host) < 0) {
4358 error_report("Failed to decompress XBZRLE page at "
4359 RAM_ADDR_FMT, addr);
4360 ret = -EINVAL;
4361 break;
4362 }
4363 break;
4364 case RAM_SAVE_FLAG_EOS:
4365 /* normal exit */
Juan Quintela6df264a2018-02-28 09:10:07 +01004366 multifd_recv_sync_main();
Juan Quintela56e93d22015-05-07 19:33:31 +02004367 break;
4368 default:
4369 if (flags & RAM_SAVE_FLAG_HOOK) {
Dr. David Alan Gilbert632e3a52015-06-11 18:17:23 +01004370 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
Juan Quintela56e93d22015-05-07 19:33:31 +02004371 } else {
4372 error_report("Unknown combination of migration flags: %#x",
4373 flags);
4374 ret = -EINVAL;
4375 }
4376 }
4377 if (!ret) {
4378 ret = qemu_file_get_error(f);
4379 }
4380 }
4381
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08004382 ret |= wait_for_decompress_done();
Juan Quintela56e93d22015-05-07 19:33:31 +02004383 rcu_read_unlock();
Juan Quintela55c44462017-01-23 22:32:05 +01004384 trace_ram_load_complete(ret, seq_iter);
Zhang Chene6f4aa12018-09-03 12:38:50 +08004385
4386 if (!ret && migration_incoming_in_colo_state()) {
4387 colo_flush_ram_cache();
4388 }
Juan Quintela56e93d22015-05-07 19:33:31 +02004389 return ret;
4390}
4391
Vladimir Sementsov-Ogievskiyc6467622017-07-10 19:30:14 +03004392static bool ram_has_postcopy(void *opaque)
4393{
Junyan He469dd512018-07-18 15:48:02 +08004394 RAMBlock *rb;
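    /* Postcopy cannot currently be used with pmem (nvdimm) backed blocks. */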
Yury Kotovfbd162e2019-02-15 20:45:46 +03004395 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
Junyan He469dd512018-07-18 15:48:02 +08004396 if (ramblock_is_pmem(rb)) {
4397 info_report("Block: %s, host: %p is an nvdimm memory, postcopy "
4398 "is not supported now!", rb->idstr, rb->host);
4399 return false;
4400 }
4401 }
4402
Vladimir Sementsov-Ogievskiyc6467622017-07-10 19:30:14 +03004403 return migrate_postcopy_ram();
4404}
4405
Peter Xuedd090c2018-05-02 18:47:32 +08004406/* Sync all the dirty bitmap with destination VM. */
4407static int ram_dirty_bitmap_sync_all(MigrationState *s, RAMState *rs)
4408{
4409 RAMBlock *block;
4410 QEMUFile *file = s->to_dst_file;
4411 int ramblock_count = 0;
4412
4413 trace_ram_dirty_bitmap_sync_start();
4414
Yury Kotovfbd162e2019-02-15 20:45:46 +03004415 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Peter Xuedd090c2018-05-02 18:47:32 +08004416 qemu_savevm_send_recv_bitmap(file, block->idstr);
4417 trace_ram_dirty_bitmap_request(block->idstr);
4418 ramblock_count++;
4419 }
4420
4421 trace_ram_dirty_bitmap_sync_wait();
4422
4423 /* Wait until all the ramblocks' dirty bitmaps are synced */
4424 while (ramblock_count--) {
4425 qemu_sem_wait(&s->rp_state.rp_sem);
4426 }
4427
4428 trace_ram_dirty_bitmap_sync_complete();
4429
4430 return 0;
4431}
4432
4433static void ram_dirty_bitmap_reload_notify(MigrationState *s)
4434{
4435 qemu_sem_post(&s->rp_state.rp_sem);
4436}
4437
Peter Xua335deb2018-05-02 18:47:28 +08004438/*
4439 * Read the received bitmap, revert it as the initial dirty bitmap.
4440 * This is only used when the postcopy migration is paused but wants
4441 * to resume from a middle point.
4442 */
4443int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block)
4444{
4445 int ret = -EINVAL;
4446 QEMUFile *file = s->rp_state.from_dst_file;
4447 unsigned long *le_bitmap, nbits = block->used_length >> TARGET_PAGE_BITS;
Peter Xua725ef92018-07-10 17:18:55 +08004448 uint64_t local_size = DIV_ROUND_UP(nbits, 8);
Peter Xua335deb2018-05-02 18:47:28 +08004449 uint64_t size, end_mark;
4450
4451 trace_ram_dirty_bitmap_reload_begin(block->idstr);
4452
4453 if (s->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
4454 error_report("%s: incorrect state %s", __func__,
4455 MigrationStatus_str(s->state));
4456 return -EINVAL;
4457 }
4458
4459 /*
4460 * Note: see comments in ramblock_recv_bitmap_send() on why we
4461 * need the endianness conversion, and the padding.
4462 */
4463 local_size = ROUND_UP(local_size, 8);
4464
4465 /* Add paddings */
4466 le_bitmap = bitmap_new(nbits + BITS_PER_LONG);
4467
4468 size = qemu_get_be64(file);
4469
4470 /* The size of the bitmap should match that of our ramblock */
4471 if (size != local_size) {
4472 error_report("%s: ramblock '%s' bitmap size mismatch "
4473 "(0x%"PRIx64" != 0x%"PRIx64")", __func__,
4474 block->idstr, size, local_size);
4475 ret = -EINVAL;
4476 goto out;
4477 }
4478
4479 size = qemu_get_buffer(file, (uint8_t *)le_bitmap, local_size);
4480 end_mark = qemu_get_be64(file);
4481
4482 ret = qemu_file_get_error(file);
4483 if (ret || size != local_size) {
4484 error_report("%s: read bitmap failed for ramblock '%s': %d"
4485 " (size 0x%"PRIx64", got: 0x%"PRIx64")",
4486 __func__, block->idstr, ret, local_size, size);
4487 ret = -EIO;
4488 goto out;
4489 }
4490
4491 if (end_mark != RAMBLOCK_RECV_BITMAP_ENDING) {
4492 error_report("%s: ramblock '%s' end mark incorrect: 0x%"PRIu64,
4493 __func__, block->idstr, end_mark);
4494 ret = -EINVAL;
4495 goto out;
4496 }
4497
4498 /*
4499 * Endianness conversion. We are in postcopy (though paused).
4500 * The dirty bitmap won't change. We can directly modify it.
4501 */
4502 bitmap_from_le(block->bmap, le_bitmap, nbits);
4503
4504 /*
4505 * What we received is "received bitmap". Revert it as the initial
4506 * dirty bitmap for this ramblock.
4507 */
4508 bitmap_complement(block->bmap, block->bmap, nbits);
4509
4510 trace_ram_dirty_bitmap_reload_complete(block->idstr);
4511
Peter Xuedd090c2018-05-02 18:47:32 +08004512 /*
4513 * We succeeded in syncing the bitmap for the current ramblock. If this is
4514 * the last one to sync, we need to notify the main send thread.
4515 */
4516 ram_dirty_bitmap_reload_notify(s);
4517
Peter Xua335deb2018-05-02 18:47:28 +08004518 ret = 0;
4519out:
Peter Xubf269902018-05-25 09:50:42 +08004520 g_free(le_bitmap);
Peter Xua335deb2018-05-02 18:47:28 +08004521 return ret;
4522}
4523
Peter Xuedd090c2018-05-02 18:47:32 +08004524static int ram_resume_prepare(MigrationState *s, void *opaque)
4525{
4526 RAMState *rs = *(RAMState **)opaque;
Peter Xu08614f32018-05-02 18:47:33 +08004527 int ret;
Peter Xuedd090c2018-05-02 18:47:32 +08004528
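    /* Re-sync the dirty bitmaps with the destination before resuming. */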
Peter Xu08614f32018-05-02 18:47:33 +08004529 ret = ram_dirty_bitmap_sync_all(s, rs);
4530 if (ret) {
4531 return ret;
4532 }
4533
4534 ram_state_resume_prepare(rs, s->to_dst_file);
4535
4536 return 0;
Peter Xuedd090c2018-05-02 18:47:32 +08004537}
4538
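/*
 * Hook the RAM save/load callbacks into the generic savevm framework;
 * ram_mig_init() below registers this table as a live migration handler.
 */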
Juan Quintela56e93d22015-05-07 19:33:31 +02004539static SaveVMHandlers savevm_ram_handlers = {
Juan Quintela9907e842017-06-28 11:52:24 +02004540 .save_setup = ram_save_setup,
Juan Quintela56e93d22015-05-07 19:33:31 +02004541 .save_live_iterate = ram_save_iterate,
Dr. David Alan Gilbert763c9062015-11-05 18:11:00 +00004542 .save_live_complete_postcopy = ram_save_complete,
Dr. David Alan Gilberta3e06c32015-11-05 18:10:41 +00004543 .save_live_complete_precopy = ram_save_complete,
Vladimir Sementsov-Ogievskiyc6467622017-07-10 19:30:14 +03004544 .has_postcopy = ram_has_postcopy,
Juan Quintela56e93d22015-05-07 19:33:31 +02004545 .save_live_pending = ram_save_pending,
4546 .load_state = ram_load,
Juan Quintelaf265e0e2017-06-28 11:52:27 +02004547 .save_cleanup = ram_save_cleanup,
4548 .load_setup = ram_load_setup,
4549 .load_cleanup = ram_load_cleanup,
Peter Xuedd090c2018-05-02 18:47:32 +08004550 .resume_prepare = ram_resume_prepare,
Juan Quintela56e93d22015-05-07 19:33:31 +02004551};
4552
4553void ram_mig_init(void)
4554{
4555 qemu_mutex_init(&XBZRLE.lock);
Juan Quintela6f37bb82017-03-13 19:26:29 +01004556 register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state);
Juan Quintela56e93d22015-05-07 19:33:31 +02004557}