/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2011-2015 Red Hat Inc
 *
 * Authors:
 *  Juan Quintela <quintela@redhat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include <zlib.h>
#include "qemu/cutils.h"
#include "qemu/bitops.h"
#include "qemu/bitmap.h"
#include "qemu/main-loop.h"
#include "qemu/pmem.h"
#include "xbzrle.h"
#include "ram.h"
#include "migration.h"
#include "socket.h"
#include "migration/register.h"
#include "migration/misc.h"
#include "qemu-file.h"
#include "postcopy-ram.h"
#include "page_cache.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "qapi/qapi-events-migration.h"
#include "qapi/qmp/qerror.h"
#include "trace.h"
#include "exec/ram_addr.h"
#include "exec/target_page.h"
#include "qemu/rcu_queue.h"
#include "migration/colo.h"
#include "block.h"
#include "sysemu/sysemu.h"
#include "qemu/uuid.h"
#include "savevm.h"
#include "qemu/iov.h"

/***********************************************************/
/* ram save/restore */

/* RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS, it
 * worked for pages that were filled with the same char.  We switched
 * it to only search for the zero value.  And to avoid confusion with
 * RAM_SAVE_FLAG_COMPRESS_PAGE just rename it.
 */

#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
#define RAM_SAVE_FLAG_ZERO     0x02
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
#define RAM_SAVE_FLAG_PAGE     0x08
#define RAM_SAVE_FLAG_EOS      0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE   0x40
/* 0x80 is reserved in migration.h start with 0x100 next */
#define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100

static inline bool is_zero_range(uint8_t *p, uint64_t size)
{
    return buffer_is_zero(p, size);
}

XBZRLECacheStats xbzrle_counters;

/* struct contains XBZRLE cache and a static page
   used by the compression */
static struct {
    /* buffer used for XBZRLE encoding */
    uint8_t *encoded_buf;
    /* buffer for storing page content */
    uint8_t *current_buf;
    /* Cache for XBZRLE, Protected by lock. */
    PageCache *cache;
    QemuMutex lock;
    /* it will store a page full of zeros */
    uint8_t *zero_target_page;
    /* buffer used for XBZRLE decoding */
    uint8_t *decoded_buf;
} XBZRLE;

static void XBZRLE_cache_lock(void)
{
    if (migrate_use_xbzrle())
        qemu_mutex_lock(&XBZRLE.lock);
}

static void XBZRLE_cache_unlock(void)
{
    if (migrate_use_xbzrle())
        qemu_mutex_unlock(&XBZRLE.lock);
}

/**
 * xbzrle_cache_resize: resize the xbzrle cache
 *
 * This function is called from qmp_migrate_set_cache_size in the main
 * thread, possibly while a migration is in progress. A running
 * migration may be using the cache and might finish during this call,
 * hence changes to the cache are protected by the XBZRLE.lock mutex.
 *
 * Returns 0 for success or -1 for error
 *
 * @new_size: new cache size
 * @errp: set *errp if the check failed, with reason
 */
int xbzrle_cache_resize(int64_t new_size, Error **errp)
{
    PageCache *new_cache;
    int64_t ret = 0;

    /* Check for truncation */
    if (new_size != (size_t)new_size) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
                   "exceeding address space");
        return -1;
    }

    if (new_size == migrate_xbzrle_cache_size()) {
        /* nothing to do */
        return 0;
    }

    XBZRLE_cache_lock();

    if (XBZRLE.cache != NULL) {
        new_cache = cache_init(new_size, TARGET_PAGE_SIZE, errp);
        if (!new_cache) {
            ret = -1;
            goto out;
        }

        cache_fini(XBZRLE.cache);
        XBZRLE.cache = new_cache;
    }
out:
    XBZRLE_cache_unlock();
    return ret;
}

static bool ramblock_is_ignored(RAMBlock *block)
{
    return !qemu_ram_is_migratable(block) ||
           (migrate_ignore_shared() && qemu_ram_is_shared(block));
}

/* Should be holding either ram_list.mutex, or the RCU lock. */
#define RAMBLOCK_FOREACH_NOT_IGNORED(block)            \
    INTERNAL_RAMBLOCK_FOREACH(block)                   \
        if (ramblock_is_ignored(block)) {} else

#define RAMBLOCK_FOREACH_MIGRATABLE(block)             \
    INTERNAL_RAMBLOCK_FOREACH(block)                   \
        if (!qemu_ram_is_migratable(block)) {} else
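
/*
 * Both macros above rely on the "if (cond) {} else" idiom: the caller's loop
 * body becomes the else branch, so blocks that fail the filter are skipped
 * while the macro still composes with a single statement or a braced block,
 * just like a plain for loop would.
 */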

#undef RAMBLOCK_FOREACH

int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque)
{
    RAMBlock *block;
    int ret = 0;

    rcu_read_lock();
    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        ret = func(block, opaque);
        if (ret) {
            break;
        }
    }
    rcu_read_unlock();
    return ret;
}

static void ramblock_recv_map_init(void)
{
    RAMBlock *rb;

    RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
        assert(!rb->receivedmap);
        rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits());
    }
}

int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr)
{
    return test_bit(ramblock_recv_bitmap_offset(host_addr, rb),
                    rb->receivedmap);
}

bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset)
{
    return test_bit(byte_offset >> TARGET_PAGE_BITS, rb->receivedmap);
}

void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr)
{
    set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb), rb->receivedmap);
}

void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr,
                                    size_t nr)
{
    bitmap_set_atomic(rb->receivedmap,
                      ramblock_recv_bitmap_offset(host_addr, rb),
                      nr);
}

#define RAMBLOCK_RECV_BITMAP_ENDING (0x0123456789abcdefULL)

/*
 * Format: bitmap_size (8 bytes) + whole_bitmap (N bytes).
 *
 * Returns >0 if success with sent bytes, or <0 if error.
 */
int64_t ramblock_recv_bitmap_send(QEMUFile *file,
                                  const char *block_name)
{
    RAMBlock *block = qemu_ram_block_by_name(block_name);
    unsigned long *le_bitmap, nbits;
    uint64_t size;

    if (!block) {
        error_report("%s: invalid block name: %s", __func__, block_name);
        return -1;
    }

    nbits = block->used_length >> TARGET_PAGE_BITS;

    /*
     * Make sure the tmp bitmap buffer is big enough, e.g., on 32bit
     * machines we may need 4 more bytes for padding (see below
     * comment). So extend it a bit beforehand.
     */
    le_bitmap = bitmap_new(nbits + BITS_PER_LONG);

    /*
     * Always use little endian when sending the bitmap. This is
     * required so that the bitmap can be parsed even when source and
     * destination VMs are not using the same endianness. (Note: big
     * endian won't work.)
     */
    bitmap_to_le(le_bitmap, block->receivedmap, nbits);

    /* Size of the bitmap, in bytes */
    size = DIV_ROUND_UP(nbits, 8);

    /*
     * size is always aligned to 8 bytes for 64bit machines, but it
     * may not be true for 32bit machines. We need this padding to
     * make sure the migration can survive even between 32bit and
     * 64bit machines.
     */
    size = ROUND_UP(size, 8);

    qemu_put_be64(file, size);
    qemu_put_buffer(file, (const uint8_t *)le_bitmap, size);
    /*
     * Mark as an end, in case the middle part is screwed up due to
     * some "mysterious" reason.
     */
    qemu_put_be64(file, RAMBLOCK_RECV_BITMAP_ENDING);
    qemu_fflush(file);

    g_free(le_bitmap);

    if (qemu_file_get_error(file)) {
        return qemu_file_get_error(file);
    }

    return size + sizeof(size);
}
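
/*
 * On the wire the receive bitmap is, in order: an 8-byte size, 'size' bytes
 * of little-endian bitmap (padded up to an 8-byte multiple), and the 8-byte
 * RAMBLOCK_RECV_BITMAP_ENDING marker.  A reader consumes the size and the
 * padded bitmap, then checks the ending marker to detect a corrupted or
 * truncated middle section.
 */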

/*
 * An outstanding page request, on the source, having been received
 * and queued
 */
struct RAMSrcPageRequest {
    RAMBlock *rb;
    hwaddr    offset;
    hwaddr    len;

    QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
};

/* State of RAM for migration */
struct RAMState {
    /* QEMUFile used for this migration */
    QEMUFile *f;
    /* Last block that we have visited searching for dirty pages */
    RAMBlock *last_seen_block;
    /* Last block from where we have sent data */
    RAMBlock *last_sent_block;
    /* Last dirty target page we have sent */
    ram_addr_t last_page;
    /* last ram version we have seen */
    uint32_t last_version;
    /* We are in the first round */
    bool ram_bulk_stage;
    /* The free page optimization is enabled */
    bool fpo_enabled;
    /* How many times we have dirty too many pages */
    int dirty_rate_high_cnt;
    /* these variables are used for bitmap sync */
    /* last time we did a full bitmap_sync */
    int64_t time_last_bitmap_sync;
    /* bytes transferred at start_time */
    uint64_t bytes_xfer_prev;
    /* number of dirty pages since start_time */
    uint64_t num_dirty_pages_period;
    /* xbzrle misses since the beginning of the period */
    uint64_t xbzrle_cache_miss_prev;

    /* compression statistics since the beginning of the period */
    /* number of times no free thread was available to compress data */
    uint64_t compress_thread_busy_prev;
    /* amount of bytes after compression */
    uint64_t compressed_size_prev;
    /* amount of compressed pages */
    uint64_t compress_pages_prev;

    /* total handled target pages at the beginning of period */
    uint64_t target_page_count_prev;
    /* total handled target pages since start */
    uint64_t target_page_count;
    /* number of dirty bits in the bitmap */
    uint64_t migration_dirty_pages;
    /* Protects modification of the bitmap and migration dirty pages */
    QemuMutex bitmap_mutex;
    /* The RAMBlock used in the last src_page_requests */
    RAMBlock *last_req_rb;
    /* Queue of outstanding page requests from the destination */
    QemuMutex src_page_req_mutex;
    QSIMPLEQ_HEAD(, RAMSrcPageRequest) src_page_requests;
};
typedef struct RAMState RAMState;

static RAMState *ram_state;

static NotifierWithReturnList precopy_notifier_list;

void precopy_infrastructure_init(void)
{
    notifier_with_return_list_init(&precopy_notifier_list);
}

void precopy_add_notifier(NotifierWithReturn *n)
{
    notifier_with_return_list_add(&precopy_notifier_list, n);
}

void precopy_remove_notifier(NotifierWithReturn *n)
{
    notifier_with_return_remove(n);
}

int precopy_notify(PrecopyNotifyReason reason, Error **errp)
{
    PrecopyNotifyData pnd;
    pnd.reason = reason;
    pnd.errp = errp;

    return notifier_with_return_list_notify(&precopy_notifier_list, &pnd);
}
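
/*
 * A minimal usage sketch (illustrative only; the callback and notifier names
 * below are hypothetical): a client such as a free-page hinting device
 * registers a NotifierWithReturn whose callback receives the
 * PrecopyNotifyData and returns 0 to let precopy continue:
 *
 *     static int my_precopy_cb(NotifierWithReturn *n, void *data)
 *     {
 *         PrecopyNotifyData *pnd = data;
 *         // react to pnd->reason, report problems through pnd->errp
 *         return 0;
 *     }
 *
 *     static NotifierWithReturn my_notifier = { .notify = my_precopy_cb };
 *     precopy_add_notifier(&my_notifier);
 *
 * A non-zero return from any notifier is propagated by precopy_notify()
 * back to its caller.
 */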

void precopy_enable_free_page_optimization(void)
{
    if (!ram_state) {
        return;
    }

    ram_state->fpo_enabled = true;
}

uint64_t ram_bytes_remaining(void)
{
    return ram_state ? (ram_state->migration_dirty_pages * TARGET_PAGE_SIZE) :
                       0;
}

MigrationStats ram_counters;

/* used by the search for pages to send */
struct PageSearchStatus {
    /* Current block being searched */
    RAMBlock    *block;
    /* Current page to search from */
    unsigned long page;
    /* Set once we wrap around */
    bool         complete_round;
};
typedef struct PageSearchStatus PageSearchStatus;

CompressionStats compression_counters;

struct CompressParam {
    bool done;
    bool quit;
    bool zero_page;
    QEMUFile *file;
    QemuMutex mutex;
    QemuCond cond;
    RAMBlock *block;
    ram_addr_t offset;

    /* internally used fields */
    z_stream stream;
    uint8_t *originbuf;
};
typedef struct CompressParam CompressParam;

struct DecompressParam {
    bool done;
    bool quit;
    QemuMutex mutex;
    QemuCond cond;
    void *des;
    uint8_t *compbuf;
    int len;
    z_stream stream;
};
typedef struct DecompressParam DecompressParam;

static CompressParam *comp_param;
static QemuThread *compress_threads;
/* comp_done_cond is used to wake up the migration thread when
 * one of the compression threads has finished the compression.
 * comp_done_lock is used to co-work with comp_done_cond.
 */
static QemuMutex comp_done_lock;
static QemuCond comp_done_cond;
/* The empty QEMUFileOps will be used by file in CompressParam */
static const QEMUFileOps empty_ops = { };

static QEMUFile *decomp_file;
static DecompressParam *decomp_param;
static QemuThread *decompress_threads;
static QemuMutex decomp_done_lock;
static QemuCond decomp_done_cond;

static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
                                 ram_addr_t offset, uint8_t *source_buf);

static void *do_data_compress(void *opaque)
{
    CompressParam *param = opaque;
    RAMBlock *block;
    ram_addr_t offset;
    bool zero_page;

    qemu_mutex_lock(&param->mutex);
    while (!param->quit) {
        if (param->block) {
            block = param->block;
            offset = param->offset;
            param->block = NULL;
            qemu_mutex_unlock(&param->mutex);

            zero_page = do_compress_ram_page(param->file, &param->stream,
                                             block, offset, param->originbuf);

            qemu_mutex_lock(&comp_done_lock);
            param->done = true;
            param->zero_page = zero_page;
            qemu_cond_signal(&comp_done_cond);
            qemu_mutex_unlock(&comp_done_lock);

            qemu_mutex_lock(&param->mutex);
        } else {
            qemu_cond_wait(&param->cond, &param->mutex);
        }
    }
    qemu_mutex_unlock(&param->mutex);

    return NULL;
}
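
/*
 * Hand-off protocol between the compression requester and a worker thread:
 * each worker sleeps on param->cond until param->block is set (under
 * param->mutex) by the requester; it then clears param->block, compresses
 * the page into its private QEMUFile buffer, and reports completion by
 * setting param->done (and param->zero_page) under comp_done_lock and
 * signalling comp_done_cond.
 */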

static void compress_threads_save_cleanup(void)
{
    int i, thread_count;

    if (!migrate_use_compression() || !comp_param) {
        return;
    }

    thread_count = migrate_compress_threads();
    for (i = 0; i < thread_count; i++) {
        /*
         * we use it as an indicator which shows if the thread is
         * properly init'd or not
         */
        if (!comp_param[i].file) {
            break;
        }

        qemu_mutex_lock(&comp_param[i].mutex);
        comp_param[i].quit = true;
        qemu_cond_signal(&comp_param[i].cond);
        qemu_mutex_unlock(&comp_param[i].mutex);

        qemu_thread_join(compress_threads + i);
        qemu_mutex_destroy(&comp_param[i].mutex);
        qemu_cond_destroy(&comp_param[i].cond);
        deflateEnd(&comp_param[i].stream);
        g_free(comp_param[i].originbuf);
        qemu_fclose(comp_param[i].file);
        comp_param[i].file = NULL;
    }
    qemu_mutex_destroy(&comp_done_lock);
    qemu_cond_destroy(&comp_done_cond);
    g_free(compress_threads);
    g_free(comp_param);
    compress_threads = NULL;
    comp_param = NULL;
}

static int compress_threads_save_setup(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return 0;
    }
    thread_count = migrate_compress_threads();
    compress_threads = g_new0(QemuThread, thread_count);
    comp_param = g_new0(CompressParam, thread_count);
    qemu_cond_init(&comp_done_cond);
    qemu_mutex_init(&comp_done_lock);
    for (i = 0; i < thread_count; i++) {
        comp_param[i].originbuf = g_try_malloc(TARGET_PAGE_SIZE);
        if (!comp_param[i].originbuf) {
            goto exit;
        }

        if (deflateInit(&comp_param[i].stream,
                        migrate_compress_level()) != Z_OK) {
            g_free(comp_param[i].originbuf);
            goto exit;
        }

        /* comp_param[i].file is just used as a dummy buffer to save data,
         * set its ops to empty.
         */
        comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
        comp_param[i].done = true;
        comp_param[i].quit = false;
        qemu_mutex_init(&comp_param[i].mutex);
        qemu_cond_init(&comp_param[i].cond);
        qemu_thread_create(compress_threads + i, "compress",
                           do_data_compress, comp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
    return 0;

exit:
    compress_threads_save_cleanup();
    return -1;
}

/* Multiple fd's */

#define MULTIFD_MAGIC 0x11223344U
#define MULTIFD_VERSION 1

#define MULTIFD_FLAG_SYNC (1 << 0)

/* This value needs to be a multiple of qemu_target_page_size() */
#define MULTIFD_PACKET_SIZE (512 * 1024)

typedef struct {
    uint32_t magic;
    uint32_t version;
    unsigned char uuid[16]; /* QemuUUID */
    uint8_t id;
    uint8_t unused1[7];     /* Reserved for future use */
    uint64_t unused2[4];    /* Reserved for future use */
} __attribute__((packed)) MultiFDInit_t;

typedef struct {
    uint32_t magic;
    uint32_t version;
    uint32_t flags;
    /* maximum number of allocated pages */
    uint32_t pages_alloc;
    uint32_t pages_used;
    /* size of the next packet that contains pages */
    uint32_t next_packet_size;
    uint64_t packet_num;
    uint64_t unused[4];     /* Reserved for future use */
    char ramblock[256];
    uint64_t offset[];
} __attribute__((packed)) MultiFDPacket_t;
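
/*
 * Both MultiFDInit_t and MultiFDPacket_t travel over the channel exactly as
 * laid out above (hence the packed attribute); every multi-byte integer is
 * converted to big endian before sending and back to host order on receive,
 * see multifd_send_fill_packet() and multifd_recv_unfill_packet() below.
 */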

typedef struct {
    /* number of used pages */
    uint32_t used;
    /* number of allocated pages */
    uint32_t allocated;
    /* global number of generated multifd packets */
    uint64_t packet_num;
    /* offset of each page */
    ram_addr_t *offset;
    /* pointer to each page */
    struct iovec *iov;
    RAMBlock *block;
} MultiFDPages_t;

typedef struct {
    /* these fields are not changed once the thread is created */
    /* channel number */
    uint8_t id;
    /* channel thread name */
    char *name;
    /* channel thread id */
    QemuThread thread;
    /* communication channel */
    QIOChannel *c;
    /* sem where to wait for more work */
    QemuSemaphore sem;
    /* this mutex protects the following parameters */
    QemuMutex mutex;
    /* is this channel thread running */
    bool running;
    /* should this thread finish */
    bool quit;
    /* thread has work to do */
    int pending_job;
    /* array of pages to be sent */
    MultiFDPages_t *pages;
    /* packet allocated len */
    uint32_t packet_len;
    /* pointer to the packet */
    MultiFDPacket_t *packet;
    /* multifd flags for each packet */
    uint32_t flags;
    /* size of the next packet that contains pages */
    uint32_t next_packet_size;
    /* global number of generated multifd packets */
    uint64_t packet_num;
    /* thread local variables */
    /* packets sent through this channel */
    uint64_t num_packets;
    /* pages sent through this channel */
    uint64_t num_pages;
} MultiFDSendParams;

typedef struct {
    /* these fields are not changed once the thread is created */
    /* channel number */
    uint8_t id;
    /* channel thread name */
    char *name;
    /* channel thread id */
    QemuThread thread;
    /* communication channel */
    QIOChannel *c;
    /* this mutex protects the following parameters */
    QemuMutex mutex;
    /* is this channel thread running */
    bool running;
    /* should this thread finish */
    bool quit;
    /* array of pages to receive */
    MultiFDPages_t *pages;
    /* packet allocated len */
    uint32_t packet_len;
    /* pointer to the packet */
    MultiFDPacket_t *packet;
    /* multifd flags for each packet */
    uint32_t flags;
    /* global number of generated multifd packets */
    uint64_t packet_num;
    /* thread local variables */
    /* size of the next packet that contains pages */
    uint32_t next_packet_size;
    /* packets sent through this channel */
    uint64_t num_packets;
    /* pages sent through this channel */
    uint64_t num_pages;
    /* syncs main thread and channels */
    QemuSemaphore sem_sync;
} MultiFDRecvParams;

static int multifd_send_initial_packet(MultiFDSendParams *p, Error **errp)
{
    MultiFDInit_t msg;
    int ret;

    msg.magic = cpu_to_be32(MULTIFD_MAGIC);
    msg.version = cpu_to_be32(MULTIFD_VERSION);
    msg.id = p->id;
    memcpy(msg.uuid, &qemu_uuid.data, sizeof(msg.uuid));

    ret = qio_channel_write_all(p->c, (char *)&msg, sizeof(msg), errp);
    if (ret != 0) {
        return -1;
    }
    return 0;
}

static int multifd_recv_initial_packet(QIOChannel *c, Error **errp)
{
    MultiFDInit_t msg;
    int ret;

    ret = qio_channel_read_all(c, (char *)&msg, sizeof(msg), errp);
    if (ret != 0) {
        return -1;
    }

    msg.magic = be32_to_cpu(msg.magic);
    msg.version = be32_to_cpu(msg.version);

    if (msg.magic != MULTIFD_MAGIC) {
        error_setg(errp, "multifd: received packet magic %x "
                   "expected %x", msg.magic, MULTIFD_MAGIC);
        return -1;
    }

    if (msg.version != MULTIFD_VERSION) {
        error_setg(errp, "multifd: received packet version %d "
                   "expected %d", msg.version, MULTIFD_VERSION);
        return -1;
    }

    if (memcmp(msg.uuid, &qemu_uuid, sizeof(qemu_uuid))) {
        char *uuid = qemu_uuid_unparse_strdup(&qemu_uuid);
        char *msg_uuid = qemu_uuid_unparse_strdup((const QemuUUID *)msg.uuid);

        error_setg(errp, "multifd: received uuid '%s' and expected "
                   "uuid '%s' for channel %hhd", msg_uuid, uuid, msg.id);
        g_free(uuid);
        g_free(msg_uuid);
        return -1;
    }

    if (msg.id > migrate_multifd_channels()) {
        error_setg(errp, "multifd: received channel id %d is greater "
                   "than number of channels %d", msg.id,
                   migrate_multifd_channels());
        return -1;
    }

    return msg.id;
}

static MultiFDPages_t *multifd_pages_init(size_t size)
{
    MultiFDPages_t *pages = g_new0(MultiFDPages_t, 1);

    pages->allocated = size;
    pages->iov = g_new0(struct iovec, size);
    pages->offset = g_new0(ram_addr_t, size);

    return pages;
}

static void multifd_pages_clear(MultiFDPages_t *pages)
{
    pages->used = 0;
    pages->allocated = 0;
    pages->packet_num = 0;
    pages->block = NULL;
    g_free(pages->iov);
    pages->iov = NULL;
    g_free(pages->offset);
    pages->offset = NULL;
    g_free(pages);
}

static void multifd_send_fill_packet(MultiFDSendParams *p)
{
    MultiFDPacket_t *packet = p->packet;
    uint32_t page_max = MULTIFD_PACKET_SIZE / qemu_target_page_size();
    int i;

    packet->magic = cpu_to_be32(MULTIFD_MAGIC);
    packet->version = cpu_to_be32(MULTIFD_VERSION);
    packet->flags = cpu_to_be32(p->flags);
    packet->pages_alloc = cpu_to_be32(page_max);
    packet->pages_used = cpu_to_be32(p->pages->used);
    packet->next_packet_size = cpu_to_be32(p->next_packet_size);
    packet->packet_num = cpu_to_be64(p->packet_num);

    if (p->pages->block) {
        strncpy(packet->ramblock, p->pages->block->idstr, 256);
    }

    for (i = 0; i < p->pages->used; i++) {
        packet->offset[i] = cpu_to_be64(p->pages->offset[i]);
    }
}

812static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
813{
814 MultiFDPacket_t *packet = p->packet;
Juan Quintela7ed379b2019-02-20 12:44:07 +0100815 uint32_t pages_max = MULTIFD_PACKET_SIZE / qemu_target_page_size();
Juan Quintela2a26c972018-04-04 11:26:58 +0200816 RAMBlock *block;
817 int i;
818
Peter Maydell341ba0d2018-09-25 17:19:24 +0100819 packet->magic = be32_to_cpu(packet->magic);
Juan Quintela2a26c972018-04-04 11:26:58 +0200820 if (packet->magic != MULTIFD_MAGIC) {
821 error_setg(errp, "multifd: received packet "
822 "magic %x and expected magic %x",
823 packet->magic, MULTIFD_MAGIC);
824 return -1;
825 }
826
Peter Maydell341ba0d2018-09-25 17:19:24 +0100827 packet->version = be32_to_cpu(packet->version);
Juan Quintela2a26c972018-04-04 11:26:58 +0200828 if (packet->version != MULTIFD_VERSION) {
829 error_setg(errp, "multifd: received packet "
830 "version %d and expected version %d",
831 packet->version, MULTIFD_VERSION);
832 return -1;
833 }
834
835 p->flags = be32_to_cpu(packet->flags);
836
Juan Quintela6f862692019-02-20 12:04:04 +0100837 packet->pages_alloc = be32_to_cpu(packet->pages_alloc);
Juan Quintela7ed379b2019-02-20 12:44:07 +0100838 /*
839 * If we recevied a packet that is 100 times bigger than expected
840 * just stop migration. It is a magic number.
841 */
842 if (packet->pages_alloc > pages_max * 100) {
Juan Quintela2a26c972018-04-04 11:26:58 +0200843 error_setg(errp, "multifd: received packet "
Juan Quintela7ed379b2019-02-20 12:44:07 +0100844 "with size %d and expected a maximum size of %d",
845 packet->pages_alloc, pages_max * 100) ;
Juan Quintela2a26c972018-04-04 11:26:58 +0200846 return -1;
847 }
Juan Quintela7ed379b2019-02-20 12:44:07 +0100848 /*
849 * We received a packet that is bigger than expected but inside
850 * reasonable limits (see previous comment). Just reallocate.
851 */
852 if (packet->pages_alloc > p->pages->allocated) {
853 multifd_pages_clear(p->pages);
Peter Maydellf151f8a2019-04-09 16:18:30 +0100854 p->pages = multifd_pages_init(packet->pages_alloc);
Juan Quintela7ed379b2019-02-20 12:44:07 +0100855 }
Juan Quintela2a26c972018-04-04 11:26:58 +0200856
Juan Quintela6f862692019-02-20 12:04:04 +0100857 p->pages->used = be32_to_cpu(packet->pages_used);
858 if (p->pages->used > packet->pages_alloc) {
Juan Quintela2a26c972018-04-04 11:26:58 +0200859 error_setg(errp, "multifd: received packet "
Juan Quintela6f862692019-02-20 12:04:04 +0100860 "with %d pages and expected maximum pages are %d",
861 p->pages->used, packet->pages_alloc) ;
Juan Quintela2a26c972018-04-04 11:26:58 +0200862 return -1;
863 }
864
Juan Quintela2a34ee52019-01-04 19:45:39 +0100865 p->next_packet_size = be32_to_cpu(packet->next_packet_size);
Juan Quintela2a26c972018-04-04 11:26:58 +0200866 p->packet_num = be64_to_cpu(packet->packet_num);
867
868 if (p->pages->used) {
869 /* make sure that ramblock is 0 terminated */
870 packet->ramblock[255] = 0;
871 block = qemu_ram_block_by_name(packet->ramblock);
872 if (!block) {
873 error_setg(errp, "multifd: unknown ram block %s",
874 packet->ramblock);
875 return -1;
876 }
877 }
878
879 for (i = 0; i < p->pages->used; i++) {
880 ram_addr_t offset = be64_to_cpu(packet->offset[i]);
881
882 if (offset > (block->used_length - TARGET_PAGE_SIZE)) {
883 error_setg(errp, "multifd: offset too long " RAM_ADDR_FMT
884 " (max " RAM_ADDR_FMT ")",
885 offset, block->max_length);
886 return -1;
887 }
888 p->pages->iov[i].iov_base = block->host + offset;
889 p->pages->iov[i].iov_len = TARGET_PAGE_SIZE;
890 }
891
892 return 0;
893}
894
struct {
    MultiFDSendParams *params;
    /* array of pages to be sent */
    MultiFDPages_t *pages;
    /* syncs main thread and channels */
    QemuSemaphore sem_sync;
    /* global number of generated multifd packets */
    uint64_t packet_num;
    /* send channels ready */
    QemuSemaphore channels_ready;
} *multifd_send_state;

/*
 * How do we use multifd_send_state->pages and channel->pages?
 *
 * We create a pages structure for each channel, and a main one.  Each
 * time that we need to send a batch of pages we interchange the ones
 * between multifd_send_state and the channel that is sending it.  There
 * are two reasons for that:
 *    - to not have to do so many mallocs during migration
 *    - to make it easier to know what to free at the end of migration
 *
 * This way we always know who is the owner of each "pages" struct,
 * and we don't need any locking.  It belongs to the migration thread
 * or to the channel thread.  Switching is safe because the migration
 * thread is using the channel mutex when changing it, and the channel
 * thread has to have finished with its own, otherwise pending_job can't
 * be false.
 */

static int multifd_send_pages(void)
{
    int i;
    static int next_channel;
    MultiFDSendParams *p = NULL; /* make happy gcc */
    MultiFDPages_t *pages = multifd_send_state->pages;
    uint64_t transferred;

    qemu_sem_wait(&multifd_send_state->channels_ready);
    for (i = next_channel;; i = (i + 1) % migrate_multifd_channels()) {
        p = &multifd_send_state->params[i];

        qemu_mutex_lock(&p->mutex);
        if (p->quit) {
            error_report("%s: channel %d has already quit!", __func__, i);
            qemu_mutex_unlock(&p->mutex);
            return -1;
        }
        if (!p->pending_job) {
            p->pending_job++;
            next_channel = (i + 1) % migrate_multifd_channels();
            break;
        }
        qemu_mutex_unlock(&p->mutex);
    }
    p->pages->used = 0;

    p->packet_num = multifd_send_state->packet_num++;
    p->pages->block = NULL;
    multifd_send_state->pages = p->pages;
    p->pages = pages;
    transferred = ((uint64_t) pages->used) * TARGET_PAGE_SIZE + p->packet_len;
    ram_counters.multifd_bytes += transferred;
    ram_counters.transferred += transferred;
    qemu_mutex_unlock(&p->mutex);
    qemu_sem_post(&p->sem);

    return 1;
}

static int multifd_queue_page(RAMBlock *block, ram_addr_t offset)
{
    MultiFDPages_t *pages = multifd_send_state->pages;

    if (!pages->block) {
        pages->block = block;
    }

    if (pages->block == block) {
        pages->offset[pages->used] = offset;
        pages->iov[pages->used].iov_base = block->host + offset;
        pages->iov[pages->used].iov_len = TARGET_PAGE_SIZE;
        pages->used++;

        if (pages->used < pages->allocated) {
            return 1;
        }
    }

    if (multifd_send_pages() < 0) {
        return -1;
    }

    if (pages->block != block) {
        return multifd_queue_page(block, offset);
    }

    return 1;
}
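
/*
 * multifd_queue_page() appends the page to the shared MultiFDPages_t as long
 * as it belongs to the same RAMBlock as the pages already queued; once the
 * array is full, or a page from a different block arrives, the batch is
 * flushed with multifd_send_pages() and, in the different-block case, the
 * call recurses once to queue the page into the fresh (empty) array.
 */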

static void multifd_send_terminate_threads(Error *err)
{
    int i;

    if (err) {
        MigrationState *s = migrate_get_current();
        migrate_set_error(s, err);
        if (s->state == MIGRATION_STATUS_SETUP ||
            s->state == MIGRATION_STATUS_PRE_SWITCHOVER ||
            s->state == MIGRATION_STATUS_DEVICE ||
            s->state == MIGRATION_STATUS_ACTIVE) {
            migrate_set_state(&s->state, s->state,
                              MIGRATION_STATUS_FAILED);
        }
    }

    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        qemu_mutex_lock(&p->mutex);
        p->quit = true;
        qemu_sem_post(&p->sem);
        qemu_mutex_unlock(&p->mutex);
    }
}

void multifd_save_cleanup(void)
{
    int i;

    if (!migrate_use_multifd()) {
        return;
    }
    multifd_send_terminate_threads(NULL);
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        if (p->running) {
            qemu_thread_join(&p->thread);
        }
        socket_send_channel_destroy(p->c);
        p->c = NULL;
        qemu_mutex_destroy(&p->mutex);
        qemu_sem_destroy(&p->sem);
        g_free(p->name);
        p->name = NULL;
        multifd_pages_clear(p->pages);
        p->pages = NULL;
        p->packet_len = 0;
        g_free(p->packet);
        p->packet = NULL;
    }
    qemu_sem_destroy(&multifd_send_state->channels_ready);
    qemu_sem_destroy(&multifd_send_state->sem_sync);
    g_free(multifd_send_state->params);
    multifd_send_state->params = NULL;
    multifd_pages_clear(multifd_send_state->pages);
    multifd_send_state->pages = NULL;
    g_free(multifd_send_state);
    multifd_send_state = NULL;
}

static void multifd_send_sync_main(void)
{
    int i;

    if (!migrate_use_multifd()) {
        return;
    }
    if (multifd_send_state->pages->used) {
        if (multifd_send_pages() < 0) {
            error_report("%s: multifd_send_pages fail", __func__);
            return;
        }
    }
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        trace_multifd_send_sync_main_signal(p->id);

        qemu_mutex_lock(&p->mutex);

        if (p->quit) {
            error_report("%s: channel %d has already quit", __func__, i);
            qemu_mutex_unlock(&p->mutex);
            return;
        }

        p->packet_num = multifd_send_state->packet_num++;
        p->flags |= MULTIFD_FLAG_SYNC;
        p->pending_job++;
        qemu_mutex_unlock(&p->mutex);
        qemu_sem_post(&p->sem);
    }
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        trace_multifd_send_sync_main_wait(p->id);
        qemu_sem_wait(&multifd_send_state->sem_sync);
    }
    trace_multifd_send_sync_main(multifd_send_state->packet_num);
}
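
/*
 * Synchronisation works in two passes: the first loop queues one extra job
 * per channel with MULTIFD_FLAG_SYNC set, the second loop waits on
 * multifd_send_state->sem_sync, which each channel thread posts after it has
 * written its SYNC packet (see multifd_send_thread()).  When the second loop
 * finishes, every channel has sent the packets queued before the sync point.
 */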

static void *multifd_send_thread(void *opaque)
{
    MultiFDSendParams *p = opaque;
    Error *local_err = NULL;
    int ret = 0;
    uint32_t flags = 0;

    trace_multifd_send_thread_start(p->id);
    rcu_register_thread();

    if (multifd_send_initial_packet(p, &local_err) < 0) {
        goto out;
    }
    /* initial packet */
    p->num_packets = 1;

    while (true) {
        qemu_sem_wait(&p->sem);
        qemu_mutex_lock(&p->mutex);

        if (p->pending_job) {
            uint32_t used = p->pages->used;
            uint64_t packet_num = p->packet_num;
            flags = p->flags;

            p->next_packet_size = used * qemu_target_page_size();
            multifd_send_fill_packet(p);
            p->flags = 0;
            p->num_packets++;
            p->num_pages += used;
            p->pages->used = 0;
            qemu_mutex_unlock(&p->mutex);

            trace_multifd_send(p->id, packet_num, used, flags,
                               p->next_packet_size);

            ret = qio_channel_write_all(p->c, (void *)p->packet,
                                        p->packet_len, &local_err);
            if (ret != 0) {
                break;
            }

            if (used) {
                ret = qio_channel_writev_all(p->c, p->pages->iov,
                                             used, &local_err);
                if (ret != 0) {
                    break;
                }
            }

            qemu_mutex_lock(&p->mutex);
            p->pending_job--;
            qemu_mutex_unlock(&p->mutex);

            if (flags & MULTIFD_FLAG_SYNC) {
                qemu_sem_post(&multifd_send_state->sem_sync);
            }
            qemu_sem_post(&multifd_send_state->channels_ready);
        } else if (p->quit) {
            qemu_mutex_unlock(&p->mutex);
            break;
        } else {
            qemu_mutex_unlock(&p->mutex);
            /* sometimes there are spurious wakeups */
        }
    }

out:
    if (local_err) {
        multifd_send_terminate_threads(local_err);
    }

    /*
     * An error happened; before exiting, wake up whoever may be waiting
     * on this thread so they don't block forever.
     */
    if (ret != 0) {
        if (flags & MULTIFD_FLAG_SYNC) {
            qemu_sem_post(&multifd_send_state->sem_sync);
        }
        qemu_sem_post(&multifd_send_state->channels_ready);
    }

    qemu_mutex_lock(&p->mutex);
    p->running = false;
    qemu_mutex_unlock(&p->mutex);

    rcu_unregister_thread();
    trace_multifd_send_thread_end(p->id, p->num_packets, p->num_pages);

    return NULL;
}

static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
{
    MultiFDSendParams *p = opaque;
    QIOChannel *sioc = QIO_CHANNEL(qio_task_get_source(task));
    Error *local_err = NULL;

    if (qio_task_propagate_error(task, &local_err)) {
        migrate_set_error(migrate_get_current(), local_err);
        multifd_save_cleanup();
    } else {
        p->c = QIO_CHANNEL(sioc);
        qio_channel_set_delay(p->c, false);
        p->running = true;
        qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
                           QEMU_THREAD_JOINABLE);
    }
}

int multifd_save_setup(void)
{
    int thread_count;
    uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size();
    uint8_t i;

    if (!migrate_use_multifd()) {
        return 0;
    }
    thread_count = migrate_multifd_channels();
    multifd_send_state = g_malloc0(sizeof(*multifd_send_state));
    multifd_send_state->params = g_new0(MultiFDSendParams, thread_count);
    multifd_send_state->pages = multifd_pages_init(page_count);
    qemu_sem_init(&multifd_send_state->sem_sync, 0);
    qemu_sem_init(&multifd_send_state->channels_ready, 0);

    for (i = 0; i < thread_count; i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        qemu_mutex_init(&p->mutex);
        qemu_sem_init(&p->sem, 0);
        p->quit = false;
        p->pending_job = 0;
        p->id = i;
        p->pages = multifd_pages_init(page_count);
        p->packet_len = sizeof(MultiFDPacket_t)
                      + sizeof(ram_addr_t) * page_count;
        p->packet = g_malloc0(p->packet_len);
        p->name = g_strdup_printf("multifdsend_%d", i);
        socket_send_channel_create(multifd_new_send_channel_async, p);
    }
    return 0;
}
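
/*
 * Note that multifd_save_setup() only allocates the per-channel state and
 * asks for the sockets; the channels come up asynchronously.  Each send
 * thread is started from multifd_new_send_channel_async() once its socket
 * connect completes, so a connection failure is reported through
 * migrate_set_error() and tears everything down via multifd_save_cleanup().
 */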

struct {
    MultiFDRecvParams *params;
    /* number of created threads */
    int count;
    /* syncs main thread and channels */
    QemuSemaphore sem_sync;
    /* global number of generated multifd packets */
    uint64_t packet_num;
} *multifd_recv_state;

static void multifd_recv_terminate_threads(Error *err)
{
    int i;

    if (err) {
        MigrationState *s = migrate_get_current();
        migrate_set_error(s, err);
        if (s->state == MIGRATION_STATUS_SETUP ||
            s->state == MIGRATION_STATUS_ACTIVE) {
            migrate_set_state(&s->state, s->state,
                              MIGRATION_STATUS_FAILED);
        }
    }

    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

        qemu_mutex_lock(&p->mutex);
        p->quit = true;
        /* We could arrive here for two reasons:
           - normal quit, i.e. everything went fine, just finished
           - error quit: We close the channels so the channel threads
             finish the qio_channel_read_all_eof() */
        qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
        qemu_mutex_unlock(&p->mutex);
    }
}

int multifd_load_cleanup(Error **errp)
{
    int i;
    int ret = 0;

    if (!migrate_use_multifd()) {
        return 0;
    }
    multifd_recv_terminate_threads(NULL);
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

        if (p->running) {
            p->quit = true;
            /*
             * multifd_recv_thread may be blocked in the MULTIFD_FLAG_SYNC
             * handling code; waking it up here is harmless during the
             * cleanup phase.
             */
            qemu_sem_post(&p->sem_sync);
            qemu_thread_join(&p->thread);
        }
        object_unref(OBJECT(p->c));
        p->c = NULL;
        qemu_mutex_destroy(&p->mutex);
        qemu_sem_destroy(&p->sem_sync);
        g_free(p->name);
        p->name = NULL;
        multifd_pages_clear(p->pages);
        p->pages = NULL;
        p->packet_len = 0;
        g_free(p->packet);
        p->packet = NULL;
    }
    qemu_sem_destroy(&multifd_recv_state->sem_sync);
    g_free(multifd_recv_state->params);
    multifd_recv_state->params = NULL;
    g_free(multifd_recv_state);
    multifd_recv_state = NULL;

    return ret;
}

static void multifd_recv_sync_main(void)
{
    int i;

    if (!migrate_use_multifd()) {
        return;
    }
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

        trace_multifd_recv_sync_main_wait(p->id);
        qemu_sem_wait(&multifd_recv_state->sem_sync);
    }
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

        qemu_mutex_lock(&p->mutex);
        if (multifd_recv_state->packet_num < p->packet_num) {
            multifd_recv_state->packet_num = p->packet_num;
        }
        qemu_mutex_unlock(&p->mutex);
        trace_multifd_recv_sync_main_signal(p->id);
        qemu_sem_post(&p->sem_sync);
    }
    trace_multifd_recv_sync_main(multifd_recv_state->packet_num);
}
1349
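/**
 * multifd_recv_thread: per-channel receive thread
 *
 * Reads a packet header, validates it with multifd_recv_unfill_packet()
 * and then reads the page data directly into guest memory using the
 * iovec prepared from the packet.  On MULTIFD_FLAG_SYNC it signals the
 * main thread and waits until the sync point is over.
 *
 * @opaque: pointer to this channel's MultiFDRecvParams
 */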
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001350static void *multifd_recv_thread(void *opaque)
1351{
1352 MultiFDRecvParams *p = opaque;
Juan Quintela2a26c972018-04-04 11:26:58 +02001353 Error *local_err = NULL;
1354 int ret;
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001355
Juan Quintela408ea6a2018-04-06 18:28:59 +02001356 trace_multifd_recv_thread_start(p->id);
Lidong Chen74637e62018-08-06 21:29:29 +08001357 rcu_register_thread();
Juan Quintela408ea6a2018-04-06 18:28:59 +02001358
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001359 while (true) {
Juan Quintela6df264a2018-02-28 09:10:07 +01001360 uint32_t used;
1361 uint32_t flags;
1362
Juan Quintela3c3ca252019-07-24 11:46:24 +02001363 if (p->quit) {
1364 break;
1365 }
1366
Juan Quintela8b2db7f2018-04-11 12:36:13 +02001367 ret = qio_channel_read_all_eof(p->c, (void *)p->packet,
1368 p->packet_len, &local_err);
1369 if (ret == 0) { /* EOF */
1370 break;
1371 }
1372 if (ret == -1) { /* Error */
1373 break;
1374 }
Juan Quintela6df264a2018-02-28 09:10:07 +01001375
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001376 qemu_mutex_lock(&p->mutex);
Juan Quintela6df264a2018-02-28 09:10:07 +01001377 ret = multifd_recv_unfill_packet(p, &local_err);
1378 if (ret) {
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001379 qemu_mutex_unlock(&p->mutex);
1380 break;
1381 }
Juan Quintela6df264a2018-02-28 09:10:07 +01001382
1383 used = p->pages->used;
1384 flags = p->flags;
Juan Quintela2a34ee52019-01-04 19:45:39 +01001385 trace_multifd_recv(p->id, p->packet_num, used, flags,
1386 p->next_packet_size);
Juan Quintela6df264a2018-02-28 09:10:07 +01001387 p->num_packets++;
1388 p->num_pages += used;
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001389 qemu_mutex_unlock(&p->mutex);
Juan Quintela6df264a2018-02-28 09:10:07 +01001390
Juan Quintelaad24c7c2019-01-04 19:12:35 +01001391 if (used) {
1392 ret = qio_channel_readv_all(p->c, p->pages->iov,
1393 used, &local_err);
1394 if (ret != 0) {
1395 break;
1396 }
Juan Quintela8b2db7f2018-04-11 12:36:13 +02001397 }
1398
Juan Quintela6df264a2018-02-28 09:10:07 +01001399 if (flags & MULTIFD_FLAG_SYNC) {
1400 qemu_sem_post(&multifd_recv_state->sem_sync);
1401 qemu_sem_wait(&p->sem_sync);
1402 }
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001403 }
1404
Juan Quintelad82628e2018-04-11 02:44:24 +02001405 if (local_err) {
1406 multifd_recv_terminate_threads(local_err);
1407 }
Juan Quintela66770702018-02-19 19:01:45 +01001408 qemu_mutex_lock(&p->mutex);
1409 p->running = false;
1410 qemu_mutex_unlock(&p->mutex);
1411
Lidong Chen74637e62018-08-06 21:29:29 +08001412 rcu_unregister_thread();
Juan Quintela408ea6a2018-04-06 18:28:59 +02001413 trace_multifd_recv_thread_end(p->id, p->num_packets, p->num_pages);
1414
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001415 return NULL;
1416}
1417
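/**
 * multifd_load_setup: allocate incoming multifd state
 *
 * Allocates multifd_recv_state and one MultiFDRecvParams per channel
 * (mutex, sync semaphore, pages array and packet buffer).  The receive
 * threads themselves are created later, in multifd_recv_new_channel(),
 * as each connection arrives.
 *
 * Returns 0
 */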
1418int multifd_load_setup(void)
1419{
1420 int thread_count;
Juan Quintelaefd1a1d2019-02-20 12:06:03 +01001421 uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size();
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001422 uint8_t i;
1423
1424 if (!migrate_use_multifd()) {
1425 return 0;
1426 }
1427 thread_count = migrate_multifd_channels();
1428 multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state));
1429 multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count);
Juan Quintela66770702018-02-19 19:01:45 +01001430 atomic_set(&multifd_recv_state->count, 0);
Juan Quintela6df264a2018-02-28 09:10:07 +01001431 qemu_sem_init(&multifd_recv_state->sem_sync, 0);
Juan Quintela34c55a92018-04-10 23:35:15 +02001432
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001433 for (i = 0; i < thread_count; i++) {
1434 MultiFDRecvParams *p = &multifd_recv_state->params[i];
1435
1436 qemu_mutex_init(&p->mutex);
Juan Quintela6df264a2018-02-28 09:10:07 +01001437 qemu_sem_init(&p->sem_sync, 0);
Juan Quintela3c3ca252019-07-24 11:46:24 +02001438 p->quit = false;
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001439 p->id = i;
Juan Quintela34c55a92018-04-10 23:35:15 +02001440 p->pages = multifd_pages_init(page_count);
Juan Quintela2a26c972018-04-04 11:26:58 +02001441 p->packet_len = sizeof(MultiFDPacket_t)
1442 + sizeof(ram_addr_t) * page_count;
1443 p->packet = g_malloc0(p->packet_len);
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001444 p->name = g_strdup_printf("multifdrecv_%d", i);
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001445 }
1446 return 0;
1447}
1448
Juan Quintela62c1e0c2018-02-19 18:59:02 +01001449bool multifd_recv_all_channels_created(void)
1450{
1451 int thread_count = migrate_multifd_channels();
1452
1453 if (!migrate_use_multifd()) {
1454 return true;
1455 }
1456
1457 return thread_count == atomic_read(&multifd_recv_state->count);
1458}
1459
Fei Li49ed0d22019-01-13 22:08:46 +08001460/*
1461 * Try to receive all multifd channels to get ready for the migration.
1462 * - Return true and do not set @errp when correctly receiving all channels;
1463 * - Return false and do not set @errp when correctly receiving the current one;
1464 * - Return false and set @errp when failing to receive the current channel.
1465 */
1466bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
Juan Quintela71bb07d2018-02-19 19:01:03 +01001467{
Juan Quintela60df2d42018-03-07 07:56:15 +01001468 MultiFDRecvParams *p;
Juan Quintelaaf8b7d22018-04-06 19:32:12 +02001469 Error *local_err = NULL;
1470 int id;
Juan Quintela60df2d42018-03-07 07:56:15 +01001471
Juan Quintelaaf8b7d22018-04-06 19:32:12 +02001472 id = multifd_recv_initial_packet(ioc, &local_err);
1473 if (id < 0) {
1474 multifd_recv_terminate_threads(local_err);
Fei Li49ed0d22019-01-13 22:08:46 +08001475 error_propagate_prepend(errp, local_err,
1476 "failed to receive packet"
1477 " via multifd channel %d: ",
1478 atomic_read(&multifd_recv_state->count));
Peter Xu81e62052018-06-27 21:22:44 +08001479 return false;
Juan Quintelaaf8b7d22018-04-06 19:32:12 +02001480 }
1481
1482 p = &multifd_recv_state->params[id];
1483 if (p->c != NULL) {
1484 error_setg(&local_err, "multifd: received id '%d' already setup'",
1485 id);
1486 multifd_recv_terminate_threads(local_err);
Fei Li49ed0d22019-01-13 22:08:46 +08001487 error_propagate(errp, local_err);
Peter Xu81e62052018-06-27 21:22:44 +08001488 return false;
Juan Quintelaaf8b7d22018-04-06 19:32:12 +02001489 }
Juan Quintela60df2d42018-03-07 07:56:15 +01001490 p->c = ioc;
1491 object_ref(OBJECT(ioc));
Juan Quintela408ea6a2018-04-06 18:28:59 +02001492 /* initial packet */
1493 p->num_packets = 1;
Juan Quintela60df2d42018-03-07 07:56:15 +01001494
1495 p->running = true;
1496 qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
1497 QEMU_THREAD_JOINABLE);
1498 atomic_inc(&multifd_recv_state->count);
Fei Li49ed0d22019-01-13 22:08:46 +08001499 return atomic_read(&multifd_recv_state->count) ==
1500 migrate_multifd_channels();
Juan Quintela71bb07d2018-02-19 19:01:03 +01001501}
1502
Juan Quintela56e93d22015-05-07 19:33:31 +02001503/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001504 * save_page_header: write page header to wire
Juan Quintela56e93d22015-05-07 19:33:31 +02001505 *
1506 * If the block differs from the last one sent, it also writes the block identification
1507 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001508 * Returns the number of bytes written
Juan Quintela56e93d22015-05-07 19:33:31 +02001509 *
1510 * @f: QEMUFile where to send the data
1511 * @block: block that contains the page we want to send
1512 * @offset: offset inside the block for the page
1513 * in the lower bits, it contains flags
1514 */
Juan Quintela2bf3aa82017-05-10 13:28:13 +02001515static size_t save_page_header(RAMState *rs, QEMUFile *f, RAMBlock *block,
1516 ram_addr_t offset)
Juan Quintela56e93d22015-05-07 19:33:31 +02001517{
Liang Li9f5f3802015-07-13 17:34:10 +08001518 size_t size, len;
Juan Quintela56e93d22015-05-07 19:33:31 +02001519
Juan Quintela24795692017-03-21 11:45:01 +01001520 if (block == rs->last_sent_block) {
1521 offset |= RAM_SAVE_FLAG_CONTINUE;
1522 }
Juan Quintela2bf3aa82017-05-10 13:28:13 +02001523 qemu_put_be64(f, offset);
Juan Quintela56e93d22015-05-07 19:33:31 +02001524 size = 8;
1525
1526 if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
Liang Li9f5f3802015-07-13 17:34:10 +08001527 len = strlen(block->idstr);
Juan Quintela2bf3aa82017-05-10 13:28:13 +02001528 qemu_put_byte(f, len);
1529 qemu_put_buffer(f, (uint8_t *)block->idstr, len);
Liang Li9f5f3802015-07-13 17:34:10 +08001530 size += 1 + len;
Juan Quintela24795692017-03-21 11:45:01 +01001531 rs->last_sent_block = block;
Juan Quintela56e93d22015-05-07 19:33:31 +02001532 }
1533 return size;
1534}
1535
Juan Quintela3d0684b2017-03-23 15:06:39 +01001536/**
1537 * mig_throttle_guest_down: throttle down the guest
1538 *
1539 * Reduce amount of guest cpu execution to hopefully slow down memory
1540 * writes. If guest dirty memory rate is reduced below the rate at
1541 * which we can transfer pages to the destination then we should be
1542 * able to complete migration. Some workloads dirty memory way too
1543 * fast and will not effectively converge, even with auto-converge.
Jason J. Herne070afca2015-09-08 13:12:35 -04001544 */
1545static void mig_throttle_guest_down(void)
1546{
1547 MigrationState *s = migrate_get_current();
Daniel P. Berrange2594f562016-04-27 11:05:14 +01001548 uint64_t pct_initial = s->parameters.cpu_throttle_initial;
1549 uint64_t pct_icrement = s->parameters.cpu_throttle_increment;
Li Qiang4cbc9c72018-08-01 06:00:20 -07001550 int pct_max = s->parameters.max_cpu_throttle;
Jason J. Herne070afca2015-09-08 13:12:35 -04001551
1552 /* We have not started throttling yet. Let's start it. */
1553 if (!cpu_throttle_active()) {
1554 cpu_throttle_set(pct_initial);
1555 } else {
1556 /* Throttling already on, just increase the rate */
Li Qiang4cbc9c72018-08-01 06:00:20 -07001557 cpu_throttle_set(MIN(cpu_throttle_get_percentage() + pct_icrement,
1558 pct_max));
Jason J. Herne070afca2015-09-08 13:12:35 -04001559 }
1560}
1561
Juan Quintela3d0684b2017-03-23 15:06:39 +01001562/**
1563 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
1564 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001565 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001566 * @current_addr: address for the zero page
1567 *
1568 * Update the xbzrle cache to reflect a page that's been sent as all 0.
Juan Quintela56e93d22015-05-07 19:33:31 +02001569 * The important thing is that a stale (not-yet-0'd) page be replaced
1570 * by the new data.
1571 * As a bonus, if the page wasn't in the cache it gets added so that
Juan Quintela3d0684b2017-03-23 15:06:39 +01001572 * when a small write is made into the 0'd page it gets XBZRLE sent.
Juan Quintela56e93d22015-05-07 19:33:31 +02001573 */
Juan Quintela6f37bb82017-03-13 19:26:29 +01001574static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
Juan Quintela56e93d22015-05-07 19:33:31 +02001575{
Juan Quintela6f37bb82017-03-13 19:26:29 +01001576 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
Juan Quintela56e93d22015-05-07 19:33:31 +02001577 return;
1578 }
1579
1580 /* We don't care if this fails to allocate a new cache page
1581 * as long as it updated an old one */
Juan Quintelac00e0922017-05-09 16:22:01 +02001582 cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page,
Juan Quintela93604472017-06-06 19:49:03 +02001583 ram_counters.dirty_sync_count);
Juan Quintela56e93d22015-05-07 19:33:31 +02001584}
1585
1586#define ENCODING_FLAG_XBZRLE 0x1
1587
1588/**
1589 * save_xbzrle_page: compress and send current page
1590 *
1591 * Returns: 1 means that we wrote the page
1592 * 0 means that page is identical to the one already sent
1593 * -1 means that xbzrle would be longer than normal
1594 *
Juan Quintela5a987732017-03-13 19:39:02 +01001595 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001596 * @current_data: pointer to the address of the page contents
1597 * @current_addr: addr of the page
Juan Quintela56e93d22015-05-07 19:33:31 +02001598 * @block: block that contains the page we want to send
1599 * @offset: offset inside the block for the page
1600 * @last_stage: if we are at the completion stage
Juan Quintela56e93d22015-05-07 19:33:31 +02001601 */
Juan Quintela204b88b2017-03-15 09:16:57 +01001602static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
Juan Quintela56e93d22015-05-07 19:33:31 +02001603 ram_addr_t current_addr, RAMBlock *block,
Juan Quintela072c2512017-03-14 10:27:31 +01001604 ram_addr_t offset, bool last_stage)
Juan Quintela56e93d22015-05-07 19:33:31 +02001605{
1606 int encoded_len = 0, bytes_xbzrle;
1607 uint8_t *prev_cached_page;
1608
Juan Quintela93604472017-06-06 19:49:03 +02001609 if (!cache_is_cached(XBZRLE.cache, current_addr,
1610 ram_counters.dirty_sync_count)) {
1611 xbzrle_counters.cache_miss++;
Juan Quintela56e93d22015-05-07 19:33:31 +02001612 if (!last_stage) {
1613 if (cache_insert(XBZRLE.cache, current_addr, *current_data,
Juan Quintela93604472017-06-06 19:49:03 +02001614 ram_counters.dirty_sync_count) == -1) {
Juan Quintela56e93d22015-05-07 19:33:31 +02001615 return -1;
1616 } else {
1617 /* update *current_data when the page has been
1618 inserted into cache */
1619 *current_data = get_cached_data(XBZRLE.cache, current_addr);
1620 }
1621 }
1622 return -1;
1623 }
1624
1625 prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
1626
1627 /* save current buffer into memory */
1628 memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
1629
1630 /* XBZRLE encoding (if there is no overflow) */
1631 encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
1632 TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
1633 TARGET_PAGE_SIZE);
Wei Yangca353802019-06-10 08:41:59 +08001634
1635 /*
1636 * Update the cache contents, so that it corresponds to the data
1637 * sent, in all cases except where we skip the page.
1638 */
1639 if (!last_stage && encoded_len != 0) {
1640 memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
1641 /*
1642 * In the case where we couldn't compress, ensure that the caller
1643 * sends the data from the cache, since the guest might have
1644 * changed the RAM since we copied it.
1645 */
1646 *current_data = prev_cached_page;
1647 }
1648
Juan Quintela56e93d22015-05-07 19:33:31 +02001649 if (encoded_len == 0) {
Juan Quintela55c44462017-01-23 22:32:05 +01001650 trace_save_xbzrle_page_skipping();
Juan Quintela56e93d22015-05-07 19:33:31 +02001651 return 0;
1652 } else if (encoded_len == -1) {
Juan Quintela55c44462017-01-23 22:32:05 +01001653 trace_save_xbzrle_page_overflow();
Juan Quintela93604472017-06-06 19:49:03 +02001654 xbzrle_counters.overflow++;
Juan Quintela56e93d22015-05-07 19:33:31 +02001655 return -1;
1656 }
1657
Juan Quintela56e93d22015-05-07 19:33:31 +02001658 /* Send XBZRLE based compressed page */
Juan Quintela2bf3aa82017-05-10 13:28:13 +02001659 bytes_xbzrle = save_page_header(rs, rs->f, block,
Juan Quintela204b88b2017-03-15 09:16:57 +01001660 offset | RAM_SAVE_FLAG_XBZRLE);
1661 qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE);
1662 qemu_put_be16(rs->f, encoded_len);
1663 qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len);
Juan Quintela56e93d22015-05-07 19:33:31 +02001664 bytes_xbzrle += encoded_len + 1 + 2;
Juan Quintela93604472017-06-06 19:49:03 +02001665 xbzrle_counters.pages++;
1666 xbzrle_counters.bytes += bytes_xbzrle;
1667 ram_counters.transferred += bytes_xbzrle;
Juan Quintela56e93d22015-05-07 19:33:31 +02001668
1669 return 1;
1670}
1671
Juan Quintela3d0684b2017-03-23 15:06:39 +01001672/**
1673 * migration_bitmap_find_dirty: find the next dirty page from start
Dr. David Alan Gilbertf3f491f2015-11-05 18:11:01 +00001674 *
Wei Yanga5f7b1a2019-05-11 07:37:29 +08001675 * Returns the page offset within memory region of the start of a dirty page
Juan Quintela3d0684b2017-03-23 15:06:39 +01001676 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001677 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001678 * @rb: RAMBlock where to search for dirty pages
Juan Quintelaa935e302017-03-21 15:36:51 +01001679 * @start: page where we start the search
Dr. David Alan Gilbertf3f491f2015-11-05 18:11:01 +00001680 */
Juan Quintela56e93d22015-05-07 19:33:31 +02001681static inline
Juan Quintelaa935e302017-03-21 15:36:51 +01001682unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
Juan Quintelaf20e2862017-03-21 16:19:05 +01001683 unsigned long start)
Juan Quintela56e93d22015-05-07 19:33:31 +02001684{
Juan Quintela6b6712e2017-03-22 15:18:04 +01001685 unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
1686 unsigned long *bitmap = rb->bmap;
Juan Quintela56e93d22015-05-07 19:33:31 +02001687 unsigned long next;
1688
Yury Kotovfbd162e2019-02-15 20:45:46 +03001689 if (ramblock_is_ignored(rb)) {
Cédric Le Goaterb895de52018-05-14 08:57:00 +02001690 return size;
1691 }
1692
Wei Wang6eeb63f2018-12-11 16:24:52 +08001693 /*
1694 * When the free page optimization is enabled, we need to check the bitmap
1695 * to send the non-free pages rather than all the pages in the bulk stage.
1696 */
1697 if (!rs->fpo_enabled && rs->ram_bulk_stage && start > 0) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01001698 next = start + 1;
Juan Quintela56e93d22015-05-07 19:33:31 +02001699 } else {
Juan Quintela6b6712e2017-03-22 15:18:04 +01001700 next = find_next_bit(bitmap, size, start);
Juan Quintela56e93d22015-05-07 19:33:31 +02001701 }
1702
Juan Quintela6b6712e2017-03-22 15:18:04 +01001703 return next;
Juan Quintela56e93d22015-05-07 19:33:31 +02001704}
1705
Juan Quintela06b10682017-03-21 15:18:05 +01001706static inline bool migration_bitmap_clear_dirty(RAMState *rs,
Juan Quintelaf20e2862017-03-21 16:19:05 +01001707 RAMBlock *rb,
1708 unsigned long page)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001709{
1710 bool ret;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001711
Wei Wang386a9072018-12-11 16:24:49 +08001712 qemu_mutex_lock(&rs->bitmap_mutex);
Peter Xu002cad62019-06-03 14:50:56 +08001713
1714 /*
1715 * Clear dirty bitmap if needed. This _must_ be called before we
1716     * send any of the pages in the chunk because we need to make sure
1717 * we can capture further page content changes when we sync dirty
1718 * log the next time. So as long as we are going to send any of
1719     * the pages in the chunk, we clear the remote dirty bitmap for all.
1720 * Clearing it earlier won't be a problem, but too late will.
1721 */
1722 if (rb->clear_bmap && clear_bmap_test_and_clear(rb, page)) {
1723 uint8_t shift = rb->clear_bmap_shift;
1724 hwaddr size = 1ULL << (TARGET_PAGE_BITS + shift);
1725 hwaddr start = (page << TARGET_PAGE_BITS) & (-size);
1726
1727 /*
1728 * CLEAR_BITMAP_SHIFT_MIN should always guarantee this... this
1729         * can make things easier sometimes since then the start address
1730 * of the small chunk will always be 64 pages aligned so the
1731 * bitmap will always be aligned to unsigned long. We should
1732 * even be able to remove this restriction but I'm simply
1733 * keeping it.
1734 */
1735 assert(shift >= 6);
1736 trace_migration_bitmap_clear_dirty(rb->idstr, start, size, page);
1737 memory_region_clear_dirty_bitmap(rb->mr, start, size);
1738 }
1739
Juan Quintela6b6712e2017-03-22 15:18:04 +01001740 ret = test_and_clear_bit(page, rb->bmap);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001741
1742 if (ret) {
Juan Quintela0d8ec882017-03-13 21:21:41 +01001743 rs->migration_dirty_pages--;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001744 }
Wei Wang386a9072018-12-11 16:24:49 +08001745 qemu_mutex_unlock(&rs->bitmap_mutex);
1746
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001747 return ret;
1748}
1749
Peter Xu267691b2019-06-03 14:50:46 +08001750/* Called with RCU critical section */
Wei Yang5d0980a2019-07-18 09:25:47 +08001751static void migration_bitmap_sync_range(RAMState *rs, RAMBlock *rb)
Juan Quintela56e93d22015-05-07 19:33:31 +02001752{
Juan Quintela0d8ec882017-03-13 21:21:41 +01001753 rs->migration_dirty_pages +=
Wei Yang5d0980a2019-07-18 09:25:47 +08001754 cpu_physical_memory_sync_dirty_bitmap(rb, 0, rb->used_length,
Juan Quintela0d8ec882017-03-13 21:21:41 +01001755 &rs->num_dirty_pages_period);
Juan Quintela56e93d22015-05-07 19:33:31 +02001756}
1757
Juan Quintela3d0684b2017-03-23 15:06:39 +01001758/**
1759 * ram_pagesize_summary: calculate all the pagesizes of a VM
1760 *
1761 * Returns a summary bitmap of the page sizes of all RAMBlocks
1762 *
1763 * For VMs with just normal pages this is equivalent to the host page
1764 * size. If it's got some huge pages then it's the OR of all the
1765 * different page sizes.
Dr. David Alan Gilberte8ca1db2017-02-24 18:28:29 +00001766 */
1767uint64_t ram_pagesize_summary(void)
1768{
1769 RAMBlock *block;
1770 uint64_t summary = 0;
1771
Yury Kotovfbd162e2019-02-15 20:45:46 +03001772 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Dr. David Alan Gilberte8ca1db2017-02-24 18:28:29 +00001773 summary |= block->page_size;
1774 }
1775
1776 return summary;
1777}
1778
Xiao Guangrongaecbfe92019-01-11 14:37:30 +08001779uint64_t ram_get_total_transferred_pages(void)
1780{
1781 return ram_counters.normal + ram_counters.duplicate +
1782 compression_counters.pages + xbzrle_counters.pages;
1783}
1784
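/*
 * Recompute the per-period statistics (dirty pages rate, xbzrle cache
 * miss rate and compression busy rate / compression ratio) at the end
 * of a bitmap sync period that finishes at @end_time.
 */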
Xiao Guangrongb7340352018-06-04 17:55:12 +08001785static void migration_update_rates(RAMState *rs, int64_t end_time)
1786{
Xiao Guangrongbe8b02e2018-09-03 17:26:42 +08001787 uint64_t page_count = rs->target_page_count - rs->target_page_count_prev;
Xiao Guangrong76e03002018-09-06 15:01:00 +08001788 double compressed_size;
Xiao Guangrongb7340352018-06-04 17:55:12 +08001789
1790 /* calculate period counters */
1791 ram_counters.dirty_pages_rate = rs->num_dirty_pages_period * 1000
1792 / (end_time - rs->time_last_bitmap_sync);
1793
Xiao Guangrongbe8b02e2018-09-03 17:26:42 +08001794 if (!page_count) {
Xiao Guangrongb7340352018-06-04 17:55:12 +08001795 return;
1796 }
1797
1798 if (migrate_use_xbzrle()) {
1799 xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss -
Xiao Guangrongbe8b02e2018-09-03 17:26:42 +08001800 rs->xbzrle_cache_miss_prev) / page_count;
Xiao Guangrongb7340352018-06-04 17:55:12 +08001801 rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
1802 }
Xiao Guangrong76e03002018-09-06 15:01:00 +08001803
1804 if (migrate_use_compression()) {
1805 compression_counters.busy_rate = (double)(compression_counters.busy -
1806 rs->compress_thread_busy_prev) / page_count;
1807 rs->compress_thread_busy_prev = compression_counters.busy;
1808
1809 compressed_size = compression_counters.compressed_size -
1810 rs->compressed_size_prev;
1811 if (compressed_size) {
1812 double uncompressed_size = (compression_counters.pages -
1813 rs->compress_pages_prev) * TARGET_PAGE_SIZE;
1814
1815 /* Compression-Ratio = Uncompressed-size / Compressed-size */
1816 compression_counters.compression_rate =
1817 uncompressed_size / compressed_size;
1818
1819 rs->compress_pages_prev = compression_counters.pages;
1820 rs->compressed_size_prev = compression_counters.compressed_size;
1821 }
1822 }
Xiao Guangrongb7340352018-06-04 17:55:12 +08001823}
1824
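/*
 * Sync the dirty bitmap of every RAMBlock with the memory API dirty
 * log.  Roughly once a second this also updates the transfer rates,
 * lets auto-converge throttle the guest if progress stalls, and emits
 * a MIGRATION_PASS QAPI event.
 */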
Juan Quintela8d820d62017-03-13 19:35:50 +01001825static void migration_bitmap_sync(RAMState *rs)
Juan Quintela56e93d22015-05-07 19:33:31 +02001826{
1827 RAMBlock *block;
Juan Quintela56e93d22015-05-07 19:33:31 +02001828 int64_t end_time;
Juan Quintelac4bdf0c2017-03-28 14:59:54 +02001829 uint64_t bytes_xfer_now;
Juan Quintela56e93d22015-05-07 19:33:31 +02001830
Juan Quintela93604472017-06-06 19:49:03 +02001831 ram_counters.dirty_sync_count++;
Juan Quintela56e93d22015-05-07 19:33:31 +02001832
Juan Quintelaf664da82017-03-13 19:44:57 +01001833 if (!rs->time_last_bitmap_sync) {
1834 rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
Juan Quintela56e93d22015-05-07 19:33:31 +02001835 }
1836
1837 trace_migration_bitmap_sync_start();
Paolo Bonzini9c1f8f42016-09-22 16:08:31 +02001838 memory_global_dirty_log_sync();
Juan Quintela56e93d22015-05-07 19:33:31 +02001839
Juan Quintela108cfae2017-03-13 21:38:09 +01001840 qemu_mutex_lock(&rs->bitmap_mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02001841 rcu_read_lock();
Yury Kotovfbd162e2019-02-15 20:45:46 +03001842 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Wei Yang5d0980a2019-07-18 09:25:47 +08001843 migration_bitmap_sync_range(rs, block);
Juan Quintela56e93d22015-05-07 19:33:31 +02001844 }
Balamuruhan S650af892018-06-12 14:20:09 +05301845 ram_counters.remaining = ram_bytes_remaining();
Juan Quintela56e93d22015-05-07 19:33:31 +02001846 rcu_read_unlock();
Juan Quintela108cfae2017-03-13 21:38:09 +01001847 qemu_mutex_unlock(&rs->bitmap_mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02001848
Juan Quintelaa66cd902017-03-28 15:02:43 +02001849 trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);
Chao Fan1ffb5df2017-03-14 09:55:07 +08001850
Juan Quintela56e93d22015-05-07 19:33:31 +02001851 end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
1852
1853    /* more than 1 second = 1000 milliseconds */
Juan Quintelaf664da82017-03-13 19:44:57 +01001854 if (end_time > rs->time_last_bitmap_sync + 1000) {
Juan Quintela93604472017-06-06 19:49:03 +02001855 bytes_xfer_now = ram_counters.transferred;
Felipe Franciosid693c6f2017-05-24 17:10:01 +01001856
Peter Lieven9ac78b62017-09-26 12:33:16 +02001857 /* During block migration the auto-converge logic incorrectly detects
1858 * that ram migration makes no progress. Avoid this by disabling the
1859 * throttling logic during the bulk phase of block migration. */
1860 if (migrate_auto_converge() && !blk_mig_bulk_active()) {
Juan Quintela56e93d22015-05-07 19:33:31 +02001861 /* The following detection logic can be refined later. For now:
1862           Check to see if the dirtied bytes are 50% more than the approx.
1863 amount of bytes that just got transferred since the last time we
Jason J. Herne070afca2015-09-08 13:12:35 -04001864 were in this routine. If that happens twice, start or increase
1865 throttling */
Jason J. Herne070afca2015-09-08 13:12:35 -04001866
Felipe Franciosid693c6f2017-05-24 17:10:01 +01001867 if ((rs->num_dirty_pages_period * TARGET_PAGE_SIZE >
Juan Quintelaeac74152017-03-28 14:59:01 +02001868 (bytes_xfer_now - rs->bytes_xfer_prev) / 2) &&
Felipe Franciosib4a3c642017-05-24 17:10:03 +01001869 (++rs->dirty_rate_high_cnt >= 2)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02001870 trace_migration_throttle();
Juan Quintela8d820d62017-03-13 19:35:50 +01001871 rs->dirty_rate_high_cnt = 0;
Jason J. Herne070afca2015-09-08 13:12:35 -04001872 mig_throttle_guest_down();
Felipe Franciosid693c6f2017-05-24 17:10:01 +01001873 }
Juan Quintela56e93d22015-05-07 19:33:31 +02001874 }
Jason J. Herne070afca2015-09-08 13:12:35 -04001875
Xiao Guangrongb7340352018-06-04 17:55:12 +08001876 migration_update_rates(rs, end_time);
1877
Xiao Guangrongbe8b02e2018-09-03 17:26:42 +08001878 rs->target_page_count_prev = rs->target_page_count;
Felipe Franciosid693c6f2017-05-24 17:10:01 +01001879
1880 /* reset period counters */
Juan Quintelaf664da82017-03-13 19:44:57 +01001881 rs->time_last_bitmap_sync = end_time;
Juan Quintelaa66cd902017-03-28 15:02:43 +02001882 rs->num_dirty_pages_period = 0;
Felipe Franciosid2a4d852017-05-24 17:10:02 +01001883 rs->bytes_xfer_prev = bytes_xfer_now;
Juan Quintela56e93d22015-05-07 19:33:31 +02001884 }
Dr. David Alan Gilbert4addcd42015-12-16 11:47:36 +00001885 if (migrate_use_events()) {
Peter Xu3ab72382018-08-15 21:37:37 +08001886 qapi_event_send_migration_pass(ram_counters.dirty_sync_count);
Dr. David Alan Gilbert4addcd42015-12-16 11:47:36 +00001887 }
Juan Quintela56e93d22015-05-07 19:33:31 +02001888}
1889
Wei Wangbd227062018-12-11 16:24:51 +08001890static void migration_bitmap_sync_precopy(RAMState *rs)
1891{
1892 Error *local_err = NULL;
1893
1894 /*
1895     * The current notifier usage is just an optimization for migration, so we
1896 * don't stop the normal migration process in the error case.
1897 */
1898 if (precopy_notify(PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC, &local_err)) {
1899 error_report_err(local_err);
1900 }
1901
1902 migration_bitmap_sync(rs);
1903
1904 if (precopy_notify(PRECOPY_NOTIFY_AFTER_BITMAP_SYNC, &local_err)) {
1905 error_report_err(local_err);
1906 }
1907}
1908
Juan Quintela56e93d22015-05-07 19:33:31 +02001909/**
Xiao Guangrong6c97ec52018-08-21 16:10:22 +08001910 * save_zero_page_to_file: send the zero page to the file
1911 *
1912 * Returns the size of data written to the file, 0 means the page is not
1913 * a zero page
1914 *
1915 * @rs: current RAM state
1916 * @file: the file where the data is saved
1917 * @block: block that contains the page we want to send
1918 * @offset: offset inside the block for the page
1919 */
1920static int save_zero_page_to_file(RAMState *rs, QEMUFile *file,
1921 RAMBlock *block, ram_addr_t offset)
1922{
1923 uint8_t *p = block->host + offset;
1924 int len = 0;
1925
1926 if (is_zero_range(p, TARGET_PAGE_SIZE)) {
1927 len += save_page_header(rs, file, block, offset | RAM_SAVE_FLAG_ZERO);
1928 qemu_put_byte(file, 0);
1929 len += 1;
1930 }
1931 return len;
1932}
1933
1934/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001935 * save_zero_page: send the zero page to the stream
Juan Quintela56e93d22015-05-07 19:33:31 +02001936 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001937 * Returns the number of pages written.
Juan Quintela56e93d22015-05-07 19:33:31 +02001938 *
Juan Quintelaf7ccd612017-03-13 20:30:21 +01001939 * @rs: current RAM state
Juan Quintela56e93d22015-05-07 19:33:31 +02001940 * @block: block that contains the page we want to send
1941 * @offset: offset inside the block for the page
Juan Quintela56e93d22015-05-07 19:33:31 +02001942 */
Juan Quintela7faccdc2018-01-08 18:58:17 +01001943static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
Juan Quintela56e93d22015-05-07 19:33:31 +02001944{
Xiao Guangrong6c97ec52018-08-21 16:10:22 +08001945 int len = save_zero_page_to_file(rs, rs->f, block, offset);
Juan Quintela56e93d22015-05-07 19:33:31 +02001946
Xiao Guangrong6c97ec52018-08-21 16:10:22 +08001947 if (len) {
Juan Quintela93604472017-06-06 19:49:03 +02001948 ram_counters.duplicate++;
Xiao Guangrong6c97ec52018-08-21 16:10:22 +08001949 ram_counters.transferred += len;
1950 return 1;
Juan Quintela56e93d22015-05-07 19:33:31 +02001951 }
Xiao Guangrong6c97ec52018-08-21 16:10:22 +08001952 return -1;
Juan Quintela56e93d22015-05-07 19:33:31 +02001953}
1954
Juan Quintela57273092017-03-20 22:25:28 +01001955static void ram_release_pages(const char *rbname, uint64_t offset, int pages)
Pavel Butsykin53f09a12017-02-03 18:23:20 +03001956{
Juan Quintela57273092017-03-20 22:25:28 +01001957 if (!migrate_release_ram() || !migration_in_postcopy()) {
Pavel Butsykin53f09a12017-02-03 18:23:20 +03001958 return;
1959 }
1960
Juan Quintelaaaa20642017-03-21 11:35:24 +01001961 ram_discard_range(rbname, offset, pages << TARGET_PAGE_BITS);
Pavel Butsykin53f09a12017-02-03 18:23:20 +03001962}
1963
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08001964/*
1965 * @pages: the number of pages written by the control path,
1966 * < 0 - error
1967 * > 0 - number of pages written
1968 *
1969 * Return true if the page has been saved, otherwise false is returned.
1970 */
1971static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
1972 int *pages)
1973{
1974 uint64_t bytes_xmit = 0;
1975 int ret;
1976
1977 *pages = -1;
1978 ret = ram_control_save_page(rs->f, block->offset, offset, TARGET_PAGE_SIZE,
1979 &bytes_xmit);
1980 if (ret == RAM_SAVE_CONTROL_NOT_SUPP) {
1981 return false;
1982 }
1983
1984 if (bytes_xmit) {
1985 ram_counters.transferred += bytes_xmit;
1986 *pages = 1;
1987 }
1988
1989 if (ret == RAM_SAVE_CONTROL_DELAYED) {
1990 return true;
1991 }
1992
1993 if (bytes_xmit > 0) {
1994 ram_counters.normal++;
1995 } else if (bytes_xmit == 0) {
1996 ram_counters.duplicate++;
1997 }
1998
1999 return true;
2000}
2001
Xiao Guangrong65dacaa2018-03-30 15:51:27 +08002002/*
2003 * directly send the page to the stream
2004 *
2005 * Returns the number of pages written.
2006 *
2007 * @rs: current RAM state
2008 * @block: block that contains the page we want to send
2009 * @offset: offset inside the block for the page
2010 * @buf: the page to be sent
2011 * @async: send the page asynchronously
2012 */
2013static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
2014 uint8_t *buf, bool async)
2015{
2016 ram_counters.transferred += save_page_header(rs, rs->f, block,
2017 offset | RAM_SAVE_FLAG_PAGE);
2018 if (async) {
2019 qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE,
2020 migrate_release_ram() &
2021 migration_in_postcopy());
2022 } else {
2023 qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE);
2024 }
2025 ram_counters.transferred += TARGET_PAGE_SIZE;
2026 ram_counters.normal++;
2027 return 1;
2028}
2029
Juan Quintela56e93d22015-05-07 19:33:31 +02002030/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002031 * ram_save_page: send the given page to the stream
Juan Quintela56e93d22015-05-07 19:33:31 +02002032 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002033 * Returns the number of pages written.
Dr. David Alan Gilbert3fd3c4b2015-12-10 16:31:46 +00002034 * < 0 - error
2035 * >=0 - Number of pages written - this might legally be 0
2036 * if xbzrle noticed the page was the same.
Juan Quintela56e93d22015-05-07 19:33:31 +02002037 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002038 * @rs: current RAM state
Juan Quintela56e93d22015-05-07 19:33:31 +02002039 * @block: block that contains the page we want to send
2040 * @offset: offset inside the block for the page
2041 * @last_stage: if we are at the completion stage
Juan Quintela56e93d22015-05-07 19:33:31 +02002042 */
Juan Quintelaa0a8aa12017-03-20 22:29:07 +01002043static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
Juan Quintela56e93d22015-05-07 19:33:31 +02002044{
2045 int pages = -1;
Juan Quintela56e93d22015-05-07 19:33:31 +02002046 uint8_t *p;
Juan Quintela56e93d22015-05-07 19:33:31 +02002047 bool send_async = true;
zhanghailianga08f6892016-01-15 11:37:44 +08002048 RAMBlock *block = pss->block;
Juan Quintelaa935e302017-03-21 15:36:51 +01002049 ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08002050 ram_addr_t current_addr = block->offset + offset;
Juan Quintela56e93d22015-05-07 19:33:31 +02002051
Dr. David Alan Gilbert2f68e392015-08-13 11:51:30 +01002052 p = block->host + offset;
Dr. David Alan Gilbert1db9d8e2017-04-26 19:37:21 +01002053 trace_ram_save_page(block->idstr, (uint64_t)offset, p);
Juan Quintela56e93d22015-05-07 19:33:31 +02002054
Juan Quintela56e93d22015-05-07 19:33:31 +02002055 XBZRLE_cache_lock();
Xiao Guangrongd7400a32018-03-30 15:51:26 +08002056 if (!rs->ram_bulk_stage && !migration_in_postcopy() &&
2057 migrate_use_xbzrle()) {
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08002058 pages = save_xbzrle_page(rs, &p, current_addr, block,
2059 offset, last_stage);
2060 if (!last_stage) {
2061 /* Can't send this cached data async, since the cache page
2062 * might get updated before it gets to the wire
Juan Quintela56e93d22015-05-07 19:33:31 +02002063 */
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08002064 send_async = false;
Juan Quintela56e93d22015-05-07 19:33:31 +02002065 }
2066 }
2067
2068 /* XBZRLE overflow or normal page */
2069 if (pages == -1) {
Xiao Guangrong65dacaa2018-03-30 15:51:27 +08002070 pages = save_normal_page(rs, block, offset, p, send_async);
Juan Quintela56e93d22015-05-07 19:33:31 +02002071 }
2072
2073 XBZRLE_cache_unlock();
2074
2075 return pages;
2076}
2077
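/*
 * Queue one page on the multifd send channels.  Returns 1 on success
 * or -1 if the page could not be queued.
 */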
Juan Quintelab9ee2f72016-01-15 11:40:13 +01002078static int ram_save_multifd_page(RAMState *rs, RAMBlock *block,
2079 ram_addr_t offset)
2080{
Ivan Ren713f7622019-06-25 21:18:17 +08002081 if (multifd_queue_page(block, offset) < 0) {
2082 return -1;
2083 }
Juan Quintelab9ee2f72016-01-15 11:40:13 +01002084 ram_counters.normal++;
2085
2086 return 1;
2087}
2088
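/*
 * Compress one page into @f using the per-thread @stream.  Zero pages
 * are sent as such without compression.  Other pages are copied into
 * @source_buf first so the guest cannot modify them while zlib reads
 * them.  Returns true if the page turned out to be a zero page.
 */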
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002089static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
Xiao Guangrong6ef37712018-08-21 16:10:23 +08002090 ram_addr_t offset, uint8_t *source_buf)
Juan Quintela56e93d22015-05-07 19:33:31 +02002091{
Juan Quintela53518d92017-05-04 11:46:24 +02002092 RAMState *rs = ram_state;
Liang Lia7a9a882016-05-05 15:32:57 +08002093 uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002094 bool zero_page = false;
Xiao Guangrong6ef37712018-08-21 16:10:23 +08002095 int ret;
Juan Quintela56e93d22015-05-07 19:33:31 +02002096
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002097 if (save_zero_page_to_file(rs, f, block, offset)) {
2098 zero_page = true;
2099 goto exit;
2100 }
2101
Xiao Guangrong6ef37712018-08-21 16:10:23 +08002102 save_page_header(rs, f, block, offset | RAM_SAVE_FLAG_COMPRESS_PAGE);
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08002103
2104 /*
2105     * copy it to an internal buffer to avoid it being modified by the VM,
2106     * so that we can catch any error during compression and
2107 * decompression
2108 */
2109 memcpy(source_buf, p, TARGET_PAGE_SIZE);
Xiao Guangrong6ef37712018-08-21 16:10:23 +08002110 ret = qemu_put_compression_data(f, stream, source_buf, TARGET_PAGE_SIZE);
2111 if (ret < 0) {
2112 qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
Liang Lib3be2892016-05-05 15:32:54 +08002113 error_report("compressed data failed!");
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002114 return false;
Liang Lib3be2892016-05-05 15:32:54 +08002115 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002116
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002117exit:
Xiao Guangrong6ef37712018-08-21 16:10:23 +08002118 ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1);
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002119 return zero_page;
2120}
2121
2122static void
2123update_compress_thread_counts(const CompressParam *param, int bytes_xmit)
2124{
Xiao Guangrong76e03002018-09-06 15:01:00 +08002125 ram_counters.transferred += bytes_xmit;
2126
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002127 if (param->zero_page) {
2128 ram_counters.duplicate++;
Xiao Guangrong76e03002018-09-06 15:01:00 +08002129 return;
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002130 }
Xiao Guangrong76e03002018-09-06 15:01:00 +08002131
2132 /* 8 means a header with RAM_SAVE_FLAG_CONTINUE. */
2133 compression_counters.compressed_size += bytes_xmit - 8;
2134 compression_counters.pages++;
Juan Quintela56e93d22015-05-07 19:33:31 +02002135}
2136
Xiao Guangrong32b05492018-09-06 15:01:01 +08002137static bool save_page_use_compression(RAMState *rs);
2138
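/*
 * Wait for every compression thread to finish its current page and
 * flush its buffered output into the migration stream.
 */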
Juan Quintelace25d332017-03-15 11:00:51 +01002139static void flush_compressed_data(RAMState *rs)
Juan Quintela56e93d22015-05-07 19:33:31 +02002140{
2141 int idx, len, thread_count;
2142
Xiao Guangrong32b05492018-09-06 15:01:01 +08002143 if (!save_page_use_compression(rs)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02002144 return;
2145 }
2146 thread_count = migrate_compress_threads();
Liang Lia7a9a882016-05-05 15:32:57 +08002147
Liang Li0d9f9a52016-05-05 15:32:59 +08002148 qemu_mutex_lock(&comp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02002149 for (idx = 0; idx < thread_count; idx++) {
Liang Lia7a9a882016-05-05 15:32:57 +08002150 while (!comp_param[idx].done) {
Liang Li0d9f9a52016-05-05 15:32:59 +08002151 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02002152 }
Liang Lia7a9a882016-05-05 15:32:57 +08002153 }
Liang Li0d9f9a52016-05-05 15:32:59 +08002154 qemu_mutex_unlock(&comp_done_lock);
Liang Lia7a9a882016-05-05 15:32:57 +08002155
2156 for (idx = 0; idx < thread_count; idx++) {
2157 qemu_mutex_lock(&comp_param[idx].mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08002158 if (!comp_param[idx].quit) {
Juan Quintelace25d332017-03-15 11:00:51 +01002159 len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002160 /*
2161 * it's safe to fetch zero_page without holding comp_done_lock
2162 * as there is no further request submitted to the thread,
2163             * i.e., the thread should be waiting for a request at this point.
2164 */
2165 update_compress_thread_counts(&comp_param[idx], len);
Juan Quintela56e93d22015-05-07 19:33:31 +02002166 }
Liang Lia7a9a882016-05-05 15:32:57 +08002167 qemu_mutex_unlock(&comp_param[idx].mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02002168 }
2169}
2170
2171static inline void set_compress_params(CompressParam *param, RAMBlock *block,
2172 ram_addr_t offset)
2173{
2174 param->block = block;
2175 param->offset = offset;
2176}
2177
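/*
 * Hand the page at @block/@offset to an idle compression thread.  If
 * none is idle, either wait for one (when compress-wait-thread is set)
 * or return -1 so the caller sends the page uncompressed.
 * Returns 1 when the page was queued for compression.
 */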
Juan Quintelace25d332017-03-15 11:00:51 +01002178static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block,
2179 ram_addr_t offset)
Juan Quintela56e93d22015-05-07 19:33:31 +02002180{
2181 int idx, thread_count, bytes_xmit = -1, pages = -1;
Xiao Guangrong1d588722018-08-21 16:10:20 +08002182 bool wait = migrate_compress_wait_thread();
Juan Quintela56e93d22015-05-07 19:33:31 +02002183
2184 thread_count = migrate_compress_threads();
Liang Li0d9f9a52016-05-05 15:32:59 +08002185 qemu_mutex_lock(&comp_done_lock);
Xiao Guangrong1d588722018-08-21 16:10:20 +08002186retry:
2187 for (idx = 0; idx < thread_count; idx++) {
2188 if (comp_param[idx].done) {
2189 comp_param[idx].done = false;
2190 bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file);
2191 qemu_mutex_lock(&comp_param[idx].mutex);
2192 set_compress_params(&comp_param[idx], block, offset);
2193 qemu_cond_signal(&comp_param[idx].cond);
2194 qemu_mutex_unlock(&comp_param[idx].mutex);
2195 pages = 1;
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002196 update_compress_thread_counts(&comp_param[idx], bytes_xmit);
Juan Quintela56e93d22015-05-07 19:33:31 +02002197 break;
Juan Quintela56e93d22015-05-07 19:33:31 +02002198 }
2199 }
Xiao Guangrong1d588722018-08-21 16:10:20 +08002200
2201 /*
2202     * wait for a free thread if the user specifies 'compress-wait-thread',
2203     * otherwise we will post the page out in the main thread as a normal page.
2204 */
2205 if (pages < 0 && wait) {
2206 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
2207 goto retry;
2208 }
Liang Li0d9f9a52016-05-05 15:32:59 +08002209 qemu_mutex_unlock(&comp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02002210
2211 return pages;
2212}
2213
2214/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002215 * find_dirty_block: find the next dirty page and update any state
2216 * associated with the search process.
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002217 *
Wei Yanga5f7b1a2019-05-11 07:37:29 +08002218 * Returns true if a page is found
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002219 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002220 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002221 * @pss: data about the state of the current dirty page scan
2222 * @again: set to false if the search has scanned the whole of RAM
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002223 */
Juan Quintelaf20e2862017-03-21 16:19:05 +01002224static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002225{
Juan Quintelaf20e2862017-03-21 16:19:05 +01002226 pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
Juan Quintela6f37bb82017-03-13 19:26:29 +01002227 if (pss->complete_round && pss->block == rs->last_seen_block &&
Juan Quintelaa935e302017-03-21 15:36:51 +01002228 pss->page >= rs->last_page) {
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002229 /*
2230 * We've been once around the RAM and haven't found anything.
2231 * Give up.
2232 */
2233 *again = false;
2234 return false;
2235 }
Juan Quintelaa935e302017-03-21 15:36:51 +01002236 if ((pss->page << TARGET_PAGE_BITS) >= pss->block->used_length) {
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002237 /* Didn't find anything in this RAM Block */
Juan Quintelaa935e302017-03-21 15:36:51 +01002238 pss->page = 0;
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002239 pss->block = QLIST_NEXT_RCU(pss->block, next);
2240 if (!pss->block) {
Xiao Guangrong48df9d82018-09-06 15:00:59 +08002241 /*
2242 * If memory migration starts over, we will meet a dirtied page
2243             * which may still exist in the compression threads' ring, so we
2244 * should flush the compressed data to make sure the new page
2245 * is not overwritten by the old one in the destination.
2246 *
2247             * Also, if xbzrle is on, stop using the data compression at this
2248 * point. In theory, xbzrle can do better than compression.
2249 */
2250 flush_compressed_data(rs);
2251
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002252 /* Hit the end of the list */
2253 pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
2254 /* Flag that we've looped */
2255 pss->complete_round = true;
Juan Quintela6f37bb82017-03-13 19:26:29 +01002256 rs->ram_bulk_stage = false;
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002257 }
2258 /* Didn't find anything this time, but try again on the new block */
2259 *again = true;
2260 return false;
2261 } else {
2262 /* Can go around again, but... */
2263 *again = true;
2264 /* We've found something so probably don't need to */
2265 return true;
2266 }
2267}
2268
Juan Quintela3d0684b2017-03-23 15:06:39 +01002269/**
2270 * unqueue_page: gets a page of the queue
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002271 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002272 * Helper for 'get_queued_page' - gets a page off the queue
2273 *
2274 * Returns the block of the page (or NULL if none available)
2275 *
Juan Quintelaec481c62017-03-20 22:12:40 +01002276 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002277 * @offset: used to return the offset within the RAMBlock
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002278 */
Juan Quintelaf20e2862017-03-21 16:19:05 +01002279static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002280{
2281 RAMBlock *block = NULL;
2282
Xiao Guangrongae526e32018-08-21 16:10:25 +08002283 if (QSIMPLEQ_EMPTY_ATOMIC(&rs->src_page_requests)) {
2284 return NULL;
2285 }
2286
Juan Quintelaec481c62017-03-20 22:12:40 +01002287 qemu_mutex_lock(&rs->src_page_req_mutex);
2288 if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
2289 struct RAMSrcPageRequest *entry =
2290 QSIMPLEQ_FIRST(&rs->src_page_requests);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002291 block = entry->rb;
2292 *offset = entry->offset;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002293
2294 if (entry->len > TARGET_PAGE_SIZE) {
2295 entry->len -= TARGET_PAGE_SIZE;
2296 entry->offset += TARGET_PAGE_SIZE;
2297 } else {
2298 memory_region_unref(block->mr);
Juan Quintelaec481c62017-03-20 22:12:40 +01002299 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002300 g_free(entry);
Dr. David Alan Gilberte03a34f2018-06-13 11:26:42 +01002301 migration_consume_urgent_request();
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002302 }
2303 }
Juan Quintelaec481c62017-03-20 22:12:40 +01002304 qemu_mutex_unlock(&rs->src_page_req_mutex);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002305
2306 return block;
2307}
2308
Juan Quintela3d0684b2017-03-23 15:06:39 +01002309/**
Li Qiangff1543a2019-05-24 23:28:32 -07002310 * get_queued_page: unqueue a page from the postcopy requests
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002311 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002312 * Skips pages that are already sent (!dirty)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002313 *
Wei Yanga5f7b1a2019-05-11 07:37:29 +08002314 * Returns true if a queued page is found
Juan Quintela3d0684b2017-03-23 15:06:39 +01002315 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002316 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002317 * @pss: data about the state of the current dirty page scan
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002318 */
Juan Quintelaf20e2862017-03-21 16:19:05 +01002319static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002320{
2321 RAMBlock *block;
2322 ram_addr_t offset;
2323 bool dirty;
2324
2325 do {
Juan Quintelaf20e2862017-03-21 16:19:05 +01002326 block = unqueue_page(rs, &offset);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002327 /*
2328 * We're sending this page, and since it's postcopy nothing else
2329 * will dirty it, and we must make sure it doesn't get sent again
2330 * even if this queue request was received after the background
2331 * search already sent it.
2332 */
2333 if (block) {
Juan Quintelaf20e2862017-03-21 16:19:05 +01002334 unsigned long page;
2335
Juan Quintela6b6712e2017-03-22 15:18:04 +01002336 page = offset >> TARGET_PAGE_BITS;
2337 dirty = test_bit(page, block->bmap);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002338 if (!dirty) {
Juan Quintela06b10682017-03-21 15:18:05 +01002339 trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
Juan Quintela6b6712e2017-03-22 15:18:04 +01002340 page, test_bit(page, block->unsentmap));
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002341 } else {
Juan Quintelaf20e2862017-03-21 16:19:05 +01002342 trace_get_queued_page(block->idstr, (uint64_t)offset, page);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002343 }
2344 }
2345
2346 } while (block && !dirty);
2347
2348 if (block) {
2349 /*
2350 * As soon as we start servicing pages out of order, then we have
2351 * to kill the bulk stage, since the bulk stage assumes
2352         * (in migration_bitmap_find_and_reset_dirty) that every page is
2353         * dirty, which is no longer true.
2354 */
Juan Quintela6f37bb82017-03-13 19:26:29 +01002355 rs->ram_bulk_stage = false;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002356
2357 /*
2358 * We want the background search to continue from the queued page
2359 * since the guest is likely to want other pages near to the page
2360 * it just requested.
2361 */
2362 pss->block = block;
Juan Quintelaa935e302017-03-21 15:36:51 +01002363 pss->page = offset >> TARGET_PAGE_BITS;
Wei Yang422314e2019-06-05 09:08:28 +08002364
2365 /*
2366         * This unqueued page would break the "one round" check, even if
2367         * it is really rare.
2368 */
2369 pss->complete_round = false;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002370 }
2371
2372 return !!block;
2373}
2374
Juan Quintela56e93d22015-05-07 19:33:31 +02002375/**
Juan Quintela5e58f962017-04-03 22:06:54 +02002376 * migration_page_queue_free: drop any remaining pages in the ram
2377 * request queue
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002378 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002379 * It should be empty at the end anyway, but in error cases there may
2380 * be some left. In case any page is left, we drop it.
2381 *
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002382 */
Juan Quintela83c13382017-05-04 11:45:01 +02002383static void migration_page_queue_free(RAMState *rs)
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002384{
Juan Quintelaec481c62017-03-20 22:12:40 +01002385 struct RAMSrcPageRequest *mspr, *next_mspr;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002386 /* This queue generally should be empty - but in the case of a failed
2387     * migration it might have some droppings left in it.
2388 */
2389 rcu_read_lock();
Juan Quintelaec481c62017-03-20 22:12:40 +01002390 QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002391 memory_region_unref(mspr->rb->mr);
Juan Quintelaec481c62017-03-20 22:12:40 +01002392 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002393 g_free(mspr);
2394 }
2395 rcu_read_unlock();
2396}
2397
2398/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002399 * ram_save_queue_pages: queue the page for transmission
2400 *
2401 * A request from postcopy destination for example.
2402 *
2403 * Returns zero on success or negative on error
2404 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002405 * @rbname: Name of the RAMBlock of the request. NULL means the
2406 * same as the last one.
2407 * @start: starting address from the start of the RAMBlock
2408 * @len: length (in bytes) to send
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002409 */
Juan Quintela96506892017-03-14 18:41:03 +01002410int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002411{
2412 RAMBlock *ramblock;
Juan Quintela53518d92017-05-04 11:46:24 +02002413 RAMState *rs = ram_state;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002414
Juan Quintela93604472017-06-06 19:49:03 +02002415 ram_counters.postcopy_requests++;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002416 rcu_read_lock();
2417 if (!rbname) {
2418 /* Reuse last RAMBlock */
Juan Quintela68a098f2017-03-14 13:48:42 +01002419 ramblock = rs->last_req_rb;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002420
2421 if (!ramblock) {
2422 /*
2423 * Shouldn't happen, we can't reuse the last RAMBlock if
2424 * it's the 1st request.
2425 */
2426 error_report("ram_save_queue_pages no previous block");
2427 goto err;
2428 }
2429 } else {
2430 ramblock = qemu_ram_block_by_name(rbname);
2431
2432 if (!ramblock) {
2433 /* We shouldn't be asked for a non-existent RAMBlock */
2434 error_report("ram_save_queue_pages no block '%s'", rbname);
2435 goto err;
2436 }
Juan Quintela68a098f2017-03-14 13:48:42 +01002437 rs->last_req_rb = ramblock;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002438 }
2439 trace_ram_save_queue_pages(ramblock->idstr, start, len);
2440 if (start+len > ramblock->used_length) {
Juan Quintela9458ad62015-11-10 17:42:05 +01002441 error_report("%s request overrun start=" RAM_ADDR_FMT " len="
2442 RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002443 __func__, start, len, ramblock->used_length);
2444 goto err;
2445 }
2446
Juan Quintelaec481c62017-03-20 22:12:40 +01002447 struct RAMSrcPageRequest *new_entry =
2448 g_malloc0(sizeof(struct RAMSrcPageRequest));
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002449 new_entry->rb = ramblock;
2450 new_entry->offset = start;
2451 new_entry->len = len;
2452
2453 memory_region_ref(ramblock->mr);
Juan Quintelaec481c62017-03-20 22:12:40 +01002454 qemu_mutex_lock(&rs->src_page_req_mutex);
2455 QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
Dr. David Alan Gilberte03a34f2018-06-13 11:26:42 +01002456 migration_make_urgent_request();
Juan Quintelaec481c62017-03-20 22:12:40 +01002457 qemu_mutex_unlock(&rs->src_page_req_mutex);
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002458 rcu_read_unlock();
2459
2460 return 0;
2461
2462err:
2463 rcu_read_unlock();
2464 return -1;
2465}
2466
Xiao Guangrongd7400a32018-03-30 15:51:26 +08002467static bool save_page_use_compression(RAMState *rs)
2468{
2469 if (!migrate_use_compression()) {
2470 return false;
2471 }
2472
2473 /*
2474 * If xbzrle is on, stop using the data compression after first
2475 * round of migration even if compression is enabled. In theory,
2476 * xbzrle can do better than compression.
2477 */
2478 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
2479 return true;
2480 }
2481
2482 return false;
2483}
2484
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002485/*
2486 * try to compress the page before posting it out, return true if the page
2487 * has been properly handled by compression, otherwise needs other
2488 * paths to handle it
2489 */
2490static bool save_compress_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
2491{
2492 if (!save_page_use_compression(rs)) {
2493 return false;
2494 }
2495
2496 /*
2497 * When starting the process of a new block, the first page of
2498 * the block should be sent out before other pages in the same
2499     * block, and all the pages in the last block should have been sent
2500     * out. Keeping this order is important, because the 'cont' flag
2501 * is used to avoid resending the block name.
2502 *
2503     * We post the first page as a normal page because compression
2504     * takes a lot of CPU.
2505 */
2506 if (block != rs->last_sent_block) {
2507 flush_compressed_data(rs);
2508 return false;
2509 }
2510
2511 if (compress_page_with_multi_thread(rs, block, offset) > 0) {
2512 return true;
2513 }
2514
Xiao Guangrong76e03002018-09-06 15:01:00 +08002515 compression_counters.busy++;
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002516 return false;
2517}
2518
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002519/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002520 * ram_save_target_page: save one target page
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002521 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002522 * Returns the number of pages written
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002523 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002524 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002525 * @pss: data about the page we want to send
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002526 * @last_stage: if we are at the completion stage
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002527 */
Juan Quintelaa0a8aa12017-03-20 22:29:07 +01002528static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
Juan Quintelaf20e2862017-03-21 16:19:05 +01002529 bool last_stage)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002530{
Xiao Guangronga8ec91f2018-03-30 15:51:25 +08002531 RAMBlock *block = pss->block;
2532 ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
2533 int res;
2534
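    /*
     * Try the available save paths in order: device/RDMA control
     * transfer first, then multi-threaded compression, then the
     * zero-page check, then multifd, and finally the normal (possibly
     * xbzrle) path in ram_save_page().
     */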
2535 if (control_save_page(rs, block, offset, &res)) {
2536 return res;
2537 }
2538
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002539 if (save_compress_page(rs, block, offset)) {
2540 return 1;
Xiao Guangrongd7400a32018-03-30 15:51:26 +08002541 }
2542
2543 res = save_zero_page(rs, block, offset);
2544 if (res > 0) {
2545 /* Must let xbzrle know, otherwise a previous (now 0'd) cached
2546 * page would be stale
2547 */
2548 if (!save_page_use_compression(rs)) {
2549 XBZRLE_cache_lock();
2550 xbzrle_cache_zero_page(rs, block->offset + offset);
2551 XBZRLE_cache_unlock();
2552 }
2553 ram_release_pages(block->idstr, offset, res);
2554 return res;
2555 }
2556
Xiao Guangrongda3f56c2018-03-30 15:51:28 +08002557 /*
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002558 * Do not use multifd together with compression: the first page of a
2559 * new block must be posted out before any compressed page for it.
Xiao Guangrongda3f56c2018-03-30 15:51:28 +08002560 */
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002561 if (!save_page_use_compression(rs) && migrate_use_multifd()) {
Juan Quintelab9ee2f72016-01-15 11:40:13 +01002562 return ram_save_multifd_page(rs, block, offset);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002563 }
2564
Xiao Guangrong1faa5662018-03-30 15:51:24 +08002565 return ram_save_page(rs, pss, last_stage);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002566}
2567
2568/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002569 * ram_save_host_page: save a whole host page
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002570 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002571 * Starting at pss->page, send pages up to the end of the current host
2572 * page. It's valid for the initial offset to point into the middle of
2573 * a host page, in which case the remainder of the host page is sent.
2574 * Only dirty target pages are sent. Note that the host page size may
2575 * be a huge page for this block.
Dr. David Alan Gilbert1eb3fc02017-05-17 17:58:09 +01002576 * The saving stops at the boundary of the used_length of the block
2577 * if the RAMBlock isn't a multiple of the host page size.
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002578 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002579 * Returns the number of pages written or negative on error
2580 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002581 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002583 * @pss: data about the page we want to send
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002584 * @last_stage: if we are at the completion stage
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002585 */
Juan Quintelaa0a8aa12017-03-20 22:29:07 +01002586static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
Juan Quintelaf20e2862017-03-21 16:19:05 +01002587 bool last_stage)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002588{
2589 int tmppages, pages = 0;
Juan Quintelaa935e302017-03-21 15:36:51 +01002590 size_t pagesize_bits =
2591 qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
Dr. David Alan Gilbert4c011c32017-02-24 18:28:39 +00002592
Yury Kotovfbd162e2019-02-15 20:45:46 +03002593 if (ramblock_is_ignored(pss->block)) {
Cédric Le Goaterb895de52018-05-14 08:57:00 +02002594 error_report("block %s should not be migrated !", pss->block->idstr);
2595 return 0;
2596 }
2597
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002598 do {
Xiao Guangrong1faa5662018-03-30 15:51:24 +08002599 /* Check whether the page is dirty and, if so, send it */
2600 if (!migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
2601 pss->page++;
2602 continue;
2603 }
2604
Juan Quintelaf20e2862017-03-21 16:19:05 +01002605 tmppages = ram_save_target_page(rs, pss, last_stage);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002606 if (tmppages < 0) {
2607 return tmppages;
2608 }
2609
2610 pages += tmppages;
Xiao Guangrong1faa5662018-03-30 15:51:24 +08002611 if (pss->block->unsentmap) {
2612 clear_bit(pss->page, pss->block->unsentmap);
2613 }
2614
Juan Quintelaa935e302017-03-21 15:36:51 +01002615 pss->page++;
Dr. David Alan Gilbert1eb3fc02017-05-17 17:58:09 +01002616 } while ((pss->page & (pagesize_bits - 1)) &&
2617 offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS));
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002618
2619 /* The offset we leave with is the last one we looked at */
Juan Quintelaa935e302017-03-21 15:36:51 +01002620 pss->page--;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002621 return pages;
2622}
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002623
2624/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002625 * ram_find_and_save_block: finds a dirty page and sends it to the stream
Juan Quintela56e93d22015-05-07 19:33:31 +02002626 *
2627 * Called within an RCU critical section.
2628 *
Xiao Guangronge8f37352018-09-03 17:26:44 +08002629 * Returns the number of pages written where zero means no dirty pages,
2630 * or negative on error
Juan Quintela56e93d22015-05-07 19:33:31 +02002631 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002632 * @rs: current RAM state
Juan Quintela56e93d22015-05-07 19:33:31 +02002633 * @last_stage: if we are at the completion stage
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002634 *
2635 * On systems where host-page-size > target-page-size it will send all the
2636 * pages in a host page that are dirty.
Juan Quintela56e93d22015-05-07 19:33:31 +02002637 */
2638
Juan Quintelace25d332017-03-15 11:00:51 +01002639static int ram_find_and_save_block(RAMState *rs, bool last_stage)
Juan Quintela56e93d22015-05-07 19:33:31 +02002640{
Dr. David Alan Gilbertb8fb8cb2015-09-23 15:27:10 +01002641 PageSearchStatus pss;
Juan Quintela56e93d22015-05-07 19:33:31 +02002642 int pages = 0;
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002643 bool again, found;
Juan Quintela56e93d22015-05-07 19:33:31 +02002644
Ashijeet Acharya0827b9e2017-02-08 19:58:45 +05302645 /* No dirty page as there is zero RAM */
2646 if (!ram_bytes_total()) {
2647 return pages;
2648 }
2649
Juan Quintela6f37bb82017-03-13 19:26:29 +01002650 pss.block = rs->last_seen_block;
Juan Quintelaa935e302017-03-21 15:36:51 +01002651 pss.page = rs->last_page;
Dr. David Alan Gilbertb8fb8cb2015-09-23 15:27:10 +01002652 pss.complete_round = false;
2653
2654 if (!pss.block) {
2655 pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
2656 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002657
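    /*
     * Loop until a dirty page has been found and sent: queued postcopy
     * page requests are served first, then the bitmap is scanned for the
     * next dirty block; 'again' goes false once a full pass finds nothing
     * left to send.
     */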
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002658 do {
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002659 again = true;
Juan Quintelaf20e2862017-03-21 16:19:05 +01002660 found = get_queued_page(rs, &pss);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002661
2662 if (!found) {
2663 /* priority queue empty, so just search for something dirty */
Juan Quintelaf20e2862017-03-21 16:19:05 +01002664 found = find_dirty_block(rs, &pss, &again);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002665 }
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002666
2667 if (found) {
Juan Quintelaf20e2862017-03-21 16:19:05 +01002668 pages = ram_save_host_page(rs, &pss, last_stage);
Juan Quintela56e93d22015-05-07 19:33:31 +02002669 }
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002670 } while (!pages && again);
Juan Quintela56e93d22015-05-07 19:33:31 +02002671
Juan Quintela6f37bb82017-03-13 19:26:29 +01002672 rs->last_seen_block = pss.block;
Juan Quintelaa935e302017-03-21 15:36:51 +01002673 rs->last_page = pss.page;
Juan Quintela56e93d22015-05-07 19:33:31 +02002674
2675 return pages;
2676}
2677
2678void acct_update_position(QEMUFile *f, size_t size, bool zero)
2679{
2680 uint64_t pages = size / TARGET_PAGE_SIZE;
Juan Quintelaf7ccd612017-03-13 20:30:21 +01002681
Juan Quintela56e93d22015-05-07 19:33:31 +02002682 if (zero) {
Juan Quintela93604472017-06-06 19:49:03 +02002683 ram_counters.duplicate += pages;
Juan Quintela56e93d22015-05-07 19:33:31 +02002684 } else {
Juan Quintela93604472017-06-06 19:49:03 +02002685 ram_counters.normal += pages;
2686 ram_counters.transferred += size;
Juan Quintela56e93d22015-05-07 19:33:31 +02002687 qemu_update_position(f, size);
2688 }
2689}
2690
Yury Kotovfbd162e2019-02-15 20:45:46 +03002691static uint64_t ram_bytes_total_common(bool count_ignored)
Juan Quintela56e93d22015-05-07 19:33:31 +02002692{
2693 RAMBlock *block;
2694 uint64_t total = 0;
2695
2696 rcu_read_lock();
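    /*
     * With count_ignored set, even blocks that are flagged to be skipped
     * (ignored) are included in the total; otherwise only blocks that
     * will actually be migrated are counted.
     */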
Yury Kotovfbd162e2019-02-15 20:45:46 +03002697 if (count_ignored) {
2698 RAMBLOCK_FOREACH_MIGRATABLE(block) {
2699 total += block->used_length;
2700 }
2701 } else {
2702 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
2703 total += block->used_length;
2704 }
Peter Xu99e15582017-05-12 12:17:39 +08002705 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002706 rcu_read_unlock();
2707 return total;
2708}
2709
Yury Kotovfbd162e2019-02-15 20:45:46 +03002710uint64_t ram_bytes_total(void)
2711{
2712 return ram_bytes_total_common(false);
2713}
2714
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002715static void xbzrle_load_setup(void)
Juan Quintela56e93d22015-05-07 19:33:31 +02002716{
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002717 XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
Juan Quintela56e93d22015-05-07 19:33:31 +02002718}
2719
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002720static void xbzrle_load_cleanup(void)
2721{
2722 g_free(XBZRLE.decoded_buf);
2723 XBZRLE.decoded_buf = NULL;
2724}
2725
Peter Xu7d7c96b2017-10-19 14:31:58 +08002726static void ram_state_cleanup(RAMState **rsp)
2727{
Dr. David Alan Gilbertb9ccaf62018-02-12 16:03:39 +00002728 if (*rsp) {
2729 migration_page_queue_free(*rsp);
2730 qemu_mutex_destroy(&(*rsp)->bitmap_mutex);
2731 qemu_mutex_destroy(&(*rsp)->src_page_req_mutex);
2732 g_free(*rsp);
2733 *rsp = NULL;
2734 }
Peter Xu7d7c96b2017-10-19 14:31:58 +08002735}
2736
Peter Xu84593a02017-10-19 14:31:59 +08002737static void xbzrle_cleanup(void)
2738{
2739 XBZRLE_cache_lock();
2740 if (XBZRLE.cache) {
2741 cache_fini(XBZRLE.cache);
2742 g_free(XBZRLE.encoded_buf);
2743 g_free(XBZRLE.current_buf);
2744 g_free(XBZRLE.zero_target_page);
2745 XBZRLE.cache = NULL;
2746 XBZRLE.encoded_buf = NULL;
2747 XBZRLE.current_buf = NULL;
2748 XBZRLE.zero_target_page = NULL;
2749 }
2750 XBZRLE_cache_unlock();
2751}
2752
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002753static void ram_save_cleanup(void *opaque)
Juan Quintela56e93d22015-05-07 19:33:31 +02002754{
Juan Quintela53518d92017-05-04 11:46:24 +02002755 RAMState **rsp = opaque;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002756 RAMBlock *block;
Juan Quintelaeb859c52017-03-13 21:51:55 +01002757
Li Zhijian2ff64032015-07-02 20:18:05 +08002758 /* The caller holds the iothread lock or is in a bh, so there is
Yi Wang46334562019-04-15 14:51:29 +08002759 * no writing race against the migration bitmap
Li Zhijian2ff64032015-07-02 20:18:05 +08002760 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002761 memory_global_dirty_log_stop();
2762
Yury Kotovfbd162e2019-02-15 20:45:46 +03002763 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Peter Xu002cad62019-06-03 14:50:56 +08002764 g_free(block->clear_bmap);
2765 block->clear_bmap = NULL;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002766 g_free(block->bmap);
2767 block->bmap = NULL;
2768 g_free(block->unsentmap);
2769 block->unsentmap = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02002770 }
2771
Peter Xu84593a02017-10-19 14:31:59 +08002772 xbzrle_cleanup();
Juan Quintelaf0afa332017-06-28 11:52:28 +02002773 compress_threads_save_cleanup();
Peter Xu7d7c96b2017-10-19 14:31:58 +08002774 ram_state_cleanup(rsp);
Juan Quintela56e93d22015-05-07 19:33:31 +02002775}
2776
Juan Quintela6f37bb82017-03-13 19:26:29 +01002777static void ram_state_reset(RAMState *rs)
Juan Quintela56e93d22015-05-07 19:33:31 +02002778{
Juan Quintela6f37bb82017-03-13 19:26:29 +01002779 rs->last_seen_block = NULL;
2780 rs->last_sent_block = NULL;
Juan Quintela269ace22017-03-21 15:23:31 +01002781 rs->last_page = 0;
Juan Quintela6f37bb82017-03-13 19:26:29 +01002782 rs->last_version = ram_list.version;
2783 rs->ram_bulk_stage = true;
Wei Wang6eeb63f2018-12-11 16:24:52 +08002784 rs->fpo_enabled = false;
Juan Quintela56e93d22015-05-07 19:33:31 +02002785}
2786
2787#define MAX_WAIT 50 /* ms, half buffered_file limit */
2788
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00002789/*
2790 * 'expected' is the value you expect the bitmap mostly to be full
2791 * of; it won't bother printing lines that are all this value.
2792 * 'todump' must point at a bitmap with 'pages' valid bits.
2793 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002794void ram_debug_dump_bitmap(unsigned long *todump, bool expected,
2795 unsigned long pages)
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00002796{
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00002797 int64_t cur;
2798 int64_t linelen = 128;
2799 char linebuf[129];
2800
Juan Quintela6b6712e2017-03-22 15:18:04 +01002801 for (cur = 0; cur < pages; cur += linelen) {
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00002802 int64_t curb;
2803 bool found = false;
2804 /*
2805 * Last line; catch the case where the line length
2806 * is longer than remaining ram
2807 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002808 if (cur + linelen > pages) {
2809 linelen = pages - cur;
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00002810 }
2811 for (curb = 0; curb < linelen; curb++) {
2812 bool thisbit = test_bit(cur + curb, todump);
2813 linebuf[curb] = thisbit ? '1' : '.';
2814 found = found || (thisbit != expected);
2815 }
2816 if (found) {
2817 linebuf[curb] = '\0';
2818 fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
2819 }
2820 }
2821}
2822
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002823/* **** functions for postcopy ***** */
2824
Pavel Butsykinced1c612017-02-03 18:23:21 +03002825void ram_postcopy_migrated_memory_release(MigrationState *ms)
2826{
2827 struct RAMBlock *block;
Pavel Butsykinced1c612017-02-03 18:23:21 +03002828
Yury Kotovfbd162e2019-02-15 20:45:46 +03002829 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01002830 unsigned long *bitmap = block->bmap;
2831 unsigned long range = block->used_length >> TARGET_PAGE_BITS;
2832 unsigned long run_start = find_next_zero_bit(bitmap, range, 0);
Pavel Butsykinced1c612017-02-03 18:23:21 +03002833
2834 while (run_start < range) {
2835 unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
Juan Quintelaaaa20642017-03-21 11:35:24 +01002836 ram_discard_range(block->idstr, run_start << TARGET_PAGE_BITS,
Pavel Butsykinced1c612017-02-03 18:23:21 +03002837 (run_end - run_start) << TARGET_PAGE_BITS);
2838 run_start = find_next_zero_bit(bitmap, range, run_end + 1);
2839 }
2840 }
2841}
2842
Juan Quintela3d0684b2017-03-23 15:06:39 +01002843/**
2844 * postcopy_send_discard_bm_ram: discard a RAMBlock
2845 *
2846 * Returns zero on success
2847 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002848 * Callback from postcopy_each_ram_send_discard for each RAMBlock
2849 * Note: At this point the 'unsentmap' is the processed bitmap combined
2850 * with the dirtymap; so a '1' means it's either dirty or unsent.
Juan Quintela3d0684b2017-03-23 15:06:39 +01002851 *
2852 * @ms: current migration state
Wei Yang89dab312019-07-15 10:05:49 +08002853 * @block: RAMBlock to discard
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002854 */
Wei Yang810cf2b2019-07-24 09:07:21 +08002855static int postcopy_send_discard_bm_ram(MigrationState *ms, RAMBlock *block)
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002856{
Juan Quintela6b6712e2017-03-22 15:18:04 +01002857 unsigned long end = block->used_length >> TARGET_PAGE_BITS;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002858 unsigned long current;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002859 unsigned long *unsentmap = block->unsentmap;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002860
Juan Quintela6b6712e2017-03-22 15:18:04 +01002861 for (current = 0; current < end; ) {
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002862 unsigned long one = find_next_bit(unsentmap, end, current);
Wei Yang33a5cb622019-06-27 10:08:21 +08002863 unsigned long zero, discard_length;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002864
Wei Yang33a5cb622019-06-27 10:08:21 +08002865 if (one >= end) {
2866 break;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002867 }
Wei Yang33a5cb622019-06-27 10:08:21 +08002868
2869 zero = find_next_zero_bit(unsentmap, end, one + 1);
2870
2871 if (zero >= end) {
2872 discard_length = end - one;
2873 } else {
2874 discard_length = zero - one;
2875 }
Wei Yang810cf2b2019-07-24 09:07:21 +08002876 postcopy_discard_send_range(ms, one, discard_length);
Wei Yang33a5cb622019-06-27 10:08:21 +08002877 current = one + discard_length;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002878 }
2879
2880 return 0;
2881}
2882
Juan Quintela3d0684b2017-03-23 15:06:39 +01002883/**
2884 * postcopy_each_ram_send_discard: discard all RAMBlocks
2885 *
2886 * Returns 0 for success or negative for error
2887 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002888 * Utility for the outgoing postcopy code.
2889 * Calls postcopy_send_discard_bm_ram for each RAMBlock
2890 * passing it bitmap indexes and name.
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002891 * (qemu_ram_foreach_block ends up passing unscaled lengths
2892 * which would mean postcopy code would have to deal with target page)
Juan Quintela3d0684b2017-03-23 15:06:39 +01002893 *
2894 * @ms: current migration state
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002895 */
2896static int postcopy_each_ram_send_discard(MigrationState *ms)
2897{
2898 struct RAMBlock *block;
2899 int ret;
2900
Yury Kotovfbd162e2019-02-15 20:45:46 +03002901 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Wei Yang810cf2b2019-07-24 09:07:21 +08002902 postcopy_discard_send_init(ms, block->idstr);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002903
2904 /*
2905 * Postcopy sends chunks of bitmap over the wire, but it
2906 * just needs indexes at this point, avoids it having
2907 * target page specific code.
2908 */
Wei Yang810cf2b2019-07-24 09:07:21 +08002909 ret = postcopy_send_discard_bm_ram(ms, block);
2910 postcopy_discard_send_finish(ms);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002911 if (ret) {
2912 return ret;
2913 }
2914 }
2915
2916 return 0;
2917}
2918
Juan Quintela3d0684b2017-03-23 15:06:39 +01002919/**
2920 * postcopy_chunk_hostpages_pass: canocalize bitmap in hostpages
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002921 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002922 * Helper for postcopy_chunk_hostpages; it's called twice to
2923 * canonicalize the two bitmaps, which are similar, but one is
2924 * inverted.
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002925 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002926 * Postcopy requires that all target pages in a hostpage are dirty or
2927 * clean, not a mix. This function canonicalizes the bitmaps.
2928 *
2929 * @ms: current migration state
2930 * @unsent_pass: if true we need to canonicalize partially unsent host pages
2931 * otherwise we need to canonicalize partially dirty host pages
2932 * @block: block that contains the page we want to canonicalize
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002933 */
2934static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
Wei Yang810cf2b2019-07-24 09:07:21 +08002935 RAMBlock *block)
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002936{
Juan Quintela53518d92017-05-04 11:46:24 +02002937 RAMState *rs = ram_state;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002938 unsigned long *bitmap = block->bmap;
2939 unsigned long *unsentmap = block->unsentmap;
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00002940 unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002941 unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002942 unsigned long run_start;
2943
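    /*
     * host_ratio is the number of target pages per host page; the loop
     * below rounds every run of sent/dirty target pages out to host page
     * boundaries so that no host page is left partially sent or
     * partially dirty.
     */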
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00002944 if (block->page_size == TARGET_PAGE_SIZE) {
2945 /* Easy case - TPS==HPS for a non-huge page RAMBlock */
2946 return;
2947 }
2948
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002949 if (unsent_pass) {
2950 /* Find a sent page */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002951 run_start = find_next_zero_bit(unsentmap, pages, 0);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002952 } else {
2953 /* Find a dirty page */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002954 run_start = find_next_bit(bitmap, pages, 0);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002955 }
2956
Juan Quintela6b6712e2017-03-22 15:18:04 +01002957 while (run_start < pages) {
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002958 unsigned long host_offset;
2959
2960 /*
2961 * If the start of this run of pages is in the middle of a host
2962 * page, then we need to fixup this host page.
2963 */
2964 host_offset = run_start % host_ratio;
Wei Yangdad45ab2019-08-06 08:46:47 +08002965 if (!host_offset) {
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002966 /* Find the end of this run */
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002967 if (unsent_pass) {
Wei Yangdad45ab2019-08-06 08:46:47 +08002968 run_start = find_next_bit(unsentmap, pages, run_start + 1);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002969 } else {
Wei Yangdad45ab2019-08-06 08:46:47 +08002970 run_start = find_next_zero_bit(bitmap, pages, run_start + 1);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002971 }
2972 /*
2973 * If the end isn't at the start of a host page, then the
2974 * run doesn't finish at the end of a host page
2975 * and we need to discard.
2976 */
Wei Yangdad45ab2019-08-06 08:46:47 +08002977 host_offset = run_start % host_ratio;
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002978 }
2979
Wei Yang89966042019-07-10 13:08:14 +08002980 if (host_offset) {
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002981 unsigned long page;
Wei Yangdad45ab2019-08-06 08:46:47 +08002982 unsigned long fixup_start_addr = QEMU_ALIGN_DOWN(run_start,
2983 host_ratio);
2984 run_start = QEMU_ALIGN_UP(run_start, host_ratio);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002985
2986 /* Tell the destination to discard this page */
2987 if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
2988 /* For the unsent_pass we:
2989 * discard partially sent pages
2990 * For the !unsent_pass (dirty) we:
2991 * discard partially dirty pages that were sent
2992 * (any partially sent pages were already discarded
2993 * by the previous unsent_pass)
2994 */
Wei Yang810cf2b2019-07-24 09:07:21 +08002995 postcopy_discard_send_range(ms, fixup_start_addr, host_ratio);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002996 }
2997
2998 /* Clean up the bitmap */
2999 for (page = fixup_start_addr;
3000 page < fixup_start_addr + host_ratio; page++) {
3001 /* All pages in this host page are now not sent */
3002 set_bit(page, unsentmap);
3003
3004 /*
3005 * Remark them as dirty, updating the count for any pages
3006 * that weren't previously dirty.
3007 */
Juan Quintela0d8ec882017-03-13 21:21:41 +01003008 rs->migration_dirty_pages += !test_and_set_bit(page, bitmap);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00003009 }
3010 }
3011
3012 if (unsent_pass) {
3013 /* Find the next sent page for the next iteration */
Juan Quintela6b6712e2017-03-22 15:18:04 +01003014 run_start = find_next_zero_bit(unsentmap, pages, run_start);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00003015 } else {
3016 /* Find the next dirty page for the next iteration */
Juan Quintela6b6712e2017-03-22 15:18:04 +01003017 run_start = find_next_bit(bitmap, pages, run_start);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00003018 }
3019 }
3020}
3021
Juan Quintela3d0684b2017-03-23 15:06:39 +01003022/**
Wei Yang89dab312019-07-15 10:05:49 +08003023 * postcopy_chunk_hostpages: discard any partially sent host page
Juan Quintela3d0684b2017-03-23 15:06:39 +01003024 *
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00003025 * Utility for the outgoing postcopy code.
3026 *
3027 * Discard any partially sent host-page size chunks, mark any partially
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00003028 * dirty host-page size chunks as all dirty. In this case the host-page
3029 * is the host-page for the particular RAMBlock, i.e. it might be a huge page
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00003030 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01003031 * Returns zero on success
3032 *
3033 * @ms: current migration state
Juan Quintela6b6712e2017-03-22 15:18:04 +01003034 * @block: block we want to work with
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00003035 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01003036static int postcopy_chunk_hostpages(MigrationState *ms, RAMBlock *block)
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00003037{
Wei Yang810cf2b2019-07-24 09:07:21 +08003038 postcopy_discard_send_init(ms, block->idstr);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00003039
Juan Quintela6b6712e2017-03-22 15:18:04 +01003040 /* First pass: Discard all partially sent host pages */
Wei Yang810cf2b2019-07-24 09:07:21 +08003041 postcopy_chunk_hostpages_pass(ms, true, block);
Juan Quintela6b6712e2017-03-22 15:18:04 +01003042 /*
3043 * Second pass: Ensure that all partially dirty host pages are made
3044 * fully dirty.
3045 */
Wei Yang810cf2b2019-07-24 09:07:21 +08003046 postcopy_chunk_hostpages_pass(ms, false, block);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00003047
Wei Yang810cf2b2019-07-24 09:07:21 +08003048 postcopy_discard_send_finish(ms);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00003049 return 0;
3050}
3051
Juan Quintela3d0684b2017-03-23 15:06:39 +01003052/**
3053 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
3054 *
3055 * Returns zero on success
3056 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003057 * Transmit the set of pages to be discarded after precopy to the target;
3058 * these are pages that:
3059 * a) Have been previously transmitted but are now dirty again
3060 * b) Pages that have never been transmitted, this ensures that
3061 * any pages on the destination that have been mapped by background
3062 * tasks get discarded (transparent huge pages is the specific concern)
3063 * Hopefully this is pretty sparse
Juan Quintela3d0684b2017-03-23 15:06:39 +01003064 *
3065 * @ms: current migration state
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003066 */
3067int ram_postcopy_send_discard_bitmap(MigrationState *ms)
3068{
Juan Quintela53518d92017-05-04 11:46:24 +02003069 RAMState *rs = ram_state;
Juan Quintela6b6712e2017-03-22 15:18:04 +01003070 RAMBlock *block;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003071 int ret;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003072
3073 rcu_read_lock();
3074
3075 /* This should be our last sync, the src is now paused */
Juan Quintelaeb859c52017-03-13 21:51:55 +01003076 migration_bitmap_sync(rs);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003077
Juan Quintela6b6712e2017-03-22 15:18:04 +01003078 /* Easiest way to make sure we don't resume in the middle of a host-page */
3079 rs->last_seen_block = NULL;
3080 rs->last_sent_block = NULL;
3081 rs->last_page = 0;
3082
Yury Kotovfbd162e2019-02-15 20:45:46 +03003083 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01003084 unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
3085 unsigned long *bitmap = block->bmap;
3086 unsigned long *unsentmap = block->unsentmap;
3087
3088 if (!unsentmap) {
3089 /* We don't have a safe way to resize the sentmap, so
3090 * if the bitmap was resized it will be NULL at this
3091 * point.
3092 */
3093 error_report("migration ram resized during precopy phase");
3094 rcu_read_unlock();
3095 return -EINVAL;
3096 }
3097 /* Deal with TPS != HPS and huge pages */
3098 ret = postcopy_chunk_hostpages(ms, block);
3099 if (ret) {
3100 rcu_read_unlock();
3101 return ret;
3102 }
3103
3104 /*
3105 * Update the unsentmap to be unsentmap = unsentmap | dirty
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003106 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01003107 bitmap_or(unsentmap, unsentmap, bitmap, pages);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003108#ifdef DEBUG_POSTCOPY
Juan Quintela6b6712e2017-03-22 15:18:04 +01003109 ram_debug_dump_bitmap(unsentmap, true, pages);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003110#endif
Juan Quintela6b6712e2017-03-22 15:18:04 +01003111 }
3112 trace_ram_postcopy_send_discard_bitmap();
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003113
3114 ret = postcopy_each_ram_send_discard(ms);
3115 rcu_read_unlock();
3116
3117 return ret;
3118}
3119
Juan Quintela3d0684b2017-03-23 15:06:39 +01003120/**
3121 * ram_discard_range: discard dirtied pages at the beginning of postcopy
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003122 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01003123 * Returns zero on success
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003124 *
Juan Quintela36449152017-03-23 15:11:59 +01003125 * @rbname: name of the RAMBlock of the request. NULL means the
3126 * same as the last one.
Juan Quintela3d0684b2017-03-23 15:06:39 +01003127 * @start: starting offset (in bytes) within the RAMBlock
3128 * @length: length (in bytes) of the range to discard
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003129 */
Juan Quintelaaaa20642017-03-21 11:35:24 +01003130int ram_discard_range(const char *rbname, uint64_t start, size_t length)
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003131{
3132 int ret = -1;
3133
Juan Quintela36449152017-03-23 15:11:59 +01003134 trace_ram_discard_range(rbname, start, length);
Dr. David Alan Gilbertd3a50382017-02-24 18:28:32 +00003135
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003136 rcu_read_lock();
Juan Quintela36449152017-03-23 15:11:59 +01003137 RAMBlock *rb = qemu_ram_block_by_name(rbname);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003138
3139 if (!rb) {
Juan Quintela36449152017-03-23 15:11:59 +01003140 error_report("ram_discard_range: Failed to find block '%s'", rbname);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003141 goto err;
3142 }
3143
Peter Xu814bb082018-07-23 20:33:02 +08003144 /*
3145 * On source VM, we don't need to update the received bitmap since
3146 * we don't even have one.
3147 */
3148 if (rb->receivedmap) {
3149 bitmap_clear(rb->receivedmap, start >> qemu_target_page_bits(),
3150 length >> qemu_target_page_bits());
3151 }
3152
Dr. David Alan Gilbertd3a50382017-02-24 18:28:32 +00003153 ret = ram_block_discard_range(rb, start, length);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003154
3155err:
3156 rcu_read_unlock();
3157
3158 return ret;
3159}
3160
Peter Xu84593a02017-10-19 14:31:59 +08003161/*
3162 * For every allocation, we try not to crash the VM if the
3163 * allocation fails.
3164 */
3165static int xbzrle_init(void)
3166{
3167 Error *local_err = NULL;
3168
3169 if (!migrate_use_xbzrle()) {
3170 return 0;
3171 }
3172
3173 XBZRLE_cache_lock();
3174
3175 XBZRLE.zero_target_page = g_try_malloc0(TARGET_PAGE_SIZE);
3176 if (!XBZRLE.zero_target_page) {
3177 error_report("%s: Error allocating zero page", __func__);
3178 goto err_out;
3179 }
3180
3181 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(),
3182 TARGET_PAGE_SIZE, &local_err);
3183 if (!XBZRLE.cache) {
3184 error_report_err(local_err);
3185 goto free_zero_page;
3186 }
3187
3188 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
3189 if (!XBZRLE.encoded_buf) {
3190 error_report("%s: Error allocating encoded_buf", __func__);
3191 goto free_cache;
3192 }
3193
3194 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
3195 if (!XBZRLE.current_buf) {
3196 error_report("%s: Error allocating current_buf", __func__);
3197 goto free_encoded_buf;
3198 }
3199
3200 /* We are all good */
3201 XBZRLE_cache_unlock();
3202 return 0;
3203
3204free_encoded_buf:
3205 g_free(XBZRLE.encoded_buf);
3206 XBZRLE.encoded_buf = NULL;
3207free_cache:
3208 cache_fini(XBZRLE.cache);
3209 XBZRLE.cache = NULL;
3210free_zero_page:
3211 g_free(XBZRLE.zero_target_page);
3212 XBZRLE.zero_target_page = NULL;
3213err_out:
3214 XBZRLE_cache_unlock();
3215 return -ENOMEM;
3216}
3217
Juan Quintela53518d92017-05-04 11:46:24 +02003218static int ram_state_init(RAMState **rsp)
Juan Quintela56e93d22015-05-07 19:33:31 +02003219{
Peter Xu7d00ee62017-10-19 14:31:57 +08003220 *rsp = g_try_new0(RAMState, 1);
3221
3222 if (!*rsp) {
3223 error_report("%s: Init ramstate fail", __func__);
3224 return -1;
3225 }
Juan Quintela53518d92017-05-04 11:46:24 +02003226
3227 qemu_mutex_init(&(*rsp)->bitmap_mutex);
3228 qemu_mutex_init(&(*rsp)->src_page_req_mutex);
3229 QSIMPLEQ_INIT(&(*rsp)->src_page_requests);
Juan Quintela56e93d22015-05-07 19:33:31 +02003230
Peter Xu7d00ee62017-10-19 14:31:57 +08003231 /*
Ivan Ren40c4d4a2019-07-14 22:51:19 +08003232 * Count the total number of pages used by ram blocks not including any
3233 * gaps due to alignment or unplugs.
Wei Yang03158512019-06-04 14:17:27 +08003234 * This must match the initial values of the dirty bitmap.
Peter Xu7d00ee62017-10-19 14:31:57 +08003235 */
Ivan Ren40c4d4a2019-07-14 22:51:19 +08003236 (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
Peter Xu7d00ee62017-10-19 14:31:57 +08003237 ram_state_reset(*rsp);
3238
3239 return 0;
3240}
3241
Peter Xud6eff5d2017-10-19 14:32:00 +08003242static void ram_list_init_bitmaps(void)
3243{
Peter Xu002cad62019-06-03 14:50:56 +08003244 MigrationState *ms = migrate_get_current();
Peter Xud6eff5d2017-10-19 14:32:00 +08003245 RAMBlock *block;
3246 unsigned long pages;
Peter Xu002cad62019-06-03 14:50:56 +08003247 uint8_t shift;
Peter Xud6eff5d2017-10-19 14:32:00 +08003248
3249 /* Skip setting bitmap if there is no RAM */
3250 if (ram_bytes_total()) {
Peter Xu002cad62019-06-03 14:50:56 +08003251 shift = ms->clear_bitmap_shift;
3252 if (shift > CLEAR_BITMAP_SHIFT_MAX) {
3253 error_report("clear_bitmap_shift (%u) too big, using "
3254 "max value (%u)", shift, CLEAR_BITMAP_SHIFT_MAX);
3255 shift = CLEAR_BITMAP_SHIFT_MAX;
3256 } else if (shift < CLEAR_BITMAP_SHIFT_MIN) {
3257 error_report("clear_bitmap_shift (%u) too small, using "
3258 "min value (%u)", shift, CLEAR_BITMAP_SHIFT_MIN);
3259 shift = CLEAR_BITMAP_SHIFT_MIN;
3260 }
3261
Yury Kotovfbd162e2019-02-15 20:45:46 +03003262 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Peter Xud6eff5d2017-10-19 14:32:00 +08003263 pages = block->max_length >> TARGET_PAGE_BITS;
Wei Yang03158512019-06-04 14:17:27 +08003264 /*
3265 * The initial dirty bitmap for migration must be set with all
3266 * ones to make sure we'll migrate every guest RAM page to
3267 * the destination.
Ivan Ren40c4d4a2019-07-14 22:51:19 +08003268 * Here we set RAMBlock.bmap all to 1 because when restarting a
3269 * new migration after a failed one, ram_list.
3270 * dirty_memory[DIRTY_MEMORY_MIGRATION] doesn't include the whole
3271 * guest memory.
Wei Yang03158512019-06-04 14:17:27 +08003272 */
Peter Xud6eff5d2017-10-19 14:32:00 +08003273 block->bmap = bitmap_new(pages);
Ivan Ren40c4d4a2019-07-14 22:51:19 +08003274 bitmap_set(block->bmap, 0, pages);
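            /*
             * clear_bmap works at a coarser granularity of (1 << shift)
             * pages per bit; presumably it lets the clearing of the
             * memory-region dirty log be deferred until a chunk is about
             * to be sent again.
             */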
Peter Xu002cad62019-06-03 14:50:56 +08003275 block->clear_bmap_shift = shift;
3276 block->clear_bmap = bitmap_new(clear_bmap_size(pages, shift));
Peter Xud6eff5d2017-10-19 14:32:00 +08003277 if (migrate_postcopy_ram()) {
3278 block->unsentmap = bitmap_new(pages);
3279 bitmap_set(block->unsentmap, 0, pages);
3280 }
3281 }
3282 }
3283}
3284
3285static void ram_init_bitmaps(RAMState *rs)
3286{
3287 /* For memory_global_dirty_log_start below. */
3288 qemu_mutex_lock_iothread();
3289 qemu_mutex_lock_ramlist();
3290 rcu_read_lock();
3291
3292 ram_list_init_bitmaps();
3293 memory_global_dirty_log_start();
Wei Wangbd227062018-12-11 16:24:51 +08003294 migration_bitmap_sync_precopy(rs);
Peter Xud6eff5d2017-10-19 14:32:00 +08003295
3296 rcu_read_unlock();
3297 qemu_mutex_unlock_ramlist();
3298 qemu_mutex_unlock_iothread();
3299}
3300
Peter Xu7d00ee62017-10-19 14:31:57 +08003301static int ram_init_all(RAMState **rsp)
3302{
Peter Xu7d00ee62017-10-19 14:31:57 +08003303 if (ram_state_init(rsp)) {
3304 return -1;
3305 }
3306
Peter Xu84593a02017-10-19 14:31:59 +08003307 if (xbzrle_init()) {
3308 ram_state_cleanup(rsp);
3309 return -1;
Juan Quintela56e93d22015-05-07 19:33:31 +02003310 }
3311
Peter Xud6eff5d2017-10-19 14:32:00 +08003312 ram_init_bitmaps(*rsp);
zhanghailianga91246c2016-10-27 14:42:59 +08003313
3314 return 0;
3315}
3316
Peter Xu08614f32018-05-02 18:47:33 +08003317static void ram_state_resume_prepare(RAMState *rs, QEMUFile *out)
3318{
3319 RAMBlock *block;
3320 uint64_t pages = 0;
3321
3322 /*
3323 * Postcopy is not using xbzrle/compression, so no need for that.
3324 * Also, since the source is already halted, we don't need to care
3325 * about dirty page logging either.
3326 */
3327
Yury Kotovfbd162e2019-02-15 20:45:46 +03003328 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Peter Xu08614f32018-05-02 18:47:33 +08003329 pages += bitmap_count_one(block->bmap,
3330 block->used_length >> TARGET_PAGE_BITS);
3331 }
3332
3333 /* This may not be aligned with current bitmaps. Recalculate. */
3334 rs->migration_dirty_pages = pages;
3335
3336 rs->last_seen_block = NULL;
3337 rs->last_sent_block = NULL;
3338 rs->last_page = 0;
3339 rs->last_version = ram_list.version;
3340 /*
3341 * Disable the bulk stage, otherwise we'd resend all of RAM no
3342 * matter what we have already sent.
3343 */
3344 rs->ram_bulk_stage = false;
3345
3346 /* Update RAMState cache of output QEMUFile */
3347 rs->f = out;
3348
3349 trace_ram_state_resume_prepare(pages);
3350}
3351
Juan Quintela3d0684b2017-03-23 15:06:39 +01003352/*
Wei Wang6bcb05f2018-12-11 16:24:50 +08003353 * This function clears bits of the free pages reported by the caller from the
3354 * migration dirty bitmap. @addr is the host address corresponding to the
3355 * start of the contiguous guest free pages, and @len is the total size in
3356 * bytes of those pages.
3357 */
3358void qemu_guest_free_page_hint(void *addr, size_t len)
3359{
3360 RAMBlock *block;
3361 ram_addr_t offset;
3362 size_t used_len, start, npages;
3363 MigrationState *s = migrate_get_current();
3364
3365 /* This function is currently expected to be used during live migration */
3366 if (!migration_is_setup_or_active(s->state)) {
3367 return;
3368 }
3369
3370 for (; len > 0; len -= used_len, addr += used_len) {
3371 block = qemu_ram_block_from_host(addr, false, &offset);
3372 if (unlikely(!block || offset >= block->used_length)) {
3373 /*
3374 * The implementation might not support RAMBlock resize during
3375 * live migration, but it could happen in theory with future
3376 * updates. So we add a check here to capture that case.
3377 */
3378 error_report_once("%s unexpected error", __func__);
3379 return;
3380 }
3381
3382 if (len <= block->used_length - offset) {
3383 used_len = len;
3384 } else {
3385 used_len = block->used_length - offset;
3386 }
3387
3388 start = offset >> TARGET_PAGE_BITS;
3389 npages = used_len >> TARGET_PAGE_BITS;
3390
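        /*
         * Clear the dirty bits for the reported free range and keep the
         * dirty-page counter consistent, all under bitmap_mutex.
         */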
3391 qemu_mutex_lock(&ram_state->bitmap_mutex);
3392 ram_state->migration_dirty_pages -=
3393 bitmap_count_one_with_offset(block->bmap, start, npages);
3394 bitmap_clear(block->bmap, start, npages);
3395 qemu_mutex_unlock(&ram_state->bitmap_mutex);
3396 }
3397}
3398
3399/*
Juan Quintela3d0684b2017-03-23 15:06:39 +01003400 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
zhanghailianga91246c2016-10-27 14:42:59 +08003401 * a long-running RCU critical section. When rcu-reclaims in the code
3402 * start to become numerous it will be necessary to reduce the
3403 * granularity of these critical sections.
3404 */
3405
Juan Quintela3d0684b2017-03-23 15:06:39 +01003406/**
3407 * ram_save_setup: Setup RAM for migration
3408 *
3409 * Returns zero to indicate success and negative for error
3410 *
3411 * @f: QEMUFile where to send the data
3412 * @opaque: RAMState pointer
3413 */
zhanghailianga91246c2016-10-27 14:42:59 +08003414static int ram_save_setup(QEMUFile *f, void *opaque)
3415{
Juan Quintela53518d92017-05-04 11:46:24 +02003416 RAMState **rsp = opaque;
zhanghailianga91246c2016-10-27 14:42:59 +08003417 RAMBlock *block;
3418
Xiao Guangrongdcaf4462018-03-30 15:51:20 +08003419 if (compress_threads_save_setup()) {
3420 return -1;
3421 }
3422
zhanghailianga91246c2016-10-27 14:42:59 +08003423 /* migration has already setup the bitmap, reuse it. */
3424 if (!migration_in_colo_state()) {
Peter Xu7d00ee62017-10-19 14:31:57 +08003425 if (ram_init_all(rsp) != 0) {
Xiao Guangrongdcaf4462018-03-30 15:51:20 +08003426 compress_threads_save_cleanup();
zhanghailianga91246c2016-10-27 14:42:59 +08003427 return -1;
Juan Quintela53518d92017-05-04 11:46:24 +02003428 }
zhanghailianga91246c2016-10-27 14:42:59 +08003429 }
Juan Quintela53518d92017-05-04 11:46:24 +02003430 (*rsp)->f = f;
zhanghailianga91246c2016-10-27 14:42:59 +08003431
3432 rcu_read_lock();
Juan Quintela56e93d22015-05-07 19:33:31 +02003433
Yury Kotovfbd162e2019-02-15 20:45:46 +03003434 qemu_put_be64(f, ram_bytes_total_common(true) | RAM_SAVE_FLAG_MEM_SIZE);
Juan Quintela56e93d22015-05-07 19:33:31 +02003435
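    /*
     * Per-block stream header: the id string and used_length for every
     * migratable block, plus the page size when postcopy may need it and
     * the block's address when ignore-shared is enabled.
     */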
Cédric Le Goaterb895de52018-05-14 08:57:00 +02003436 RAMBLOCK_FOREACH_MIGRATABLE(block) {
Juan Quintela56e93d22015-05-07 19:33:31 +02003437 qemu_put_byte(f, strlen(block->idstr));
3438 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
3439 qemu_put_be64(f, block->used_length);
Dr. David Alan Gilbertef08fb32017-02-24 18:28:30 +00003440 if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
3441 qemu_put_be64(f, block->page_size);
3442 }
Yury Kotovfbd162e2019-02-15 20:45:46 +03003443 if (migrate_ignore_shared()) {
3444 qemu_put_be64(f, block->mr->addr);
Yury Kotovfbd162e2019-02-15 20:45:46 +03003445 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003446 }
3447
3448 rcu_read_unlock();
3449
3450 ram_control_before_iterate(f, RAM_CONTROL_SETUP);
3451 ram_control_after_iterate(f, RAM_CONTROL_SETUP);
3452
Juan Quintela6df264a2018-02-28 09:10:07 +01003453 multifd_send_sync_main();
Juan Quintela56e93d22015-05-07 19:33:31 +02003454 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
Juan Quintela35374cb2018-04-18 10:13:21 +02003455 qemu_fflush(f);
Juan Quintela56e93d22015-05-07 19:33:31 +02003456
3457 return 0;
3458}
3459
Juan Quintela3d0684b2017-03-23 15:06:39 +01003460/**
3461 * ram_save_iterate: iterative stage for migration
3462 *
3463 * Returns zero to indicate success and negative for error
3464 *
3465 * @f: QEMUFile where to send the data
3466 * @opaque: RAMState pointer
3467 */
Juan Quintela56e93d22015-05-07 19:33:31 +02003468static int ram_save_iterate(QEMUFile *f, void *opaque)
3469{
Juan Quintela53518d92017-05-04 11:46:24 +02003470 RAMState **temp = opaque;
3471 RAMState *rs = *temp;
Juan Quintela56e93d22015-05-07 19:33:31 +02003472 int ret;
3473 int i;
3474 int64_t t0;
Thomas Huth5c903082016-11-04 14:10:17 +01003475 int done = 0;
Juan Quintela56e93d22015-05-07 19:33:31 +02003476
Peter Lievenb2557342018-03-08 12:18:24 +01003477 if (blk_mig_bulk_active()) {
3478 /* Avoid transferring ram during bulk phase of block migration as
3479 * the bulk phase will usually take a long time and transferring
3480 * ram updates during that time is pointless. */
3481 goto out;
3482 }
3483
Juan Quintela56e93d22015-05-07 19:33:31 +02003484 rcu_read_lock();
Juan Quintela6f37bb82017-03-13 19:26:29 +01003485 if (ram_list.version != rs->last_version) {
3486 ram_state_reset(rs);
Juan Quintela56e93d22015-05-07 19:33:31 +02003487 }
3488
3489 /* Read version before ram_list.blocks */
3490 smp_rmb();
3491
3492 ram_control_before_iterate(f, RAM_CONTROL_ROUND);
3493
3494 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
3495 i = 0;
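    /*
     * Keep sending pages while we are under the rate limit, or while
     * urgent postcopy page requests are queued (those are allowed to
     * exceed the limit so the destination is not left waiting).
     */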
Dr. David Alan Gilberte03a34f2018-06-13 11:26:42 +01003496 while ((ret = qemu_file_rate_limit(f)) == 0 ||
3497 !QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02003498 int pages;
3499
Dr. David Alan Gilberte03a34f2018-06-13 11:26:42 +01003500 if (qemu_file_get_error(f)) {
3501 break;
3502 }
3503
Juan Quintelace25d332017-03-15 11:00:51 +01003504 pages = ram_find_and_save_block(rs, false);
Juan Quintela56e93d22015-05-07 19:33:31 +02003505 /* no more pages to send */
3506 if (pages == 0) {
Thomas Huth5c903082016-11-04 14:10:17 +01003507 done = 1;
Juan Quintela56e93d22015-05-07 19:33:31 +02003508 break;
3509 }
Xiao Guangronge8f37352018-09-03 17:26:44 +08003510
3511 if (pages < 0) {
3512 qemu_file_set_error(f, pages);
3513 break;
3514 }
3515
Xiao Guangrongbe8b02e2018-09-03 17:26:42 +08003516 rs->target_page_count += pages;
Jason J. Herne070afca2015-09-08 13:12:35 -04003517
Juan Quintela56e93d22015-05-07 19:33:31 +02003518 /* We want to check in the 1st loop, just in case it was the 1st time
3519 and we had to sync the dirty bitmap.
Wei Yanga5f7b1a2019-05-11 07:37:29 +08003520 qemu_clock_get_ns() is a bit expensive, so we only check every few
Juan Quintela56e93d22015-05-07 19:33:31 +02003521 iterations.
3522 */
3523 if ((i & 63) == 0) {
3524 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
3525 if (t1 > MAX_WAIT) {
Juan Quintela55c44462017-01-23 22:32:05 +01003526 trace_ram_save_iterate_big_wait(t1, i);
Juan Quintela56e93d22015-05-07 19:33:31 +02003527 break;
3528 }
3529 }
3530 i++;
3531 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003532 rcu_read_unlock();
3533
3534 /*
3535 * Must occur before EOS (or any QEMUFile operation)
3536 * because of RDMA protocol.
3537 */
3538 ram_control_after_iterate(f, RAM_CONTROL_ROUND);
3539
Peter Lievenb2557342018-03-08 12:18:24 +01003540out:
Wei Yangb6526c42019-06-12 09:43:37 +08003541 multifd_send_sync_main();
Juan Quintela56e93d22015-05-07 19:33:31 +02003542 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
Juan Quintela35374cb2018-04-18 10:13:21 +02003543 qemu_fflush(f);
Juan Quintela93604472017-06-06 19:49:03 +02003544 ram_counters.transferred += 8;
Juan Quintela56e93d22015-05-07 19:33:31 +02003545
3546 ret = qemu_file_get_error(f);
3547 if (ret < 0) {
3548 return ret;
3549 }
3550
Thomas Huth5c903082016-11-04 14:10:17 +01003551 return done;
Juan Quintela56e93d22015-05-07 19:33:31 +02003552}
3553
Juan Quintela3d0684b2017-03-23 15:06:39 +01003554/**
3555 * ram_save_complete: function called to send the remaining amount of ram
3556 *
Xiao Guangronge8f37352018-09-03 17:26:44 +08003557 * Returns zero to indicate success or negative on error
Juan Quintela3d0684b2017-03-23 15:06:39 +01003558 *
3559 * Called with iothread lock
3560 *
3561 * @f: QEMUFile where to send the data
3562 * @opaque: RAMState pointer
3563 */
Juan Quintela56e93d22015-05-07 19:33:31 +02003564static int ram_save_complete(QEMUFile *f, void *opaque)
3565{
Juan Quintela53518d92017-05-04 11:46:24 +02003566 RAMState **temp = opaque;
3567 RAMState *rs = *temp;
Xiao Guangronge8f37352018-09-03 17:26:44 +08003568 int ret = 0;
Juan Quintela6f37bb82017-03-13 19:26:29 +01003569
Juan Quintela56e93d22015-05-07 19:33:31 +02003570 rcu_read_lock();
3571
Juan Quintela57273092017-03-20 22:25:28 +01003572 if (!migration_in_postcopy()) {
Wei Wangbd227062018-12-11 16:24:51 +08003573 migration_bitmap_sync_precopy(rs);
Dr. David Alan Gilbert663e6c12015-11-05 18:11:13 +00003574 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003575
3576 ram_control_before_iterate(f, RAM_CONTROL_FINISH);
3577
3578 /* try transferring iterative blocks of memory */
3579
3580 /* flush all remaining blocks regardless of rate limiting */
3581 while (true) {
3582 int pages;
3583
Juan Quintelace25d332017-03-15 11:00:51 +01003584 pages = ram_find_and_save_block(rs, !migration_in_colo_state());
Juan Quintela56e93d22015-05-07 19:33:31 +02003585 /* no more blocks to send */
3586 if (pages == 0) {
3587 break;
3588 }
Xiao Guangronge8f37352018-09-03 17:26:44 +08003589 if (pages < 0) {
3590 ret = pages;
3591 break;
3592 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003593 }
3594
Juan Quintelace25d332017-03-15 11:00:51 +01003595 flush_compressed_data(rs);
Juan Quintela56e93d22015-05-07 19:33:31 +02003596 ram_control_after_iterate(f, RAM_CONTROL_FINISH);
Juan Quintela56e93d22015-05-07 19:33:31 +02003597
3598 rcu_read_unlock();
Paolo Bonzinid09a6fd2015-07-09 08:47:58 +02003599
Juan Quintela6df264a2018-02-28 09:10:07 +01003600 multifd_send_sync_main();
Juan Quintela56e93d22015-05-07 19:33:31 +02003601 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
Juan Quintela35374cb2018-04-18 10:13:21 +02003602 qemu_fflush(f);
Juan Quintela56e93d22015-05-07 19:33:31 +02003603
Xiao Guangronge8f37352018-09-03 17:26:44 +08003604 return ret;
Juan Quintela56e93d22015-05-07 19:33:31 +02003605}
3606
Dr. David Alan Gilbertc31b0982015-11-05 18:10:54 +00003607static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
Vladimir Sementsov-Ogievskiy47995022018-03-13 15:34:00 -04003608 uint64_t *res_precopy_only,
3609 uint64_t *res_compatible,
3610 uint64_t *res_postcopy_only)
Juan Quintela56e93d22015-05-07 19:33:31 +02003611{
Juan Quintela53518d92017-05-04 11:46:24 +02003612 RAMState **temp = opaque;
3613 RAMState *rs = *temp;
Juan Quintela56e93d22015-05-07 19:33:31 +02003614 uint64_t remaining_size;
3615
Juan Quintela9edabd42017-03-14 12:02:16 +01003616 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
Juan Quintela56e93d22015-05-07 19:33:31 +02003617
Juan Quintela57273092017-03-20 22:25:28 +01003618 if (!migration_in_postcopy() &&
Dr. David Alan Gilbert663e6c12015-11-05 18:11:13 +00003619 remaining_size < max_size) {
Juan Quintela56e93d22015-05-07 19:33:31 +02003620 qemu_mutex_lock_iothread();
3621 rcu_read_lock();
Wei Wangbd227062018-12-11 16:24:51 +08003622 migration_bitmap_sync_precopy(rs);
Juan Quintela56e93d22015-05-07 19:33:31 +02003623 rcu_read_unlock();
3624 qemu_mutex_unlock_iothread();
Juan Quintela9edabd42017-03-14 12:02:16 +01003625 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
Juan Quintela56e93d22015-05-07 19:33:31 +02003626 }
Dr. David Alan Gilbertc31b0982015-11-05 18:10:54 +00003627
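    /*
     * Report the remaining bytes as "compatible" (sendable in either
     * precopy or postcopy) when postcopy RAM is enabled, and as
     * precopy-only otherwise.
     */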
Vladimir Sementsov-Ogievskiy86e11672017-07-10 19:30:15 +03003628 if (migrate_postcopy_ram()) {
3629 /* We can do postcopy, and all the data is postcopiable */
Vladimir Sementsov-Ogievskiy47995022018-03-13 15:34:00 -04003630 *res_compatible += remaining_size;
Vladimir Sementsov-Ogievskiy86e11672017-07-10 19:30:15 +03003631 } else {
Vladimir Sementsov-Ogievskiy47995022018-03-13 15:34:00 -04003632 *res_precopy_only += remaining_size;
Vladimir Sementsov-Ogievskiy86e11672017-07-10 19:30:15 +03003633 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003634}
3635
3636static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
3637{
3638 unsigned int xh_len;
3639 int xh_flags;
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00003640 uint8_t *loaded_data;
Juan Quintela56e93d22015-05-07 19:33:31 +02003641
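    /*
     * Wire format: one flags byte (which must be ENCODING_FLAG_XBZRLE),
     * a big-endian 16-bit encoded length, then the encoded data, which
     * decodes to at most TARGET_PAGE_SIZE bytes.
     */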
Juan Quintela56e93d22015-05-07 19:33:31 +02003642 /* extract RLE header */
3643 xh_flags = qemu_get_byte(f);
3644 xh_len = qemu_get_be16(f);
3645
3646 if (xh_flags != ENCODING_FLAG_XBZRLE) {
3647 error_report("Failed to load XBZRLE page - wrong compression!");
3648 return -1;
3649 }
3650
3651 if (xh_len > TARGET_PAGE_SIZE) {
3652 error_report("Failed to load XBZRLE page - len overflow!");
3653 return -1;
3654 }
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003655 loaded_data = XBZRLE.decoded_buf;
Juan Quintela56e93d22015-05-07 19:33:31 +02003656 /* load data and decode */
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003657 /* it can change loaded_data to point to an internal buffer */
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00003658 qemu_get_buffer_in_place(f, &loaded_data, xh_len);
Juan Quintela56e93d22015-05-07 19:33:31 +02003659
3660 /* decode RLE */
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00003661 if (xbzrle_decode_buffer(loaded_data, xh_len, host,
Juan Quintela56e93d22015-05-07 19:33:31 +02003662 TARGET_PAGE_SIZE) == -1) {
3663 error_report("Failed to load XBZRLE page - decode error!");
3664 return -1;
3665 }
3666
3667 return 0;
3668}
3669
Juan Quintela3d0684b2017-03-23 15:06:39 +01003670/**
3671 * ram_block_from_stream: read a RAMBlock id from the migration stream
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003672 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01003673 * Must be called from within an RCU critical section.
3674 *
3675 * Returns a pointer from within the RCU-protected ram_list.
3676 *
3677 * @f: QEMUFile where to read the data from
3678 * @flags: Page flags (mostly to see if it's a continuation of the previous block)
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003679 */
Juan Quintela3d0684b2017-03-23 15:06:39 +01003680static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
Juan Quintela56e93d22015-05-07 19:33:31 +02003681{
3682 static RAMBlock *block = NULL;
3683 char id[256];
3684 uint8_t len;
3685
3686 if (flags & RAM_SAVE_FLAG_CONTINUE) {
zhanghailiang4c4bad42016-01-15 11:37:41 +08003687 if (!block) {
Juan Quintela56e93d22015-05-07 19:33:31 +02003688 error_report("Ack, bad migration stream!");
3689 return NULL;
3690 }
zhanghailiang4c4bad42016-01-15 11:37:41 +08003691 return block;
Juan Quintela56e93d22015-05-07 19:33:31 +02003692 }
3693
3694 len = qemu_get_byte(f);
3695 qemu_get_buffer(f, (uint8_t *)id, len);
3696 id[len] = 0;
3697
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00003698 block = qemu_ram_block_by_name(id);
zhanghailiang4c4bad42016-01-15 11:37:41 +08003699 if (!block) {
3700 error_report("Can't find block %s", id);
3701 return NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02003702 }
3703
Yury Kotovfbd162e2019-02-15 20:45:46 +03003704 if (ramblock_is_ignored(block)) {
Cédric Le Goaterb895de52018-05-14 08:57:00 +02003705 error_report("block %s should not be migrated !", id);
3706 return NULL;
3707 }
3708
zhanghailiang4c4bad42016-01-15 11:37:41 +08003709 return block;
3710}
3711
3712static inline void *host_from_ram_block_offset(RAMBlock *block,
3713 ram_addr_t offset)
3714{
3715 if (!offset_in_ramblock(block, offset)) {
3716 return NULL;
3717 }
3718
3719 return block->host + offset;
Juan Quintela56e93d22015-05-07 19:33:31 +02003720}
3721
Zhang Chen13af18f2018-09-03 12:38:48 +08003722static inline void *colo_cache_from_block_offset(RAMBlock *block,
3723 ram_addr_t offset)
3724{
3725 if (!offset_in_ramblock(block, offset)) {
3726 return NULL;
3727 }
3728 if (!block->colo_cache) {
3729 error_report("%s: colo_cache is NULL in block :%s",
3730 __func__, block->idstr);
3731 return NULL;
3732 }
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003733
3734 /*
3735 * During a COLO checkpoint, we need a bitmap of these migrated pages.
3736 * It helps us decide which pages in the RAM cache should be flushed
3737 * into VM's RAM later.
3738 */
3739 if (!test_and_set_bit(offset >> TARGET_PAGE_BITS, block->bmap)) {
3740 ram_state->migration_dirty_pages++;
3741 }
Zhang Chen13af18f2018-09-03 12:38:48 +08003742 return block->colo_cache + offset;
3743}
3744
Juan Quintela3d0684b2017-03-23 15:06:39 +01003745/**
3746 * ram_handle_compressed: handle the zero page case
3747 *
Juan Quintela56e93d22015-05-07 19:33:31 +02003748 * If a page (or a whole RDMA chunk) has been
3749 * determined to be zero, then zap it.
Juan Quintela3d0684b2017-03-23 15:06:39 +01003750 *
3751 * @host: host address for the zero page
3752 * @ch: what the page is filled from. We only support zero
3753 * @size: size of the zero page
Juan Quintela56e93d22015-05-07 19:33:31 +02003754 */
3755void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
3756{
3757 if (ch != 0 || !is_zero_range(host, size)) {
3758 memset(host, ch, size);
3759 }
3760}
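/*
 * Note that for the common ch == 0 case the memset() is skipped when the
 * destination page already reads as zero; presumably this avoids touching
 * (and therefore allocating) pages the guest never wrote to.  A caller
 * simply does e.g.:
 *
 *     ch = qemu_get_byte(f);
 *     ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
 *
 * as ram_load_precopy() does below for RAM_SAVE_FLAG_ZERO.
 */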
3761
Xiao Guangrong797ca152018-03-30 15:51:21 +08003762/* return the size after decompression, or negative value on error */
3763static int
3764qemu_uncompress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
3765 const uint8_t *source, size_t source_len)
3766{
3767 int err;
3768
3769 err = inflateReset(stream);
3770 if (err != Z_OK) {
3771 return -1;
3772 }
3773
3774 stream->avail_in = source_len;
3775 stream->next_in = (uint8_t *)source;
3776 stream->avail_out = dest_len;
3777 stream->next_out = dest;
3778
3779 err = inflate(stream, Z_NO_FLUSH);
3780 if (err != Z_STREAM_END) {
3781 return -1;
3782 }
3783
3784 return stream->total_out;
3785}
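/*
 * For orientation, a hedged sketch of the compression step the source
 * side is assumed to perform for RAM_SAVE_FLAG_COMPRESS_PAGE; it mirrors
 * qemu_uncompress_data() above using zlib's deflate API with a per-thread
 * stream:
 *
 *     err = deflateReset(stream);
 *     if (err != Z_OK) {
 *         return -1;
 *     }
 *     stream->avail_in  = TARGET_PAGE_SIZE;            // one target page in
 *     stream->next_in   = (uint8_t *)source;
 *     stream->avail_out = compressBound(TARGET_PAGE_SIZE);
 *     stream->next_out  = dest;
 *     err = deflate(stream, Z_FINISH);
 *     if (err != Z_STREAM_END) {
 *         return -1;
 *     }
 *     return stream->total_out;                        // bytes to put on the wire
 */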
3786
Juan Quintela56e93d22015-05-07 19:33:31 +02003787static void *do_data_decompress(void *opaque)
3788{
3789 DecompressParam *param = opaque;
3790 unsigned long pagesize;
Liang Li33d151f2016-05-05 15:32:58 +08003791 uint8_t *des;
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003792 int len, ret;
Juan Quintela56e93d22015-05-07 19:33:31 +02003793
Liang Li33d151f2016-05-05 15:32:58 +08003794 qemu_mutex_lock(&param->mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08003795 while (!param->quit) {
Liang Li33d151f2016-05-05 15:32:58 +08003796 if (param->des) {
3797 des = param->des;
3798 len = param->len;
3799 param->des = 0;
3800 qemu_mutex_unlock(&param->mutex);
3801
Liang Li73a89122016-05-05 15:32:51 +08003802 pagesize = TARGET_PAGE_SIZE;
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003803
3804 ret = qemu_uncompress_data(&param->stream, des, pagesize,
3805 param->compbuf, len);
Xiao Guangrongf5482222018-05-03 16:06:11 +08003806 if (ret < 0 && migrate_get_current()->decompress_error_check) {
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003807 error_report("decompress data failed");
3808 qemu_file_set_error(decomp_file, ret);
3809 }
Liang Li73a89122016-05-05 15:32:51 +08003810
Liang Li33d151f2016-05-05 15:32:58 +08003811 qemu_mutex_lock(&decomp_done_lock);
3812 param->done = true;
3813 qemu_cond_signal(&decomp_done_cond);
3814 qemu_mutex_unlock(&decomp_done_lock);
3815
3816 qemu_mutex_lock(&param->mutex);
3817 } else {
3818 qemu_cond_wait(&param->cond, &param->mutex);
3819 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003820 }
Liang Li33d151f2016-05-05 15:32:58 +08003821 qemu_mutex_unlock(&param->mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02003822
3823 return NULL;
3824}
3825
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003826static int wait_for_decompress_done(void)
Liang Li5533b2e2016-05-05 15:32:52 +08003827{
3828 int idx, thread_count;
3829
3830 if (!migrate_use_compression()) {
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003831 return 0;
Liang Li5533b2e2016-05-05 15:32:52 +08003832 }
3833
3834 thread_count = migrate_decompress_threads();
3835 qemu_mutex_lock(&decomp_done_lock);
3836 for (idx = 0; idx < thread_count; idx++) {
3837 while (!decomp_param[idx].done) {
3838 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
3839 }
3840 }
3841 qemu_mutex_unlock(&decomp_done_lock);
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003842 return qemu_file_get_error(decomp_file);
Liang Li5533b2e2016-05-05 15:32:52 +08003843}
3844
Juan Quintelaf0afa332017-06-28 11:52:28 +02003845static void compress_threads_load_cleanup(void)
Juan Quintela56e93d22015-05-07 19:33:31 +02003846{
3847 int i, thread_count;
3848
Juan Quintela3416ab52016-04-20 11:56:01 +02003849 if (!migrate_use_compression()) {
3850 return;
3851 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003852 thread_count = migrate_decompress_threads();
3853 for (i = 0; i < thread_count; i++) {
Xiao Guangrong797ca152018-03-30 15:51:21 +08003854 /*
3855 * we use it as an indicator of whether the thread has been
3856 * properly initialized or not
3857 */
3858 if (!decomp_param[i].compbuf) {
3859 break;
3860 }
3861
Juan Quintela56e93d22015-05-07 19:33:31 +02003862 qemu_mutex_lock(&decomp_param[i].mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08003863 decomp_param[i].quit = true;
Juan Quintela56e93d22015-05-07 19:33:31 +02003864 qemu_cond_signal(&decomp_param[i].cond);
3865 qemu_mutex_unlock(&decomp_param[i].mutex);
3866 }
3867 for (i = 0; i < thread_count; i++) {
Xiao Guangrong797ca152018-03-30 15:51:21 +08003868 if (!decomp_param[i].compbuf) {
3869 break;
3870 }
3871
Juan Quintela56e93d22015-05-07 19:33:31 +02003872 qemu_thread_join(decompress_threads + i);
3873 qemu_mutex_destroy(&decomp_param[i].mutex);
3874 qemu_cond_destroy(&decomp_param[i].cond);
Xiao Guangrong797ca152018-03-30 15:51:21 +08003875 inflateEnd(&decomp_param[i].stream);
Juan Quintela56e93d22015-05-07 19:33:31 +02003876 g_free(decomp_param[i].compbuf);
Xiao Guangrong797ca152018-03-30 15:51:21 +08003877 decomp_param[i].compbuf = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02003878 }
3879 g_free(decompress_threads);
3880 g_free(decomp_param);
Juan Quintela56e93d22015-05-07 19:33:31 +02003881 decompress_threads = NULL;
3882 decomp_param = NULL;
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003883 decomp_file = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02003884}
3885
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003886static int compress_threads_load_setup(QEMUFile *f)
Xiao Guangrong797ca152018-03-30 15:51:21 +08003887{
3888 int i, thread_count;
3889
3890 if (!migrate_use_compression()) {
3891 return 0;
3892 }
3893
3894 thread_count = migrate_decompress_threads();
3895 decompress_threads = g_new0(QemuThread, thread_count);
3896 decomp_param = g_new0(DecompressParam, thread_count);
3897 qemu_mutex_init(&decomp_done_lock);
3898 qemu_cond_init(&decomp_done_cond);
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003899 decomp_file = f;
Xiao Guangrong797ca152018-03-30 15:51:21 +08003900 for (i = 0; i < thread_count; i++) {
3901 if (inflateInit(&decomp_param[i].stream) != Z_OK) {
3902 goto exit;
3903 }
3904
3905 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
3906 qemu_mutex_init(&decomp_param[i].mutex);
3907 qemu_cond_init(&decomp_param[i].cond);
3908 decomp_param[i].done = true;
3909 decomp_param[i].quit = false;
3910 qemu_thread_create(decompress_threads + i, "decompress",
3911 do_data_decompress, decomp_param + i,
3912 QEMU_THREAD_JOINABLE);
3913 }
3914 return 0;
3915exit:
3916 compress_threads_load_cleanup();
3917 return -1;
3918}
3919
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00003920static void decompress_data_with_multi_threads(QEMUFile *f,
Juan Quintela56e93d22015-05-07 19:33:31 +02003921 void *host, int len)
3922{
3923 int idx, thread_count;
3924
3925 thread_count = migrate_decompress_threads();
Liang Li73a89122016-05-05 15:32:51 +08003926 qemu_mutex_lock(&decomp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02003927 while (true) {
3928 for (idx = 0; idx < thread_count; idx++) {
Liang Li73a89122016-05-05 15:32:51 +08003929 if (decomp_param[idx].done) {
Liang Li33d151f2016-05-05 15:32:58 +08003930 decomp_param[idx].done = false;
3931 qemu_mutex_lock(&decomp_param[idx].mutex);
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00003932 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
Juan Quintela56e93d22015-05-07 19:33:31 +02003933 decomp_param[idx].des = host;
3934 decomp_param[idx].len = len;
Liang Li33d151f2016-05-05 15:32:58 +08003935 qemu_cond_signal(&decomp_param[idx].cond);
3936 qemu_mutex_unlock(&decomp_param[idx].mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02003937 break;
3938 }
3939 }
3940 if (idx < thread_count) {
3941 break;
Liang Li73a89122016-05-05 15:32:51 +08003942 } else {
3943 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02003944 }
3945 }
Liang Li73a89122016-05-05 15:32:51 +08003946 qemu_mutex_unlock(&decomp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02003947}
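/*
 * Summary of the hand-off implemented above between the load thread
 * (decompress_data_with_multi_threads) and a worker (do_data_decompress):
 *
 *     load thread                        worker
 *     -----------                        ------
 *     find a param with done == true
 *     done = false
 *     lock param->mutex
 *     read compressed bytes into compbuf
 *     set des/len, signal param->cond
 *     unlock param->mutex
 *                                        wake, latch des/len, clear param->des
 *                                        unlock param->mutex
 *                                        inflate compbuf into the guest page
 *                                        lock decomp_done_lock
 *                                        done = true, signal decomp_done_cond
 *                                        unlock decomp_done_lock
 *
 * When every worker is busy the load thread blocks on decomp_done_cond,
 * and wait_for_decompress_done() uses the same condition to drain all
 * workers at the end of the stream.
 */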
3948
Zhang Chen13af18f2018-09-03 12:38:48 +08003949/*
3950 * colo cache: this is for the secondary VM; we cache the whole
3951 * memory of the secondary VM.  The global lock must be held when
3952 * calling this helper.
3953 */
3954int colo_init_ram_cache(void)
3955{
3956 RAMBlock *block;
3957
3958 rcu_read_lock();
Yury Kotovfbd162e2019-02-15 20:45:46 +03003959 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Zhang Chen13af18f2018-09-03 12:38:48 +08003960 block->colo_cache = qemu_anon_ram_alloc(block->used_length,
3961 NULL,
3962 false);
3963 if (!block->colo_cache) {
3964 error_report("%s: Can't alloc memory for COLO cache of block %s,"
3965 "size 0x" RAM_ADDR_FMT, __func__, block->idstr,
3966 block->used_length);
3967 goto out_locked;
3968 }
3969 memcpy(block->colo_cache, block->host, block->used_length);
3970 }
3971 rcu_read_unlock();
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003972 /*
3973 * Record the dirty pages sent by the PVM; we use this dirty bitmap to
3974 * decide which pages in the cache should be flushed into the SVM's RAM. Here
3975 * we use the same name 'ram_bitmap' as for migration.
3976 */
3977 if (ram_bytes_total()) {
3978 RAMBlock *block;
3979
Yury Kotovfbd162e2019-02-15 20:45:46 +03003980 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003981 unsigned long pages = block->max_length >> TARGET_PAGE_BITS;
3982
3983 block->bmap = bitmap_new(pages);
3984 bitmap_set(block->bmap, 0, pages);
3985 }
3986 }
3987 ram_state = g_new0(RAMState, 1);
3988 ram_state->migration_dirty_pages = 0;
Zhang Chenc6e5baf2019-03-30 06:29:51 +08003989 qemu_mutex_init(&ram_state->bitmap_mutex);
zhanghailiangd1955d22018-09-03 12:38:55 +08003990 memory_global_dirty_log_start();
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003991
Zhang Chen13af18f2018-09-03 12:38:48 +08003992 return 0;
3993
3994out_locked:
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003995
Yury Kotovfbd162e2019-02-15 20:45:46 +03003996 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Zhang Chen13af18f2018-09-03 12:38:48 +08003997 if (block->colo_cache) {
3998 qemu_anon_ram_free(block->colo_cache, block->used_length);
3999 block->colo_cache = NULL;
4000 }
4001 }
4002
4003 rcu_read_unlock();
4004 return -errno;
4005}
4006
4007/* The global lock must be held when calling this helper */
4008void colo_release_ram_cache(void)
4009{
4010 RAMBlock *block;
4011
zhanghailiangd1955d22018-09-03 12:38:55 +08004012 memory_global_dirty_log_stop();
Yury Kotovfbd162e2019-02-15 20:45:46 +03004013 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Zhang Chen7d9acaf2018-09-03 12:38:49 +08004014 g_free(block->bmap);
4015 block->bmap = NULL;
4016 }
4017
Zhang Chen13af18f2018-09-03 12:38:48 +08004018 rcu_read_lock();
Zhang Chen7d9acaf2018-09-03 12:38:49 +08004019
Yury Kotovfbd162e2019-02-15 20:45:46 +03004020 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Zhang Chen13af18f2018-09-03 12:38:48 +08004021 if (block->colo_cache) {
4022 qemu_anon_ram_free(block->colo_cache, block->used_length);
4023 block->colo_cache = NULL;
4024 }
4025 }
Zhang Chen7d9acaf2018-09-03 12:38:49 +08004026
Zhang Chen13af18f2018-09-03 12:38:48 +08004027 rcu_read_unlock();
Zhang Chenc6e5baf2019-03-30 06:29:51 +08004028 qemu_mutex_destroy(&ram_state->bitmap_mutex);
Zhang Chen7d9acaf2018-09-03 12:38:49 +08004029 g_free(ram_state);
4030 ram_state = NULL;
Zhang Chen13af18f2018-09-03 12:38:48 +08004031}
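/*
 * A rough sketch of how the COLO helpers in this file are expected to be
 * driven on the incoming side (the init/release call sites themselves live
 * in the COLO code, so treat this as orientation only):
 *
 *     colo_init_ram_cache();            // allocate colo_cache + dirty bitmap
 *     for each checkpoint {
 *         // ram_load()/ram_load_precopy() store incoming pages into
 *         // colo_cache via colo_cache_from_block_offset() ...
 *         colo_flush_ram_cache();       // ... then ram_load() copies the
 *                                       // dirtied pages into the SVM's RAM
 *     }
 *     colo_release_ram_cache();         // free the cache and the bitmaps
 */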
4032
Juan Quintela3d0684b2017-03-23 15:06:39 +01004033/**
Juan Quintelaf265e0e2017-06-28 11:52:27 +02004034 * ram_load_setup: Setup RAM for migration incoming side
4035 *
4036 * Returns zero to indicate success and negative for error
4037 *
4038 * @f: QEMUFile where to receive the data
4039 * @opaque: RAMState pointer
4040 */
4041static int ram_load_setup(QEMUFile *f, void *opaque)
4042{
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08004043 if (compress_threads_load_setup(f)) {
Xiao Guangrong797ca152018-03-30 15:51:21 +08004044 return -1;
4045 }
4046
Juan Quintelaf265e0e2017-06-28 11:52:27 +02004047 xbzrle_load_setup();
Alexey Perevalovf9494612017-10-05 14:13:20 +03004048 ramblock_recv_map_init();
Zhang Chen13af18f2018-09-03 12:38:48 +08004049
Juan Quintelaf265e0e2017-06-28 11:52:27 +02004050 return 0;
4051}
4052
4053static int ram_load_cleanup(void *opaque)
4054{
Alexey Perevalovf9494612017-10-05 14:13:20 +03004055 RAMBlock *rb;
Junyan He56eb90a2018-07-18 15:48:03 +08004056
Yury Kotovfbd162e2019-02-15 20:45:46 +03004057 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
Junyan He56eb90a2018-07-18 15:48:03 +08004058 if (ramblock_is_pmem(rb)) {
4059 pmem_persist(rb->host, rb->used_length);
4060 }
4061 }
4062
Juan Quintelaf265e0e2017-06-28 11:52:27 +02004063 xbzrle_load_cleanup();
Juan Quintelaf0afa332017-06-28 11:52:28 +02004064 compress_threads_load_cleanup();
Alexey Perevalovf9494612017-10-05 14:13:20 +03004065
Yury Kotovfbd162e2019-02-15 20:45:46 +03004066 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
Alexey Perevalovf9494612017-10-05 14:13:20 +03004067 g_free(rb->receivedmap);
4068 rb->receivedmap = NULL;
4069 }
Zhang Chen13af18f2018-09-03 12:38:48 +08004070
Juan Quintelaf265e0e2017-06-28 11:52:27 +02004071 return 0;
4072}
4073
4074/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01004075 * ram_postcopy_incoming_init: allocate postcopy data structures
4076 *
4077 * Returns 0 for success and a negative value if there was an error
4078 *
4079 * @mis: current migration incoming state
4080 *
4081 * Allocate data structures etc needed by incoming migration with
4082 * postcopy-ram. postcopy-ram's similarly named
4083 * postcopy_ram_incoming_init does the work.
Dr. David Alan Gilbert1caddf82015-11-05 18:11:03 +00004084 */
4085int ram_postcopy_incoming_init(MigrationIncomingState *mis)
4086{
David Hildenbrandc1361802018-06-20 22:27:36 +02004087 return postcopy_ram_incoming_init(mis);
Dr. David Alan Gilbert1caddf82015-11-05 18:11:03 +00004088}
4089
Juan Quintela3d0684b2017-03-23 15:06:39 +01004090/**
4091 * ram_load_postcopy: load a page in postcopy case
4092 *
4093 * Returns 0 for success or -errno in case of error
4094 *
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004095 * Called in postcopy mode by ram_load().
4096 * rcu_read_lock is taken prior to this being called.
Juan Quintela3d0684b2017-03-23 15:06:39 +01004097 *
4098 * @f: QEMUFile to receive the data from
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004099 */
4100static int ram_load_postcopy(QEMUFile *f)
4101{
4102 int flags = 0, ret = 0;
4103 bool place_needed = false;
Peter Xu1aa83672018-07-10 17:18:53 +08004104 bool matches_target_page_size = false;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004105 MigrationIncomingState *mis = migration_incoming_get_current();
4106 /* Temporary page that is later 'placed' */
4107 void *postcopy_host_page = postcopy_get_tmp_page(mis);
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00004108 void *last_host = NULL;
Dr. David Alan Gilberta3b6ff62015-11-11 14:02:28 +00004109 bool all_zero = false;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004110
4111 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
4112 ram_addr_t addr;
4113 void *host = NULL;
4114 void *page_buffer = NULL;
4115 void *place_source = NULL;
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00004116 RAMBlock *block = NULL;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004117 uint8_t ch;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004118
4119 addr = qemu_get_be64(f);
Peter Xu7a9ddfb2018-02-08 18:31:05 +08004120
4121 /*
4122 * If qemu file error, we should stop here, and then "addr"
4123 * may be invalid
4124 */
4125 ret = qemu_file_get_error(f);
4126 if (ret) {
4127 break;
4128 }
4129
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004130 flags = addr & ~TARGET_PAGE_MASK;
4131 addr &= TARGET_PAGE_MASK;
4132
4133 trace_ram_load_postcopy_loop((uint64_t)addr, flags);
4134 place_needed = false;
Juan Quintelabb890ed2017-04-28 09:39:55 +02004135 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE)) {
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00004136 block = ram_block_from_stream(f, flags);
zhanghailiang4c4bad42016-01-15 11:37:41 +08004137
4138 host = host_from_ram_block_offset(block, addr);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004139 if (!host) {
4140 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
4141 ret = -EINVAL;
4142 break;
4143 }
Peter Xu1aa83672018-07-10 17:18:53 +08004144 matches_target_page_size = block->page_size == TARGET_PAGE_SIZE;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004145 /*
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00004146 * Postcopy requires that we place whole host pages atomically;
4147 * these may be huge pages for RAMBlocks that are backed by
4148 * hugetlbfs.
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004149 * To make it atomic, the data is read into a temporary page
4150 * that's moved into place later.
4151 * The migration protocol uses (possibly smaller) target pages;
4152 * however, the source ensures it always sends all the components
4153 * of a host page in order.
4154 */
4155 page_buffer = postcopy_host_page +
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00004156 ((uintptr_t)host & (block->page_size - 1));
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004157 /* If all TP are zero then we can optimise the place */
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00004158 if (!((uintptr_t)host & (block->page_size - 1))) {
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004159 all_zero = true;
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00004160 } else {
4161 /* not the 1st TP within the HP */
4162 if (host != (last_host + TARGET_PAGE_SIZE)) {
Markus Armbruster9af9e0f2015-12-18 16:35:19 +01004163 error_report("Non-sequential target page %p/%p",
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00004164 host, last_host);
4165 ret = -EINVAL;
4166 break;
4167 }
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004168 }
4169
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00004170
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004171 /*
4172 * If it's the last part of a host page then we place the host
4173 * page
4174 */
4175 place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00004176 (block->page_size - 1)) == 0;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004177 place_source = postcopy_host_page;
4178 }
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00004179 last_host = host;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004180
4181 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
Juan Quintelabb890ed2017-04-28 09:39:55 +02004182 case RAM_SAVE_FLAG_ZERO:
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004183 ch = qemu_get_byte(f);
4184 memset(page_buffer, ch, TARGET_PAGE_SIZE);
4185 if (ch) {
4186 all_zero = false;
4187 }
4188 break;
4189
4190 case RAM_SAVE_FLAG_PAGE:
4191 all_zero = false;
Peter Xu1aa83672018-07-10 17:18:53 +08004192 if (!matches_target_page_size) {
4193 /* For huge pages, we always use temporary buffer */
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004194 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
4195 } else {
Peter Xu1aa83672018-07-10 17:18:53 +08004196 /*
4197 * For small pages that match the target page size, we
4198 * avoid the qemu_file copy. Instead we directly use
4199 * the buffer of QEMUFile to place the page. Note: we
4200 * cannot do any QEMUFile operation before using that
4201 * buffer to make sure the buffer is valid when
4202 * placing the page.
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004203 */
4204 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
4205 TARGET_PAGE_SIZE);
4206 }
4207 break;
4208 case RAM_SAVE_FLAG_EOS:
4209 /* normal exit */
Juan Quintela6df264a2018-02-28 09:10:07 +01004210 multifd_recv_sync_main();
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004211 break;
4212 default:
4213 error_report("Unknown combination of migration flags: %#x"
4214 " (postcopy mode)", flags);
4215 ret = -EINVAL;
Peter Xu7a9ddfb2018-02-08 18:31:05 +08004216 break;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004217 }
4218
Peter Xu7a9ddfb2018-02-08 18:31:05 +08004219 /* Detect for any possible file errors */
4220 if (!ret && qemu_file_get_error(f)) {
4221 ret = qemu_file_get_error(f);
4222 }
4223
4224 if (!ret && place_needed) {
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004225 /* This gets called at the last target page in the host page */
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00004226 void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;
4227
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004228 if (all_zero) {
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00004229 ret = postcopy_place_page_zero(mis, place_dest,
Alexey Perevalov8be46202017-10-05 14:13:18 +03004230 block);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004231 } else {
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00004232 ret = postcopy_place_page(mis, place_dest,
Alexey Perevalov8be46202017-10-05 14:13:18 +03004233 place_source, block);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004234 }
4235 }
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004236 }
4237
4238 return ret;
4239}
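/*
 * Worked example of the placement arithmetic in ram_load_postcopy(),
 * assuming a hugetlbfs-backed block with block->page_size == 2 MiB and
 * TARGET_PAGE_SIZE == 4 KiB (the addresses are made up):
 *
 *     host         = 0x7f0000200000 + n * 0x1000,  n = 0 .. 511
 *     page_buffer  = postcopy_host_page + (host & 0x1fffff)
 *                  = postcopy_host_page + n * 0x1000
 *     all_zero     starts out true only for n == 0 (host-page aligned)
 *     place_needed = ((host + 0x1000) & 0x1fffff) == 0   // only for n == 511
 *     place_dest   = host + 0x1000 - 0x200000            // start of the huge page
 *
 * i.e. 512 consecutive target pages are accumulated in the temporary page
 * and the whole huge page is placed atomically once the last one has been
 * read.
 */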
4240
Daniel Henrique Barbozaacab30b2017-11-16 20:35:26 -02004241static bool postcopy_is_advised(void)
4242{
4243 PostcopyState ps = postcopy_state_get();
4244 return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END;
4245}
4246
4247static bool postcopy_is_running(void)
4248{
4249 PostcopyState ps = postcopy_state_get();
4250 return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END;
4251}
4252
Zhang Chene6f4aa12018-09-03 12:38:50 +08004253/*
4254 * Flush the content of the RAM cache into the SVM's memory.
4255 * Only flush the pages that have been dirtied by the PVM, the SVM, or both.
4256 */
4257static void colo_flush_ram_cache(void)
4258{
4259 RAMBlock *block = NULL;
4260 void *dst_host;
4261 void *src_host;
4262 unsigned long offset = 0;
4263
zhanghailiangd1955d22018-09-03 12:38:55 +08004264 memory_global_dirty_log_sync();
4265 rcu_read_lock();
Yury Kotovfbd162e2019-02-15 20:45:46 +03004266 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Wei Yang5d0980a2019-07-18 09:25:47 +08004267 migration_bitmap_sync_range(ram_state, block);
zhanghailiangd1955d22018-09-03 12:38:55 +08004268 }
4269 rcu_read_unlock();
4270
Zhang Chene6f4aa12018-09-03 12:38:50 +08004271 trace_colo_flush_ram_cache_begin(ram_state->migration_dirty_pages);
4272 rcu_read_lock();
4273 block = QLIST_FIRST_RCU(&ram_list.blocks);
4274
4275 while (block) {
4276 offset = migration_bitmap_find_dirty(ram_state, block, offset);
4277
4278 if (offset << TARGET_PAGE_BITS >= block->used_length) {
4279 offset = 0;
4280 block = QLIST_NEXT_RCU(block, next);
4281 } else {
4282 migration_bitmap_clear_dirty(ram_state, block, offset);
4283 dst_host = block->host + (offset << TARGET_PAGE_BITS);
4284 src_host = block->colo_cache + (offset << TARGET_PAGE_BITS);
4285 memcpy(dst_host, src_host, TARGET_PAGE_SIZE);
4286 }
4287 }
4288
4289 rcu_read_unlock();
4290 trace_colo_flush_ram_cache_end();
4291}
4292
Wei Yang10da4a32019-07-25 08:20:23 +08004293/**
4294 * ram_load_precopy: load pages in precopy case
4295 *
4296 * Returns 0 for success or -errno in case of error
4297 *
4298 * Called in precopy mode by ram_load().
4299 * rcu_read_lock is taken prior to this being called.
4300 *
4301 * @f: QEMUFile to receive the data from
4302 */
4303static int ram_load_precopy(QEMUFile *f)
Juan Quintela56e93d22015-05-07 19:33:31 +02004304{
Wei Yang10da4a32019-07-25 08:20:23 +08004305 int flags = 0, ret = 0, invalid_flags = 0, len = 0;
Dr. David Alan Gilbertef08fb32017-02-24 18:28:30 +00004306 /* ADVISE is earlier, it shows the source has the postcopy capability on */
Daniel Henrique Barbozaacab30b2017-11-16 20:35:26 -02004307 bool postcopy_advised = postcopy_is_advised();
Juan Quintelaedc60122016-11-02 12:40:46 +01004308 if (!migrate_use_compression()) {
4309 invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE;
4310 }
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004311
Wei Yang10da4a32019-07-25 08:20:23 +08004312 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02004313 ram_addr_t addr, total_ram_bytes;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004314 void *host = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02004315 uint8_t ch;
4316
4317 addr = qemu_get_be64(f);
4318 flags = addr & ~TARGET_PAGE_MASK;
4319 addr &= TARGET_PAGE_MASK;
4320
Juan Quintelaedc60122016-11-02 12:40:46 +01004321 if (flags & invalid_flags) {
4322 if (flags & invalid_flags & RAM_SAVE_FLAG_COMPRESS_PAGE) {
4323 error_report("Received an unexpected compressed page");
4324 }
4325
4326 ret = -EINVAL;
4327 break;
4328 }
4329
Juan Quintelabb890ed2017-04-28 09:39:55 +02004330 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004331 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
zhanghailiang4c4bad42016-01-15 11:37:41 +08004332 RAMBlock *block = ram_block_from_stream(f, flags);
4333
Zhang Chen13af18f2018-09-03 12:38:48 +08004334 /*
4335 * After going into COLO, we should load the page into colo_cache.
4336 */
4337 if (migration_incoming_in_colo_state()) {
4338 host = colo_cache_from_block_offset(block, addr);
4339 } else {
4340 host = host_from_ram_block_offset(block, addr);
4341 }
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004342 if (!host) {
4343 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
4344 ret = -EINVAL;
4345 break;
4346 }
Zhang Chen13af18f2018-09-03 12:38:48 +08004347
4348 if (!migration_incoming_in_colo_state()) {
4349 ramblock_recv_bitmap_set(block, host);
4350 }
4351
Dr. David Alan Gilbert1db9d8e2017-04-26 19:37:21 +01004352 trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004353 }
4354
Juan Quintela56e93d22015-05-07 19:33:31 +02004355 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
4356 case RAM_SAVE_FLAG_MEM_SIZE:
4357 /* Synchronize RAM block list */
4358 total_ram_bytes = addr;
4359 while (!ret && total_ram_bytes) {
4360 RAMBlock *block;
Juan Quintela56e93d22015-05-07 19:33:31 +02004361 char id[256];
4362 ram_addr_t length;
4363
4364 len = qemu_get_byte(f);
4365 qemu_get_buffer(f, (uint8_t *)id, len);
4366 id[len] = 0;
4367 length = qemu_get_be64(f);
4368
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00004369 block = qemu_ram_block_by_name(id);
Cédric Le Goaterb895de52018-05-14 08:57:00 +02004370 if (block && !qemu_ram_is_migratable(block)) {
4371 error_report("block %s should not be migrated !", id);
4372 ret = -EINVAL;
4373 } else if (block) {
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00004374 if (length != block->used_length) {
4375 Error *local_err = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02004376
Gongleifa53a0e2016-05-10 10:04:59 +08004377 ret = qemu_ram_resize(block, length,
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00004378 &local_err);
4379 if (local_err) {
4380 error_report_err(local_err);
Juan Quintela56e93d22015-05-07 19:33:31 +02004381 }
Juan Quintela56e93d22015-05-07 19:33:31 +02004382 }
Dr. David Alan Gilbertef08fb32017-02-24 18:28:30 +00004383 /* For postcopy we need to check hugepage sizes match */
4384 if (postcopy_advised &&
4385 block->page_size != qemu_host_page_size) {
4386 uint64_t remote_page_size = qemu_get_be64(f);
4387 if (remote_page_size != block->page_size) {
4388 error_report("Mismatched RAM page size %s "
4389 "(local) %zd != %" PRId64,
4390 id, block->page_size,
4391 remote_page_size);
4392 ret = -EINVAL;
4393 }
4394 }
Yury Kotovfbd162e2019-02-15 20:45:46 +03004395 if (migrate_ignore_shared()) {
4396 hwaddr addr = qemu_get_be64(f);
Yury Kotovfbd162e2019-02-15 20:45:46 +03004397 if (ramblock_is_ignored(block) &&
4398 block->mr->addr != addr) {
4399 error_report("Mismatched GPAs for block %s "
4400 "%" PRId64 "!= %" PRId64,
4401 id, (uint64_t)addr,
4402 (uint64_t)block->mr->addr);
4403 ret = -EINVAL;
4404 }
4405 }
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00004406 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
4407 block->idstr);
4408 } else {
Juan Quintela56e93d22015-05-07 19:33:31 +02004409 error_report("Unknown ramblock \"%s\", cannot "
4410 "accept migration", id);
4411 ret = -EINVAL;
4412 }
4413
4414 total_ram_bytes -= length;
4415 }
4416 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004417
Juan Quintelabb890ed2017-04-28 09:39:55 +02004418 case RAM_SAVE_FLAG_ZERO:
Juan Quintela56e93d22015-05-07 19:33:31 +02004419 ch = qemu_get_byte(f);
4420 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
4421 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004422
Juan Quintela56e93d22015-05-07 19:33:31 +02004423 case RAM_SAVE_FLAG_PAGE:
Juan Quintela56e93d22015-05-07 19:33:31 +02004424 qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
4425 break;
Juan Quintela56e93d22015-05-07 19:33:31 +02004426
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004427 case RAM_SAVE_FLAG_COMPRESS_PAGE:
Juan Quintela56e93d22015-05-07 19:33:31 +02004428 len = qemu_get_be32(f);
4429 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
4430 error_report("Invalid compressed data length: %d", len);
4431 ret = -EINVAL;
4432 break;
4433 }
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00004434 decompress_data_with_multi_threads(f, host, len);
Juan Quintela56e93d22015-05-07 19:33:31 +02004435 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004436
Juan Quintela56e93d22015-05-07 19:33:31 +02004437 case RAM_SAVE_FLAG_XBZRLE:
Juan Quintela56e93d22015-05-07 19:33:31 +02004438 if (load_xbzrle(f, addr, host) < 0) {
4439 error_report("Failed to decompress XBZRLE page at "
4440 RAM_ADDR_FMT, addr);
4441 ret = -EINVAL;
4442 break;
4443 }
4444 break;
4445 case RAM_SAVE_FLAG_EOS:
4446 /* normal exit */
Juan Quintela6df264a2018-02-28 09:10:07 +01004447 multifd_recv_sync_main();
Juan Quintela56e93d22015-05-07 19:33:31 +02004448 break;
4449 default:
4450 if (flags & RAM_SAVE_FLAG_HOOK) {
Dr. David Alan Gilbert632e3a52015-06-11 18:17:23 +01004451 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
Juan Quintela56e93d22015-05-07 19:33:31 +02004452 } else {
4453 error_report("Unknown combination of migration flags: %#x",
4454 flags);
4455 ret = -EINVAL;
4456 }
4457 }
4458 if (!ret) {
4459 ret = qemu_file_get_error(f);
4460 }
4461 }
4462
Wei Yang10da4a32019-07-25 08:20:23 +08004463 return ret;
4464}
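/*
 * For reference, the record layout that ram_load_precopy() walks, as
 * implied by the reads above.  Every record begins with a be64 "addr"
 * whose low bits hold the RAM_SAVE_FLAG_* bits:
 *
 *     RAM_SAVE_FLAG_MEM_SIZE       per block: id_len byte, id bytes,
 *                                  be64 used_length, then optionally a
 *                                  be64 page size (when postcopy was
 *                                  advised and the page size differs) and
 *                                  a be64 GPA (when ignore-shared is set)
 *     RAM_SAVE_FLAG_ZERO           one fill byte
 *     RAM_SAVE_FLAG_PAGE           TARGET_PAGE_SIZE raw bytes
 *     RAM_SAVE_FLAG_COMPRESS_PAGE  be32 length + zlib-compressed page
 *     RAM_SAVE_FLAG_XBZRLE         XBZRLE-encoded page (see load_xbzrle)
 *     RAM_SAVE_FLAG_EOS            end of section, no payload
 *
 * RAM_SAVE_FLAG_CONTINUE may be OR-ed into a page record to reuse the
 * previously named block, as handled in ram_block_from_stream().
 */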
4465
4466static int ram_load(QEMUFile *f, void *opaque, int version_id)
4467{
4468 int ret = 0;
4469 static uint64_t seq_iter;
4470 /*
4471 * If the system is running in postcopy mode, page inserts into host
4472 * memory must be atomic.
4473 */
4474 bool postcopy_running = postcopy_is_running();
4475
4476 seq_iter++;
4477
4478 if (version_id != 4) {
4479 return -EINVAL;
4480 }
4481
4482 /*
4483 * This RCU critical section can be very long running.
4484 * When RCU reclaims in the code start to become numerous,
4485 * it will be necessary to reduce the granularity of this
4486 * critical section.
4487 */
4488 rcu_read_lock();
4489
4490 if (postcopy_running) {
4491 ret = ram_load_postcopy(f);
4492 } else {
4493 ret = ram_load_precopy(f);
4494 }
4495
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08004496 ret |= wait_for_decompress_done();
Juan Quintela56e93d22015-05-07 19:33:31 +02004497 rcu_read_unlock();
Juan Quintela55c44462017-01-23 22:32:05 +01004498 trace_ram_load_complete(ret, seq_iter);
Zhang Chene6f4aa12018-09-03 12:38:50 +08004499
4500 if (!ret && migration_incoming_in_colo_state()) {
4501 colo_flush_ram_cache();
4502 }
Juan Quintela56e93d22015-05-07 19:33:31 +02004503 return ret;
4504}
4505
Vladimir Sementsov-Ogievskiyc6467622017-07-10 19:30:14 +03004506static bool ram_has_postcopy(void *opaque)
4507{
Junyan He469dd512018-07-18 15:48:02 +08004508 RAMBlock *rb;
Yury Kotovfbd162e2019-02-15 20:45:46 +03004509 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
Junyan He469dd512018-07-18 15:48:02 +08004510 if (ramblock_is_pmem(rb)) {
4511 info_report("Block: %s, host: %p is a nvdimm memory, postcopy"
4512 "is not supported now!", rb->idstr, rb->host);
4513 return false;
4514 }
4515 }
4516
Vladimir Sementsov-Ogievskiyc6467622017-07-10 19:30:14 +03004517 return migrate_postcopy_ram();
4518}
4519
Peter Xuedd090c2018-05-02 18:47:32 +08004520/* Sync all the dirty bitmap with destination VM. */
4521static int ram_dirty_bitmap_sync_all(MigrationState *s, RAMState *rs)
4522{
4523 RAMBlock *block;
4524 QEMUFile *file = s->to_dst_file;
4525 int ramblock_count = 0;
4526
4527 trace_ram_dirty_bitmap_sync_start();
4528
Yury Kotovfbd162e2019-02-15 20:45:46 +03004529 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Peter Xuedd090c2018-05-02 18:47:32 +08004530 qemu_savevm_send_recv_bitmap(file, block->idstr);
4531 trace_ram_dirty_bitmap_request(block->idstr);
4532 ramblock_count++;
4533 }
4534
4535 trace_ram_dirty_bitmap_sync_wait();
4536
4537 /* Wait until all the ramblocks' dirty bitmap synced */
4538 while (ramblock_count--) {
4539 qemu_sem_wait(&s->rp_state.rp_sem);
4540 }
4541
4542 trace_ram_dirty_bitmap_sync_complete();
4543
4544 return 0;
4545}
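/*
 * ram_dirty_bitmap_sync_all() and ram_dirty_bitmap_reload() form a simple
 * request/response pair across the return path: one recv-bitmap request is
 * sent per ramblock, and rp_state.rp_sem is waited on once per block; each
 * semaphore post comes from ram_dirty_bitmap_reload_notify() once a block's
 * bitmap has been received and reverted in ram_dirty_bitmap_reload().
 */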
4546
4547static void ram_dirty_bitmap_reload_notify(MigrationState *s)
4548{
4549 qemu_sem_post(&s->rp_state.rp_sem);
4550}
4551
Peter Xua335deb2018-05-02 18:47:28 +08004552/*
4553 * Read the received bitmap, revert it as the initial dirty bitmap.
4554 * This is only used when the postcopy migration is paused but wants
4555 * to resume from a middle point.
4556 */
4557int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block)
4558{
4559 int ret = -EINVAL;
4560 QEMUFile *file = s->rp_state.from_dst_file;
4561 unsigned long *le_bitmap, nbits = block->used_length >> TARGET_PAGE_BITS;
Peter Xua725ef92018-07-10 17:18:55 +08004562 uint64_t local_size = DIV_ROUND_UP(nbits, 8);
Peter Xua335deb2018-05-02 18:47:28 +08004563 uint64_t size, end_mark;
4564
4565 trace_ram_dirty_bitmap_reload_begin(block->idstr);
4566
4567 if (s->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
4568 error_report("%s: incorrect state %s", __func__,
4569 MigrationStatus_str(s->state));
4570 return -EINVAL;
4571 }
4572
4573 /*
4574 * Note: see comments in ramblock_recv_bitmap_send() on why we
4575 * need the endianness conversion, and the padding.
4576 */
4577 local_size = ROUND_UP(local_size, 8);
4578
4579 /* Add paddings */
4580 le_bitmap = bitmap_new(nbits + BITS_PER_LONG);
4581
4582 size = qemu_get_be64(file);
4583
4584 /* The size of the bitmap should match with our ramblock */
4585 if (size != local_size) {
4586 error_report("%s: ramblock '%s' bitmap size mismatch "
4587 "(0x%"PRIx64" != 0x%"PRIx64")", __func__,
4588 block->idstr, size, local_size);
4589 ret = -EINVAL;
4590 goto out;
4591 }
4592
4593 size = qemu_get_buffer(file, (uint8_t *)le_bitmap, local_size);
4594 end_mark = qemu_get_be64(file);
4595
4596 ret = qemu_file_get_error(file);
4597 if (ret || size != local_size) {
4598 error_report("%s: read bitmap failed for ramblock '%s': %d"
4599 " (size 0x%"PRIx64", got: 0x%"PRIx64")",
4600 __func__, block->idstr, ret, local_size, size);
4601 ret = -EIO;
4602 goto out;
4603 }
4604
4605 if (end_mark != RAMBLOCK_RECV_BITMAP_ENDING) {
4606 error_report("%s: ramblock '%s' end mark incorrect: 0x%"PRIu64,
4607 __func__, block->idstr, end_mark);
4608 ret = -EINVAL;
4609 goto out;
4610 }
4611
4612 /*
4613 * Endianness conversion. We are in postcopy (though paused).
4614 * The dirty bitmap won't change. We can directly modify it.
4615 */
4616 bitmap_from_le(block->bmap, le_bitmap, nbits);
4617
4618 /*
4619 * What we received is "received bitmap". Revert it as the initial
4620 * dirty bitmap for this ramblock.
4621 */
4622 bitmap_complement(block->bmap, block->bmap, nbits);
4623
4624 trace_ram_dirty_bitmap_reload_complete(block->idstr);
4625
Peter Xuedd090c2018-05-02 18:47:32 +08004626 /*
4627 * We succeeded to sync bitmap for current ramblock. If this is
4628 * the last one to sync, we need to notify the main send thread.
4629 */
4630 ram_dirty_bitmap_reload_notify(s);
4631
Peter Xua335deb2018-05-02 18:47:28 +08004632 ret = 0;
4633out:
Peter Xubf269902018-05-25 09:50:42 +08004634 g_free(le_bitmap);
Peter Xua335deb2018-05-02 18:47:28 +08004635 return ret;
4636}
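/*
 * The message parsed above - presumably emitted by
 * ramblock_recv_bitmap_send() on the destination side - is laid out as:
 *
 *     be64   size        bitmap length in bytes, rounded up to a multiple
 *                        of 8 (must match our own calculation)
 *     bytes  le_bitmap   the bitmap itself, little-endian, padded
 *     be64   end_mark    must equal RAMBLOCK_RECV_BITMAP_ENDING
 *
 * After the little-endian conversion the received map is complemented, so
 * pages the destination already holds become clean and everything else is
 * marked dirty for the resumed postcopy to (re)send.
 */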
4637
Peter Xuedd090c2018-05-02 18:47:32 +08004638static int ram_resume_prepare(MigrationState *s, void *opaque)
4639{
4640 RAMState *rs = *(RAMState **)opaque;
Peter Xu08614f32018-05-02 18:47:33 +08004641 int ret;
Peter Xuedd090c2018-05-02 18:47:32 +08004642
Peter Xu08614f32018-05-02 18:47:33 +08004643 ret = ram_dirty_bitmap_sync_all(s, rs);
4644 if (ret) {
4645 return ret;
4646 }
4647
4648 ram_state_resume_prepare(rs, s->to_dst_file);
4649
4650 return 0;
Peter Xuedd090c2018-05-02 18:47:32 +08004651}
4652
Juan Quintela56e93d22015-05-07 19:33:31 +02004653static SaveVMHandlers savevm_ram_handlers = {
Juan Quintela9907e842017-06-28 11:52:24 +02004654 .save_setup = ram_save_setup,
Juan Quintela56e93d22015-05-07 19:33:31 +02004655 .save_live_iterate = ram_save_iterate,
Dr. David Alan Gilbert763c9062015-11-05 18:11:00 +00004656 .save_live_complete_postcopy = ram_save_complete,
Dr. David Alan Gilberta3e06c32015-11-05 18:10:41 +00004657 .save_live_complete_precopy = ram_save_complete,
Vladimir Sementsov-Ogievskiyc6467622017-07-10 19:30:14 +03004658 .has_postcopy = ram_has_postcopy,
Juan Quintela56e93d22015-05-07 19:33:31 +02004659 .save_live_pending = ram_save_pending,
4660 .load_state = ram_load,
Juan Quintelaf265e0e2017-06-28 11:52:27 +02004661 .save_cleanup = ram_save_cleanup,
4662 .load_setup = ram_load_setup,
4663 .load_cleanup = ram_load_cleanup,
Peter Xuedd090c2018-05-02 18:47:32 +08004664 .resume_prepare = ram_resume_prepare,
Juan Quintela56e93d22015-05-07 19:33:31 +02004665};
4666
4667void ram_mig_init(void)
4668{
4669 qemu_mutex_init(&XBZRLE.lock);
Juan Quintela6f37bb82017-03-13 19:26:29 +01004670 register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state);
Juan Quintela56e93d22015-05-07 19:33:31 +02004671}