/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2011-2015 Red Hat Inc
 *
 * Authors:
 *  Juan Quintela <quintela@redhat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include <zlib.h>
#include "qemu/cutils.h"
#include "qemu/bitops.h"
#include "qemu/bitmap.h"
#include "qemu/main-loop.h"
#include "qemu/pmem.h"
#include "xbzrle.h"
#include "ram.h"
#include "migration.h"
#include "socket.h"
#include "migration/register.h"
#include "migration/misc.h"
#include "qemu-file.h"
#include "postcopy-ram.h"
#include "page_cache.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "qapi/qapi-events-migration.h"
#include "qapi/qmp/qerror.h"
#include "trace.h"
#include "exec/ram_addr.h"
#include "exec/target_page.h"
#include "qemu/rcu_queue.h"
#include "migration/colo.h"
#include "block.h"
#include "sysemu/sysemu.h"
#include "qemu/uuid.h"
#include "savevm.h"
#include "qemu/iov.h"

/***********************************************************/
/* ram save/restore */

/* RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS; it
 * worked for pages that were filled with the same char.  We switched
 * it to only search for the zero value.  And to avoid confusion with
 * RAM_SAVE_FLAG_COMPRESS_PAGE it was renamed.
 */

#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
#define RAM_SAVE_FLAG_ZERO     0x02
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
#define RAM_SAVE_FLAG_PAGE     0x08
#define RAM_SAVE_FLAG_EOS      0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE   0x40
/* 0x80 is reserved in migration.h start with 0x100 next */
#define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100

static inline bool is_zero_range(uint8_t *p, uint64_t size)
{
    return buffer_is_zero(p, size);
}

XBZRLECacheStats xbzrle_counters;

/* struct contains XBZRLE cache and a static page
   used by the compression */
static struct {
    /* buffer used for XBZRLE encoding */
    uint8_t *encoded_buf;
    /* buffer for storing page content */
    uint8_t *current_buf;
    /* Cache for XBZRLE, Protected by lock. */
    PageCache *cache;
    QemuMutex lock;
    /* it will store a page full of zeros */
    uint8_t *zero_target_page;
    /* buffer used for XBZRLE decoding */
    uint8_t *decoded_buf;
} XBZRLE;

static void XBZRLE_cache_lock(void)
{
    if (migrate_use_xbzrle())
        qemu_mutex_lock(&XBZRLE.lock);
}

static void XBZRLE_cache_unlock(void)
{
    if (migrate_use_xbzrle())
        qemu_mutex_unlock(&XBZRLE.lock);
}

/**
 * xbzrle_cache_resize: resize the xbzrle cache
 *
 * This function is called from qmp_migrate_set_cache_size in the main
 * thread, possibly while a migration is in progress.  A running
 * migration may be using the cache and might finish during this call,
 * hence changes to the cache are protected by the XBZRLE.lock mutex.
 *
 * Returns 0 for success or -1 for error
 *
 * @new_size: new cache size
 * @errp: set *errp if the check failed, with reason
 */
int xbzrle_cache_resize(int64_t new_size, Error **errp)
{
    PageCache *new_cache;
    int64_t ret = 0;

    /* Check for truncation */
    if (new_size != (size_t)new_size) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
                   "exceeding address space");
        return -1;
    }

    if (new_size == migrate_xbzrle_cache_size()) {
        /* nothing to do */
        return 0;
    }

    XBZRLE_cache_lock();

    if (XBZRLE.cache != NULL) {
        new_cache = cache_init(new_size, TARGET_PAGE_SIZE, errp);
        if (!new_cache) {
            ret = -1;
            goto out;
        }

        cache_fini(XBZRLE.cache);
        XBZRLE.cache = new_cache;
    }
out:
    XBZRLE_cache_unlock();
    return ret;
}

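/*
 * Returns true if this RAMBlock should be skipped by RAM migration:
 * either it is not migratable at all, or it is a shared block and the
 * user asked for shared RAM to be ignored.
 */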
static bool ramblock_is_ignored(RAMBlock *block)
{
    return !qemu_ram_is_migratable(block) ||
           (migrate_ignore_shared() && qemu_ram_is_shared(block));
}

/* Should be holding either ram_list.mutex, or the RCU lock. */
#define RAMBLOCK_FOREACH_NOT_IGNORED(block)            \
    INTERNAL_RAMBLOCK_FOREACH(block)                   \
        if (ramblock_is_ignored(block)) {} else

#define RAMBLOCK_FOREACH_MIGRATABLE(block)             \
    INTERNAL_RAMBLOCK_FOREACH(block)                   \
        if (!qemu_ram_is_migratable(block)) {} else

#undef RAMBLOCK_FOREACH

int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque)
{
    RAMBlock *block;
    int ret = 0;

    rcu_read_lock();
    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        ret = func(block, opaque);
        if (ret) {
            break;
        }
    }
    rcu_read_unlock();
    return ret;
}

static void ramblock_recv_map_init(void)
{
    RAMBlock *rb;

    RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
        assert(!rb->receivedmap);
        rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits());
    }
}

int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr)
{
    return test_bit(ramblock_recv_bitmap_offset(host_addr, rb),
                    rb->receivedmap);
}

bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset)
{
    return test_bit(byte_offset >> TARGET_PAGE_BITS, rb->receivedmap);
}

void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr)
{
    set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb), rb->receivedmap);
}

void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr,
                                    size_t nr)
{
    bitmap_set_atomic(rb->receivedmap,
                      ramblock_recv_bitmap_offset(host_addr, rb),
                      nr);
}

#define  RAMBLOCK_RECV_BITMAP_ENDING  (0x0123456789abcdefULL)

/*
 * Format: bitmap_size (8 bytes) + whole_bitmap (N bytes).
 *
 * Returns >0 if success with sent bytes, or <0 if error.
 */
int64_t ramblock_recv_bitmap_send(QEMUFile *file,
                                  const char *block_name)
{
    RAMBlock *block = qemu_ram_block_by_name(block_name);
    unsigned long *le_bitmap, nbits;
    uint64_t size;

    if (!block) {
        error_report("%s: invalid block name: %s", __func__, block_name);
        return -1;
    }

    nbits = block->used_length >> TARGET_PAGE_BITS;

    /*
     * Make sure the tmp bitmap buffer is big enough, e.g., on 32bit
     * machines we may need 4 more bytes for padding (see below
     * comment). So extend it a bit beforehand.
     */
    le_bitmap = bitmap_new(nbits + BITS_PER_LONG);

    /*
     * Always use little endian when sending the bitmap.  This is
     * required so that it works even when source and destination VMs
     * are not using the same endianness.  (Note: big endian won't work.)
     */
    bitmap_to_le(le_bitmap, block->receivedmap, nbits);

    /* Size of the bitmap, in bytes */
    size = DIV_ROUND_UP(nbits, 8);

    /*
     * size is always aligned to 8 bytes for 64bit machines, but it
     * may not be true for 32bit machines. We need this padding to
     * make sure the migration can survive even between 32bit and
     * 64bit machines.
     */
    size = ROUND_UP(size, 8);

    qemu_put_be64(file, size);
    qemu_put_buffer(file, (const uint8_t *)le_bitmap, size);
    /*
     * Mark as an end, in case the middle part is screwed up due to
     * some "mysterious" reason.
     */
    qemu_put_be64(file, RAMBLOCK_RECV_BITMAP_ENDING);
    qemu_fflush(file);

    g_free(le_bitmap);

    if (qemu_file_get_error(file)) {
        return qemu_file_get_error(file);
    }

    return size + sizeof(size);
}

/*
 * An outstanding page request, on the source, having been received
 * and queued
 */
struct RAMSrcPageRequest {
    RAMBlock *rb;
    hwaddr    offset;
    hwaddr    len;

    QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
};

/* State of RAM for migration */
struct RAMState {
    /* QEMUFile used for this migration */
    QEMUFile *f;
    /* Last block that we have visited searching for dirty pages */
    RAMBlock *last_seen_block;
    /* Last block from where we have sent data */
    RAMBlock *last_sent_block;
    /* Last dirty target page we have sent */
    ram_addr_t last_page;
    /* last ram version we have seen */
    uint32_t last_version;
    /* We are in the first round */
    bool ram_bulk_stage;
    /* The free page optimization is enabled */
    bool fpo_enabled;
    /* How many times we have dirty too many pages */
    int dirty_rate_high_cnt;
    /* these variables are used for bitmap sync */
    /* last time we did a full bitmap_sync */
    int64_t time_last_bitmap_sync;
    /* bytes transferred at start_time */
    uint64_t bytes_xfer_prev;
    /* number of dirty pages since start_time */
    uint64_t num_dirty_pages_period;
    /* xbzrle misses since the beginning of the period */
    uint64_t xbzrle_cache_miss_prev;

    /* compression statistics since the beginning of the period */
    /* number of times there was no free thread to compress data */
    uint64_t compress_thread_busy_prev;
    /* amount of bytes after compression */
    uint64_t compressed_size_prev;
    /* amount of compressed pages */
    uint64_t compress_pages_prev;

    /* total handled target pages at the beginning of period */
    uint64_t target_page_count_prev;
    /* total handled target pages since start */
    uint64_t target_page_count;
    /* number of dirty bits in the bitmap */
    uint64_t migration_dirty_pages;
    /* Protects modification of the bitmap and migration dirty pages */
    QemuMutex bitmap_mutex;
    /* The RAMBlock used in the last src_page_requests */
    RAMBlock *last_req_rb;
    /* Queue of outstanding page requests from the destination */
    QemuMutex src_page_req_mutex;
    QSIMPLEQ_HEAD(, RAMSrcPageRequest) src_page_requests;
};
typedef struct RAMState RAMState;

static RAMState *ram_state;

static NotifierWithReturnList precopy_notifier_list;

void precopy_infrastructure_init(void)
{
    notifier_with_return_list_init(&precopy_notifier_list);
}

void precopy_add_notifier(NotifierWithReturn *n)
{
    notifier_with_return_list_add(&precopy_notifier_list, n);
}

void precopy_remove_notifier(NotifierWithReturn *n)
{
    notifier_with_return_remove(n);
}

int precopy_notify(PrecopyNotifyReason reason, Error **errp)
{
    PrecopyNotifyData pnd;
    pnd.reason = reason;
    pnd.errp = errp;

    return notifier_with_return_list_notify(&precopy_notifier_list, &pnd);
}

void precopy_enable_free_page_optimization(void)
{
    if (!ram_state) {
        return;
    }

    ram_state->fpo_enabled = true;
}

uint64_t ram_bytes_remaining(void)
{
    return ram_state ? (ram_state->migration_dirty_pages * TARGET_PAGE_SIZE) :
                       0;
}

MigrationStats ram_counters;

/* used by the search for pages to send */
struct PageSearchStatus {
    /* Current block being searched */
    RAMBlock    *block;
    /* Current page to search from */
    unsigned long page;
    /* Set once we wrap around */
    bool         complete_round;
};
typedef struct PageSearchStatus PageSearchStatus;

CompressionStats compression_counters;

struct CompressParam {
    bool done;
    bool quit;
    bool zero_page;
    QEMUFile *file;
    QemuMutex mutex;
    QemuCond cond;
    RAMBlock *block;
    ram_addr_t offset;

    /* internally used fields */
    z_stream stream;
    uint8_t *originbuf;
};
typedef struct CompressParam CompressParam;

struct DecompressParam {
    bool done;
    bool quit;
    QemuMutex mutex;
    QemuCond cond;
    void *des;
    uint8_t *compbuf;
    int len;
    z_stream stream;
};
typedef struct DecompressParam DecompressParam;

static CompressParam *comp_param;
static QemuThread *compress_threads;
/* comp_done_cond is used to wake up the migration thread when
 * one of the compression threads has finished the compression.
 * comp_done_lock is used to co-work with comp_done_cond.
 */
static QemuMutex comp_done_lock;
static QemuCond comp_done_cond;
/* The empty QEMUFileOps will be used by file in CompressParam */
static const QEMUFileOps empty_ops = { };

static QEMUFile *decomp_file;
static DecompressParam *decomp_param;
static QemuThread *decompress_threads;
static QemuMutex decomp_done_lock;
static QemuCond decomp_done_cond;

static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
                                 ram_addr_t offset, uint8_t *source_buf);

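/*
 * Worker body of a compression thread: wait until the migration thread
 * hands us a (block, offset) pair, compress that page into param->file
 * and report completion through comp_done_cond.  The thread exits once
 * param->quit is set.
 */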
static void *do_data_compress(void *opaque)
{
    CompressParam *param = opaque;
    RAMBlock *block;
    ram_addr_t offset;
    bool zero_page;

    qemu_mutex_lock(&param->mutex);
    while (!param->quit) {
        if (param->block) {
            block = param->block;
            offset = param->offset;
            param->block = NULL;
            qemu_mutex_unlock(&param->mutex);

            zero_page = do_compress_ram_page(param->file, &param->stream,
                                             block, offset, param->originbuf);

            qemu_mutex_lock(&comp_done_lock);
            param->done = true;
            param->zero_page = zero_page;
            qemu_cond_signal(&comp_done_cond);
            qemu_mutex_unlock(&comp_done_lock);

            qemu_mutex_lock(&param->mutex);
        } else {
            qemu_cond_wait(&param->cond, &param->mutex);
        }
    }
    qemu_mutex_unlock(&param->mutex);

    return NULL;
}

static void compress_threads_save_cleanup(void)
{
    int i, thread_count;

    if (!migrate_use_compression() || !comp_param) {
        return;
    }

    thread_count = migrate_compress_threads();
    for (i = 0; i < thread_count; i++) {
        /*
         * we use it as an indicator of whether the thread is
         * properly initialized or not
         */
        if (!comp_param[i].file) {
            break;
        }

        qemu_mutex_lock(&comp_param[i].mutex);
        comp_param[i].quit = true;
        qemu_cond_signal(&comp_param[i].cond);
        qemu_mutex_unlock(&comp_param[i].mutex);

        qemu_thread_join(compress_threads + i);
        qemu_mutex_destroy(&comp_param[i].mutex);
        qemu_cond_destroy(&comp_param[i].cond);
        deflateEnd(&comp_param[i].stream);
        g_free(comp_param[i].originbuf);
        qemu_fclose(comp_param[i].file);
        comp_param[i].file = NULL;
    }
    qemu_mutex_destroy(&comp_done_lock);
    qemu_cond_destroy(&comp_done_cond);
    g_free(compress_threads);
    g_free(comp_param);
    compress_threads = NULL;
    comp_param = NULL;
}

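/*
 * Allocate and start the compression threads.  Returns 0 on success,
 * -1 if a per-thread buffer or zlib stream could not be set up, in
 * which case everything created so far is torn down again.
 */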
static int compress_threads_save_setup(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return 0;
    }
    thread_count = migrate_compress_threads();
    compress_threads = g_new0(QemuThread, thread_count);
    comp_param = g_new0(CompressParam, thread_count);
    qemu_cond_init(&comp_done_cond);
    qemu_mutex_init(&comp_done_lock);
    for (i = 0; i < thread_count; i++) {
        comp_param[i].originbuf = g_try_malloc(TARGET_PAGE_SIZE);
        if (!comp_param[i].originbuf) {
            goto exit;
        }

        if (deflateInit(&comp_param[i].stream,
                        migrate_compress_level()) != Z_OK) {
            g_free(comp_param[i].originbuf);
            goto exit;
        }

        /* comp_param[i].file is just used as a dummy buffer to save data,
         * set its ops to empty.
         */
        comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
        comp_param[i].done = true;
        comp_param[i].quit = false;
        qemu_mutex_init(&comp_param[i].mutex);
        qemu_cond_init(&comp_param[i].cond);
        qemu_thread_create(compress_threads + i, "compress",
                           do_data_compress, comp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
    return 0;

exit:
    compress_threads_save_cleanup();
    return -1;
}

/* Multiple fd's */

#define MULTIFD_MAGIC 0x11223344U
#define MULTIFD_VERSION 1

#define MULTIFD_FLAG_SYNC (1 << 0)

/* This value needs to be a multiple of qemu_target_page_size() */
#define MULTIFD_PACKET_SIZE (512 * 1024)

typedef struct {
    uint32_t magic;
    uint32_t version;
    unsigned char uuid[16]; /* QemuUUID */
    uint8_t id;
    uint8_t unused1[7];     /* Reserved for future use */
    uint64_t unused2[4];    /* Reserved for future use */
} __attribute__((packed)) MultiFDInit_t;

typedef struct {
    uint32_t magic;
    uint32_t version;
    uint32_t flags;
    /* maximum number of allocated pages */
    uint32_t pages_alloc;
    uint32_t pages_used;
    /* size of the next packet that contains pages */
    uint32_t next_packet_size;
    uint64_t packet_num;
    uint64_t unused[4];    /* Reserved for future use */
    char ramblock[256];
    uint64_t offset[];
} __attribute__((packed)) MultiFDPacket_t;

typedef struct {
    /* number of used pages */
    uint32_t used;
    /* number of allocated pages */
    uint32_t allocated;
    /* global number of generated multifd packets */
    uint64_t packet_num;
    /* offset of each page */
    ram_addr_t *offset;
    /* pointer to each page */
    struct iovec *iov;
    RAMBlock *block;
} MultiFDPages_t;

typedef struct {
    /* these fields are not changed once the thread is created */
    /* channel number */
    uint8_t id;
    /* channel thread name */
    char *name;
    /* channel thread id */
    QemuThread thread;
    /* communication channel */
    QIOChannel *c;
    /* sem where to wait for more work */
    QemuSemaphore sem;
    /* this mutex protects the following parameters */
    QemuMutex mutex;
    /* is this channel thread running */
    bool running;
    /* should this thread finish */
    bool quit;
    /* thread has work to do */
    int pending_job;
    /* array of pages to send */
    MultiFDPages_t *pages;
    /* packet allocated len */
    uint32_t packet_len;
    /* pointer to the packet */
    MultiFDPacket_t *packet;
    /* multifd flags for each packet */
    uint32_t flags;
    /* size of the next packet that contains pages */
    uint32_t next_packet_size;
    /* global number of generated multifd packets */
    uint64_t packet_num;
    /* thread local variables */
    /* packets sent through this channel */
    uint64_t num_packets;
    /* pages sent through this channel */
    uint64_t num_pages;
    /* syncs main thread and channels */
    QemuSemaphore sem_sync;
} MultiFDSendParams;

typedef struct {
    /* these fields are not changed once the thread is created */
    /* channel number */
    uint8_t id;
    /* channel thread name */
    char *name;
    /* channel thread id */
    QemuThread thread;
    /* communication channel */
    QIOChannel *c;
    /* this mutex protects the following parameters */
    QemuMutex mutex;
    /* is this channel thread running */
    bool running;
    /* should this thread finish */
    bool quit;
    /* array of pages to receive */
    MultiFDPages_t *pages;
    /* packet allocated len */
    uint32_t packet_len;
    /* pointer to the packet */
    MultiFDPacket_t *packet;
    /* multifd flags for each packet */
    uint32_t flags;
    /* global number of generated multifd packets */
    uint64_t packet_num;
    /* thread local variables */
    /* size of the next packet that contains pages */
    uint32_t next_packet_size;
    /* packets received through this channel */
    uint64_t num_packets;
    /* pages received through this channel */
    uint64_t num_pages;
    /* syncs main thread and channels */
    QemuSemaphore sem_sync;
} MultiFDRecvParams;

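/*
 * The first thing sent on every multifd channel is a MultiFDInit_t
 * handshake: magic, version, the source VM uuid and the channel id.
 * The receiving side uses it to validate the connection and to learn
 * which channel number this socket corresponds to.
 */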
static int multifd_send_initial_packet(MultiFDSendParams *p, Error **errp)
{
    MultiFDInit_t msg;
    int ret;

    msg.magic = cpu_to_be32(MULTIFD_MAGIC);
    msg.version = cpu_to_be32(MULTIFD_VERSION);
    msg.id = p->id;
    memcpy(msg.uuid, &qemu_uuid.data, sizeof(msg.uuid));

    ret = qio_channel_write_all(p->c, (char *)&msg, sizeof(msg), errp);
    if (ret != 0) {
        return -1;
    }
    return 0;
}

static int multifd_recv_initial_packet(QIOChannel *c, Error **errp)
{
    MultiFDInit_t msg;
    int ret;

    ret = qio_channel_read_all(c, (char *)&msg, sizeof(msg), errp);
    if (ret != 0) {
        return -1;
    }

    msg.magic = be32_to_cpu(msg.magic);
    msg.version = be32_to_cpu(msg.version);

    if (msg.magic != MULTIFD_MAGIC) {
        error_setg(errp, "multifd: received packet magic %x "
                   "expected %x", msg.magic, MULTIFD_MAGIC);
        return -1;
    }

    if (msg.version != MULTIFD_VERSION) {
        error_setg(errp, "multifd: received packet version %d "
                   "expected %d", msg.version, MULTIFD_VERSION);
        return -1;
    }

    if (memcmp(msg.uuid, &qemu_uuid, sizeof(qemu_uuid))) {
        char *uuid = qemu_uuid_unparse_strdup(&qemu_uuid);
        char *msg_uuid = qemu_uuid_unparse_strdup((const QemuUUID *)msg.uuid);

        error_setg(errp, "multifd: received uuid '%s' and expected "
                   "uuid '%s' for channel %hhd", msg_uuid, uuid, msg.id);
        g_free(uuid);
        g_free(msg_uuid);
        return -1;
    }

    if (msg.id > migrate_multifd_channels()) {
        error_setg(errp, "multifd: received channel id %d is greater than "
                   "number of channels %d", msg.id, migrate_multifd_channels());
        return -1;
    }

    return msg.id;
}

static MultiFDPages_t *multifd_pages_init(size_t size)
{
    MultiFDPages_t *pages = g_new0(MultiFDPages_t, 1);

    pages->allocated = size;
    pages->iov = g_new0(struct iovec, size);
    pages->offset = g_new0(ram_addr_t, size);

    return pages;
}

static void multifd_pages_clear(MultiFDPages_t *pages)
{
    pages->used = 0;
    pages->allocated = 0;
    pages->packet_num = 0;
    pages->block = NULL;
    g_free(pages->iov);
    pages->iov = NULL;
    g_free(pages->offset);
    pages->offset = NULL;
    g_free(pages);
}

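/*
 * Fill p->packet from p->pages, converting every header field to big
 * endian so the stream is independent of the host byte order.
 */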
static void multifd_send_fill_packet(MultiFDSendParams *p)
{
    MultiFDPacket_t *packet = p->packet;
    uint32_t page_max = MULTIFD_PACKET_SIZE / qemu_target_page_size();
    int i;

    packet->magic = cpu_to_be32(MULTIFD_MAGIC);
    packet->version = cpu_to_be32(MULTIFD_VERSION);
    packet->flags = cpu_to_be32(p->flags);
    packet->pages_alloc = cpu_to_be32(page_max);
    packet->pages_used = cpu_to_be32(p->pages->used);
    packet->next_packet_size = cpu_to_be32(p->next_packet_size);
    packet->packet_num = cpu_to_be64(p->packet_num);

    if (p->pages->block) {
        strncpy(packet->ramblock, p->pages->block->idstr, 256);
    }

    for (i = 0; i < p->pages->used; i++) {
        packet->offset[i] = cpu_to_be64(p->pages->offset[i]);
    }
}

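/*
 * Validate and unpack a packet received on this channel: check magic,
 * version and sizes, resolve the RAMBlock by name and build the iovec
 * that the page contents will be read into.  Returns 0 on success,
 * -1 (with *errp set) on any malformed field.
 */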
static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
{
    MultiFDPacket_t *packet = p->packet;
    uint32_t pages_max = MULTIFD_PACKET_SIZE / qemu_target_page_size();
    RAMBlock *block;
    int i;

    packet->magic = be32_to_cpu(packet->magic);
    if (packet->magic != MULTIFD_MAGIC) {
        error_setg(errp, "multifd: received packet "
                   "magic %x and expected magic %x",
                   packet->magic, MULTIFD_MAGIC);
        return -1;
    }

    packet->version = be32_to_cpu(packet->version);
    if (packet->version != MULTIFD_VERSION) {
        error_setg(errp, "multifd: received packet "
                   "version %d and expected version %d",
                   packet->version, MULTIFD_VERSION);
        return -1;
    }

    p->flags = be32_to_cpu(packet->flags);

    packet->pages_alloc = be32_to_cpu(packet->pages_alloc);
    /*
     * If we received a packet that is 100 times bigger than expected
     * just stop migration.  It is a magic number.
     */
    if (packet->pages_alloc > pages_max * 100) {
        error_setg(errp, "multifd: received packet "
                   "with size %d and expected a maximum size of %d",
                   packet->pages_alloc, pages_max * 100);
        return -1;
    }
    /*
     * We received a packet that is bigger than expected but inside
     * reasonable limits (see previous comment). Just reallocate.
     */
    if (packet->pages_alloc > p->pages->allocated) {
        multifd_pages_clear(p->pages);
        p->pages = multifd_pages_init(packet->pages_alloc);
    }

    p->pages->used = be32_to_cpu(packet->pages_used);
    if (p->pages->used > packet->pages_alloc) {
        error_setg(errp, "multifd: received packet "
                   "with %d pages and expected maximum pages are %d",
                   p->pages->used, packet->pages_alloc);
        return -1;
    }

    p->next_packet_size = be32_to_cpu(packet->next_packet_size);
    p->packet_num = be64_to_cpu(packet->packet_num);

    if (p->pages->used) {
        /* make sure that ramblock is 0 terminated */
        packet->ramblock[255] = 0;
        block = qemu_ram_block_by_name(packet->ramblock);
        if (!block) {
            error_setg(errp, "multifd: unknown ram block %s",
                       packet->ramblock);
            return -1;
        }
    }

    for (i = 0; i < p->pages->used; i++) {
        ram_addr_t offset = be64_to_cpu(packet->offset[i]);

        if (offset > (block->used_length - TARGET_PAGE_SIZE)) {
            error_setg(errp, "multifd: offset too long " RAM_ADDR_FMT
                       " (max " RAM_ADDR_FMT ")",
                       offset, block->max_length);
            return -1;
        }
        p->pages->iov[i].iov_base = block->host + offset;
        p->pages->iov[i].iov_len = TARGET_PAGE_SIZE;
    }

    return 0;
}

struct {
    MultiFDSendParams *params;
    /* array of pages to send */
    MultiFDPages_t *pages;
    /* global number of generated multifd packets */
    uint64_t packet_num;
    /* send channels ready */
    QemuSemaphore channels_ready;
} *multifd_send_state;

/*
 * How do we use multifd_send_state->pages and channel->pages?
 *
 * We create a pages array for each channel, and a main one.  Each time
 * that we need to send a batch of pages we interchange the one in
 * multifd_send_state with the one of the channel that is sending it.
 * There are two reasons for that:
 *    - to not have to do so many mallocs during migration
 *    - to make it easier to know what to free at the end of migration
 *
 * This way we always know who is the owner of each "pages" struct,
 * and we don't need any locking.  It belongs to the migration thread
 * or to the channel thread.  Switching is safe because the migration
 * thread is using the channel mutex when changing it, and the channel
 * has to have finished with its own, otherwise pending_job can't be
 * false.
 */

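/*
 * Hand the currently filled multifd_send_state->pages to an idle
 * channel, swapping it with that channel's pages array as described
 * above.  Returns 1 on success and -1 if the chosen channel has
 * already quit because of an error.
 */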
static int multifd_send_pages(RAMState *rs)
{
    int i;
    static int next_channel;
    MultiFDSendParams *p = NULL; /* make happy gcc */
    MultiFDPages_t *pages = multifd_send_state->pages;
    uint64_t transferred;

    qemu_sem_wait(&multifd_send_state->channels_ready);
    for (i = next_channel;; i = (i + 1) % migrate_multifd_channels()) {
        p = &multifd_send_state->params[i];

        qemu_mutex_lock(&p->mutex);
        if (p->quit) {
            error_report("%s: channel %d has already quit!", __func__, i);
            qemu_mutex_unlock(&p->mutex);
            return -1;
        }
        if (!p->pending_job) {
            p->pending_job++;
            next_channel = (i + 1) % migrate_multifd_channels();
            break;
        }
        qemu_mutex_unlock(&p->mutex);
    }
    p->pages->used = 0;

    p->packet_num = multifd_send_state->packet_num++;
    p->pages->block = NULL;
    multifd_send_state->pages = p->pages;
    p->pages = pages;
    transferred = ((uint64_t) pages->used) * TARGET_PAGE_SIZE + p->packet_len;
    qemu_file_update_transfer(rs->f, transferred);
    ram_counters.multifd_bytes += transferred;
    ram_counters.transferred += transferred;
    qemu_mutex_unlock(&p->mutex);
    qemu_sem_post(&p->sem);

    return 1;
}

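/*
 * Queue one page for multifd sending.  The page is appended to the
 * current pages array; when the array is full, or the page belongs to
 * a different RAMBlock, the pending batch is flushed first with
 * multifd_send_pages().  Returns 1 on success, -1 on error.
 */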
static int multifd_queue_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
{
    MultiFDPages_t *pages = multifd_send_state->pages;

    if (!pages->block) {
        pages->block = block;
    }

    if (pages->block == block) {
        pages->offset[pages->used] = offset;
        pages->iov[pages->used].iov_base = block->host + offset;
        pages->iov[pages->used].iov_len = TARGET_PAGE_SIZE;
        pages->used++;

        if (pages->used < pages->allocated) {
            return 1;
        }
    }

    if (multifd_send_pages(rs) < 0) {
        return -1;
    }

    if (pages->block != block) {
        return multifd_queue_page(rs, block, offset);
    }

    return 1;
}

static void multifd_send_terminate_threads(Error *err)
{
    int i;

    trace_multifd_send_terminate_threads(err != NULL);

    if (err) {
        MigrationState *s = migrate_get_current();
        migrate_set_error(s, err);
        if (s->state == MIGRATION_STATUS_SETUP ||
            s->state == MIGRATION_STATUS_PRE_SWITCHOVER ||
            s->state == MIGRATION_STATUS_DEVICE ||
            s->state == MIGRATION_STATUS_ACTIVE) {
            migrate_set_state(&s->state, s->state,
                              MIGRATION_STATUS_FAILED);
        }
    }

    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        qemu_mutex_lock(&p->mutex);
        p->quit = true;
        qemu_sem_post(&p->sem);
        qemu_mutex_unlock(&p->mutex);
    }
}

void multifd_save_cleanup(void)
{
    int i;

    if (!migrate_use_multifd()) {
        return;
    }
    multifd_send_terminate_threads(NULL);
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        if (p->running) {
            qemu_thread_join(&p->thread);
        }
        socket_send_channel_destroy(p->c);
        p->c = NULL;
        qemu_mutex_destroy(&p->mutex);
        qemu_sem_destroy(&p->sem);
        qemu_sem_destroy(&p->sem_sync);
        g_free(p->name);
        p->name = NULL;
        multifd_pages_clear(p->pages);
        p->pages = NULL;
        p->packet_len = 0;
        g_free(p->packet);
        p->packet = NULL;
    }
    qemu_sem_destroy(&multifd_send_state->channels_ready);
    g_free(multifd_send_state->params);
    multifd_send_state->params = NULL;
    multifd_pages_clear(multifd_send_state->pages);
    multifd_send_state->pages = NULL;
    g_free(multifd_send_state);
    multifd_send_state = NULL;
}

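/*
 * Flush any pending pages and send a MULTIFD_FLAG_SYNC packet on every
 * channel, then wait until each channel thread acknowledges it through
 * sem_sync.  This keeps the main migration stream and the multifd
 * channels in step.
 */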
static void multifd_send_sync_main(RAMState *rs)
{
    int i;

    if (!migrate_use_multifd()) {
        return;
    }
    if (multifd_send_state->pages->used) {
        if (multifd_send_pages(rs) < 0) {
            error_report("%s: multifd_send_pages fail", __func__);
            return;
        }
    }
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        trace_multifd_send_sync_main_signal(p->id);

        qemu_mutex_lock(&p->mutex);

        if (p->quit) {
            error_report("%s: channel %d has already quit", __func__, i);
            qemu_mutex_unlock(&p->mutex);
            return;
        }

        p->packet_num = multifd_send_state->packet_num++;
        p->flags |= MULTIFD_FLAG_SYNC;
        p->pending_job++;
        qemu_file_update_transfer(rs->f, p->packet_len);
        ram_counters.multifd_bytes += p->packet_len;
        ram_counters.transferred += p->packet_len;
        qemu_mutex_unlock(&p->mutex);
        qemu_sem_post(&p->sem);
    }
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        trace_multifd_send_sync_main_wait(p->id);
        qemu_sem_wait(&p->sem_sync);
    }
    trace_multifd_send_sync_main(multifd_send_state->packet_num);
}

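/*
 * Per-channel send thread: for every pending job build the packet
 * header, write it followed by the page contents to the channel, and
 * post channels_ready so multifd_send_pages() can pick this channel
 * again.  On error the other send threads are terminated as well.
 */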
static void *multifd_send_thread(void *opaque)
{
    MultiFDSendParams *p = opaque;
    Error *local_err = NULL;
    int ret = 0;
    uint32_t flags = 0;

    trace_multifd_send_thread_start(p->id);
    rcu_register_thread();

    if (multifd_send_initial_packet(p, &local_err) < 0) {
        ret = -1;
        goto out;
    }
    /* initial packet */
    p->num_packets = 1;

    while (true) {
        qemu_sem_wait(&p->sem);
        qemu_mutex_lock(&p->mutex);

        if (p->pending_job) {
            uint32_t used = p->pages->used;
            uint64_t packet_num = p->packet_num;
            flags = p->flags;

            p->next_packet_size = used * qemu_target_page_size();
            multifd_send_fill_packet(p);
            p->flags = 0;
            p->num_packets++;
            p->num_pages += used;
            p->pages->used = 0;
            qemu_mutex_unlock(&p->mutex);

            trace_multifd_send(p->id, packet_num, used, flags,
                               p->next_packet_size);

            ret = qio_channel_write_all(p->c, (void *)p->packet,
                                        p->packet_len, &local_err);
            if (ret != 0) {
                break;
            }

            if (used) {
                ret = qio_channel_writev_all(p->c, p->pages->iov,
                                             used, &local_err);
                if (ret != 0) {
                    break;
                }
            }

            qemu_mutex_lock(&p->mutex);
            p->pending_job--;
            qemu_mutex_unlock(&p->mutex);

            if (flags & MULTIFD_FLAG_SYNC) {
                qemu_sem_post(&p->sem_sync);
            }
            qemu_sem_post(&multifd_send_state->channels_ready);
        } else if (p->quit) {
            qemu_mutex_unlock(&p->mutex);
            break;
        } else {
            qemu_mutex_unlock(&p->mutex);
            /* sometimes there are spurious wakeups */
        }
    }

out:
    if (local_err) {
        trace_multifd_send_error(p->id);
        multifd_send_terminate_threads(local_err);
    }

    /*
     * An error happened and this thread is exiting, but we can't just
     * leave: wake up whoever might be waiting on us.
     */
    if (ret != 0) {
        qemu_sem_post(&p->sem_sync);
        qemu_sem_post(&multifd_send_state->channels_ready);
    }

    qemu_mutex_lock(&p->mutex);
    p->running = false;
    qemu_mutex_unlock(&p->mutex);

    rcu_unregister_thread();
    trace_multifd_send_thread_end(p->id, p->num_packets, p->num_pages);

    return NULL;
}

Juan Quintela60df2d42018-03-07 07:56:15 +01001197static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
1198{
1199 MultiFDSendParams *p = opaque;
1200 QIOChannel *sioc = QIO_CHANNEL(qio_task_get_source(task));
1201 Error *local_err = NULL;
1202
Juan Quintela7dd59d02019-08-14 04:02:17 +02001203 trace_multifd_new_send_channel_async(p->id);
Juan Quintela60df2d42018-03-07 07:56:15 +01001204 if (qio_task_propagate_error(task, &local_err)) {
Fei Li1398b2e2019-01-13 22:08:47 +08001205 migrate_set_error(migrate_get_current(), local_err);
1206 multifd_save_cleanup();
Juan Quintela60df2d42018-03-07 07:56:15 +01001207 } else {
1208 p->c = QIO_CHANNEL(sioc);
1209 qio_channel_set_delay(p->c, false);
1210 p->running = true;
1211 qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
1212 QEMU_THREAD_JOINABLE);
Juan Quintela60df2d42018-03-07 07:56:15 +01001213 }
1214}
1215
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001216int multifd_save_setup(void)
1217{
1218 int thread_count;
Juan Quintelaefd1a1d2019-02-20 12:06:03 +01001219 uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size();
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001220 uint8_t i;
1221
1222 if (!migrate_use_multifd()) {
1223 return 0;
1224 }
1225 thread_count = migrate_multifd_channels();
1226 multifd_send_state = g_malloc0(sizeof(*multifd_send_state));
1227 multifd_send_state->params = g_new0(MultiFDSendParams, thread_count);
Juan Quintela34c55a92018-04-10 23:35:15 +02001228 multifd_send_state->pages = multifd_pages_init(page_count);
Juan Quintelab9ee2f72016-01-15 11:40:13 +01001229 qemu_sem_init(&multifd_send_state->channels_ready, 0);
Juan Quintela34c55a92018-04-10 23:35:15 +02001230
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001231 for (i = 0; i < thread_count; i++) {
1232 MultiFDSendParams *p = &multifd_send_state->params[i];
1233
1234 qemu_mutex_init(&p->mutex);
1235 qemu_sem_init(&p->sem, 0);
Juan Quintela18cdcea2019-08-14 04:02:14 +02001236 qemu_sem_init(&p->sem_sync, 0);
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001237 p->quit = false;
Juan Quintela0beb5ed2018-04-11 03:02:10 +02001238 p->pending_job = 0;
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001239 p->id = i;
Juan Quintela34c55a92018-04-10 23:35:15 +02001240 p->pages = multifd_pages_init(page_count);
Juan Quintela2a26c972018-04-04 11:26:58 +02001241 p->packet_len = sizeof(MultiFDPacket_t)
1242 + sizeof(ram_addr_t) * page_count;
1243 p->packet = g_malloc0(p->packet_len);
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001244 p->name = g_strdup_printf("multifdsend_%d", i);
Juan Quintela60df2d42018-03-07 07:56:15 +01001245 socket_send_channel_create(multifd_new_send_channel_async, p);
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001246 }
1247 return 0;
1248}
1249
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001250struct {
1251 MultiFDRecvParams *params;
1252 /* number of created threads */
1253 int count;
Juan Quintela6df264a2018-02-28 09:10:07 +01001254 /* syncs main thread and channels */
1255 QemuSemaphore sem_sync;
1256 /* global number of generated multifd packets */
1257 uint64_t packet_num;
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001258} *multifd_recv_state;
1259
Juan Quintela66770702018-02-19 19:01:45 +01001260static void multifd_recv_terminate_threads(Error *err)
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001261{
1262 int i;
1263
Juan Quintela5558c912019-08-14 04:02:13 +02001264 trace_multifd_recv_terminate_threads(err != NULL);
1265
Juan Quintela7a169d72018-02-19 19:01:15 +01001266 if (err) {
1267 MigrationState *s = migrate_get_current();
1268 migrate_set_error(s, err);
1269 if (s->state == MIGRATION_STATUS_SETUP ||
1270 s->state == MIGRATION_STATUS_ACTIVE) {
1271 migrate_set_state(&s->state, s->state,
1272 MIGRATION_STATUS_FAILED);
1273 }
1274 }
1275
Juan Quintela66770702018-02-19 19:01:45 +01001276 for (i = 0; i < migrate_multifd_channels(); i++) {
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001277 MultiFDRecvParams *p = &multifd_recv_state->params[i];
1278
1279 qemu_mutex_lock(&p->mutex);
Juan Quintela3c3ca252019-07-24 11:46:24 +02001280 p->quit = true;
Juan Quintela7a5cc332018-04-18 00:49:19 +02001281 /* We could arrive here for two reasons:
1282 - normal quit, i.e. everything went fine, just finished
1283 - error quit: We close the channels so the channel threads
1284 finish the qio_channel_read_all_eof() */
1285 qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001286 qemu_mutex_unlock(&p->mutex);
1287 }
1288}
1289
1290int multifd_load_cleanup(Error **errp)
1291{
1292 int i;
1293 int ret = 0;
1294
1295 if (!migrate_use_multifd()) {
1296 return 0;
1297 }
Juan Quintela66770702018-02-19 19:01:45 +01001298 multifd_recv_terminate_threads(NULL);
1299 for (i = 0; i < migrate_multifd_channels(); i++) {
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001300 MultiFDRecvParams *p = &multifd_recv_state->params[i];
1301
Juan Quintela66770702018-02-19 19:01:45 +01001302 if (p->running) {
Juan Quintela3c3ca252019-07-24 11:46:24 +02001303 p->quit = true;
Ivan Renf193bc02019-06-25 21:18:19 +08001304 /*
 1305              * multifd_recv_thread may hang at the MULTIFD_FLAG_SYNC handling
 1306              * code; waking it up here does no harm in the cleanup phase.
1307 */
1308 qemu_sem_post(&p->sem_sync);
Juan Quintela66770702018-02-19 19:01:45 +01001309 qemu_thread_join(&p->thread);
1310 }
Juan Quintela60df2d42018-03-07 07:56:15 +01001311 object_unref(OBJECT(p->c));
1312 p->c = NULL;
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001313 qemu_mutex_destroy(&p->mutex);
Juan Quintela6df264a2018-02-28 09:10:07 +01001314 qemu_sem_destroy(&p->sem_sync);
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001315 g_free(p->name);
1316 p->name = NULL;
Juan Quintela34c55a92018-04-10 23:35:15 +02001317 multifd_pages_clear(p->pages);
1318 p->pages = NULL;
Juan Quintela2a26c972018-04-04 11:26:58 +02001319 p->packet_len = 0;
1320 g_free(p->packet);
1321 p->packet = NULL;
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001322 }
Juan Quintela6df264a2018-02-28 09:10:07 +01001323 qemu_sem_destroy(&multifd_recv_state->sem_sync);
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001324 g_free(multifd_recv_state->params);
1325 multifd_recv_state->params = NULL;
1326 g_free(multifd_recv_state);
1327 multifd_recv_state = NULL;
1328
1329 return ret;
1330}
1331
Juan Quintela6df264a2018-02-28 09:10:07 +01001332static void multifd_recv_sync_main(void)
1333{
1334 int i;
1335
1336 if (!migrate_use_multifd()) {
1337 return;
1338 }
1339 for (i = 0; i < migrate_multifd_channels(); i++) {
1340 MultiFDRecvParams *p = &multifd_recv_state->params[i];
1341
Juan Quintela6df264a2018-02-28 09:10:07 +01001342 trace_multifd_recv_sync_main_wait(p->id);
1343 qemu_sem_wait(&multifd_recv_state->sem_sync);
Wei Yang77568ea2019-06-04 10:35:40 +08001344 }
1345 for (i = 0; i < migrate_multifd_channels(); i++) {
1346 MultiFDRecvParams *p = &multifd_recv_state->params[i];
1347
Juan Quintela6df264a2018-02-28 09:10:07 +01001348 qemu_mutex_lock(&p->mutex);
1349 if (multifd_recv_state->packet_num < p->packet_num) {
1350 multifd_recv_state->packet_num = p->packet_num;
1351 }
1352 qemu_mutex_unlock(&p->mutex);
Juan Quintela6df264a2018-02-28 09:10:07 +01001353 trace_multifd_recv_sync_main_signal(p->id);
Juan Quintela6df264a2018-02-28 09:10:07 +01001354 qemu_sem_post(&p->sem_sync);
1355 }
1356 trace_multifd_recv_sync_main(multifd_recv_state->packet_num);
1357}
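
/*
 * Illustrative sketch of the sync handshake driven above (added for
 * clarity, not part of the original code):
 *
 *     recv channel thread                      main thread
 *     -------------------                      -----------
 *     sees MULTIFD_FLAG_SYNC
 *     post(multifd_recv_state->sem_sync) --->  wait(multifd_recv_state->sem_sync)
 *                                              (once per channel)
 *     wait(p->sem_sync)                  <---  post(p->sem_sync)
 *                                              (once per channel)
 */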
1358
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001359static void *multifd_recv_thread(void *opaque)
1360{
1361 MultiFDRecvParams *p = opaque;
Juan Quintela2a26c972018-04-04 11:26:58 +02001362 Error *local_err = NULL;
1363 int ret;
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001364
Juan Quintela408ea6a2018-04-06 18:28:59 +02001365 trace_multifd_recv_thread_start(p->id);
Lidong Chen74637e62018-08-06 21:29:29 +08001366 rcu_register_thread();
Juan Quintela408ea6a2018-04-06 18:28:59 +02001367
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001368 while (true) {
Juan Quintela6df264a2018-02-28 09:10:07 +01001369 uint32_t used;
1370 uint32_t flags;
1371
Juan Quintela3c3ca252019-07-24 11:46:24 +02001372 if (p->quit) {
1373 break;
1374 }
1375
Juan Quintela8b2db7f2018-04-11 12:36:13 +02001376 ret = qio_channel_read_all_eof(p->c, (void *)p->packet,
1377 p->packet_len, &local_err);
1378 if (ret == 0) { /* EOF */
1379 break;
1380 }
1381 if (ret == -1) { /* Error */
1382 break;
1383 }
Juan Quintela6df264a2018-02-28 09:10:07 +01001384
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001385 qemu_mutex_lock(&p->mutex);
Juan Quintela6df264a2018-02-28 09:10:07 +01001386 ret = multifd_recv_unfill_packet(p, &local_err);
1387 if (ret) {
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001388 qemu_mutex_unlock(&p->mutex);
1389 break;
1390 }
Juan Quintela6df264a2018-02-28 09:10:07 +01001391
1392 used = p->pages->used;
1393 flags = p->flags;
Juan Quintela2a34ee52019-01-04 19:45:39 +01001394 trace_multifd_recv(p->id, p->packet_num, used, flags,
1395 p->next_packet_size);
Juan Quintela6df264a2018-02-28 09:10:07 +01001396 p->num_packets++;
1397 p->num_pages += used;
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001398 qemu_mutex_unlock(&p->mutex);
Juan Quintela6df264a2018-02-28 09:10:07 +01001399
Juan Quintelaad24c7c2019-01-04 19:12:35 +01001400 if (used) {
1401 ret = qio_channel_readv_all(p->c, p->pages->iov,
1402 used, &local_err);
1403 if (ret != 0) {
1404 break;
1405 }
Juan Quintela8b2db7f2018-04-11 12:36:13 +02001406 }
1407
Juan Quintela6df264a2018-02-28 09:10:07 +01001408 if (flags & MULTIFD_FLAG_SYNC) {
1409 qemu_sem_post(&multifd_recv_state->sem_sync);
1410 qemu_sem_wait(&p->sem_sync);
1411 }
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001412 }
1413
Juan Quintelad82628e2018-04-11 02:44:24 +02001414 if (local_err) {
1415 multifd_recv_terminate_threads(local_err);
1416 }
Juan Quintela66770702018-02-19 19:01:45 +01001417 qemu_mutex_lock(&p->mutex);
1418 p->running = false;
1419 qemu_mutex_unlock(&p->mutex);
1420
Lidong Chen74637e62018-08-06 21:29:29 +08001421 rcu_unregister_thread();
Juan Quintela408ea6a2018-04-06 18:28:59 +02001422 trace_multifd_recv_thread_end(p->id, p->num_packets, p->num_pages);
1423
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001424 return NULL;
1425}
1426
1427int multifd_load_setup(void)
1428{
1429 int thread_count;
Juan Quintelaefd1a1d2019-02-20 12:06:03 +01001430 uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size();
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001431 uint8_t i;
1432
1433 if (!migrate_use_multifd()) {
1434 return 0;
1435 }
1436 thread_count = migrate_multifd_channels();
1437 multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state));
1438 multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count);
Juan Quintela66770702018-02-19 19:01:45 +01001439 atomic_set(&multifd_recv_state->count, 0);
Juan Quintela6df264a2018-02-28 09:10:07 +01001440 qemu_sem_init(&multifd_recv_state->sem_sync, 0);
Juan Quintela34c55a92018-04-10 23:35:15 +02001441
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001442 for (i = 0; i < thread_count; i++) {
1443 MultiFDRecvParams *p = &multifd_recv_state->params[i];
1444
1445 qemu_mutex_init(&p->mutex);
Juan Quintela6df264a2018-02-28 09:10:07 +01001446 qemu_sem_init(&p->sem_sync, 0);
Juan Quintela3c3ca252019-07-24 11:46:24 +02001447 p->quit = false;
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001448 p->id = i;
Juan Quintela34c55a92018-04-10 23:35:15 +02001449 p->pages = multifd_pages_init(page_count);
Juan Quintela2a26c972018-04-04 11:26:58 +02001450 p->packet_len = sizeof(MultiFDPacket_t)
1451 + sizeof(ram_addr_t) * page_count;
1452 p->packet = g_malloc0(p->packet_len);
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001453 p->name = g_strdup_printf("multifdrecv_%d", i);
Juan Quintelaf986c3d2016-01-14 16:52:55 +01001454 }
1455 return 0;
1456}
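
/*
 * Worked example of the sizing above (added for clarity, not part of the
 * original code). Assuming MULTIFD_PACKET_SIZE is 512 KiB and the target
 * page size is 4 KiB:
 *
 *     page_count = 512 KiB / 4 KiB = 128 pages per packet
 *     packet_len = sizeof(MultiFDPacket_t) + 128 * sizeof(ram_addr_t)
 *
 * i.e. a fixed-size packet header followed by one ram_addr_t per page.
 */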
1457
Juan Quintela62c1e0c2018-02-19 18:59:02 +01001458bool multifd_recv_all_channels_created(void)
1459{
1460 int thread_count = migrate_multifd_channels();
1461
1462 if (!migrate_use_multifd()) {
1463 return true;
1464 }
1465
1466 return thread_count == atomic_read(&multifd_recv_state->count);
1467}
1468
Fei Li49ed0d22019-01-13 22:08:46 +08001469/*
1470 * Try to receive all multifd channels to get ready for the migration.
 1471 * - Return true and do not set @errp when correctly receiving all channels;
1472 * - Return false and do not set @errp when correctly receiving the current one;
1473 * - Return false and set @errp when failing to receive the current channel.
1474 */
1475bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
Juan Quintela71bb07d2018-02-19 19:01:03 +01001476{
Juan Quintela60df2d42018-03-07 07:56:15 +01001477 MultiFDRecvParams *p;
Juan Quintelaaf8b7d22018-04-06 19:32:12 +02001478 Error *local_err = NULL;
1479 int id;
Juan Quintela60df2d42018-03-07 07:56:15 +01001480
Juan Quintelaaf8b7d22018-04-06 19:32:12 +02001481 id = multifd_recv_initial_packet(ioc, &local_err);
1482 if (id < 0) {
1483 multifd_recv_terminate_threads(local_err);
Fei Li49ed0d22019-01-13 22:08:46 +08001484 error_propagate_prepend(errp, local_err,
1485 "failed to receive packet"
1486 " via multifd channel %d: ",
1487 atomic_read(&multifd_recv_state->count));
Peter Xu81e62052018-06-27 21:22:44 +08001488 return false;
Juan Quintelaaf8b7d22018-04-06 19:32:12 +02001489 }
Juan Quintela7dd59d02019-08-14 04:02:17 +02001490 trace_multifd_recv_new_channel(id);
Juan Quintelaaf8b7d22018-04-06 19:32:12 +02001491
1492 p = &multifd_recv_state->params[id];
1493 if (p->c != NULL) {
1494 error_setg(&local_err, "multifd: received id '%d' already setup'",
1495 id);
1496 multifd_recv_terminate_threads(local_err);
Fei Li49ed0d22019-01-13 22:08:46 +08001497 error_propagate(errp, local_err);
Peter Xu81e62052018-06-27 21:22:44 +08001498 return false;
Juan Quintelaaf8b7d22018-04-06 19:32:12 +02001499 }
Juan Quintela60df2d42018-03-07 07:56:15 +01001500 p->c = ioc;
1501 object_ref(OBJECT(ioc));
Juan Quintela408ea6a2018-04-06 18:28:59 +02001502 /* initial packet */
1503 p->num_packets = 1;
Juan Quintela60df2d42018-03-07 07:56:15 +01001504
1505 p->running = true;
1506 qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
1507 QEMU_THREAD_JOINABLE);
1508 atomic_inc(&multifd_recv_state->count);
Fei Li49ed0d22019-01-13 22:08:46 +08001509 return atomic_read(&multifd_recv_state->count) ==
1510 migrate_multifd_channels();
Juan Quintela71bb07d2018-02-19 19:01:03 +01001511}
1512
Juan Quintela56e93d22015-05-07 19:33:31 +02001513/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001514 * save_page_header: write page header to wire
Juan Quintela56e93d22015-05-07 19:33:31 +02001515 *
1516 * If this is the 1st block, it also writes the block identification
1517 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001518 * Returns the number of bytes written
Juan Quintela56e93d22015-05-07 19:33:31 +02001519 *
1520 * @f: QEMUFile where to send the data
1521 * @block: block that contains the page we want to send
1522 * @offset: offset inside the block for the page
1523 * in the lower bits, it contains flags
1524 */
Juan Quintela2bf3aa82017-05-10 13:28:13 +02001525static size_t save_page_header(RAMState *rs, QEMUFile *f, RAMBlock *block,
1526 ram_addr_t offset)
Juan Quintela56e93d22015-05-07 19:33:31 +02001527{
Liang Li9f5f3802015-07-13 17:34:10 +08001528 size_t size, len;
Juan Quintela56e93d22015-05-07 19:33:31 +02001529
Juan Quintela24795692017-03-21 11:45:01 +01001530 if (block == rs->last_sent_block) {
1531 offset |= RAM_SAVE_FLAG_CONTINUE;
1532 }
Juan Quintela2bf3aa82017-05-10 13:28:13 +02001533 qemu_put_be64(f, offset);
Juan Quintela56e93d22015-05-07 19:33:31 +02001534 size = 8;
1535
1536 if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
Liang Li9f5f3802015-07-13 17:34:10 +08001537 len = strlen(block->idstr);
Juan Quintela2bf3aa82017-05-10 13:28:13 +02001538 qemu_put_byte(f, len);
1539 qemu_put_buffer(f, (uint8_t *)block->idstr, len);
Liang Li9f5f3802015-07-13 17:34:10 +08001540 size += 1 + len;
Juan Quintela24795692017-03-21 11:45:01 +01001541 rs->last_sent_block = block;
Juan Quintela56e93d22015-05-07 19:33:31 +02001542 }
1543 return size;
1544}
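
/*
 * Illustrative sketch of the wire format written by save_page_header()
 * (added for clarity, not part of the original code):
 *
 *     8 bytes  offset ORed with the RAM_SAVE_FLAG_* flags       (always)
 *     1 byte   strlen(block->idstr)    (only when RAM_SAVE_FLAG_CONTINUE
 *     n bytes  block->idstr             is not set, i.e. a new block)
 *
 * which matches the returned sizes of 8 and 8 + 1 + len respectively.
 */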
1545
Juan Quintela3d0684b2017-03-23 15:06:39 +01001546/**
 1547 * mig_throttle_guest_down: throttle down the guest
1548 *
1549 * Reduce amount of guest cpu execution to hopefully slow down memory
1550 * writes. If guest dirty memory rate is reduced below the rate at
1551 * which we can transfer pages to the destination then we should be
1552 * able to complete migration. Some workloads dirty memory way too
1553 * fast and will not effectively converge, even with auto-converge.
Jason J. Herne070afca2015-09-08 13:12:35 -04001554 */
1555static void mig_throttle_guest_down(void)
1556{
1557 MigrationState *s = migrate_get_current();
Daniel P. Berrange2594f562016-04-27 11:05:14 +01001558 uint64_t pct_initial = s->parameters.cpu_throttle_initial;
1559 uint64_t pct_icrement = s->parameters.cpu_throttle_increment;
Li Qiang4cbc9c72018-08-01 06:00:20 -07001560 int pct_max = s->parameters.max_cpu_throttle;
Jason J. Herne070afca2015-09-08 13:12:35 -04001561
1562 /* We have not started throttling yet. Let's start it. */
1563 if (!cpu_throttle_active()) {
1564 cpu_throttle_set(pct_initial);
1565 } else {
1566 /* Throttling already on, just increase the rate */
Li Qiang4cbc9c72018-08-01 06:00:20 -07001567 cpu_throttle_set(MIN(cpu_throttle_get_percentage() + pct_icrement,
1568 pct_max));
Jason J. Herne070afca2015-09-08 13:12:35 -04001569 }
1570}
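
/*
 * Worked example (illustrative only): assuming the default parameters
 * cpu_throttle_initial=20, cpu_throttle_increment=10 and
 * max_cpu_throttle=99, successive calls throttle the vCPUs at
 * 20%, 30%, 40%, ... and cap at 99%.
 */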
1571
Juan Quintela3d0684b2017-03-23 15:06:39 +01001572/**
1573 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
1574 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001575 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001576 * @current_addr: address for the zero page
1577 *
1578 * Update the xbzrle cache to reflect a page that's been sent as all 0.
Juan Quintela56e93d22015-05-07 19:33:31 +02001579 * The important thing is that a stale (not-yet-0'd) page be replaced
1580 * by the new data.
1581 * As a bonus, if the page wasn't in the cache it gets added so that
Juan Quintela3d0684b2017-03-23 15:06:39 +01001582 * when a small write is made into the 0'd page it gets XBZRLE sent.
Juan Quintela56e93d22015-05-07 19:33:31 +02001583 */
Juan Quintela6f37bb82017-03-13 19:26:29 +01001584static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
Juan Quintela56e93d22015-05-07 19:33:31 +02001585{
Juan Quintela6f37bb82017-03-13 19:26:29 +01001586 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
Juan Quintela56e93d22015-05-07 19:33:31 +02001587 return;
1588 }
1589
1590 /* We don't care if this fails to allocate a new cache page
1591 * as long as it updated an old one */
Juan Quintelac00e0922017-05-09 16:22:01 +02001592 cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page,
Juan Quintela93604472017-06-06 19:49:03 +02001593 ram_counters.dirty_sync_count);
Juan Quintela56e93d22015-05-07 19:33:31 +02001594}
1595
1596#define ENCODING_FLAG_XBZRLE 0x1
1597
1598/**
1599 * save_xbzrle_page: compress and send current page
1600 *
1601 * Returns: 1 means that we wrote the page
1602 * 0 means that page is identical to the one already sent
1603 * -1 means that xbzrle would be longer than normal
1604 *
Juan Quintela5a987732017-03-13 19:39:02 +01001605 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001606 * @current_data: pointer to the address of the page contents
1607 * @current_addr: addr of the page
Juan Quintela56e93d22015-05-07 19:33:31 +02001608 * @block: block that contains the page we want to send
1609 * @offset: offset inside the block for the page
1610 * @last_stage: if we are at the completion stage
Juan Quintela56e93d22015-05-07 19:33:31 +02001611 */
Juan Quintela204b88b2017-03-15 09:16:57 +01001612static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
Juan Quintela56e93d22015-05-07 19:33:31 +02001613 ram_addr_t current_addr, RAMBlock *block,
Juan Quintela072c2512017-03-14 10:27:31 +01001614 ram_addr_t offset, bool last_stage)
Juan Quintela56e93d22015-05-07 19:33:31 +02001615{
1616 int encoded_len = 0, bytes_xbzrle;
1617 uint8_t *prev_cached_page;
1618
Juan Quintela93604472017-06-06 19:49:03 +02001619 if (!cache_is_cached(XBZRLE.cache, current_addr,
1620 ram_counters.dirty_sync_count)) {
1621 xbzrle_counters.cache_miss++;
Juan Quintela56e93d22015-05-07 19:33:31 +02001622 if (!last_stage) {
1623 if (cache_insert(XBZRLE.cache, current_addr, *current_data,
Juan Quintela93604472017-06-06 19:49:03 +02001624 ram_counters.dirty_sync_count) == -1) {
Juan Quintela56e93d22015-05-07 19:33:31 +02001625 return -1;
1626 } else {
1627 /* update *current_data when the page has been
1628 inserted into cache */
1629 *current_data = get_cached_data(XBZRLE.cache, current_addr);
1630 }
1631 }
1632 return -1;
1633 }
1634
1635 prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
1636
1637 /* save current buffer into memory */
1638 memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
1639
1640 /* XBZRLE encoding (if there is no overflow) */
1641 encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
1642 TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
1643 TARGET_PAGE_SIZE);
Wei Yangca353802019-06-10 08:41:59 +08001644
1645 /*
1646 * Update the cache contents, so that it corresponds to the data
1647 * sent, in all cases except where we skip the page.
1648 */
1649 if (!last_stage && encoded_len != 0) {
1650 memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
1651 /*
1652 * In the case where we couldn't compress, ensure that the caller
1653 * sends the data from the cache, since the guest might have
1654 * changed the RAM since we copied it.
1655 */
1656 *current_data = prev_cached_page;
1657 }
1658
Juan Quintela56e93d22015-05-07 19:33:31 +02001659 if (encoded_len == 0) {
Juan Quintela55c44462017-01-23 22:32:05 +01001660 trace_save_xbzrle_page_skipping();
Juan Quintela56e93d22015-05-07 19:33:31 +02001661 return 0;
1662 } else if (encoded_len == -1) {
Juan Quintela55c44462017-01-23 22:32:05 +01001663 trace_save_xbzrle_page_overflow();
Juan Quintela93604472017-06-06 19:49:03 +02001664 xbzrle_counters.overflow++;
Juan Quintela56e93d22015-05-07 19:33:31 +02001665 return -1;
1666 }
1667
Juan Quintela56e93d22015-05-07 19:33:31 +02001668 /* Send XBZRLE based compressed page */
Juan Quintela2bf3aa82017-05-10 13:28:13 +02001669 bytes_xbzrle = save_page_header(rs, rs->f, block,
Juan Quintela204b88b2017-03-15 09:16:57 +01001670 offset | RAM_SAVE_FLAG_XBZRLE);
1671 qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE);
1672 qemu_put_be16(rs->f, encoded_len);
1673 qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len);
Juan Quintela56e93d22015-05-07 19:33:31 +02001674 bytes_xbzrle += encoded_len + 1 + 2;
Juan Quintela93604472017-06-06 19:49:03 +02001675 xbzrle_counters.pages++;
1676 xbzrle_counters.bytes += bytes_xbzrle;
1677 ram_counters.transferred += bytes_xbzrle;
Juan Quintela56e93d22015-05-07 19:33:31 +02001678
1679 return 1;
1680}
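
/*
 * Byte accounting for the XBZRLE path above, shown as an illustrative
 * sketch (not part of the original code):
 *
 *     bytes_xbzrle = page header (with RAM_SAVE_FLAG_XBZRLE set)
 *                  + 1 byte  ENCODING_FLAG_XBZRLE
 *                  + 2 bytes big-endian encoded_len
 *                  + encoded_len bytes of XBZRLE-compressed data
 *
 * which is why the code adds "encoded_len + 1 + 2" to the header size.
 */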
1681
Juan Quintela3d0684b2017-03-23 15:06:39 +01001682/**
1683 * migration_bitmap_find_dirty: find the next dirty page from start
Dr. David Alan Gilbertf3f491f2015-11-05 18:11:01 +00001684 *
Wei Yanga5f7b1a2019-05-11 07:37:29 +08001685 * Returns the page offset within the memory region of the start of a dirty page
Juan Quintela3d0684b2017-03-23 15:06:39 +01001686 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001687 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001688 * @rb: RAMBlock where to search for dirty pages
Juan Quintelaa935e302017-03-21 15:36:51 +01001689 * @start: page where we start the search
Dr. David Alan Gilbertf3f491f2015-11-05 18:11:01 +00001690 */
Juan Quintela56e93d22015-05-07 19:33:31 +02001691static inline
Juan Quintelaa935e302017-03-21 15:36:51 +01001692unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
Juan Quintelaf20e2862017-03-21 16:19:05 +01001693 unsigned long start)
Juan Quintela56e93d22015-05-07 19:33:31 +02001694{
Juan Quintela6b6712e2017-03-22 15:18:04 +01001695 unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
1696 unsigned long *bitmap = rb->bmap;
Juan Quintela56e93d22015-05-07 19:33:31 +02001697 unsigned long next;
1698
Yury Kotovfbd162e2019-02-15 20:45:46 +03001699 if (ramblock_is_ignored(rb)) {
Cédric Le Goaterb895de52018-05-14 08:57:00 +02001700 return size;
1701 }
1702
Wei Wang6eeb63f2018-12-11 16:24:52 +08001703 /*
1704 * When the free page optimization is enabled, we need to check the bitmap
1705 * to send the non-free pages rather than all the pages in the bulk stage.
1706 */
1707 if (!rs->fpo_enabled && rs->ram_bulk_stage && start > 0) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01001708 next = start + 1;
Juan Quintela56e93d22015-05-07 19:33:31 +02001709 } else {
Juan Quintela6b6712e2017-03-22 15:18:04 +01001710 next = find_next_bit(bitmap, size, start);
Juan Quintela56e93d22015-05-07 19:33:31 +02001711 }
1712
Juan Quintela6b6712e2017-03-22 15:18:04 +01001713 return next;
Juan Quintela56e93d22015-05-07 19:33:31 +02001714}
1715
Juan Quintela06b10682017-03-21 15:18:05 +01001716static inline bool migration_bitmap_clear_dirty(RAMState *rs,
Juan Quintelaf20e2862017-03-21 16:19:05 +01001717 RAMBlock *rb,
1718 unsigned long page)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001719{
1720 bool ret;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001721
Wei Wang386a9072018-12-11 16:24:49 +08001722 qemu_mutex_lock(&rs->bitmap_mutex);
Peter Xu002cad62019-06-03 14:50:56 +08001723
1724 /*
1725 * Clear dirty bitmap if needed. This _must_ be called before we
 1726      * send any of the pages in the chunk because we need to make sure
 1727      * we can capture further page content changes when we sync dirty
 1728      * log the next time. So as long as we are going to send any of
 1729      * the pages in the chunk, we clear the remote dirty bitmap for all.
1730 * Clearing it earlier won't be a problem, but too late will.
1731 */
1732 if (rb->clear_bmap && clear_bmap_test_and_clear(rb, page)) {
1733 uint8_t shift = rb->clear_bmap_shift;
1734 hwaddr size = 1ULL << (TARGET_PAGE_BITS + shift);
1735 hwaddr start = (page << TARGET_PAGE_BITS) & (-size);
1736
1737 /*
1738 * CLEAR_BITMAP_SHIFT_MIN should always guarantee this... this
 1739      * can make things easier sometimes since the start address
 1740      * of the small chunk will always be aligned to 64 pages, so the
 1741      * bitmap will always be aligned to unsigned long. We should
1742 * even be able to remove this restriction but I'm simply
1743 * keeping it.
1744 */
1745 assert(shift >= 6);
1746 trace_migration_bitmap_clear_dirty(rb->idstr, start, size, page);
1747 memory_region_clear_dirty_bitmap(rb->mr, start, size);
1748 }
1749
Juan Quintela6b6712e2017-03-22 15:18:04 +01001750 ret = test_and_clear_bit(page, rb->bmap);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001751
1752 if (ret) {
Juan Quintela0d8ec882017-03-13 21:21:41 +01001753 rs->migration_dirty_pages--;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001754 }
Wei Wang386a9072018-12-11 16:24:49 +08001755 qemu_mutex_unlock(&rs->bitmap_mutex);
1756
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001757 return ret;
1758}
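
/*
 * Worked example (illustrative only): with 4 KiB target pages and a
 * clear_bmap_shift of, say, 18, one clear_bmap bit covers
 *
 *     size = 1ULL << (TARGET_PAGE_BITS + shift) = 4 KiB << 18 = 1 GiB
 *
 * so the remote dirty bitmap is cleared in aligned 1 GiB chunks the
 * first time any page inside such a chunk is about to be sent.
 */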
1759
Peter Xu267691b2019-06-03 14:50:46 +08001760/* Called with RCU critical section */
Wei Yang7a3e9572019-08-08 11:31:55 +08001761static void ramblock_sync_dirty_bitmap(RAMState *rs, RAMBlock *rb)
Juan Quintela56e93d22015-05-07 19:33:31 +02001762{
Juan Quintela0d8ec882017-03-13 21:21:41 +01001763 rs->migration_dirty_pages +=
Wei Yang5d0980a2019-07-18 09:25:47 +08001764 cpu_physical_memory_sync_dirty_bitmap(rb, 0, rb->used_length,
Juan Quintela0d8ec882017-03-13 21:21:41 +01001765 &rs->num_dirty_pages_period);
Juan Quintela56e93d22015-05-07 19:33:31 +02001766}
1767
Juan Quintela3d0684b2017-03-23 15:06:39 +01001768/**
1769 * ram_pagesize_summary: calculate all the pagesizes of a VM
1770 *
1771 * Returns a summary bitmap of the page sizes of all RAMBlocks
1772 *
1773 * For VMs with just normal pages this is equivalent to the host page
1774 * size. If it's got some huge pages then it's the OR of all the
1775 * different page sizes.
Dr. David Alan Gilberte8ca1db2017-02-24 18:28:29 +00001776 */
1777uint64_t ram_pagesize_summary(void)
1778{
1779 RAMBlock *block;
1780 uint64_t summary = 0;
1781
Yury Kotovfbd162e2019-02-15 20:45:46 +03001782 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Dr. David Alan Gilberte8ca1db2017-02-24 18:28:29 +00001783 summary |= block->page_size;
1784 }
1785
1786 return summary;
1787}
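
/*
 * Worked example (illustrative only): a guest backed by normal 4 KiB
 * pages plus a 2 MiB hugetlbfs-backed region would report
 *
 *     summary = 0x1000 | 0x200000 = 0x201000
 *
 * i.e. one bit set per distinct page size in use.
 */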
1788
Xiao Guangrongaecbfe92019-01-11 14:37:30 +08001789uint64_t ram_get_total_transferred_pages(void)
1790{
1791 return ram_counters.normal + ram_counters.duplicate +
1792 compression_counters.pages + xbzrle_counters.pages;
1793}
1794
Xiao Guangrongb7340352018-06-04 17:55:12 +08001795static void migration_update_rates(RAMState *rs, int64_t end_time)
1796{
Xiao Guangrongbe8b02e2018-09-03 17:26:42 +08001797 uint64_t page_count = rs->target_page_count - rs->target_page_count_prev;
Xiao Guangrong76e03002018-09-06 15:01:00 +08001798 double compressed_size;
Xiao Guangrongb7340352018-06-04 17:55:12 +08001799
1800 /* calculate period counters */
1801 ram_counters.dirty_pages_rate = rs->num_dirty_pages_period * 1000
1802 / (end_time - rs->time_last_bitmap_sync);
1803
Xiao Guangrongbe8b02e2018-09-03 17:26:42 +08001804 if (!page_count) {
Xiao Guangrongb7340352018-06-04 17:55:12 +08001805 return;
1806 }
1807
1808 if (migrate_use_xbzrle()) {
1809 xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss -
Xiao Guangrongbe8b02e2018-09-03 17:26:42 +08001810 rs->xbzrle_cache_miss_prev) / page_count;
Xiao Guangrongb7340352018-06-04 17:55:12 +08001811 rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
1812 }
Xiao Guangrong76e03002018-09-06 15:01:00 +08001813
1814 if (migrate_use_compression()) {
1815 compression_counters.busy_rate = (double)(compression_counters.busy -
1816 rs->compress_thread_busy_prev) / page_count;
1817 rs->compress_thread_busy_prev = compression_counters.busy;
1818
1819 compressed_size = compression_counters.compressed_size -
1820 rs->compressed_size_prev;
1821 if (compressed_size) {
1822 double uncompressed_size = (compression_counters.pages -
1823 rs->compress_pages_prev) * TARGET_PAGE_SIZE;
1824
1825 /* Compression-Ratio = Uncompressed-size / Compressed-size */
1826 compression_counters.compression_rate =
1827 uncompressed_size / compressed_size;
1828
1829 rs->compress_pages_prev = compression_counters.pages;
1830 rs->compressed_size_prev = compression_counters.compressed_size;
1831 }
1832 }
Xiao Guangrongb7340352018-06-04 17:55:12 +08001833}
1834
Juan Quintela8d820d62017-03-13 19:35:50 +01001835static void migration_bitmap_sync(RAMState *rs)
Juan Quintela56e93d22015-05-07 19:33:31 +02001836{
1837 RAMBlock *block;
Juan Quintela56e93d22015-05-07 19:33:31 +02001838 int64_t end_time;
Juan Quintelac4bdf0c2017-03-28 14:59:54 +02001839 uint64_t bytes_xfer_now;
Juan Quintela56e93d22015-05-07 19:33:31 +02001840
Juan Quintela93604472017-06-06 19:49:03 +02001841 ram_counters.dirty_sync_count++;
Juan Quintela56e93d22015-05-07 19:33:31 +02001842
Juan Quintelaf664da82017-03-13 19:44:57 +01001843 if (!rs->time_last_bitmap_sync) {
1844 rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
Juan Quintela56e93d22015-05-07 19:33:31 +02001845 }
1846
1847 trace_migration_bitmap_sync_start();
Paolo Bonzini9c1f8f42016-09-22 16:08:31 +02001848 memory_global_dirty_log_sync();
Juan Quintela56e93d22015-05-07 19:33:31 +02001849
Juan Quintela108cfae2017-03-13 21:38:09 +01001850 qemu_mutex_lock(&rs->bitmap_mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02001851 rcu_read_lock();
Yury Kotovfbd162e2019-02-15 20:45:46 +03001852 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Wei Yang7a3e9572019-08-08 11:31:55 +08001853 ramblock_sync_dirty_bitmap(rs, block);
Juan Quintela56e93d22015-05-07 19:33:31 +02001854 }
Balamuruhan S650af892018-06-12 14:20:09 +05301855 ram_counters.remaining = ram_bytes_remaining();
Juan Quintela56e93d22015-05-07 19:33:31 +02001856 rcu_read_unlock();
Juan Quintela108cfae2017-03-13 21:38:09 +01001857 qemu_mutex_unlock(&rs->bitmap_mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02001858
Paolo Bonzini9458a9a2018-02-06 18:37:39 +01001859 memory_global_after_dirty_log_sync();
Juan Quintelaa66cd902017-03-28 15:02:43 +02001860 trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);
Chao Fan1ffb5df2017-03-14 09:55:07 +08001861
Juan Quintela56e93d22015-05-07 19:33:31 +02001862 end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
1863
 1864     /* more than 1 second = 1000 milliseconds */
Juan Quintelaf664da82017-03-13 19:44:57 +01001865 if (end_time > rs->time_last_bitmap_sync + 1000) {
Juan Quintela93604472017-06-06 19:49:03 +02001866 bytes_xfer_now = ram_counters.transferred;
Felipe Franciosid693c6f2017-05-24 17:10:01 +01001867
Peter Lieven9ac78b62017-09-26 12:33:16 +02001868 /* During block migration the auto-converge logic incorrectly detects
1869 * that ram migration makes no progress. Avoid this by disabling the
1870 * throttling logic during the bulk phase of block migration. */
1871 if (migrate_auto_converge() && !blk_mig_bulk_active()) {
Juan Quintela56e93d22015-05-07 19:33:31 +02001872 /* The following detection logic can be refined later. For now:
 1873           Check to see if the dirtied bytes exceed 50% of the approx.
1874 amount of bytes that just got transferred since the last time we
Jason J. Herne070afca2015-09-08 13:12:35 -04001875 were in this routine. If that happens twice, start or increase
1876 throttling */
Jason J. Herne070afca2015-09-08 13:12:35 -04001877
Felipe Franciosid693c6f2017-05-24 17:10:01 +01001878 if ((rs->num_dirty_pages_period * TARGET_PAGE_SIZE >
Juan Quintelaeac74152017-03-28 14:59:01 +02001879 (bytes_xfer_now - rs->bytes_xfer_prev) / 2) &&
Felipe Franciosib4a3c642017-05-24 17:10:03 +01001880 (++rs->dirty_rate_high_cnt >= 2)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02001881 trace_migration_throttle();
Juan Quintela8d820d62017-03-13 19:35:50 +01001882 rs->dirty_rate_high_cnt = 0;
Jason J. Herne070afca2015-09-08 13:12:35 -04001883 mig_throttle_guest_down();
Felipe Franciosid693c6f2017-05-24 17:10:01 +01001884 }
Juan Quintela56e93d22015-05-07 19:33:31 +02001885 }
Jason J. Herne070afca2015-09-08 13:12:35 -04001886
Xiao Guangrongb7340352018-06-04 17:55:12 +08001887 migration_update_rates(rs, end_time);
1888
Xiao Guangrongbe8b02e2018-09-03 17:26:42 +08001889 rs->target_page_count_prev = rs->target_page_count;
Felipe Franciosid693c6f2017-05-24 17:10:01 +01001890
1891 /* reset period counters */
Juan Quintelaf664da82017-03-13 19:44:57 +01001892 rs->time_last_bitmap_sync = end_time;
Juan Quintelaa66cd902017-03-28 15:02:43 +02001893 rs->num_dirty_pages_period = 0;
Felipe Franciosid2a4d852017-05-24 17:10:02 +01001894 rs->bytes_xfer_prev = bytes_xfer_now;
Juan Quintela56e93d22015-05-07 19:33:31 +02001895 }
Dr. David Alan Gilbert4addcd42015-12-16 11:47:36 +00001896 if (migrate_use_events()) {
Peter Xu3ab72382018-08-15 21:37:37 +08001897 qapi_event_send_migration_pass(ram_counters.dirty_sync_count);
Dr. David Alan Gilbert4addcd42015-12-16 11:47:36 +00001898 }
Juan Quintela56e93d22015-05-07 19:33:31 +02001899}
1900
Wei Wangbd227062018-12-11 16:24:51 +08001901static void migration_bitmap_sync_precopy(RAMState *rs)
1902{
1903 Error *local_err = NULL;
1904
1905 /*
1906 * The current notifier usage is just an optimization to migration, so we
1907 * don't stop the normal migration process in the error case.
1908 */
1909 if (precopy_notify(PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC, &local_err)) {
1910 error_report_err(local_err);
1911 }
1912
1913 migration_bitmap_sync(rs);
1914
1915 if (precopy_notify(PRECOPY_NOTIFY_AFTER_BITMAP_SYNC, &local_err)) {
1916 error_report_err(local_err);
1917 }
1918}
1919
Juan Quintela56e93d22015-05-07 19:33:31 +02001920/**
Xiao Guangrong6c97ec52018-08-21 16:10:22 +08001921 * save_zero_page_to_file: send the zero page to the file
1922 *
1923 * Returns the size of data written to the file, 0 means the page is not
1924 * a zero page
1925 *
1926 * @rs: current RAM state
1927 * @file: the file where the data is saved
1928 * @block: block that contains the page we want to send
1929 * @offset: offset inside the block for the page
1930 */
1931static int save_zero_page_to_file(RAMState *rs, QEMUFile *file,
1932 RAMBlock *block, ram_addr_t offset)
1933{
1934 uint8_t *p = block->host + offset;
1935 int len = 0;
1936
1937 if (is_zero_range(p, TARGET_PAGE_SIZE)) {
1938 len += save_page_header(rs, file, block, offset | RAM_SAVE_FLAG_ZERO);
1939 qemu_put_byte(file, 0);
1940 len += 1;
1941 }
1942 return len;
1943}
1944
1945/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001946 * save_zero_page: send the zero page to the stream
Juan Quintela56e93d22015-05-07 19:33:31 +02001947 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001948 * Returns the number of pages written.
Juan Quintela56e93d22015-05-07 19:33:31 +02001949 *
Juan Quintelaf7ccd612017-03-13 20:30:21 +01001950 * @rs: current RAM state
Juan Quintela56e93d22015-05-07 19:33:31 +02001951 * @block: block that contains the page we want to send
1952 * @offset: offset inside the block for the page
Juan Quintela56e93d22015-05-07 19:33:31 +02001953 */
Juan Quintela7faccdc2018-01-08 18:58:17 +01001954static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
Juan Quintela56e93d22015-05-07 19:33:31 +02001955{
Xiao Guangrong6c97ec52018-08-21 16:10:22 +08001956 int len = save_zero_page_to_file(rs, rs->f, block, offset);
Juan Quintela56e93d22015-05-07 19:33:31 +02001957
Xiao Guangrong6c97ec52018-08-21 16:10:22 +08001958 if (len) {
Juan Quintela93604472017-06-06 19:49:03 +02001959 ram_counters.duplicate++;
Xiao Guangrong6c97ec52018-08-21 16:10:22 +08001960 ram_counters.transferred += len;
1961 return 1;
Juan Quintela56e93d22015-05-07 19:33:31 +02001962 }
Xiao Guangrong6c97ec52018-08-21 16:10:22 +08001963 return -1;
Juan Quintela56e93d22015-05-07 19:33:31 +02001964}
1965
Juan Quintela57273092017-03-20 22:25:28 +01001966static void ram_release_pages(const char *rbname, uint64_t offset, int pages)
Pavel Butsykin53f09a12017-02-03 18:23:20 +03001967{
Juan Quintela57273092017-03-20 22:25:28 +01001968 if (!migrate_release_ram() || !migration_in_postcopy()) {
Pavel Butsykin53f09a12017-02-03 18:23:20 +03001969 return;
1970 }
1971
Juan Quintelaaaa20642017-03-21 11:35:24 +01001972 ram_discard_range(rbname, offset, pages << TARGET_PAGE_BITS);
Pavel Butsykin53f09a12017-02-03 18:23:20 +03001973}
1974
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08001975/*
1976 * @pages: the number of pages written by the control path,
1977 * < 0 - error
1978 * > 0 - number of pages written
1979 *
 1980  * Return true if the page has been saved, otherwise false is returned.
1981 */
1982static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
1983 int *pages)
1984{
1985 uint64_t bytes_xmit = 0;
1986 int ret;
1987
1988 *pages = -1;
1989 ret = ram_control_save_page(rs->f, block->offset, offset, TARGET_PAGE_SIZE,
1990 &bytes_xmit);
1991 if (ret == RAM_SAVE_CONTROL_NOT_SUPP) {
1992 return false;
1993 }
1994
1995 if (bytes_xmit) {
1996 ram_counters.transferred += bytes_xmit;
1997 *pages = 1;
1998 }
1999
2000 if (ret == RAM_SAVE_CONTROL_DELAYED) {
2001 return true;
2002 }
2003
2004 if (bytes_xmit > 0) {
2005 ram_counters.normal++;
2006 } else if (bytes_xmit == 0) {
2007 ram_counters.duplicate++;
2008 }
2009
2010 return true;
2011}
2012
Xiao Guangrong65dacaa2018-03-30 15:51:27 +08002013/*
2014 * directly send the page to the stream
2015 *
2016 * Returns the number of pages written.
2017 *
2018 * @rs: current RAM state
2019 * @block: block that contains the page we want to send
2020 * @offset: offset inside the block for the page
2021 * @buf: the page to be sent
 2022 * @async: send the page asynchronously
2023 */
2024static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
2025 uint8_t *buf, bool async)
2026{
2027 ram_counters.transferred += save_page_header(rs, rs->f, block,
2028 offset | RAM_SAVE_FLAG_PAGE);
2029 if (async) {
2030 qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE,
2031 migrate_release_ram() &
2032 migration_in_postcopy());
2033 } else {
2034 qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE);
2035 }
2036 ram_counters.transferred += TARGET_PAGE_SIZE;
2037 ram_counters.normal++;
2038 return 1;
2039}
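
/*
 * Illustrative sketch (not part of the original code) of what one
 * RAM_SAVE_FLAG_PAGE page adds to ram_counters.transferred:
 *
 *     save_page_header() bytes (8, plus the idstr for a new block)
 *   + TARGET_PAGE_SIZE raw page bytes
 */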
2040
Juan Quintela56e93d22015-05-07 19:33:31 +02002041/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002042 * ram_save_page: send the given page to the stream
Juan Quintela56e93d22015-05-07 19:33:31 +02002043 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002044 * Returns the number of pages written.
Dr. David Alan Gilbert3fd3c4b2015-12-10 16:31:46 +00002045 * < 0 - error
2046 * >=0 - Number of pages written - this might legally be 0
2047 * if xbzrle noticed the page was the same.
Juan Quintela56e93d22015-05-07 19:33:31 +02002048 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002049 * @rs: current RAM state
Juan Quintela56e93d22015-05-07 19:33:31 +02002050 * @block: block that contains the page we want to send
2051 * @offset: offset inside the block for the page
2052 * @last_stage: if we are at the completion stage
Juan Quintela56e93d22015-05-07 19:33:31 +02002053 */
Juan Quintelaa0a8aa12017-03-20 22:29:07 +01002054static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
Juan Quintela56e93d22015-05-07 19:33:31 +02002055{
2056 int pages = -1;
Juan Quintela56e93d22015-05-07 19:33:31 +02002057 uint8_t *p;
Juan Quintela56e93d22015-05-07 19:33:31 +02002058 bool send_async = true;
zhanghailianga08f6892016-01-15 11:37:44 +08002059 RAMBlock *block = pss->block;
Juan Quintelaa935e302017-03-21 15:36:51 +01002060 ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08002061 ram_addr_t current_addr = block->offset + offset;
Juan Quintela56e93d22015-05-07 19:33:31 +02002062
Dr. David Alan Gilbert2f68e392015-08-13 11:51:30 +01002063 p = block->host + offset;
Dr. David Alan Gilbert1db9d8e2017-04-26 19:37:21 +01002064 trace_ram_save_page(block->idstr, (uint64_t)offset, p);
Juan Quintela56e93d22015-05-07 19:33:31 +02002065
Juan Quintela56e93d22015-05-07 19:33:31 +02002066 XBZRLE_cache_lock();
Xiao Guangrongd7400a32018-03-30 15:51:26 +08002067 if (!rs->ram_bulk_stage && !migration_in_postcopy() &&
2068 migrate_use_xbzrle()) {
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08002069 pages = save_xbzrle_page(rs, &p, current_addr, block,
2070 offset, last_stage);
2071 if (!last_stage) {
2072 /* Can't send this cached data async, since the cache page
2073 * might get updated before it gets to the wire
Juan Quintela56e93d22015-05-07 19:33:31 +02002074 */
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08002075 send_async = false;
Juan Quintela56e93d22015-05-07 19:33:31 +02002076 }
2077 }
2078
2079 /* XBZRLE overflow or normal page */
2080 if (pages == -1) {
Xiao Guangrong65dacaa2018-03-30 15:51:27 +08002081 pages = save_normal_page(rs, block, offset, p, send_async);
Juan Quintela56e93d22015-05-07 19:33:31 +02002082 }
2083
2084 XBZRLE_cache_unlock();
2085
2086 return pages;
2087}
2088
Juan Quintelab9ee2f72016-01-15 11:40:13 +01002089static int ram_save_multifd_page(RAMState *rs, RAMBlock *block,
2090 ram_addr_t offset)
2091{
Ivan Ren1b81c972019-07-30 13:33:35 +08002092 if (multifd_queue_page(rs, block, offset) < 0) {
Ivan Ren713f7622019-06-25 21:18:17 +08002093 return -1;
2094 }
Juan Quintelab9ee2f72016-01-15 11:40:13 +01002095 ram_counters.normal++;
2096
2097 return 1;
2098}
2099
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002100static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
Xiao Guangrong6ef37712018-08-21 16:10:23 +08002101 ram_addr_t offset, uint8_t *source_buf)
Juan Quintela56e93d22015-05-07 19:33:31 +02002102{
Juan Quintela53518d92017-05-04 11:46:24 +02002103 RAMState *rs = ram_state;
Liang Lia7a9a882016-05-05 15:32:57 +08002104 uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002105 bool zero_page = false;
Xiao Guangrong6ef37712018-08-21 16:10:23 +08002106 int ret;
Juan Quintela56e93d22015-05-07 19:33:31 +02002107
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002108 if (save_zero_page_to_file(rs, f, block, offset)) {
2109 zero_page = true;
2110 goto exit;
2111 }
2112
Xiao Guangrong6ef37712018-08-21 16:10:23 +08002113 save_page_header(rs, f, block, offset | RAM_SAVE_FLAG_COMPRESS_PAGE);
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08002114
2115 /*
 2116      * copy it to an internal buffer to avoid it being modified by the VM,
 2117      * so that we can catch any error during compression and
2118 * decompression
2119 */
2120 memcpy(source_buf, p, TARGET_PAGE_SIZE);
Xiao Guangrong6ef37712018-08-21 16:10:23 +08002121 ret = qemu_put_compression_data(f, stream, source_buf, TARGET_PAGE_SIZE);
2122 if (ret < 0) {
2123 qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
Liang Lib3be2892016-05-05 15:32:54 +08002124 error_report("compressed data failed!");
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002125 return false;
Liang Lib3be2892016-05-05 15:32:54 +08002126 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002127
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002128exit:
Xiao Guangrong6ef37712018-08-21 16:10:23 +08002129 ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1);
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002130 return zero_page;
2131}
2132
2133static void
2134update_compress_thread_counts(const CompressParam *param, int bytes_xmit)
2135{
Xiao Guangrong76e03002018-09-06 15:01:00 +08002136 ram_counters.transferred += bytes_xmit;
2137
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002138 if (param->zero_page) {
2139 ram_counters.duplicate++;
Xiao Guangrong76e03002018-09-06 15:01:00 +08002140 return;
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002141 }
Xiao Guangrong76e03002018-09-06 15:01:00 +08002142
2143 /* 8 means a header with RAM_SAVE_FLAG_CONTINUE. */
2144 compression_counters.compressed_size += bytes_xmit - 8;
2145 compression_counters.pages++;
Juan Quintela56e93d22015-05-07 19:33:31 +02002146}
2147
Xiao Guangrong32b05492018-09-06 15:01:01 +08002148static bool save_page_use_compression(RAMState *rs);
2149
Juan Quintelace25d332017-03-15 11:00:51 +01002150static void flush_compressed_data(RAMState *rs)
Juan Quintela56e93d22015-05-07 19:33:31 +02002151{
2152 int idx, len, thread_count;
2153
Xiao Guangrong32b05492018-09-06 15:01:01 +08002154 if (!save_page_use_compression(rs)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02002155 return;
2156 }
2157 thread_count = migrate_compress_threads();
Liang Lia7a9a882016-05-05 15:32:57 +08002158
Liang Li0d9f9a52016-05-05 15:32:59 +08002159 qemu_mutex_lock(&comp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02002160 for (idx = 0; idx < thread_count; idx++) {
Liang Lia7a9a882016-05-05 15:32:57 +08002161 while (!comp_param[idx].done) {
Liang Li0d9f9a52016-05-05 15:32:59 +08002162 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02002163 }
Liang Lia7a9a882016-05-05 15:32:57 +08002164 }
Liang Li0d9f9a52016-05-05 15:32:59 +08002165 qemu_mutex_unlock(&comp_done_lock);
Liang Lia7a9a882016-05-05 15:32:57 +08002166
2167 for (idx = 0; idx < thread_count; idx++) {
2168 qemu_mutex_lock(&comp_param[idx].mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08002169 if (!comp_param[idx].quit) {
Juan Quintelace25d332017-03-15 11:00:51 +01002170 len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002171 /*
2172 * it's safe to fetch zero_page without holding comp_done_lock
2173 * as there is no further request submitted to the thread,
 2174          * i.e., the thread should be waiting for a request at this point.
2175 */
2176 update_compress_thread_counts(&comp_param[idx], len);
Juan Quintela56e93d22015-05-07 19:33:31 +02002177 }
Liang Lia7a9a882016-05-05 15:32:57 +08002178 qemu_mutex_unlock(&comp_param[idx].mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02002179 }
2180}
2181
2182static inline void set_compress_params(CompressParam *param, RAMBlock *block,
2183 ram_addr_t offset)
2184{
2185 param->block = block;
2186 param->offset = offset;
2187}
2188
Juan Quintelace25d332017-03-15 11:00:51 +01002189static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block,
2190 ram_addr_t offset)
Juan Quintela56e93d22015-05-07 19:33:31 +02002191{
2192 int idx, thread_count, bytes_xmit = -1, pages = -1;
Xiao Guangrong1d588722018-08-21 16:10:20 +08002193 bool wait = migrate_compress_wait_thread();
Juan Quintela56e93d22015-05-07 19:33:31 +02002194
2195 thread_count = migrate_compress_threads();
Liang Li0d9f9a52016-05-05 15:32:59 +08002196 qemu_mutex_lock(&comp_done_lock);
Xiao Guangrong1d588722018-08-21 16:10:20 +08002197retry:
2198 for (idx = 0; idx < thread_count; idx++) {
2199 if (comp_param[idx].done) {
2200 comp_param[idx].done = false;
2201 bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file);
2202 qemu_mutex_lock(&comp_param[idx].mutex);
2203 set_compress_params(&comp_param[idx], block, offset);
2204 qemu_cond_signal(&comp_param[idx].cond);
2205 qemu_mutex_unlock(&comp_param[idx].mutex);
2206 pages = 1;
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002207 update_compress_thread_counts(&comp_param[idx], bytes_xmit);
Juan Quintela56e93d22015-05-07 19:33:31 +02002208 break;
Juan Quintela56e93d22015-05-07 19:33:31 +02002209 }
2210 }
Xiao Guangrong1d588722018-08-21 16:10:20 +08002211
2212 /*
 2213      * wait for a free thread if the user specifies 'compress-wait-thread';
 2214      * otherwise we will post the page out in the main thread as a normal page.
2215 */
2216 if (pages < 0 && wait) {
2217 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
2218 goto retry;
2219 }
Liang Li0d9f9a52016-05-05 15:32:59 +08002220 qemu_mutex_unlock(&comp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02002221
2222 return pages;
2223}
2224
2225/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002226 * find_dirty_block: find the next dirty page and update any state
2227 * associated with the search process.
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002228 *
Wei Yanga5f7b1a2019-05-11 07:37:29 +08002229 * Returns true if a page is found
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002230 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002231 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002232 * @pss: data about the state of the current dirty page scan
2233 * @again: set to false if the search has scanned the whole of RAM
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002234 */
Juan Quintelaf20e2862017-03-21 16:19:05 +01002235static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002236{
Juan Quintelaf20e2862017-03-21 16:19:05 +01002237 pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
Juan Quintela6f37bb82017-03-13 19:26:29 +01002238 if (pss->complete_round && pss->block == rs->last_seen_block &&
Juan Quintelaa935e302017-03-21 15:36:51 +01002239 pss->page >= rs->last_page) {
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002240 /*
2241 * We've been once around the RAM and haven't found anything.
2242 * Give up.
2243 */
2244 *again = false;
2245 return false;
2246 }
Juan Quintelaa935e302017-03-21 15:36:51 +01002247 if ((pss->page << TARGET_PAGE_BITS) >= pss->block->used_length) {
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002248 /* Didn't find anything in this RAM Block */
Juan Quintelaa935e302017-03-21 15:36:51 +01002249 pss->page = 0;
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002250 pss->block = QLIST_NEXT_RCU(pss->block, next);
2251 if (!pss->block) {
Xiao Guangrong48df9d82018-09-06 15:00:59 +08002252 /*
2253 * If memory migration starts over, we will meet a dirtied page
 2254          * which may still exist in the compression threads' ring, so we
2255 * should flush the compressed data to make sure the new page
2256 * is not overwritten by the old one in the destination.
2257 *
 2258          * Also, if xbzrle is on, stop using data compression at this
2259 * point. In theory, xbzrle can do better than compression.
2260 */
2261 flush_compressed_data(rs);
2262
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002263 /* Hit the end of the list */
2264 pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
2265 /* Flag that we've looped */
2266 pss->complete_round = true;
Juan Quintela6f37bb82017-03-13 19:26:29 +01002267 rs->ram_bulk_stage = false;
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002268 }
2269 /* Didn't find anything this time, but try again on the new block */
2270 *again = true;
2271 return false;
2272 } else {
2273 /* Can go around again, but... */
2274 *again = true;
2275 /* We've found something so probably don't need to */
2276 return true;
2277 }
2278}
2279
Juan Quintela3d0684b2017-03-23 15:06:39 +01002280/**
2281 * unqueue_page: gets a page of the queue
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002282 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002283 * Helper for 'get_queued_page' - gets a page off the queue
2284 *
2285 * Returns the block of the page (or NULL if none available)
2286 *
Juan Quintelaec481c62017-03-20 22:12:40 +01002287 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002288 * @offset: used to return the offset within the RAMBlock
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002289 */
Juan Quintelaf20e2862017-03-21 16:19:05 +01002290static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002291{
2292 RAMBlock *block = NULL;
2293
Xiao Guangrongae526e32018-08-21 16:10:25 +08002294 if (QSIMPLEQ_EMPTY_ATOMIC(&rs->src_page_requests)) {
2295 return NULL;
2296 }
2297
Juan Quintelaec481c62017-03-20 22:12:40 +01002298 qemu_mutex_lock(&rs->src_page_req_mutex);
2299 if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
2300 struct RAMSrcPageRequest *entry =
2301 QSIMPLEQ_FIRST(&rs->src_page_requests);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002302 block = entry->rb;
2303 *offset = entry->offset;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002304
2305 if (entry->len > TARGET_PAGE_SIZE) {
2306 entry->len -= TARGET_PAGE_SIZE;
2307 entry->offset += TARGET_PAGE_SIZE;
2308 } else {
2309 memory_region_unref(block->mr);
Juan Quintelaec481c62017-03-20 22:12:40 +01002310 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002311 g_free(entry);
Dr. David Alan Gilberte03a34f2018-06-13 11:26:42 +01002312 migration_consume_urgent_request();
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002313 }
2314 }
Juan Quintelaec481c62017-03-20 22:12:40 +01002315 qemu_mutex_unlock(&rs->src_page_req_mutex);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002316
2317 return block;
2318}
2319
Juan Quintela3d0684b2017-03-23 15:06:39 +01002320/**
Li Qiangff1543a2019-05-24 23:28:32 -07002321 * get_queued_page: unqueue a page from the postcopy requests
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002322 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002323 * Skips pages that are already sent (!dirty)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002324 *
Wei Yanga5f7b1a2019-05-11 07:37:29 +08002325 * Returns true if a queued page is found
Juan Quintela3d0684b2017-03-23 15:06:39 +01002326 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002327 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002328 * @pss: data about the state of the current dirty page scan
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002329 */
Juan Quintelaf20e2862017-03-21 16:19:05 +01002330static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002331{
2332 RAMBlock *block;
2333 ram_addr_t offset;
2334 bool dirty;
2335
2336 do {
Juan Quintelaf20e2862017-03-21 16:19:05 +01002337 block = unqueue_page(rs, &offset);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002338 /*
2339 * We're sending this page, and since it's postcopy nothing else
2340 * will dirty it, and we must make sure it doesn't get sent again
2341 * even if this queue request was received after the background
2342 * search already sent it.
2343 */
2344 if (block) {
Juan Quintelaf20e2862017-03-21 16:19:05 +01002345 unsigned long page;
2346
Juan Quintela6b6712e2017-03-22 15:18:04 +01002347 page = offset >> TARGET_PAGE_BITS;
2348 dirty = test_bit(page, block->bmap);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002349 if (!dirty) {
Juan Quintela06b10682017-03-21 15:18:05 +01002350 trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
Juan Quintela6b6712e2017-03-22 15:18:04 +01002351 page, test_bit(page, block->unsentmap));
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002352 } else {
Juan Quintelaf20e2862017-03-21 16:19:05 +01002353 trace_get_queued_page(block->idstr, (uint64_t)offset, page);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002354 }
2355 }
2356
2357 } while (block && !dirty);
2358
2359 if (block) {
2360 /*
2361 * As soon as we start servicing pages out of order, then we have
2362 * to kill the bulk stage, since the bulk stage assumes
2363 * in (migration_bitmap_find_and_reset_dirty) that every page is
2364 * dirty, that's no longer true.
2365 */
Juan Quintela6f37bb82017-03-13 19:26:29 +01002366 rs->ram_bulk_stage = false;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002367
2368 /*
2369 * We want the background search to continue from the queued page
2370 * since the guest is likely to want other pages near to the page
2371 * it just requested.
2372 */
2373 pss->block = block;
Juan Quintelaa935e302017-03-21 15:36:51 +01002374 pss->page = offset >> TARGET_PAGE_BITS;
Wei Yang422314e2019-06-05 09:08:28 +08002375
2376 /*
 2377          * This unqueued page would break the "one round" check, even
 2378          * though this is really rare.
2379 */
2380 pss->complete_round = false;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002381 }
2382
2383 return !!block;
2384}
2385
Juan Quintela56e93d22015-05-07 19:33:31 +02002386/**
Juan Quintela5e58f962017-04-03 22:06:54 +02002387 * migration_page_queue_free: drop any remaining pages in the ram
2388 * request queue
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002389 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002390 * It should be empty at the end anyway, but in error cases there may
 2391 * be some left. In case any pages are left, we drop them.
2392 *
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002393 */
Juan Quintela83c13382017-05-04 11:45:01 +02002394static void migration_page_queue_free(RAMState *rs)
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002395{
Juan Quintelaec481c62017-03-20 22:12:40 +01002396 struct RAMSrcPageRequest *mspr, *next_mspr;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002397 /* This queue generally should be empty - but in the case of a failed
 2398      * migration it might have some droppings in it.
2399 */
2400 rcu_read_lock();
Juan Quintelaec481c62017-03-20 22:12:40 +01002401 QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002402 memory_region_unref(mspr->rb->mr);
Juan Quintelaec481c62017-03-20 22:12:40 +01002403 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002404 g_free(mspr);
2405 }
2406 rcu_read_unlock();
2407}
2408
2409/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002410 * ram_save_queue_pages: queue the page for transmission
2411 *
2412 * A request from postcopy destination for example.
2413 *
2414 * Returns zero on success or negative on error
2415 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002416 * @rbname: Name of the RAMBlock of the request. NULL means the
 2417 * same as the last one.
2418 * @start: starting address from the start of the RAMBlock
2419 * @len: length (in bytes) to send
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002420 */
Juan Quintela96506892017-03-14 18:41:03 +01002421int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002422{
2423 RAMBlock *ramblock;
Juan Quintela53518d92017-05-04 11:46:24 +02002424 RAMState *rs = ram_state;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002425
Juan Quintela93604472017-06-06 19:49:03 +02002426 ram_counters.postcopy_requests++;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002427 rcu_read_lock();
2428 if (!rbname) {
2429 /* Reuse last RAMBlock */
Juan Quintela68a098f2017-03-14 13:48:42 +01002430 ramblock = rs->last_req_rb;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002431
2432 if (!ramblock) {
2433 /*
2434 * Shouldn't happen, we can't reuse the last RAMBlock if
2435 * it's the 1st request.
2436 */
2437 error_report("ram_save_queue_pages no previous block");
2438 goto err;
2439 }
2440 } else {
2441 ramblock = qemu_ram_block_by_name(rbname);
2442
2443 if (!ramblock) {
2444 /* We shouldn't be asked for a non-existent RAMBlock */
2445 error_report("ram_save_queue_pages no block '%s'", rbname);
2446 goto err;
2447 }
Juan Quintela68a098f2017-03-14 13:48:42 +01002448 rs->last_req_rb = ramblock;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002449 }
2450 trace_ram_save_queue_pages(ramblock->idstr, start, len);
2451 if (start+len > ramblock->used_length) {
Juan Quintela9458ad62015-11-10 17:42:05 +01002452 error_report("%s request overrun start=" RAM_ADDR_FMT " len="
2453 RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002454 __func__, start, len, ramblock->used_length);
2455 goto err;
2456 }
2457
Juan Quintelaec481c62017-03-20 22:12:40 +01002458 struct RAMSrcPageRequest *new_entry =
2459 g_malloc0(sizeof(struct RAMSrcPageRequest));
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002460 new_entry->rb = ramblock;
2461 new_entry->offset = start;
2462 new_entry->len = len;
2463
2464 memory_region_ref(ramblock->mr);
Juan Quintelaec481c62017-03-20 22:12:40 +01002465 qemu_mutex_lock(&rs->src_page_req_mutex);
2466 QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
Dr. David Alan Gilberte03a34f2018-06-13 11:26:42 +01002467 migration_make_urgent_request();
Juan Quintelaec481c62017-03-20 22:12:40 +01002468 qemu_mutex_unlock(&rs->src_page_req_mutex);
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002469 rcu_read_unlock();
2470
2471 return 0;
2472
2473err:
2474 rcu_read_unlock();
2475 return -1;
2476}
2477
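/*
 * A minimal sketch (illustration only, not from the original ram.c) of how
 * a caller might queue a single target page with ram_save_queue_pages().
 * The helper name and the one-page granularity are assumptions; in practice
 * the requests come from the return-path handling of destination page
 * faults during postcopy.
 */
static __attribute__((unused)) int
example_queue_one_page(const char *rbname, ram_addr_t start)
{
    /* Queue exactly one target page; a NULL rbname reuses the last block */
    return ram_save_queue_pages(rbname, start, TARGET_PAGE_SIZE);
}
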
Xiao Guangrongd7400a32018-03-30 15:51:26 +08002478static bool save_page_use_compression(RAMState *rs)
2479{
2480 if (!migrate_use_compression()) {
2481 return false;
2482 }
2483
2484 /*
 2485 * If xbzrle is on, stop using data compression after the first
 2486 * round of migration even if compression is enabled. In theory,
2487 * xbzrle can do better than compression.
2488 */
2489 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
2490 return true;
2491 }
2492
2493 return false;
2494}
2495
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002496/*
 2497 * Try to compress the page before posting it out. Return true if the page
 2498 * has been properly handled by compression; otherwise it needs other
 2499 * paths to handle it.
2500 */
2501static bool save_compress_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
2502{
2503 if (!save_page_use_compression(rs)) {
2504 return false;
2505 }
2506
2507 /*
2508 * When starting the process of a new block, the first page of
2509 * the block should be sent out before other pages in the same
 2510 * block, and all the pages in the last block should have been sent
 2511 * out. Keeping this order is important, because the 'cont' flag
2512 * is used to avoid resending the block name.
2513 *
 2514 * We post the first page as a normal page because compression takes
 2515 * a significant amount of CPU time.
2516 */
2517 if (block != rs->last_sent_block) {
2518 flush_compressed_data(rs);
2519 return false;
2520 }
2521
2522 if (compress_page_with_multi_thread(rs, block, offset) > 0) {
2523 return true;
2524 }
2525
Xiao Guangrong76e03002018-09-06 15:01:00 +08002526 compression_counters.busy++;
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002527 return false;
2528}
2529
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002530/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002531 * ram_save_target_page: save one target page
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002532 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002533 * Returns the number of pages written
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002534 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002535 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002536 * @pss: data about the page we want to send
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002537 * @last_stage: if we are at the completion stage
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002538 */
Juan Quintelaa0a8aa12017-03-20 22:29:07 +01002539static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
Juan Quintelaf20e2862017-03-21 16:19:05 +01002540 bool last_stage)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002541{
Xiao Guangronga8ec91f2018-03-30 15:51:25 +08002542 RAMBlock *block = pss->block;
2543 ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
2544 int res;
2545
2546 if (control_save_page(rs, block, offset, &res)) {
2547 return res;
2548 }
2549
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002550 if (save_compress_page(rs, block, offset)) {
2551 return 1;
Xiao Guangrongd7400a32018-03-30 15:51:26 +08002552 }
2553
2554 res = save_zero_page(rs, block, offset);
2555 if (res > 0) {
2556 /* Must let xbzrle know, otherwise a previous (now 0'd) cached
2557 * page would be stale
2558 */
2559 if (!save_page_use_compression(rs)) {
2560 XBZRLE_cache_lock();
2561 xbzrle_cache_zero_page(rs, block->offset + offset);
2562 XBZRLE_cache_unlock();
2563 }
2564 ram_release_pages(block->idstr, offset, res);
2565 return res;
2566 }
2567
Xiao Guangrongda3f56c2018-03-30 15:51:28 +08002568 /*
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002569 * Do not use multifd when compression is in use, as the first page of
 2570 * a new block must be posted out before sending any compressed page.
Xiao Guangrongda3f56c2018-03-30 15:51:28 +08002571 */
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002572 if (!save_page_use_compression(rs) && migrate_use_multifd()) {
Juan Quintelab9ee2f72016-01-15 11:40:13 +01002573 return ram_save_multifd_page(rs, block, offset);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002574 }
2575
Xiao Guangrong1faa5662018-03-30 15:51:24 +08002576 return ram_save_page(rs, pss, last_stage);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002577}
2578
2579/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002580 * ram_save_host_page: save a whole host page
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002581 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002582 * Starting at the page in @pss, send pages up to the end of the current
 2583 * host page. It's valid for the initial offset to point into the middle
 2584 * of a host page, in which case the remainder of the host page is sent.
2585 * Only dirty target pages are sent. Note that the host page size may
2586 * be a huge page for this block.
Dr. David Alan Gilbert1eb3fc02017-05-17 17:58:09 +01002587 * The saving stops at the boundary of the used_length of the block
2588 * if the RAMBlock isn't a multiple of the host page size.
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002589 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002590 * Returns the number of pages written or negative on error
2591 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002592 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002594 * @pss: data about the page we want to send
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002595 * @last_stage: if we are at the completion stage
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002596 */
Juan Quintelaa0a8aa12017-03-20 22:29:07 +01002597static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
Juan Quintelaf20e2862017-03-21 16:19:05 +01002598 bool last_stage)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002599{
2600 int tmppages, pages = 0;
Juan Quintelaa935e302017-03-21 15:36:51 +01002601 size_t pagesize_bits =
2602 qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
Dr. David Alan Gilbert4c011c32017-02-24 18:28:39 +00002603
Yury Kotovfbd162e2019-02-15 20:45:46 +03002604 if (ramblock_is_ignored(pss->block)) {
Cédric Le Goaterb895de52018-05-14 08:57:00 +02002605 error_report("block %s should not be migrated !", pss->block->idstr);
2606 return 0;
2607 }
2608
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002609 do {
Xiao Guangrong1faa5662018-03-30 15:51:24 +08002610 /* Check if the page is dirty and, if so, send it */
2611 if (!migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
2612 pss->page++;
2613 continue;
2614 }
2615
Juan Quintelaf20e2862017-03-21 16:19:05 +01002616 tmppages = ram_save_target_page(rs, pss, last_stage);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002617 if (tmppages < 0) {
2618 return tmppages;
2619 }
2620
2621 pages += tmppages;
Xiao Guangrong1faa5662018-03-30 15:51:24 +08002622 if (pss->block->unsentmap) {
2623 clear_bit(pss->page, pss->block->unsentmap);
2624 }
2625
Juan Quintelaa935e302017-03-21 15:36:51 +01002626 pss->page++;
Dr. David Alan Gilbert1eb3fc02017-05-17 17:58:09 +01002627 } while ((pss->page & (pagesize_bits - 1)) &&
2628 offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS));
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002629
2630 /* The offset we leave with is the last one we looked at */
Juan Quintelaa935e302017-03-21 15:36:51 +01002631 pss->page--;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002632 return pages;
2633}
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002634
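/*
 * Worked example, assuming 4 KiB target pages and a RAMBlock backed by
 * 2 MiB huge pages: qemu_ram_pagesize() returns 2 MiB, so pagesize_bits =
 * 2 MiB / 4 KiB = 512, and the loop above keeps sending while
 * (pss->page & 511) != 0, i.e. until it crosses the next 512-target-page
 * (host page) boundary or runs past the block's used_length.
 */
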
2635/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002636 * ram_find_and_save_block: finds a dirty page and sends it to f
Juan Quintela56e93d22015-05-07 19:33:31 +02002637 *
2638 * Called within an RCU critical section.
2639 *
Xiao Guangronge8f37352018-09-03 17:26:44 +08002640 * Returns the number of pages written where zero means no dirty pages,
2641 * or negative on error
Juan Quintela56e93d22015-05-07 19:33:31 +02002642 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002643 * @rs: current RAM state
Juan Quintela56e93d22015-05-07 19:33:31 +02002644 * @last_stage: if we are at the completion stage
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002645 *
2646 * On systems where host-page-size > target-page-size it will send all the
2647 * pages in a host page that are dirty.
Juan Quintela56e93d22015-05-07 19:33:31 +02002648 */
2649
Juan Quintelace25d332017-03-15 11:00:51 +01002650static int ram_find_and_save_block(RAMState *rs, bool last_stage)
Juan Quintela56e93d22015-05-07 19:33:31 +02002651{
Dr. David Alan Gilbertb8fb8cb2015-09-23 15:27:10 +01002652 PageSearchStatus pss;
Juan Quintela56e93d22015-05-07 19:33:31 +02002653 int pages = 0;
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002654 bool again, found;
Juan Quintela56e93d22015-05-07 19:33:31 +02002655
Ashijeet Acharya0827b9e2017-02-08 19:58:45 +05302656 /* No dirty page as there is zero RAM */
2657 if (!ram_bytes_total()) {
2658 return pages;
2659 }
2660
Juan Quintela6f37bb82017-03-13 19:26:29 +01002661 pss.block = rs->last_seen_block;
Juan Quintelaa935e302017-03-21 15:36:51 +01002662 pss.page = rs->last_page;
Dr. David Alan Gilbertb8fb8cb2015-09-23 15:27:10 +01002663 pss.complete_round = false;
2664
2665 if (!pss.block) {
2666 pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
2667 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002668
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002669 do {
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002670 again = true;
Juan Quintelaf20e2862017-03-21 16:19:05 +01002671 found = get_queued_page(rs, &pss);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002672
2673 if (!found) {
2674 /* priority queue empty, so just search for something dirty */
Juan Quintelaf20e2862017-03-21 16:19:05 +01002675 found = find_dirty_block(rs, &pss, &again);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002676 }
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002677
2678 if (found) {
Juan Quintelaf20e2862017-03-21 16:19:05 +01002679 pages = ram_save_host_page(rs, &pss, last_stage);
Juan Quintela56e93d22015-05-07 19:33:31 +02002680 }
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002681 } while (!pages && again);
Juan Quintela56e93d22015-05-07 19:33:31 +02002682
Juan Quintela6f37bb82017-03-13 19:26:29 +01002683 rs->last_seen_block = pss.block;
Juan Quintelaa935e302017-03-21 15:36:51 +01002684 rs->last_page = pss.page;
Juan Quintela56e93d22015-05-07 19:33:31 +02002685
2686 return pages;
2687}
2688
2689void acct_update_position(QEMUFile *f, size_t size, bool zero)
2690{
2691 uint64_t pages = size / TARGET_PAGE_SIZE;
Juan Quintelaf7ccd612017-03-13 20:30:21 +01002692
Juan Quintela56e93d22015-05-07 19:33:31 +02002693 if (zero) {
Juan Quintela93604472017-06-06 19:49:03 +02002694 ram_counters.duplicate += pages;
Juan Quintela56e93d22015-05-07 19:33:31 +02002695 } else {
Juan Quintela93604472017-06-06 19:49:03 +02002696 ram_counters.normal += pages;
2697 ram_counters.transferred += size;
Juan Quintela56e93d22015-05-07 19:33:31 +02002698 qemu_update_position(f, size);
2699 }
2700}
2701
Yury Kotovfbd162e2019-02-15 20:45:46 +03002702static uint64_t ram_bytes_total_common(bool count_ignored)
Juan Quintela56e93d22015-05-07 19:33:31 +02002703{
2704 RAMBlock *block;
2705 uint64_t total = 0;
2706
2707 rcu_read_lock();
Yury Kotovfbd162e2019-02-15 20:45:46 +03002708 if (count_ignored) {
2709 RAMBLOCK_FOREACH_MIGRATABLE(block) {
2710 total += block->used_length;
2711 }
2712 } else {
2713 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
2714 total += block->used_length;
2715 }
Peter Xu99e15582017-05-12 12:17:39 +08002716 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002717 rcu_read_unlock();
2718 return total;
2719}
2720
Yury Kotovfbd162e2019-02-15 20:45:46 +03002721uint64_t ram_bytes_total(void)
2722{
2723 return ram_bytes_total_common(false);
2724}
2725
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002726static void xbzrle_load_setup(void)
Juan Quintela56e93d22015-05-07 19:33:31 +02002727{
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002728 XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
Juan Quintela56e93d22015-05-07 19:33:31 +02002729}
2730
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002731static void xbzrle_load_cleanup(void)
2732{
2733 g_free(XBZRLE.decoded_buf);
2734 XBZRLE.decoded_buf = NULL;
2735}
2736
Peter Xu7d7c96b2017-10-19 14:31:58 +08002737static void ram_state_cleanup(RAMState **rsp)
2738{
Dr. David Alan Gilbertb9ccaf62018-02-12 16:03:39 +00002739 if (*rsp) {
2740 migration_page_queue_free(*rsp);
2741 qemu_mutex_destroy(&(*rsp)->bitmap_mutex);
2742 qemu_mutex_destroy(&(*rsp)->src_page_req_mutex);
2743 g_free(*rsp);
2744 *rsp = NULL;
2745 }
Peter Xu7d7c96b2017-10-19 14:31:58 +08002746}
2747
Peter Xu84593a02017-10-19 14:31:59 +08002748static void xbzrle_cleanup(void)
2749{
2750 XBZRLE_cache_lock();
2751 if (XBZRLE.cache) {
2752 cache_fini(XBZRLE.cache);
2753 g_free(XBZRLE.encoded_buf);
2754 g_free(XBZRLE.current_buf);
2755 g_free(XBZRLE.zero_target_page);
2756 XBZRLE.cache = NULL;
2757 XBZRLE.encoded_buf = NULL;
2758 XBZRLE.current_buf = NULL;
2759 XBZRLE.zero_target_page = NULL;
2760 }
2761 XBZRLE_cache_unlock();
2762}
2763
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002764static void ram_save_cleanup(void *opaque)
Juan Quintela56e93d22015-05-07 19:33:31 +02002765{
Juan Quintela53518d92017-05-04 11:46:24 +02002766 RAMState **rsp = opaque;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002767 RAMBlock *block;
Juan Quintelaeb859c52017-03-13 21:51:55 +01002768
Li Zhijian2ff64032015-07-02 20:18:05 +08002769 /* The caller must hold the iothread lock or be in a bh, so there is
Yi Wang46334562019-04-15 14:51:29 +08002770 * no write race against the migration bitmap
Li Zhijian2ff64032015-07-02 20:18:05 +08002771 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002772 memory_global_dirty_log_stop();
2773
Yury Kotovfbd162e2019-02-15 20:45:46 +03002774 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Peter Xu002cad62019-06-03 14:50:56 +08002775 g_free(block->clear_bmap);
2776 block->clear_bmap = NULL;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002777 g_free(block->bmap);
2778 block->bmap = NULL;
2779 g_free(block->unsentmap);
2780 block->unsentmap = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02002781 }
2782
Peter Xu84593a02017-10-19 14:31:59 +08002783 xbzrle_cleanup();
Juan Quintelaf0afa332017-06-28 11:52:28 +02002784 compress_threads_save_cleanup();
Peter Xu7d7c96b2017-10-19 14:31:58 +08002785 ram_state_cleanup(rsp);
Juan Quintela56e93d22015-05-07 19:33:31 +02002786}
2787
Juan Quintela6f37bb82017-03-13 19:26:29 +01002788static void ram_state_reset(RAMState *rs)
Juan Quintela56e93d22015-05-07 19:33:31 +02002789{
Juan Quintela6f37bb82017-03-13 19:26:29 +01002790 rs->last_seen_block = NULL;
2791 rs->last_sent_block = NULL;
Juan Quintela269ace22017-03-21 15:23:31 +01002792 rs->last_page = 0;
Juan Quintela6f37bb82017-03-13 19:26:29 +01002793 rs->last_version = ram_list.version;
2794 rs->ram_bulk_stage = true;
Wei Wang6eeb63f2018-12-11 16:24:52 +08002795 rs->fpo_enabled = false;
Juan Quintela56e93d22015-05-07 19:33:31 +02002796}
2797
2798#define MAX_WAIT 50 /* ms, half buffered_file limit */
2799
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00002800/*
2801 * 'expected' is the value you expect the bitmap mostly to be full
2802 * of; it won't bother printing lines that are all this value.
2804 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002805void ram_debug_dump_bitmap(unsigned long *todump, bool expected,
2806 unsigned long pages)
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00002807{
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00002808 int64_t cur;
2809 int64_t linelen = 128;
2810 char linebuf[129];
2811
Juan Quintela6b6712e2017-03-22 15:18:04 +01002812 for (cur = 0; cur < pages; cur += linelen) {
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00002813 int64_t curb;
2814 bool found = false;
2815 /*
2816 * Last line; catch the case where the line length
2817 * is longer than remaining ram
2818 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002819 if (cur + linelen > pages) {
2820 linelen = pages - cur;
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00002821 }
2822 for (curb = 0; curb < linelen; curb++) {
2823 bool thisbit = test_bit(cur + curb, todump);
2824 linebuf[curb] = thisbit ? '1' : '.';
2825 found = found || (thisbit != expected);
2826 }
2827 if (found) {
2828 linebuf[curb] = '\0';
2829 fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
2830 }
2831 }
2832}
2833
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002834/* **** functions for postcopy ***** */
2835
Pavel Butsykinced1c612017-02-03 18:23:21 +03002836void ram_postcopy_migrated_memory_release(MigrationState *ms)
2837{
2838 struct RAMBlock *block;
Pavel Butsykinced1c612017-02-03 18:23:21 +03002839
Yury Kotovfbd162e2019-02-15 20:45:46 +03002840 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01002841 unsigned long *bitmap = block->bmap;
2842 unsigned long range = block->used_length >> TARGET_PAGE_BITS;
2843 unsigned long run_start = find_next_zero_bit(bitmap, range, 0);
Pavel Butsykinced1c612017-02-03 18:23:21 +03002844
2845 while (run_start < range) {
2846 unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
Juan Quintelaaaa20642017-03-21 11:35:24 +01002847 ram_discard_range(block->idstr, run_start << TARGET_PAGE_BITS,
Pavel Butsykinced1c612017-02-03 18:23:21 +03002848 (run_end - run_start) << TARGET_PAGE_BITS);
2849 run_start = find_next_zero_bit(bitmap, range, run_end + 1);
2850 }
2851 }
2852}
2853
Juan Quintela3d0684b2017-03-23 15:06:39 +01002854/**
2855 * postcopy_send_discard_bm_ram: discard a RAMBlock
2856 *
2857 * Returns zero on success
2858 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002859 * Callback from postcopy_each_ram_send_discard for each RAMBlock
2860 * Note: At this point the 'unsentmap' is the processed bitmap combined
2861 * with the dirtymap; so a '1' means it's either dirty or unsent.
Juan Quintela3d0684b2017-03-23 15:06:39 +01002862 *
2863 * @ms: current migration state
Wei Yang89dab312019-07-15 10:05:49 +08002864 * @block: RAMBlock to discard
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002865 */
Wei Yang810cf2b2019-07-24 09:07:21 +08002866static int postcopy_send_discard_bm_ram(MigrationState *ms, RAMBlock *block)
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002867{
Juan Quintela6b6712e2017-03-22 15:18:04 +01002868 unsigned long end = block->used_length >> TARGET_PAGE_BITS;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002869 unsigned long current;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002870 unsigned long *unsentmap = block->unsentmap;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002871
Juan Quintela6b6712e2017-03-22 15:18:04 +01002872 for (current = 0; current < end; ) {
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002873 unsigned long one = find_next_bit(unsentmap, end, current);
Wei Yang33a5cb622019-06-27 10:08:21 +08002874 unsigned long zero, discard_length;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002875
Wei Yang33a5cb622019-06-27 10:08:21 +08002876 if (one >= end) {
2877 break;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002878 }
Wei Yang33a5cb622019-06-27 10:08:21 +08002879
2880 zero = find_next_zero_bit(unsentmap, end, one + 1);
2881
2882 if (zero >= end) {
2883 discard_length = end - one;
2884 } else {
2885 discard_length = zero - one;
2886 }
Wei Yang810cf2b2019-07-24 09:07:21 +08002887 postcopy_discard_send_range(ms, one, discard_length);
Wei Yang33a5cb622019-06-27 10:08:21 +08002888 current = one + discard_length;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002889 }
2890
2891 return 0;
2892}
2893
Juan Quintela3d0684b2017-03-23 15:06:39 +01002894/**
2895 * postcopy_each_ram_send_discard: discard all RAMBlocks
2896 *
2897 * Returns 0 for success or negative for error
2898 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002899 * Utility for the outgoing postcopy code.
2900 * Calls postcopy_send_discard_bm_ram for each RAMBlock
2901 * passing it bitmap indexes and name.
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002902 * (qemu_ram_foreach_block ends up passing unscaled lengths
2903 * which would mean postcopy code would have to deal with target page)
Juan Quintela3d0684b2017-03-23 15:06:39 +01002904 *
2905 * @ms: current migration state
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002906 */
2907static int postcopy_each_ram_send_discard(MigrationState *ms)
2908{
2909 struct RAMBlock *block;
2910 int ret;
2911
Yury Kotovfbd162e2019-02-15 20:45:46 +03002912 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Wei Yang810cf2b2019-07-24 09:07:21 +08002913 postcopy_discard_send_init(ms, block->idstr);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002914
2915 /*
2916 * Postcopy sends chunks of bitmap over the wire, but it
2917 * just needs indexes at this point, avoids it having
2918 * target page specific code.
2919 */
Wei Yang810cf2b2019-07-24 09:07:21 +08002920 ret = postcopy_send_discard_bm_ram(ms, block);
2921 postcopy_discard_send_finish(ms);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002922 if (ret) {
2923 return ret;
2924 }
2925 }
2926
2927 return 0;
2928}
2929
Juan Quintela3d0684b2017-03-23 15:06:39 +01002930/**
 2931 * postcopy_chunk_hostpages_pass: canonicalize bitmap in host pages
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002932 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002933 * Helper for postcopy_chunk_hostpages; it's called twice to
2934 * canonicalize the two bitmaps, that are similar, but one is
2935 * inverted.
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002936 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002937 * Postcopy requires that all target pages in a host page are dirty or
2938 * clean, not a mix. This function canonicalizes the bitmaps.
2939 *
2940 * @ms: current migration state
2941 * @unsent_pass: if true we need to canonicalize partially unsent host pages
2942 * otherwise we need to canonicalize partially dirty host pages
2943 * @block: block that contains the page we want to canonicalize
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002944 */
2945static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
Wei Yang810cf2b2019-07-24 09:07:21 +08002946 RAMBlock *block)
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002947{
Juan Quintela53518d92017-05-04 11:46:24 +02002948 RAMState *rs = ram_state;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002949 unsigned long *bitmap = block->bmap;
2950 unsigned long *unsentmap = block->unsentmap;
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00002951 unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002952 unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002953 unsigned long run_start;
2954
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00002955 if (block->page_size == TARGET_PAGE_SIZE) {
2956 /* Easy case - TPS==HPS for a non-huge page RAMBlock */
2957 return;
2958 }
2959
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002960 if (unsent_pass) {
2961 /* Find a sent page */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002962 run_start = find_next_zero_bit(unsentmap, pages, 0);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002963 } else {
2964 /* Find a dirty page */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002965 run_start = find_next_bit(bitmap, pages, 0);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002966 }
2967
Juan Quintela6b6712e2017-03-22 15:18:04 +01002968 while (run_start < pages) {
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002969
2970 /*
2971 * If the start of this run of pages is in the middle of a host
2972 * page, then we need to fixup this host page.
2973 */
Wei Yang9dec3cc2019-08-06 08:46:48 +08002974 if (QEMU_IS_ALIGNED(run_start, host_ratio)) {
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002975 /* Find the end of this run */
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002976 if (unsent_pass) {
Wei Yangdad45ab2019-08-06 08:46:47 +08002977 run_start = find_next_bit(unsentmap, pages, run_start + 1);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002978 } else {
Wei Yangdad45ab2019-08-06 08:46:47 +08002979 run_start = find_next_zero_bit(bitmap, pages, run_start + 1);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002980 }
2981 /*
2982 * If the end isn't at the start of a host page, then the
2983 * run doesn't finish at the end of a host page
2984 * and we need to discard.
2985 */
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002986 }
2987
Wei Yang9dec3cc2019-08-06 08:46:48 +08002988 if (!QEMU_IS_ALIGNED(run_start, host_ratio)) {
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002989 unsigned long page;
Wei Yangdad45ab2019-08-06 08:46:47 +08002990 unsigned long fixup_start_addr = QEMU_ALIGN_DOWN(run_start,
2991 host_ratio);
2992 run_start = QEMU_ALIGN_UP(run_start, host_ratio);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002993
2994 /* Tell the destination to discard this page */
2995 if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
2996 /* For the unsent_pass we:
2997 * discard partially sent pages
2998 * For the !unsent_pass (dirty) we:
2999 * discard partially dirty pages that were sent
3000 * (any partially sent pages were already discarded
3001 * by the previous unsent_pass)
3002 */
Wei Yang810cf2b2019-07-24 09:07:21 +08003003 postcopy_discard_send_range(ms, fixup_start_addr, host_ratio);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00003004 }
3005
3006 /* Clean up the bitmap */
3007 for (page = fixup_start_addr;
3008 page < fixup_start_addr + host_ratio; page++) {
3009 /* All pages in this host page are now not sent */
3010 set_bit(page, unsentmap);
3011
3012 /*
3013 * Remark them as dirty, updating the count for any pages
3014 * that weren't previously dirty.
3015 */
Juan Quintela0d8ec882017-03-13 21:21:41 +01003016 rs->migration_dirty_pages += !test_and_set_bit(page, bitmap);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00003017 }
3018 }
3019
3020 if (unsent_pass) {
3021 /* Find the next sent page for the next iteration */
Juan Quintela6b6712e2017-03-22 15:18:04 +01003022 run_start = find_next_zero_bit(unsentmap, pages, run_start);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00003023 } else {
3024 /* Find the next dirty page for the next iteration */
Juan Quintela6b6712e2017-03-22 15:18:04 +01003025 run_start = find_next_bit(bitmap, pages, run_start);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00003026 }
3027 }
3028}
3029
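/*
 * Worked example for the dirty (!unsent_pass) pass, assuming host_ratio =
 * 512 (2 MiB host pages, 4 KiB target pages): a dirty run starting at
 * target page 1000 is not 512-aligned, so fixup_start_addr =
 * QEMU_ALIGN_DOWN(1000, 512) = 512 and run_start is advanced to
 * QEMU_ALIGN_UP(1000, 512) = 1024; target pages 512..1023 are then
 * re-marked unsent and dirty so the whole host page gets resent.
 */
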
Juan Quintela3d0684b2017-03-23 15:06:39 +01003030/**
Wei Yang89dab312019-07-15 10:05:49 +08003031 * postcopy_chunk_hostpages: discard any partially sent host page
Juan Quintela3d0684b2017-03-23 15:06:39 +01003032 *
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00003033 * Utility for the outgoing postcopy code.
3034 *
3035 * Discard any partially sent host-page size chunks, mark any partially
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00003036 * dirty host-page size chunks as all dirty. In this case the host-page
3037 * is the host-page for the particular RAMBlock, i.e. it might be a huge page
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00003038 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01003039 * Returns zero on success
3040 *
3041 * @ms: current migration state
Juan Quintela6b6712e2017-03-22 15:18:04 +01003042 * @block: block we want to work with
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00003043 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01003044static int postcopy_chunk_hostpages(MigrationState *ms, RAMBlock *block)
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00003045{
Wei Yang810cf2b2019-07-24 09:07:21 +08003046 postcopy_discard_send_init(ms, block->idstr);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00003047
Juan Quintela6b6712e2017-03-22 15:18:04 +01003048 /* First pass: Discard all partially sent host pages */
Wei Yang810cf2b2019-07-24 09:07:21 +08003049 postcopy_chunk_hostpages_pass(ms, true, block);
Juan Quintela6b6712e2017-03-22 15:18:04 +01003050 /*
3051 * Second pass: Ensure that all partially dirty host pages are made
3052 * fully dirty.
3053 */
Wei Yang810cf2b2019-07-24 09:07:21 +08003054 postcopy_chunk_hostpages_pass(ms, false, block);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00003055
Wei Yang810cf2b2019-07-24 09:07:21 +08003056 postcopy_discard_send_finish(ms);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00003057 return 0;
3058}
3059
Juan Quintela3d0684b2017-03-23 15:06:39 +01003060/**
3061 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
3062 *
3063 * Returns zero on success
3064 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003065 * Transmit the set of pages to be discarded after precopy to the target.
 3066 * These are pages that:
3067 * a) Have been previously transmitted but are now dirty again
3068 * b) Pages that have never been transmitted, this ensures that
3069 * any pages on the destination that have been mapped by background
3070 * tasks get discarded (transparent huge pages is the specific concern)
3071 * Hopefully this is pretty sparse
Juan Quintela3d0684b2017-03-23 15:06:39 +01003072 *
3073 * @ms: current migration state
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003074 */
3075int ram_postcopy_send_discard_bitmap(MigrationState *ms)
3076{
Juan Quintela53518d92017-05-04 11:46:24 +02003077 RAMState *rs = ram_state;
Juan Quintela6b6712e2017-03-22 15:18:04 +01003078 RAMBlock *block;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003079 int ret;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003080
3081 rcu_read_lock();
3082
3083 /* This should be our last sync, the src is now paused */
Juan Quintelaeb859c52017-03-13 21:51:55 +01003084 migration_bitmap_sync(rs);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003085
Juan Quintela6b6712e2017-03-22 15:18:04 +01003086 /* Easiest way to make sure we don't resume in the middle of a host-page */
3087 rs->last_seen_block = NULL;
3088 rs->last_sent_block = NULL;
3089 rs->last_page = 0;
3090
Yury Kotovfbd162e2019-02-15 20:45:46 +03003091 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01003092 unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
3093 unsigned long *bitmap = block->bmap;
3094 unsigned long *unsentmap = block->unsentmap;
3095
3096 if (!unsentmap) {
3097 /* We don't have a safe way to resize the sentmap, so
3098 * if the bitmap was resized it will be NULL at this
3099 * point.
3100 */
3101 error_report("migration ram resized during precopy phase");
3102 rcu_read_unlock();
3103 return -EINVAL;
3104 }
3105 /* Deal with TPS != HPS and huge pages */
3106 ret = postcopy_chunk_hostpages(ms, block);
3107 if (ret) {
3108 rcu_read_unlock();
3109 return ret;
3110 }
3111
3112 /*
3113 * Update the unsentmap to be unsentmap = unsentmap | dirty
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003114 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01003115 bitmap_or(unsentmap, unsentmap, bitmap, pages);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003116#ifdef DEBUG_POSTCOPY
Juan Quintela6b6712e2017-03-22 15:18:04 +01003117 ram_debug_dump_bitmap(unsentmap, true, pages);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003118#endif
Juan Quintela6b6712e2017-03-22 15:18:04 +01003119 }
3120 trace_ram_postcopy_send_discard_bitmap();
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003121
3122 ret = postcopy_each_ram_send_discard(ms);
3123 rcu_read_unlock();
3124
3125 return ret;
3126}
3127
Juan Quintela3d0684b2017-03-23 15:06:39 +01003128/**
3129 * ram_discard_range: discard dirtied pages at the beginning of postcopy
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003130 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01003131 * Returns zero on success
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003132 *
Juan Quintela36449152017-03-23 15:11:59 +01003133 * @rbname: name of the RAMBlock of the request
Juan Quintela3d0684b2017-03-23 15:06:39 +01003135 * @start: byte offset within the RAMBlock at which to start discarding
 3136 * @length: length (in bytes) to discard
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003137 */
Juan Quintelaaaa20642017-03-21 11:35:24 +01003138int ram_discard_range(const char *rbname, uint64_t start, size_t length)
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003139{
3140 int ret = -1;
3141
Juan Quintela36449152017-03-23 15:11:59 +01003142 trace_ram_discard_range(rbname, start, length);
Dr. David Alan Gilbertd3a50382017-02-24 18:28:32 +00003143
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003144 rcu_read_lock();
Juan Quintela36449152017-03-23 15:11:59 +01003145 RAMBlock *rb = qemu_ram_block_by_name(rbname);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003146
3147 if (!rb) {
Juan Quintela36449152017-03-23 15:11:59 +01003148 error_report("ram_discard_range: Failed to find block '%s'", rbname);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003149 goto err;
3150 }
3151
Peter Xu814bb082018-07-23 20:33:02 +08003152 /*
3153 * On source VM, we don't need to update the received bitmap since
3154 * we don't even have one.
3155 */
3156 if (rb->receivedmap) {
3157 bitmap_clear(rb->receivedmap, start >> qemu_target_page_bits(),
3158 length >> qemu_target_page_bits());
3159 }
3160
Dr. David Alan Gilbertd3a50382017-02-24 18:28:32 +00003161 ret = ram_block_discard_range(rb, start, length);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00003162
3163err:
3164 rcu_read_unlock();
3165
3166 return ret;
3167}
3168
Peter Xu84593a02017-10-19 14:31:59 +08003169/*
3170 * For every allocation, we will try not to crash the VM if the
3171 * allocation failed.
3172 */
3173static int xbzrle_init(void)
3174{
3175 Error *local_err = NULL;
3176
3177 if (!migrate_use_xbzrle()) {
3178 return 0;
3179 }
3180
3181 XBZRLE_cache_lock();
3182
3183 XBZRLE.zero_target_page = g_try_malloc0(TARGET_PAGE_SIZE);
3184 if (!XBZRLE.zero_target_page) {
3185 error_report("%s: Error allocating zero page", __func__);
3186 goto err_out;
3187 }
3188
3189 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(),
3190 TARGET_PAGE_SIZE, &local_err);
3191 if (!XBZRLE.cache) {
3192 error_report_err(local_err);
3193 goto free_zero_page;
3194 }
3195
3196 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
3197 if (!XBZRLE.encoded_buf) {
3198 error_report("%s: Error allocating encoded_buf", __func__);
3199 goto free_cache;
3200 }
3201
3202 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
3203 if (!XBZRLE.current_buf) {
3204 error_report("%s: Error allocating current_buf", __func__);
3205 goto free_encoded_buf;
3206 }
3207
3208 /* We are all good */
3209 XBZRLE_cache_unlock();
3210 return 0;
3211
3212free_encoded_buf:
3213 g_free(XBZRLE.encoded_buf);
3214 XBZRLE.encoded_buf = NULL;
3215free_cache:
3216 cache_fini(XBZRLE.cache);
3217 XBZRLE.cache = NULL;
3218free_zero_page:
3219 g_free(XBZRLE.zero_target_page);
3220 XBZRLE.zero_target_page = NULL;
3221err_out:
3222 XBZRLE_cache_unlock();
3223 return -ENOMEM;
3224}
3225
Juan Quintela53518d92017-05-04 11:46:24 +02003226static int ram_state_init(RAMState **rsp)
Juan Quintela56e93d22015-05-07 19:33:31 +02003227{
Peter Xu7d00ee62017-10-19 14:31:57 +08003228 *rsp = g_try_new0(RAMState, 1);
3229
3230 if (!*rsp) {
3231 error_report("%s: Init ramstate fail", __func__);
3232 return -1;
3233 }
Juan Quintela53518d92017-05-04 11:46:24 +02003234
3235 qemu_mutex_init(&(*rsp)->bitmap_mutex);
3236 qemu_mutex_init(&(*rsp)->src_page_req_mutex);
3237 QSIMPLEQ_INIT(&(*rsp)->src_page_requests);
Juan Quintela56e93d22015-05-07 19:33:31 +02003238
Peter Xu7d00ee62017-10-19 14:31:57 +08003239 /*
Ivan Ren40c4d4a2019-07-14 22:51:19 +08003240 * Count the total number of pages used by ram blocks not including any
3241 * gaps due to alignment or unplugs.
Wei Yang03158512019-06-04 14:17:27 +08003242 * This must match the initial value of the dirty bitmap.
Peter Xu7d00ee62017-10-19 14:31:57 +08003243 */
Ivan Ren40c4d4a2019-07-14 22:51:19 +08003244 (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
Peter Xu7d00ee62017-10-19 14:31:57 +08003245 ram_state_reset(*rsp);
3246
3247 return 0;
3248}
3249
Peter Xud6eff5d2017-10-19 14:32:00 +08003250static void ram_list_init_bitmaps(void)
3251{
Peter Xu002cad62019-06-03 14:50:56 +08003252 MigrationState *ms = migrate_get_current();
Peter Xud6eff5d2017-10-19 14:32:00 +08003253 RAMBlock *block;
3254 unsigned long pages;
Peter Xu002cad62019-06-03 14:50:56 +08003255 uint8_t shift;
Peter Xud6eff5d2017-10-19 14:32:00 +08003256
3257 /* Skip setting bitmap if there is no RAM */
3258 if (ram_bytes_total()) {
Peter Xu002cad62019-06-03 14:50:56 +08003259 shift = ms->clear_bitmap_shift;
3260 if (shift > CLEAR_BITMAP_SHIFT_MAX) {
3261 error_report("clear_bitmap_shift (%u) too big, using "
3262 "max value (%u)", shift, CLEAR_BITMAP_SHIFT_MAX);
3263 shift = CLEAR_BITMAP_SHIFT_MAX;
3264 } else if (shift < CLEAR_BITMAP_SHIFT_MIN) {
3265 error_report("clear_bitmap_shift (%u) too small, using "
3266 "min value (%u)", shift, CLEAR_BITMAP_SHIFT_MIN);
3267 shift = CLEAR_BITMAP_SHIFT_MIN;
3268 }
3269
Yury Kotovfbd162e2019-02-15 20:45:46 +03003270 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Peter Xud6eff5d2017-10-19 14:32:00 +08003271 pages = block->max_length >> TARGET_PAGE_BITS;
Wei Yang03158512019-06-04 14:17:27 +08003272 /*
3273 * The initial dirty bitmap for migration must be set with all
3274 * ones to make sure we'll migrate every guest RAM page to
 3275 * the destination.
Ivan Ren40c4d4a2019-07-14 22:51:19 +08003276 * Here we set RAMBlock.bmap all to 1 because when we restart a
 3277 * migration after a failed attempt, ram_list.
 3278 * dirty_memory[DIRTY_MEMORY_MIGRATION] doesn't include the whole
3279 * guest memory.
Wei Yang03158512019-06-04 14:17:27 +08003280 */
Peter Xud6eff5d2017-10-19 14:32:00 +08003281 block->bmap = bitmap_new(pages);
Ivan Ren40c4d4a2019-07-14 22:51:19 +08003282 bitmap_set(block->bmap, 0, pages);
Peter Xu002cad62019-06-03 14:50:56 +08003283 block->clear_bmap_shift = shift;
3284 block->clear_bmap = bitmap_new(clear_bmap_size(pages, shift));
Peter Xud6eff5d2017-10-19 14:32:00 +08003285 if (migrate_postcopy_ram()) {
3286 block->unsentmap = bitmap_new(pages);
3287 bitmap_set(block->unsentmap, 0, pages);
3288 }
3289 }
3290 }
3291}
3292
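/*
 * Worked example for the clear_bmap sizing above, assuming 4 KiB target
 * pages and a clear_bitmap_shift of 18: one clear_bmap bit covers 2^18
 * target pages = 1 GiB of guest memory, so a 16 GiB RAMBlock (2^22 pages)
 * needs clear_bmap_size(pages, 18) = DIV_ROUND_UP(2^22, 2^18) = 16 bits,
 * while block->bmap still tracks every individual target page.
 */
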
3293static void ram_init_bitmaps(RAMState *rs)
3294{
3295 /* For memory_global_dirty_log_start below. */
3296 qemu_mutex_lock_iothread();
3297 qemu_mutex_lock_ramlist();
3298 rcu_read_lock();
3299
3300 ram_list_init_bitmaps();
3301 memory_global_dirty_log_start();
Wei Wangbd227062018-12-11 16:24:51 +08003302 migration_bitmap_sync_precopy(rs);
Peter Xud6eff5d2017-10-19 14:32:00 +08003303
3304 rcu_read_unlock();
3305 qemu_mutex_unlock_ramlist();
3306 qemu_mutex_unlock_iothread();
3307}
3308
Peter Xu7d00ee62017-10-19 14:31:57 +08003309static int ram_init_all(RAMState **rsp)
3310{
Peter Xu7d00ee62017-10-19 14:31:57 +08003311 if (ram_state_init(rsp)) {
3312 return -1;
3313 }
3314
Peter Xu84593a02017-10-19 14:31:59 +08003315 if (xbzrle_init()) {
3316 ram_state_cleanup(rsp);
3317 return -1;
Juan Quintela56e93d22015-05-07 19:33:31 +02003318 }
3319
Peter Xud6eff5d2017-10-19 14:32:00 +08003320 ram_init_bitmaps(*rsp);
zhanghailianga91246c2016-10-27 14:42:59 +08003321
3322 return 0;
3323}
3324
Peter Xu08614f32018-05-02 18:47:33 +08003325static void ram_state_resume_prepare(RAMState *rs, QEMUFile *out)
3326{
3327 RAMBlock *block;
3328 uint64_t pages = 0;
3329
3330 /*
3331 * Postcopy is not using xbzrle/compression, so no need for that.
 3332 * Also, since the source is already halted, we don't need to care
 3333 * about dirty page logging either.
3334 */
3335
Yury Kotovfbd162e2019-02-15 20:45:46 +03003336 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Peter Xu08614f32018-05-02 18:47:33 +08003337 pages += bitmap_count_one(block->bmap,
3338 block->used_length >> TARGET_PAGE_BITS);
3339 }
3340
3341 /* This may not be aligned with current bitmaps. Recalculate. */
3342 rs->migration_dirty_pages = pages;
3343
3344 rs->last_seen_block = NULL;
3345 rs->last_sent_block = NULL;
3346 rs->last_page = 0;
3347 rs->last_version = ram_list.version;
3348 /*
3349 * Disable the bulk stage, otherwise we'll resend the whole RAM no
3350 * matter what we have sent.
3351 */
3352 rs->ram_bulk_stage = false;
3353
3354 /* Update RAMState cache of output QEMUFile */
3355 rs->f = out;
3356
3357 trace_ram_state_resume_prepare(pages);
3358}
3359
Juan Quintela3d0684b2017-03-23 15:06:39 +01003360/*
Wei Wang6bcb05f2018-12-11 16:24:50 +08003361 * This function clears bits of the free pages reported by the caller from the
3362 * migration dirty bitmap. @addr is the host address corresponding to the
 3363 * start of the contiguous guest free pages, and @len is the total bytes of
3364 * those pages.
3365 */
3366void qemu_guest_free_page_hint(void *addr, size_t len)
3367{
3368 RAMBlock *block;
3369 ram_addr_t offset;
3370 size_t used_len, start, npages;
3371 MigrationState *s = migrate_get_current();
3372
3373 /* This function is currently expected to be used during live migration */
3374 if (!migration_is_setup_or_active(s->state)) {
3375 return;
3376 }
3377
3378 for (; len > 0; len -= used_len, addr += used_len) {
3379 block = qemu_ram_block_from_host(addr, false, &offset);
3380 if (unlikely(!block || offset >= block->used_length)) {
3381 /*
3382 * The implementation might not support RAMBlock resize during
3383 * live migration, but it could happen in theory with future
3384 * updates. So we add a check here to capture that case.
3385 */
3386 error_report_once("%s unexpected error", __func__);
3387 return;
3388 }
3389
3390 if (len <= block->used_length - offset) {
3391 used_len = len;
3392 } else {
3393 used_len = block->used_length - offset;
3394 }
3395
3396 start = offset >> TARGET_PAGE_BITS;
3397 npages = used_len >> TARGET_PAGE_BITS;
3398
3399 qemu_mutex_lock(&ram_state->bitmap_mutex);
3400 ram_state->migration_dirty_pages -=
3401 bitmap_count_one_with_offset(block->bmap, start, npages);
3402 bitmap_clear(block->bmap, start, npages);
3403 qemu_mutex_unlock(&ram_state->bitmap_mutex);
3404 }
3405}
3406
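/*
 * A minimal sketch (illustration only, not from the original ram.c) of how
 * a free page hint could be fed to qemu_guest_free_page_hint().  The caller
 * name and the fixed 2 MiB length are assumptions; the expected user is the
 * virtio-balloon free-page-hinting path.
 */
static __attribute__((unused)) void
example_report_free_range(void *host_addr)
{
    /* Drop the dirty bits for 2 MiB of guest memory known to be free */
    qemu_guest_free_page_hint(host_addr, 2 * 1024 * 1024);
}
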
3407/*
Juan Quintela3d0684b2017-03-23 15:06:39 +01003408 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
zhanghailianga91246c2016-10-27 14:42:59 +08003409 * a long-running RCU critical section. When RCU reclaims in the code
3410 * start to become numerous it will be necessary to reduce the
3411 * granularity of these critical sections.
3412 */
3413
Juan Quintela3d0684b2017-03-23 15:06:39 +01003414/**
3415 * ram_save_setup: Setup RAM for migration
3416 *
3417 * Returns zero to indicate success and negative for error
3418 *
3419 * @f: QEMUFile where to send the data
3420 * @opaque: RAMState pointer
3421 */
zhanghailianga91246c2016-10-27 14:42:59 +08003422static int ram_save_setup(QEMUFile *f, void *opaque)
3423{
Juan Quintela53518d92017-05-04 11:46:24 +02003424 RAMState **rsp = opaque;
zhanghailianga91246c2016-10-27 14:42:59 +08003425 RAMBlock *block;
3426
Xiao Guangrongdcaf4462018-03-30 15:51:20 +08003427 if (compress_threads_save_setup()) {
3428 return -1;
3429 }
3430
zhanghailianga91246c2016-10-27 14:42:59 +08003431 /* migration has already set up the bitmap, reuse it. */
3432 if (!migration_in_colo_state()) {
Peter Xu7d00ee62017-10-19 14:31:57 +08003433 if (ram_init_all(rsp) != 0) {
Xiao Guangrongdcaf4462018-03-30 15:51:20 +08003434 compress_threads_save_cleanup();
zhanghailianga91246c2016-10-27 14:42:59 +08003435 return -1;
Juan Quintela53518d92017-05-04 11:46:24 +02003436 }
zhanghailianga91246c2016-10-27 14:42:59 +08003437 }
Juan Quintela53518d92017-05-04 11:46:24 +02003438 (*rsp)->f = f;
zhanghailianga91246c2016-10-27 14:42:59 +08003439
3440 rcu_read_lock();
Juan Quintela56e93d22015-05-07 19:33:31 +02003441
Yury Kotovfbd162e2019-02-15 20:45:46 +03003442 qemu_put_be64(f, ram_bytes_total_common(true) | RAM_SAVE_FLAG_MEM_SIZE);
Juan Quintela56e93d22015-05-07 19:33:31 +02003443
Cédric Le Goaterb895de52018-05-14 08:57:00 +02003444 RAMBLOCK_FOREACH_MIGRATABLE(block) {
Juan Quintela56e93d22015-05-07 19:33:31 +02003445 qemu_put_byte(f, strlen(block->idstr));
3446 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
3447 qemu_put_be64(f, block->used_length);
Dr. David Alan Gilbertef08fb32017-02-24 18:28:30 +00003448 if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
3449 qemu_put_be64(f, block->page_size);
3450 }
Yury Kotovfbd162e2019-02-15 20:45:46 +03003451 if (migrate_ignore_shared()) {
3452 qemu_put_be64(f, block->mr->addr);
Yury Kotovfbd162e2019-02-15 20:45:46 +03003453 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003454 }
3455
3456 rcu_read_unlock();
3457
3458 ram_control_before_iterate(f, RAM_CONTROL_SETUP);
3459 ram_control_after_iterate(f, RAM_CONTROL_SETUP);
3460
Ivan Ren1b81c972019-07-30 13:33:35 +08003461 multifd_send_sync_main(*rsp);
Juan Quintela56e93d22015-05-07 19:33:31 +02003462 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
Juan Quintela35374cb2018-04-18 10:13:21 +02003463 qemu_fflush(f);
Juan Quintela56e93d22015-05-07 19:33:31 +02003464
3465 return 0;
3466}
3467
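/*
 * Summary of the setup-stage stream produced above (descriptive sketch, not
 * authoritative): a be64 of the total RAM size OR'd with
 * RAM_SAVE_FLAG_MEM_SIZE, then for each migratable block a length byte, the
 * idstr bytes, the used_length, optionally the page size (postcopy with
 * non-host-sized pages) and the block address (ignore-shared), and finally
 * RAM_SAVE_FLAG_EOS after the multifd sync.
 */
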
Juan Quintela3d0684b2017-03-23 15:06:39 +01003468/**
3469 * ram_save_iterate: iterative stage for migration
3470 *
3471 * Returns zero to indicate success and negative for error
3472 *
3473 * @f: QEMUFile where to send the data
3474 * @opaque: RAMState pointer
3475 */
Juan Quintela56e93d22015-05-07 19:33:31 +02003476static int ram_save_iterate(QEMUFile *f, void *opaque)
3477{
Juan Quintela53518d92017-05-04 11:46:24 +02003478 RAMState **temp = opaque;
3479 RAMState *rs = *temp;
Juan Quintela56e93d22015-05-07 19:33:31 +02003480 int ret;
3481 int i;
3482 int64_t t0;
Thomas Huth5c903082016-11-04 14:10:17 +01003483 int done = 0;
Juan Quintela56e93d22015-05-07 19:33:31 +02003484
Peter Lievenb2557342018-03-08 12:18:24 +01003485 if (blk_mig_bulk_active()) {
 3486 /* Avoid transferring ram during the bulk phase of block migration, as
3487 * the bulk phase will usually take a long time and transferring
3488 * ram updates during that time is pointless. */
3489 goto out;
3490 }
3491
Juan Quintela56e93d22015-05-07 19:33:31 +02003492 rcu_read_lock();
Juan Quintela6f37bb82017-03-13 19:26:29 +01003493 if (ram_list.version != rs->last_version) {
3494 ram_state_reset(rs);
Juan Quintela56e93d22015-05-07 19:33:31 +02003495 }
3496
3497 /* Read version before ram_list.blocks */
3498 smp_rmb();
3499
3500 ram_control_before_iterate(f, RAM_CONTROL_ROUND);
3501
3502 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
3503 i = 0;
Dr. David Alan Gilberte03a34f2018-06-13 11:26:42 +01003504 while ((ret = qemu_file_rate_limit(f)) == 0 ||
3505 !QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02003506 int pages;
3507
Dr. David Alan Gilberte03a34f2018-06-13 11:26:42 +01003508 if (qemu_file_get_error(f)) {
3509 break;
3510 }
3511
Juan Quintelace25d332017-03-15 11:00:51 +01003512 pages = ram_find_and_save_block(rs, false);
Juan Quintela56e93d22015-05-07 19:33:31 +02003513 /* no more pages to send */
3514 if (pages == 0) {
Thomas Huth5c903082016-11-04 14:10:17 +01003515 done = 1;
Juan Quintela56e93d22015-05-07 19:33:31 +02003516 break;
3517 }
Xiao Guangronge8f37352018-09-03 17:26:44 +08003518
3519 if (pages < 0) {
3520 qemu_file_set_error(f, pages);
3521 break;
3522 }
3523
Xiao Guangrongbe8b02e2018-09-03 17:26:42 +08003524 rs->target_page_count += pages;
Jason J. Herne070afca2015-09-08 13:12:35 -04003525
Juan Quintela56e93d22015-05-07 19:33:31 +02003526 /* we want to check in the 1st loop, just in case it was the 1st time
3527 and we had to sync the dirty bitmap.
Wei Yanga5f7b1a2019-05-11 07:37:29 +08003528 qemu_clock_get_ns() is a bit expensive, so we only check every few
Juan Quintela56e93d22015-05-07 19:33:31 +02003529 iterations.
3530 */
3531 if ((i & 63) == 0) {
3532 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
3533 if (t1 > MAX_WAIT) {
Juan Quintela55c44462017-01-23 22:32:05 +01003534 trace_ram_save_iterate_big_wait(t1, i);
Juan Quintela56e93d22015-05-07 19:33:31 +02003535 break;
3536 }
3537 }
3538 i++;
3539 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003540 rcu_read_unlock();
3541
3542 /*
3543 * Must occur before EOS (or any QEMUFile operation)
3544 * because of RDMA protocol.
3545 */
3546 ram_control_after_iterate(f, RAM_CONTROL_ROUND);
3547
Peter Lievenb2557342018-03-08 12:18:24 +01003548out:
Ivan Ren1b81c972019-07-30 13:33:35 +08003549 multifd_send_sync_main(rs);
Juan Quintela56e93d22015-05-07 19:33:31 +02003550 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
Juan Quintela35374cb2018-04-18 10:13:21 +02003551 qemu_fflush(f);
Juan Quintela93604472017-06-06 19:49:03 +02003552 ram_counters.transferred += 8;
Juan Quintela56e93d22015-05-07 19:33:31 +02003553
3554 ret = qemu_file_get_error(f);
3555 if (ret < 0) {
3556 return ret;
3557 }
3558
Thomas Huth5c903082016-11-04 14:10:17 +01003559 return done;
Juan Quintela56e93d22015-05-07 19:33:31 +02003560}
3561
Juan Quintela3d0684b2017-03-23 15:06:39 +01003562/**
3563 * ram_save_complete: function called to send the remaining amount of ram
3564 *
Xiao Guangronge8f37352018-09-03 17:26:44 +08003565 * Returns zero to indicate success or negative on error
Juan Quintela3d0684b2017-03-23 15:06:39 +01003566 *
3567 * Called with iothread lock
3568 *
3569 * @f: QEMUFile where to send the data
3570 * @opaque: RAMState pointer
3571 */
Juan Quintela56e93d22015-05-07 19:33:31 +02003572static int ram_save_complete(QEMUFile *f, void *opaque)
3573{
Juan Quintela53518d92017-05-04 11:46:24 +02003574 RAMState **temp = opaque;
3575 RAMState *rs = *temp;
Xiao Guangronge8f37352018-09-03 17:26:44 +08003576 int ret = 0;
Juan Quintela6f37bb82017-03-13 19:26:29 +01003577
Juan Quintela56e93d22015-05-07 19:33:31 +02003578 rcu_read_lock();
3579
Juan Quintela57273092017-03-20 22:25:28 +01003580 if (!migration_in_postcopy()) {
Wei Wangbd227062018-12-11 16:24:51 +08003581 migration_bitmap_sync_precopy(rs);
Dr. David Alan Gilbert663e6c12015-11-05 18:11:13 +00003582 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003583
3584 ram_control_before_iterate(f, RAM_CONTROL_FINISH);
3585
3586 /* try transferring iterative blocks of memory */
3587
3588 /* flush all remaining blocks regardless of rate limiting */
3589 while (true) {
3590 int pages;
3591
Juan Quintelace25d332017-03-15 11:00:51 +01003592 pages = ram_find_and_save_block(rs, !migration_in_colo_state());
Juan Quintela56e93d22015-05-07 19:33:31 +02003593 /* no more blocks to send */
3594 if (pages == 0) {
3595 break;
3596 }
Xiao Guangronge8f37352018-09-03 17:26:44 +08003597 if (pages < 0) {
3598 ret = pages;
3599 break;
3600 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003601 }
3602
Juan Quintelace25d332017-03-15 11:00:51 +01003603 flush_compressed_data(rs);
Juan Quintela56e93d22015-05-07 19:33:31 +02003604 ram_control_after_iterate(f, RAM_CONTROL_FINISH);
Juan Quintela56e93d22015-05-07 19:33:31 +02003605
3606 rcu_read_unlock();
Paolo Bonzinid09a6fd2015-07-09 08:47:58 +02003607
Ivan Ren1b81c972019-07-30 13:33:35 +08003608 multifd_send_sync_main(rs);
Juan Quintela56e93d22015-05-07 19:33:31 +02003609 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
Juan Quintela35374cb2018-04-18 10:13:21 +02003610 qemu_fflush(f);
Juan Quintela56e93d22015-05-07 19:33:31 +02003611
Xiao Guangronge8f37352018-09-03 17:26:44 +08003612 return ret;
Juan Quintela56e93d22015-05-07 19:33:31 +02003613}
3614
Dr. David Alan Gilbertc31b0982015-11-05 18:10:54 +00003615static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
Vladimir Sementsov-Ogievskiy47995022018-03-13 15:34:00 -04003616 uint64_t *res_precopy_only,
3617 uint64_t *res_compatible,
3618 uint64_t *res_postcopy_only)
Juan Quintela56e93d22015-05-07 19:33:31 +02003619{
Juan Quintela53518d92017-05-04 11:46:24 +02003620 RAMState **temp = opaque;
3621 RAMState *rs = *temp;
Juan Quintela56e93d22015-05-07 19:33:31 +02003622 uint64_t remaining_size;
3623
Juan Quintela9edabd42017-03-14 12:02:16 +01003624 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
Juan Quintela56e93d22015-05-07 19:33:31 +02003625
Juan Quintela57273092017-03-20 22:25:28 +01003626 if (!migration_in_postcopy() &&
Dr. David Alan Gilbert663e6c12015-11-05 18:11:13 +00003627 remaining_size < max_size) {
Juan Quintela56e93d22015-05-07 19:33:31 +02003628 qemu_mutex_lock_iothread();
3629 rcu_read_lock();
Wei Wangbd227062018-12-11 16:24:51 +08003630 migration_bitmap_sync_precopy(rs);
Juan Quintela56e93d22015-05-07 19:33:31 +02003631 rcu_read_unlock();
3632 qemu_mutex_unlock_iothread();
Juan Quintela9edabd42017-03-14 12:02:16 +01003633 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
Juan Quintela56e93d22015-05-07 19:33:31 +02003634 }
Dr. David Alan Gilbertc31b0982015-11-05 18:10:54 +00003635
Vladimir Sementsov-Ogievskiy86e11672017-07-10 19:30:15 +03003636 if (migrate_postcopy_ram()) {
3637 /* We can do postcopy, and all the data is postcopiable */
Vladimir Sementsov-Ogievskiy47995022018-03-13 15:34:00 -04003638 *res_compatible += remaining_size;
Vladimir Sementsov-Ogievskiy86e11672017-07-10 19:30:15 +03003639 } else {
Vladimir Sementsov-Ogievskiy47995022018-03-13 15:34:00 -04003640 *res_precopy_only += remaining_size;
Vladimir Sementsov-Ogievskiy86e11672017-07-10 19:30:15 +03003641 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003642}
3643
3644static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
3645{
3646 unsigned int xh_len;
3647 int xh_flags;
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00003648 uint8_t *loaded_data;
Juan Quintela56e93d22015-05-07 19:33:31 +02003649
Juan Quintela56e93d22015-05-07 19:33:31 +02003650 /* extract RLE header */
3651 xh_flags = qemu_get_byte(f);
3652 xh_len = qemu_get_be16(f);
3653
3654 if (xh_flags != ENCODING_FLAG_XBZRLE) {
3655 error_report("Failed to load XBZRLE page - wrong compression!");
3656 return -1;
3657 }
3658
3659 if (xh_len > TARGET_PAGE_SIZE) {
3660 error_report("Failed to load XBZRLE page - len overflow!");
3661 return -1;
3662 }
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003663 loaded_data = XBZRLE.decoded_buf;
Juan Quintela56e93d22015-05-07 19:33:31 +02003664 /* load data and decode */
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003665 /* it can change loaded_data to point to an internal buffer */
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00003666 qemu_get_buffer_in_place(f, &loaded_data, xh_len);
Juan Quintela56e93d22015-05-07 19:33:31 +02003667
3668 /* decode RLE */
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00003669 if (xbzrle_decode_buffer(loaded_data, xh_len, host,
Juan Quintela56e93d22015-05-07 19:33:31 +02003670 TARGET_PAGE_SIZE) == -1) {
3671 error_report("Failed to load XBZRLE page - decode error!");
3672 return -1;
3673 }
3674
3675 return 0;
3676}
3677
Juan Quintela3d0684b2017-03-23 15:06:39 +01003678/**
3679 * ram_block_from_stream: read a RAMBlock id from the migration stream
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003680 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01003681 * Must be called from within a rcu critical section.
3682 *
3683 * Returns a pointer from within the RCU-protected ram_list.
3684 *
3685 * @f: QEMUFile where to read the data from
3686 * @flags: Page flags (mostly to see if it's a continuation of previous block)
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003687 */
Juan Quintela3d0684b2017-03-23 15:06:39 +01003688static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
Juan Quintela56e93d22015-05-07 19:33:31 +02003689{
3690 static RAMBlock *block = NULL;
3691 char id[256];
3692 uint8_t len;
3693
3694 if (flags & RAM_SAVE_FLAG_CONTINUE) {
zhanghailiang4c4bad42016-01-15 11:37:41 +08003695 if (!block) {
Juan Quintela56e93d22015-05-07 19:33:31 +02003696 error_report("Ack, bad migration stream!");
3697 return NULL;
3698 }
zhanghailiang4c4bad42016-01-15 11:37:41 +08003699 return block;
Juan Quintela56e93d22015-05-07 19:33:31 +02003700 }
3701
3702 len = qemu_get_byte(f);
3703 qemu_get_buffer(f, (uint8_t *)id, len);
3704 id[len] = 0;
3705
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00003706 block = qemu_ram_block_by_name(id);
zhanghailiang4c4bad42016-01-15 11:37:41 +08003707 if (!block) {
3708 error_report("Can't find block %s", id);
3709 return NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02003710 }
3711
Yury Kotovfbd162e2019-02-15 20:45:46 +03003712 if (ramblock_is_ignored(block)) {
Cédric Le Goaterb895de52018-05-14 08:57:00 +02003713 error_report("block %s should not be migrated !", id);
3714 return NULL;
3715 }
3716
zhanghailiang4c4bad42016-01-15 11:37:41 +08003717 return block;
3718}
3719
3720static inline void *host_from_ram_block_offset(RAMBlock *block,
3721 ram_addr_t offset)
3722{
3723 if (!offset_in_ramblock(block, offset)) {
3724 return NULL;
3725 }
3726
3727 return block->host + offset;
Juan Quintela56e93d22015-05-07 19:33:31 +02003728}
3729
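/**
 * colo_cache_from_block_offset: turn a RAMBlock offset into a pointer
 *                               into the block's COLO cache
 *
 * Returns the address of @offset within @block->colo_cache, or NULL if
 * the offset is out of range or the cache has not been allocated. The
 * page is also marked in the block's dirty bitmap so that it can be
 * flushed into the SVM's RAM at the next checkpoint.
 *
 * @block: RAMBlock the offset refers to
 * @offset: offset within the block
 */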
Zhang Chen13af18f2018-09-03 12:38:48 +08003730static inline void *colo_cache_from_block_offset(RAMBlock *block,
3731 ram_addr_t offset)
3732{
3733 if (!offset_in_ramblock(block, offset)) {
3734 return NULL;
3735 }
3736 if (!block->colo_cache) {
 3737 error_report("%s: colo_cache is NULL in block: %s",
3738 __func__, block->idstr);
3739 return NULL;
3740 }
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003741
3742 /*
 3743 * During a COLO checkpoint, we need a bitmap of these migrated pages.
 3744 * It helps us decide which pages in the RAM cache should be flushed
 3745 * into the VM's RAM later.
3746 */
3747 if (!test_and_set_bit(offset >> TARGET_PAGE_BITS, block->bmap)) {
3748 ram_state->migration_dirty_pages++;
3749 }
Zhang Chen13af18f2018-09-03 12:38:48 +08003750 return block->colo_cache + offset;
3751}
3752
Juan Quintela3d0684b2017-03-23 15:06:39 +01003753/**
3754 * ram_handle_compressed: handle the zero page case
3755 *
Juan Quintela56e93d22015-05-07 19:33:31 +02003756 * If a page (or a whole RDMA chunk) has been
3757 * determined to be zero, then zap it.
Juan Quintela3d0684b2017-03-23 15:06:39 +01003758 *
3759 * @host: host address for the zero page
 3760 * @ch: what the page is filled with; we only support zero
3761 * @size: size of the zero page
Juan Quintela56e93d22015-05-07 19:33:31 +02003762 */
3763void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
3764{
3765 if (ch != 0 || !is_zero_range(host, size)) {
3766 memset(host, ch, size);
3767 }
3768}
3769
Xiao Guangrong797ca152018-03-30 15:51:21 +08003770/* return the size after decompression, or negative value on error */
3771static int
3772qemu_uncompress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
3773 const uint8_t *source, size_t source_len)
3774{
3775 int err;
3776
3777 err = inflateReset(stream);
3778 if (err != Z_OK) {
3779 return -1;
3780 }
3781
3782 stream->avail_in = source_len;
3783 stream->next_in = (uint8_t *)source;
3784 stream->avail_out = dest_len;
3785 stream->next_out = dest;
3786
3787 err = inflate(stream, Z_NO_FLUSH);
3788 if (err != Z_STREAM_END) {
3789 return -1;
3790 }
3791
3792 return stream->total_out;
3793}
3794
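/**
 * do_data_decompress: body of a decompression worker thread
 *
 * Waits on its DecompressParam for work queued by
 * decompress_data_with_multi_threads(), inflates the compressed buffer
 * into the destination page, records any error on decomp_file and
 * signals completion via decomp_done_cond. Runs until param->quit is set.
 *
 * @opaque: DecompressParam of this thread
 */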
Juan Quintela56e93d22015-05-07 19:33:31 +02003795static void *do_data_decompress(void *opaque)
3796{
3797 DecompressParam *param = opaque;
3798 unsigned long pagesize;
Liang Li33d151f2016-05-05 15:32:58 +08003799 uint8_t *des;
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003800 int len, ret;
Juan Quintela56e93d22015-05-07 19:33:31 +02003801
Liang Li33d151f2016-05-05 15:32:58 +08003802 qemu_mutex_lock(&param->mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08003803 while (!param->quit) {
Liang Li33d151f2016-05-05 15:32:58 +08003804 if (param->des) {
3805 des = param->des;
3806 len = param->len;
3807 param->des = 0;
3808 qemu_mutex_unlock(&param->mutex);
3809
Liang Li73a89122016-05-05 15:32:51 +08003810 pagesize = TARGET_PAGE_SIZE;
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003811
3812 ret = qemu_uncompress_data(&param->stream, des, pagesize,
3813 param->compbuf, len);
Xiao Guangrongf5482222018-05-03 16:06:11 +08003814 if (ret < 0 && migrate_get_current()->decompress_error_check) {
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003815 error_report("decompress data failed");
3816 qemu_file_set_error(decomp_file, ret);
3817 }
Liang Li73a89122016-05-05 15:32:51 +08003818
Liang Li33d151f2016-05-05 15:32:58 +08003819 qemu_mutex_lock(&decomp_done_lock);
3820 param->done = true;
3821 qemu_cond_signal(&decomp_done_cond);
3822 qemu_mutex_unlock(&decomp_done_lock);
3823
3824 qemu_mutex_lock(&param->mutex);
3825 } else {
3826 qemu_cond_wait(&param->cond, &param->mutex);
3827 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003828 }
Liang Li33d151f2016-05-05 15:32:58 +08003829 qemu_mutex_unlock(&param->mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02003830
3831 return NULL;
3832}
3833
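/**
 * wait_for_decompress_done: wait until every decompression thread is idle
 *
 * Returns 0 for success or the error recorded on decomp_file
 */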
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003834static int wait_for_decompress_done(void)
Liang Li5533b2e2016-05-05 15:32:52 +08003835{
3836 int idx, thread_count;
3837
3838 if (!migrate_use_compression()) {
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003839 return 0;
Liang Li5533b2e2016-05-05 15:32:52 +08003840 }
3841
3842 thread_count = migrate_decompress_threads();
3843 qemu_mutex_lock(&decomp_done_lock);
3844 for (idx = 0; idx < thread_count; idx++) {
3845 while (!decomp_param[idx].done) {
3846 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
3847 }
3848 }
3849 qemu_mutex_unlock(&decomp_done_lock);
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003850 return qemu_file_get_error(decomp_file);
Liang Li5533b2e2016-05-05 15:32:52 +08003851}
3852
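/*
 * compress_threads_load_cleanup: stop and join the decompression threads
 * and free their per-thread state. Safe to call for a setup that only
 * partially succeeded.
 */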
Juan Quintelaf0afa332017-06-28 11:52:28 +02003853static void compress_threads_load_cleanup(void)
Juan Quintela56e93d22015-05-07 19:33:31 +02003854{
3855 int i, thread_count;
3856
Juan Quintela3416ab52016-04-20 11:56:01 +02003857 if (!migrate_use_compression()) {
3858 return;
3859 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003860 thread_count = migrate_decompress_threads();
3861 for (i = 0; i < thread_count; i++) {
Xiao Guangrong797ca152018-03-30 15:51:21 +08003862 /*
 3863 * we use it as an indicator of whether the thread is
 3864 * properly initialized or not
3865 */
3866 if (!decomp_param[i].compbuf) {
3867 break;
3868 }
3869
Juan Quintela56e93d22015-05-07 19:33:31 +02003870 qemu_mutex_lock(&decomp_param[i].mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08003871 decomp_param[i].quit = true;
Juan Quintela56e93d22015-05-07 19:33:31 +02003872 qemu_cond_signal(&decomp_param[i].cond);
3873 qemu_mutex_unlock(&decomp_param[i].mutex);
3874 }
3875 for (i = 0; i < thread_count; i++) {
Xiao Guangrong797ca152018-03-30 15:51:21 +08003876 if (!decomp_param[i].compbuf) {
3877 break;
3878 }
3879
Juan Quintela56e93d22015-05-07 19:33:31 +02003880 qemu_thread_join(decompress_threads + i);
3881 qemu_mutex_destroy(&decomp_param[i].mutex);
3882 qemu_cond_destroy(&decomp_param[i].cond);
Xiao Guangrong797ca152018-03-30 15:51:21 +08003883 inflateEnd(&decomp_param[i].stream);
Juan Quintela56e93d22015-05-07 19:33:31 +02003884 g_free(decomp_param[i].compbuf);
Xiao Guangrong797ca152018-03-30 15:51:21 +08003885 decomp_param[i].compbuf = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02003886 }
3887 g_free(decompress_threads);
3888 g_free(decomp_param);
Juan Quintela56e93d22015-05-07 19:33:31 +02003889 decompress_threads = NULL;
3890 decomp_param = NULL;
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003891 decomp_file = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02003892}
3893
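/**
 * compress_threads_load_setup: start the decompression threads
 *
 * Returns 0 for success or -1 on error (partially created state is
 * cleaned up before returning)
 *
 * @f: QEMUFile used to report decompression errors back to the loader
 */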
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003894static int compress_threads_load_setup(QEMUFile *f)
Xiao Guangrong797ca152018-03-30 15:51:21 +08003895{
3896 int i, thread_count;
3897
3898 if (!migrate_use_compression()) {
3899 return 0;
3900 }
3901
3902 thread_count = migrate_decompress_threads();
3903 decompress_threads = g_new0(QemuThread, thread_count);
3904 decomp_param = g_new0(DecompressParam, thread_count);
3905 qemu_mutex_init(&decomp_done_lock);
3906 qemu_cond_init(&decomp_done_cond);
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003907 decomp_file = f;
Xiao Guangrong797ca152018-03-30 15:51:21 +08003908 for (i = 0; i < thread_count; i++) {
3909 if (inflateInit(&decomp_param[i].stream) != Z_OK) {
3910 goto exit;
3911 }
3912
3913 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
3914 qemu_mutex_init(&decomp_param[i].mutex);
3915 qemu_cond_init(&decomp_param[i].cond);
3916 decomp_param[i].done = true;
3917 decomp_param[i].quit = false;
3918 qemu_thread_create(decompress_threads + i, "decompress",
3919 do_data_decompress, decomp_param + i,
3920 QEMU_THREAD_JOINABLE);
3921 }
3922 return 0;
3923exit:
3924 compress_threads_load_cleanup();
3925 return -1;
3926}
3927
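/**
 * decompress_data_with_multi_threads: hand a compressed page to a worker
 *
 * Reads the compressed data from @f and passes it to the first idle
 * decompression thread, waiting on decomp_done_cond if all threads are
 * currently busy.
 *
 * @f: QEMUFile to read the compressed data from
 * @host: host address the decompressed page will be written to
 * @len: length of the compressed data
 */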
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00003928static void decompress_data_with_multi_threads(QEMUFile *f,
Juan Quintela56e93d22015-05-07 19:33:31 +02003929 void *host, int len)
3930{
3931 int idx, thread_count;
3932
3933 thread_count = migrate_decompress_threads();
Liang Li73a89122016-05-05 15:32:51 +08003934 qemu_mutex_lock(&decomp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02003935 while (true) {
3936 for (idx = 0; idx < thread_count; idx++) {
Liang Li73a89122016-05-05 15:32:51 +08003937 if (decomp_param[idx].done) {
Liang Li33d151f2016-05-05 15:32:58 +08003938 decomp_param[idx].done = false;
3939 qemu_mutex_lock(&decomp_param[idx].mutex);
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00003940 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
Juan Quintela56e93d22015-05-07 19:33:31 +02003941 decomp_param[idx].des = host;
3942 decomp_param[idx].len = len;
Liang Li33d151f2016-05-05 15:32:58 +08003943 qemu_cond_signal(&decomp_param[idx].cond);
3944 qemu_mutex_unlock(&decomp_param[idx].mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02003945 break;
3946 }
3947 }
3948 if (idx < thread_count) {
3949 break;
Liang Li73a89122016-05-05 15:32:51 +08003950 } else {
3951 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02003952 }
3953 }
Liang Li73a89122016-05-05 15:32:51 +08003954 qemu_mutex_unlock(&decomp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02003955}
3956
Zhang Chen13af18f2018-09-03 12:38:48 +08003957/*
 3958 * COLO cache: this is for the secondary VM. We cache the whole
 3959 * memory of the secondary VM; the global lock must be held when
 3960 * calling this helper.
3961 */
3962int colo_init_ram_cache(void)
3963{
3964 RAMBlock *block;
3965
3966 rcu_read_lock();
Yury Kotovfbd162e2019-02-15 20:45:46 +03003967 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Zhang Chen13af18f2018-09-03 12:38:48 +08003968 block->colo_cache = qemu_anon_ram_alloc(block->used_length,
3969 NULL,
3970 false);
3971 if (!block->colo_cache) {
3972 error_report("%s: Can't alloc memory for COLO cache of block %s,"
3973 "size 0x" RAM_ADDR_FMT, __func__, block->idstr,
3974 block->used_length);
3975 goto out_locked;
3976 }
3977 memcpy(block->colo_cache, block->host, block->used_length);
3978 }
3979 rcu_read_unlock();
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003980 /*
 3981 * Record the dirty pages that were sent by the PVM; we use this dirty bitmap
 3982 * to decide which pages in the cache should be flushed into the SVM's RAM. Here
 3983 * we use the same name 'ram_bitmap' as for migration.
3984 */
3985 if (ram_bytes_total()) {
3986 RAMBlock *block;
3987
Yury Kotovfbd162e2019-02-15 20:45:46 +03003988 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003989 unsigned long pages = block->max_length >> TARGET_PAGE_BITS;
3990
3991 block->bmap = bitmap_new(pages);
3992 bitmap_set(block->bmap, 0, pages);
3993 }
3994 }
3995 ram_state = g_new0(RAMState, 1);
3996 ram_state->migration_dirty_pages = 0;
Zhang Chenc6e5baf2019-03-30 06:29:51 +08003997 qemu_mutex_init(&ram_state->bitmap_mutex);
zhanghailiangd1955d22018-09-03 12:38:55 +08003998 memory_global_dirty_log_start();
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003999
Zhang Chen13af18f2018-09-03 12:38:48 +08004000 return 0;
4001
4002out_locked:
Zhang Chen7d9acaf2018-09-03 12:38:49 +08004003
Yury Kotovfbd162e2019-02-15 20:45:46 +03004004 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Zhang Chen13af18f2018-09-03 12:38:48 +08004005 if (block->colo_cache) {
4006 qemu_anon_ram_free(block->colo_cache, block->used_length);
4007 block->colo_cache = NULL;
4008 }
4009 }
4010
4011 rcu_read_unlock();
4012 return -errno;
4013}
4014
 4015/* The global lock must be held to call this helper */
4016void colo_release_ram_cache(void)
4017{
4018 RAMBlock *block;
4019
zhanghailiangd1955d22018-09-03 12:38:55 +08004020 memory_global_dirty_log_stop();
Yury Kotovfbd162e2019-02-15 20:45:46 +03004021 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Zhang Chen7d9acaf2018-09-03 12:38:49 +08004022 g_free(block->bmap);
4023 block->bmap = NULL;
4024 }
4025
Zhang Chen13af18f2018-09-03 12:38:48 +08004026 rcu_read_lock();
Zhang Chen7d9acaf2018-09-03 12:38:49 +08004027
Yury Kotovfbd162e2019-02-15 20:45:46 +03004028 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Zhang Chen13af18f2018-09-03 12:38:48 +08004029 if (block->colo_cache) {
4030 qemu_anon_ram_free(block->colo_cache, block->used_length);
4031 block->colo_cache = NULL;
4032 }
4033 }
Zhang Chen7d9acaf2018-09-03 12:38:49 +08004034
Zhang Chen13af18f2018-09-03 12:38:48 +08004035 rcu_read_unlock();
Zhang Chenc6e5baf2019-03-30 06:29:51 +08004036 qemu_mutex_destroy(&ram_state->bitmap_mutex);
Zhang Chen7d9acaf2018-09-03 12:38:49 +08004037 g_free(ram_state);
4038 ram_state = NULL;
Zhang Chen13af18f2018-09-03 12:38:48 +08004039}
4040
Juan Quintela3d0684b2017-03-23 15:06:39 +01004041/**
Juan Quintelaf265e0e2017-06-28 11:52:27 +02004042 * ram_load_setup: Setup RAM for migration incoming side
4043 *
4044 * Returns zero to indicate success and negative for error
4045 *
4046 * @f: QEMUFile where to receive the data
4047 * @opaque: RAMState pointer
4048 */
4049static int ram_load_setup(QEMUFile *f, void *opaque)
4050{
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08004051 if (compress_threads_load_setup(f)) {
Xiao Guangrong797ca152018-03-30 15:51:21 +08004052 return -1;
4053 }
4054
Juan Quintelaf265e0e2017-06-28 11:52:27 +02004055 xbzrle_load_setup();
Alexey Perevalovf9494612017-10-05 14:13:20 +03004056 ramblock_recv_map_init();
Zhang Chen13af18f2018-09-03 12:38:48 +08004057
Juan Quintelaf265e0e2017-06-28 11:52:27 +02004058 return 0;
4059}
4060
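/**
 * ram_load_cleanup: free the resources set up by ram_load_setup()
 *
 * Returns zero to indicate success
 *
 * Persists any pmem-backed RAMBlocks, then tears down the XBZRLE buffers,
 * the decompression threads and the received-page bitmaps.
 *
 * @opaque: RAMState pointer
 */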
4061static int ram_load_cleanup(void *opaque)
4062{
Alexey Perevalovf9494612017-10-05 14:13:20 +03004063 RAMBlock *rb;
Junyan He56eb90a2018-07-18 15:48:03 +08004064
Yury Kotovfbd162e2019-02-15 20:45:46 +03004065 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
Junyan He56eb90a2018-07-18 15:48:03 +08004066 if (ramblock_is_pmem(rb)) {
4067 pmem_persist(rb->host, rb->used_length);
4068 }
4069 }
4070
Juan Quintelaf265e0e2017-06-28 11:52:27 +02004071 xbzrle_load_cleanup();
Juan Quintelaf0afa332017-06-28 11:52:28 +02004072 compress_threads_load_cleanup();
Alexey Perevalovf9494612017-10-05 14:13:20 +03004073
Yury Kotovfbd162e2019-02-15 20:45:46 +03004074 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
Alexey Perevalovf9494612017-10-05 14:13:20 +03004075 g_free(rb->receivedmap);
4076 rb->receivedmap = NULL;
4077 }
Zhang Chen13af18f2018-09-03 12:38:48 +08004078
Juan Quintelaf265e0e2017-06-28 11:52:27 +02004079 return 0;
4080}
4081
4082/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01004083 * ram_postcopy_incoming_init: allocate postcopy data structures
4084 *
4085 * Returns 0 for success and negative if there was one error
4086 *
4087 * @mis: current migration incoming state
4088 *
4089 * Allocate data structures etc needed by incoming migration with
 4090 * postcopy-ram. postcopy-ram's similarly named
4091 * postcopy_ram_incoming_init does the work.
Dr. David Alan Gilbert1caddf82015-11-05 18:11:03 +00004092 */
4093int ram_postcopy_incoming_init(MigrationIncomingState *mis)
4094{
David Hildenbrandc1361802018-06-20 22:27:36 +02004095 return postcopy_ram_incoming_init(mis);
Dr. David Alan Gilbert1caddf82015-11-05 18:11:03 +00004096}
4097
Juan Quintela3d0684b2017-03-23 15:06:39 +01004098/**
4099 * ram_load_postcopy: load a page in postcopy case
4100 *
4101 * Returns 0 for success or -errno in case of error
4102 *
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004103 * Called in postcopy mode by ram_load().
4104 * rcu_read_lock is taken prior to this being called.
Juan Quintela3d0684b2017-03-23 15:06:39 +01004105 *
 4106 * @f: QEMUFile where to receive the data
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004107 */
4108static int ram_load_postcopy(QEMUFile *f)
4109{
4110 int flags = 0, ret = 0;
4111 bool place_needed = false;
Peter Xu1aa83672018-07-10 17:18:53 +08004112 bool matches_target_page_size = false;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004113 MigrationIncomingState *mis = migration_incoming_get_current();
4114 /* Temporary page that is later 'placed' */
4115 void *postcopy_host_page = postcopy_get_tmp_page(mis);
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00004116 void *last_host = NULL;
Dr. David Alan Gilberta3b6ff62015-11-11 14:02:28 +00004117 bool all_zero = false;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004118
4119 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
4120 ram_addr_t addr;
4121 void *host = NULL;
4122 void *page_buffer = NULL;
4123 void *place_source = NULL;
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00004124 RAMBlock *block = NULL;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004125 uint8_t ch;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004126
4127 addr = qemu_get_be64(f);
Peter Xu7a9ddfb2018-02-08 18:31:05 +08004128
4129 /*
4130 * If qemu file error, we should stop here, and then "addr"
4131 * may be invalid
4132 */
4133 ret = qemu_file_get_error(f);
4134 if (ret) {
4135 break;
4136 }
4137
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004138 flags = addr & ~TARGET_PAGE_MASK;
4139 addr &= TARGET_PAGE_MASK;
4140
4141 trace_ram_load_postcopy_loop((uint64_t)addr, flags);
4142 place_needed = false;
Juan Quintelabb890ed2017-04-28 09:39:55 +02004143 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE)) {
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00004144 block = ram_block_from_stream(f, flags);
zhanghailiang4c4bad42016-01-15 11:37:41 +08004145
4146 host = host_from_ram_block_offset(block, addr);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004147 if (!host) {
4148 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
4149 ret = -EINVAL;
4150 break;
4151 }
Peter Xu1aa83672018-07-10 17:18:53 +08004152 matches_target_page_size = block->page_size == TARGET_PAGE_SIZE;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004153 /*
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00004154 * Postcopy requires that we place whole host pages atomically;
4155 * these may be huge pages for RAMBlocks that are backed by
4156 * hugetlbfs.
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004157 * To make it atomic, the data is read into a temporary page
4158 * that's moved into place later.
 4159 * The migration protocol uses, possibly smaller, target pages;
 4160 * however, the source ensures it always sends all the components
 4161 * of a host page in order.
4162 */
4163 page_buffer = postcopy_host_page +
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00004164 ((uintptr_t)host & (block->page_size - 1));
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004165 /* If all target pages are zero then we can optimise the placement */
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00004166 if (!((uintptr_t)host & (block->page_size - 1))) {
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004167 all_zero = true;
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00004168 } else {
 4169 /* not the first target page within the host page */
4170 if (host != (last_host + TARGET_PAGE_SIZE)) {
Markus Armbruster9af9e0f2015-12-18 16:35:19 +01004171 error_report("Non-sequential target page %p/%p",
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00004172 host, last_host);
4173 ret = -EINVAL;
4174 break;
4175 }
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004176 }
4177
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00004178
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004179 /*
4180 * If it's the last part of a host page then we place the host
4181 * page
4182 */
4183 place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00004184 (block->page_size - 1)) == 0;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004185 place_source = postcopy_host_page;
4186 }
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00004187 last_host = host;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004188
4189 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
Juan Quintelabb890ed2017-04-28 09:39:55 +02004190 case RAM_SAVE_FLAG_ZERO:
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004191 ch = qemu_get_byte(f);
4192 memset(page_buffer, ch, TARGET_PAGE_SIZE);
4193 if (ch) {
4194 all_zero = false;
4195 }
4196 break;
4197
4198 case RAM_SAVE_FLAG_PAGE:
4199 all_zero = false;
Peter Xu1aa83672018-07-10 17:18:53 +08004200 if (!matches_target_page_size) {
 4201 /* For huge pages, we always use a temporary buffer */
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004202 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
4203 } else {
Peter Xu1aa83672018-07-10 17:18:53 +08004204 /*
4205 * For small pages that matches target page size, we
4206 * avoid the qemu_file copy. Instead we directly use
4207 * the buffer of QEMUFile to place the page. Note: we
4208 * cannot do any QEMUFile operation before using that
4209 * buffer to make sure the buffer is valid when
4210 * placing the page.
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004211 */
4212 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
4213 TARGET_PAGE_SIZE);
4214 }
4215 break;
4216 case RAM_SAVE_FLAG_EOS:
4217 /* normal exit */
Juan Quintela6df264a2018-02-28 09:10:07 +01004218 multifd_recv_sync_main();
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004219 break;
4220 default:
4221 error_report("Unknown combination of migration flags: %#x"
4222 " (postcopy mode)", flags);
4223 ret = -EINVAL;
Peter Xu7a9ddfb2018-02-08 18:31:05 +08004224 break;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004225 }
4226
Peter Xu7a9ddfb2018-02-08 18:31:05 +08004227 /* Detect for any possible file errors */
4228 if (!ret && qemu_file_get_error(f)) {
4229 ret = qemu_file_get_error(f);
4230 }
4231
4232 if (!ret && place_needed) {
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004233 /* This gets called at the last target page in the host page */
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00004234 void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;
4235
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004236 if (all_zero) {
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00004237 ret = postcopy_place_page_zero(mis, place_dest,
Alexey Perevalov8be46202017-10-05 14:13:18 +03004238 block);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004239 } else {
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00004240 ret = postcopy_place_page(mis, place_dest,
Alexey Perevalov8be46202017-10-05 14:13:18 +03004241 place_source, block);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004242 }
4243 }
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004244 }
4245
4246 return ret;
4247}
4248
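/*
 * Returns true once the source has advised postcopy, until the incoming
 * postcopy state machine reaches its END state.
 */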
Daniel Henrique Barbozaacab30b2017-11-16 20:35:26 -02004249static bool postcopy_is_advised(void)
4250{
4251 PostcopyState ps = postcopy_state_get();
4252 return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END;
4253}
4254
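/*
 * Returns true while the destination is in the postcopy phase proper
 * (listening onwards), i.e. while incoming pages must be placed atomically.
 */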
4255static bool postcopy_is_running(void)
4256{
4257 PostcopyState ps = postcopy_state_get();
4258 return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END;
4259}
4260
Zhang Chene6f4aa12018-09-03 12:38:50 +08004261/*
4262 * Flush content of RAM cache into SVM's memory.
 4263 * Only flush the pages that have been dirtied by the PVM, the SVM, or both.
4264 */
4265static void colo_flush_ram_cache(void)
4266{
4267 RAMBlock *block = NULL;
4268 void *dst_host;
4269 void *src_host;
4270 unsigned long offset = 0;
4271
zhanghailiangd1955d22018-09-03 12:38:55 +08004272 memory_global_dirty_log_sync();
4273 rcu_read_lock();
Yury Kotovfbd162e2019-02-15 20:45:46 +03004274 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Wei Yang7a3e9572019-08-08 11:31:55 +08004275 ramblock_sync_dirty_bitmap(ram_state, block);
zhanghailiangd1955d22018-09-03 12:38:55 +08004276 }
4277 rcu_read_unlock();
4278
Zhang Chene6f4aa12018-09-03 12:38:50 +08004279 trace_colo_flush_ram_cache_begin(ram_state->migration_dirty_pages);
4280 rcu_read_lock();
4281 block = QLIST_FIRST_RCU(&ram_list.blocks);
4282
4283 while (block) {
4284 offset = migration_bitmap_find_dirty(ram_state, block, offset);
4285
4286 if (offset << TARGET_PAGE_BITS >= block->used_length) {
4287 offset = 0;
4288 block = QLIST_NEXT_RCU(block, next);
4289 } else {
4290 migration_bitmap_clear_dirty(ram_state, block, offset);
4291 dst_host = block->host + (offset << TARGET_PAGE_BITS);
4292 src_host = block->colo_cache + (offset << TARGET_PAGE_BITS);
4293 memcpy(dst_host, src_host, TARGET_PAGE_SIZE);
4294 }
4295 }
4296
4297 rcu_read_unlock();
4298 trace_colo_flush_ram_cache_end();
4299}
4300
Wei Yang10da4a32019-07-25 08:20:23 +08004301/**
4302 * ram_load_precopy: load pages in precopy case
4303 *
4304 * Returns 0 for success or -errno in case of error
4305 *
4306 * Called in precopy mode by ram_load().
4307 * rcu_read_lock is taken prior to this being called.
4308 *
 4309 * @f: QEMUFile where to receive the data
4310 */
4311static int ram_load_precopy(QEMUFile *f)
Juan Quintela56e93d22015-05-07 19:33:31 +02004312{
Wei Yang10da4a32019-07-25 08:20:23 +08004313 int flags = 0, ret = 0, invalid_flags = 0, len = 0;
Dr. David Alan Gilbertef08fb32017-02-24 18:28:30 +00004314 /* ADVISE is earlier, it shows the source has the postcopy capability on */
Daniel Henrique Barbozaacab30b2017-11-16 20:35:26 -02004315 bool postcopy_advised = postcopy_is_advised();
Juan Quintelaedc60122016-11-02 12:40:46 +01004316 if (!migrate_use_compression()) {
4317 invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE;
4318 }
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004319
Wei Yang10da4a32019-07-25 08:20:23 +08004320 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02004321 ram_addr_t addr, total_ram_bytes;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004322 void *host = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02004323 uint8_t ch;
4324
4325 addr = qemu_get_be64(f);
4326 flags = addr & ~TARGET_PAGE_MASK;
4327 addr &= TARGET_PAGE_MASK;
4328
Juan Quintelaedc60122016-11-02 12:40:46 +01004329 if (flags & invalid_flags) {
4330 if (flags & invalid_flags & RAM_SAVE_FLAG_COMPRESS_PAGE) {
4331 error_report("Received an unexpected compressed page");
4332 }
4333
4334 ret = -EINVAL;
4335 break;
4336 }
4337
Juan Quintelabb890ed2017-04-28 09:39:55 +02004338 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004339 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
zhanghailiang4c4bad42016-01-15 11:37:41 +08004340 RAMBlock *block = ram_block_from_stream(f, flags);
4341
Zhang Chen13af18f2018-09-03 12:38:48 +08004342 /*
 4343 * After going into COLO, we should load the page into colo_cache.
4344 */
4345 if (migration_incoming_in_colo_state()) {
4346 host = colo_cache_from_block_offset(block, addr);
4347 } else {
4348 host = host_from_ram_block_offset(block, addr);
4349 }
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004350 if (!host) {
4351 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
4352 ret = -EINVAL;
4353 break;
4354 }
Zhang Chen13af18f2018-09-03 12:38:48 +08004355
4356 if (!migration_incoming_in_colo_state()) {
4357 ramblock_recv_bitmap_set(block, host);
4358 }
4359
Dr. David Alan Gilbert1db9d8e2017-04-26 19:37:21 +01004360 trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004361 }
4362
Juan Quintela56e93d22015-05-07 19:33:31 +02004363 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
4364 case RAM_SAVE_FLAG_MEM_SIZE:
4365 /* Synchronize RAM block list */
4366 total_ram_bytes = addr;
4367 while (!ret && total_ram_bytes) {
4368 RAMBlock *block;
Juan Quintela56e93d22015-05-07 19:33:31 +02004369 char id[256];
4370 ram_addr_t length;
4371
4372 len = qemu_get_byte(f);
4373 qemu_get_buffer(f, (uint8_t *)id, len);
4374 id[len] = 0;
4375 length = qemu_get_be64(f);
4376
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00004377 block = qemu_ram_block_by_name(id);
Cédric Le Goaterb895de52018-05-14 08:57:00 +02004378 if (block && !qemu_ram_is_migratable(block)) {
4379 error_report("block %s should not be migrated !", id);
4380 ret = -EINVAL;
4381 } else if (block) {
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00004382 if (length != block->used_length) {
4383 Error *local_err = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02004384
Gongleifa53a0e2016-05-10 10:04:59 +08004385 ret = qemu_ram_resize(block, length,
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00004386 &local_err);
4387 if (local_err) {
4388 error_report_err(local_err);
Juan Quintela56e93d22015-05-07 19:33:31 +02004389 }
Juan Quintela56e93d22015-05-07 19:33:31 +02004390 }
Dr. David Alan Gilbertef08fb32017-02-24 18:28:30 +00004391 /* For postcopy we need to check hugepage sizes match */
4392 if (postcopy_advised &&
4393 block->page_size != qemu_host_page_size) {
4394 uint64_t remote_page_size = qemu_get_be64(f);
4395 if (remote_page_size != block->page_size) {
4396 error_report("Mismatched RAM page size %s "
4397 "(local) %zd != %" PRId64,
4398 id, block->page_size,
4399 remote_page_size);
4400 ret = -EINVAL;
4401 }
4402 }
Yury Kotovfbd162e2019-02-15 20:45:46 +03004403 if (migrate_ignore_shared()) {
4404 hwaddr addr = qemu_get_be64(f);
Yury Kotovfbd162e2019-02-15 20:45:46 +03004405 if (ramblock_is_ignored(block) &&
4406 block->mr->addr != addr) {
4407 error_report("Mismatched GPAs for block %s "
4408 "%" PRId64 "!= %" PRId64,
4409 id, (uint64_t)addr,
4410 (uint64_t)block->mr->addr);
4411 ret = -EINVAL;
4412 }
4413 }
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00004414 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
4415 block->idstr);
4416 } else {
Juan Quintela56e93d22015-05-07 19:33:31 +02004417 error_report("Unknown ramblock \"%s\", cannot "
4418 "accept migration", id);
4419 ret = -EINVAL;
4420 }
4421
4422 total_ram_bytes -= length;
4423 }
4424 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004425
Juan Quintelabb890ed2017-04-28 09:39:55 +02004426 case RAM_SAVE_FLAG_ZERO:
Juan Quintela56e93d22015-05-07 19:33:31 +02004427 ch = qemu_get_byte(f);
4428 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
4429 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004430
Juan Quintela56e93d22015-05-07 19:33:31 +02004431 case RAM_SAVE_FLAG_PAGE:
Juan Quintela56e93d22015-05-07 19:33:31 +02004432 qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
4433 break;
Juan Quintela56e93d22015-05-07 19:33:31 +02004434
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004435 case RAM_SAVE_FLAG_COMPRESS_PAGE:
Juan Quintela56e93d22015-05-07 19:33:31 +02004436 len = qemu_get_be32(f);
4437 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
4438 error_report("Invalid compressed data length: %d", len);
4439 ret = -EINVAL;
4440 break;
4441 }
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00004442 decompress_data_with_multi_threads(f, host, len);
Juan Quintela56e93d22015-05-07 19:33:31 +02004443 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004444
Juan Quintela56e93d22015-05-07 19:33:31 +02004445 case RAM_SAVE_FLAG_XBZRLE:
Juan Quintela56e93d22015-05-07 19:33:31 +02004446 if (load_xbzrle(f, addr, host) < 0) {
4447 error_report("Failed to decompress XBZRLE page at "
4448 RAM_ADDR_FMT, addr);
4449 ret = -EINVAL;
4450 break;
4451 }
4452 break;
4453 case RAM_SAVE_FLAG_EOS:
4454 /* normal exit */
Juan Quintela6df264a2018-02-28 09:10:07 +01004455 multifd_recv_sync_main();
Juan Quintela56e93d22015-05-07 19:33:31 +02004456 break;
4457 default:
4458 if (flags & RAM_SAVE_FLAG_HOOK) {
Dr. David Alan Gilbert632e3a52015-06-11 18:17:23 +01004459 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
Juan Quintela56e93d22015-05-07 19:33:31 +02004460 } else {
4461 error_report("Unknown combination of migration flags: %#x",
4462 flags);
4463 ret = -EINVAL;
4464 }
4465 }
4466 if (!ret) {
4467 ret = qemu_file_get_error(f);
4468 }
4469 }
4470
Wei Yang10da4a32019-07-25 08:20:23 +08004471 return ret;
4472}
4473
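/**
 * ram_load: entry point for loading the "ram" section
 *
 * Returns 0 for success or -errno in case of error
 *
 * Dispatches to ram_load_postcopy() or ram_load_precopy() depending on
 * whether postcopy is running, waits for the decompression threads to
 * drain, and in COLO state flushes the RAM cache into the SVM's memory.
 *
 * @f: QEMUFile where to receive the data
 * @opaque: opaque pointer registered in ram_mig_init() (unused here)
 * @version_id: stream version; only version 4 is accepted
 */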
4474static int ram_load(QEMUFile *f, void *opaque, int version_id)
4475{
4476 int ret = 0;
4477 static uint64_t seq_iter;
4478 /*
4479 * If system is running in postcopy mode, page inserts to host memory must
4480 * be atomic
4481 */
4482 bool postcopy_running = postcopy_is_running();
4483
4484 seq_iter++;
4485
4486 if (version_id != 4) {
4487 return -EINVAL;
4488 }
4489
4490 /*
4491 * This RCU critical section can be very long running.
4492 * When RCU reclaims in the code start to become numerous,
4493 * it will be necessary to reduce the granularity of this
4494 * critical section.
4495 */
4496 rcu_read_lock();
4497
4498 if (postcopy_running) {
4499 ret = ram_load_postcopy(f);
4500 } else {
4501 ret = ram_load_precopy(f);
4502 }
4503
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08004504 ret |= wait_for_decompress_done();
Juan Quintela56e93d22015-05-07 19:33:31 +02004505 rcu_read_unlock();
Juan Quintela55c44462017-01-23 22:32:05 +01004506 trace_ram_load_complete(ret, seq_iter);
Zhang Chene6f4aa12018-09-03 12:38:50 +08004507
4508 if (!ret && migration_incoming_in_colo_state()) {
4509 colo_flush_ram_cache();
4510 }
Juan Quintela56e93d22015-05-07 19:33:31 +02004511 return ret;
4512}
4513
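/*
 * Returns true if RAM can be migrated with postcopy: refused when any
 * RAMBlock is pmem-backed (nvdimm), otherwise it follows the
 * postcopy-ram capability.
 */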
Vladimir Sementsov-Ogievskiyc6467622017-07-10 19:30:14 +03004514static bool ram_has_postcopy(void *opaque)
4515{
Junyan He469dd512018-07-18 15:48:02 +08004516 RAMBlock *rb;
Yury Kotovfbd162e2019-02-15 20:45:46 +03004517 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
Junyan He469dd512018-07-18 15:48:02 +08004518 if (ramblock_is_pmem(rb)) {
 4519 info_report("Block: %s, host: %p is nvdimm memory, postcopy"
 4520 " is not supported now!", rb->idstr, rb->host);
4521 return false;
4522 }
4523 }
4524
Vladimir Sementsov-Ogievskiyc6467622017-07-10 19:30:14 +03004525 return migrate_postcopy_ram();
4526}
4527
Peter Xuedd090c2018-05-02 18:47:32 +08004528/* Sync all the dirty bitmap with destination VM. */
4529static int ram_dirty_bitmap_sync_all(MigrationState *s, RAMState *rs)
4530{
4531 RAMBlock *block;
4532 QEMUFile *file = s->to_dst_file;
4533 int ramblock_count = 0;
4534
4535 trace_ram_dirty_bitmap_sync_start();
4536
Yury Kotovfbd162e2019-02-15 20:45:46 +03004537 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Peter Xuedd090c2018-05-02 18:47:32 +08004538 qemu_savevm_send_recv_bitmap(file, block->idstr);
4539 trace_ram_dirty_bitmap_request(block->idstr);
4540 ramblock_count++;
4541 }
4542
4543 trace_ram_dirty_bitmap_sync_wait();
4544
4545 /* Wait until all the ramblocks' dirty bitmap synced */
4546 while (ramblock_count--) {
4547 qemu_sem_wait(&s->rp_state.rp_sem);
4548 }
4549
4550 trace_ram_dirty_bitmap_sync_complete();
4551
4552 return 0;
4553}
4554
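/* Tell ram_dirty_bitmap_sync_all() that one more ramblock bitmap has arrived */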
4555static void ram_dirty_bitmap_reload_notify(MigrationState *s)
4556{
4557 qemu_sem_post(&s->rp_state.rp_sem);
4558}
4559
Peter Xua335deb2018-05-02 18:47:28 +08004560/*
4561 * Read the received bitmap, revert it as the initial dirty bitmap.
4562 * This is only used when the postcopy migration is paused but wants
4563 * to resume from a middle point.
4564 */
4565int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block)
4566{
4567 int ret = -EINVAL;
4568 QEMUFile *file = s->rp_state.from_dst_file;
4569 unsigned long *le_bitmap, nbits = block->used_length >> TARGET_PAGE_BITS;
Peter Xua725ef92018-07-10 17:18:55 +08004570 uint64_t local_size = DIV_ROUND_UP(nbits, 8);
Peter Xua335deb2018-05-02 18:47:28 +08004571 uint64_t size, end_mark;
4572
4573 trace_ram_dirty_bitmap_reload_begin(block->idstr);
4574
4575 if (s->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
4576 error_report("%s: incorrect state %s", __func__,
4577 MigrationStatus_str(s->state));
4578 return -EINVAL;
4579 }
4580
4581 /*
4582 * Note: see comments in ramblock_recv_bitmap_send() on why we
 4583 * need the endianness conversion and the padding.
4584 */
4585 local_size = ROUND_UP(local_size, 8);
4586
4587 /* Add paddings */
4588 le_bitmap = bitmap_new(nbits + BITS_PER_LONG);
4589
4590 size = qemu_get_be64(file);
4591
4592 /* The size of the bitmap should match with our ramblock */
4593 if (size != local_size) {
4594 error_report("%s: ramblock '%s' bitmap size mismatch "
4595 "(0x%"PRIx64" != 0x%"PRIx64")", __func__,
4596 block->idstr, size, local_size);
4597 ret = -EINVAL;
4598 goto out;
4599 }
4600
4601 size = qemu_get_buffer(file, (uint8_t *)le_bitmap, local_size);
4602 end_mark = qemu_get_be64(file);
4603
4604 ret = qemu_file_get_error(file);
4605 if (ret || size != local_size) {
4606 error_report("%s: read bitmap failed for ramblock '%s': %d"
4607 " (size 0x%"PRIx64", got: 0x%"PRIx64")",
4608 __func__, block->idstr, ret, local_size, size);
4609 ret = -EIO;
4610 goto out;
4611 }
4612
4613 if (end_mark != RAMBLOCK_RECV_BITMAP_ENDING) {
 4614 error_report("%s: ramblock '%s' end mark incorrect: 0x%"PRIx64,
4615 __func__, block->idstr, end_mark);
4616 ret = -EINVAL;
4617 goto out;
4618 }
4619
4620 /*
 4621 * Endianness conversion. We are in postcopy (though paused).
4622 * The dirty bitmap won't change. We can directly modify it.
4623 */
4624 bitmap_from_le(block->bmap, le_bitmap, nbits);
4625
4626 /*
4627 * What we received is "received bitmap". Revert it as the initial
4628 * dirty bitmap for this ramblock.
4629 */
4630 bitmap_complement(block->bmap, block->bmap, nbits);
4631
4632 trace_ram_dirty_bitmap_reload_complete(block->idstr);
4633
Peter Xuedd090c2018-05-02 18:47:32 +08004634 /*
 4635 * We succeeded in syncing the bitmap for the current ramblock. If this is
4636 * the last one to sync, we need to notify the main send thread.
4637 */
4638 ram_dirty_bitmap_reload_notify(s);
4639
Peter Xua335deb2018-05-02 18:47:28 +08004640 ret = 0;
4641out:
Peter Xubf269902018-05-25 09:50:42 +08004642 g_free(le_bitmap);
Peter Xua335deb2018-05-02 18:47:28 +08004643 return ret;
4644}
4645
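/**
 * ram_resume_prepare: prepare RAM for a postcopy recovery
 *
 * Returns 0 for success or an error code from the bitmap sync
 *
 * Re-synchronizes the dirty bitmaps with the destination and points the
 * RAMState at the new outgoing stream before the migration is resumed.
 *
 * @s: current migration state
 * @opaque: pointer to the RAMState pointer registered in ram_mig_init()
 */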
Peter Xuedd090c2018-05-02 18:47:32 +08004646static int ram_resume_prepare(MigrationState *s, void *opaque)
4647{
4648 RAMState *rs = *(RAMState **)opaque;
Peter Xu08614f32018-05-02 18:47:33 +08004649 int ret;
Peter Xuedd090c2018-05-02 18:47:32 +08004650
Peter Xu08614f32018-05-02 18:47:33 +08004651 ret = ram_dirty_bitmap_sync_all(s, rs);
4652 if (ret) {
4653 return ret;
4654 }
4655
4656 ram_state_resume_prepare(rs, s->to_dst_file);
4657
4658 return 0;
Peter Xuedd090c2018-05-02 18:47:32 +08004659}
4660
Juan Quintela56e93d22015-05-07 19:33:31 +02004661static SaveVMHandlers savevm_ram_handlers = {
Juan Quintela9907e842017-06-28 11:52:24 +02004662 .save_setup = ram_save_setup,
Juan Quintela56e93d22015-05-07 19:33:31 +02004663 .save_live_iterate = ram_save_iterate,
Dr. David Alan Gilbert763c9062015-11-05 18:11:00 +00004664 .save_live_complete_postcopy = ram_save_complete,
Dr. David Alan Gilberta3e06c32015-11-05 18:10:41 +00004665 .save_live_complete_precopy = ram_save_complete,
Vladimir Sementsov-Ogievskiyc6467622017-07-10 19:30:14 +03004666 .has_postcopy = ram_has_postcopy,
Juan Quintela56e93d22015-05-07 19:33:31 +02004667 .save_live_pending = ram_save_pending,
4668 .load_state = ram_load,
Juan Quintelaf265e0e2017-06-28 11:52:27 +02004669 .save_cleanup = ram_save_cleanup,
4670 .load_setup = ram_load_setup,
4671 .load_cleanup = ram_load_cleanup,
Peter Xuedd090c2018-05-02 18:47:32 +08004672 .resume_prepare = ram_resume_prepare,
Juan Quintela56e93d22015-05-07 19:33:31 +02004673};
4674
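/*
 * ram_mig_init: initialize the XBZRLE lock and register the "ram"
 * section (version 4) live migration handlers.
 */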
4675void ram_mig_init(void)
4676{
4677 qemu_mutex_init(&XBZRLE.lock);
Juan Quintela6f37bb82017-03-13 19:26:29 +01004678 register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state);
Juan Quintela56e93d22015-05-07 19:33:31 +02004679}