/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2011-2015 Red Hat Inc
 *
 * Authors:
 *  Juan Quintela <quintela@redhat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include <zlib.h>
#include "qemu/cutils.h"
#include "qemu/bitops.h"
#include "qemu/bitmap.h"
#include "qemu/main-loop.h"
#include "xbzrle.h"
#include "ram.h"
#include "migration.h"
#include "socket.h"
#include "migration/register.h"
#include "migration/misc.h"
#include "qemu-file.h"
#include "postcopy-ram.h"
#include "page_cache.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "qapi/qapi-events-migration.h"
#include "qapi/qmp/qerror.h"
#include "trace.h"
#include "exec/ram_addr.h"
#include "exec/target_page.h"
#include "qemu/rcu_queue.h"
#include "migration/colo.h"
#include "block.h"
#include "sysemu/sysemu.h"
#include "qemu/uuid.h"
#include "savevm.h"
#include "qemu/iov.h"

/***********************************************************/
/* ram save/restore */

/* RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS.  It
 * worked for pages that were filled with the same char.  We switched
 * it to only search for the zero value.  And to avoid confusion with
 * RAM_SAVE_FLAG_COMPRESS_PAGE we renamed it.
 */

#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
#define RAM_SAVE_FLAG_ZERO     0x02
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
#define RAM_SAVE_FLAG_PAGE     0x08
#define RAM_SAVE_FLAG_EOS      0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE   0x40
/* 0x80 is reserved in migration.h; start with 0x100 for the next flag */
#define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100

static inline bool is_zero_range(uint8_t *p, uint64_t size)
{
    return buffer_is_zero(p, size);
}

XBZRLECacheStats xbzrle_counters;

/* struct containing the XBZRLE cache and a static page
   used by the compression */
static struct {
    /* buffer used for XBZRLE encoding */
    uint8_t *encoded_buf;
    /* buffer for storing page content */
    uint8_t *current_buf;
    /* Cache for XBZRLE, protected by lock. */
    PageCache *cache;
    QemuMutex lock;
    /* it will store a page full of zeros */
    uint8_t *zero_target_page;
    /* buffer used for XBZRLE decoding */
    uint8_t *decoded_buf;
} XBZRLE;

static void XBZRLE_cache_lock(void)
{
    if (migrate_use_xbzrle()) {
        qemu_mutex_lock(&XBZRLE.lock);
    }
}

static void XBZRLE_cache_unlock(void)
{
    if (migrate_use_xbzrle()) {
        qemu_mutex_unlock(&XBZRLE.lock);
    }
}

/**
 * xbzrle_cache_resize: resize the xbzrle cache
 *
 * This function is called from qmp_migrate_set_cache_size in the main
 * thread, possibly while a migration is in progress.  A running
 * migration may be using the cache and might finish during this call,
 * hence changes to the cache are protected by the XBZRLE.lock mutex.
 *
 * Returns 0 for success or -1 for error
 *
 * @new_size: new cache size
 * @errp: set *errp if the check failed, with reason
 */
int xbzrle_cache_resize(int64_t new_size, Error **errp)
{
    PageCache *new_cache;
    int64_t ret = 0;

    /* Check for truncation */
    if (new_size != (size_t)new_size) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
                   "exceeding address space");
        return -1;
    }

    if (new_size == migrate_xbzrle_cache_size()) {
        /* nothing to do */
        return 0;
    }

    XBZRLE_cache_lock();

    if (XBZRLE.cache != NULL) {
        new_cache = cache_init(new_size, TARGET_PAGE_SIZE, errp);
        if (!new_cache) {
            ret = -1;
            goto out;
        }

        cache_fini(XBZRLE.cache);
        XBZRLE.cache = new_cache;
    }
out:
    XBZRLE_cache_unlock();
    return ret;
}
160
Cédric Le Goaterb895de52018-05-14 08:57:00 +0200161/* Should be holding either ram_list.mutex, or the RCU lock. */
162#define RAMBLOCK_FOREACH_MIGRATABLE(block) \
Dr. David Alan Gilbert343f6322018-06-05 17:25:45 +0100163 INTERNAL_RAMBLOCK_FOREACH(block) \
Cédric Le Goaterb895de52018-05-14 08:57:00 +0200164 if (!qemu_ram_is_migratable(block)) {} else
165
Dr. David Alan Gilbert343f6322018-06-05 17:25:45 +0100166#undef RAMBLOCK_FOREACH
167
Alexey Perevalovf9494612017-10-05 14:13:20 +0300168static void ramblock_recv_map_init(void)
169{
170 RAMBlock *rb;
171
Cédric Le Goaterb895de52018-05-14 08:57:00 +0200172 RAMBLOCK_FOREACH_MIGRATABLE(rb) {
Alexey Perevalovf9494612017-10-05 14:13:20 +0300173 assert(!rb->receivedmap);
174 rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits());
175 }
176}
177
178int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr)
179{
180 return test_bit(ramblock_recv_bitmap_offset(host_addr, rb),
181 rb->receivedmap);
182}
183
Dr. David Alan Gilbert1cba9f62018-03-12 17:21:08 +0000184bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset)
185{
186 return test_bit(byte_offset >> TARGET_PAGE_BITS, rb->receivedmap);
187}
188
Alexey Perevalovf9494612017-10-05 14:13:20 +0300189void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr)
190{
191 set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb), rb->receivedmap);
192}
193
194void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr,
195 size_t nr)
196{
197 bitmap_set_atomic(rb->receivedmap,
198 ramblock_recv_bitmap_offset(host_addr, rb),
199 nr);
200}
201
Peter Xua335deb2018-05-02 18:47:28 +0800202#define RAMBLOCK_RECV_BITMAP_ENDING (0x0123456789abcdefULL)
203
/*
 * Format: bitmap_size (8 bytes) + whole_bitmap (N bytes).
 *
 * Returns the number of bytes sent (>0) on success, or <0 on error.
 */
int64_t ramblock_recv_bitmap_send(QEMUFile *file,
                                  const char *block_name)
{
    RAMBlock *block = qemu_ram_block_by_name(block_name);
    unsigned long *le_bitmap, nbits;
    uint64_t size;

    if (!block) {
        error_report("%s: invalid block name: %s", __func__, block_name);
        return -1;
    }

    nbits = block->used_length >> TARGET_PAGE_BITS;

    /*
     * Make sure the tmp bitmap buffer is big enough, e.g., on 32bit
     * machines we may need 4 more bytes for padding (see below
     * comment).  So extend it a bit beforehand.
     */
    le_bitmap = bitmap_new(nbits + BITS_PER_LONG);

    /*
     * Always use little endian when sending the bitmap.  This is
     * required when the source and destination VMs are not using the
     * same endianness.  (Note: big endian won't work.)
     */
    bitmap_to_le(le_bitmap, block->receivedmap, nbits);

    /* Size of the bitmap, in bytes */
    size = nbits / 8;

    /*
     * size is always aligned to 8 bytes for 64bit machines, but it
     * may not be true for 32bit machines.  We need this padding to
     * make sure the migration can survive even between 32bit and
     * 64bit machines.
     */
    size = ROUND_UP(size, 8);

    qemu_put_be64(file, size);
    qemu_put_buffer(file, (const uint8_t *)le_bitmap, size);
    /*
     * Mark as an end, in case the middle part is screwed up due to
     * some "mysterious" reason.
     */
    qemu_put_be64(file, RAMBLOCK_RECV_BITMAP_ENDING);
    qemu_fflush(file);

    g_free(le_bitmap);

    if (qemu_file_get_error(file)) {
        return qemu_file_get_error(file);
    }

    return size + sizeof(size);
}

/*
 * An outstanding page request, on the source, having been received
 * and queued
 */
struct RAMSrcPageRequest {
    RAMBlock *rb;
    hwaddr    offset;
    hwaddr    len;

    QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
};

/* State of RAM for migration */
struct RAMState {
    /* QEMUFile used for this migration */
    QEMUFile *f;
    /* Last block that we have visited searching for dirty pages */
    RAMBlock *last_seen_block;
    /* Last block from where we have sent data */
    RAMBlock *last_sent_block;
    /* Last dirty target page we have sent */
    ram_addr_t last_page;
    /* last ram version we have seen */
    uint32_t last_version;
    /* We are in the first round */
    bool ram_bulk_stage;
    /* How many times we have dirty too many pages */
    int dirty_rate_high_cnt;
    /* these variables are used for bitmap sync */
    /* last time we did a full bitmap_sync */
    int64_t time_last_bitmap_sync;
    /* bytes transferred at start_time */
    uint64_t bytes_xfer_prev;
    /* number of dirty pages since start_time */
    uint64_t num_dirty_pages_period;
    /* xbzrle misses since the beginning of the period */
    uint64_t xbzrle_cache_miss_prev;
    /* number of iterations at the beginning of period */
    uint64_t iterations_prev;
    /* Iterations since start */
    uint64_t iterations;
    /* number of dirty bits in the bitmap */
    uint64_t migration_dirty_pages;
    /* protects modification of the bitmap */
    QemuMutex bitmap_mutex;
    /* The RAMBlock used in the last src_page_requests */
    RAMBlock *last_req_rb;
    /* Queue of outstanding page requests from the destination */
    QemuMutex src_page_req_mutex;
    QSIMPLEQ_HEAD(src_page_requests, RAMSrcPageRequest) src_page_requests;
};
typedef struct RAMState RAMState;

static RAMState *ram_state;

uint64_t ram_bytes_remaining(void)
{
    return ram_state ? (ram_state->migration_dirty_pages * TARGET_PAGE_SIZE) :
                       0;
}

MigrationStats ram_counters;

/* used by the search for pages to send */
struct PageSearchStatus {
    /* Current block being searched */
    RAMBlock    *block;
    /* Current page to search from */
    unsigned long page;
    /* Set once we wrap around */
    bool         complete_round;
};
typedef struct PageSearchStatus PageSearchStatus;

struct CompressParam {
    bool done;
    bool quit;
    QEMUFile *file;
    QemuMutex mutex;
    QemuCond cond;
    RAMBlock *block;
    ram_addr_t offset;

    /* internally used fields */
    z_stream stream;
    uint8_t *originbuf;
};
typedef struct CompressParam CompressParam;

struct DecompressParam {
    bool done;
    bool quit;
    QemuMutex mutex;
    QemuCond cond;
    void *des;
    uint8_t *compbuf;
    int len;
    z_stream stream;
};
typedef struct DecompressParam DecompressParam;

static CompressParam *comp_param;
static QemuThread *compress_threads;
/* comp_done_cond is used to wake up the migration thread when
 * one of the compression threads has finished the compression.
 * comp_done_lock is used to co-work with comp_done_cond.
 */
static QemuMutex comp_done_lock;
static QemuCond comp_done_cond;
/* The empty QEMUFileOps will be used by file in CompressParam */
static const QEMUFileOps empty_ops = { };

static QEMUFile *decomp_file;
static DecompressParam *decomp_param;
static QemuThread *decompress_threads;
static QemuMutex decomp_done_lock;
static QemuCond decomp_done_cond;

static int do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
                                ram_addr_t offset, uint8_t *source_buf);

static void *do_data_compress(void *opaque)
{
    CompressParam *param = opaque;
    RAMBlock *block;
    ram_addr_t offset;

    qemu_mutex_lock(&param->mutex);
    while (!param->quit) {
        if (param->block) {
            block = param->block;
            offset = param->offset;
            param->block = NULL;
            qemu_mutex_unlock(&param->mutex);

            do_compress_ram_page(param->file, &param->stream, block, offset,
                                 param->originbuf);

            qemu_mutex_lock(&comp_done_lock);
            param->done = true;
            qemu_cond_signal(&comp_done_cond);
            qemu_mutex_unlock(&comp_done_lock);

            qemu_mutex_lock(&param->mutex);
        } else {
            qemu_cond_wait(&param->cond, &param->mutex);
        }
    }
    qemu_mutex_unlock(&param->mutex);

    return NULL;
}

static inline void terminate_compression_threads(void)
{
    int idx, thread_count;

    thread_count = migrate_compress_threads();

    for (idx = 0; idx < thread_count; idx++) {
        qemu_mutex_lock(&comp_param[idx].mutex);
        comp_param[idx].quit = true;
        qemu_cond_signal(&comp_param[idx].cond);
        qemu_mutex_unlock(&comp_param[idx].mutex);
    }
}

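/*
 * Stop and join all compression threads and free the per-thread
 * compression state.  Returns immediately if compression is not in use.
 */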
static void compress_threads_save_cleanup(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    terminate_compression_threads();
    thread_count = migrate_compress_threads();
    for (i = 0; i < thread_count; i++) {
        /*
         * we use it as an indicator of whether the thread was
         * properly initialized or not
         */
        if (!comp_param[i].file) {
            break;
        }
        qemu_thread_join(compress_threads + i);
        qemu_mutex_destroy(&comp_param[i].mutex);
        qemu_cond_destroy(&comp_param[i].cond);
        deflateEnd(&comp_param[i].stream);
        g_free(comp_param[i].originbuf);
        qemu_fclose(comp_param[i].file);
        comp_param[i].file = NULL;
    }
    qemu_mutex_destroy(&comp_done_lock);
    qemu_cond_destroy(&comp_done_cond);
    g_free(compress_threads);
    g_free(comp_param);
    compress_threads = NULL;
    comp_param = NULL;
}

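/*
 * Allocate the per-thread compression state and start the compression
 * threads.  Returns 0 on success (or when compression is disabled), -1
 * if a zlib stream or buffer could not be set up; partially created
 * state is cleaned up before returning the error.
 */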
static int compress_threads_save_setup(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return 0;
    }
    thread_count = migrate_compress_threads();
    compress_threads = g_new0(QemuThread, thread_count);
    comp_param = g_new0(CompressParam, thread_count);
    qemu_cond_init(&comp_done_cond);
    qemu_mutex_init(&comp_done_lock);
    for (i = 0; i < thread_count; i++) {
        comp_param[i].originbuf = g_try_malloc(TARGET_PAGE_SIZE);
        if (!comp_param[i].originbuf) {
            goto exit;
        }

        if (deflateInit(&comp_param[i].stream,
                        migrate_compress_level()) != Z_OK) {
            g_free(comp_param[i].originbuf);
            goto exit;
        }

        /* comp_param[i].file is just used as a dummy buffer to save data,
         * set its ops to empty.
         */
        comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
        comp_param[i].done = true;
        comp_param[i].quit = false;
        qemu_mutex_init(&comp_param[i].mutex);
        qemu_cond_init(&comp_param[i].cond);
        qemu_thread_create(compress_threads + i, "compress",
                           do_data_compress, comp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
    return 0;

exit:
    compress_threads_save_cleanup();
    return -1;
}

/* Multiple fd's */

#define MULTIFD_MAGIC 0x11223344U
#define MULTIFD_VERSION 1

#define MULTIFD_FLAG_SYNC (1 << 0)

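/*
 * Initial handshake packet sent once per multifd channel, so that the
 * destination can match an incoming connection to a channel id and
 * reject peers belonging to a different QEMU instance (different UUID).
 */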
typedef struct {
    uint32_t magic;
    uint32_t version;
    unsigned char uuid[16]; /* QemuUUID */
    uint8_t id;
} __attribute__((packed)) MultiFDInit_t;

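/*
 * On-wire header that precedes every batch of pages on a multifd
 * channel.  Multi-byte integer fields are sent in big endian, and
 * offset[] carries one entry per page in the batch, relative to the
 * start of the RAMBlock named in ramblock[].
 */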
typedef struct {
    uint32_t magic;
    uint32_t version;
    uint32_t flags;
    uint32_t size;
    uint32_t used;
    uint64_t packet_num;
    char ramblock[256];
    uint64_t offset[];
} __attribute__((packed)) MultiFDPacket_t;

typedef struct {
    /* number of used pages */
    uint32_t used;
    /* number of allocated pages */
    uint32_t allocated;
    /* global number of generated multifd packets */
    uint64_t packet_num;
    /* offset of each page */
    ram_addr_t *offset;
    /* pointer to each page */
    struct iovec *iov;
    RAMBlock *block;
} MultiFDPages_t;

typedef struct {
    /* these fields are not changed once the thread is created */
    /* channel number */
    uint8_t id;
    /* channel thread name */
    char *name;
    /* channel thread id */
    QemuThread thread;
    /* communication channel */
    QIOChannel *c;
    /* sem where to wait for more work */
    QemuSemaphore sem;
    /* this mutex protects the following parameters */
    QemuMutex mutex;
    /* is this channel thread running */
    bool running;
    /* should this thread finish */
    bool quit;
    /* thread has work to do */
    int pending_job;
    /* array of pages to send */
    MultiFDPages_t *pages;
    /* packet allocated len */
    uint32_t packet_len;
    /* pointer to the packet */
    MultiFDPacket_t *packet;
    /* multifd flags for each packet */
    uint32_t flags;
    /* global number of generated multifd packets */
    uint64_t packet_num;
    /* thread local variables */
    /* packets sent through this channel */
    uint64_t num_packets;
    /* pages sent through this channel */
    uint64_t num_pages;
    /* syncs main thread and channels */
    QemuSemaphore sem_sync;
} MultiFDSendParams;

typedef struct {
    /* these fields are not changed once the thread is created */
    /* channel number */
    uint8_t id;
    /* channel thread name */
    char *name;
    /* channel thread id */
    QemuThread thread;
    /* communication channel */
    QIOChannel *c;
    /* this mutex protects the following parameters */
    QemuMutex mutex;
    /* is this channel thread running */
    bool running;
    /* array of pages to receive */
    MultiFDPages_t *pages;
    /* packet allocated len */
    uint32_t packet_len;
    /* pointer to the packet */
    MultiFDPacket_t *packet;
    /* multifd flags for each packet */
    uint32_t flags;
    /* global number of generated multifd packets */
    uint64_t packet_num;
    /* thread local variables */
    /* packets received through this channel */
    uint64_t num_packets;
    /* pages received through this channel */
    uint64_t num_pages;
    /* syncs main thread and channels */
    QemuSemaphore sem_sync;
} MultiFDRecvParams;

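/*
 * Send the per-channel handshake (magic, version, uuid, channel id)
 * right after the channel is established, before any data packets.
 */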
static int multifd_send_initial_packet(MultiFDSendParams *p, Error **errp)
{
    MultiFDInit_t msg;
    int ret;

    msg.magic = cpu_to_be32(MULTIFD_MAGIC);
    msg.version = cpu_to_be32(MULTIFD_VERSION);
    msg.id = p->id;
    memcpy(msg.uuid, &qemu_uuid.data, sizeof(msg.uuid));

    ret = qio_channel_write_all(p->c, (char *)&msg, sizeof(msg), errp);
    if (ret != 0) {
        return -1;
    }
    return 0;
}

static int multifd_recv_initial_packet(QIOChannel *c, Error **errp)
{
    MultiFDInit_t msg;
    int ret;

    ret = qio_channel_read_all(c, (char *)&msg, sizeof(msg), errp);
    if (ret != 0) {
        return -1;
    }

    be32_to_cpus(&msg.magic);
    be32_to_cpus(&msg.version);

    if (msg.magic != MULTIFD_MAGIC) {
        error_setg(errp, "multifd: received packet magic %x "
                   "expected %x", msg.magic, MULTIFD_MAGIC);
        return -1;
    }

    if (msg.version != MULTIFD_VERSION) {
        error_setg(errp, "multifd: received packet version %d "
                   "expected %d", msg.version, MULTIFD_VERSION);
        return -1;
    }

    if (memcmp(msg.uuid, &qemu_uuid, sizeof(qemu_uuid))) {
        char *uuid = qemu_uuid_unparse_strdup(&qemu_uuid);
        char *msg_uuid = qemu_uuid_unparse_strdup((const QemuUUID *)msg.uuid);

        error_setg(errp, "multifd: received uuid '%s' and expected "
                   "uuid '%s' for channel %hhd", msg_uuid, uuid, msg.id);
        g_free(uuid);
        g_free(msg_uuid);
        return -1;
    }

    if (msg.id > migrate_multifd_channels()) {
        error_setg(errp, "multifd: received channel id %d "
                   "expected a value below %d", msg.id,
                   migrate_multifd_channels());
        return -1;
    }

    return msg.id;
}

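/*
 * Allocate a MultiFDPages_t able to hold @size pages; the companion
 * multifd_pages_clear() below releases it again.
 */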
static MultiFDPages_t *multifd_pages_init(size_t size)
{
    MultiFDPages_t *pages = g_new0(MultiFDPages_t, 1);

    pages->allocated = size;
    pages->iov = g_new0(struct iovec, size);
    pages->offset = g_new0(ram_addr_t, size);

    return pages;
}

static void multifd_pages_clear(MultiFDPages_t *pages)
{
    pages->used = 0;
    pages->allocated = 0;
    pages->packet_num = 0;
    pages->block = NULL;
    g_free(pages->iov);
    pages->iov = NULL;
    g_free(pages->offset);
    pages->offset = NULL;
    g_free(pages);
}

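/*
 * multifd_send_fill_packet() serializes the current page batch into the
 * channel's wire header; multifd_recv_unfill_packet() is its counterpart
 * on the destination and validates every field before the page data is
 * read.
 */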
static void multifd_send_fill_packet(MultiFDSendParams *p)
{
    MultiFDPacket_t *packet = p->packet;
    int i;

    packet->magic = cpu_to_be32(MULTIFD_MAGIC);
    packet->version = cpu_to_be32(MULTIFD_VERSION);
    packet->flags = cpu_to_be32(p->flags);
    packet->size = cpu_to_be32(migrate_multifd_page_count());
    packet->used = cpu_to_be32(p->pages->used);
    packet->packet_num = cpu_to_be64(p->packet_num);

    if (p->pages->block) {
        strncpy(packet->ramblock, p->pages->block->idstr, 256);
    }

    for (i = 0; i < p->pages->used; i++) {
        packet->offset[i] = cpu_to_be64(p->pages->offset[i]);
    }
}

static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
{
    MultiFDPacket_t *packet = p->packet;
    RAMBlock *block;
    int i;

    be32_to_cpus(&packet->magic);
    if (packet->magic != MULTIFD_MAGIC) {
        error_setg(errp, "multifd: received packet "
                   "magic %x and expected magic %x",
                   packet->magic, MULTIFD_MAGIC);
        return -1;
    }

    be32_to_cpus(&packet->version);
    if (packet->version != MULTIFD_VERSION) {
        error_setg(errp, "multifd: received packet "
                   "version %d and expected version %d",
                   packet->version, MULTIFD_VERSION);
        return -1;
    }

    p->flags = be32_to_cpu(packet->flags);

    be32_to_cpus(&packet->size);
    if (packet->size > migrate_multifd_page_count()) {
        error_setg(errp, "multifd: received packet "
                   "with size %d and expected maximum size %d",
                   packet->size, migrate_multifd_page_count());
        return -1;
    }

    p->pages->used = be32_to_cpu(packet->used);
    if (p->pages->used > packet->size) {
        error_setg(errp, "multifd: received packet "
                   "with %d used pages and expected maximum of %d",
                   p->pages->used, packet->size);
        return -1;
    }

    p->packet_num = be64_to_cpu(packet->packet_num);

    if (p->pages->used) {
        /* make sure that ramblock is 0 terminated */
        packet->ramblock[255] = 0;
        block = qemu_ram_block_by_name(packet->ramblock);
        if (!block) {
            error_setg(errp, "multifd: unknown ram block %s",
                       packet->ramblock);
            return -1;
        }
    }

    for (i = 0; i < p->pages->used; i++) {
        ram_addr_t offset = be64_to_cpu(packet->offset[i]);

        if (offset > (block->used_length - TARGET_PAGE_SIZE)) {
            error_setg(errp, "multifd: offset too long " RAM_ADDR_FMT
                       " (max " RAM_ADDR_FMT ")",
                       offset, block->used_length);
            return -1;
        }
        p->pages->iov[i].iov_base = block->host + offset;
        p->pages->iov[i].iov_len = TARGET_PAGE_SIZE;
    }

    return 0;
}

struct {
    MultiFDSendParams *params;
    /* number of created threads */
    int count;
    /* array of pages to send */
    MultiFDPages_t *pages;
    /* syncs main thread and channels */
    QemuSemaphore sem_sync;
    /* global number of generated multifd packets */
    uint64_t packet_num;
    /* send channels ready */
    QemuSemaphore channels_ready;
} *multifd_send_state;

/*
 * How we use multifd_send_state->pages and channel->pages?
 *
 * We create a pages array for each channel, and a main one.  Each time
 * that we need to send a batch of pages we interchange the ones between
 * multifd_send_state and the channel that is sending it.  There are
 * two reasons for that:
 *    - to not have to do so many mallocs during migration
 *    - to make it easier to know what to free at the end of migration
 *
 * This way we always know who is the owner of each "pages" struct,
 * and we don't need any locking.  It belongs to the migration thread
 * or to the channel thread.  Switching is safe because the migration
 * thread is using the channel mutex when changing it, and the channel
 * thread has to have finished with its own, otherwise pending_job can't
 * be false.
 */

static void multifd_send_pages(void)
{
    int i;
    static int next_channel;
    MultiFDSendParams *p = NULL; /* make gcc happy */
    MultiFDPages_t *pages = multifd_send_state->pages;
    uint64_t transferred;

    qemu_sem_wait(&multifd_send_state->channels_ready);
    for (i = next_channel;; i = (i + 1) % migrate_multifd_channels()) {
        p = &multifd_send_state->params[i];

        qemu_mutex_lock(&p->mutex);
        if (!p->pending_job) {
            p->pending_job++;
            next_channel = (i + 1) % migrate_multifd_channels();
            break;
        }
        qemu_mutex_unlock(&p->mutex);
    }
    p->pages->used = 0;

    p->packet_num = multifd_send_state->packet_num++;
    p->pages->block = NULL;
    multifd_send_state->pages = p->pages;
    p->pages = pages;
    transferred = pages->used * TARGET_PAGE_SIZE + p->packet_len;
    ram_counters.multifd_bytes += transferred;
    ram_counters.transferred += transferred;
    qemu_mutex_unlock(&p->mutex);
    qemu_sem_post(&p->sem);
}

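/*
 * Queue one page for multifd transmission.  Pages are accumulated per
 * RAMBlock; the batch is flushed with multifd_send_pages() when it is
 * full or when the block changes.
 */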
static void multifd_queue_page(RAMBlock *block, ram_addr_t offset)
{
    MultiFDPages_t *pages = multifd_send_state->pages;

    if (!pages->block) {
        pages->block = block;
    }

    if (pages->block == block) {
        pages->offset[pages->used] = offset;
        pages->iov[pages->used].iov_base = block->host + offset;
        pages->iov[pages->used].iov_len = TARGET_PAGE_SIZE;
        pages->used++;

        if (pages->used < pages->allocated) {
            return;
        }
    }

    multifd_send_pages();

    if (pages->block != block) {
        multifd_queue_page(block, offset);
    }
}

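/*
 * Ask every send channel thread to quit.  If @err is set, the error is
 * recorded and an in-progress migration is moved to the FAILED state
 * before the threads are woken up.
 */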
static void multifd_send_terminate_threads(Error *err)
{
    int i;

    if (err) {
        MigrationState *s = migrate_get_current();
        migrate_set_error(s, err);
        if (s->state == MIGRATION_STATUS_SETUP ||
            s->state == MIGRATION_STATUS_PRE_SWITCHOVER ||
            s->state == MIGRATION_STATUS_DEVICE ||
            s->state == MIGRATION_STATUS_ACTIVE) {
            migrate_set_state(&s->state, s->state,
                              MIGRATION_STATUS_FAILED);
        }
    }

    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        qemu_mutex_lock(&p->mutex);
        p->quit = true;
        qemu_sem_post(&p->sem);
        qemu_mutex_unlock(&p->mutex);
    }
}

int multifd_save_cleanup(Error **errp)
{
    int i;
    int ret = 0;

    if (!migrate_use_multifd()) {
        return 0;
    }
    multifd_send_terminate_threads(NULL);
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        if (p->running) {
            qemu_thread_join(&p->thread);
        }
        socket_send_channel_destroy(p->c);
        p->c = NULL;
        qemu_mutex_destroy(&p->mutex);
        qemu_sem_destroy(&p->sem);
        qemu_sem_destroy(&p->sem_sync);
        g_free(p->name);
        p->name = NULL;
        multifd_pages_clear(p->pages);
        p->pages = NULL;
        p->packet_len = 0;
        g_free(p->packet);
        p->packet = NULL;
    }
    qemu_sem_destroy(&multifd_send_state->channels_ready);
    qemu_sem_destroy(&multifd_send_state->sem_sync);
    g_free(multifd_send_state->params);
    multifd_send_state->params = NULL;
    multifd_pages_clear(multifd_send_state->pages);
    multifd_send_state->pages = NULL;
    g_free(multifd_send_state);
    multifd_send_state = NULL;
    return ret;
}

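/*
 * Flush any pending pages, then make every send channel emit a packet
 * with MULTIFD_FLAG_SYNC set and wait until all of them have done so,
 * keeping the main migration stream and the channels in step.
 */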
static void multifd_send_sync_main(void)
{
    int i;

    if (!migrate_use_multifd()) {
        return;
    }
    if (multifd_send_state->pages->used) {
        multifd_send_pages();
    }
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        trace_multifd_send_sync_main_signal(p->id);

        qemu_mutex_lock(&p->mutex);

        p->packet_num = multifd_send_state->packet_num++;
        p->flags |= MULTIFD_FLAG_SYNC;
        p->pending_job++;
        qemu_mutex_unlock(&p->mutex);
        qemu_sem_post(&p->sem);
    }
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        trace_multifd_send_sync_main_wait(p->id);
        qemu_sem_wait(&multifd_send_state->sem_sync);
    }
    trace_multifd_send_sync_main(multifd_send_state->packet_num);
}

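/*
 * Per-channel send thread: after the initial handshake it loops waiting
 * for work, writes the packet header followed by the page data to its
 * channel, and posts channels_ready / sem_sync as appropriate.
 */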
static void *multifd_send_thread(void *opaque)
{
    MultiFDSendParams *p = opaque;
    Error *local_err = NULL;
    int ret;

    trace_multifd_send_thread_start(p->id);

    if (multifd_send_initial_packet(p, &local_err) < 0) {
        goto out;
    }
    /* initial packet */
    p->num_packets = 1;

    while (true) {
        qemu_sem_wait(&p->sem);
        qemu_mutex_lock(&p->mutex);

        if (p->pending_job) {
            uint32_t used = p->pages->used;
            uint64_t packet_num = p->packet_num;
            uint32_t flags = p->flags;

            multifd_send_fill_packet(p);
            p->flags = 0;
            p->num_packets++;
            p->num_pages += used;
            p->pages->used = 0;
            qemu_mutex_unlock(&p->mutex);

            trace_multifd_send(p->id, packet_num, used, flags);

            ret = qio_channel_write_all(p->c, (void *)p->packet,
                                        p->packet_len, &local_err);
            if (ret != 0) {
                break;
            }

            ret = qio_channel_writev_all(p->c, p->pages->iov, used, &local_err);
            if (ret != 0) {
                break;
            }

            qemu_mutex_lock(&p->mutex);
            p->pending_job--;
            qemu_mutex_unlock(&p->mutex);

            if (flags & MULTIFD_FLAG_SYNC) {
                qemu_sem_post(&multifd_send_state->sem_sync);
            }
            qemu_sem_post(&multifd_send_state->channels_ready);
        } else if (p->quit) {
            qemu_mutex_unlock(&p->mutex);
            break;
        } else {
            qemu_mutex_unlock(&p->mutex);
            /* sometimes there are spurious wakeups */
        }
    }

out:
    if (local_err) {
        multifd_send_terminate_threads(local_err);
    }

    qemu_mutex_lock(&p->mutex);
    p->running = false;
    qemu_mutex_unlock(&p->mutex);

    trace_multifd_send_thread_end(p->id, p->num_packets, p->num_pages);

    return NULL;
}

static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
{
    MultiFDSendParams *p = opaque;
    QIOChannel *sioc = QIO_CHANNEL(qio_task_get_source(task));
    Error *local_err = NULL;

    if (qio_task_propagate_error(task, &local_err)) {
        if (multifd_save_cleanup(&local_err) != 0) {
            migrate_set_error(migrate_get_current(), local_err);
        }
    } else {
        p->c = QIO_CHANNEL(sioc);
        qio_channel_set_delay(p->c, false);
        p->running = true;
        qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
                           QEMU_THREAD_JOINABLE);

        atomic_inc(&multifd_send_state->count);
    }
}

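/*
 * Allocate the global multifd send state and per-channel parameters,
 * then kick off the asynchronous creation of one socket channel per
 * configured multifd channel.
 */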
int multifd_save_setup(void)
{
    int thread_count;
    uint32_t page_count = migrate_multifd_page_count();
    uint8_t i;

    if (!migrate_use_multifd()) {
        return 0;
    }
    thread_count = migrate_multifd_channels();
    multifd_send_state = g_malloc0(sizeof(*multifd_send_state));
    multifd_send_state->params = g_new0(MultiFDSendParams, thread_count);
    atomic_set(&multifd_send_state->count, 0);
    multifd_send_state->pages = multifd_pages_init(page_count);
    qemu_sem_init(&multifd_send_state->sem_sync, 0);
    qemu_sem_init(&multifd_send_state->channels_ready, 0);

    for (i = 0; i < thread_count; i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        qemu_mutex_init(&p->mutex);
        qemu_sem_init(&p->sem, 0);
        qemu_sem_init(&p->sem_sync, 0);
        p->quit = false;
        p->pending_job = 0;
        p->id = i;
        p->pages = multifd_pages_init(page_count);
        p->packet_len = sizeof(MultiFDPacket_t)
                      + sizeof(ram_addr_t) * page_count;
        p->packet = g_malloc0(p->packet_len);
        p->name = g_strdup_printf("multifdsend_%d", i);
        socket_send_channel_create(multifd_new_send_channel_async, p);
    }
    return 0;
}

struct {
    MultiFDRecvParams *params;
    /* number of created threads */
    int count;
    /* syncs main thread and channels */
    QemuSemaphore sem_sync;
    /* global number of generated multifd packets */
    uint64_t packet_num;
} *multifd_recv_state;

static void multifd_recv_terminate_threads(Error *err)
{
    int i;

    if (err) {
        MigrationState *s = migrate_get_current();
        migrate_set_error(s, err);
        if (s->state == MIGRATION_STATUS_SETUP ||
            s->state == MIGRATION_STATUS_ACTIVE) {
            migrate_set_state(&s->state, s->state,
                              MIGRATION_STATUS_FAILED);
        }
    }

    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

        qemu_mutex_lock(&p->mutex);
        /* We could arrive here for two reasons:
           - normal quit, i.e. everything went fine, we are just finished
           - error quit: we close the channels so the channel threads
             finish the qio_channel_read_all_eof() */
        qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
        qemu_mutex_unlock(&p->mutex);
    }
}

int multifd_load_cleanup(Error **errp)
{
    int i;
    int ret = 0;

    if (!migrate_use_multifd()) {
        return 0;
    }
    multifd_recv_terminate_threads(NULL);
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

        if (p->running) {
            qemu_thread_join(&p->thread);
        }
        object_unref(OBJECT(p->c));
        p->c = NULL;
        qemu_mutex_destroy(&p->mutex);
        qemu_sem_destroy(&p->sem_sync);
        g_free(p->name);
        p->name = NULL;
        multifd_pages_clear(p->pages);
        p->pages = NULL;
        p->packet_len = 0;
        g_free(p->packet);
        p->packet = NULL;
    }
    qemu_sem_destroy(&multifd_recv_state->sem_sync);
    g_free(multifd_recv_state->params);
    multifd_recv_state->params = NULL;
    g_free(multifd_recv_state);
    multifd_recv_state = NULL;

    return ret;
}

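/*
 * Wait until every receive channel has seen a MULTIFD_FLAG_SYNC packet,
 * record the highest packet number observed, and then release all the
 * channel threads again.
 */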
static void multifd_recv_sync_main(void)
{
    int i;

    if (!migrate_use_multifd()) {
        return;
    }
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

        trace_multifd_recv_sync_main_wait(p->id);
        qemu_sem_wait(&multifd_recv_state->sem_sync);
        qemu_mutex_lock(&p->mutex);
        if (multifd_recv_state->packet_num < p->packet_num) {
            multifd_recv_state->packet_num = p->packet_num;
        }
        qemu_mutex_unlock(&p->mutex);
    }
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

        trace_multifd_recv_sync_main_signal(p->id);
        qemu_sem_post(&p->sem_sync);
    }
    trace_multifd_recv_sync_main(multifd_recv_state->packet_num);
}

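/*
 * Per-channel receive thread: read a packet header, validate it, read
 * the page data straight into guest memory, and rendezvous with the
 * main thread whenever a MULTIFD_FLAG_SYNC packet arrives.
 */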
static void *multifd_recv_thread(void *opaque)
{
    MultiFDRecvParams *p = opaque;
    Error *local_err = NULL;
    int ret;

    trace_multifd_recv_thread_start(p->id);

    while (true) {
        uint32_t used;
        uint32_t flags;

        ret = qio_channel_read_all_eof(p->c, (void *)p->packet,
                                       p->packet_len, &local_err);
        if (ret == 0) {   /* EOF */
            break;
        }
        if (ret == -1) {   /* Error */
            break;
        }

        qemu_mutex_lock(&p->mutex);
        ret = multifd_recv_unfill_packet(p, &local_err);
        if (ret) {
            qemu_mutex_unlock(&p->mutex);
            break;
        }

        used = p->pages->used;
        flags = p->flags;
        trace_multifd_recv(p->id, p->packet_num, used, flags);
        p->num_packets++;
        p->num_pages += used;
        qemu_mutex_unlock(&p->mutex);

        ret = qio_channel_readv_all(p->c, p->pages->iov, used, &local_err);
        if (ret != 0) {
            break;
        }

        if (flags & MULTIFD_FLAG_SYNC) {
            qemu_sem_post(&multifd_recv_state->sem_sync);
            qemu_sem_wait(&p->sem_sync);
        }
    }

    if (local_err) {
        multifd_recv_terminate_threads(local_err);
    }
    qemu_mutex_lock(&p->mutex);
    p->running = false;
    qemu_mutex_unlock(&p->mutex);

    trace_multifd_recv_thread_end(p->id, p->num_packets, p->num_pages);

    return NULL;
}

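/*
 * Allocate the global multifd receive state and per-channel parameters.
 * The channel threads themselves are only started later, from
 * multifd_recv_new_channel(), once each connection arrives.
 */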
int multifd_load_setup(void)
{
    int thread_count;
    uint32_t page_count = migrate_multifd_page_count();
    uint8_t i;

    if (!migrate_use_multifd()) {
        return 0;
    }
    thread_count = migrate_multifd_channels();
    multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state));
    multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count);
    atomic_set(&multifd_recv_state->count, 0);
    qemu_sem_init(&multifd_recv_state->sem_sync, 0);

    for (i = 0; i < thread_count; i++) {
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

        qemu_mutex_init(&p->mutex);
        qemu_sem_init(&p->sem_sync, 0);
        p->id = i;
        p->pages = multifd_pages_init(page_count);
        p->packet_len = sizeof(MultiFDPacket_t)
                      + sizeof(ram_addr_t) * page_count;
        p->packet = g_malloc0(p->packet_len);
        p->name = g_strdup_printf("multifdrecv_%d", i);
    }
    return 0;
}

bool multifd_recv_all_channels_created(void)
{
    int thread_count = migrate_multifd_channels();

    if (!migrate_use_multifd()) {
        return true;
    }

    return thread_count == atomic_read(&multifd_recv_state->count);
}

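/*
 * Called when a new incoming multifd connection has been accepted:
 * match it to a channel via the initial handshake packet, start the
 * channel's receive thread, and once all channels are present let the
 * incoming migration proceed.
 */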
void multifd_recv_new_channel(QIOChannel *ioc)
{
    MultiFDRecvParams *p;
    Error *local_err = NULL;
    int id;

    id = multifd_recv_initial_packet(ioc, &local_err);
    if (id < 0) {
        multifd_recv_terminate_threads(local_err);
        return;
    }

    p = &multifd_recv_state->params[id];
    if (p->c != NULL) {
        error_setg(&local_err, "multifd: received id '%d' already setup",
                   id);
        multifd_recv_terminate_threads(local_err);
        return;
    }
    p->c = ioc;
    object_ref(OBJECT(ioc));
    /* initial packet */
    p->num_packets = 1;

    p->running = true;
    qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
                       QEMU_THREAD_JOINABLE);
    atomic_inc(&multifd_recv_state->count);
    if (multifd_recv_state->count == migrate_multifd_channels()) {
        migration_incoming_process();
    }
}

/**
 * save_page_header: write page header to wire
 *
 * If this is not a continuation of the last block sent, it also writes
 * the block identification
 *
 * Returns the number of bytes written
 *
 * @rs: current RAM state
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page;
 *          in the lower bits, it contains flags
 */
static size_t save_page_header(RAMState *rs, QEMUFile *f, RAMBlock *block,
                               ram_addr_t offset)
{
    size_t size, len;

    if (block == rs->last_sent_block) {
        offset |= RAM_SAVE_FLAG_CONTINUE;
    }
    qemu_put_be64(f, offset);
    size = 8;

    if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
        len = strlen(block->idstr);
        qemu_put_byte(f, len);
        qemu_put_buffer(f, (uint8_t *)block->idstr, len);
        size += 1 + len;
        rs->last_sent_block = block;
    }
    return size;
}

Juan Quintela3d0684b2017-03-23 15:06:39 +01001380/**
1381 * mig_throttle_guest_down: throotle down the guest
1382 *
1383 * Reduce amount of guest cpu execution to hopefully slow down memory
1384 * writes. If guest dirty memory rate is reduced below the rate at
1385 * which we can transfer pages to the destination then we should be
1386 * able to complete migration. Some workloads dirty memory way too
1387 * fast and will not effectively converge, even with auto-converge.
Jason J. Herne070afca2015-09-08 13:12:35 -04001388 */
1389static void mig_throttle_guest_down(void)
1390{
1391 MigrationState *s = migrate_get_current();
Daniel P. Berrange2594f562016-04-27 11:05:14 +01001392 uint64_t pct_initial = s->parameters.cpu_throttle_initial;
1393 uint64_t pct_increment = s->parameters.cpu_throttle_increment;
Jason J. Herne070afca2015-09-08 13:12:35 -04001394
1395 /* We have not started throttling yet. Let's start it. */
1396 if (!cpu_throttle_active()) {
1397 cpu_throttle_set(pct_initial);
1398 } else {
1399 /* Throttling already on, just increase the rate */
1400 cpu_throttle_set(cpu_throttle_get_percentage() + pct_increment);
1401 }
1402}
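/*
 * Illustrative behaviour, assuming the usual defaults for the throttle
 * parameters (cpu_throttle_initial typically 20, cpu_throttle_increment
 * typically 10): successive calls set the CPU throttle to 20%, 30%, 40%, ...
 * until the dirty rate drops enough for migration to converge.
 */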
1403
Juan Quintela3d0684b2017-03-23 15:06:39 +01001404/**
1405 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
1406 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001407 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001408 * @current_addr: address for the zero page
1409 *
1410 * Update the xbzrle cache to reflect a page that's been sent as all 0.
Juan Quintela56e93d22015-05-07 19:33:31 +02001411 * The important thing is that a stale (not-yet-0'd) page be replaced
1412 * by the new data.
1413 * As a bonus, if the page wasn't in the cache it gets added so that
Juan Quintela3d0684b2017-03-23 15:06:39 +01001414 * when a small write is made into the 0'd page it gets XBZRLE sent.
Juan Quintela56e93d22015-05-07 19:33:31 +02001415 */
Juan Quintela6f37bb82017-03-13 19:26:29 +01001416static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
Juan Quintela56e93d22015-05-07 19:33:31 +02001417{
Juan Quintela6f37bb82017-03-13 19:26:29 +01001418 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
Juan Quintela56e93d22015-05-07 19:33:31 +02001419 return;
1420 }
1421
1422 /* We don't care if this fails to allocate a new cache page
1423 * as long as it updated an old one */
Juan Quintelac00e0922017-05-09 16:22:01 +02001424 cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page,
Juan Quintela93604472017-06-06 19:49:03 +02001425 ram_counters.dirty_sync_count);
Juan Quintela56e93d22015-05-07 19:33:31 +02001426}
1427
1428#define ENCODING_FLAG_XBZRLE 0x1
1429
1430/**
1431 * save_xbzrle_page: compress and send current page
1432 *
1433 * Returns: 1 means that we wrote the page
1434 * 0 means that page is identical to the one already sent
1435 * -1 means that xbzrle would be longer than normal
1436 *
Juan Quintela5a987732017-03-13 19:39:02 +01001437 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001438 * @current_data: pointer to the address of the page contents
1439 * @current_addr: addr of the page
Juan Quintela56e93d22015-05-07 19:33:31 +02001440 * @block: block that contains the page we want to send
1441 * @offset: offset inside the block for the page
1442 * @last_stage: if we are at the completion stage
Juan Quintela56e93d22015-05-07 19:33:31 +02001443 */
Juan Quintela204b88b2017-03-15 09:16:57 +01001444static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
Juan Quintela56e93d22015-05-07 19:33:31 +02001445 ram_addr_t current_addr, RAMBlock *block,
Juan Quintela072c2512017-03-14 10:27:31 +01001446 ram_addr_t offset, bool last_stage)
Juan Quintela56e93d22015-05-07 19:33:31 +02001447{
1448 int encoded_len = 0, bytes_xbzrle;
1449 uint8_t *prev_cached_page;
1450
Juan Quintela93604472017-06-06 19:49:03 +02001451 if (!cache_is_cached(XBZRLE.cache, current_addr,
1452 ram_counters.dirty_sync_count)) {
1453 xbzrle_counters.cache_miss++;
Juan Quintela56e93d22015-05-07 19:33:31 +02001454 if (!last_stage) {
1455 if (cache_insert(XBZRLE.cache, current_addr, *current_data,
Juan Quintela93604472017-06-06 19:49:03 +02001456 ram_counters.dirty_sync_count) == -1) {
Juan Quintela56e93d22015-05-07 19:33:31 +02001457 return -1;
1458 } else {
1459 /* update *current_data when the page has been
1460 inserted into cache */
1461 *current_data = get_cached_data(XBZRLE.cache, current_addr);
1462 }
1463 }
1464 return -1;
1465 }
1466
1467 prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
1468
1469 /* save current buffer into memory */
1470 memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
1471
1472 /* XBZRLE encoding (if there is no overflow) */
1473 encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
1474 TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
1475 TARGET_PAGE_SIZE);
1476 if (encoded_len == 0) {
Juan Quintela55c44462017-01-23 22:32:05 +01001477 trace_save_xbzrle_page_skipping();
Juan Quintela56e93d22015-05-07 19:33:31 +02001478 return 0;
1479 } else if (encoded_len == -1) {
Juan Quintela55c44462017-01-23 22:32:05 +01001480 trace_save_xbzrle_page_overflow();
Juan Quintela93604472017-06-06 19:49:03 +02001481 xbzrle_counters.overflow++;
Juan Quintela56e93d22015-05-07 19:33:31 +02001482 /* update data in the cache */
1483 if (!last_stage) {
1484 memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
1485 *current_data = prev_cached_page;
1486 }
1487 return -1;
1488 }
1489
1490 /* we need to update the data in the cache, in order to get the same data */
1491 if (!last_stage) {
1492 memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
1493 }
1494
1495 /* Send XBZRLE based compressed page */
Juan Quintela2bf3aa82017-05-10 13:28:13 +02001496 bytes_xbzrle = save_page_header(rs, rs->f, block,
Juan Quintela204b88b2017-03-15 09:16:57 +01001497 offset | RAM_SAVE_FLAG_XBZRLE);
1498 qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE);
1499 qemu_put_be16(rs->f, encoded_len);
1500 qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len);
Juan Quintela56e93d22015-05-07 19:33:31 +02001501 bytes_xbzrle += encoded_len + 1 + 2;
Juan Quintela93604472017-06-06 19:49:03 +02001502 xbzrle_counters.pages++;
1503 xbzrle_counters.bytes += bytes_xbzrle;
1504 ram_counters.transferred += bytes_xbzrle;
Juan Quintela56e93d22015-05-07 19:33:31 +02001505
1506 return 1;
1507}
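/*
 * Sketch of the record emitted for an XBZRLE page, as implied by the
 * qemu_put_* calls above (illustrative only):
 *
 *   page header (save_page_header, offset | RAM_SAVE_FLAG_XBZRLE)
 *   1 byte  : ENCODING_FLAG_XBZRLE
 *   2 bytes : be16 encoded_len
 *   N bytes : XBZRLE-encoded delta against the cached copy of the page
 */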
1508
Juan Quintela3d0684b2017-03-23 15:06:39 +01001509/**
1510 * migration_bitmap_find_dirty: find the next dirty page from start
Dr. David Alan Gilbertf3f491f2015-11-05 18:11:01 +00001511 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001512 * Called with rcu_read_lock() to protect migration_bitmap
1513 *
1514 * Returns the byte offset within memory region of the start of a dirty page
1515 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001516 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001517 * @rb: RAMBlock where to search for dirty pages
Juan Quintelaa935e302017-03-21 15:36:51 +01001518 * @start: page where we start the search
Dr. David Alan Gilbertf3f491f2015-11-05 18:11:01 +00001519 */
Juan Quintela56e93d22015-05-07 19:33:31 +02001520static inline
Juan Quintelaa935e302017-03-21 15:36:51 +01001521unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
Juan Quintelaf20e2862017-03-21 16:19:05 +01001522 unsigned long start)
Juan Quintela56e93d22015-05-07 19:33:31 +02001523{
Juan Quintela6b6712e2017-03-22 15:18:04 +01001524 unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
1525 unsigned long *bitmap = rb->bmap;
Juan Quintela56e93d22015-05-07 19:33:31 +02001526 unsigned long next;
1527
Cédric Le Goaterb895de52018-05-14 08:57:00 +02001528 if (!qemu_ram_is_migratable(rb)) {
1529 return size;
1530 }
1531
Juan Quintela6b6712e2017-03-22 15:18:04 +01001532 if (rs->ram_bulk_stage && start > 0) {
1533 next = start + 1;
Juan Quintela56e93d22015-05-07 19:33:31 +02001534 } else {
Juan Quintela6b6712e2017-03-22 15:18:04 +01001535 next = find_next_bit(bitmap, size, start);
Juan Quintela56e93d22015-05-07 19:33:31 +02001536 }
1537
Juan Quintela6b6712e2017-03-22 15:18:04 +01001538 return next;
Juan Quintela56e93d22015-05-07 19:33:31 +02001539}
1540
Juan Quintela06b10682017-03-21 15:18:05 +01001541static inline bool migration_bitmap_clear_dirty(RAMState *rs,
Juan Quintelaf20e2862017-03-21 16:19:05 +01001542 RAMBlock *rb,
1543 unsigned long page)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001544{
1545 bool ret;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001546
Juan Quintela6b6712e2017-03-22 15:18:04 +01001547 ret = test_and_clear_bit(page, rb->bmap);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001548
1549 if (ret) {
Juan Quintela0d8ec882017-03-13 21:21:41 +01001550 rs->migration_dirty_pages--;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001551 }
1552 return ret;
1553}
1554
Juan Quintela15440dd2017-03-21 09:35:04 +01001555static void migration_bitmap_sync_range(RAMState *rs, RAMBlock *rb,
1556 ram_addr_t start, ram_addr_t length)
Juan Quintela56e93d22015-05-07 19:33:31 +02001557{
Juan Quintela0d8ec882017-03-13 21:21:41 +01001558 rs->migration_dirty_pages +=
Juan Quintela6b6712e2017-03-22 15:18:04 +01001559 cpu_physical_memory_sync_dirty_bitmap(rb, start, length,
Juan Quintela0d8ec882017-03-13 21:21:41 +01001560 &rs->num_dirty_pages_period);
Juan Quintela56e93d22015-05-07 19:33:31 +02001561}
1562
Juan Quintela3d0684b2017-03-23 15:06:39 +01001563/**
1564 * ram_pagesize_summary: calculate all the pagesizes of a VM
1565 *
1566 * Returns a summary bitmap of the page sizes of all RAMBlocks
1567 *
1568 * For VMs with just normal pages this is equivalent to the host page
1569 * size. If it's got some huge pages then it's the OR of all the
1570 * different page sizes.
Dr. David Alan Gilberte8ca1db2017-02-24 18:28:29 +00001571 */
1572uint64_t ram_pagesize_summary(void)
1573{
1574 RAMBlock *block;
1575 uint64_t summary = 0;
1576
Cédric Le Goaterb895de52018-05-14 08:57:00 +02001577 RAMBLOCK_FOREACH_MIGRATABLE(block) {
Dr. David Alan Gilberte8ca1db2017-02-24 18:28:29 +00001578 summary |= block->page_size;
1579 }
1580
1581 return summary;
1582}
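/*
 * For instance, a guest with normal 4 KiB pages plus one hugepage-backed
 * RAMBlock using 2 MiB pages would yield 0x1000 | 0x200000 = 0x201000 here
 * (illustrative numbers, not tied to any particular configuration).
 */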
1583
Xiao Guangrongb7340352018-06-04 17:55:12 +08001584static void migration_update_rates(RAMState *rs, int64_t end_time)
1585{
1586 uint64_t iter_count = rs->iterations - rs->iterations_prev;
1587
1588 /* calculate period counters */
1589 ram_counters.dirty_pages_rate = rs->num_dirty_pages_period * 1000
1590 / (end_time - rs->time_last_bitmap_sync);
1591
1592 if (!iter_count) {
1593 return;
1594 }
1595
1596 if (migrate_use_xbzrle()) {
1597 xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss -
1598 rs->xbzrle_cache_miss_prev) / iter_count;
1599 rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
1600 }
1601}
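/*
 * Units in migration_update_rates(), as they follow from the code above:
 * dirty_pages_rate is pages per second (the period is measured in
 * milliseconds, hence the factor of 1000), and cache_miss_rate is XBZRLE
 * cache misses per iteration over the last sync period.
 */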
1602
Juan Quintela8d820d62017-03-13 19:35:50 +01001603static void migration_bitmap_sync(RAMState *rs)
Juan Quintela56e93d22015-05-07 19:33:31 +02001604{
1605 RAMBlock *block;
Juan Quintela56e93d22015-05-07 19:33:31 +02001606 int64_t end_time;
Juan Quintelac4bdf0c2017-03-28 14:59:54 +02001607 uint64_t bytes_xfer_now;
Juan Quintela56e93d22015-05-07 19:33:31 +02001608
Juan Quintela93604472017-06-06 19:49:03 +02001609 ram_counters.dirty_sync_count++;
Juan Quintela56e93d22015-05-07 19:33:31 +02001610
Juan Quintelaf664da82017-03-13 19:44:57 +01001611 if (!rs->time_last_bitmap_sync) {
1612 rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
Juan Quintela56e93d22015-05-07 19:33:31 +02001613 }
1614
1615 trace_migration_bitmap_sync_start();
Paolo Bonzini9c1f8f42016-09-22 16:08:31 +02001616 memory_global_dirty_log_sync();
Juan Quintela56e93d22015-05-07 19:33:31 +02001617
Juan Quintela108cfae2017-03-13 21:38:09 +01001618 qemu_mutex_lock(&rs->bitmap_mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02001619 rcu_read_lock();
Cédric Le Goaterb895de52018-05-14 08:57:00 +02001620 RAMBLOCK_FOREACH_MIGRATABLE(block) {
Juan Quintela15440dd2017-03-21 09:35:04 +01001621 migration_bitmap_sync_range(rs, block, 0, block->used_length);
Juan Quintela56e93d22015-05-07 19:33:31 +02001622 }
Balamuruhan S650af892018-06-12 14:20:09 +05301623 ram_counters.remaining = ram_bytes_remaining();
Juan Quintela56e93d22015-05-07 19:33:31 +02001624 rcu_read_unlock();
Juan Quintela108cfae2017-03-13 21:38:09 +01001625 qemu_mutex_unlock(&rs->bitmap_mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02001626
Juan Quintelaa66cd902017-03-28 15:02:43 +02001627 trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);
Chao Fan1ffb5df2017-03-14 09:55:07 +08001628
Juan Quintela56e93d22015-05-07 19:33:31 +02001629 end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
1630
1631 /* more than 1 second = 1000 milliseconds */
Juan Quintelaf664da82017-03-13 19:44:57 +01001632 if (end_time > rs->time_last_bitmap_sync + 1000) {
Juan Quintela93604472017-06-06 19:49:03 +02001633 bytes_xfer_now = ram_counters.transferred;
Felipe Franciosid693c6f2017-05-24 17:10:01 +01001634
Peter Lieven9ac78b62017-09-26 12:33:16 +02001635 /* During block migration the auto-converge logic incorrectly detects
1636 * that ram migration makes no progress. Avoid this by disabling the
1637 * throttling logic during the bulk phase of block migration. */
1638 if (migrate_auto_converge() && !blk_mig_bulk_active()) {
Juan Quintela56e93d22015-05-07 19:33:31 +02001639 /* The following detection logic can be refined later. For now:
1640 Check to see if the bytes dirtied in this period exceed 50% of
1641 the bytes that got transferred since the last time we
Jason J. Herne070afca2015-09-08 13:12:35 -04001642 were in this routine. If that happens twice, start or increase
1643 throttling */
Jason J. Herne070afca2015-09-08 13:12:35 -04001644
Felipe Franciosid693c6f2017-05-24 17:10:01 +01001645 if ((rs->num_dirty_pages_period * TARGET_PAGE_SIZE >
Juan Quintelaeac74152017-03-28 14:59:01 +02001646 (bytes_xfer_now - rs->bytes_xfer_prev) / 2) &&
Felipe Franciosib4a3c642017-05-24 17:10:03 +01001647 (++rs->dirty_rate_high_cnt >= 2)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02001648 trace_migration_throttle();
Juan Quintela8d820d62017-03-13 19:35:50 +01001649 rs->dirty_rate_high_cnt = 0;
Jason J. Herne070afca2015-09-08 13:12:35 -04001650 mig_throttle_guest_down();
Felipe Franciosid693c6f2017-05-24 17:10:01 +01001651 }
Juan Quintela56e93d22015-05-07 19:33:31 +02001652 }
Jason J. Herne070afca2015-09-08 13:12:35 -04001653
Xiao Guangrongb7340352018-06-04 17:55:12 +08001654 migration_update_rates(rs, end_time);
1655
1656 rs->iterations_prev = rs->iterations;
Felipe Franciosid693c6f2017-05-24 17:10:01 +01001657
1658 /* reset period counters */
Juan Quintelaf664da82017-03-13 19:44:57 +01001659 rs->time_last_bitmap_sync = end_time;
Juan Quintelaa66cd902017-03-28 15:02:43 +02001660 rs->num_dirty_pages_period = 0;
Felipe Franciosid2a4d852017-05-24 17:10:02 +01001661 rs->bytes_xfer_prev = bytes_xfer_now;
Juan Quintela56e93d22015-05-07 19:33:31 +02001662 }
Dr. David Alan Gilbert4addcd42015-12-16 11:47:36 +00001663 if (migrate_use_events()) {
Juan Quintela93604472017-06-06 19:49:03 +02001664 qapi_event_send_migration_pass(ram_counters.dirty_sync_count, NULL);
Dr. David Alan Gilbert4addcd42015-12-16 11:47:36 +00001665 }
Juan Quintela56e93d22015-05-07 19:33:31 +02001666}
1667
1668/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001669 * save_zero_page: send the zero page to the stream
Juan Quintela56e93d22015-05-07 19:33:31 +02001670 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001671 * Returns the number of pages written.
Juan Quintela56e93d22015-05-07 19:33:31 +02001672 *
Juan Quintelaf7ccd612017-03-13 20:30:21 +01001673 * @rs: current RAM state
Juan Quintela56e93d22015-05-07 19:33:31 +02001674 * @block: block that contains the page we want to send
1675 * @offset: offset inside the block for the page
Juan Quintela56e93d22015-05-07 19:33:31 +02001676 */
Juan Quintela7faccdc2018-01-08 18:58:17 +01001677static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
Juan Quintela56e93d22015-05-07 19:33:31 +02001678{
Juan Quintela7faccdc2018-01-08 18:58:17 +01001679 uint8_t *p = block->host + offset;
Juan Quintela56e93d22015-05-07 19:33:31 +02001680 int pages = -1;
1681
1682 if (is_zero_range(p, TARGET_PAGE_SIZE)) {
Juan Quintela93604472017-06-06 19:49:03 +02001683 ram_counters.duplicate++;
1684 ram_counters.transferred +=
Juan Quintelabb890ed2017-04-28 09:39:55 +02001685 save_page_header(rs, rs->f, block, offset | RAM_SAVE_FLAG_ZERO);
Juan Quintelace25d332017-03-15 11:00:51 +01001686 qemu_put_byte(rs->f, 0);
Juan Quintela93604472017-06-06 19:49:03 +02001687 ram_counters.transferred += 1;
Juan Quintela56e93d22015-05-07 19:33:31 +02001688 pages = 1;
1689 }
1690
1691 return pages;
1692}
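/*
 * On the wire a zero page is therefore just the page header with
 * RAM_SAVE_FLAG_ZERO set, followed by a single 0x00 byte; no page data is
 * transferred (summary of the code above).
 */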
1693
Juan Quintela57273092017-03-20 22:25:28 +01001694static void ram_release_pages(const char *rbname, uint64_t offset, int pages)
Pavel Butsykin53f09a12017-02-03 18:23:20 +03001695{
Juan Quintela57273092017-03-20 22:25:28 +01001696 if (!migrate_release_ram() || !migration_in_postcopy()) {
Pavel Butsykin53f09a12017-02-03 18:23:20 +03001697 return;
1698 }
1699
Juan Quintelaaaa20642017-03-21 11:35:24 +01001700 ram_discard_range(rbname, offset, pages << TARGET_PAGE_BITS);
Pavel Butsykin53f09a12017-02-03 18:23:20 +03001701}
1702
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08001703/*
1704 * @pages: the number of pages written by the control path,
1705 * < 0 - error
1706 * > 0 - number of pages written
1707 *
1708 * Return true if the page has been saved, otherwise false is returned.
1709 */
1710static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
1711 int *pages)
1712{
1713 uint64_t bytes_xmit = 0;
1714 int ret;
1715
1716 *pages = -1;
1717 ret = ram_control_save_page(rs->f, block->offset, offset, TARGET_PAGE_SIZE,
1718 &bytes_xmit);
1719 if (ret == RAM_SAVE_CONTROL_NOT_SUPP) {
1720 return false;
1721 }
1722
1723 if (bytes_xmit) {
1724 ram_counters.transferred += bytes_xmit;
1725 *pages = 1;
1726 }
1727
1728 if (ret == RAM_SAVE_CONTROL_DELAYED) {
1729 return true;
1730 }
1731
1732 if (bytes_xmit > 0) {
1733 ram_counters.normal++;
1734 } else if (bytes_xmit == 0) {
1735 ram_counters.duplicate++;
1736 }
1737
1738 return true;
1739}
1740
Xiao Guangrong65dacaa2018-03-30 15:51:27 +08001741/*
1742 * save_normal_page: send the given page directly to the stream
1743 *
1744 * Returns the number of pages written.
1745 *
1746 * @rs: current RAM state
1747 * @block: block that contains the page we want to send
1748 * @offset: offset inside the block for the page
1749 * @buf: the page to be sent
1750 * @async: send the page asynchronously
1751 */
1752static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
1753 uint8_t *buf, bool async)
1754{
1755 ram_counters.transferred += save_page_header(rs, rs->f, block,
1756 offset | RAM_SAVE_FLAG_PAGE);
1757 if (async) {
1758 qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE,
1759 migrate_release_ram() &
1760 migration_in_postcopy());
1761 } else {
1762 qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE);
1763 }
1764 ram_counters.transferred += TARGET_PAGE_SIZE;
1765 ram_counters.normal++;
1766 return 1;
1767}
1768
Juan Quintela56e93d22015-05-07 19:33:31 +02001769/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001770 * ram_save_page: send the given page to the stream
Juan Quintela56e93d22015-05-07 19:33:31 +02001771 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001772 * Returns the number of pages written.
Dr. David Alan Gilbert3fd3c4b2015-12-10 16:31:46 +00001773 * < 0 - error
1774 * >=0 - Number of pages written - this might legally be 0
1775 * if xbzrle noticed the page was the same.
Juan Quintela56e93d22015-05-07 19:33:31 +02001776 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001777 * @rs: current RAM state
Juan Quintela56e93d22015-05-07 19:33:31 +02001778 * @block: block that contains the page we want to send
1779 * @offset: offset inside the block for the page
1780 * @last_stage: if we are at the completion stage
Juan Quintela56e93d22015-05-07 19:33:31 +02001781 */
Juan Quintelaa0a8aa12017-03-20 22:29:07 +01001782static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
Juan Quintela56e93d22015-05-07 19:33:31 +02001783{
1784 int pages = -1;
Juan Quintela56e93d22015-05-07 19:33:31 +02001785 uint8_t *p;
Juan Quintela56e93d22015-05-07 19:33:31 +02001786 bool send_async = true;
zhanghailianga08f6892016-01-15 11:37:44 +08001787 RAMBlock *block = pss->block;
Juan Quintelaa935e302017-03-21 15:36:51 +01001788 ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08001789 ram_addr_t current_addr = block->offset + offset;
Juan Quintela56e93d22015-05-07 19:33:31 +02001790
Dr. David Alan Gilbert2f68e392015-08-13 11:51:30 +01001791 p = block->host + offset;
Dr. David Alan Gilbert1db9d8e2017-04-26 19:37:21 +01001792 trace_ram_save_page(block->idstr, (uint64_t)offset, p);
Juan Quintela56e93d22015-05-07 19:33:31 +02001793
Juan Quintela56e93d22015-05-07 19:33:31 +02001794 XBZRLE_cache_lock();
Xiao Guangrongd7400a32018-03-30 15:51:26 +08001795 if (!rs->ram_bulk_stage && !migration_in_postcopy() &&
1796 migrate_use_xbzrle()) {
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08001797 pages = save_xbzrle_page(rs, &p, current_addr, block,
1798 offset, last_stage);
1799 if (!last_stage) {
1800 /* Can't send this cached data async, since the cache page
1801 * might get updated before it gets to the wire
Juan Quintela56e93d22015-05-07 19:33:31 +02001802 */
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08001803 send_async = false;
Juan Quintela56e93d22015-05-07 19:33:31 +02001804 }
1805 }
1806
1807 /* XBZRLE overflow or normal page */
1808 if (pages == -1) {
Xiao Guangrong65dacaa2018-03-30 15:51:27 +08001809 pages = save_normal_page(rs, block, offset, p, send_async);
Juan Quintela56e93d22015-05-07 19:33:31 +02001810 }
1811
1812 XBZRLE_cache_unlock();
1813
1814 return pages;
1815}
1816
Juan Quintelab9ee2f72016-01-15 11:40:13 +01001817static int ram_save_multifd_page(RAMState *rs, RAMBlock *block,
1818 ram_addr_t offset)
1819{
1820 uint8_t *p;
1821
1822 p = block->host + offset;
1823
1824 ram_counters.transferred += save_page_header(rs, rs->f, block,
1825 offset | RAM_SAVE_FLAG_PAGE);
1826 multifd_queue_page(block, offset);
1827 qemu_put_buffer(rs->f, p, TARGET_PAGE_SIZE);
1828 ram_counters.transferred += TARGET_PAGE_SIZE;
1829 ram_counters.normal++;
1830
1831 return 1;
1832}
1833
Xiao Guangrongdcaf4462018-03-30 15:51:20 +08001834static int do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08001835 ram_addr_t offset, uint8_t *source_buf)
Juan Quintela56e93d22015-05-07 19:33:31 +02001836{
Juan Quintela53518d92017-05-04 11:46:24 +02001837 RAMState *rs = ram_state;
Juan Quintela56e93d22015-05-07 19:33:31 +02001838 int bytes_sent, blen;
Liang Lia7a9a882016-05-05 15:32:57 +08001839 uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);
Juan Quintela56e93d22015-05-07 19:33:31 +02001840
Juan Quintela2bf3aa82017-05-10 13:28:13 +02001841 bytes_sent = save_page_header(rs, f, block, offset |
Juan Quintela56e93d22015-05-07 19:33:31 +02001842 RAM_SAVE_FLAG_COMPRESS_PAGE);
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08001843
1844 /*
1845 * copy it to an internal buffer to avoid it being modified by the VM,
1846 * so that we can catch any error during compression and
1847 * decompression
1848 */
1849 memcpy(source_buf, p, TARGET_PAGE_SIZE);
1850 blen = qemu_put_compression_data(f, stream, source_buf, TARGET_PAGE_SIZE);
Liang Lib3be2892016-05-05 15:32:54 +08001851 if (blen < 0) {
1852 bytes_sent = 0;
1853 qemu_file_set_error(migrate_get_current()->to_dst_file, blen);
1854 error_report("compressed data failed!");
1855 } else {
1856 bytes_sent += blen;
Juan Quintela57273092017-03-20 22:25:28 +01001857 ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1);
Liang Lib3be2892016-05-05 15:32:54 +08001858 }
Juan Quintela56e93d22015-05-07 19:33:31 +02001859
1860 return bytes_sent;
1861}
1862
Juan Quintelace25d332017-03-15 11:00:51 +01001863static void flush_compressed_data(RAMState *rs)
Juan Quintela56e93d22015-05-07 19:33:31 +02001864{
1865 int idx, len, thread_count;
1866
1867 if (!migrate_use_compression()) {
1868 return;
1869 }
1870 thread_count = migrate_compress_threads();
Liang Lia7a9a882016-05-05 15:32:57 +08001871
Liang Li0d9f9a52016-05-05 15:32:59 +08001872 qemu_mutex_lock(&comp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02001873 for (idx = 0; idx < thread_count; idx++) {
Liang Lia7a9a882016-05-05 15:32:57 +08001874 while (!comp_param[idx].done) {
Liang Li0d9f9a52016-05-05 15:32:59 +08001875 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02001876 }
Liang Lia7a9a882016-05-05 15:32:57 +08001877 }
Liang Li0d9f9a52016-05-05 15:32:59 +08001878 qemu_mutex_unlock(&comp_done_lock);
Liang Lia7a9a882016-05-05 15:32:57 +08001879
1880 for (idx = 0; idx < thread_count; idx++) {
1881 qemu_mutex_lock(&comp_param[idx].mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08001882 if (!comp_param[idx].quit) {
Juan Quintelace25d332017-03-15 11:00:51 +01001883 len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
Juan Quintela93604472017-06-06 19:49:03 +02001884 ram_counters.transferred += len;
Juan Quintela56e93d22015-05-07 19:33:31 +02001885 }
Liang Lia7a9a882016-05-05 15:32:57 +08001886 qemu_mutex_unlock(&comp_param[idx].mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02001887 }
1888}
1889
1890static inline void set_compress_params(CompressParam *param, RAMBlock *block,
1891 ram_addr_t offset)
1892{
1893 param->block = block;
1894 param->offset = offset;
1895}
1896
Juan Quintelace25d332017-03-15 11:00:51 +01001897static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block,
1898 ram_addr_t offset)
Juan Quintela56e93d22015-05-07 19:33:31 +02001899{
1900 int idx, thread_count, bytes_xmit = -1, pages = -1;
1901
1902 thread_count = migrate_compress_threads();
Liang Li0d9f9a52016-05-05 15:32:59 +08001903 qemu_mutex_lock(&comp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02001904 while (true) {
1905 for (idx = 0; idx < thread_count; idx++) {
1906 if (comp_param[idx].done) {
Liang Lia7a9a882016-05-05 15:32:57 +08001907 comp_param[idx].done = false;
Juan Quintelace25d332017-03-15 11:00:51 +01001908 bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file);
Liang Lia7a9a882016-05-05 15:32:57 +08001909 qemu_mutex_lock(&comp_param[idx].mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02001910 set_compress_params(&comp_param[idx], block, offset);
Liang Lia7a9a882016-05-05 15:32:57 +08001911 qemu_cond_signal(&comp_param[idx].cond);
1912 qemu_mutex_unlock(&comp_param[idx].mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02001913 pages = 1;
Juan Quintela93604472017-06-06 19:49:03 +02001914 ram_counters.normal++;
1915 ram_counters.transferred += bytes_xmit;
Juan Quintela56e93d22015-05-07 19:33:31 +02001916 break;
1917 }
1918 }
1919 if (pages > 0) {
1920 break;
1921 } else {
Liang Li0d9f9a52016-05-05 15:32:59 +08001922 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02001923 }
1924 }
Liang Li0d9f9a52016-05-05 15:32:59 +08001925 qemu_mutex_unlock(&comp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02001926
1927 return pages;
1928}
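/*
 * Hand-off with the compression worker threads, as sketched by the loop
 * above: pick a worker whose 'done' flag is set, flush whatever it already
 * compressed into rs->f, hand it the new (block, offset) under its mutex and
 * signal its condition variable; if no worker is idle, wait on
 * comp_done_cond and retry.
 */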
1929
1930/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001931 * find_dirty_block: find the next dirty page and update any state
1932 * associated with the search process.
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001933 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001934 * Returns whether a dirty page was found
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001935 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001936 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001937 * @pss: data about the state of the current dirty page scan
1938 * @again: set to false if the search has scanned the whole of RAM
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001939 */
Juan Quintelaf20e2862017-03-21 16:19:05 +01001940static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001941{
Juan Quintelaf20e2862017-03-21 16:19:05 +01001942 pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
Juan Quintela6f37bb82017-03-13 19:26:29 +01001943 if (pss->complete_round && pss->block == rs->last_seen_block &&
Juan Quintelaa935e302017-03-21 15:36:51 +01001944 pss->page >= rs->last_page) {
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001945 /*
1946 * We've been once around the RAM and haven't found anything.
1947 * Give up.
1948 */
1949 *again = false;
1950 return false;
1951 }
Juan Quintelaa935e302017-03-21 15:36:51 +01001952 if ((pss->page << TARGET_PAGE_BITS) >= pss->block->used_length) {
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001953 /* Didn't find anything in this RAM Block */
Juan Quintelaa935e302017-03-21 15:36:51 +01001954 pss->page = 0;
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001955 pss->block = QLIST_NEXT_RCU(pss->block, next);
1956 if (!pss->block) {
1957 /* Hit the end of the list */
1958 pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
1959 /* Flag that we've looped */
1960 pss->complete_round = true;
Juan Quintela6f37bb82017-03-13 19:26:29 +01001961 rs->ram_bulk_stage = false;
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001962 if (migrate_use_xbzrle()) {
1963 /* If xbzrle is on, stop using the data compression at this
1964 * point. In theory, xbzrle can do better than compression.
1965 */
Juan Quintelace25d332017-03-15 11:00:51 +01001966 flush_compressed_data(rs);
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001967 }
1968 }
1969 /* Didn't find anything this time, but try again on the new block */
1970 *again = true;
1971 return false;
1972 } else {
1973 /* Can go around again, but... */
1974 *again = true;
1975 /* We've found something so probably don't need to */
1976 return true;
1977 }
1978}
1979
Juan Quintela3d0684b2017-03-23 15:06:39 +01001980/**
1981 * unqueue_page: gets a page off the queue
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001982 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001983 * Helper for 'get_queued_page' - gets a page off the queue
1984 *
1985 * Returns the block of the page (or NULL if none available)
1986 *
Juan Quintelaec481c62017-03-20 22:12:40 +01001987 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001988 * @offset: used to return the offset within the RAMBlock
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001989 */
Juan Quintelaf20e2862017-03-21 16:19:05 +01001990static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001991{
1992 RAMBlock *block = NULL;
1993
Juan Quintelaec481c62017-03-20 22:12:40 +01001994 qemu_mutex_lock(&rs->src_page_req_mutex);
1995 if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
1996 struct RAMSrcPageRequest *entry =
1997 QSIMPLEQ_FIRST(&rs->src_page_requests);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001998 block = entry->rb;
1999 *offset = entry->offset;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002000
2001 if (entry->len > TARGET_PAGE_SIZE) {
2002 entry->len -= TARGET_PAGE_SIZE;
2003 entry->offset += TARGET_PAGE_SIZE;
2004 } else {
2005 memory_region_unref(block->mr);
Juan Quintelaec481c62017-03-20 22:12:40 +01002006 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002007 g_free(entry);
Dr. David Alan Gilberte03a34f2018-06-13 11:26:42 +01002008 migration_consume_urgent_request();
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002009 }
2010 }
Juan Quintelaec481c62017-03-20 22:12:40 +01002011 qemu_mutex_unlock(&rs->src_page_req_mutex);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002012
2013 return block;
2014}
2015
Juan Quintela3d0684b2017-03-23 15:06:39 +01002016/**
2017 * get_queued_page: unqueue a page from the postcopy requests
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002018 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002019 * Skips pages that are already sent (!dirty)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002020 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002021 * Returns whether a queued page was found
2022 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002023 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002024 * @pss: data about the state of the current dirty page scan
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002025 */
Juan Quintelaf20e2862017-03-21 16:19:05 +01002026static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002027{
2028 RAMBlock *block;
2029 ram_addr_t offset;
2030 bool dirty;
2031
2032 do {
Juan Quintelaf20e2862017-03-21 16:19:05 +01002033 block = unqueue_page(rs, &offset);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002034 /*
2035 * We're sending this page, and since it's postcopy nothing else
2036 * will dirty it, and we must make sure it doesn't get sent again
2037 * even if this queue request was received after the background
2038 * search already sent it.
2039 */
2040 if (block) {
Juan Quintelaf20e2862017-03-21 16:19:05 +01002041 unsigned long page;
2042
Juan Quintela6b6712e2017-03-22 15:18:04 +01002043 page = offset >> TARGET_PAGE_BITS;
2044 dirty = test_bit(page, block->bmap);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002045 if (!dirty) {
Juan Quintela06b10682017-03-21 15:18:05 +01002046 trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
Juan Quintela6b6712e2017-03-22 15:18:04 +01002047 page, test_bit(page, block->unsentmap));
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002048 } else {
Juan Quintelaf20e2862017-03-21 16:19:05 +01002049 trace_get_queued_page(block->idstr, (uint64_t)offset, page);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002050 }
2051 }
2052
2053 } while (block && !dirty);
2054
2055 if (block) {
2056 /*
2057 * As soon as we start servicing pages out of order, then we have
2058 * to kill the bulk stage, since the bulk stage assumes
2059 * (in migration_bitmap_find_dirty) that every page is
2060 * dirty, which is no longer true.
2061 */
Juan Quintela6f37bb82017-03-13 19:26:29 +01002062 rs->ram_bulk_stage = false;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002063
2064 /*
2065 * We want the background search to continue from the queued page
2066 * since the guest is likely to want other pages near to the page
2067 * it just requested.
2068 */
2069 pss->block = block;
Juan Quintelaa935e302017-03-21 15:36:51 +01002070 pss->page = offset >> TARGET_PAGE_BITS;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002071 }
2072
2073 return !!block;
2074}
2075
Juan Quintela56e93d22015-05-07 19:33:31 +02002076/**
Juan Quintela5e58f962017-04-03 22:06:54 +02002077 * migration_page_queue_free: drop any remaining pages in the ram
2078 * request queue
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002079 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002080 * It should be empty at the end anyway, but in error cases there may
2081 * be some left. If any pages are left over, we drop them.
2082 *
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002083 */
Juan Quintela83c13382017-05-04 11:45:01 +02002084static void migration_page_queue_free(RAMState *rs)
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002085{
Juan Quintelaec481c62017-03-20 22:12:40 +01002086 struct RAMSrcPageRequest *mspr, *next_mspr;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002087 /* This queue generally should be empty - but in the case of a failed
2088 * migration it might have some entries left over.
2089 */
2090 rcu_read_lock();
Juan Quintelaec481c62017-03-20 22:12:40 +01002091 QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002092 memory_region_unref(mspr->rb->mr);
Juan Quintelaec481c62017-03-20 22:12:40 +01002093 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002094 g_free(mspr);
2095 }
2096 rcu_read_unlock();
2097}
2098
2099/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002100 * ram_save_queue_pages: queue the page for transmission
2101 *
2102 * A request from the postcopy destination, for example.
2103 *
2104 * Returns zero on success or negative on error
2105 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002106 * @rbname: Name of the RAMBlock of the request. NULL means the
2107 * same as the last one.
2108 * @start: starting address from the start of the RAMBlock
2109 * @len: length (in bytes) to send
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002110 */
Juan Quintela96506892017-03-14 18:41:03 +01002111int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002112{
2113 RAMBlock *ramblock;
Juan Quintela53518d92017-05-04 11:46:24 +02002114 RAMState *rs = ram_state;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002115
Juan Quintela93604472017-06-06 19:49:03 +02002116 ram_counters.postcopy_requests++;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002117 rcu_read_lock();
2118 if (!rbname) {
2119 /* Reuse last RAMBlock */
Juan Quintela68a098f2017-03-14 13:48:42 +01002120 ramblock = rs->last_req_rb;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002121
2122 if (!ramblock) {
2123 /*
2124 * Shouldn't happen, we can't reuse the last RAMBlock if
2125 * it's the 1st request.
2126 */
2127 error_report("ram_save_queue_pages no previous block");
2128 goto err;
2129 }
2130 } else {
2131 ramblock = qemu_ram_block_by_name(rbname);
2132
2133 if (!ramblock) {
2134 /* We shouldn't be asked for a non-existent RAMBlock */
2135 error_report("ram_save_queue_pages no block '%s'", rbname);
2136 goto err;
2137 }
Juan Quintela68a098f2017-03-14 13:48:42 +01002138 rs->last_req_rb = ramblock;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002139 }
2140 trace_ram_save_queue_pages(ramblock->idstr, start, len);
2141 if (start+len > ramblock->used_length) {
Juan Quintela9458ad62015-11-10 17:42:05 +01002142 error_report("%s request overrun start=" RAM_ADDR_FMT " len="
2143 RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002144 __func__, start, len, ramblock->used_length);
2145 goto err;
2146 }
2147
Juan Quintelaec481c62017-03-20 22:12:40 +01002148 struct RAMSrcPageRequest *new_entry =
2149 g_malloc0(sizeof(struct RAMSrcPageRequest));
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002150 new_entry->rb = ramblock;
2151 new_entry->offset = start;
2152 new_entry->len = len;
2153
2154 memory_region_ref(ramblock->mr);
Juan Quintelaec481c62017-03-20 22:12:40 +01002155 qemu_mutex_lock(&rs->src_page_req_mutex);
2156 QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
Dr. David Alan Gilberte03a34f2018-06-13 11:26:42 +01002157 migration_make_urgent_request();
Juan Quintelaec481c62017-03-20 22:12:40 +01002158 qemu_mutex_unlock(&rs->src_page_req_mutex);
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002159 rcu_read_unlock();
2160
2161 return 0;
2162
2163err:
2164 rcu_read_unlock();
2165 return -1;
2166}
2167
Xiao Guangrongd7400a32018-03-30 15:51:26 +08002168static bool save_page_use_compression(RAMState *rs)
2169{
2170 if (!migrate_use_compression()) {
2171 return false;
2172 }
2173
2174 /*
2175 * If xbzrle is on, stop using the data compression after first
2176 * round of migration even if compression is enabled. In theory,
2177 * xbzrle can do better than compression.
2178 */
2179 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
2180 return true;
2181 }
2182
2183 return false;
2184}
2185
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002186/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002187 * ram_save_target_page: save one target page
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002188 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002189 * Returns the number of pages written
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002190 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002191 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002192 * @pss: data about the page we want to send
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002193 * @last_stage: if we are at the completion stage
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002194 */
Juan Quintelaa0a8aa12017-03-20 22:29:07 +01002195static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
Juan Quintelaf20e2862017-03-21 16:19:05 +01002196 bool last_stage)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002197{
Xiao Guangronga8ec91f2018-03-30 15:51:25 +08002198 RAMBlock *block = pss->block;
2199 ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
2200 int res;
2201
2202 if (control_save_page(rs, block, offset, &res)) {
2203 return res;
2204 }
2205
Xiao Guangrong1faa5662018-03-30 15:51:24 +08002206 /*
Xiao Guangrongd7400a32018-03-30 15:51:26 +08002207 * When starting to process a new block, the first page of the
2208 * block should be sent out before other pages in the same
2209 * block, and all the pages in the previous block should have been
2210 * sent out. Keeping this order is important, because the 'cont'
2211 * flag is used to avoid resending the block name.
Xiao Guangrong1faa5662018-03-30 15:51:24 +08002212 */
Xiao Guangrongd7400a32018-03-30 15:51:26 +08002213 if (block != rs->last_sent_block && save_page_use_compression(rs)) {
2214 flush_compressed_data(rs);
2215 }
2216
2217 res = save_zero_page(rs, block, offset);
2218 if (res > 0) {
2219 /* Must let xbzrle know, otherwise a previous (now 0'd) cached
2220 * page would be stale
2221 */
2222 if (!save_page_use_compression(rs)) {
2223 XBZRLE_cache_lock();
2224 xbzrle_cache_zero_page(rs, block->offset + offset);
2225 XBZRLE_cache_unlock();
2226 }
2227 ram_release_pages(block->idstr, offset, res);
2228 return res;
2229 }
2230
Xiao Guangrongda3f56c2018-03-30 15:51:28 +08002231 /*
2232 * Make sure the first page is sent out before other pages.
2233 *
2234 * We post it as a normal page, as compression takes a lot of
2235 * CPU time.
2236 */
2237 if (block == rs->last_sent_block && save_page_use_compression(rs)) {
Xiao Guangrong701b1872018-04-28 16:10:45 +08002238 return compress_page_with_multi_thread(rs, block, offset);
Juan Quintelab9ee2f72016-01-15 11:40:13 +01002239 } else if (migrate_use_multifd()) {
2240 return ram_save_multifd_page(rs, block, offset);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002241 }
2242
Xiao Guangrong1faa5662018-03-30 15:51:24 +08002243 return ram_save_page(rs, pss, last_stage);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002244}
2245
2246/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002247 * ram_save_host_page: save a whole host page
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002248 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002249 * Starting at pss->page, send pages up to the end of the current host
2250 * page. It's valid for the initial offset to point into the middle of
2251 * a host page in which case the remainder of the hostpage is sent.
2252 * Only dirty target pages are sent. Note that the host page size may
2253 * be a huge page for this block.
Dr. David Alan Gilbert1eb3fc02017-05-17 17:58:09 +01002254 * The saving stops at the boundary of the used_length of the block
2255 * if the RAMBlock isn't a multiple of the host page size.
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002256 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002257 * Returns the number of pages written or negative on error
2258 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002259 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002261 * @pss: data about the page we want to send
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002262 * @last_stage: if we are at the completion stage
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002263 */
Juan Quintelaa0a8aa12017-03-20 22:29:07 +01002264static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
Juan Quintelaf20e2862017-03-21 16:19:05 +01002265 bool last_stage)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002266{
2267 int tmppages, pages = 0;
Juan Quintelaa935e302017-03-21 15:36:51 +01002268 size_t pagesize_bits =
2269 qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
Dr. David Alan Gilbert4c011c32017-02-24 18:28:39 +00002270
Cédric Le Goaterb895de52018-05-14 08:57:00 +02002271 if (!qemu_ram_is_migratable(pss->block)) {
2272 error_report("block %s should not be migrated !", pss->block->idstr);
2273 return 0;
2274 }
2275
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002276 do {
Xiao Guangrong1faa5662018-03-30 15:51:24 +08002277 /* Check if the page is dirty and if so, send it */
2278 if (!migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
2279 pss->page++;
2280 continue;
2281 }
2282
Juan Quintelaf20e2862017-03-21 16:19:05 +01002283 tmppages = ram_save_target_page(rs, pss, last_stage);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002284 if (tmppages < 0) {
2285 return tmppages;
2286 }
2287
2288 pages += tmppages;
Xiao Guangrong1faa5662018-03-30 15:51:24 +08002289 if (pss->block->unsentmap) {
2290 clear_bit(pss->page, pss->block->unsentmap);
2291 }
2292
Juan Quintelaa935e302017-03-21 15:36:51 +01002293 pss->page++;
Dr. David Alan Gilbert1eb3fc02017-05-17 17:58:09 +01002294 } while ((pss->page & (pagesize_bits - 1)) &&
2295 offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS));
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002296
2297 /* The offset we leave with is the last one we looked at */
Juan Quintelaa935e302017-03-21 15:36:51 +01002298 pss->page--;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002299 return pages;
2300}
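/*
 * For example, with 4 KiB target pages a RAMBlock backed by 2 MiB huge pages
 * gives pagesize_bits == 512, so one call may send up to 512 dirty target
 * pages before stopping at the host-page boundary (illustrative numbers;
 * actual sizes depend on the host and target).
 */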
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002301
2302/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002303 * ram_find_and_save_block: finds a dirty page and sends it to f
Juan Quintela56e93d22015-05-07 19:33:31 +02002304 *
2305 * Called within an RCU critical section.
2306 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002307 * Returns the number of pages written where zero means no dirty pages
Juan Quintela56e93d22015-05-07 19:33:31 +02002308 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002309 * @rs: current RAM state
Juan Quintela56e93d22015-05-07 19:33:31 +02002310 * @last_stage: if we are at the completion stage
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002311 *
2312 * On systems where host-page-size > target-page-size it will send all the
2313 * pages in a host page that are dirty.
Juan Quintela56e93d22015-05-07 19:33:31 +02002314 */
2315
Juan Quintelace25d332017-03-15 11:00:51 +01002316static int ram_find_and_save_block(RAMState *rs, bool last_stage)
Juan Quintela56e93d22015-05-07 19:33:31 +02002317{
Dr. David Alan Gilbertb8fb8cb2015-09-23 15:27:10 +01002318 PageSearchStatus pss;
Juan Quintela56e93d22015-05-07 19:33:31 +02002319 int pages = 0;
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002320 bool again, found;
Juan Quintela56e93d22015-05-07 19:33:31 +02002321
Ashijeet Acharya0827b9e2017-02-08 19:58:45 +05302322 /* No dirty page as there is zero RAM */
2323 if (!ram_bytes_total()) {
2324 return pages;
2325 }
2326
Juan Quintela6f37bb82017-03-13 19:26:29 +01002327 pss.block = rs->last_seen_block;
Juan Quintelaa935e302017-03-21 15:36:51 +01002328 pss.page = rs->last_page;
Dr. David Alan Gilbertb8fb8cb2015-09-23 15:27:10 +01002329 pss.complete_round = false;
2330
2331 if (!pss.block) {
2332 pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
2333 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002334
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002335 do {
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002336 again = true;
Juan Quintelaf20e2862017-03-21 16:19:05 +01002337 found = get_queued_page(rs, &pss);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002338
2339 if (!found) {
2340 /* priority queue empty, so just search for something dirty */
Juan Quintelaf20e2862017-03-21 16:19:05 +01002341 found = find_dirty_block(rs, &pss, &again);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002342 }
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002343
2344 if (found) {
Juan Quintelaf20e2862017-03-21 16:19:05 +01002345 pages = ram_save_host_page(rs, &pss, last_stage);
Juan Quintela56e93d22015-05-07 19:33:31 +02002346 }
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002347 } while (!pages && again);
Juan Quintela56e93d22015-05-07 19:33:31 +02002348
Juan Quintela6f37bb82017-03-13 19:26:29 +01002349 rs->last_seen_block = pss.block;
Juan Quintelaa935e302017-03-21 15:36:51 +01002350 rs->last_page = pss.page;
Juan Quintela56e93d22015-05-07 19:33:31 +02002351
2352 return pages;
2353}
2354
2355void acct_update_position(QEMUFile *f, size_t size, bool zero)
2356{
2357 uint64_t pages = size / TARGET_PAGE_SIZE;
Juan Quintelaf7ccd612017-03-13 20:30:21 +01002358
Juan Quintela56e93d22015-05-07 19:33:31 +02002359 if (zero) {
Juan Quintela93604472017-06-06 19:49:03 +02002360 ram_counters.duplicate += pages;
Juan Quintela56e93d22015-05-07 19:33:31 +02002361 } else {
Juan Quintela93604472017-06-06 19:49:03 +02002362 ram_counters.normal += pages;
2363 ram_counters.transferred += size;
Juan Quintela56e93d22015-05-07 19:33:31 +02002364 qemu_update_position(f, size);
2365 }
2366}
2367
Juan Quintela56e93d22015-05-07 19:33:31 +02002368uint64_t ram_bytes_total(void)
2369{
2370 RAMBlock *block;
2371 uint64_t total = 0;
2372
2373 rcu_read_lock();
Cédric Le Goaterb895de52018-05-14 08:57:00 +02002374 RAMBLOCK_FOREACH_MIGRATABLE(block) {
Juan Quintela56e93d22015-05-07 19:33:31 +02002375 total += block->used_length;
Peter Xu99e15582017-05-12 12:17:39 +08002376 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002377 rcu_read_unlock();
2378 return total;
2379}
2380
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002381static void xbzrle_load_setup(void)
Juan Quintela56e93d22015-05-07 19:33:31 +02002382{
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002383 XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
Juan Quintela56e93d22015-05-07 19:33:31 +02002384}
2385
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002386static void xbzrle_load_cleanup(void)
2387{
2388 g_free(XBZRLE.decoded_buf);
2389 XBZRLE.decoded_buf = NULL;
2390}
2391
Peter Xu7d7c96b2017-10-19 14:31:58 +08002392static void ram_state_cleanup(RAMState **rsp)
2393{
Dr. David Alan Gilbertb9ccaf62018-02-12 16:03:39 +00002394 if (*rsp) {
2395 migration_page_queue_free(*rsp);
2396 qemu_mutex_destroy(&(*rsp)->bitmap_mutex);
2397 qemu_mutex_destroy(&(*rsp)->src_page_req_mutex);
2398 g_free(*rsp);
2399 *rsp = NULL;
2400 }
Peter Xu7d7c96b2017-10-19 14:31:58 +08002401}
2402
Peter Xu84593a02017-10-19 14:31:59 +08002403static void xbzrle_cleanup(void)
2404{
2405 XBZRLE_cache_lock();
2406 if (XBZRLE.cache) {
2407 cache_fini(XBZRLE.cache);
2408 g_free(XBZRLE.encoded_buf);
2409 g_free(XBZRLE.current_buf);
2410 g_free(XBZRLE.zero_target_page);
2411 XBZRLE.cache = NULL;
2412 XBZRLE.encoded_buf = NULL;
2413 XBZRLE.current_buf = NULL;
2414 XBZRLE.zero_target_page = NULL;
2415 }
2416 XBZRLE_cache_unlock();
2417}
2418
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002419static void ram_save_cleanup(void *opaque)
Juan Quintela56e93d22015-05-07 19:33:31 +02002420{
Juan Quintela53518d92017-05-04 11:46:24 +02002421 RAMState **rsp = opaque;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002422 RAMBlock *block;
Juan Quintelaeb859c52017-03-13 21:51:55 +01002423
Li Zhijian2ff64032015-07-02 20:18:05 +08002424 /* caller have hold iothread lock or is in a bh, so there is
2425 * no writing race against this migration_bitmap
2426 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002427 memory_global_dirty_log_stop();
2428
Cédric Le Goaterb895de52018-05-14 08:57:00 +02002429 RAMBLOCK_FOREACH_MIGRATABLE(block) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01002430 g_free(block->bmap);
2431 block->bmap = NULL;
2432 g_free(block->unsentmap);
2433 block->unsentmap = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02002434 }
2435
Peter Xu84593a02017-10-19 14:31:59 +08002436 xbzrle_cleanup();
Juan Quintelaf0afa332017-06-28 11:52:28 +02002437 compress_threads_save_cleanup();
Peter Xu7d7c96b2017-10-19 14:31:58 +08002438 ram_state_cleanup(rsp);
Juan Quintela56e93d22015-05-07 19:33:31 +02002439}
2440
Juan Quintela6f37bb82017-03-13 19:26:29 +01002441static void ram_state_reset(RAMState *rs)
Juan Quintela56e93d22015-05-07 19:33:31 +02002442{
Juan Quintela6f37bb82017-03-13 19:26:29 +01002443 rs->last_seen_block = NULL;
2444 rs->last_sent_block = NULL;
Juan Quintela269ace22017-03-21 15:23:31 +01002445 rs->last_page = 0;
Juan Quintela6f37bb82017-03-13 19:26:29 +01002446 rs->last_version = ram_list.version;
2447 rs->ram_bulk_stage = true;
Juan Quintela56e93d22015-05-07 19:33:31 +02002448}
2449
2450#define MAX_WAIT 50 /* ms, half buffered_file limit */
2451
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00002452/*
2453 * 'expected' is the value you expect the bitmap mostly to be full
2454 * of; it won't bother printing lines that are all this value.
2455 * 'todump' is the bitmap to dump; 'pages' is its length in pages.
2456 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002457void ram_debug_dump_bitmap(unsigned long *todump, bool expected,
2458 unsigned long pages)
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00002459{
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00002460 int64_t cur;
2461 int64_t linelen = 128;
2462 char linebuf[129];
2463
Juan Quintela6b6712e2017-03-22 15:18:04 +01002464 for (cur = 0; cur < pages; cur += linelen) {
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00002465 int64_t curb;
2466 bool found = false;
2467 /*
2468 * Last line; catch the case where the line length
2469 * is longer than remaining ram
2470 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002471 if (cur + linelen > pages) {
2472 linelen = pages - cur;
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00002473 }
2474 for (curb = 0; curb < linelen; curb++) {
2475 bool thisbit = test_bit(cur + curb, todump);
2476 linebuf[curb] = thisbit ? '1' : '.';
2477 found = found || (thisbit != expected);
2478 }
2479 if (found) {
2480 linebuf[curb] = '\0';
2481 fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
2482 }
2483 }
2484}
2485
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002486/* **** functions for postcopy ***** */
2487
Pavel Butsykinced1c612017-02-03 18:23:21 +03002488void ram_postcopy_migrated_memory_release(MigrationState *ms)
2489{
2490 struct RAMBlock *block;
Pavel Butsykinced1c612017-02-03 18:23:21 +03002491
Cédric Le Goaterb895de52018-05-14 08:57:00 +02002492 RAMBLOCK_FOREACH_MIGRATABLE(block) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01002493 unsigned long *bitmap = block->bmap;
2494 unsigned long range = block->used_length >> TARGET_PAGE_BITS;
2495 unsigned long run_start = find_next_zero_bit(bitmap, range, 0);
Pavel Butsykinced1c612017-02-03 18:23:21 +03002496
2497 while (run_start < range) {
2498 unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
Juan Quintelaaaa20642017-03-21 11:35:24 +01002499 ram_discard_range(block->idstr, run_start << TARGET_PAGE_BITS,
Pavel Butsykinced1c612017-02-03 18:23:21 +03002500 (run_end - run_start) << TARGET_PAGE_BITS);
2501 run_start = find_next_zero_bit(bitmap, range, run_end + 1);
2502 }
2503 }
2504}
2505
Juan Quintela3d0684b2017-03-23 15:06:39 +01002506/**
2507 * postcopy_send_discard_bm_ram: discard a RAMBlock
2508 *
2509 * Returns zero on success
2510 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002511 * Callback from postcopy_each_ram_send_discard for each RAMBlock
2512 * Note: At this point the 'unsentmap' is the processed bitmap combined
2513 * with the dirtymap; so a '1' means it's either dirty or unsent.
Juan Quintela3d0684b2017-03-23 15:06:39 +01002514 *
2515 * @ms: current migration state
2516 * @pds: state for postcopy
2517 * @start: RAMBlock starting page
2518 * @length: RAMBlock size
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002519 */
2520static int postcopy_send_discard_bm_ram(MigrationState *ms,
2521 PostcopyDiscardState *pds,
Juan Quintela6b6712e2017-03-22 15:18:04 +01002522 RAMBlock *block)
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002523{
Juan Quintela6b6712e2017-03-22 15:18:04 +01002524 unsigned long end = block->used_length >> TARGET_PAGE_BITS;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002525 unsigned long current;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002526 unsigned long *unsentmap = block->unsentmap;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002527
Juan Quintela6b6712e2017-03-22 15:18:04 +01002528 for (current = 0; current < end; ) {
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002529 unsigned long one = find_next_bit(unsentmap, end, current);
2530
2531 if (one <= end) {
2532 unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1);
2533 unsigned long discard_length;
2534
2535 if (zero >= end) {
2536 discard_length = end - one;
2537 } else {
2538 discard_length = zero - one;
2539 }
Dr. David Alan Gilbertd688c622016-06-13 12:16:40 +01002540 if (discard_length) {
2541 postcopy_discard_send_range(ms, pds, one, discard_length);
2542 }
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002543 current = one + discard_length;
2544 } else {
2545 current = one;
2546 }
2547 }
2548
2549 return 0;
2550}
2551
Juan Quintela3d0684b2017-03-23 15:06:39 +01002552/**
2553 * postcopy_each_ram_send_discard: discard all RAMBlocks
2554 *
2555 * Returns 0 for success or negative for error
2556 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002557 * Utility for the outgoing postcopy code.
2558 * Calls postcopy_send_discard_bm_ram for each RAMBlock
2559 * passing it bitmap indexes and name.
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002560 * (qemu_ram_foreach_block ends up passing unscaled lengths
2561 * which would mean postcopy code would have to deal with target page)
Juan Quintela3d0684b2017-03-23 15:06:39 +01002562 *
2563 * @ms: current migration state
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002564 */
2565static int postcopy_each_ram_send_discard(MigrationState *ms)
2566{
2567 struct RAMBlock *block;
2568 int ret;
2569
Cédric Le Goaterb895de52018-05-14 08:57:00 +02002570 RAMBLOCK_FOREACH_MIGRATABLE(block) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01002571 PostcopyDiscardState *pds =
2572 postcopy_discard_send_init(ms, block->idstr);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002573
2574 /*
2575 * Postcopy sends chunks of bitmap over the wire, but it
2576 * just needs indexes at this point, avoids it having
2577 * target page specific code.
2578 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002579 ret = postcopy_send_discard_bm_ram(ms, pds, block);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002580 postcopy_discard_send_finish(ms, pds);
2581 if (ret) {
2582 return ret;
2583 }
2584 }
2585
2586 return 0;
2587}
2588
Juan Quintela3d0684b2017-03-23 15:06:39 +01002589/**
2590 * postcopy_chunk_hostpages_pass: canonicalize bitmap in host pages
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002591 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002592 * Helper for postcopy_chunk_hostpages; it's called twice to
2593 * canonicalize the two bitmaps, that are similar, but one is
2594 * inverted.
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002595 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002596 * Postcopy requires that all target pages in a hostpage are dirty or
2597 * clean, not a mix. This function canonicalizes the bitmaps.
2598 *
2599 * @ms: current migration state
2600 * @unsent_pass: if true we need to canonicalize partially unsent host pages
2601 * otherwise we need to canonicalize partially dirty host pages
2602 * @block: block that contains the page we want to canonicalize
2603 * @pds: state for postcopy
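 *
 * Example (assuming 4 KiB target pages on a RAMBlock backed by 2 MiB
 * huge pages, so host_ratio == 512): a run starting at target page 1000
 * has host_offset == 488, so the run is pulled back to page 512 and the
 * whole host page (target pages 512..1023) is discarded and re-marked
 * as dirty and unsent.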
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002604 */
2605static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
2606 RAMBlock *block,
2607 PostcopyDiscardState *pds)
2608{
Juan Quintela53518d92017-05-04 11:46:24 +02002609 RAMState *rs = ram_state;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002610 unsigned long *bitmap = block->bmap;
2611 unsigned long *unsentmap = block->unsentmap;
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00002612 unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002613 unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002614 unsigned long run_start;
2615
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00002616 if (block->page_size == TARGET_PAGE_SIZE) {
2617 /* Easy case - TPS==HPS for a non-huge page RAMBlock */
2618 return;
2619 }
2620
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002621 if (unsent_pass) {
2622 /* Find a sent page */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002623 run_start = find_next_zero_bit(unsentmap, pages, 0);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002624 } else {
2625 /* Find a dirty page */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002626 run_start = find_next_bit(bitmap, pages, 0);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002627 }
2628
Juan Quintela6b6712e2017-03-22 15:18:04 +01002629 while (run_start < pages) {
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002630 bool do_fixup = false;
2631 unsigned long fixup_start_addr;
2632 unsigned long host_offset;
2633
2634 /*
2635 * If the start of this run of pages is in the middle of a host
2636 * page, then we need to fixup this host page.
2637 */
2638 host_offset = run_start % host_ratio;
2639 if (host_offset) {
2640 do_fixup = true;
2641 run_start -= host_offset;
2642 fixup_start_addr = run_start;
2643 /* For the next pass */
2644 run_start = run_start + host_ratio;
2645 } else {
2646 /* Find the end of this run */
2647 unsigned long run_end;
2648 if (unsent_pass) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01002649 run_end = find_next_bit(unsentmap, pages, run_start + 1);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002650 } else {
Juan Quintela6b6712e2017-03-22 15:18:04 +01002651 run_end = find_next_zero_bit(bitmap, pages, run_start + 1);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002652 }
2653 /*
2654 * If the end isn't at the start of a host page, then the
2655 * run doesn't finish at the end of a host page
2656 * and we need to discard.
2657 */
2658 host_offset = run_end % host_ratio;
2659 if (host_offset) {
2660 do_fixup = true;
2661 fixup_start_addr = run_end - host_offset;
2662 /*
2663 * This host page has gone, the next loop iteration starts
2664 * from after the fixup
2665 */
2666 run_start = fixup_start_addr + host_ratio;
2667 } else {
2668 /*
2669 * No discards on this iteration, next loop starts from
2670 * next sent/dirty page
2671 */
2672 run_start = run_end + 1;
2673 }
2674 }
2675
2676 if (do_fixup) {
2677 unsigned long page;
2678
2679 /* Tell the destination to discard this page */
2680 if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
2681 /* For the unsent_pass we:
2682 * discard partially sent pages
2683 * For the !unsent_pass (dirty) we:
2684 * discard partially dirty pages that were sent
2685 * (any partially sent pages were already discarded
2686 * by the previous unsent_pass)
2687 */
2688 postcopy_discard_send_range(ms, pds, fixup_start_addr,
2689 host_ratio);
2690 }
2691
2692 /* Clean up the bitmap */
2693 for (page = fixup_start_addr;
2694 page < fixup_start_addr + host_ratio; page++) {
2695 /* All pages in this host page are now not sent */
2696 set_bit(page, unsentmap);
2697
2698 /*
2699 * Remark them as dirty, updating the count for any pages
2700 * that weren't previously dirty.
2701 */
Juan Quintela0d8ec882017-03-13 21:21:41 +01002702 rs->migration_dirty_pages += !test_and_set_bit(page, bitmap);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002703 }
2704 }
2705
2706 if (unsent_pass) {
2707 /* Find the next sent page for the next iteration */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002708 run_start = find_next_zero_bit(unsentmap, pages, run_start);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002709 } else {
2710 /* Find the next dirty page for the next iteration */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002711 run_start = find_next_bit(bitmap, pages, run_start);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002712 }
2713 }
2714}
2715
Juan Quintela3d0684b2017-03-23 15:06:39 +01002716/**
2717 * postcopy_chunk_hostpages: discard any partially sent host page
2718 *
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002719 * Utility for the outgoing postcopy code.
2720 *
2721 * Discard any partially sent host-page size chunks, mark any partially
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00002722 * dirty host-page size chunks as all dirty. Here the host page size is
2723 * that of the particular RAMBlock, i.e. it might be a huge page.
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002724 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002725 * Returns zero on success
2726 *
2727 * @ms: current migration state
Juan Quintela6b6712e2017-03-22 15:18:04 +01002728 * @block: block we want to work with
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002729 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002730static int postcopy_chunk_hostpages(MigrationState *ms, RAMBlock *block)
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002731{
Juan Quintela6b6712e2017-03-22 15:18:04 +01002732 PostcopyDiscardState *pds =
2733 postcopy_discard_send_init(ms, block->idstr);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002734
Juan Quintela6b6712e2017-03-22 15:18:04 +01002735 /* First pass: Discard all partially sent host pages */
2736 postcopy_chunk_hostpages_pass(ms, true, block, pds);
2737 /*
2738 * Second pass: Ensure that all partially dirty host pages are made
2739 * fully dirty.
2740 */
2741 postcopy_chunk_hostpages_pass(ms, false, block, pds);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002742
Juan Quintela6b6712e2017-03-22 15:18:04 +01002743 postcopy_discard_send_finish(ms, pds);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002744 return 0;
2745}
2746
Juan Quintela3d0684b2017-03-23 15:06:39 +01002747/**
2748 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
2749 *
2750 * Returns zero on success
2751 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002752 * Transmit the set of pages to be discarded after precopy to the target;
2753 * these are pages that:
2754 * a) Have been previously transmitted but are now dirty again
2755 * b) Pages that have never been transmitted, this ensures that
2756 * any pages on the destination that have been mapped by background
2757 * tasks get discarded (transparent huge pages is the specific concern)
2758 * Hopefully this is pretty sparse
Juan Quintela3d0684b2017-03-23 15:06:39 +01002759 *
2760 * @ms: current migration state
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002761 */
2762int ram_postcopy_send_discard_bitmap(MigrationState *ms)
2763{
Juan Quintela53518d92017-05-04 11:46:24 +02002764 RAMState *rs = ram_state;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002765 RAMBlock *block;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002766 int ret;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002767
2768 rcu_read_lock();
2769
2770 /* This should be our last sync, the src is now paused */
Juan Quintelaeb859c52017-03-13 21:51:55 +01002771 migration_bitmap_sync(rs);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002772
Juan Quintela6b6712e2017-03-22 15:18:04 +01002773 /* Easiest way to make sure we don't resume in the middle of a host-page */
2774 rs->last_seen_block = NULL;
2775 rs->last_sent_block = NULL;
2776 rs->last_page = 0;
2777
Cédric Le Goaterb895de52018-05-14 08:57:00 +02002778 RAMBLOCK_FOREACH_MIGRATABLE(block) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01002779 unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
2780 unsigned long *bitmap = block->bmap;
2781 unsigned long *unsentmap = block->unsentmap;
2782
2783 if (!unsentmap) {
2784 /* We don't have a safe way to resize the unsentmap, so
2785 * if the bitmap was resized it will be NULL at this
2786 * point.
2787 */
2788 error_report("migration ram resized during precopy phase");
2789 rcu_read_unlock();
2790 return -EINVAL;
2791 }
2792 /* Deal with TPS != HPS and huge pages */
2793 ret = postcopy_chunk_hostpages(ms, block);
2794 if (ret) {
2795 rcu_read_unlock();
2796 return ret;
2797 }
2798
2799 /*
2800 * Update the unsentmap to be unsentmap = unsentmap | dirty
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002801 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002802 bitmap_or(unsentmap, unsentmap, bitmap, pages);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002803#ifdef DEBUG_POSTCOPY
Juan Quintela6b6712e2017-03-22 15:18:04 +01002804 ram_debug_dump_bitmap(unsentmap, true, pages);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002805#endif
Juan Quintela6b6712e2017-03-22 15:18:04 +01002806 }
2807 trace_ram_postcopy_send_discard_bitmap();
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002808
2809 ret = postcopy_each_ram_send_discard(ms);
2810 rcu_read_unlock();
2811
2812 return ret;
2813}
2814
Juan Quintela3d0684b2017-03-23 15:06:39 +01002815/**
2816 * ram_discard_range: discard dirtied pages at the beginning of postcopy
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002817 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002818 * Returns zero on success
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002819 *
Juan Quintela36449152017-03-23 15:11:59 +01002820 * @rbname: name of the RAMBlock of the request. NULL means the
2821 * same as the last one.
Juan Quintela3d0684b2017-03-23 15:06:39 +01002822 * @start: starting offset, in bytes, from the start of the RAMBlock
2823 * @length: length in bytes to discard
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002824 */
Juan Quintelaaaa20642017-03-21 11:35:24 +01002825int ram_discard_range(const char *rbname, uint64_t start, size_t length)
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002826{
2827 int ret = -1;
2828
Juan Quintela36449152017-03-23 15:11:59 +01002829 trace_ram_discard_range(rbname, start, length);
Dr. David Alan Gilbertd3a50382017-02-24 18:28:32 +00002830
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002831 rcu_read_lock();
Juan Quintela36449152017-03-23 15:11:59 +01002832 RAMBlock *rb = qemu_ram_block_by_name(rbname);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002833
2834 if (!rb) {
Juan Quintela36449152017-03-23 15:11:59 +01002835 error_report("ram_discard_range: Failed to find block '%s'", rbname);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002836 goto err;
2837 }
2838
Alexey Perevalovf9494612017-10-05 14:13:20 +03002839 bitmap_clear(rb->receivedmap, start >> qemu_target_page_bits(),
2840 length >> qemu_target_page_bits());
Dr. David Alan Gilbertd3a50382017-02-24 18:28:32 +00002841 ret = ram_block_discard_range(rb, start, length);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002842
2843err:
2844 rcu_read_unlock();
2845
2846 return ret;
2847}
2848
Peter Xu84593a02017-10-19 14:31:59 +08002849/*
2850 * For every allocation, we will try not to crash the VM if the
2851 * allocation fails.
2852 */
2853static int xbzrle_init(void)
2854{
2855 Error *local_err = NULL;
2856
2857 if (!migrate_use_xbzrle()) {
2858 return 0;
2859 }
2860
2861 XBZRLE_cache_lock();
2862
2863 XBZRLE.zero_target_page = g_try_malloc0(TARGET_PAGE_SIZE);
2864 if (!XBZRLE.zero_target_page) {
2865 error_report("%s: Error allocating zero page", __func__);
2866 goto err_out;
2867 }
2868
2869 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(),
2870 TARGET_PAGE_SIZE, &local_err);
2871 if (!XBZRLE.cache) {
2872 error_report_err(local_err);
2873 goto free_zero_page;
2874 }
2875
2876 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
2877 if (!XBZRLE.encoded_buf) {
2878 error_report("%s: Error allocating encoded_buf", __func__);
2879 goto free_cache;
2880 }
2881
2882 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
2883 if (!XBZRLE.current_buf) {
2884 error_report("%s: Error allocating current_buf", __func__);
2885 goto free_encoded_buf;
2886 }
2887
2888 /* We are all good */
2889 XBZRLE_cache_unlock();
2890 return 0;
2891
2892free_encoded_buf:
2893 g_free(XBZRLE.encoded_buf);
2894 XBZRLE.encoded_buf = NULL;
2895free_cache:
2896 cache_fini(XBZRLE.cache);
2897 XBZRLE.cache = NULL;
2898free_zero_page:
2899 g_free(XBZRLE.zero_target_page);
2900 XBZRLE.zero_target_page = NULL;
2901err_out:
2902 XBZRLE_cache_unlock();
2903 return -ENOMEM;
2904}
2905
Juan Quintela53518d92017-05-04 11:46:24 +02002906static int ram_state_init(RAMState **rsp)
Juan Quintela56e93d22015-05-07 19:33:31 +02002907{
Peter Xu7d00ee62017-10-19 14:31:57 +08002908 *rsp = g_try_new0(RAMState, 1);
2909
2910 if (!*rsp) {
2911 error_report("%s: Init ramstate fail", __func__);
2912 return -1;
2913 }
Juan Quintela53518d92017-05-04 11:46:24 +02002914
2915 qemu_mutex_init(&(*rsp)->bitmap_mutex);
2916 qemu_mutex_init(&(*rsp)->src_page_req_mutex);
2917 QSIMPLEQ_INIT(&(*rsp)->src_page_requests);
Juan Quintela56e93d22015-05-07 19:33:31 +02002918
Peter Xu7d00ee62017-10-19 14:31:57 +08002919 /*
2920 * Count the total number of pages used by ram blocks not including any
2921 * gaps due to alignment or unplugs.
2922 */
2923 (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
2924
2925 ram_state_reset(*rsp);
2926
2927 return 0;
2928}
2929
Peter Xud6eff5d2017-10-19 14:32:00 +08002930static void ram_list_init_bitmaps(void)
2931{
2932 RAMBlock *block;
2933 unsigned long pages;
2934
2935 /* Skip setting bitmap if there is no RAM */
2936 if (ram_bytes_total()) {
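        /*
         * Start every page as dirty (and, for postcopy, unsent) so that
         * the first migration pass sends all of guest RAM at least once.
         */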
Cédric Le Goaterb895de52018-05-14 08:57:00 +02002937 RAMBLOCK_FOREACH_MIGRATABLE(block) {
Peter Xud6eff5d2017-10-19 14:32:00 +08002938 pages = block->max_length >> TARGET_PAGE_BITS;
2939 block->bmap = bitmap_new(pages);
2940 bitmap_set(block->bmap, 0, pages);
2941 if (migrate_postcopy_ram()) {
2942 block->unsentmap = bitmap_new(pages);
2943 bitmap_set(block->unsentmap, 0, pages);
2944 }
2945 }
2946 }
2947}
2948
2949static void ram_init_bitmaps(RAMState *rs)
2950{
2951 /* For memory_global_dirty_log_start below. */
2952 qemu_mutex_lock_iothread();
2953 qemu_mutex_lock_ramlist();
2954 rcu_read_lock();
2955
2956 ram_list_init_bitmaps();
2957 memory_global_dirty_log_start();
2958 migration_bitmap_sync(rs);
2959
2960 rcu_read_unlock();
2961 qemu_mutex_unlock_ramlist();
2962 qemu_mutex_unlock_iothread();
2963}
2964
Peter Xu7d00ee62017-10-19 14:31:57 +08002965static int ram_init_all(RAMState **rsp)
2966{
Peter Xu7d00ee62017-10-19 14:31:57 +08002967 if (ram_state_init(rsp)) {
2968 return -1;
2969 }
2970
Peter Xu84593a02017-10-19 14:31:59 +08002971 if (xbzrle_init()) {
2972 ram_state_cleanup(rsp);
2973 return -1;
Juan Quintela56e93d22015-05-07 19:33:31 +02002974 }
2975
Peter Xud6eff5d2017-10-19 14:32:00 +08002976 ram_init_bitmaps(*rsp);
zhanghailianga91246c2016-10-27 14:42:59 +08002977
2978 return 0;
2979}
2980
Peter Xu08614f32018-05-02 18:47:33 +08002981static void ram_state_resume_prepare(RAMState *rs, QEMUFile *out)
2982{
2983 RAMBlock *block;
2984 uint64_t pages = 0;
2985
2986 /*
2987 * Postcopy is not using xbzrle/compression, so no need for that.
2988 * Also, since the source is already halted, we don't need to care
2989 * about dirty page logging as well.
2990 */
2991
Dr. David Alan Gilbertff0769a2018-06-05 17:25:44 +01002992 RAMBLOCK_FOREACH_MIGRATABLE(block) {
Peter Xu08614f32018-05-02 18:47:33 +08002993 pages += bitmap_count_one(block->bmap,
2994 block->used_length >> TARGET_PAGE_BITS);
2995 }
2996
2997 /* This may not be aligned with current bitmaps. Recalculate. */
2998 rs->migration_dirty_pages = pages;
2999
3000 rs->last_seen_block = NULL;
3001 rs->last_sent_block = NULL;
3002 rs->last_page = 0;
3003 rs->last_version = ram_list.version;
3004 /*
3005 * Disable the bulk stage, otherwise we'll resend the whole RAM no
3006 * matter what we have sent.
3007 */
3008 rs->ram_bulk_stage = false;
3009
3010 /* Update RAMState cache of output QEMUFile */
3011 rs->f = out;
3012
3013 trace_ram_state_resume_prepare(pages);
3014}
3015
Juan Quintela3d0684b2017-03-23 15:06:39 +01003016/*
3017 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
zhanghailianga91246c2016-10-27 14:42:59 +08003018 * a long-running RCU critical section. When RCU reclaims in the code
3019 * start to become numerous it will be necessary to reduce the
3020 * granularity of these critical sections.
3021 */
3022
Juan Quintela3d0684b2017-03-23 15:06:39 +01003023/**
3024 * ram_save_setup: Setup RAM for migration
3025 *
3026 * Returns zero to indicate success and negative for error
3027 *
3028 * @f: QEMUFile where to send the data
3029 * @opaque: RAMState pointer
3030 */
zhanghailianga91246c2016-10-27 14:42:59 +08003031static int ram_save_setup(QEMUFile *f, void *opaque)
3032{
Juan Quintela53518d92017-05-04 11:46:24 +02003033 RAMState **rsp = opaque;
zhanghailianga91246c2016-10-27 14:42:59 +08003034 RAMBlock *block;
3035
Xiao Guangrongdcaf4462018-03-30 15:51:20 +08003036 if (compress_threads_save_setup()) {
3037 return -1;
3038 }
3039
zhanghailianga91246c2016-10-27 14:42:59 +08003040 /* migration has already setup the bitmap, reuse it. */
3041 if (!migration_in_colo_state()) {
Peter Xu7d00ee62017-10-19 14:31:57 +08003042 if (ram_init_all(rsp) != 0) {
Xiao Guangrongdcaf4462018-03-30 15:51:20 +08003043 compress_threads_save_cleanup();
zhanghailianga91246c2016-10-27 14:42:59 +08003044 return -1;
Juan Quintela53518d92017-05-04 11:46:24 +02003045 }
zhanghailianga91246c2016-10-27 14:42:59 +08003046 }
Juan Quintela53518d92017-05-04 11:46:24 +02003047 (*rsp)->f = f;
zhanghailianga91246c2016-10-27 14:42:59 +08003048
3049 rcu_read_lock();
Juan Quintela56e93d22015-05-07 19:33:31 +02003050
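    /*
     * Setup-stage stream layout: a be64 carrying the total RAM size with
     * RAM_SAVE_FLAG_MEM_SIZE set in its low bits, then, for each
     * migratable block, the idstr length, the idstr bytes, the block's
     * used_length and, when postcopy is enabled and the block does not
     * use the host page size, its page size.
     */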
3051 qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
3052
Cédric Le Goaterb895de52018-05-14 08:57:00 +02003053 RAMBLOCK_FOREACH_MIGRATABLE(block) {
Juan Quintela56e93d22015-05-07 19:33:31 +02003054 qemu_put_byte(f, strlen(block->idstr));
3055 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
3056 qemu_put_be64(f, block->used_length);
Dr. David Alan Gilbertef08fb32017-02-24 18:28:30 +00003057 if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
3058 qemu_put_be64(f, block->page_size);
3059 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003060 }
3061
3062 rcu_read_unlock();
3063
3064 ram_control_before_iterate(f, RAM_CONTROL_SETUP);
3065 ram_control_after_iterate(f, RAM_CONTROL_SETUP);
3066
Juan Quintela6df264a2018-02-28 09:10:07 +01003067 multifd_send_sync_main();
Juan Quintela56e93d22015-05-07 19:33:31 +02003068 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
3069
3070 return 0;
3071}
3072
Juan Quintela3d0684b2017-03-23 15:06:39 +01003073/**
3074 * ram_save_iterate: iterative stage for migration
3075 *
3076 * Returns zero to indicate success and negative for error
3077 *
3078 * @f: QEMUFile where to send the data
3079 * @opaque: RAMState pointer
3080 */
Juan Quintela56e93d22015-05-07 19:33:31 +02003081static int ram_save_iterate(QEMUFile *f, void *opaque)
3082{
Juan Quintela53518d92017-05-04 11:46:24 +02003083 RAMState **temp = opaque;
3084 RAMState *rs = *temp;
Juan Quintela56e93d22015-05-07 19:33:31 +02003085 int ret;
3086 int i;
3087 int64_t t0;
Thomas Huth5c903082016-11-04 14:10:17 +01003088 int done = 0;
Juan Quintela56e93d22015-05-07 19:33:31 +02003089
Peter Lievenb2557342018-03-08 12:18:24 +01003090 if (blk_mig_bulk_active()) {
3091 /* Avoid transferring ram during bulk phase of block migration as
3092 * the bulk phase will usually take a long time and transferring
3093 * ram updates during that time is pointless. */
3094 goto out;
3095 }
3096
Juan Quintela56e93d22015-05-07 19:33:31 +02003097 rcu_read_lock();
Juan Quintela6f37bb82017-03-13 19:26:29 +01003098 if (ram_list.version != rs->last_version) {
3099 ram_state_reset(rs);
Juan Quintela56e93d22015-05-07 19:33:31 +02003100 }
3101
3102 /* Read version before ram_list.blocks */
3103 smp_rmb();
3104
3105 ram_control_before_iterate(f, RAM_CONTROL_ROUND);
3106
3107 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
3108 i = 0;
Dr. David Alan Gilberte03a34f2018-06-13 11:26:42 +01003109 while ((ret = qemu_file_rate_limit(f)) == 0 ||
3110 !QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02003111 int pages;
3112
Dr. David Alan Gilberte03a34f2018-06-13 11:26:42 +01003113 if (qemu_file_get_error(f)) {
3114 break;
3115 }
3116
Juan Quintelace25d332017-03-15 11:00:51 +01003117 pages = ram_find_and_save_block(rs, false);
Juan Quintela56e93d22015-05-07 19:33:31 +02003118 /* no more pages to send */
3119 if (pages == 0) {
Thomas Huth5c903082016-11-04 14:10:17 +01003120 done = 1;
Juan Quintela56e93d22015-05-07 19:33:31 +02003121 break;
3122 }
Juan Quintela23b28c32017-03-13 20:51:34 +01003123 rs->iterations++;
Jason J. Herne070afca2015-09-08 13:12:35 -04003124
Juan Quintela56e93d22015-05-07 19:33:31 +02003125 /* we want to check in the 1st loop, just in case it was the 1st time
3126 and we had to sync the dirty bitmap.
3127 qemu_clock_get_ns() is a bit expensive, so we only check every few
3128 iterations
3129 */
3130 if ((i & 63) == 0) {
3131 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
3132 if (t1 > MAX_WAIT) {
Juan Quintela55c44462017-01-23 22:32:05 +01003133 trace_ram_save_iterate_big_wait(t1, i);
Juan Quintela56e93d22015-05-07 19:33:31 +02003134 break;
3135 }
3136 }
3137 i++;
3138 }
Juan Quintelace25d332017-03-15 11:00:51 +01003139 flush_compressed_data(rs);
Juan Quintela56e93d22015-05-07 19:33:31 +02003140 rcu_read_unlock();
3141
3142 /*
3143 * Must occur before EOS (or any QEMUFile operation)
3144 * because of RDMA protocol.
3145 */
3146 ram_control_after_iterate(f, RAM_CONTROL_ROUND);
3147
Juan Quintela6df264a2018-02-28 09:10:07 +01003148 multifd_send_sync_main();
Peter Lievenb2557342018-03-08 12:18:24 +01003149out:
Juan Quintela56e93d22015-05-07 19:33:31 +02003150 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
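    /* the EOS marker just written is a single be64, hence the 8 bytes */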
Juan Quintela93604472017-06-06 19:49:03 +02003151 ram_counters.transferred += 8;
Juan Quintela56e93d22015-05-07 19:33:31 +02003152
3153 ret = qemu_file_get_error(f);
3154 if (ret < 0) {
3155 return ret;
3156 }
3157
Thomas Huth5c903082016-11-04 14:10:17 +01003158 return done;
Juan Quintela56e93d22015-05-07 19:33:31 +02003159}
3160
Juan Quintela3d0684b2017-03-23 15:06:39 +01003161/**
3162 * ram_save_complete: function called to send the remaining amount of ram
3163 *
3164 * Returns zero to indicate success
3165 *
3166 * Called with iothread lock
3167 *
3168 * @f: QEMUFile where to send the data
3169 * @opaque: RAMState pointer
3170 */
Juan Quintela56e93d22015-05-07 19:33:31 +02003171static int ram_save_complete(QEMUFile *f, void *opaque)
3172{
Juan Quintela53518d92017-05-04 11:46:24 +02003173 RAMState **temp = opaque;
3174 RAMState *rs = *temp;
Juan Quintela6f37bb82017-03-13 19:26:29 +01003175
Juan Quintela56e93d22015-05-07 19:33:31 +02003176 rcu_read_lock();
3177
Juan Quintela57273092017-03-20 22:25:28 +01003178 if (!migration_in_postcopy()) {
Juan Quintela8d820d62017-03-13 19:35:50 +01003179 migration_bitmap_sync(rs);
Dr. David Alan Gilbert663e6c12015-11-05 18:11:13 +00003180 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003181
3182 ram_control_before_iterate(f, RAM_CONTROL_FINISH);
3183
3184 /* try transferring iterative blocks of memory */
3185
3186 /* flush all remaining blocks regardless of rate limiting */
3187 while (true) {
3188 int pages;
3189
Juan Quintelace25d332017-03-15 11:00:51 +01003190 pages = ram_find_and_save_block(rs, !migration_in_colo_state());
Juan Quintela56e93d22015-05-07 19:33:31 +02003191 /* no more blocks to send */
3192 if (pages == 0) {
3193 break;
3194 }
3195 }
3196
Juan Quintelace25d332017-03-15 11:00:51 +01003197 flush_compressed_data(rs);
Juan Quintela56e93d22015-05-07 19:33:31 +02003198 ram_control_after_iterate(f, RAM_CONTROL_FINISH);
Juan Quintela56e93d22015-05-07 19:33:31 +02003199
3200 rcu_read_unlock();
Paolo Bonzinid09a6fd2015-07-09 08:47:58 +02003201
Juan Quintela6df264a2018-02-28 09:10:07 +01003202 multifd_send_sync_main();
Juan Quintela56e93d22015-05-07 19:33:31 +02003203 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
3204
3205 return 0;
3206}
3207
Dr. David Alan Gilbertc31b0982015-11-05 18:10:54 +00003208static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
Vladimir Sementsov-Ogievskiy47995022018-03-13 15:34:00 -04003209 uint64_t *res_precopy_only,
3210 uint64_t *res_compatible,
3211 uint64_t *res_postcopy_only)
Juan Quintela56e93d22015-05-07 19:33:31 +02003212{
Juan Quintela53518d92017-05-04 11:46:24 +02003213 RAMState **temp = opaque;
3214 RAMState *rs = *temp;
Juan Quintela56e93d22015-05-07 19:33:31 +02003215 uint64_t remaining_size;
3216
Juan Quintela9edabd42017-03-14 12:02:16 +01003217 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
Juan Quintela56e93d22015-05-07 19:33:31 +02003218
Juan Quintela57273092017-03-20 22:25:28 +01003219 if (!migration_in_postcopy() &&
Dr. David Alan Gilbert663e6c12015-11-05 18:11:13 +00003220 remaining_size < max_size) {
Juan Quintela56e93d22015-05-07 19:33:31 +02003221 qemu_mutex_lock_iothread();
3222 rcu_read_lock();
Juan Quintela8d820d62017-03-13 19:35:50 +01003223 migration_bitmap_sync(rs);
Juan Quintela56e93d22015-05-07 19:33:31 +02003224 rcu_read_unlock();
3225 qemu_mutex_unlock_iothread();
Juan Quintela9edabd42017-03-14 12:02:16 +01003226 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
Juan Quintela56e93d22015-05-07 19:33:31 +02003227 }
Dr. David Alan Gilbertc31b0982015-11-05 18:10:54 +00003228
Vladimir Sementsov-Ogievskiy86e11672017-07-10 19:30:15 +03003229 if (migrate_postcopy_ram()) {
3230 /* We can do postcopy, and all the data is postcopiable */
Vladimir Sementsov-Ogievskiy47995022018-03-13 15:34:00 -04003231 *res_compatible += remaining_size;
Vladimir Sementsov-Ogievskiy86e11672017-07-10 19:30:15 +03003232 } else {
Vladimir Sementsov-Ogievskiy47995022018-03-13 15:34:00 -04003233 *res_precopy_only += remaining_size;
Vladimir Sementsov-Ogievskiy86e11672017-07-10 19:30:15 +03003234 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003235}
3236
3237static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
3238{
3239 unsigned int xh_len;
3240 int xh_flags;
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00003241 uint8_t *loaded_data;
Juan Quintela56e93d22015-05-07 19:33:31 +02003242
Juan Quintela56e93d22015-05-07 19:33:31 +02003243 /* extract RLE header */
3244 xh_flags = qemu_get_byte(f);
3245 xh_len = qemu_get_be16(f);
3246
3247 if (xh_flags != ENCODING_FLAG_XBZRLE) {
3248 error_report("Failed to load XBZRLE page - wrong compression!");
3249 return -1;
3250 }
3251
3252 if (xh_len > TARGET_PAGE_SIZE) {
3253 error_report("Failed to load XBZRLE page - len overflow!");
3254 return -1;
3255 }
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003256 loaded_data = XBZRLE.decoded_buf;
Juan Quintela56e93d22015-05-07 19:33:31 +02003257 /* load data and decode */
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003258 /* it can change loaded_data to point to an internal buffer */
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00003259 qemu_get_buffer_in_place(f, &loaded_data, xh_len);
Juan Quintela56e93d22015-05-07 19:33:31 +02003260
3261 /* decode RLE */
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00003262 if (xbzrle_decode_buffer(loaded_data, xh_len, host,
Juan Quintela56e93d22015-05-07 19:33:31 +02003263 TARGET_PAGE_SIZE) == -1) {
3264 error_report("Failed to load XBZRLE page - decode error!");
3265 return -1;
3266 }
3267
3268 return 0;
3269}
3270
Juan Quintela3d0684b2017-03-23 15:06:39 +01003271/**
3272 * ram_block_from_stream: read a RAMBlock id from the migration stream
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003273 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01003274 * Must be called from within a rcu critical section.
3275 *
3276 * Returns a pointer from within the RCU-protected ram_list.
3277 *
3278 * @f: QEMUFile where to read the data from
3279 * @flags: Page flags (mostly to see if it's a continuation of previous block)
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003280 */
Juan Quintela3d0684b2017-03-23 15:06:39 +01003281static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
Juan Quintela56e93d22015-05-07 19:33:31 +02003282{
3283 static RAMBlock *block = NULL;
3284 char id[256];
3285 uint8_t len;
3286
3287 if (flags & RAM_SAVE_FLAG_CONTINUE) {
zhanghailiang4c4bad42016-01-15 11:37:41 +08003288 if (!block) {
Juan Quintela56e93d22015-05-07 19:33:31 +02003289 error_report("Ack, bad migration stream!");
3290 return NULL;
3291 }
zhanghailiang4c4bad42016-01-15 11:37:41 +08003292 return block;
Juan Quintela56e93d22015-05-07 19:33:31 +02003293 }
3294
3295 len = qemu_get_byte(f);
3296 qemu_get_buffer(f, (uint8_t *)id, len);
3297 id[len] = 0;
3298
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00003299 block = qemu_ram_block_by_name(id);
zhanghailiang4c4bad42016-01-15 11:37:41 +08003300 if (!block) {
3301 error_report("Can't find block %s", id);
3302 return NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02003303 }
3304
Cédric Le Goaterb895de52018-05-14 08:57:00 +02003305 if (!qemu_ram_is_migratable(block)) {
3306 error_report("block %s should not be migrated !", id);
3307 return NULL;
3308 }
3309
zhanghailiang4c4bad42016-01-15 11:37:41 +08003310 return block;
3311}
3312
3313static inline void *host_from_ram_block_offset(RAMBlock *block,
3314 ram_addr_t offset)
3315{
3316 if (!offset_in_ramblock(block, offset)) {
3317 return NULL;
3318 }
3319
3320 return block->host + offset;
Juan Quintela56e93d22015-05-07 19:33:31 +02003321}
3322
Juan Quintela3d0684b2017-03-23 15:06:39 +01003323/**
3324 * ram_handle_compressed: handle the zero page case
3325 *
Juan Quintela56e93d22015-05-07 19:33:31 +02003326 * If a page (or a whole RDMA chunk) has been
3327 * determined to be zero, then zap it.
Juan Quintela3d0684b2017-03-23 15:06:39 +01003328 *
3329 * @host: host address for the zero page
3330 * @ch: what the page is filled from. We only support zero
3331 * @size: size of the zero page
Juan Quintela56e93d22015-05-07 19:33:31 +02003332 */
3333void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
3334{
3335 if (ch != 0 || !is_zero_range(host, size)) {
3336 memset(host, ch, size);
3337 }
3338}
3339
Xiao Guangrong797ca152018-03-30 15:51:21 +08003340/* return the size after decompression, or negative value on error */
3341static int
3342qemu_uncompress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
3343 const uint8_t *source, size_t source_len)
3344{
3345 int err;
3346
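    /* the per-thread z_stream is reused across pages: reset it rather than re-init */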
3347 err = inflateReset(stream);
3348 if (err != Z_OK) {
3349 return -1;
3350 }
3351
3352 stream->avail_in = source_len;
3353 stream->next_in = (uint8_t *)source;
3354 stream->avail_out = dest_len;
3355 stream->next_out = dest;
3356
3357 err = inflate(stream, Z_NO_FLUSH);
3358 if (err != Z_STREAM_END) {
3359 return -1;
3360 }
3361
3362 return stream->total_out;
3363}
3364
Juan Quintela56e93d22015-05-07 19:33:31 +02003365static void *do_data_decompress(void *opaque)
3366{
3367 DecompressParam *param = opaque;
3368 unsigned long pagesize;
Liang Li33d151f2016-05-05 15:32:58 +08003369 uint8_t *des;
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003370 int len, ret;
Juan Quintela56e93d22015-05-07 19:33:31 +02003371
Liang Li33d151f2016-05-05 15:32:58 +08003372 qemu_mutex_lock(&param->mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08003373 while (!param->quit) {
Liang Li33d151f2016-05-05 15:32:58 +08003374 if (param->des) {
3375 des = param->des;
3376 len = param->len;
3377 param->des = 0;
3378 qemu_mutex_unlock(&param->mutex);
3379
Liang Li73a89122016-05-05 15:32:51 +08003380 pagesize = TARGET_PAGE_SIZE;
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003381
3382 ret = qemu_uncompress_data(&param->stream, des, pagesize,
3383 param->compbuf, len);
Xiao Guangrongf5482222018-05-03 16:06:11 +08003384 if (ret < 0 && migrate_get_current()->decompress_error_check) {
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003385 error_report("decompress data failed");
3386 qemu_file_set_error(decomp_file, ret);
3387 }
Liang Li73a89122016-05-05 15:32:51 +08003388
Liang Li33d151f2016-05-05 15:32:58 +08003389 qemu_mutex_lock(&decomp_done_lock);
3390 param->done = true;
3391 qemu_cond_signal(&decomp_done_cond);
3392 qemu_mutex_unlock(&decomp_done_lock);
3393
3394 qemu_mutex_lock(&param->mutex);
3395 } else {
3396 qemu_cond_wait(&param->cond, &param->mutex);
3397 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003398 }
Liang Li33d151f2016-05-05 15:32:58 +08003399 qemu_mutex_unlock(&param->mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02003400
3401 return NULL;
3402}
3403
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003404static int wait_for_decompress_done(void)
Liang Li5533b2e2016-05-05 15:32:52 +08003405{
3406 int idx, thread_count;
3407
3408 if (!migrate_use_compression()) {
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003409 return 0;
Liang Li5533b2e2016-05-05 15:32:52 +08003410 }
3411
3412 thread_count = migrate_decompress_threads();
3413 qemu_mutex_lock(&decomp_done_lock);
3414 for (idx = 0; idx < thread_count; idx++) {
3415 while (!decomp_param[idx].done) {
3416 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
3417 }
3418 }
3419 qemu_mutex_unlock(&decomp_done_lock);
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003420 return qemu_file_get_error(decomp_file);
Liang Li5533b2e2016-05-05 15:32:52 +08003421}
3422
Juan Quintelaf0afa332017-06-28 11:52:28 +02003423static void compress_threads_load_cleanup(void)
Juan Quintela56e93d22015-05-07 19:33:31 +02003424{
3425 int i, thread_count;
3426
Juan Quintela3416ab52016-04-20 11:56:01 +02003427 if (!migrate_use_compression()) {
3428 return;
3429 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003430 thread_count = migrate_decompress_threads();
3431 for (i = 0; i < thread_count; i++) {
Xiao Guangrong797ca152018-03-30 15:51:21 +08003432 /*
3433 * we use it as an indicator of whether the thread is
3434 * properly initialized or not
3435 */
3436 if (!decomp_param[i].compbuf) {
3437 break;
3438 }
3439
Juan Quintela56e93d22015-05-07 19:33:31 +02003440 qemu_mutex_lock(&decomp_param[i].mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08003441 decomp_param[i].quit = true;
Juan Quintela56e93d22015-05-07 19:33:31 +02003442 qemu_cond_signal(&decomp_param[i].cond);
3443 qemu_mutex_unlock(&decomp_param[i].mutex);
3444 }
3445 for (i = 0; i < thread_count; i++) {
Xiao Guangrong797ca152018-03-30 15:51:21 +08003446 if (!decomp_param[i].compbuf) {
3447 break;
3448 }
3449
Juan Quintela56e93d22015-05-07 19:33:31 +02003450 qemu_thread_join(decompress_threads + i);
3451 qemu_mutex_destroy(&decomp_param[i].mutex);
3452 qemu_cond_destroy(&decomp_param[i].cond);
Xiao Guangrong797ca152018-03-30 15:51:21 +08003453 inflateEnd(&decomp_param[i].stream);
Juan Quintela56e93d22015-05-07 19:33:31 +02003454 g_free(decomp_param[i].compbuf);
Xiao Guangrong797ca152018-03-30 15:51:21 +08003455 decomp_param[i].compbuf = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02003456 }
3457 g_free(decompress_threads);
3458 g_free(decomp_param);
Juan Quintela56e93d22015-05-07 19:33:31 +02003459 decompress_threads = NULL;
3460 decomp_param = NULL;
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003461 decomp_file = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02003462}
3463
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003464static int compress_threads_load_setup(QEMUFile *f)
Xiao Guangrong797ca152018-03-30 15:51:21 +08003465{
3466 int i, thread_count;
3467
3468 if (!migrate_use_compression()) {
3469 return 0;
3470 }
3471
3472 thread_count = migrate_decompress_threads();
3473 decompress_threads = g_new0(QemuThread, thread_count);
3474 decomp_param = g_new0(DecompressParam, thread_count);
3475 qemu_mutex_init(&decomp_done_lock);
3476 qemu_cond_init(&decomp_done_cond);
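    /* cached so decompression errors can be propagated via qemu_file_set_error() */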
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003477 decomp_file = f;
Xiao Guangrong797ca152018-03-30 15:51:21 +08003478 for (i = 0; i < thread_count; i++) {
3479 if (inflateInit(&decomp_param[i].stream) != Z_OK) {
3480 goto exit;
3481 }
3482
3483 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
3484 qemu_mutex_init(&decomp_param[i].mutex);
3485 qemu_cond_init(&decomp_param[i].cond);
3486 decomp_param[i].done = true;
3487 decomp_param[i].quit = false;
3488 qemu_thread_create(decompress_threads + i, "decompress",
3489 do_data_decompress, decomp_param + i,
3490 QEMU_THREAD_JOINABLE);
3491 }
3492 return 0;
3493exit:
3494 compress_threads_load_cleanup();
3495 return -1;
3496}
3497
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00003498static void decompress_data_with_multi_threads(QEMUFile *f,
Juan Quintela56e93d22015-05-07 19:33:31 +02003499 void *host, int len)
3500{
3501 int idx, thread_count;
3502
3503 thread_count = migrate_decompress_threads();
Liang Li73a89122016-05-05 15:32:51 +08003504 qemu_mutex_lock(&decomp_done_lock);
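    /*
     * Find an idle decompression thread and hand it the compressed buffer;
     * if none is free, wait on decomp_done_cond until one finishes.
     */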
Juan Quintela56e93d22015-05-07 19:33:31 +02003505 while (true) {
3506 for (idx = 0; idx < thread_count; idx++) {
Liang Li73a89122016-05-05 15:32:51 +08003507 if (decomp_param[idx].done) {
Liang Li33d151f2016-05-05 15:32:58 +08003508 decomp_param[idx].done = false;
3509 qemu_mutex_lock(&decomp_param[idx].mutex);
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00003510 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
Juan Quintela56e93d22015-05-07 19:33:31 +02003511 decomp_param[idx].des = host;
3512 decomp_param[idx].len = len;
Liang Li33d151f2016-05-05 15:32:58 +08003513 qemu_cond_signal(&decomp_param[idx].cond);
3514 qemu_mutex_unlock(&decomp_param[idx].mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02003515 break;
3516 }
3517 }
3518 if (idx < thread_count) {
3519 break;
Liang Li73a89122016-05-05 15:32:51 +08003520 } else {
3521 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02003522 }
3523 }
Liang Li73a89122016-05-05 15:32:51 +08003524 qemu_mutex_unlock(&decomp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02003525}
3526
Juan Quintela3d0684b2017-03-23 15:06:39 +01003527/**
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003528 * ram_load_setup: Setup RAM for migration incoming side
3529 *
3530 * Returns zero to indicate success and negative for error
3531 *
3532 * @f: QEMUFile where to receive the data
3533 * @opaque: RAMState pointer
3534 */
3535static int ram_load_setup(QEMUFile *f, void *opaque)
3536{
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003537 if (compress_threads_load_setup(f)) {
Xiao Guangrong797ca152018-03-30 15:51:21 +08003538 return -1;
3539 }
3540
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003541 xbzrle_load_setup();
Alexey Perevalovf9494612017-10-05 14:13:20 +03003542 ramblock_recv_map_init();
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003543 return 0;
3544}
3545
3546static int ram_load_cleanup(void *opaque)
3547{
Alexey Perevalovf9494612017-10-05 14:13:20 +03003548 RAMBlock *rb;
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003549 xbzrle_load_cleanup();
Juan Quintelaf0afa332017-06-28 11:52:28 +02003550 compress_threads_load_cleanup();
Alexey Perevalovf9494612017-10-05 14:13:20 +03003551
Cédric Le Goaterb895de52018-05-14 08:57:00 +02003552 RAMBLOCK_FOREACH_MIGRATABLE(rb) {
Alexey Perevalovf9494612017-10-05 14:13:20 +03003553 g_free(rb->receivedmap);
3554 rb->receivedmap = NULL;
3555 }
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003556 return 0;
3557}
3558
3559/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01003560 * ram_postcopy_incoming_init: allocate postcopy data structures
3561 *
3562 * Returns 0 for success and negative if there was one error
3563 *
3564 * @mis: current migration incoming state
3565 *
3566 * Allocate data structures etc needed by incoming migration with
3567 * postcopy-ram. postcopy-ram's similarly named
3568 * postcopy_ram_incoming_init does the work.
Dr. David Alan Gilbert1caddf82015-11-05 18:11:03 +00003569 */
3570int ram_postcopy_incoming_init(MigrationIncomingState *mis)
3571{
Juan Quintelab8c48992017-03-21 17:44:30 +01003572 unsigned long ram_pages = last_ram_page();
Dr. David Alan Gilbert1caddf82015-11-05 18:11:03 +00003573
3574 return postcopy_ram_incoming_init(mis, ram_pages);
3575}
3576
Juan Quintela3d0684b2017-03-23 15:06:39 +01003577/**
3578 * ram_load_postcopy: load a page in postcopy case
3579 *
3580 * Returns 0 for success or -errno in case of error
3581 *
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003582 * Called in postcopy mode by ram_load().
3583 * rcu_read_lock is taken prior to this being called.
Juan Quintela3d0684b2017-03-23 15:06:39 +01003584 *
3585 * @f: QEMUFile to receive the data from
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003586 */
3587static int ram_load_postcopy(QEMUFile *f)
3588{
3589 int flags = 0, ret = 0;
3590 bool place_needed = false;
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00003591 bool matching_page_sizes = false;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003592 MigrationIncomingState *mis = migration_incoming_get_current();
3593 /* Temporary page that is later 'placed' */
3594 void *postcopy_host_page = postcopy_get_tmp_page(mis);
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00003595 void *last_host = NULL;
Dr. David Alan Gilberta3b6ff62015-11-11 14:02:28 +00003596 bool all_zero = false;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003597
3598 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
3599 ram_addr_t addr;
3600 void *host = NULL;
3601 void *page_buffer = NULL;
3602 void *place_source = NULL;
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00003603 RAMBlock *block = NULL;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003604 uint8_t ch;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003605
3606 addr = qemu_get_be64(f);
Peter Xu7a9ddfb2018-02-08 18:31:05 +08003607
3608 /*
3609 * If there is a qemu file error, we should stop here; "addr"
3610 * may be invalid
3611 */
3612 ret = qemu_file_get_error(f);
3613 if (ret) {
3614 break;
3615 }
3616
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003617 flags = addr & ~TARGET_PAGE_MASK;
3618 addr &= TARGET_PAGE_MASK;
3619
3620 trace_ram_load_postcopy_loop((uint64_t)addr, flags);
3621 place_needed = false;
Juan Quintelabb890ed2017-04-28 09:39:55 +02003622 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE)) {
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00003623 block = ram_block_from_stream(f, flags);
zhanghailiang4c4bad42016-01-15 11:37:41 +08003624
3625 host = host_from_ram_block_offset(block, addr);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003626 if (!host) {
3627 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
3628 ret = -EINVAL;
3629 break;
3630 }
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00003631 matching_page_sizes = block->page_size == TARGET_PAGE_SIZE;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003632 /*
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00003633 * Postcopy requires that we place whole host pages atomically;
3634 * these may be huge pages for RAMBlocks that are backed by
3635 * hugetlbfs.
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003636 * To make it atomic, the data is read into a temporary page
3637 * that's moved into place later.
3638 * The migration protocol uses, possibly smaller, target-pages
3639 * however the source ensures it always sends all the components
3640 * of a host page in order.
3641 */
3642 page_buffer = postcopy_host_page +
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00003643 ((uintptr_t)host & (block->page_size - 1));
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003644 /* If all TP are zero then we can optimise the place */
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00003645 if (!((uintptr_t)host & (block->page_size - 1))) {
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003646 all_zero = true;
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00003647 } else {
3648 /* not the 1st TP within the HP */
3649 if (host != (last_host + TARGET_PAGE_SIZE)) {
Markus Armbruster9af9e0f2015-12-18 16:35:19 +01003650 error_report("Non-sequential target page %p/%p",
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00003651 host, last_host);
3652 ret = -EINVAL;
3653 break;
3654 }
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003655 }
3656
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00003657
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003658 /*
3659 * If it's the last part of a host page then we place the host
3660 * page
3661 */
3662 place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00003663 (block->page_size - 1)) == 0;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003664 place_source = postcopy_host_page;
3665 }
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00003666 last_host = host;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003667
3668 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
Juan Quintelabb890ed2017-04-28 09:39:55 +02003669 case RAM_SAVE_FLAG_ZERO:
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003670 ch = qemu_get_byte(f);
3671 memset(page_buffer, ch, TARGET_PAGE_SIZE);
3672 if (ch) {
3673 all_zero = false;
3674 }
3675 break;
3676
3677 case RAM_SAVE_FLAG_PAGE:
3678 all_zero = false;
3679 if (!place_needed || !matching_page_sizes) {
3680 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
3681 } else {
3682 /* Avoids the qemu_file copy during postcopy, which is
3683 * going to do a copy later; can only do it when we
3684 * do this read in one go (matching page sizes)
3685 */
3686 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
3687 TARGET_PAGE_SIZE);
3688 }
3689 break;
3690 case RAM_SAVE_FLAG_EOS:
3691 /* normal exit */
Juan Quintela6df264a2018-02-28 09:10:07 +01003692 multifd_recv_sync_main();
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003693 break;
3694 default:
3695 error_report("Unknown combination of migration flags: %#x"
3696 " (postcopy mode)", flags);
3697 ret = -EINVAL;
Peter Xu7a9ddfb2018-02-08 18:31:05 +08003698 break;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003699 }
3700
Peter Xu7a9ddfb2018-02-08 18:31:05 +08003701 /* Detect for any possible file errors */
3702 if (!ret && qemu_file_get_error(f)) {
3703 ret = qemu_file_get_error(f);
3704 }
3705
3706 if (!ret && place_needed) {
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003707 /* This gets called at the last target page in the host page */
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00003708 void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;
3709
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003710 if (all_zero) {
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00003711 ret = postcopy_place_page_zero(mis, place_dest,
Alexey Perevalov8be46202017-10-05 14:13:18 +03003712 block);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003713 } else {
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00003714 ret = postcopy_place_page(mis, place_dest,
Alexey Perevalov8be46202017-10-05 14:13:18 +03003715 place_source, block);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003716 }
3717 }
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003718 }
3719
3720 return ret;
3721}
3722
Daniel Henrique Barbozaacab30b2017-11-16 20:35:26 -02003723static bool postcopy_is_advised(void)
3724{
3725 PostcopyState ps = postcopy_state_get();
3726 return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END;
3727}
3728
3729static bool postcopy_is_running(void)
3730{
3731 PostcopyState ps = postcopy_state_get();
3732 return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END;
3733}
3734
Juan Quintela56e93d22015-05-07 19:33:31 +02003735static int ram_load(QEMUFile *f, void *opaque, int version_id)
3736{
Juan Quintelaedc60122016-11-02 12:40:46 +01003737 int flags = 0, ret = 0, invalid_flags = 0;
Juan Quintela56e93d22015-05-07 19:33:31 +02003738 static uint64_t seq_iter;
3739 int len = 0;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003740 /*
3741 * If system is running in postcopy mode, page inserts to host memory must
3742 * be atomic
3743 */
Daniel Henrique Barbozaacab30b2017-11-16 20:35:26 -02003744 bool postcopy_running = postcopy_is_running();
Dr. David Alan Gilbertef08fb32017-02-24 18:28:30 +00003745 /* ADVISE is earlier, it shows the source has the postcopy capability on */
Daniel Henrique Barbozaacab30b2017-11-16 20:35:26 -02003746 bool postcopy_advised = postcopy_is_advised();
Juan Quintela56e93d22015-05-07 19:33:31 +02003747
3748 seq_iter++;
3749
3750 if (version_id != 4) {
3751 ret = -EINVAL;
3752 }
3753
Juan Quintelaedc60122016-11-02 12:40:46 +01003754 if (!migrate_use_compression()) {
3755 invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE;
3756 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003757 /* This RCU critical section can be very long running.
3758 * When RCU reclaims in the code start to become numerous,
3759 * it will be necessary to reduce the granularity of this
3760 * critical section.
3761 */
3762 rcu_read_lock();
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003763
3764 if (postcopy_running) {
3765 ret = ram_load_postcopy(f);
3766 }
3767
3768 while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02003769 ram_addr_t addr, total_ram_bytes;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00003770 void *host = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02003771 uint8_t ch;
3772
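        /*
         * Each chunk starts with a be64 header word: the bits below the
         * target page size carry the RAM_SAVE_FLAG_* flags, the remaining
         * bits give the page address.
         */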
3773 addr = qemu_get_be64(f);
3774 flags = addr & ~TARGET_PAGE_MASK;
3775 addr &= TARGET_PAGE_MASK;
3776
Juan Quintelaedc60122016-11-02 12:40:46 +01003777 if (flags & invalid_flags) {
3778 if (flags & invalid_flags & RAM_SAVE_FLAG_COMPRESS_PAGE) {
3779 error_report("Received an unexpected compressed page");
3780 }
3781
3782 ret = -EINVAL;
3783 break;
3784 }
3785
Juan Quintelabb890ed2017-04-28 09:39:55 +02003786 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00003787 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
zhanghailiang4c4bad42016-01-15 11:37:41 +08003788 RAMBlock *block = ram_block_from_stream(f, flags);
3789
3790 host = host_from_ram_block_offset(block, addr);
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00003791 if (!host) {
3792 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
3793 ret = -EINVAL;
3794 break;
3795 }
Alexey Perevalovf9494612017-10-05 14:13:20 +03003796 ramblock_recv_bitmap_set(block, host);
Dr. David Alan Gilbert1db9d8e2017-04-26 19:37:21 +01003797 trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00003798 }
3799
Juan Quintela56e93d22015-05-07 19:33:31 +02003800 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
3801 case RAM_SAVE_FLAG_MEM_SIZE:
3802 /* Synchronize RAM block list */
3803 total_ram_bytes = addr;
3804 while (!ret && total_ram_bytes) {
3805 RAMBlock *block;
Juan Quintela56e93d22015-05-07 19:33:31 +02003806 char id[256];
3807 ram_addr_t length;
3808
3809 len = qemu_get_byte(f);
3810 qemu_get_buffer(f, (uint8_t *)id, len);
3811 id[len] = 0;
3812 length = qemu_get_be64(f);
3813
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00003814 block = qemu_ram_block_by_name(id);
Cédric Le Goaterb895de52018-05-14 08:57:00 +02003815 if (block && !qemu_ram_is_migratable(block)) {
3816 error_report("block %s should not be migrated !", id);
3817 ret = -EINVAL;
3818 } else if (block) {
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00003819 if (length != block->used_length) {
3820 Error *local_err = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02003821
Gongleifa53a0e2016-05-10 10:04:59 +08003822 ret = qemu_ram_resize(block, length,
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00003823 &local_err);
3824 if (local_err) {
3825 error_report_err(local_err);
Juan Quintela56e93d22015-05-07 19:33:31 +02003826 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003827 }
Dr. David Alan Gilbertef08fb32017-02-24 18:28:30 +00003828 /* For postcopy we need to check hugepage sizes match */
3829 if (postcopy_advised &&
3830 block->page_size != qemu_host_page_size) {
3831 uint64_t remote_page_size = qemu_get_be64(f);
3832 if (remote_page_size != block->page_size) {
3833 error_report("Mismatched RAM page size %s "
3834 "(local) %zd != %" PRId64,
3835 id, block->page_size,
3836 remote_page_size);
3837 ret = -EINVAL;
3838 }
3839 }
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00003840 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
3841 block->idstr);
3842 } else {
Juan Quintela56e93d22015-05-07 19:33:31 +02003843 error_report("Unknown ramblock \"%s\", cannot "
3844 "accept migration", id);
3845 ret = -EINVAL;
3846 }
3847
3848 total_ram_bytes -= length;
3849 }
3850 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00003851
Juan Quintelabb890ed2017-04-28 09:39:55 +02003852 case RAM_SAVE_FLAG_ZERO:
Juan Quintela56e93d22015-05-07 19:33:31 +02003853 ch = qemu_get_byte(f);
3854 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
3855 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00003856
Juan Quintela56e93d22015-05-07 19:33:31 +02003857 case RAM_SAVE_FLAG_PAGE:
Juan Quintela56e93d22015-05-07 19:33:31 +02003858 qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
3859 break;
Juan Quintela56e93d22015-05-07 19:33:31 +02003860
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00003861 case RAM_SAVE_FLAG_COMPRESS_PAGE:
Juan Quintela56e93d22015-05-07 19:33:31 +02003862 len = qemu_get_be32(f);
3863 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
3864 error_report("Invalid compressed data length: %d", len);
3865 ret = -EINVAL;
3866 break;
3867 }
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00003868 decompress_data_with_multi_threads(f, host, len);
Juan Quintela56e93d22015-05-07 19:33:31 +02003869 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00003870
Juan Quintela56e93d22015-05-07 19:33:31 +02003871 case RAM_SAVE_FLAG_XBZRLE:
Juan Quintela56e93d22015-05-07 19:33:31 +02003872 if (load_xbzrle(f, addr, host) < 0) {
3873 error_report("Failed to decompress XBZRLE page at "
3874 RAM_ADDR_FMT, addr);
3875 ret = -EINVAL;
3876 break;
3877 }
3878 break;
3879 case RAM_SAVE_FLAG_EOS:
3880 /* normal exit */
Juan Quintela6df264a2018-02-28 09:10:07 +01003881 multifd_recv_sync_main();
Juan Quintela56e93d22015-05-07 19:33:31 +02003882 break;
3883 default:
3884 if (flags & RAM_SAVE_FLAG_HOOK) {
Dr. David Alan Gilbert632e3a52015-06-11 18:17:23 +01003885 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
Juan Quintela56e93d22015-05-07 19:33:31 +02003886 } else {
3887 error_report("Unknown combination of migration flags: %#x",
3888 flags);
3889 ret = -EINVAL;
3890 }
3891 }
3892 if (!ret) {
3893 ret = qemu_file_get_error(f);
3894 }
3895 }
3896
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003897 ret |= wait_for_decompress_done();
Juan Quintela56e93d22015-05-07 19:33:31 +02003898 rcu_read_unlock();
Juan Quintela55c44462017-01-23 22:32:05 +01003899 trace_ram_load_complete(ret, seq_iter);
Juan Quintela56e93d22015-05-07 19:33:31 +02003900 return ret;
3901}
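/*
 * Rough sketch of the stream consumed by ram_load() above, for orientation
 * only; the save side owns the authoritative encoding, and the address/flag
 * packing is assumed from the save-side helpers rather than shown here:
 *
 *   repeat {
 *       be64  addr+flags       RAM_SAVE_FLAG_* bits in the sub-page bits
 *       MEM_SIZE:        per block: u8 idlen, idstr, be64 used_length
 *                        (plus a be64 page size when postcopy is advised)
 *       ZERO:            u8 fill byte for the whole target page
 *       PAGE:            TARGET_PAGE_SIZE raw bytes
 *       COMPRESS_PAGE:   be32 len, then len bytes of zlib data
 *       XBZRLE:          delta-encoded page, parsed by load_xbzrle()
 *   } until a word carrying RAM_SAVE_FLAG_EOS
 */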
3902
Vladimir Sementsov-Ogievskiyc6467622017-07-10 19:30:14 +03003903static bool ram_has_postcopy(void *opaque)
3904{
3905 return migrate_postcopy_ram();
3906}
3907
Peter Xuedd090c2018-05-02 18:47:32 +08003908/* Sync all the dirty bitmaps with the destination VM. */
3909static int ram_dirty_bitmap_sync_all(MigrationState *s, RAMState *rs)
3910{
3911 RAMBlock *block;
3912 QEMUFile *file = s->to_dst_file;
3913 int ramblock_count = 0;
3914
3915 trace_ram_dirty_bitmap_sync_start();
3916
Dr. David Alan Gilbertff0769a2018-06-05 17:25:44 +01003917 RAMBLOCK_FOREACH_MIGRATABLE(block) {
Peter Xuedd090c2018-05-02 18:47:32 +08003918 qemu_savevm_send_recv_bitmap(file, block->idstr);
3919 trace_ram_dirty_bitmap_request(block->idstr);
3920 ramblock_count++;
3921 }
3922
3923 trace_ram_dirty_bitmap_sync_wait();
3924
3925 /* Wait until all the ramblocks' dirty bitmaps have been synced */
3926 while (ramblock_count--) {
3927 qemu_sem_wait(&s->rp_state.rp_sem);
3928 }
3929
3930 trace_ram_dirty_bitmap_sync_complete();
3931
3932 return 0;
3933}
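/*
 * A minimal sketch of the handshake driven above, assuming the destination
 * answers through ramblock_recv_bitmap_send() (its output is what
 * ram_dirty_bitmap_reload() below parses):
 *
 *   source                                 destination
 *   qemu_savevm_send_recv_bitmap(id)  -->  be64 bitmap size
 *                                          <size> bytes of LE bitmap data
 *                                          be64 RAMBLOCK_RECV_BITMAP_ENDING
 *   ram_dirty_bitmap_reload()         <--  (sent back on the return path)
 *     posts rp_sem once per block, which is exactly what the
 *     qemu_sem_wait() loop above counts down.
 */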
3934
3935static void ram_dirty_bitmap_reload_notify(MigrationState *s)
3936{
3937 qemu_sem_post(&s->rp_state.rp_sem);
3938}
3939
Peter Xua335deb2018-05-02 18:47:28 +08003940/*
3941 * Read the received bitmap, revert it as the initial dirty bitmap.
3942 * This is only used when the postcopy migration is paused but wants
3943 * to resume from a middle point.
3944 */
3945int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block)
3946{
3947 int ret = -EINVAL;
3948 QEMUFile *file = s->rp_state.from_dst_file;
3949 unsigned long *le_bitmap, nbits = block->used_length >> TARGET_PAGE_BITS;
3950 uint64_t local_size = nbits / 8;
3951 uint64_t size, end_mark;
3952
3953 trace_ram_dirty_bitmap_reload_begin(block->idstr);
3954
3955 if (s->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
3956 error_report("%s: incorrect state %s", __func__,
3957 MigrationStatus_str(s->state));
3958 return -EINVAL;
3959 }
3960
3961 /*
3962 * Note: see the comments in ramblock_recv_bitmap_send() on why we
3963 * need the endianness conversion and the padding.
3964 */
3965 local_size = ROUND_UP(local_size, 8);
3966
3967 /* Add padding */
3968 le_bitmap = bitmap_new(nbits + BITS_PER_LONG);
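    /*
     * Worked example of the size math, assuming 4 KiB target pages: a
     * 1 GiB ramblock gives nbits = 262144, hence local_size = 32768 bytes,
     * already a multiple of 8.  The extra BITS_PER_LONG bits allocated
     * above leave room for the sender having padded its bitmap up to a
     * whole number of longs.
     */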
3969
3970 size = qemu_get_be64(file);
3971
3972 /* The size of the bitmap should match our ramblock's */
3973 if (size != local_size) {
3974 error_report("%s: ramblock '%s' bitmap size mismatch "
3975 "(0x%"PRIx64" != 0x%"PRIx64")", __func__,
3976 block->idstr, size, local_size);
3977 ret = -EINVAL;
3978 goto out;
3979 }
3980
3981 size = qemu_get_buffer(file, (uint8_t *)le_bitmap, local_size);
3982 end_mark = qemu_get_be64(file);
3983
3984 ret = qemu_file_get_error(file);
3985 if (ret || size != local_size) {
3986 error_report("%s: read bitmap failed for ramblock '%s': %d"
3987 " (size 0x%"PRIx64", got: 0x%"PRIx64")",
3988 __func__, block->idstr, ret, local_size, size);
3989 ret = -EIO;
3990 goto out;
3991 }
3992
3993 if (end_mark != RAMBLOCK_RECV_BITMAP_ENDING) {
3994 error_report("%s: ramblock '%s' end mark incorrect: 0x%"PRIx64,
3995 __func__, block->idstr, end_mark);
3996 ret = -EINVAL;
3997 goto out;
3998 }
3999
4000 /*
4001 * Endianness conversion. We are in postcopy (though paused), so the
4002 * dirty bitmap won't change and we can modify it in place.
4003 */
4004 bitmap_from_le(block->bmap, le_bitmap, nbits);
4005
4006 /*
4007 * What we received is the "received bitmap"; invert it to obtain the
4008 * initial dirty bitmap for this ramblock.
4009 */
4010 bitmap_complement(block->bmap, block->bmap, nbits);
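    /*
     * Example: if the destination reported pages 0..2 of a block as
     * received, bits 0..2 are set in the incoming bitmap; after the
     * complement those bits are clear and all remaining bits are set, so
     * only the pages that never arrived are marked dirty and re-sent once
     * the postcopy migration resumes.
     */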
4011
4012 trace_ram_dirty_bitmap_reload_complete(block->idstr);
4013
Peter Xuedd090c2018-05-02 18:47:32 +08004014 /*
4015 * We succeeded in syncing the bitmap for the current ramblock. If this
4016 * is the last one to sync, notify the main send thread.
4017 */
4018 ram_dirty_bitmap_reload_notify(s);
4019
Peter Xua335deb2018-05-02 18:47:28 +08004020 ret = 0;
4021out:
Peter Xubf269902018-05-25 09:50:42 +08004022 g_free(le_bitmap);
Peter Xua335deb2018-05-02 18:47:28 +08004023 return ret;
4024}
4025
Peter Xuedd090c2018-05-02 18:47:32 +08004026static int ram_resume_prepare(MigrationState *s, void *opaque)
4027{
4028 RAMState *rs = *(RAMState **)opaque;
Peter Xu08614f32018-05-02 18:47:33 +08004029 int ret;
Peter Xuedd090c2018-05-02 18:47:32 +08004030
Peter Xu08614f32018-05-02 18:47:33 +08004031 ret = ram_dirty_bitmap_sync_all(s, rs);
4032 if (ret) {
4033 return ret;
4034 }
4035
4036 ram_state_resume_prepare(rs, s->to_dst_file);
4037
4038 return 0;
Peter Xuedd090c2018-05-02 18:47:32 +08004039}
4040
Juan Quintela56e93d22015-05-07 19:33:31 +02004041static SaveVMHandlers savevm_ram_handlers = {
Juan Quintela9907e842017-06-28 11:52:24 +02004042 .save_setup = ram_save_setup,
Juan Quintela56e93d22015-05-07 19:33:31 +02004043 .save_live_iterate = ram_save_iterate,
Dr. David Alan Gilbert763c9062015-11-05 18:11:00 +00004044 .save_live_complete_postcopy = ram_save_complete,
Dr. David Alan Gilberta3e06c32015-11-05 18:10:41 +00004045 .save_live_complete_precopy = ram_save_complete,
Vladimir Sementsov-Ogievskiyc6467622017-07-10 19:30:14 +03004046 .has_postcopy = ram_has_postcopy,
Juan Quintela56e93d22015-05-07 19:33:31 +02004047 .save_live_pending = ram_save_pending,
4048 .load_state = ram_load,
Juan Quintelaf265e0e2017-06-28 11:52:27 +02004049 .save_cleanup = ram_save_cleanup,
4050 .load_setup = ram_load_setup,
4051 .load_cleanup = ram_load_cleanup,
Peter Xuedd090c2018-05-02 18:47:32 +08004052 .resume_prepare = ram_resume_prepare,
Juan Quintela56e93d22015-05-07 19:33:31 +02004053};
4054
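/*
 * Wiring of the handlers above: "ram" is the section name, and the 0 and 4
 * passed to register_savevm_live() below are taken to be the instance id
 * and the RAM section version id (per the usual signature of this helper);
 * &ram_state is handed back to every handler as its opaque pointer.
 */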
4055void ram_mig_init(void)
4056{
4057 qemu_mutex_init(&XBZRLE.lock);
Juan Quintela6f37bb82017-03-13 19:26:29 +01004058 register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state);
Juan Quintela56e93d22015-05-07 19:33:31 +02004059}