blob: d5335c10b6e74dac40534fb413944f1e5d22449e [file] [log] [blame]
Juan Quintela56e93d22015-05-07 19:33:31 +02001/*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
Juan Quintela76cc7b52015-05-08 13:20:21 +02005 * Copyright (c) 2011-2015 Red Hat Inc
6 *
7 * Authors:
8 * Juan Quintela <quintela@redhat.com>
Juan Quintela56e93d22015-05-07 19:33:31 +02009 *
10 * Permission is hereby granted, free of charge, to any person obtaining a copy
11 * of this software and associated documentation files (the "Software"), to deal
12 * in the Software without restriction, including without limitation the rights
13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the Software is
15 * furnished to do so, subject to the following conditions:
16 *
17 * The above copyright notice and this permission notice shall be included in
18 * all copies or substantial portions of the Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 * THE SOFTWARE.
27 */
Markus Armbrustere688df62018-02-01 12:18:31 +010028
Peter Maydell1393a482016-01-26 18:16:54 +000029#include "qemu/osdep.h"
Paolo Bonzini33c11872016-03-15 16:58:45 +010030#include "cpu.h"
Juan Quintela56e93d22015-05-07 19:33:31 +020031#include <zlib.h>
Veronia Bahaaf348b6d2016-03-20 19:16:19 +020032#include "qemu/cutils.h"
Juan Quintela56e93d22015-05-07 19:33:31 +020033#include "qemu/bitops.h"
34#include "qemu/bitmap.h"
Juan Quintela7205c9e2015-05-08 13:54:36 +020035#include "qemu/main-loop.h"
Juan Quintela709e3fe2017-04-05 21:47:50 +020036#include "xbzrle.h"
Juan Quintela7b1e1a22017-04-17 20:26:27 +020037#include "ram.h"
Juan Quintela6666c962017-04-24 20:07:27 +020038#include "migration.h"
Juan Quintela71bb07d2018-02-19 19:01:03 +010039#include "socket.h"
Juan Quintelaf2a8f0a2017-04-24 13:42:55 +020040#include "migration/register.h"
Juan Quintela7b1e1a22017-04-17 20:26:27 +020041#include "migration/misc.h"
Juan Quintela08a0aee2017-04-20 18:52:18 +020042#include "qemu-file.h"
Juan Quintelabe07b0a2017-04-20 13:12:24 +020043#include "postcopy-ram.h"
Juan Quintela56e93d22015-05-07 19:33:31 +020044#include "migration/page_cache.h"
Juan Quintela56e93d22015-05-07 19:33:31 +020045#include "qemu/error-report.h"
Markus Armbrustere688df62018-02-01 12:18:31 +010046#include "qapi/error.h"
Markus Armbruster9af23982018-02-11 10:36:01 +010047#include "qapi/qapi-events-migration.h"
Juan Quintela8acabf62017-10-05 22:00:31 +020048#include "qapi/qmp/qerror.h"
Juan Quintela56e93d22015-05-07 19:33:31 +020049#include "trace.h"
Juan Quintela56e93d22015-05-07 19:33:31 +020050#include "exec/ram_addr.h"
Alexey Perevalovf9494612017-10-05 14:13:20 +030051#include "exec/target_page.h"
Juan Quintela56e93d22015-05-07 19:33:31 +020052#include "qemu/rcu_queue.h"
zhanghailianga91246c2016-10-27 14:42:59 +080053#include "migration/colo.h"
Peter Lieven9ac78b62017-09-26 12:33:16 +020054#include "migration/block.h"
Juan Quintela56e93d22015-05-07 19:33:31 +020055
Juan Quintela56e93d22015-05-07 19:33:31 +020056/***********************************************************/
57/* ram save/restore */
58
/* RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS; it
 * worked for pages that were filled with the same char.  We switched
 * it to only search for the zero value.  And to avoid confusion with
 * RAM_SAVE_FLAG_COMPRESS_PAGE just rename it.
 */
64
Juan Quintela56e93d22015-05-07 19:33:31 +020065#define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */
Juan Quintelabb890ed2017-04-28 09:39:55 +020066#define RAM_SAVE_FLAG_ZERO 0x02
Juan Quintela56e93d22015-05-07 19:33:31 +020067#define RAM_SAVE_FLAG_MEM_SIZE 0x04
68#define RAM_SAVE_FLAG_PAGE 0x08
69#define RAM_SAVE_FLAG_EOS 0x10
70#define RAM_SAVE_FLAG_CONTINUE 0x20
71#define RAM_SAVE_FLAG_XBZRLE 0x40
72/* 0x80 is reserved in migration.h start with 0x100 next */
73#define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100
74
/*
 * Return true if every byte of the @size-byte buffer at @p is zero.
 * Thin wrapper over buffer_is_zero() (qemu/cutils.h).
 */
static inline bool is_zero_range(uint8_t *p, uint64_t size)
{
    return buffer_is_zero(p, size);
}
79
/* Global XBZRLE statistics (cache misses etc.), exported for reporting */
XBZRLECacheStats xbzrle_counters;

/* struct contains XBZRLE cache and a static page
   used by the compression */
static struct {
    /* buffer used for XBZRLE encoding */
    uint8_t *encoded_buf;
    /* buffer for storing page content */
    uint8_t *current_buf;
    /* Cache for XBZRLE, Protected by lock. */
    PageCache *cache;
    QemuMutex lock;
    /* it will store a page full of zeros */
    uint8_t *zero_target_page;
    /* buffer used for XBZRLE decoding */
    uint8_t *decoded_buf;
} XBZRLE;
97
Juan Quintela56e93d22015-05-07 19:33:31 +020098static void XBZRLE_cache_lock(void)
99{
100 if (migrate_use_xbzrle())
101 qemu_mutex_lock(&XBZRLE.lock);
102}
103
104static void XBZRLE_cache_unlock(void)
105{
106 if (migrate_use_xbzrle())
107 qemu_mutex_unlock(&XBZRLE.lock);
108}
109
Juan Quintela3d0684b2017-03-23 15:06:39 +0100110/**
111 * xbzrle_cache_resize: resize the xbzrle cache
112 *
113 * This function is called from qmp_migrate_set_cache_size in main
114 * thread, possibly while a migration is in progress. A running
115 * migration may be using the cache and might finish during this call,
116 * hence changes to the cache are protected by XBZRLE.lock().
117 *
Juan Quintelac9dede22017-10-06 23:03:55 +0200118 * Returns 0 for success or -1 for error
Juan Quintela3d0684b2017-03-23 15:06:39 +0100119 *
120 * @new_size: new cache size
Juan Quintela8acabf62017-10-05 22:00:31 +0200121 * @errp: set *errp if the check failed, with reason
Juan Quintela56e93d22015-05-07 19:33:31 +0200122 */
Juan Quintelac9dede22017-10-06 23:03:55 +0200123int xbzrle_cache_resize(int64_t new_size, Error **errp)
Juan Quintela56e93d22015-05-07 19:33:31 +0200124{
125 PageCache *new_cache;
Juan Quintelac9dede22017-10-06 23:03:55 +0200126 int64_t ret = 0;
Juan Quintela56e93d22015-05-07 19:33:31 +0200127
Juan Quintela8acabf62017-10-05 22:00:31 +0200128 /* Check for truncation */
129 if (new_size != (size_t)new_size) {
130 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
131 "exceeding address space");
132 return -1;
133 }
134
Juan Quintela2a313e52017-10-06 23:00:12 +0200135 if (new_size == migrate_xbzrle_cache_size()) {
136 /* nothing to do */
Juan Quintelac9dede22017-10-06 23:03:55 +0200137 return 0;
Juan Quintela2a313e52017-10-06 23:00:12 +0200138 }
139
Juan Quintela56e93d22015-05-07 19:33:31 +0200140 XBZRLE_cache_lock();
141
142 if (XBZRLE.cache != NULL) {
Juan Quintela80f8dfd2017-10-06 22:30:45 +0200143 new_cache = cache_init(new_size, TARGET_PAGE_SIZE, errp);
Juan Quintela56e93d22015-05-07 19:33:31 +0200144 if (!new_cache) {
Juan Quintela56e93d22015-05-07 19:33:31 +0200145 ret = -1;
146 goto out;
147 }
148
149 cache_fini(XBZRLE.cache);
150 XBZRLE.cache = new_cache;
151 }
Juan Quintela56e93d22015-05-07 19:33:31 +0200152out:
153 XBZRLE_cache_unlock();
154 return ret;
155}
156
/*
 * Allocate a (cleared) received-page bitmap for every RAMBlock.
 * Asserts that no block already has one, i.e. this runs once per
 * incoming migration setup.
 */
static void ramblock_recv_map_init(void)
{
    RAMBlock *rb;

    RAMBLOCK_FOREACH(rb) {
        assert(!rb->receivedmap);
        rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits());
    }
}
166
/* Return non-zero if the page containing @host_addr was already received */
int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr)
{
    return test_bit(ramblock_recv_bitmap_offset(host_addr, rb),
                    rb->receivedmap);
}
172
/*
 * Like ramblock_recv_bitmap_test() but takes a byte offset into the
 * block instead of a host address.
 * NOTE(review): uses compile-time TARGET_PAGE_BITS while
 * ramblock_recv_map_init() sizes the map with qemu_target_page_bits();
 * presumably equivalent here — confirm.
 */
bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset)
{
    return test_bit(byte_offset >> TARGET_PAGE_BITS, rb->receivedmap);
}
177
/* Atomically mark the page containing @host_addr as received */
void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr)
{
    set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb), rb->receivedmap);
}
182
/* Atomically mark @nr consecutive pages starting at @host_addr as received */
void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr,
                                    size_t nr)
{
    bitmap_set_atomic(rb->receivedmap,
                      ramblock_recv_bitmap_offset(host_addr, rb),
                      nr);
}
190
/*
 * An outstanding page request, on the source, having been received
 * and queued
 */
struct RAMSrcPageRequest {
    RAMBlock *rb;      /* block the request refers to */
    hwaddr offset;     /* start offset within the block */
    hwaddr len;        /* length of the requested range */

    QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
};
202
/* State of RAM for migration */
struct RAMState {
    /* QEMUFile used for this migration */
    QEMUFile *f;
    /* Last block that we have visited searching for dirty pages */
    RAMBlock *last_seen_block;
    /* Last block from where we have sent data */
    RAMBlock *last_sent_block;
    /* Last dirty target page we have sent */
    ram_addr_t last_page;
    /* last ram version we have seen */
    uint32_t last_version;
    /* We are in the first round (bulk stage) of sending pages */
    bool ram_bulk_stage;
    /* How many times we have dirty too many pages */
    int dirty_rate_high_cnt;
    /* these variables are used for bitmap sync */
    /* last time we did a full bitmap_sync */
    int64_t time_last_bitmap_sync;
    /* bytes transferred at start_time */
    uint64_t bytes_xfer_prev;
    /* number of dirty pages since start_time */
    uint64_t num_dirty_pages_period;
    /* xbzrle misses since the beginning of the period */
    uint64_t xbzrle_cache_miss_prev;
    /* number of iterations at the beginning of period */
    uint64_t iterations_prev;
    /* Iterations since start */
    uint64_t iterations;
    /* number of dirty bits in the bitmap */
    uint64_t migration_dirty_pages;
    /* protects modification of the bitmap */
    QemuMutex bitmap_mutex;
    /* The RAMBlock used in the last src_page_requests */
    RAMBlock *last_req_rb;
    /* Queue of outstanding page requests from the destination */
    QemuMutex src_page_req_mutex;
    QSIMPLEQ_HEAD(src_page_requests, RAMSrcPageRequest) src_page_requests;
};
typedef struct RAMState RAMState;
243
/* Singleton RAMState for the current outgoing migration; NULL when idle */
static RAMState *ram_state;
Juan Quintela6f37bb82017-03-13 19:26:29 +0100245
Juan Quintela9edabd42017-03-14 12:02:16 +0100246uint64_t ram_bytes_remaining(void)
247{
Dr. David Alan Gilbertbae416e2017-12-15 11:51:23 +0000248 return ram_state ? (ram_state->migration_dirty_pages * TARGET_PAGE_SIZE) :
249 0;
Juan Quintela9edabd42017-03-14 12:02:16 +0100250}
251
/* Global RAM migration statistics, exported for 'query-migrate' reporting */
MigrationStats ram_counters;
Juan Quintela96506892017-03-14 18:41:03 +0100253
/* used by the search for pages to send */
struct PageSearchStatus {
    /* Current block being searched */
    RAMBlock *block;
    /* Current page to search from */
    unsigned long page;
    /* Set once we wrap around */
    bool complete_round;
};
typedef struct PageSearchStatus PageSearchStatus;
264
/* Per-thread state of one compression worker (outgoing side) */
struct CompressParam {
    bool done;          /* worker is idle; slot may take a new request */
    bool quit;          /* ask the worker to exit */
    QEMUFile *file;     /* buffer-backed QEMUFile the page is compressed into */
    QemuMutex mutex;    /* protects the request handoff (block/offset/quit) */
    QemuCond cond;      /* signalled when a new request is posted */
    RAMBlock *block;    /* non-NULL: pending request lives in block/offset */
    ram_addr_t offset;

    /* internally used fields */
    z_stream stream;    /* persistent zlib deflate stream for this worker */
    uint8_t *originbuf; /* scratch copy of the source page for deflate */
};
typedef struct CompressParam CompressParam;
279
/* Per-thread state of one decompression worker (incoming side) */
struct DecompressParam {
    bool done;          /* worker is idle */
    bool quit;          /* ask the worker to exit */
    QemuMutex mutex;
    QemuCond cond;
    void *des;          /* destination address — presumably host addr of the
                         * guest page; confirm at the call site */
    uint8_t *compbuf;   /* compressed input data */
    int len;            /* number of compressed bytes in compbuf */
    z_stream stream;    /* persistent zlib inflate stream for this worker */
};
typedef struct DecompressParam DecompressParam;
291
/* Arrays of per-worker state / thread handles, sized by
 * migrate_compress_threads() at setup time */
static CompressParam *comp_param;
static QemuThread *compress_threads;
/* comp_done_cond is used to wake up the migration thread when
 * one of the compression threads has finished the compression.
 * comp_done_lock is used to co-work with comp_done_cond.
 */
static QemuMutex comp_done_lock;
static QemuCond comp_done_cond;
/* The empty QEMUFileOps will be used by file in CompressParam */
static const QEMUFileOps empty_ops = { };

/* Incoming stream shared by the decompression workers */
static QEMUFile *decomp_file;
static DecompressParam *decomp_param;
static QemuThread *decompress_threads;
/* Lock/cond pair used to wait for a decompression worker to finish */
static QemuMutex decomp_done_lock;
static QemuCond decomp_done_cond;

static int do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
                                ram_addr_t offset, uint8_t *source_buf);
Juan Quintela56e93d22015-05-07 19:33:31 +0200311
/*
 * Body of a compression worker thread.
 *
 * Waits on param->cond until the migration thread posts a request
 * (param->block non-NULL); compresses that page into param->file via
 * do_compress_ram_page(), then marks the slot done under
 * comp_done_lock and signals comp_done_cond.  Exits when param->quit
 * is set.  param->mutex is deliberately dropped while compressing so
 * the migration thread is not blocked for the duration.
 */
static void *do_data_compress(void *opaque)
{
    CompressParam *param = opaque;
    RAMBlock *block;
    ram_addr_t offset;

    qemu_mutex_lock(&param->mutex);
    while (!param->quit) {
        if (param->block) {
            /* Snapshot the request and clear it, then drop the lock */
            block = param->block;
            offset = param->offset;
            param->block = NULL;
            qemu_mutex_unlock(&param->mutex);

            do_compress_ram_page(param->file, &param->stream, block, offset,
                                 param->originbuf);

            /* Completion is published under comp_done_lock, which is
             * what the migration thread waits on */
            qemu_mutex_lock(&comp_done_lock);
            param->done = true;
            qemu_cond_signal(&comp_done_cond);
            qemu_mutex_unlock(&comp_done_lock);

            qemu_mutex_lock(&param->mutex);
        } else {
            qemu_cond_wait(&param->cond, &param->mutex);
        }
    }
    qemu_mutex_unlock(&param->mutex);

    return NULL;
}
343
344static inline void terminate_compression_threads(void)
345{
346 int idx, thread_count;
347
348 thread_count = migrate_compress_threads();
Juan Quintela3d0684b2017-03-23 15:06:39 +0100349
Juan Quintela56e93d22015-05-07 19:33:31 +0200350 for (idx = 0; idx < thread_count; idx++) {
351 qemu_mutex_lock(&comp_param[idx].mutex);
Liang Li90e56fb2016-05-05 15:32:56 +0800352 comp_param[idx].quit = true;
Juan Quintela56e93d22015-05-07 19:33:31 +0200353 qemu_cond_signal(&comp_param[idx].cond);
354 qemu_mutex_unlock(&comp_param[idx].mutex);
355 }
356}
357
/*
 * Stop, join and free all compression workers and their per-thread
 * resources.  Safe after a partially-failed setup: slots whose ->file
 * was never created (the "properly initialised" indicator) are
 * skipped, and later slots cannot be initialised either, so we break.
 */
static void compress_threads_save_cleanup(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    terminate_compression_threads();
    thread_count = migrate_compress_threads();
    for (i = 0; i < thread_count; i++) {
        /*
         * we use it as a indicator which shows if the thread is
         * properly init'd or not
         */
        if (!comp_param[i].file) {
            break;
        }
        qemu_thread_join(compress_threads + i);
        qemu_mutex_destroy(&comp_param[i].mutex);
        qemu_cond_destroy(&comp_param[i].cond);
        deflateEnd(&comp_param[i].stream);
        g_free(comp_param[i].originbuf);
        qemu_fclose(comp_param[i].file);
        comp_param[i].file = NULL;
    }
    qemu_mutex_destroy(&comp_done_lock);
    qemu_cond_destroy(&comp_done_cond);
    g_free(compress_threads);
    g_free(comp_param);
    compress_threads = NULL;
    comp_param = NULL;
}
390
/*
 * Allocate and start the compression worker threads.
 *
 * Returns 0 on success, -1 on failure.  On failure everything already
 * created is torn down via compress_threads_save_cleanup(), which
 * relies on ->file being NULL for slots that never finished init.
 */
static int compress_threads_save_setup(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return 0;
    }
    thread_count = migrate_compress_threads();
    compress_threads = g_new0(QemuThread, thread_count);
    comp_param = g_new0(CompressParam, thread_count);
    qemu_cond_init(&comp_done_cond);
    qemu_mutex_init(&comp_done_lock);
    for (i = 0; i < thread_count; i++) {
        comp_param[i].originbuf = g_try_malloc(TARGET_PAGE_SIZE);
        if (!comp_param[i].originbuf) {
            goto exit;
        }

        /* Persistent deflate stream, reused for every page this
         * worker compresses */
        if (deflateInit(&comp_param[i].stream,
                        migrate_compress_level()) != Z_OK) {
            g_free(comp_param[i].originbuf);
            goto exit;
        }

        /* comp_param[i].file is just used as a dummy buffer to save data,
         * set its ops to empty.
         */
        comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
        comp_param[i].done = true;
        comp_param[i].quit = false;
        qemu_mutex_init(&comp_param[i].mutex);
        qemu_cond_init(&comp_param[i].cond);
        qemu_thread_create(compress_threads + i, "compress",
                           do_data_compress, comp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
    return 0;

exit:
    compress_threads_save_cleanup();
    return -1;
}
433
/* Multiple fd's */

/* Per-channel state of one multifd send thread */
struct MultiFDSendParams {
    uint8_t id;        /* channel number */
    char *name;        /* thread name ("multifdsend_N") */
    QemuThread thread;
    QemuSemaphore sem; /* posted to wake the thread */
    QemuMutex mutex;   /* protects running/quit */
    bool running;      /* thread was created and has not yet exited */
    bool quit;         /* ask the thread to exit */
};
typedef struct MultiFDSendParams MultiFDSendParams;

/* Global send-side multifd state */
struct {
    MultiFDSendParams *params;
    /* number of created threads */
    int count;
} *multifd_send_state;
452
/*
 * Ask every multifd send thread to quit.  When @err is non-NULL the
 * error is recorded on the migration state and, unless migration has
 * already reached a terminal state, it is moved to FAILED.
 */
static void multifd_send_terminate_threads(Error *err)
{
    int i;

    if (err) {
        MigrationState *s = migrate_get_current();
        migrate_set_error(s, err);
        if (s->state == MIGRATION_STATUS_SETUP ||
            s->state == MIGRATION_STATUS_PRE_SWITCHOVER ||
            s->state == MIGRATION_STATUS_DEVICE ||
            s->state == MIGRATION_STATUS_ACTIVE) {
            migrate_set_state(&s->state, s->state,
                              MIGRATION_STATUS_FAILED);
        }
    }

    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        qemu_mutex_lock(&p->mutex);
        p->quit = true;
        qemu_sem_post(&p->sem);
        qemu_mutex_unlock(&p->mutex);
    }
}
478
/*
 * Terminate, join and free all multifd send channels and the global
 * send state.  Returns 0 (ret is never set to another value here).
 *
 * @errp: currently unused
 */
int multifd_save_cleanup(Error **errp)
{
    int i;
    int ret = 0;

    if (!migrate_use_multifd()) {
        return 0;
    }
    multifd_send_terminate_threads(NULL);
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        /* NOTE(review): the thread clears p->running itself on exit, so
         * a thread that already finished is never joined — verify this
         * is intended */
        if (p->running) {
            qemu_thread_join(&p->thread);
        }
        qemu_mutex_destroy(&p->mutex);
        qemu_sem_destroy(&p->sem);
        g_free(p->name);
        p->name = NULL;
    }
    g_free(multifd_send_state->params);
    multifd_send_state->params = NULL;
    g_free(multifd_send_state);
    multifd_send_state = NULL;
    return ret;
}
505
/*
 * Multifd send thread body.  So far it only sleeps on its semaphore
 * and re-checks the quit flag — no page data is sent yet.  It clears
 * p->running (under the mutex) before returning.
 */
static void *multifd_send_thread(void *opaque)
{
    MultiFDSendParams *p = opaque;

    while (true) {
        qemu_mutex_lock(&p->mutex);
        if (p->quit) {
            qemu_mutex_unlock(&p->mutex);
            break;
        }
        qemu_mutex_unlock(&p->mutex);
        qemu_sem_wait(&p->sem);
    }

    qemu_mutex_lock(&p->mutex);
    p->running = false;
    qemu_mutex_unlock(&p->mutex);

    return NULL;
}
526
/*
 * Allocate the global multifd send state and spawn one send thread per
 * configured channel.  Returns 0 (no failure paths yet).
 */
int multifd_save_setup(void)
{
    int thread_count;
    uint8_t i;

    if (!migrate_use_multifd()) {
        return 0;
    }
    thread_count = migrate_multifd_channels();
    multifd_send_state = g_malloc0(sizeof(*multifd_send_state));
    multifd_send_state->params = g_new0(MultiFDSendParams, thread_count);
    atomic_set(&multifd_send_state->count, 0);
    for (i = 0; i < thread_count; i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        qemu_mutex_init(&p->mutex);
        qemu_sem_init(&p->sem, 0);
        p->quit = false;
        p->id = i;
        p->name = g_strdup_printf("multifdsend_%d", i);
        p->running = true;
        qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
                           QEMU_THREAD_JOINABLE);

        atomic_inc(&multifd_send_state->count);
    }
    return 0;
}
555
/* Per-channel state of one multifd receive thread */
struct MultiFDRecvParams {
    uint8_t id;        /* channel number */
    char *name;        /* thread name ("multifdrecv_N") */
    QemuThread thread;
    QemuSemaphore sem; /* posted to wake the thread */
    QemuMutex mutex;   /* protects running/quit */
    bool running;      /* thread was created and has not yet exited */
    bool quit;         /* ask the thread to exit */
};
typedef struct MultiFDRecvParams MultiFDRecvParams;

/* Global receive-side multifd state */
struct {
    MultiFDRecvParams *params;
    /* number of created threads */
    int count;
} *multifd_recv_state;
572
/*
 * Ask every multifd receive thread to quit.  When @err is non-NULL the
 * error is recorded on the migration state and, when still in
 * SETUP/ACTIVE, the migration is moved to FAILED.
 */
static void multifd_recv_terminate_threads(Error *err)
{
    int i;

    if (err) {
        MigrationState *s = migrate_get_current();
        migrate_set_error(s, err);
        if (s->state == MIGRATION_STATUS_SETUP ||
            s->state == MIGRATION_STATUS_ACTIVE) {
            migrate_set_state(&s->state, s->state,
                              MIGRATION_STATUS_FAILED);
        }
    }

    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

        qemu_mutex_lock(&p->mutex);
        p->quit = true;
        qemu_sem_post(&p->sem);
        qemu_mutex_unlock(&p->mutex);
    }
}
596
/*
 * Terminate, join and free all multifd receive channels and the global
 * receive state.  Returns 0 (ret is never set to another value here).
 *
 * @errp: currently unused
 */
int multifd_load_cleanup(Error **errp)
{
    int i;
    int ret = 0;

    if (!migrate_use_multifd()) {
        return 0;
    }
    multifd_recv_terminate_threads(NULL);
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

        /* NOTE(review): as on the send side, a thread that already
         * cleared p->running is never joined — verify intent */
        if (p->running) {
            qemu_thread_join(&p->thread);
        }
        qemu_mutex_destroy(&p->mutex);
        qemu_sem_destroy(&p->sem);
        g_free(p->name);
        p->name = NULL;
    }
    g_free(multifd_recv_state->params);
    multifd_recv_state->params = NULL;
    g_free(multifd_recv_state);
    multifd_recv_state = NULL;

    return ret;
}
624
/*
 * Multifd receive thread body.  So far it only sleeps on its semaphore
 * and re-checks the quit flag — no page data is received yet.  It
 * clears p->running (under the mutex) before returning.
 */
static void *multifd_recv_thread(void *opaque)
{
    MultiFDRecvParams *p = opaque;

    while (true) {
        qemu_mutex_lock(&p->mutex);
        if (p->quit) {
            qemu_mutex_unlock(&p->mutex);
            break;
        }
        qemu_mutex_unlock(&p->mutex);
        qemu_sem_wait(&p->sem);
    }

    qemu_mutex_lock(&p->mutex);
    p->running = false;
    qemu_mutex_unlock(&p->mutex);

    return NULL;
}
645
/*
 * Allocate the global multifd receive state and spawn one receive
 * thread per configured channel.  Returns 0 (no failure paths yet).
 */
int multifd_load_setup(void)
{
    int thread_count;
    uint8_t i;

    if (!migrate_use_multifd()) {
        return 0;
    }
    thread_count = migrate_multifd_channels();
    multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state));
    multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count);
    atomic_set(&multifd_recv_state->count, 0);
    for (i = 0; i < thread_count; i++) {
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

        qemu_mutex_init(&p->mutex);
        qemu_sem_init(&p->sem, 0);
        p->quit = false;
        p->id = i;
        p->name = g_strdup_printf("multifdrecv_%d", i);
        p->running = true;
        qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
                           QEMU_THREAD_JOINABLE);
        atomic_inc(&multifd_recv_state->count);
    }
    return 0;
}
673
Juan Quintela62c1e0c2018-02-19 18:59:02 +0100674bool multifd_recv_all_channels_created(void)
675{
676 int thread_count = migrate_multifd_channels();
677
678 if (!migrate_use_multifd()) {
679 return true;
680 }
681
682 return thread_count == atomic_read(&multifd_recv_state->count);
683}
684
/*
 * Called when a new incoming multifd channel connects.
 * Placeholder: channel hookup is not implemented yet.
 */
void multifd_recv_new_channel(QIOChannel *ioc)
{
    /* nothing to do yet */
}
689
/**
 * save_page_header: write page header to wire
 *
 * If this is the 1st block, it also writes the block identification
 * (and records it as rs->last_sent_block so the next page from the
 * same block can use RAM_SAVE_FLAG_CONTINUE instead)
 *
 * Returns the number of bytes written
 *
 * @rs: current RAM state
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 *          in the lower bits, it contains flags
 */
static size_t save_page_header(RAMState *rs, QEMUFile *f,  RAMBlock *block,
                               ram_addr_t offset)
{
    size_t size, len;

    if (block == rs->last_sent_block) {
        offset |= RAM_SAVE_FLAG_CONTINUE;
    }
    qemu_put_be64(f, offset);
    size = 8;

    if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
        len = strlen(block->idstr);
        qemu_put_byte(f, len);
        qemu_put_buffer(f, (uint8_t *)block->idstr, len);
        size += 1 + len;
        rs->last_sent_block = block;
    }
    return size;
}
722
Juan Quintela3d0684b2017-03-23 15:06:39 +0100723/**
724 * mig_throttle_guest_down: throotle down the guest
725 *
726 * Reduce amount of guest cpu execution to hopefully slow down memory
727 * writes. If guest dirty memory rate is reduced below the rate at
728 * which we can transfer pages to the destination then we should be
729 * able to complete migration. Some workloads dirty memory way too
730 * fast and will not effectively converge, even with auto-converge.
Jason J. Herne070afca2015-09-08 13:12:35 -0400731 */
732static void mig_throttle_guest_down(void)
733{
734 MigrationState *s = migrate_get_current();
Daniel P. Berrange2594f562016-04-27 11:05:14 +0100735 uint64_t pct_initial = s->parameters.cpu_throttle_initial;
736 uint64_t pct_icrement = s->parameters.cpu_throttle_increment;
Jason J. Herne070afca2015-09-08 13:12:35 -0400737
738 /* We have not started throttling yet. Let's start it. */
739 if (!cpu_throttle_active()) {
740 cpu_throttle_set(pct_initial);
741 } else {
742 /* Throttling already on, just increase the rate */
743 cpu_throttle_set(cpu_throttle_get_percentage() + pct_icrement);
744 }
745}
746
/**
 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
 *
 * @rs: current RAM state
 * @current_addr: address for the zero page
 *
 * Update the xbzrle cache to reflect a page that's been sent as all 0.
 * The important thing is that a stale (not-yet-0'd) page be replaced
 * by the new data.
 * As a bonus, if the page wasn't in the cache it gets added so that
 * when a small write is made into the 0'd page it gets XBZRLE sent.
 */
static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
{
    /* Skip during the bulk stage (everything gets sent anyway) or when
     * XBZRLE is disabled */
    if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
        return;
    }

    /* We don't care if this fails to allocate a new cache page
     * as long as it updated an old one */
    cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page,
                 ram_counters.dirty_sync_count);
}
770
#define ENCODING_FLAG_XBZRLE 0x1

/**
 * save_xbzrle_page: compress and send current page
 *
 * Returns: 1 means that we wrote the page
 *          0 means that page is identical to the one already sent
 *          -1 means that xbzrle would be longer than normal
 *
 * @rs: current RAM state
 * @current_data: pointer to the address of the page contents
 * @current_addr: addr of the page
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @last_stage: if we are at the completion stage
 */
static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
                            ram_addr_t current_addr, RAMBlock *block,
                            ram_addr_t offset, bool last_stage)
{
    int encoded_len = 0, bytes_xbzrle;
    uint8_t *prev_cached_page;

    if (!cache_is_cached(XBZRLE.cache, current_addr,
                         ram_counters.dirty_sync_count)) {
        /* Cache miss: there is no previous version to diff against, so
         * -1 tells the caller to send this as a full (normal) page. */
        xbzrle_counters.cache_miss++;
        if (!last_stage) {
            if (cache_insert(XBZRLE.cache, current_addr, *current_data,
                             ram_counters.dirty_sync_count) == -1) {
                return -1;
            } else {
                /* update *current_data when the page has been
                   inserted into cache */
                *current_data = get_cached_data(XBZRLE.cache, current_addr);
            }
        }
        return -1;
    }

    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);

    /* save current buffer into memory */
    memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);

    /* XBZRLE encoding (if there is no overflow) */
    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
                                       TARGET_PAGE_SIZE);
    if (encoded_len == 0) {
        /* Page is byte-identical to the cached copy: nothing to send. */
        trace_save_xbzrle_page_skipping();
        return 0;
    } else if (encoded_len == -1) {
        /* Encoded form would not fit in TARGET_PAGE_SIZE: fall back to
         * sending the full page (caller handles the -1). */
        trace_save_xbzrle_page_overflow();
        xbzrle_counters.overflow++;
        /* update data in the cache */
        if (!last_stage) {
            memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
            *current_data = prev_cached_page;
        }
        return -1;
    }

    /* we need to update the data in the cache, in order to get the same data */
    if (!last_stage) {
        memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
    }

    /* Send XBZRLE based compressed page */
    bytes_xbzrle = save_page_header(rs, rs->f, block,
                                    offset | RAM_SAVE_FLAG_XBZRLE);
    qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE);
    qemu_put_be16(rs->f, encoded_len);
    qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len);
    /* +1 for the encoding-flag byte, +2 for the be16 length field */
    bytes_xbzrle += encoded_len + 1 + 2;
    xbzrle_counters.pages++;
    xbzrle_counters.bytes += bytes_xbzrle;
    ram_counters.transferred += bytes_xbzrle;

    return 1;
}
851
Juan Quintela3d0684b2017-03-23 15:06:39 +0100852/**
853 * migration_bitmap_find_dirty: find the next dirty page from start
Dr. David Alan Gilbertf3f491f2015-11-05 18:11:01 +0000854 *
Juan Quintela3d0684b2017-03-23 15:06:39 +0100855 * Called with rcu_read_lock() to protect migration_bitmap
856 *
857 * Returns the byte offset within memory region of the start of a dirty page
858 *
Juan Quintela6f37bb82017-03-13 19:26:29 +0100859 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +0100860 * @rb: RAMBlock where to search for dirty pages
Juan Quintelaa935e302017-03-21 15:36:51 +0100861 * @start: page where we start the search
Dr. David Alan Gilbertf3f491f2015-11-05 18:11:01 +0000862 */
Juan Quintela56e93d22015-05-07 19:33:31 +0200863static inline
Juan Quintelaa935e302017-03-21 15:36:51 +0100864unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
Juan Quintelaf20e2862017-03-21 16:19:05 +0100865 unsigned long start)
Juan Quintela56e93d22015-05-07 19:33:31 +0200866{
Juan Quintela6b6712e2017-03-22 15:18:04 +0100867 unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
868 unsigned long *bitmap = rb->bmap;
Juan Quintela56e93d22015-05-07 19:33:31 +0200869 unsigned long next;
870
Juan Quintela6b6712e2017-03-22 15:18:04 +0100871 if (rs->ram_bulk_stage && start > 0) {
872 next = start + 1;
Juan Quintela56e93d22015-05-07 19:33:31 +0200873 } else {
Juan Quintela6b6712e2017-03-22 15:18:04 +0100874 next = find_next_bit(bitmap, size, start);
Juan Quintela56e93d22015-05-07 19:33:31 +0200875 }
876
Juan Quintela6b6712e2017-03-22 15:18:04 +0100877 return next;
Juan Quintela56e93d22015-05-07 19:33:31 +0200878}
879
Juan Quintela06b10682017-03-21 15:18:05 +0100880static inline bool migration_bitmap_clear_dirty(RAMState *rs,
Juan Quintelaf20e2862017-03-21 16:19:05 +0100881 RAMBlock *rb,
882 unsigned long page)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +0000883{
884 bool ret;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +0000885
Juan Quintela6b6712e2017-03-22 15:18:04 +0100886 ret = test_and_clear_bit(page, rb->bmap);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +0000887
888 if (ret) {
Juan Quintela0d8ec882017-03-13 21:21:41 +0100889 rs->migration_dirty_pages--;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +0000890 }
891 return ret;
892}
893
Juan Quintela15440dd2017-03-21 09:35:04 +0100894static void migration_bitmap_sync_range(RAMState *rs, RAMBlock *rb,
895 ram_addr_t start, ram_addr_t length)
Juan Quintela56e93d22015-05-07 19:33:31 +0200896{
Juan Quintela0d8ec882017-03-13 21:21:41 +0100897 rs->migration_dirty_pages +=
Juan Quintela6b6712e2017-03-22 15:18:04 +0100898 cpu_physical_memory_sync_dirty_bitmap(rb, start, length,
Juan Quintela0d8ec882017-03-13 21:21:41 +0100899 &rs->num_dirty_pages_period);
Juan Quintela56e93d22015-05-07 19:33:31 +0200900}
901
Juan Quintela3d0684b2017-03-23 15:06:39 +0100902/**
903 * ram_pagesize_summary: calculate all the pagesizes of a VM
904 *
905 * Returns a summary bitmap of the page sizes of all RAMBlocks
906 *
907 * For VMs with just normal pages this is equivalent to the host page
908 * size. If it's got some huge pages then it's the OR of all the
909 * different page sizes.
Dr. David Alan Gilberte8ca1db2017-02-24 18:28:29 +0000910 */
911uint64_t ram_pagesize_summary(void)
912{
913 RAMBlock *block;
914 uint64_t summary = 0;
915
Peter Xu99e15582017-05-12 12:17:39 +0800916 RAMBLOCK_FOREACH(block) {
Dr. David Alan Gilberte8ca1db2017-02-24 18:28:29 +0000917 summary |= block->page_size;
918 }
919
920 return summary;
921}
922
/**
 * migration_bitmap_sync: sync the dirty bitmap with the guest
 *
 * Pulls the dirty log from the memory core into every RAMBlock's
 * migration bitmap. Once per second it also recomputes the period
 * counters (dirty-page rate, xbzrle cache-miss rate) and, when
 * auto-converge is enabled, decides whether to throttle the guest.
 *
 * @rs: current RAM state
 */
static void migration_bitmap_sync(RAMState *rs)
{
    RAMBlock *block;
    int64_t end_time;
    uint64_t bytes_xfer_now;

    ram_counters.dirty_sync_count++;

    if (!rs->time_last_bitmap_sync) {
        rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    }

    trace_migration_bitmap_sync_start();
    memory_global_dirty_log_sync();

    /* bitmap_mutex is held across the whole walk; the RCU read lock
     * protects traversal of the RAMBlock list itself. */
    qemu_mutex_lock(&rs->bitmap_mutex);
    rcu_read_lock();
    RAMBLOCK_FOREACH(block) {
        migration_bitmap_sync_range(rs, block, 0, block->used_length);
    }
    rcu_read_unlock();
    qemu_mutex_unlock(&rs->bitmap_mutex);

    trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);

    end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

    /* more than 1 second = 1000 milliseconds */
    if (end_time > rs->time_last_bitmap_sync + 1000) {
        /* calculate period counters */
        ram_counters.dirty_pages_rate = rs->num_dirty_pages_period * 1000
            / (end_time - rs->time_last_bitmap_sync);
        bytes_xfer_now = ram_counters.transferred;

        /* During block migration the auto-converge logic incorrectly detects
         * that ram migration makes no progress. Avoid this by disabling the
         * throttling logic during the bulk phase of block migration. */
        if (migrate_auto_converge() && !blk_mig_bulk_active()) {
            /* The following detection logic can be refined later. For now:
               Check to see if the dirtied bytes is 50% more than the approx.
               amount of bytes that just got transferred since the last time we
               were in this routine. If that happens twice, start or increase
               throttling */

            if ((rs->num_dirty_pages_period * TARGET_PAGE_SIZE >
                   (bytes_xfer_now - rs->bytes_xfer_prev) / 2) &&
                (++rs->dirty_rate_high_cnt >= 2)) {
                    trace_migration_throttle();
                    rs->dirty_rate_high_cnt = 0;
                    mig_throttle_guest_down();
            }
        }

        if (migrate_use_xbzrle()) {
            /* Recompute cache-miss rate over this period; guarded so we
             * never divide by a zero iteration delta. */
            if (rs->iterations_prev != rs->iterations) {
                xbzrle_counters.cache_miss_rate =
                   (double)(xbzrle_counters.cache_miss -
                            rs->xbzrle_cache_miss_prev) /
                   (rs->iterations - rs->iterations_prev);
            }
            rs->iterations_prev = rs->iterations;
            rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
        }

        /* reset period counters */
        rs->time_last_bitmap_sync = end_time;
        rs->num_dirty_pages_period = 0;
        rs->bytes_xfer_prev = bytes_xfer_now;
    }
    if (migrate_use_events()) {
        qapi_event_send_migration_pass(ram_counters.dirty_sync_count, NULL);
    }
}
996
997/**
Juan Quintela3d0684b2017-03-23 15:06:39 +0100998 * save_zero_page: send the zero page to the stream
Juan Quintela56e93d22015-05-07 19:33:31 +0200999 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001000 * Returns the number of pages written.
Juan Quintela56e93d22015-05-07 19:33:31 +02001001 *
Juan Quintelaf7ccd612017-03-13 20:30:21 +01001002 * @rs: current RAM state
Juan Quintela56e93d22015-05-07 19:33:31 +02001003 * @block: block that contains the page we want to send
1004 * @offset: offset inside the block for the page
Juan Quintela56e93d22015-05-07 19:33:31 +02001005 */
Juan Quintela7faccdc2018-01-08 18:58:17 +01001006static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
Juan Quintela56e93d22015-05-07 19:33:31 +02001007{
Juan Quintela7faccdc2018-01-08 18:58:17 +01001008 uint8_t *p = block->host + offset;
Juan Quintela56e93d22015-05-07 19:33:31 +02001009 int pages = -1;
1010
1011 if (is_zero_range(p, TARGET_PAGE_SIZE)) {
Juan Quintela93604472017-06-06 19:49:03 +02001012 ram_counters.duplicate++;
1013 ram_counters.transferred +=
Juan Quintelabb890ed2017-04-28 09:39:55 +02001014 save_page_header(rs, rs->f, block, offset | RAM_SAVE_FLAG_ZERO);
Juan Quintelace25d332017-03-15 11:00:51 +01001015 qemu_put_byte(rs->f, 0);
Juan Quintela93604472017-06-06 19:49:03 +02001016 ram_counters.transferred += 1;
Juan Quintela56e93d22015-05-07 19:33:31 +02001017 pages = 1;
1018 }
1019
1020 return pages;
1021}
1022
Juan Quintela57273092017-03-20 22:25:28 +01001023static void ram_release_pages(const char *rbname, uint64_t offset, int pages)
Pavel Butsykin53f09a12017-02-03 18:23:20 +03001024{
Juan Quintela57273092017-03-20 22:25:28 +01001025 if (!migrate_release_ram() || !migration_in_postcopy()) {
Pavel Butsykin53f09a12017-02-03 18:23:20 +03001026 return;
1027 }
1028
Juan Quintelaaaa20642017-03-21 11:35:24 +01001029 ram_discard_range(rbname, offset, pages << TARGET_PAGE_BITS);
Pavel Butsykin53f09a12017-02-03 18:23:20 +03001030}
1031
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08001032/*
1033 * @pages: the number of pages written by the control path,
1034 * < 0 - error
1035 * > 0 - number of pages written
1036 *
1037 * Return true if the pages has been saved, otherwise false is returned.
1038 */
1039static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
1040 int *pages)
1041{
1042 uint64_t bytes_xmit = 0;
1043 int ret;
1044
1045 *pages = -1;
1046 ret = ram_control_save_page(rs->f, block->offset, offset, TARGET_PAGE_SIZE,
1047 &bytes_xmit);
1048 if (ret == RAM_SAVE_CONTROL_NOT_SUPP) {
1049 return false;
1050 }
1051
1052 if (bytes_xmit) {
1053 ram_counters.transferred += bytes_xmit;
1054 *pages = 1;
1055 }
1056
1057 if (ret == RAM_SAVE_CONTROL_DELAYED) {
1058 return true;
1059 }
1060
1061 if (bytes_xmit > 0) {
1062 ram_counters.normal++;
1063 } else if (bytes_xmit == 0) {
1064 ram_counters.duplicate++;
1065 }
1066
1067 return true;
1068}
1069
Xiao Guangrong65dacaa2018-03-30 15:51:27 +08001070/*
1071 * directly send the page to the stream
1072 *
1073 * Returns the number of pages written.
1074 *
1075 * @rs: current RAM state
1076 * @block: block that contains the page we want to send
1077 * @offset: offset inside the block for the page
1078 * @buf: the page to be sent
1079 * @async: send to page asyncly
1080 */
1081static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
1082 uint8_t *buf, bool async)
1083{
1084 ram_counters.transferred += save_page_header(rs, rs->f, block,
1085 offset | RAM_SAVE_FLAG_PAGE);
1086 if (async) {
1087 qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE,
1088 migrate_release_ram() &
1089 migration_in_postcopy());
1090 } else {
1091 qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE);
1092 }
1093 ram_counters.transferred += TARGET_PAGE_SIZE;
1094 ram_counters.normal++;
1095 return 1;
1096}
1097
/**
 * ram_save_page: send the given page to the stream
 *
 * Returns the number of pages written.
 *          < 0 - error
 *          >=0 - Number of pages written - this might legally be 0
 *                if xbzrle noticed the page was the same.
 *
 * @rs: current RAM state
 * @pss: data about the page we want to send (block + page index)
 * @last_stage: if we are at the completion stage
 */
static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
{
    int pages = -1;
    uint8_t *p;
    bool send_async = true;
    RAMBlock *block = pss->block;
    ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
    ram_addr_t current_addr = block->offset + offset;

    p = block->host + offset;
    trace_ram_save_page(block->idstr, (uint64_t)offset, p);

    /* All XBZRLE cache accesses happen under the cache lock */
    XBZRLE_cache_lock();
    if (!rs->ram_bulk_stage && !migration_in_postcopy() &&
        migrate_use_xbzrle()) {
        /* On a cache hit/insert, save_xbzrle_page() may redirect p to
         * point at the cached copy of the page. */
        pages = save_xbzrle_page(rs, &p, current_addr, block,
                                 offset, last_stage);
        if (!last_stage) {
            /* Can't send this cached data async, since the cache page
             * might get updated before it gets to the wire
             */
            send_async = false;
        }
    }

    /* XBZRLE overflow or normal page */
    if (pages == -1) {
        pages = save_normal_page(rs, block, offset, p, send_async);
    }

    XBZRLE_cache_unlock();

    return pages;
}
1145
/**
 * do_compress_ram_page: compress one page and write it to @f
 *
 * Returns the number of bytes written (page header plus compressed
 * payload), or 0 on compression failure.  NOTE(review): on failure the
 * page header has already been written to @f but is not reflected in
 * the return value -- confirm the transferred-bytes accounting is
 * intended to ignore it (the stream is marked errored anyway).
 *
 * @f: QEMUFile where to send the data
 * @stream: zlib compression stream to use
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @source_buf: staging buffer the page is copied into before compressing
 */
static int do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
                                ram_addr_t offset, uint8_t *source_buf)
{
    RAMState *rs = ram_state;
    int bytes_sent, blen;
    uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);

    bytes_sent = save_page_header(rs, f, block, offset |
                                  RAM_SAVE_FLAG_COMPRESS_PAGE);

    /*
     * Copy it to an internal buffer to avoid it being modified by the VM
     * so that we can catch up the error during compression and
     * decompression
     */
    memcpy(source_buf, p, TARGET_PAGE_SIZE);
    blen = qemu_put_compression_data(f, stream, source_buf, TARGET_PAGE_SIZE);
    if (blen < 0) {
        bytes_sent = 0;
        qemu_file_set_error(migrate_get_current()->to_dst_file, blen);
        error_report("compressed data failed!");
    } else {
        bytes_sent += blen;
        /* Page is on the wire; with release-ram it can be dropped now */
        ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1);
    }

    return bytes_sent;
}
1174
/**
 * flush_compressed_data: wait for all compression workers to go idle and
 * drain whatever compressed output they buffered into the main stream.
 *
 * @rs: current RAM state
 */
static void flush_compressed_data(RAMState *rs)
{
    int idx, len, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    thread_count = migrate_compress_threads();

    /* Phase 1: under comp_done_lock, wait until every worker has
     * finished its current page (done flag set). */
    qemu_mutex_lock(&comp_done_lock);
    for (idx = 0; idx < thread_count; idx++) {
        while (!comp_param[idx].done) {
            qemu_cond_wait(&comp_done_cond, &comp_done_lock);
        }
    }
    qemu_mutex_unlock(&comp_done_lock);

    /* Phase 2: drain each worker's buffered output into rs->f.  Each
     * worker's state is protected by its own per-worker mutex. */
    for (idx = 0; idx < thread_count; idx++) {
        qemu_mutex_lock(&comp_param[idx].mutex);
        if (!comp_param[idx].quit) {
            len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
            ram_counters.transferred += len;
        }
        qemu_mutex_unlock(&comp_param[idx].mutex);
    }
}
1201
1202static inline void set_compress_params(CompressParam *param, RAMBlock *block,
1203 ram_addr_t offset)
1204{
1205 param->block = block;
1206 param->offset = offset;
1207}
1208
/**
 * compress_page_with_multi_thread: hand a page off to an idle worker
 *
 * Blocks (on comp_done_cond) until some compression worker is idle,
 * flushes that worker's previous output into the stream, then queues
 * @block/@offset for it to compress.
 *
 * Returns the number of pages queued (always 1 on return).
 *
 * @rs: current RAM state
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 */
static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block,
                                           ram_addr_t offset)
{
    int idx, thread_count, bytes_xmit = -1, pages = -1;

    thread_count = migrate_compress_threads();
    qemu_mutex_lock(&comp_done_lock);
    while (true) {
        for (idx = 0; idx < thread_count; idx++) {
            if (comp_param[idx].done) {
                comp_param[idx].done = false;
                /* Flush the output this worker produced last round */
                bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file);
                qemu_mutex_lock(&comp_param[idx].mutex);
                set_compress_params(&comp_param[idx], block, offset);
                /* Wake the worker to start on the new page */
                qemu_cond_signal(&comp_param[idx].cond);
                qemu_mutex_unlock(&comp_param[idx].mutex);
                pages = 1;
                ram_counters.normal++;
                ram_counters.transferred += bytes_xmit;
                break;
            }
        }
        if (pages > 0) {
            break;
        } else {
            /* All workers busy: sleep until one signals completion */
            qemu_cond_wait(&comp_done_cond, &comp_done_lock);
        }
    }
    qemu_mutex_unlock(&comp_done_lock);

    return pages;
}
1241
1242/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001243 * find_dirty_block: find the next dirty page and update any state
1244 * associated with the search process.
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001245 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001246 * Returns if a page is found
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001247 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001248 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001249 * @pss: data about the state of the current dirty page scan
1250 * @again: set to false if the search has scanned the whole of RAM
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001251 */
Juan Quintelaf20e2862017-03-21 16:19:05 +01001252static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001253{
Juan Quintelaf20e2862017-03-21 16:19:05 +01001254 pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
Juan Quintela6f37bb82017-03-13 19:26:29 +01001255 if (pss->complete_round && pss->block == rs->last_seen_block &&
Juan Quintelaa935e302017-03-21 15:36:51 +01001256 pss->page >= rs->last_page) {
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001257 /*
1258 * We've been once around the RAM and haven't found anything.
1259 * Give up.
1260 */
1261 *again = false;
1262 return false;
1263 }
Juan Quintelaa935e302017-03-21 15:36:51 +01001264 if ((pss->page << TARGET_PAGE_BITS) >= pss->block->used_length) {
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001265 /* Didn't find anything in this RAM Block */
Juan Quintelaa935e302017-03-21 15:36:51 +01001266 pss->page = 0;
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001267 pss->block = QLIST_NEXT_RCU(pss->block, next);
1268 if (!pss->block) {
1269 /* Hit the end of the list */
1270 pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
1271 /* Flag that we've looped */
1272 pss->complete_round = true;
Juan Quintela6f37bb82017-03-13 19:26:29 +01001273 rs->ram_bulk_stage = false;
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001274 if (migrate_use_xbzrle()) {
1275 /* If xbzrle is on, stop using the data compression at this
1276 * point. In theory, xbzrle can do better than compression.
1277 */
Juan Quintelace25d332017-03-15 11:00:51 +01001278 flush_compressed_data(rs);
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001279 }
1280 }
1281 /* Didn't find anything this time, but try again on the new block */
1282 *again = true;
1283 return false;
1284 } else {
1285 /* Can go around again, but... */
1286 *again = true;
1287 /* We've found something so probably don't need to */
1288 return true;
1289 }
1290}
1291
/**
 * unqueue_page: gets a page of the queue
 *
 * Helper for 'get_queued_page' - gets a page off the queue
 *
 * Returns the block of the page (or NULL if none available)
 *
 * @rs: current RAM state
 * @offset: used to return the offset within the RAMBlock; only written
 *          when a non-NULL block is returned
 */
static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
{
    RAMBlock *block = NULL;

    /* src_page_req_mutex protects the request queue against the thread
     * that enqueues postcopy page requests. */
    qemu_mutex_lock(&rs->src_page_req_mutex);
    if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
        struct RAMSrcPageRequest *entry =
                                QSIMPLEQ_FIRST(&rs->src_page_requests);
        block = entry->rb;
        *offset = entry->offset;

        if (entry->len > TARGET_PAGE_SIZE) {
            /* Multi-page request: consume one page and leave the
             * remainder of the entry at the head of the queue. */
            entry->len -= TARGET_PAGE_SIZE;
            entry->offset += TARGET_PAGE_SIZE;
        } else {
            /* Last page of this request: free the entry and drop its
             * reference on the block's memory region. */
            memory_region_unref(block->mr);
            QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
            g_free(entry);
        }
    }
    qemu_mutex_unlock(&rs->src_page_req_mutex);

    return block;
}
1326
/**
 * get_queued_page: unqueue a page from the postcopy requests
 *
 * Skips pages that are already sent (!dirty)
 *
 * Returns if a queued page is found
 *
 * @rs: current RAM state
 * @pss: data about the state of the current dirty page scan
 */
static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
{
    RAMBlock *block;
    ram_addr_t offset;
    /* Note: only read when block != NULL, so never used uninitialized */
    bool dirty;

    do {
        block = unqueue_page(rs, &offset);
        /*
         * We're sending this page, and since it's postcopy nothing else
         * will dirty it, and we must make sure it doesn't get sent again
         * even if this queue request was received after the background
         * search already sent it.
         */
        if (block) {
            unsigned long page;

            page = offset >> TARGET_PAGE_BITS;
            dirty = test_bit(page, block->bmap);
            if (!dirty) {
                /* Already sent by the background scan: skip it */
                trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
                       page, test_bit(page, block->unsentmap));
            } else {
                trace_get_queued_page(block->idstr, (uint64_t)offset, page);
            }
        }

    } while (block && !dirty);

    if (block) {
        /*
         * As soon as we start servicing pages out of order, then we have
         * to kill the bulk stage, since the bulk stage assumes
         * in (migration_bitmap_find_and_reset_dirty) that every page is
         * dirty, that's no longer true.
         */
        rs->ram_bulk_stage = false;

        /*
         * We want the background search to continue from the queued page
         * since the guest is likely to want other pages near to the page
         * it just requested.
         */
        pss->block = block;
        pss->page = offset >> TARGET_PAGE_BITS;
    }

    return !!block;
}
1386
Juan Quintela56e93d22015-05-07 19:33:31 +02001387/**
Juan Quintela5e58f962017-04-03 22:06:54 +02001388 * migration_page_queue_free: drop any remaining pages in the ram
1389 * request queue
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001390 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001391 * It should be empty at the end anyway, but in error cases there may
1392 * be some left. in case that there is any page left, we drop it.
1393 *
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001394 */
Juan Quintela83c13382017-05-04 11:45:01 +02001395static void migration_page_queue_free(RAMState *rs)
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001396{
Juan Quintelaec481c62017-03-20 22:12:40 +01001397 struct RAMSrcPageRequest *mspr, *next_mspr;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001398 /* This queue generally should be empty - but in the case of a failed
1399 * migration might have some droppings in.
1400 */
1401 rcu_read_lock();
Juan Quintelaec481c62017-03-20 22:12:40 +01001402 QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001403 memory_region_unref(mspr->rb->mr);
Juan Quintelaec481c62017-03-20 22:12:40 +01001404 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001405 g_free(mspr);
1406 }
1407 rcu_read_unlock();
1408}
1409
1410/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001411 * ram_save_queue_pages: queue the page for transmission
1412 *
1413 * A request from postcopy destination for example.
1414 *
1415 * Returns zero on success or negative on error
1416 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001417 * @rbname: Name of the RAMBLock of the request. NULL means the
1418 * same that last one.
1419 * @start: starting address from the start of the RAMBlock
1420 * @len: length (in bytes) to send
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001421 */
Juan Quintela96506892017-03-14 18:41:03 +01001422int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001423{
1424 RAMBlock *ramblock;
Juan Quintela53518d92017-05-04 11:46:24 +02001425 RAMState *rs = ram_state;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001426
Juan Quintela93604472017-06-06 19:49:03 +02001427 ram_counters.postcopy_requests++;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001428 rcu_read_lock();
1429 if (!rbname) {
1430 /* Reuse last RAMBlock */
Juan Quintela68a098f2017-03-14 13:48:42 +01001431 ramblock = rs->last_req_rb;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001432
1433 if (!ramblock) {
1434 /*
1435 * Shouldn't happen, we can't reuse the last RAMBlock if
1436 * it's the 1st request.
1437 */
1438 error_report("ram_save_queue_pages no previous block");
1439 goto err;
1440 }
1441 } else {
1442 ramblock = qemu_ram_block_by_name(rbname);
1443
1444 if (!ramblock) {
1445 /* We shouldn't be asked for a non-existent RAMBlock */
1446 error_report("ram_save_queue_pages no block '%s'", rbname);
1447 goto err;
1448 }
Juan Quintela68a098f2017-03-14 13:48:42 +01001449 rs->last_req_rb = ramblock;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001450 }
1451 trace_ram_save_queue_pages(ramblock->idstr, start, len);
1452 if (start+len > ramblock->used_length) {
Juan Quintela9458ad62015-11-10 17:42:05 +01001453 error_report("%s request overrun start=" RAM_ADDR_FMT " len="
1454 RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001455 __func__, start, len, ramblock->used_length);
1456 goto err;
1457 }
1458
Juan Quintelaec481c62017-03-20 22:12:40 +01001459 struct RAMSrcPageRequest *new_entry =
1460 g_malloc0(sizeof(struct RAMSrcPageRequest));
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001461 new_entry->rb = ramblock;
1462 new_entry->offset = start;
1463 new_entry->len = len;
1464
1465 memory_region_ref(ramblock->mr);
Juan Quintelaec481c62017-03-20 22:12:40 +01001466 qemu_mutex_lock(&rs->src_page_req_mutex);
1467 QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
1468 qemu_mutex_unlock(&rs->src_page_req_mutex);
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001469 rcu_read_unlock();
1470
1471 return 0;
1472
1473err:
1474 rcu_read_unlock();
1475 return -1;
1476}
1477
Xiao Guangrongd7400a32018-03-30 15:51:26 +08001478static bool save_page_use_compression(RAMState *rs)
1479{
1480 if (!migrate_use_compression()) {
1481 return false;
1482 }
1483
1484 /*
1485 * If xbzrle is on, stop using the data compression after first
1486 * round of migration even if compression is enabled. In theory,
1487 * xbzrle can do better than compression.
1488 */
1489 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
1490 return true;
1491 }
1492
1493 return false;
1494}
1495
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001496/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001497 * ram_save_target_page: save one target page
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001498 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001499 * Returns the number of pages written
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001500 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001501 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001502 * @pss: data about the page we want to send
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001503 * @last_stage: if we are at the completion stage
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001504 */
Juan Quintelaa0a8aa12017-03-20 22:29:07 +01001505static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
Juan Quintelaf20e2862017-03-21 16:19:05 +01001506 bool last_stage)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001507{
Xiao Guangronga8ec91f2018-03-30 15:51:25 +08001508 RAMBlock *block = pss->block;
1509 ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
1510 int res;
1511
1512 if (control_save_page(rs, block, offset, &res)) {
1513 return res;
1514 }
1515
Xiao Guangrong1faa5662018-03-30 15:51:24 +08001516 /*
Xiao Guangrongd7400a32018-03-30 15:51:26 +08001517 * When starting the process of a new block, the first page of
1518 * the block should be sent out before other pages in the same
1519 * block, and all the pages in last block should have been sent
1520 * out, keeping this order is important, because the 'cont' flag
1521 * is used to avoid resending the block name.
Xiao Guangrong1faa5662018-03-30 15:51:24 +08001522 */
Xiao Guangrongd7400a32018-03-30 15:51:26 +08001523 if (block != rs->last_sent_block && save_page_use_compression(rs)) {
1524 flush_compressed_data(rs);
1525 }
1526
1527 res = save_zero_page(rs, block, offset);
1528 if (res > 0) {
1529 /* Must let xbzrle know, otherwise a previous (now 0'd) cached
1530 * page would be stale
1531 */
1532 if (!save_page_use_compression(rs)) {
1533 XBZRLE_cache_lock();
1534 xbzrle_cache_zero_page(rs, block->offset + offset);
1535 XBZRLE_cache_unlock();
1536 }
1537 ram_release_pages(block->idstr, offset, res);
1538 return res;
1539 }
1540
Xiao Guangrongda3f56c2018-03-30 15:51:28 +08001541 /*
1542 * Make sure the first page is sent out before other pages.
1543 *
1544 * we post it as normal page as compression will take much
1545 * CPU resource.
1546 */
1547 if (block == rs->last_sent_block && save_page_use_compression(rs)) {
Xiao Guangrong701b1872018-04-28 16:10:45 +08001548 return compress_page_with_multi_thread(rs, block, offset);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001549 }
1550
Xiao Guangrong1faa5662018-03-30 15:51:24 +08001551 return ram_save_page(rs, pss, last_stage);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001552}
1553
1554/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001555 * ram_save_host_page: save a whole host page
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001556 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001557 * Starting at *offset send pages up to the end of the current host
1558 * page. It's valid for the initial offset to point into the middle of
1559 * a host page in which case the remainder of the hostpage is sent.
1560 * Only dirty target pages are sent. Note that the host page size may
1561 * be a huge page for this block.
Dr. David Alan Gilbert1eb3fc02017-05-17 17:58:09 +01001562 * The saving stops at the boundary of the used_length of the block
1563 * if the RAMBlock isn't a multiple of the host page size.
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001564 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001565 * Returns the number of pages written or negative on error
1566 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001567 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001568 * @ms: current migration state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001569 * @pss: data about the page we want to send
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001570 * @last_stage: if we are at the completion stage
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001571 */
Juan Quintelaa0a8aa12017-03-20 22:29:07 +01001572static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
Juan Quintelaf20e2862017-03-21 16:19:05 +01001573 bool last_stage)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001574{
1575 int tmppages, pages = 0;
Juan Quintelaa935e302017-03-21 15:36:51 +01001576 size_t pagesize_bits =
1577 qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
Dr. David Alan Gilbert4c011c32017-02-24 18:28:39 +00001578
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001579 do {
Xiao Guangrong1faa5662018-03-30 15:51:24 +08001580 /* Check the pages is dirty and if it is send it */
1581 if (!migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
1582 pss->page++;
1583 continue;
1584 }
1585
Juan Quintelaf20e2862017-03-21 16:19:05 +01001586 tmppages = ram_save_target_page(rs, pss, last_stage);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001587 if (tmppages < 0) {
1588 return tmppages;
1589 }
1590
1591 pages += tmppages;
Xiao Guangrong1faa5662018-03-30 15:51:24 +08001592 if (pss->block->unsentmap) {
1593 clear_bit(pss->page, pss->block->unsentmap);
1594 }
1595
Juan Quintelaa935e302017-03-21 15:36:51 +01001596 pss->page++;
Dr. David Alan Gilbert1eb3fc02017-05-17 17:58:09 +01001597 } while ((pss->page & (pagesize_bits - 1)) &&
1598 offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS));
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001599
1600 /* The offset we leave with is the last one we looked at */
Juan Quintelaa935e302017-03-21 15:36:51 +01001601 pss->page--;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001602 return pages;
1603}
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001604
1605/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001606 * ram_find_and_save_block: finds a dirty page and sends it to f
Juan Quintela56e93d22015-05-07 19:33:31 +02001607 *
1608 * Called within an RCU critical section.
1609 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001610 * Returns the number of pages written where zero means no dirty pages
Juan Quintela56e93d22015-05-07 19:33:31 +02001611 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001612 * @rs: current RAM state
Juan Quintela56e93d22015-05-07 19:33:31 +02001613 * @last_stage: if we are at the completion stage
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001614 *
1615 * On systems where host-page-size > target-page-size it will send all the
1616 * pages in a host page that are dirty.
Juan Quintela56e93d22015-05-07 19:33:31 +02001617 */
1618
Juan Quintelace25d332017-03-15 11:00:51 +01001619static int ram_find_and_save_block(RAMState *rs, bool last_stage)
Juan Quintela56e93d22015-05-07 19:33:31 +02001620{
Dr. David Alan Gilbertb8fb8cb2015-09-23 15:27:10 +01001621 PageSearchStatus pss;
Juan Quintela56e93d22015-05-07 19:33:31 +02001622 int pages = 0;
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001623 bool again, found;
Juan Quintela56e93d22015-05-07 19:33:31 +02001624
Ashijeet Acharya0827b9e2017-02-08 19:58:45 +05301625 /* No dirty page as there is zero RAM */
1626 if (!ram_bytes_total()) {
1627 return pages;
1628 }
1629
Juan Quintela6f37bb82017-03-13 19:26:29 +01001630 pss.block = rs->last_seen_block;
Juan Quintelaa935e302017-03-21 15:36:51 +01001631 pss.page = rs->last_page;
Dr. David Alan Gilbertb8fb8cb2015-09-23 15:27:10 +01001632 pss.complete_round = false;
1633
1634 if (!pss.block) {
1635 pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
1636 }
Juan Quintela56e93d22015-05-07 19:33:31 +02001637
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001638 do {
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001639 again = true;
Juan Quintelaf20e2862017-03-21 16:19:05 +01001640 found = get_queued_page(rs, &pss);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001641
1642 if (!found) {
1643 /* priority queue empty, so just search for something dirty */
Juan Quintelaf20e2862017-03-21 16:19:05 +01001644 found = find_dirty_block(rs, &pss, &again);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001645 }
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001646
1647 if (found) {
Juan Quintelaf20e2862017-03-21 16:19:05 +01001648 pages = ram_save_host_page(rs, &pss, last_stage);
Juan Quintela56e93d22015-05-07 19:33:31 +02001649 }
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001650 } while (!pages && again);
Juan Quintela56e93d22015-05-07 19:33:31 +02001651
Juan Quintela6f37bb82017-03-13 19:26:29 +01001652 rs->last_seen_block = pss.block;
Juan Quintelaa935e302017-03-21 15:36:51 +01001653 rs->last_page = pss.page;
Juan Quintela56e93d22015-05-07 19:33:31 +02001654
1655 return pages;
1656}
1657
1658void acct_update_position(QEMUFile *f, size_t size, bool zero)
1659{
1660 uint64_t pages = size / TARGET_PAGE_SIZE;
Juan Quintelaf7ccd612017-03-13 20:30:21 +01001661
Juan Quintela56e93d22015-05-07 19:33:31 +02001662 if (zero) {
Juan Quintela93604472017-06-06 19:49:03 +02001663 ram_counters.duplicate += pages;
Juan Quintela56e93d22015-05-07 19:33:31 +02001664 } else {
Juan Quintela93604472017-06-06 19:49:03 +02001665 ram_counters.normal += pages;
1666 ram_counters.transferred += size;
Juan Quintela56e93d22015-05-07 19:33:31 +02001667 qemu_update_position(f, size);
1668 }
1669}
1670
Juan Quintela56e93d22015-05-07 19:33:31 +02001671uint64_t ram_bytes_total(void)
1672{
1673 RAMBlock *block;
1674 uint64_t total = 0;
1675
1676 rcu_read_lock();
Peter Xu99e15582017-05-12 12:17:39 +08001677 RAMBLOCK_FOREACH(block) {
Juan Quintela56e93d22015-05-07 19:33:31 +02001678 total += block->used_length;
Peter Xu99e15582017-05-12 12:17:39 +08001679 }
Juan Quintela56e93d22015-05-07 19:33:31 +02001680 rcu_read_unlock();
1681 return total;
1682}
1683
/* Allocate the destination-side xbzrle decode buffer (one target page) */
static void xbzrle_load_setup(void)
{
    XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
}
1688
/* Free the destination-side xbzrle decode buffer */
static void xbzrle_load_cleanup(void)
{
    g_free(XBZRLE.decoded_buf);
    XBZRLE.decoded_buf = NULL;
}
1694
/*
 * Tear down the RAMState: drain the page request queue, destroy its
 * mutexes and free it. Safe to call with *rsp == NULL (no-op).
 */
static void ram_state_cleanup(RAMState **rsp)
{
    if (*rsp) {
        migration_page_queue_free(*rsp);
        qemu_mutex_destroy(&(*rsp)->bitmap_mutex);
        qemu_mutex_destroy(&(*rsp)->src_page_req_mutex);
        g_free(*rsp);
        *rsp = NULL;    /* avoid dangling pointer in the caller */
    }
}
1705
/*
 * Free all source-side xbzrle state (cache and work buffers).
 * Taking the cache lock serialises against concurrent cache users.
 */
static void xbzrle_cleanup(void)
{
    XBZRLE_cache_lock();
    if (XBZRLE.cache) {
        cache_fini(XBZRLE.cache);
        g_free(XBZRLE.encoded_buf);
        g_free(XBZRLE.current_buf);
        g_free(XBZRLE.zero_target_page);
        /* NULL everything so a later setup/cleanup pass sees a clean state */
        XBZRLE.cache = NULL;
        XBZRLE.encoded_buf = NULL;
        XBZRLE.current_buf = NULL;
        XBZRLE.zero_target_page = NULL;
    }
    XBZRLE_cache_unlock();
}
1721
/*
 * Source-side teardown after a migration attempt: stop dirty logging,
 * free the per-block dirty/unsent bitmaps, and release xbzrle,
 * compression and RAMState resources.
 *
 * @opaque: RAMState ** registered as the savevm handler's opaque
 */
static void ram_save_cleanup(void *opaque)
{
    RAMState **rsp = opaque;
    RAMBlock *block;

    /* caller have hold iothread lock or is in a bh, so there is
     * no writing race against this migration_bitmap
     */
    memory_global_dirty_log_stop();

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        g_free(block->bmap);
        block->bmap = NULL;
        g_free(block->unsentmap);
        block->unsentmap = NULL;
    }

    xbzrle_cleanup();
    compress_threads_save_cleanup();
    ram_state_cleanup(rsp);
}
1743
/*
 * Reset the search state so the next save pass starts from the
 * beginning of RAM in bulk-stage mode.
 */
static void ram_state_reset(RAMState *rs)
{
    rs->last_seen_block = NULL;
    rs->last_sent_block = NULL;
    rs->last_page = 0;
    rs->last_version = ram_list.version;
    rs->ram_bulk_stage = true;    /* first round: assume every page dirty */
}
1752
1753#define MAX_WAIT 50 /* ms, half buffered_file limit */
1754
/*
 * Dump a bitmap to stderr for debugging, 128 bits per line, skipping
 * lines that consist entirely of the 'expected' value.
 *
 * 'expected' is the value you expect the bitmap mostly to be full
 * of; it won't bother printing lines that are all this value.
 * NOTE(review): an older comment claimed a NULL 'todump' dumps the
 * migration bitmap, but this code dereferences 'todump'
 * unconditionally - callers must pass a valid bitmap.
 *
 * @todump: bitmap to dump (must be non-NULL)
 * @expected: background value whose all-'expected' lines are skipped
 * @pages: number of bits in the bitmap
 */
void ram_debug_dump_bitmap(unsigned long *todump, bool expected,
                           unsigned long pages)
{
    int64_t cur;
    int64_t linelen = 128;
    char linebuf[129];

    for (cur = 0; cur < pages; cur += linelen) {
        int64_t curb;
        bool found = false;
        /*
         * Last line; catch the case where the line length
         * is longer than remaining ram
         */
        if (cur + linelen > pages) {
            linelen = pages - cur;
        }
        for (curb = 0; curb < linelen; curb++) {
            bool thisbit = test_bit(cur + curb, todump);
            linebuf[curb] = thisbit ? '1' : '.';
            found = found || (thisbit != expected);
        }
        if (found) {
            linebuf[curb] = '\0';
            fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
        }
    }
}
1788
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001789/* **** functions for postcopy ***** */
1790
/*
 * Release (discard) source-side RAM that has already been migrated:
 * for every RAMBlock, find runs of clean (zero) bits in the dirty
 * bitmap and hand them to ram_discard_range().
 *
 * @ms: current migration state
 */
void ram_postcopy_migrated_memory_release(MigrationState *ms)
{
    struct RAMBlock *block;

    RAMBLOCK_FOREACH(block) {
        unsigned long *bitmap = block->bmap;
        unsigned long range = block->used_length >> TARGET_PAGE_BITS;
        unsigned long run_start = find_next_zero_bit(bitmap, range, 0);

        while (run_start < range) {
            /* End of this clean run is the next dirty bit (or 'range') */
            unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
            ram_discard_range(block->idstr, run_start << TARGET_PAGE_BITS,
                              (run_end - run_start) << TARGET_PAGE_BITS);
            run_start = find_next_zero_bit(bitmap, range, run_end + 1);
        }
    }
}
1808
Juan Quintela3d0684b2017-03-23 15:06:39 +01001809/**
1810 * postcopy_send_discard_bm_ram: discard a RAMBlock
1811 *
1812 * Returns zero on success
1813 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001814 * Callback from postcopy_each_ram_send_discard for each RAMBlock
1815 * Note: At this point the 'unsentmap' is the processed bitmap combined
1816 * with the dirtymap; so a '1' means it's either dirty or unsent.
Juan Quintela3d0684b2017-03-23 15:06:39 +01001817 *
1818 * @ms: current migration state
1819 * @pds: state for postcopy
1820 * @start: RAMBlock starting page
1821 * @length: RAMBlock size
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001822 */
1823static int postcopy_send_discard_bm_ram(MigrationState *ms,
1824 PostcopyDiscardState *pds,
Juan Quintela6b6712e2017-03-22 15:18:04 +01001825 RAMBlock *block)
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001826{
Juan Quintela6b6712e2017-03-22 15:18:04 +01001827 unsigned long end = block->used_length >> TARGET_PAGE_BITS;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001828 unsigned long current;
Juan Quintela6b6712e2017-03-22 15:18:04 +01001829 unsigned long *unsentmap = block->unsentmap;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001830
Juan Quintela6b6712e2017-03-22 15:18:04 +01001831 for (current = 0; current < end; ) {
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001832 unsigned long one = find_next_bit(unsentmap, end, current);
1833
1834 if (one <= end) {
1835 unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1);
1836 unsigned long discard_length;
1837
1838 if (zero >= end) {
1839 discard_length = end - one;
1840 } else {
1841 discard_length = zero - one;
1842 }
Dr. David Alan Gilbertd688c622016-06-13 12:16:40 +01001843 if (discard_length) {
1844 postcopy_discard_send_range(ms, pds, one, discard_length);
1845 }
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001846 current = one + discard_length;
1847 } else {
1848 current = one;
1849 }
1850 }
1851
1852 return 0;
1853}
1854
/**
 * postcopy_each_ram_send_discard: discard all RAMBlocks
 *
 * Returns 0 for success or negative for error
 *
 * Utility for the outgoing postcopy code.
 *   Calls postcopy_send_discard_bm_ram for each RAMBlock
 *   passing it bitmap indexes and name.
 * (qemu_ram_foreach_block ends up passing unscaled lengths
 *  which would mean postcopy code would have to deal with target page)
 *
 * @ms: current migration state
 */
static int postcopy_each_ram_send_discard(MigrationState *ms)
{
    struct RAMBlock *block;
    int ret;

    RAMBLOCK_FOREACH(block) {
        /* One discard session per block, keyed by its name */
        PostcopyDiscardState *pds =
            postcopy_discard_send_init(ms, block->idstr);

        /*
         * Postcopy sends chunks of bitmap over the wire, but it
         * just needs indexes at this point, avoids it having
         * target page specific code.
         */
        ret = postcopy_send_discard_bm_ram(ms, pds, block);
        /* Always finish the session, even on error, before bailing out */
        postcopy_discard_send_finish(ms, pds);
        if (ret) {
            return ret;
        }
    }

    return 0;
}
1891
/**
 * postcopy_chunk_hostpages_pass: canonicalize bitmap in hostpages
 *
 * Helper for postcopy_chunk_hostpages; it's called twice to
 * canonicalize the two bitmaps, that are similar, but one is
 * inverted.
 *
 * Postcopy requires that all target pages in a hostpage are dirty or
 * clean, not a mix. This function canonicalizes the bitmaps.
 *
 * @ms: current migration state
 * @unsent_pass: if true we need to canonicalize partially unsent host pages
 *               otherwise we need to canonicalize partially dirty host pages
 * @block: block that contains the page we want to canonicalize
 * @pds: state for postcopy
 */
static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
                                          RAMBlock *block,
                                          PostcopyDiscardState *pds)
{
    RAMState *rs = ram_state;
    unsigned long *bitmap = block->bmap;
    unsigned long *unsentmap = block->unsentmap;
    /* Number of target pages per host page for this block */
    unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
    unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
    unsigned long run_start;

    if (block->page_size == TARGET_PAGE_SIZE) {
        /* Easy case - TPS==HPS for a non-huge page RAMBlock */
        return;
    }

    if (unsent_pass) {
        /* Find a sent page */
        run_start = find_next_zero_bit(unsentmap, pages, 0);
    } else {
        /* Find a dirty page */
        run_start = find_next_bit(bitmap, pages, 0);
    }

    while (run_start < pages) {
        bool do_fixup = false;
        unsigned long fixup_start_addr;
        unsigned long host_offset;

        /*
         * If the start of this run of pages is in the middle of a host
         * page, then we need to fixup this host page.
         */
        host_offset = run_start % host_ratio;
        if (host_offset) {
            do_fixup = true;
            run_start -= host_offset;
            fixup_start_addr = run_start;
            /* For the next pass */
            run_start = run_start + host_ratio;
        } else {
            /* Find the end of this run */
            unsigned long run_end;
            if (unsent_pass) {
                run_end = find_next_bit(unsentmap, pages, run_start + 1);
            } else {
                run_end = find_next_zero_bit(bitmap, pages, run_start + 1);
            }
            /*
             * If the end isn't at the start of a host page, then the
             * run doesn't finish at the end of a host page
             * and we need to discard.
             */
            host_offset = run_end % host_ratio;
            if (host_offset) {
                do_fixup = true;
                fixup_start_addr = run_end - host_offset;
                /*
                 * This host page has gone, the next loop iteration starts
                 * from after the fixup
                 */
                run_start = fixup_start_addr + host_ratio;
            } else {
                /*
                 * No discards on this iteration, next loop starts from
                 * next sent/dirty page
                 */
                run_start = run_end + 1;
            }
        }

        if (do_fixup) {
            unsigned long page;

            /* Tell the destination to discard this page */
            if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
                /* For the unsent_pass we:
                 *     discard partially sent pages
                 * For the !unsent_pass (dirty) we:
                 *     discard partially dirty pages that were sent
                 *     (any partially sent pages were already discarded
                 *     by the previous unsent_pass)
                 */
                postcopy_discard_send_range(ms, pds, fixup_start_addr,
                                            host_ratio);
            }

            /* Clean up the bitmap */
            for (page = fixup_start_addr;
                 page < fixup_start_addr + host_ratio; page++) {
                /* All pages in this host page are now not sent */
                set_bit(page, unsentmap);

                /*
                 * Remark them as dirty, updating the count for any pages
                 * that weren't previously dirty.
                 */
                rs->migration_dirty_pages += !test_and_set_bit(page, bitmap);
            }
        }

        if (unsent_pass) {
            /* Find the next sent page for the next iteration */
            run_start = find_next_zero_bit(unsentmap, pages, run_start);
        } else {
            /* Find the next dirty page for the next iteration */
            run_start = find_next_bit(bitmap, pages, run_start);
        }
    }
}
2018
/**
 * postcopy_chunk_hostpages: discard any partially sent host page
 *
 * Utility for the outgoing postcopy code.
 *
 * Discard any partially sent host-page size chunks, mark any partially
 * dirty host-page size chunks as all dirty.  In this case the host-page
 * is the host-page for the particular RAMBlock, i.e. it might be a huge page
 *
 * Returns zero on success
 *
 * @ms: current migration state
 * @block: block we want to work with
 */
static int postcopy_chunk_hostpages(MigrationState *ms, RAMBlock *block)
{
    PostcopyDiscardState *pds =
        postcopy_discard_send_init(ms, block->idstr);

    /* First pass: Discard all partially sent host pages */
    postcopy_chunk_hostpages_pass(ms, true, block, pds);
    /*
     * Second pass: Ensure that all partially dirty host pages are made
     * fully dirty.
     */
    postcopy_chunk_hostpages_pass(ms, false, block, pds);

    postcopy_discard_send_finish(ms, pds);
    return 0;
}
2049
/**
 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
 *
 * Returns zero on success
 *
 * Transmit the set of pages to be discarded after precopy to the target
 * these are pages that:
 *     a) Have been previously transmitted but are now dirty again
 *     b) Pages that have never been transmitted, this ensures that
 *        any pages on the destination that have been mapped by background
 *        tasks get discarded (transparent huge pages is the specific concern)
 * Hopefully this is pretty sparse
 *
 * @ms: current migration state
 */
int ram_postcopy_send_discard_bitmap(MigrationState *ms)
{
    RAMState *rs = ram_state;
    RAMBlock *block;
    int ret;

    rcu_read_lock();

    /* This should be our last sync, the src is now paused */
    migration_bitmap_sync(rs);

    /* Easiest way to make sure we don't resume in the middle of a host-page */
    rs->last_seen_block = NULL;
    rs->last_sent_block = NULL;
    rs->last_page = 0;

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
        unsigned long *bitmap = block->bmap;
        unsigned long *unsentmap = block->unsentmap;

        if (!unsentmap) {
            /* We don't have a safe way to resize the sentmap, so
             * if the bitmap was resized it will be NULL at this
             * point.
             */
            error_report("migration ram resized during precopy phase");
            rcu_read_unlock();
            return -EINVAL;
        }
        /* Deal with TPS != HPS and huge pages */
        ret = postcopy_chunk_hostpages(ms, block);
        if (ret) {
            rcu_read_unlock();
            return ret;
        }

        /*
         * Update the unsentmap to be unsentmap = unsentmap | dirty
         */
        bitmap_or(unsentmap, unsentmap, bitmap, pages);
#ifdef DEBUG_POSTCOPY
        ram_debug_dump_bitmap(unsentmap, true, pages);
#endif
    }
    trace_ram_postcopy_send_discard_bitmap();

    /* Now ship the combined bitmap to the destination as discard ranges */
    ret = postcopy_each_ram_send_discard(ms);
    rcu_read_unlock();

    return ret;
}
2117
Juan Quintela3d0684b2017-03-23 15:06:39 +01002118/**
2119 * ram_discard_range: discard dirtied pages at the beginning of postcopy
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002120 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002121 * Returns zero on success
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002122 *
Juan Quintela36449152017-03-23 15:11:59 +01002123 * @rbname: name of the RAMBlock of the request. NULL means the
2124 * same that last one.
Juan Quintela3d0684b2017-03-23 15:06:39 +01002125 * @start: RAMBlock starting page
2126 * @length: RAMBlock size
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002127 */
Juan Quintelaaaa20642017-03-21 11:35:24 +01002128int ram_discard_range(const char *rbname, uint64_t start, size_t length)
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002129{
2130 int ret = -1;
2131
Juan Quintela36449152017-03-23 15:11:59 +01002132 trace_ram_discard_range(rbname, start, length);
Dr. David Alan Gilbertd3a50382017-02-24 18:28:32 +00002133
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002134 rcu_read_lock();
Juan Quintela36449152017-03-23 15:11:59 +01002135 RAMBlock *rb = qemu_ram_block_by_name(rbname);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002136
2137 if (!rb) {
Juan Quintela36449152017-03-23 15:11:59 +01002138 error_report("ram_discard_range: Failed to find block '%s'", rbname);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002139 goto err;
2140 }
2141
Alexey Perevalovf9494612017-10-05 14:13:20 +03002142 bitmap_clear(rb->receivedmap, start >> qemu_target_page_bits(),
2143 length >> qemu_target_page_bits());
Dr. David Alan Gilbertd3a50382017-02-24 18:28:32 +00002144 ret = ram_block_discard_range(rb, start, length);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002145
2146err:
2147 rcu_read_unlock();
2148
2149 return ret;
2150}
2151
Peter Xu84593a02017-10-19 14:31:59 +08002152/*
2153 * For every allocation, we will try not to crash the VM if the
2154 * allocation failed.
2155 */
2156static int xbzrle_init(void)
2157{
2158 Error *local_err = NULL;
2159
2160 if (!migrate_use_xbzrle()) {
2161 return 0;
2162 }
2163
2164 XBZRLE_cache_lock();
2165
2166 XBZRLE.zero_target_page = g_try_malloc0(TARGET_PAGE_SIZE);
2167 if (!XBZRLE.zero_target_page) {
2168 error_report("%s: Error allocating zero page", __func__);
2169 goto err_out;
2170 }
2171
2172 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(),
2173 TARGET_PAGE_SIZE, &local_err);
2174 if (!XBZRLE.cache) {
2175 error_report_err(local_err);
2176 goto free_zero_page;
2177 }
2178
2179 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
2180 if (!XBZRLE.encoded_buf) {
2181 error_report("%s: Error allocating encoded_buf", __func__);
2182 goto free_cache;
2183 }
2184
2185 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
2186 if (!XBZRLE.current_buf) {
2187 error_report("%s: Error allocating current_buf", __func__);
2188 goto free_encoded_buf;
2189 }
2190
2191 /* We are all good */
2192 XBZRLE_cache_unlock();
2193 return 0;
2194
2195free_encoded_buf:
2196 g_free(XBZRLE.encoded_buf);
2197 XBZRLE.encoded_buf = NULL;
2198free_cache:
2199 cache_fini(XBZRLE.cache);
2200 XBZRLE.cache = NULL;
2201free_zero_page:
2202 g_free(XBZRLE.zero_target_page);
2203 XBZRLE.zero_target_page = NULL;
2204err_out:
2205 XBZRLE_cache_unlock();
2206 return -ENOMEM;
2207}
2208
Juan Quintela53518d92017-05-04 11:46:24 +02002209static int ram_state_init(RAMState **rsp)
Juan Quintela56e93d22015-05-07 19:33:31 +02002210{
Peter Xu7d00ee62017-10-19 14:31:57 +08002211 *rsp = g_try_new0(RAMState, 1);
2212
2213 if (!*rsp) {
2214 error_report("%s: Init ramstate fail", __func__);
2215 return -1;
2216 }
Juan Quintela53518d92017-05-04 11:46:24 +02002217
2218 qemu_mutex_init(&(*rsp)->bitmap_mutex);
2219 qemu_mutex_init(&(*rsp)->src_page_req_mutex);
2220 QSIMPLEQ_INIT(&(*rsp)->src_page_requests);
Juan Quintela56e93d22015-05-07 19:33:31 +02002221
Peter Xu7d00ee62017-10-19 14:31:57 +08002222 /*
2223 * Count the total number of pages used by ram blocks not including any
2224 * gaps due to alignment or unplugs.
2225 */
2226 (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
2227
2228 ram_state_reset(*rsp);
2229
2230 return 0;
2231}
2232
Peter Xud6eff5d2017-10-19 14:32:00 +08002233static void ram_list_init_bitmaps(void)
2234{
2235 RAMBlock *block;
2236 unsigned long pages;
2237
2238 /* Skip setting bitmap if there is no RAM */
2239 if (ram_bytes_total()) {
2240 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2241 pages = block->max_length >> TARGET_PAGE_BITS;
2242 block->bmap = bitmap_new(pages);
2243 bitmap_set(block->bmap, 0, pages);
2244 if (migrate_postcopy_ram()) {
2245 block->unsentmap = bitmap_new(pages);
2246 bitmap_set(block->unsentmap, 0, pages);
2247 }
2248 }
2249 }
2250}
2251
/*
 * Set up dirty-page tracking: allocate the bitmaps, enable global dirty
 * logging and do the first bitmap sync.
 *
 * NOTE(review): the iothread -> ramlist -> RCU acquisition order here
 * presumably mirrors the order used elsewhere in migration code to avoid
 * lock inversion — confirm before reordering.
 */
static void ram_init_bitmaps(RAMState *rs)
{
    /* For memory_global_dirty_log_start below. */
    qemu_mutex_lock_iothread();
    qemu_mutex_lock_ramlist();
    rcu_read_lock();

    ram_list_init_bitmaps();
    memory_global_dirty_log_start();
    /* First sync after logging was switched on */
    migration_bitmap_sync(rs);

    rcu_read_unlock();
    qemu_mutex_unlock_ramlist();
    qemu_mutex_unlock_iothread();
}
2267
Peter Xu7d00ee62017-10-19 14:31:57 +08002268static int ram_init_all(RAMState **rsp)
2269{
Peter Xu7d00ee62017-10-19 14:31:57 +08002270 if (ram_state_init(rsp)) {
2271 return -1;
2272 }
2273
Peter Xu84593a02017-10-19 14:31:59 +08002274 if (xbzrle_init()) {
2275 ram_state_cleanup(rsp);
2276 return -1;
Juan Quintela56e93d22015-05-07 19:33:31 +02002277 }
2278
Peter Xud6eff5d2017-10-19 14:32:00 +08002279 ram_init_bitmaps(*rsp);
zhanghailianga91246c2016-10-27 14:42:59 +08002280
2281 return 0;
2282}
2283
/*
 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
 * a long-running RCU critical section.  When rcu-reclaims in the code
 * start to become numerous it will be necessary to reduce the
 * granularity of these critical sections.
 */
2290
/**
 * ram_save_setup: Setup RAM for migration
 *
 * Returns zero to indicate success and negative for error
 *
 * Writes the stream header: total RAM size plus, per block, the id
 * string, used length and (for postcopy with non-host-sized pages)
 * the block's page size, terminated by an EOS flag.
 *
 * @f: QEMUFile where to send the data
 * @opaque: RAMState pointer
 */
static int ram_save_setup(QEMUFile *f, void *opaque)
{
    RAMState **rsp = opaque;
    RAMBlock *block;

    if (compress_threads_save_setup()) {
        return -1;
    }

    /* migration has already setup the bitmap, reuse it. */
    if (!migration_in_colo_state()) {
        if (ram_init_all(rsp) != 0) {
            compress_threads_save_cleanup();
            return -1;
        }
    }
    (*rsp)->f = f;

    rcu_read_lock();

    qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);

    RAMBLOCK_FOREACH(block) {
        qemu_put_byte(f, strlen(block->idstr));
        qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
        qemu_put_be64(f, block->used_length);
        /* Only send the page size when the destination can't assume it */
        if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
            qemu_put_be64(f, block->page_size);
        }
    }

    rcu_read_unlock();

    /* NOTE(review): both SETUP hooks are invoked here back to back —
     * presumably required by the RDMA control protocol; confirm before
     * separating them. */
    ram_control_before_iterate(f, RAM_CONTROL_SETUP);
    ram_control_after_iterate(f, RAM_CONTROL_SETUP);

    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);

    return 0;
}
2339
/**
 * ram_save_iterate: iterative stage for migration
 *
 * Returns zero to indicate success and negative for error
 * (a positive 1 means "done": no dirty pages were left to send)
 *
 * @f: QEMUFile where to send the data
 * @opaque: RAMState pointer
 */
static int ram_save_iterate(QEMUFile *f, void *opaque)
{
    RAMState **temp = opaque;
    RAMState *rs = *temp;
    int ret;
    int i;
    int64_t t0;
    int done = 0;

    if (blk_mig_bulk_active()) {
        /* Avoid transferring ram during bulk phase of block migration as
         * the bulk phase will usually take a long time and transferring
         * ram updates during that time is pointless. */
        goto out;
    }

    rcu_read_lock();
    /* RAM blocks were added/removed since our last pass: start over */
    if (ram_list.version != rs->last_version) {
        ram_state_reset(rs);
    }

    /* Read version before ram_list.blocks */
    smp_rmb();

    ram_control_before_iterate(f, RAM_CONTROL_ROUND);

    t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    i = 0;
    /* Keep sending pages until the rate limiter says stop */
    while ((ret = qemu_file_rate_limit(f)) == 0) {
        int pages;

        pages = ram_find_and_save_block(rs, false);
        /* no more pages to sent */
        if (pages == 0) {
            done = 1;
            break;
        }
        rs->iterations++;

        /* we want to check in the 1st loop, just in case it was the 1st time
           and we had to sync the dirty bitmap.
           qemu_get_clock_ns() is a bit expensive, so we only check each some
           iterations
        */
        if ((i & 63) == 0) {
            /* elapsed milliseconds since this round started */
            uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
            if (t1 > MAX_WAIT) {
                trace_ram_save_iterate_big_wait(t1, i);
                break;
            }
        }
        i++;
    }
    flush_compressed_data(rs);
    rcu_read_unlock();

    /*
     * Must occur before EOS (or any QEMUFile operation)
     * because of RDMA protocol.
     */
    ram_control_after_iterate(f, RAM_CONTROL_ROUND);

out:
    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
    /* account for the 8-byte EOS marker just written */
    ram_counters.transferred += 8;

    ret = qemu_file_get_error(f);
    if (ret < 0) {
        return ret;
    }

    return done;
}
2421
/**
 * ram_save_complete: function called to send the remaining amount of ram
 *
 * Returns zero to indicate success
 *
 * Called with iothread lock
 *
 * @f: QEMUFile where to send the data
 * @opaque: RAMState pointer
 */
static int ram_save_complete(QEMUFile *f, void *opaque)
{
    RAMState **temp = opaque;
    RAMState *rs = *temp;

    rcu_read_lock();

    /* In postcopy the final sync already happened when the bitmap was
     * handed over, so only sync here in the precopy case */
    if (!migration_in_postcopy()) {
        migration_bitmap_sync(rs);
    }

    ram_control_before_iterate(f, RAM_CONTROL_FINISH);

    /* try transferring iterative blocks of memory */

    /* flush all remaining blocks regardless of rate limiting */
    while (true) {
        int pages;

        pages = ram_find_and_save_block(rs, !migration_in_colo_state());
        /* no more blocks to sent */
        if (pages == 0) {
            break;
        }
    }

    flush_compressed_data(rs);
    ram_control_after_iterate(f, RAM_CONTROL_FINISH);

    rcu_read_unlock();

    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);

    return 0;
}
2467
/*
 * ram_save_pending: report how much RAM is still to be transferred
 *
 * @f: the migration stream
 * @opaque: RAMState pointer (double indirection)
 * @max_size: threshold below which it is worth doing one more precise sync
 * @res_precopy_only: accumulates pages that must be sent before postcopy
 * @res_compatible: accumulates pages sendable in either phase
 * @res_postcopy_only: unused for RAM (all RAM is postcopy-compatible)
 */
static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
                             uint64_t *res_precopy_only,
                             uint64_t *res_compatible,
                             uint64_t *res_postcopy_only)
{
    RAMState **temp = opaque;
    RAMState *rs = *temp;
    uint64_t remaining_size;

    remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;

    if (!migration_in_postcopy() &&
        remaining_size < max_size) {
        /* We look close to done: resync the dirty bitmap under the
         * iothread lock + RCU to get an accurate final figure */
        qemu_mutex_lock_iothread();
        rcu_read_lock();
        migration_bitmap_sync(rs);
        rcu_read_unlock();
        qemu_mutex_unlock_iothread();
        remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
    }

    if (migrate_postcopy_ram()) {
        /* We can do postcopy, and all the data is postcopiable */
        *res_compatible += remaining_size;
    } else {
        *res_precopy_only += remaining_size;
    }
}
2496
2497static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
2498{
2499 unsigned int xh_len;
2500 int xh_flags;
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00002501 uint8_t *loaded_data;
Juan Quintela56e93d22015-05-07 19:33:31 +02002502
Juan Quintela56e93d22015-05-07 19:33:31 +02002503 /* extract RLE header */
2504 xh_flags = qemu_get_byte(f);
2505 xh_len = qemu_get_be16(f);
2506
2507 if (xh_flags != ENCODING_FLAG_XBZRLE) {
2508 error_report("Failed to load XBZRLE page - wrong compression!");
2509 return -1;
2510 }
2511
2512 if (xh_len > TARGET_PAGE_SIZE) {
2513 error_report("Failed to load XBZRLE page - len overflow!");
2514 return -1;
2515 }
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002516 loaded_data = XBZRLE.decoded_buf;
Juan Quintela56e93d22015-05-07 19:33:31 +02002517 /* load data and decode */
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002518 /* it can change loaded_data to point to an internal buffer */
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00002519 qemu_get_buffer_in_place(f, &loaded_data, xh_len);
Juan Quintela56e93d22015-05-07 19:33:31 +02002520
2521 /* decode RLE */
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00002522 if (xbzrle_decode_buffer(loaded_data, xh_len, host,
Juan Quintela56e93d22015-05-07 19:33:31 +02002523 TARGET_PAGE_SIZE) == -1) {
2524 error_report("Failed to load XBZRLE page - decode error!");
2525 return -1;
2526 }
2527
2528 return 0;
2529}
2530
Juan Quintela3d0684b2017-03-23 15:06:39 +01002531/**
2532 * ram_block_from_stream: read a RAMBlock id from the migration stream
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002533 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002534 * Must be called from within a rcu critical section.
2535 *
2536 * Returns a pointer from within the RCU-protected ram_list.
2537 *
2538 * @f: QEMUFile where to read the data from
2539 * @flags: Page flags (mostly to see if it's a continuation of previous block)
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002540 */
Juan Quintela3d0684b2017-03-23 15:06:39 +01002541static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
Juan Quintela56e93d22015-05-07 19:33:31 +02002542{
2543 static RAMBlock *block = NULL;
2544 char id[256];
2545 uint8_t len;
2546
2547 if (flags & RAM_SAVE_FLAG_CONTINUE) {
zhanghailiang4c4bad42016-01-15 11:37:41 +08002548 if (!block) {
Juan Quintela56e93d22015-05-07 19:33:31 +02002549 error_report("Ack, bad migration stream!");
2550 return NULL;
2551 }
zhanghailiang4c4bad42016-01-15 11:37:41 +08002552 return block;
Juan Quintela56e93d22015-05-07 19:33:31 +02002553 }
2554
2555 len = qemu_get_byte(f);
2556 qemu_get_buffer(f, (uint8_t *)id, len);
2557 id[len] = 0;
2558
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00002559 block = qemu_ram_block_by_name(id);
zhanghailiang4c4bad42016-01-15 11:37:41 +08002560 if (!block) {
2561 error_report("Can't find block %s", id);
2562 return NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02002563 }
2564
zhanghailiang4c4bad42016-01-15 11:37:41 +08002565 return block;
2566}
2567
2568static inline void *host_from_ram_block_offset(RAMBlock *block,
2569 ram_addr_t offset)
2570{
2571 if (!offset_in_ramblock(block, offset)) {
2572 return NULL;
2573 }
2574
2575 return block->host + offset;
Juan Quintela56e93d22015-05-07 19:33:31 +02002576}
2577
/**
 * ram_handle_compressed: handle the zero page case
 *
 * If a page (or a whole RDMA chunk) has been
 * determined to be zero, then zap it.
 *
 * @host: host address for the zero page
 * @ch: what the page is filled from. We only support zero
 * @size: size of the zero page
 */
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
{
    /* Skip the memset when the destination is already all-zero */
    if (ch == 0 && is_zero_range(host, size)) {
        return;
    }
    memset(host, ch, size);
}
2594
/* return the size after decompression, or negative value on error */
static int
qemu_uncompress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
                     const uint8_t *source, size_t source_len)
{
    if (inflateReset(stream) != Z_OK) {
        return -1;
    }

    /* zlib's next_in is not const-qualified, hence the cast */
    stream->next_in = (uint8_t *)source;
    stream->avail_in = source_len;
    stream->next_out = dest;
    stream->avail_out = dest_len;

    /* A full page must decompress in one shot */
    if (inflate(stream, Z_NO_FLUSH) != Z_STREAM_END) {
        return -1;
    }

    return stream->total_out;
}
2619
/*
 * Decompression worker thread body.
 *
 * Handoff protocol with decompress_data_with_multi_threads():
 *  - the feeder stores the destination/len in param->des/param->len under
 *    param->mutex and signals param->cond;
 *  - this thread inflates param->compbuf into the destination page, then
 *    sets param->done under decomp_done_lock and signals decomp_done_cond;
 *  - param->quit asks the thread to exit its loop.
 */
static void *do_data_decompress(void *opaque)
{
    DecompressParam *param = opaque;
    unsigned long pagesize;
    uint8_t *des;
    int len, ret;

    qemu_mutex_lock(&param->mutex);
    while (!param->quit) {
        if (param->des) {
            /* Grab the work item, then drop the lock while inflating */
            des = param->des;
            len = param->len;
            param->des = 0;
            qemu_mutex_unlock(&param->mutex);

            pagesize = TARGET_PAGE_SIZE;

            ret = qemu_uncompress_data(&param->stream, des, pagesize,
                                       param->compbuf, len);
            if (ret < 0) {
                /* Record the failure on the stream so the main load
                 * path (wait_for_decompress_done) can see it */
                error_report("decompress data failed");
                qemu_file_set_error(decomp_file, ret);
            }

            qemu_mutex_lock(&decomp_done_lock);
            param->done = true;
            qemu_cond_signal(&decomp_done_cond);
            qemu_mutex_unlock(&decomp_done_lock);

            qemu_mutex_lock(&param->mutex);
        } else {
            /* Nothing queued: sleep until the feeder signals us */
            qemu_cond_wait(&param->cond, &param->mutex);
        }
    }
    qemu_mutex_unlock(&param->mutex);

    return NULL;
}
2658
/*
 * Block until every decompression worker has finished its current page.
 *
 * Returns 0 on success, or the error a failing worker recorded on
 * decomp_file via qemu_file_set_error().
 */
static int wait_for_decompress_done(void)
{
    int idx, thread_count;

    if (!migrate_use_compression()) {
        return 0;
    }

    thread_count = migrate_decompress_threads();
    qemu_mutex_lock(&decomp_done_lock);
    for (idx = 0; idx < thread_count; idx++) {
        while (!decomp_param[idx].done) {
            /* done is set and decomp_done_cond signalled by
             * do_data_decompress() when a page completes */
            qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
        }
    }
    qemu_mutex_unlock(&decomp_done_lock);
    return qemu_file_get_error(decomp_file);
}
2677
/*
 * Tear down the decompression thread pool.
 *
 * Runs in two passes: first every initialised worker is asked to quit,
 * then each is joined and its resources freed.  A NULL compbuf marks a
 * slot whose setup never completed (see compress_threads_load_setup's
 * error path) and terminates each pass early.
 */
static void compress_threads_load_cleanup(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    thread_count = migrate_decompress_threads();
    /* Pass 1: wake every initialised worker and ask it to quit */
    for (i = 0; i < thread_count; i++) {
        /*
         * we use it as a indicator which shows if the thread is
         * properly init'd or not
         */
        if (!decomp_param[i].compbuf) {
            break;
        }

        qemu_mutex_lock(&decomp_param[i].mutex);
        decomp_param[i].quit = true;
        qemu_cond_signal(&decomp_param[i].cond);
        qemu_mutex_unlock(&decomp_param[i].mutex);
    }
    /* Pass 2: join the workers and release their resources */
    for (i = 0; i < thread_count; i++) {
        if (!decomp_param[i].compbuf) {
            break;
        }

        qemu_thread_join(decompress_threads + i);
        qemu_mutex_destroy(&decomp_param[i].mutex);
        qemu_cond_destroy(&decomp_param[i].cond);
        inflateEnd(&decomp_param[i].stream);
        g_free(decomp_param[i].compbuf);
        decomp_param[i].compbuf = NULL;
    }
    g_free(decompress_threads);
    g_free(decomp_param);
    decompress_threads = NULL;
    decomp_param = NULL;
    decomp_file = NULL;
}
2718
/*
 * Create the decompression thread pool for the incoming side.
 *
 * Returns 0 on success, -1 on failure; on failure everything created so
 * far is released via compress_threads_load_cleanup().
 *
 * @f: the migration stream; stashed in decomp_file so workers can record
 *     errors on it with qemu_file_set_error()
 */
static int compress_threads_load_setup(QEMUFile *f)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return 0;
    }

    thread_count = migrate_decompress_threads();
    decompress_threads = g_new0(QemuThread, thread_count);
    decomp_param = g_new0(DecompressParam, thread_count);
    qemu_mutex_init(&decomp_done_lock);
    qemu_cond_init(&decomp_done_cond);
    decomp_file = f;
    for (i = 0; i < thread_count; i++) {
        if (inflateInit(&decomp_param[i].stream) != Z_OK) {
            goto exit;
        }

        /* A non-NULL compbuf is the "slot fully initialised" marker
         * that compress_threads_load_cleanup() relies on */
        decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
        qemu_mutex_init(&decomp_param[i].mutex);
        qemu_cond_init(&decomp_param[i].cond);
        decomp_param[i].done = true;
        decomp_param[i].quit = false;
        qemu_thread_create(decompress_threads + i, "decompress",
                           do_data_decompress, decomp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
    return 0;
exit:
    compress_threads_load_cleanup();
    return -1;
}
2752
/*
 * Hand one compressed page to an idle decompression worker, blocking
 * until a worker is available.
 *
 * @f: stream to read the compressed payload from
 * @host: destination guest page
 * @len: size of the compressed payload
 */
static void decompress_data_with_multi_threads(QEMUFile *f,
                                               void *host, int len)
{
    int idx, thread_count;

    thread_count = migrate_decompress_threads();
    qemu_mutex_lock(&decomp_done_lock);
    while (true) {
        for (idx = 0; idx < thread_count; idx++) {
            if (decomp_param[idx].done) {
                /* Claim this worker, queue the payload, wake it */
                decomp_param[idx].done = false;
                qemu_mutex_lock(&decomp_param[idx].mutex);
                qemu_get_buffer(f, decomp_param[idx].compbuf, len);
                decomp_param[idx].des = host;
                decomp_param[idx].len = len;
                qemu_cond_signal(&decomp_param[idx].cond);
                qemu_mutex_unlock(&decomp_param[idx].mutex);
                break;
            }
        }
        if (idx < thread_count) {
            /* Found a free worker: we're done */
            break;
        } else {
            /* All busy: wait for do_data_decompress to flag completion */
            qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
        }
    }
    qemu_mutex_unlock(&decomp_done_lock);
}
2781
Juan Quintela3d0684b2017-03-23 15:06:39 +01002782/**
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002783 * ram_load_setup: Setup RAM for migration incoming side
2784 *
2785 * Returns zero to indicate success and negative for error
2786 *
2787 * @f: QEMUFile where to receive the data
2788 * @opaque: RAMState pointer
2789 */
2790static int ram_load_setup(QEMUFile *f, void *opaque)
2791{
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08002792 if (compress_threads_load_setup(f)) {
Xiao Guangrong797ca152018-03-30 15:51:21 +08002793 return -1;
2794 }
2795
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002796 xbzrle_load_setup();
Alexey Perevalovf9494612017-10-05 14:13:20 +03002797 ramblock_recv_map_init();
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002798 return 0;
2799}
2800
2801static int ram_load_cleanup(void *opaque)
2802{
Alexey Perevalovf9494612017-10-05 14:13:20 +03002803 RAMBlock *rb;
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002804 xbzrle_load_cleanup();
Juan Quintelaf0afa332017-06-28 11:52:28 +02002805 compress_threads_load_cleanup();
Alexey Perevalovf9494612017-10-05 14:13:20 +03002806
2807 RAMBLOCK_FOREACH(rb) {
2808 g_free(rb->receivedmap);
2809 rb->receivedmap = NULL;
2810 }
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002811 return 0;
2812}
2813
2814/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002815 * ram_postcopy_incoming_init: allocate postcopy data structures
2816 *
2817 * Returns 0 for success and negative if there was one error
2818 *
2819 * @mis: current migration incoming state
2820 *
2821 * Allocate data structures etc needed by incoming migration with
2822 * postcopy-ram. postcopy-ram's similarly names
2823 * postcopy_ram_incoming_init does the work.
Dr. David Alan Gilbert1caddf82015-11-05 18:11:03 +00002824 */
2825int ram_postcopy_incoming_init(MigrationIncomingState *mis)
2826{
Juan Quintelab8c48992017-03-21 17:44:30 +01002827 unsigned long ram_pages = last_ram_page();
Dr. David Alan Gilbert1caddf82015-11-05 18:11:03 +00002828
2829 return postcopy_ram_incoming_init(mis, ram_pages);
2830}
2831
/**
 * ram_load_postcopy: load a page in postcopy case
 *
 * Returns 0 for success or -errno in case of error
 *
 * Called in postcopy mode by ram_load().
 * rcu_read_lock is taken prior to this being called.
 *
 * Postcopy requires whole host pages to be placed atomically (they may be
 * huge pages when the RAMBlock is hugetlbfs backed), while the stream
 * carries (possibly smaller) target pages.  Incoming target pages are
 * therefore accumulated in a temporary host page and only "placed" into
 * guest memory once the last target page of the host page has arrived.
 *
 * @f: QEMUFile to read the incoming migration data from
 */
static int ram_load_postcopy(QEMUFile *f)
{
    int flags = 0, ret = 0;
    /* True when the current target page completes a host page */
    bool place_needed = false;
    /* True when host-page size == target-page size (the common, non-huge case) */
    bool matching_page_sizes = false;
    MigrationIncomingState *mis = migration_incoming_get_current();
    /* Temporary page that is later 'placed' */
    void *postcopy_host_page = postcopy_get_tmp_page(mis);
    /* Host address of the previous target page; used to verify that the
     * source sends the target pages of one host page sequentially. */
    void *last_host = NULL;
    /* True while every target page of the current host page has been zero */
    bool all_zero = false;

    while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
        ram_addr_t addr;
        void *host = NULL;
        void *page_buffer = NULL;
        void *place_source = NULL;
        RAMBlock *block = NULL;
        uint8_t ch;

        addr = qemu_get_be64(f);

        /*
         * If qemu file error, we should stop here, and then "addr"
         * may be invalid
         */
        ret = qemu_file_get_error(f);
        if (ret) {
            break;
        }

        /* The low bits of the stream word carry the flags, the rest the address */
        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;

        trace_ram_load_postcopy_loop((uint64_t)addr, flags);
        place_needed = false;
        if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE)) {
            block = ram_block_from_stream(f, flags);

            host = host_from_ram_block_offset(block, addr);
            if (!host) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            matching_page_sizes = block->page_size == TARGET_PAGE_SIZE;
            /*
             * Postcopy requires that we place whole host pages atomically;
             * these may be huge pages for RAMBlocks that are backed by
             * hugetlbfs.
             * To make it atomic, the data is read into a temporary page
             * that's moved into place later.
             * The migration protocol uses,  possibly smaller, target-pages
             * however the source ensures it always sends all the components
             * of a host page in order.
             */
            page_buffer = postcopy_host_page +
                          ((uintptr_t)host & (block->page_size - 1));
            /* If all TP are zero then we can optimise the place */
            if (!((uintptr_t)host & (block->page_size - 1))) {
                /* First target page of the host page: reset the zero tracker */
                all_zero = true;
            } else {
                /* not the 1st TP within the HP */
                if (host != (last_host + TARGET_PAGE_SIZE)) {
                    error_report("Non-sequential target page %p/%p",
                                  host, last_host);
                    ret = -EINVAL;
                    break;
                }
            }


            /*
             * If it's the last part of a host page then we place the host
             * page
             */
            place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
                             (block->page_size - 1)) == 0;
            place_source = postcopy_host_page;
        }
        last_host = host;

        switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
        case RAM_SAVE_FLAG_ZERO:
            /* One fill byte follows; a non-zero fill spoils the zero-page
             * optimisation for this host page. */
            ch = qemu_get_byte(f);
            memset(page_buffer, ch, TARGET_PAGE_SIZE);
            if (ch) {
                all_zero = false;
            }
            break;

        case RAM_SAVE_FLAG_PAGE:
            all_zero = false;
            if (!place_needed || !matching_page_sizes) {
                qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
            } else {
                /* Avoids the qemu_file copy during postcopy, which is
                 * going to do a copy later; can only do it when we
                 * do this read in one go (matching page sizes)
                 */
                qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
                                         TARGET_PAGE_SIZE);
            }
            break;
        case RAM_SAVE_FLAG_EOS:
            /* normal exit */
            break;
        default:
            error_report("Unknown combination of migration flags: %#x"
                         " (postcopy mode)", flags);
            ret = -EINVAL;
            break;
        }

        /* Detect for any possible file errors */
        if (!ret && qemu_file_get_error(f)) {
            ret = qemu_file_get_error(f);
        }

        if (!ret && place_needed) {
            /* This gets called at the last target page in the host page */
            void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;

            if (all_zero) {
                ret = postcopy_place_page_zero(mis, place_dest,
                                               block);
            } else {
                ret = postcopy_place_page(mis, place_dest,
                                          place_source, block);
            }
        }
    }

    return ret;
}
2976
Daniel Henrique Barbozaacab30b2017-11-16 20:35:26 -02002977static bool postcopy_is_advised(void)
2978{
2979 PostcopyState ps = postcopy_state_get();
2980 return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END;
2981}
2982
2983static bool postcopy_is_running(void)
2984{
2985 PostcopyState ps = postcopy_state_get();
2986 return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END;
2987}
2988
/*
 * ram_load: SaveVMHandlers.load_state callback for RAM.
 *
 * Parses the incoming RAM migration stream: each record is a be64 word
 * whose low bits (below TARGET_PAGE_MASK) carry flags and whose high
 * bits carry the page address, followed by flag-dependent payload.
 * Delegates to ram_load_postcopy() when postcopy is already running.
 *
 * Returns 0 on success, negative errno on failure.
 */
static int ram_load(QEMUFile *f, void *opaque, int version_id)
{
    int flags = 0, ret = 0, invalid_flags = 0;
    static uint64_t seq_iter;
    int len = 0;
    /*
     * If system is running in postcopy mode, page inserts to host memory must
     * be atomic
     */
    bool postcopy_running = postcopy_is_running();
    /* ADVISE is earlier, it shows the source has the postcopy capability on */
    bool postcopy_advised = postcopy_is_advised();

    seq_iter++;

    /* Only stream version 4 is supported */
    if (version_id != 4) {
        ret = -EINVAL;
    }

    /* Reject compressed pages if the compression capability is off */
    if (!migrate_use_compression()) {
        invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE;
    }
    /* This RCU critical section can be very long running.
     * When RCU reclaims in the code start to become numerous,
     * it will be necessary to reduce the granularity of this
     * critical section.
     */
    rcu_read_lock();

    if (postcopy_running) {
        ret = ram_load_postcopy(f);
    }

    /* Precopy path; skipped entirely when postcopy handled the stream above */
    while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
        ram_addr_t addr, total_ram_bytes;
        void *host = NULL;
        uint8_t ch;

        addr = qemu_get_be64(f);
        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;

        if (flags & invalid_flags) {
            if (flags & invalid_flags & RAM_SAVE_FLAG_COMPRESS_PAGE) {
                error_report("Received an unexpected compressed page");
            }

            ret = -EINVAL;
            break;
        }

        /* Flags that carry page data need the host address resolved first */
        if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
                     RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
            RAMBlock *block = ram_block_from_stream(f, flags);

            host = host_from_ram_block_offset(block, addr);
            if (!host) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            /* Record receipt of this page for the recv bitmap */
            ramblock_recv_bitmap_set(block, host);
            trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
        }

        switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
        case RAM_SAVE_FLAG_MEM_SIZE:
            /* Synchronize RAM block list; addr holds the total byte count */
            total_ram_bytes = addr;
            while (!ret && total_ram_bytes) {
                RAMBlock *block;
                char id[256];
                ram_addr_t length;

                /* len is a single byte (0..255), so id[256] cannot overflow */
                len = qemu_get_byte(f);
                qemu_get_buffer(f, (uint8_t *)id, len);
                id[len] = 0;
                length = qemu_get_be64(f);

                block = qemu_ram_block_by_name(id);
                if (block) {
                    if (length != block->used_length) {
                        Error *local_err = NULL;

                        ret = qemu_ram_resize(block, length,
                                              &local_err);
                        if (local_err) {
                            error_report_err(local_err);
                        }
                    }
                    /* For postcopy we need to check hugepage sizes match */
                    if (postcopy_advised &&
                        block->page_size != qemu_host_page_size) {
                        uint64_t remote_page_size = qemu_get_be64(f);
                        if (remote_page_size != block->page_size) {
                            /* NOTE(review): %zd/%PRId64 print the unsigned
                             * page sizes as signed; harmless for realistic
                             * sizes but %zu/%PRIu64 would be exact. */
                            error_report("Mismatched RAM page size %s "
                                         "(local) %zd != %" PRId64,
                                         id, block->page_size,
                                         remote_page_size);
                            ret = -EINVAL;
                        }
                    }
                    ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
                                          block->idstr);
                } else {
                    error_report("Unknown ramblock \"%s\", cannot "
                                 "accept migration", id);
                    ret = -EINVAL;
                }

                total_ram_bytes -= length;
            }
            break;

        case RAM_SAVE_FLAG_ZERO:
            /* Page filled with a single repeated byte */
            ch = qemu_get_byte(f);
            ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
            break;

        case RAM_SAVE_FLAG_PAGE:
            /* Raw page contents follow */
            qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
            break;

        case RAM_SAVE_FLAG_COMPRESS_PAGE:
            len = qemu_get_be32(f);
            if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
                error_report("Invalid compressed data length: %d", len);
                ret = -EINVAL;
                break;
            }
            /* Decompression happens asynchronously; completion is awaited
             * by wait_for_decompress_done() below. */
            decompress_data_with_multi_threads(f, host, len);
            break;

        case RAM_SAVE_FLAG_XBZRLE:
            if (load_xbzrle(f, addr, host) < 0) {
                error_report("Failed to decompress XBZRLE page at "
                             RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            break;
        case RAM_SAVE_FLAG_EOS:
            /* normal exit */
            break;
        default:
            if (flags & RAM_SAVE_FLAG_HOOK) {
                /* RDMA and similar transports hook in here */
                ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
            } else {
                error_report("Unknown combination of migration flags: %#x",
                             flags);
                ret = -EINVAL;
            }
        }
        if (!ret) {
            ret = qemu_file_get_error(f);
        }
    }

    /* Merge any decompression-thread error into the return value */
    ret |= wait_for_decompress_done();
    rcu_read_unlock();
    trace_ram_load_complete(ret, seq_iter);
    return ret;
}
3152
/*
 * SaveVMHandlers.has_postcopy callback: RAM supports postcopy exactly
 * when the postcopy-ram migration capability is enabled.
 */
static bool ram_has_postcopy(void *opaque)
{
    bool enabled = migrate_postcopy_ram();

    return enabled;
}
3157
/*
 * Callback table hooking RAM into the generic live-migration machinery;
 * registered via register_savevm_live() in ram_mig_init() below.
 */
static SaveVMHandlers savevm_ram_handlers = {
    .save_setup = ram_save_setup,
    .save_live_iterate = ram_save_iterate,
    /* RAM uses the same completion routine for both migration phases */
    .save_live_complete_postcopy = ram_save_complete,
    .save_live_complete_precopy = ram_save_complete,
    .has_postcopy = ram_has_postcopy,
    .save_live_pending = ram_save_pending,
    .load_state = ram_load,
    .save_cleanup = ram_save_cleanup,
    .load_setup = ram_load_setup,
    .load_cleanup = ram_load_cleanup,
};
3170
/*
 * ram_mig_init: register the RAM section ("ram") with the live-migration
 * core.  The version number 4 here must match the version_id check in
 * ram_load().  Also initializes the lock protecting the XBZRLE state.
 */
void ram_mig_init(void)
{
    qemu_mutex_init(&XBZRLE.lock);
    register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state);
}
Juan Quintela56e93d22015-05-07 19:33:31 +02003175}