/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2011-2015 Red Hat Inc
 *
 * Authors:
 *  Juan Quintela <quintela@redhat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include <zlib.h>
#include "qemu/cutils.h"
#include "qemu/bitops.h"
#include "qemu/bitmap.h"
#include "qemu/main-loop.h"
#include "xbzrle.h"
#include "ram.h"
#include "migration.h"
#include "socket.h"
#include "migration/register.h"
#include "migration/misc.h"
#include "qemu-file.h"
#include "postcopy-ram.h"
#include "migration/page_cache.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "qapi/qapi-events-migration.h"
#include "qapi/qmp/qerror.h"
#include "trace.h"
#include "exec/ram_addr.h"
#include "exec/target_page.h"
#include "qemu/rcu_queue.h"
#include "migration/colo.h"
#include "migration/block.h"
#include "sysemu/sysemu.h"
#include "qemu/uuid.h"

/***********************************************************/
/* ram save/restore */

/* RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS, it
 * worked for pages that were filled with the same char.  We switched
 * it to only search for the zero value.  And to avoid confusion with
 * RAM_SAVE_FLAG_COMPRESS_PAGE just rename it.
 */

#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
#define RAM_SAVE_FLAG_ZERO     0x02
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
#define RAM_SAVE_FLAG_PAGE     0x08
#define RAM_SAVE_FLAG_EOS      0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE   0x40
/* 0x80 is reserved in migration.h start with 0x100 next */
#define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100

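/*
 * Illustrative note: save_page_header() below emits a single big-endian
 * 64-bit word per page that combines the page offset within its RAMBlock
 * (upper bits) with the RAM_SAVE_FLAG_* bits above (lower bits). A normal
 * page from an already-announced block, for example, is sent with
 * (offset | RAM_SAVE_FLAG_PAGE | RAM_SAVE_FLAG_CONTINUE).
 */
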
static inline bool is_zero_range(uint8_t *p, uint64_t size)
{
    return buffer_is_zero(p, size);
}

XBZRLECacheStats xbzrle_counters;

/* struct contains XBZRLE cache and a static page
   used by the compression */
static struct {
    /* buffer used for XBZRLE encoding */
    uint8_t *encoded_buf;
    /* buffer for storing page content */
    uint8_t *current_buf;
    /* Cache for XBZRLE, Protected by lock. */
    PageCache *cache;
    QemuMutex lock;
    /* it will store a page full of zeros */
    uint8_t *zero_target_page;
    /* buffer used for XBZRLE decoding */
    uint8_t *decoded_buf;
} XBZRLE;

static void XBZRLE_cache_lock(void)
{
    if (migrate_use_xbzrle())
        qemu_mutex_lock(&XBZRLE.lock);
}

static void XBZRLE_cache_unlock(void)
{
    if (migrate_use_xbzrle())
        qemu_mutex_unlock(&XBZRLE.lock);
}

/**
 * xbzrle_cache_resize: resize the xbzrle cache
 *
 * This function is called from qmp_migrate_set_cache_size in the main
 * thread, possibly while a migration is in progress. A running
 * migration may be using the cache and might finish during this call,
 * hence changes to the cache are protected by XBZRLE.lock().
 *
 * Returns 0 for success or -1 for error
 *
 * @new_size: new cache size
 * @errp: set *errp if the check failed, with reason
 */
int xbzrle_cache_resize(int64_t new_size, Error **errp)
{
    PageCache *new_cache;
    int64_t ret = 0;

    /* Check for truncation */
    if (new_size != (size_t)new_size) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
                   "exceeding address space");
        return -1;
    }

    if (new_size == migrate_xbzrle_cache_size()) {
        /* nothing to do */
        return 0;
    }

    XBZRLE_cache_lock();

    if (XBZRLE.cache != NULL) {
        new_cache = cache_init(new_size, TARGET_PAGE_SIZE, errp);
        if (!new_cache) {
            ret = -1;
            goto out;
        }

        cache_fini(XBZRLE.cache);
        XBZRLE.cache = new_cache;
    }
out:
    XBZRLE_cache_unlock();
    return ret;
}

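/*
 * Example (illustrative): the QMP command
 *   { "execute": "migrate-set-cache-size", "arguments": { "value": 536870912 } }
 * reaches this function via qmp_migrate_set_cache_size() and resizes the
 * XBZRLE cache to 512 MiB while holding XBZRLE.lock.
 */
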
static void ramblock_recv_map_init(void)
{
    RAMBlock *rb;

    RAMBLOCK_FOREACH(rb) {
        assert(!rb->receivedmap);
        rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits());
    }
}

int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr)
{
    return test_bit(ramblock_recv_bitmap_offset(host_addr, rb),
                    rb->receivedmap);
}

bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset)
{
    return test_bit(byte_offset >> TARGET_PAGE_BITS, rb->receivedmap);
}

void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr)
{
    set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb), rb->receivedmap);
}

void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr,
                                    size_t nr)
{
    bitmap_set_atomic(rb->receivedmap,
                      ramblock_recv_bitmap_offset(host_addr, rb),
                      nr);
}

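/*
 * Illustrative note: receivedmap holds one bit per target page, so marking
 * a 2 MiB huge page as received on a guest with 4 KiB target pages means
 * setting 512 consecutive bits through ramblock_recv_bitmap_set_range().
 */
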
/*
 * An outstanding page request, on the source, having been received
 * and queued
 */
struct RAMSrcPageRequest {
    RAMBlock *rb;
    hwaddr    offset;
    hwaddr    len;

    QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
};

/* State of RAM for migration */
struct RAMState {
    /* QEMUFile used for this migration */
    QEMUFile *f;
    /* Last block that we have visited searching for dirty pages */
    RAMBlock *last_seen_block;
    /* Last block from where we have sent data */
    RAMBlock *last_sent_block;
    /* Last dirty target page we have sent */
    ram_addr_t last_page;
    /* last ram version we have seen */
    uint32_t last_version;
    /* We are in the first round */
    bool ram_bulk_stage;
    /* How many times we have dirtied too many pages */
    int dirty_rate_high_cnt;
    /* these variables are used for bitmap sync */
    /* last time we did a full bitmap_sync */
    int64_t time_last_bitmap_sync;
    /* bytes transferred at start_time */
    uint64_t bytes_xfer_prev;
    /* number of dirty pages since start_time */
    uint64_t num_dirty_pages_period;
    /* xbzrle misses since the beginning of the period */
    uint64_t xbzrle_cache_miss_prev;
    /* number of iterations at the beginning of period */
    uint64_t iterations_prev;
    /* Iterations since start */
    uint64_t iterations;
    /* number of dirty bits in the bitmap */
    uint64_t migration_dirty_pages;
    /* protects modification of the bitmap */
    QemuMutex bitmap_mutex;
    /* The RAMBlock used in the last src_page_requests */
    RAMBlock *last_req_rb;
    /* Queue of outstanding page requests from the destination */
    QemuMutex src_page_req_mutex;
    QSIMPLEQ_HEAD(src_page_requests, RAMSrcPageRequest) src_page_requests;
};
typedef struct RAMState RAMState;

static RAMState *ram_state;

uint64_t ram_bytes_remaining(void)
{
    return ram_state ? (ram_state->migration_dirty_pages * TARGET_PAGE_SIZE) :
                       0;
}

MigrationStats ram_counters;

/* used by the search for pages to send */
struct PageSearchStatus {
    /* Current block being searched */
    RAMBlock    *block;
    /* Current page to search from */
    unsigned long page;
    /* Set once we wrap around */
    bool         complete_round;
};
typedef struct PageSearchStatus PageSearchStatus;

struct CompressParam {
    bool done;
    bool quit;
    QEMUFile *file;
    QemuMutex mutex;
    QemuCond cond;
    RAMBlock *block;
    ram_addr_t offset;

    /* internally used fields */
    z_stream stream;
    uint8_t *originbuf;
};
typedef struct CompressParam CompressParam;

struct DecompressParam {
    bool done;
    bool quit;
    QemuMutex mutex;
    QemuCond cond;
    void *des;
    uint8_t *compbuf;
    int len;
    z_stream stream;
};
typedef struct DecompressParam DecompressParam;

static CompressParam *comp_param;
static QemuThread *compress_threads;
/* comp_done_cond is used to wake up the migration thread when
 * one of the compression threads has finished the compression.
 * comp_done_lock is used to co-work with comp_done_cond.
 */
static QemuMutex comp_done_lock;
static QemuCond comp_done_cond;
/* The empty QEMUFileOps will be used by file in CompressParam */
static const QEMUFileOps empty_ops = { };

static QEMUFile *decomp_file;
static DecompressParam *decomp_param;
static QemuThread *decompress_threads;
static QemuMutex decomp_done_lock;
static QemuCond decomp_done_cond;

static int do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
                                ram_addr_t offset, uint8_t *source_buf);

static void *do_data_compress(void *opaque)
{
    CompressParam *param = opaque;
    RAMBlock *block;
    ram_addr_t offset;

    qemu_mutex_lock(&param->mutex);
    while (!param->quit) {
        if (param->block) {
            block = param->block;
            offset = param->offset;
            param->block = NULL;
            qemu_mutex_unlock(&param->mutex);

            do_compress_ram_page(param->file, &param->stream, block, offset,
                                 param->originbuf);

            qemu_mutex_lock(&comp_done_lock);
            param->done = true;
            qemu_cond_signal(&comp_done_cond);
            qemu_mutex_unlock(&comp_done_lock);

            qemu_mutex_lock(&param->mutex);
        } else {
            qemu_cond_wait(&param->cond, &param->mutex);
        }
    }
    qemu_mutex_unlock(&param->mutex);

    return NULL;
}

static inline void terminate_compression_threads(void)
{
    int idx, thread_count;

    thread_count = migrate_compress_threads();

    for (idx = 0; idx < thread_count; idx++) {
        qemu_mutex_lock(&comp_param[idx].mutex);
        comp_param[idx].quit = true;
        qemu_cond_signal(&comp_param[idx].cond);
        qemu_mutex_unlock(&comp_param[idx].mutex);
    }
}

static void compress_threads_save_cleanup(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    terminate_compression_threads();
    thread_count = migrate_compress_threads();
    for (i = 0; i < thread_count; i++) {
        /*
         * we use it as an indicator of whether the thread is
         * properly initialized or not
         */
        if (!comp_param[i].file) {
            break;
        }
        qemu_thread_join(compress_threads + i);
        qemu_mutex_destroy(&comp_param[i].mutex);
        qemu_cond_destroy(&comp_param[i].cond);
        deflateEnd(&comp_param[i].stream);
        g_free(comp_param[i].originbuf);
        qemu_fclose(comp_param[i].file);
        comp_param[i].file = NULL;
    }
    qemu_mutex_destroy(&comp_done_lock);
    qemu_cond_destroy(&comp_done_cond);
    g_free(compress_threads);
    g_free(comp_param);
    compress_threads = NULL;
    comp_param = NULL;
}

static int compress_threads_save_setup(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return 0;
    }
    thread_count = migrate_compress_threads();
    compress_threads = g_new0(QemuThread, thread_count);
    comp_param = g_new0(CompressParam, thread_count);
    qemu_cond_init(&comp_done_cond);
    qemu_mutex_init(&comp_done_lock);
    for (i = 0; i < thread_count; i++) {
        comp_param[i].originbuf = g_try_malloc(TARGET_PAGE_SIZE);
        if (!comp_param[i].originbuf) {
            goto exit;
        }

        if (deflateInit(&comp_param[i].stream,
                        migrate_compress_level()) != Z_OK) {
            g_free(comp_param[i].originbuf);
            goto exit;
        }

        /* comp_param[i].file is just used as a dummy buffer to save data,
         * set its ops to empty.
         */
        comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
        comp_param[i].done = true;
        comp_param[i].quit = false;
        qemu_mutex_init(&comp_param[i].mutex);
        qemu_cond_init(&comp_param[i].cond);
        qemu_thread_create(compress_threads + i, "compress",
                           do_data_compress, comp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
    return 0;

exit:
    compress_threads_save_cleanup();
    return -1;
}

/* Multiple fd's */

#define MULTIFD_MAGIC 0x11223344U
#define MULTIFD_VERSION 1

typedef struct {
    uint32_t magic;
    uint32_t version;
    unsigned char uuid[16]; /* QemuUUID */
    uint8_t id;
} __attribute__((packed)) MultiFDInit_t;

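/*
 * Wire layout of the initial multifd packet (sketch): a 4-byte big-endian
 * magic, a 4-byte big-endian version, the 16-byte QemuUUID of the source
 * instance and a 1-byte channel id, 25 bytes in total thanks to the packed
 * attribute.
 */
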
typedef struct {
    /* these fields are not changed once the thread is created */
    /* channel number */
    uint8_t id;
    /* channel thread name */
    char *name;
    /* channel thread id */
    QemuThread thread;
    /* communication channel */
    QIOChannel *c;
    /* sem where to wait for more work */
    QemuSemaphore sem;
    /* this mutex protects the following parameters */
    QemuMutex mutex;
    /* is this channel thread running */
    bool running;
    /* should this thread finish */
    bool quit;
} MultiFDSendParams;

typedef struct {
    /* these fields are not changed once the thread is created */
    /* channel number */
    uint8_t id;
    /* channel thread name */
    char *name;
    /* channel thread id */
    QemuThread thread;
    /* communication channel */
    QIOChannel *c;
    /* sem where to wait for more work */
    QemuSemaphore sem;
    /* this mutex protects the following parameters */
    QemuMutex mutex;
    /* is this channel thread running */
    bool running;
    /* should this thread finish */
    bool quit;
} MultiFDRecvParams;

static int multifd_send_initial_packet(MultiFDSendParams *p, Error **errp)
{
    MultiFDInit_t msg;
    int ret;

    msg.magic = cpu_to_be32(MULTIFD_MAGIC);
    msg.version = cpu_to_be32(MULTIFD_VERSION);
    msg.id = p->id;
    memcpy(msg.uuid, &qemu_uuid.data, sizeof(msg.uuid));

    ret = qio_channel_write_all(p->c, (char *)&msg, sizeof(msg), errp);
    if (ret != 0) {
        return -1;
    }
    return 0;
}

static int multifd_recv_initial_packet(QIOChannel *c, Error **errp)
{
    MultiFDInit_t msg;
    int ret;

    ret = qio_channel_read_all(c, (char *)&msg, sizeof(msg), errp);
    if (ret != 0) {
        return -1;
    }

    be32_to_cpus(&msg.magic);
    be32_to_cpus(&msg.version);

    if (msg.magic != MULTIFD_MAGIC) {
        error_setg(errp, "multifd: received packet magic %x "
                   "expected %x", msg.magic, MULTIFD_MAGIC);
        return -1;
    }

    if (msg.version != MULTIFD_VERSION) {
        error_setg(errp, "multifd: received packet version %d "
                   "expected %d", msg.version, MULTIFD_VERSION);
        return -1;
    }

    if (memcmp(msg.uuid, &qemu_uuid, sizeof(qemu_uuid))) {
        char *uuid = qemu_uuid_unparse_strdup(&qemu_uuid);
        char *msg_uuid = qemu_uuid_unparse_strdup((const QemuUUID *)msg.uuid);

        error_setg(errp, "multifd: received uuid '%s' and expected "
                   "uuid '%s' for channel %hhd", msg_uuid, uuid, msg.id);
        g_free(uuid);
        g_free(msg_uuid);
        return -1;
    }

    if (msg.id > migrate_multifd_channels()) {
        error_setg(errp, "multifd: received channel id %d "
                   "is greater than number of channels %d",
                   msg.id, migrate_multifd_channels());
        return -1;
    }

    return msg.id;
}

struct {
    MultiFDSendParams *params;
    /* number of created threads */
    int count;
} *multifd_send_state;

static void multifd_send_terminate_threads(Error *err)
{
    int i;

    if (err) {
        MigrationState *s = migrate_get_current();
        migrate_set_error(s, err);
        if (s->state == MIGRATION_STATUS_SETUP ||
            s->state == MIGRATION_STATUS_PRE_SWITCHOVER ||
            s->state == MIGRATION_STATUS_DEVICE ||
            s->state == MIGRATION_STATUS_ACTIVE) {
            migrate_set_state(&s->state, s->state,
                              MIGRATION_STATUS_FAILED);
        }
    }

    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        qemu_mutex_lock(&p->mutex);
        p->quit = true;
        qemu_sem_post(&p->sem);
        qemu_mutex_unlock(&p->mutex);
    }
}

int multifd_save_cleanup(Error **errp)
{
    int i;
    int ret = 0;

    if (!migrate_use_multifd()) {
        return 0;
    }
    multifd_send_terminate_threads(NULL);
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        if (p->running) {
            qemu_thread_join(&p->thread);
        }
        socket_send_channel_destroy(p->c);
        p->c = NULL;
        qemu_mutex_destroy(&p->mutex);
        qemu_sem_destroy(&p->sem);
        g_free(p->name);
        p->name = NULL;
    }
    g_free(multifd_send_state->params);
    multifd_send_state->params = NULL;
    g_free(multifd_send_state);
    multifd_send_state = NULL;
    return ret;
}

static void *multifd_send_thread(void *opaque)
{
    MultiFDSendParams *p = opaque;
    Error *local_err = NULL;

    if (multifd_send_initial_packet(p, &local_err) < 0) {
        goto out;
    }

    while (true) {
        qemu_mutex_lock(&p->mutex);
        if (p->quit) {
            qemu_mutex_unlock(&p->mutex);
            break;
        }
        qemu_mutex_unlock(&p->mutex);
        qemu_sem_wait(&p->sem);
    }

out:
    if (local_err) {
        multifd_send_terminate_threads(local_err);
    }

    qemu_mutex_lock(&p->mutex);
    p->running = false;
    qemu_mutex_unlock(&p->mutex);

    return NULL;
}

static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
{
    MultiFDSendParams *p = opaque;
    QIOChannel *sioc = QIO_CHANNEL(qio_task_get_source(task));
    Error *local_err = NULL;

    if (qio_task_propagate_error(task, &local_err)) {
        if (multifd_save_cleanup(&local_err) != 0) {
            migrate_set_error(migrate_get_current(), local_err);
        }
    } else {
        p->c = QIO_CHANNEL(sioc);
        qio_channel_set_delay(p->c, false);
        p->running = true;
        qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
                           QEMU_THREAD_JOINABLE);

        atomic_inc(&multifd_send_state->count);
    }
}

int multifd_save_setup(void)
{
    int thread_count;
    uint8_t i;

    if (!migrate_use_multifd()) {
        return 0;
    }
    thread_count = migrate_multifd_channels();
    multifd_send_state = g_malloc0(sizeof(*multifd_send_state));
    multifd_send_state->params = g_new0(MultiFDSendParams, thread_count);
    atomic_set(&multifd_send_state->count, 0);
    for (i = 0; i < thread_count; i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        qemu_mutex_init(&p->mutex);
        qemu_sem_init(&p->sem, 0);
        p->quit = false;
        p->id = i;
        p->name = g_strdup_printf("multifdsend_%d", i);
        socket_send_channel_create(multifd_new_send_channel_async, p);
    }
    return 0;
}

struct {
    MultiFDRecvParams *params;
    /* number of created threads */
    int count;
} *multifd_recv_state;

static void multifd_recv_terminate_threads(Error *err)
{
    int i;

    if (err) {
        MigrationState *s = migrate_get_current();
        migrate_set_error(s, err);
        if (s->state == MIGRATION_STATUS_SETUP ||
            s->state == MIGRATION_STATUS_ACTIVE) {
            migrate_set_state(&s->state, s->state,
                              MIGRATION_STATUS_FAILED);
        }
    }

    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

        qemu_mutex_lock(&p->mutex);
        p->quit = true;
        qemu_sem_post(&p->sem);
        qemu_mutex_unlock(&p->mutex);
    }
}

int multifd_load_cleanup(Error **errp)
{
    int i;
    int ret = 0;

    if (!migrate_use_multifd()) {
        return 0;
    }
    multifd_recv_terminate_threads(NULL);
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

        if (p->running) {
            qemu_thread_join(&p->thread);
        }
        object_unref(OBJECT(p->c));
        p->c = NULL;
        qemu_mutex_destroy(&p->mutex);
        qemu_sem_destroy(&p->sem);
        g_free(p->name);
        p->name = NULL;
    }
    g_free(multifd_recv_state->params);
    multifd_recv_state->params = NULL;
    g_free(multifd_recv_state);
    multifd_recv_state = NULL;

    return ret;
}

static void *multifd_recv_thread(void *opaque)
{
    MultiFDRecvParams *p = opaque;

    while (true) {
        qemu_mutex_lock(&p->mutex);
        if (p->quit) {
            qemu_mutex_unlock(&p->mutex);
            break;
        }
        qemu_mutex_unlock(&p->mutex);
        qemu_sem_wait(&p->sem);
    }

    qemu_mutex_lock(&p->mutex);
    p->running = false;
    qemu_mutex_unlock(&p->mutex);

    return NULL;
}

int multifd_load_setup(void)
{
    int thread_count;
    uint8_t i;

    if (!migrate_use_multifd()) {
        return 0;
    }
    thread_count = migrate_multifd_channels();
    multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state));
    multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count);
    atomic_set(&multifd_recv_state->count, 0);
    for (i = 0; i < thread_count; i++) {
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

        qemu_mutex_init(&p->mutex);
        qemu_sem_init(&p->sem, 0);
        p->quit = false;
        p->id = i;
        p->name = g_strdup_printf("multifdrecv_%d", i);
    }
    return 0;
}

bool multifd_recv_all_channels_created(void)
{
    int thread_count = migrate_multifd_channels();

    if (!migrate_use_multifd()) {
        return true;
    }

    return thread_count == atomic_read(&multifd_recv_state->count);
}

void multifd_recv_new_channel(QIOChannel *ioc)
{
    MultiFDRecvParams *p;
    Error *local_err = NULL;
    int id;

    id = multifd_recv_initial_packet(ioc, &local_err);
    if (id < 0) {
        multifd_recv_terminate_threads(local_err);
        return;
    }

    p = &multifd_recv_state->params[id];
    if (p->c != NULL) {
        error_setg(&local_err, "multifd: received id '%d' already setup",
                   id);
        multifd_recv_terminate_threads(local_err);
        return;
    }
    p->c = ioc;
    object_ref(OBJECT(ioc));

    p->running = true;
    qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
                       QEMU_THREAD_JOINABLE);
    atomic_inc(&multifd_recv_state->count);
    if (multifd_recv_state->count == migrate_multifd_channels()) {
        migration_incoming_process();
    }
}

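/*
 * Sketch of the per-channel handshake: each send thread first emits the
 * packed MultiFDInit_t above; the destination validates magic, version and
 * uuid in multifd_recv_initial_packet(), binds the channel to params[id]
 * here, and kicks off migration_incoming_process() once every expected
 * channel has been established.
 */
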
/**
 * save_page_header: write page header to wire
 *
 * If this is the 1st block, it also writes the block identification
 *
 * Returns the number of bytes written
 *
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 *          in the lower bits, it contains flags
 */
static size_t save_page_header(RAMState *rs, QEMUFile *f, RAMBlock *block,
                               ram_addr_t offset)
{
    size_t size, len;

    if (block == rs->last_sent_block) {
        offset |= RAM_SAVE_FLAG_CONTINUE;
    }
    qemu_put_be64(f, offset);
    size = 8;

    if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
        len = strlen(block->idstr);
        qemu_put_byte(f, len);
        qemu_put_buffer(f, (uint8_t *)block->idstr, len);
        size += 1 + len;
        rs->last_sent_block = block;
    }
    return size;
}

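/*
 * Example (illustrative): the first page sent from block "pc.ram" produces
 * an 8-byte header of (offset | RAM_SAVE_FLAG_PAGE), a 1-byte idstr length
 * (6) and the 6 bytes of "pc.ram"; later pages from the same block set
 * RAM_SAVE_FLAG_CONTINUE and skip the idstr, so the header is 8 bytes.
 */
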
/**
 * mig_throttle_guest_down: throttle down the guest
 *
 * Reduce amount of guest cpu execution to hopefully slow down memory
 * writes. If guest dirty memory rate is reduced below the rate at
 * which we can transfer pages to the destination then we should be
 * able to complete migration. Some workloads dirty memory way too
 * fast and will not effectively converge, even with auto-converge.
 */
static void mig_throttle_guest_down(void)
{
    MigrationState *s = migrate_get_current();
    uint64_t pct_initial = s->parameters.cpu_throttle_initial;
    uint64_t pct_increment = s->parameters.cpu_throttle_increment;

    /* We have not started throttling yet. Let's start it. */
    if (!cpu_throttle_active()) {
        cpu_throttle_set(pct_initial);
    } else {
        /* Throttling already on, just increase the rate */
        cpu_throttle_set(cpu_throttle_get_percentage() + pct_increment);
    }
}

/**
 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
 *
 * @rs: current RAM state
 * @current_addr: address for the zero page
 *
 * Update the xbzrle cache to reflect a page that's been sent as all 0.
 * The important thing is that a stale (not-yet-0'd) page be replaced
 * by the new data.
 * As a bonus, if the page wasn't in the cache it gets added so that
 * when a small write is made into the 0'd page it gets XBZRLE sent.
 */
static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
{
    if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
        return;
    }

    /* We don't care if this fails to allocate a new cache page
     * as long as it updated an old one */
    cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page,
                 ram_counters.dirty_sync_count);
}

#define ENCODING_FLAG_XBZRLE 0x1

/**
 * save_xbzrle_page: compress and send current page
 *
 * Returns: 1 means that we wrote the page
 *          0 means that page is identical to the one already sent
 *          -1 means that xbzrle would be longer than normal
 *
 * @rs: current RAM state
 * @current_data: pointer to the address of the page contents
 * @current_addr: addr of the page
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @last_stage: if we are at the completion stage
 */
static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
                            ram_addr_t current_addr, RAMBlock *block,
                            ram_addr_t offset, bool last_stage)
{
    int encoded_len = 0, bytes_xbzrle;
    uint8_t *prev_cached_page;

    if (!cache_is_cached(XBZRLE.cache, current_addr,
                         ram_counters.dirty_sync_count)) {
        xbzrle_counters.cache_miss++;
        if (!last_stage) {
            if (cache_insert(XBZRLE.cache, current_addr, *current_data,
                             ram_counters.dirty_sync_count) == -1) {
                return -1;
            } else {
                /* update *current_data when the page has been
                   inserted into cache */
                *current_data = get_cached_data(XBZRLE.cache, current_addr);
            }
        }
        return -1;
    }

    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);

    /* save current buffer into memory */
    memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);

    /* XBZRLE encoding (if there is no overflow) */
    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
                                       TARGET_PAGE_SIZE);
    if (encoded_len == 0) {
        trace_save_xbzrle_page_skipping();
        return 0;
    } else if (encoded_len == -1) {
        trace_save_xbzrle_page_overflow();
        xbzrle_counters.overflow++;
        /* update data in the cache */
        if (!last_stage) {
            memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
            *current_data = prev_cached_page;
        }
        return -1;
    }

    /* we need to update the data in the cache, in order to get the same data */
    if (!last_stage) {
        memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
    }

    /* Send XBZRLE based compressed page */
    bytes_xbzrle = save_page_header(rs, rs->f, block,
                                    offset | RAM_SAVE_FLAG_XBZRLE);
    qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE);
    qemu_put_be16(rs->f, encoded_len);
    qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len);
    bytes_xbzrle += encoded_len + 1 + 2;
    xbzrle_counters.pages++;
    xbzrle_counters.bytes += bytes_xbzrle;
    ram_counters.transferred += bytes_xbzrle;

    return 1;
}

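/*
 * On the wire an XBZRLE page is therefore the usual page header with
 * RAM_SAVE_FLAG_XBZRLE set, one ENCODING_FLAG_XBZRLE byte, a big-endian
 * 16-bit encoded length and then encoded_len bytes of delta against the
 * cached copy of the page.
 */
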
/**
 * migration_bitmap_find_dirty: find the next dirty page from start
 *
 * Called with rcu_read_lock() to protect migration_bitmap
 *
 * Returns the page offset within the memory region of the start of a dirty
 * page
 *
 * @rs: current RAM state
 * @rb: RAMBlock where to search for dirty pages
 * @start: page where we start the search
 */
static inline
unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
                                          unsigned long start)
{
    unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
    unsigned long *bitmap = rb->bmap;
    unsigned long next;

    if (rs->ram_bulk_stage && start > 0) {
        next = start + 1;
    } else {
        next = find_next_bit(bitmap, size, start);
    }

    return next;
}

static inline bool migration_bitmap_clear_dirty(RAMState *rs,
                                                RAMBlock *rb,
                                                unsigned long page)
{
    bool ret;

    ret = test_and_clear_bit(page, rb->bmap);

    if (ret) {
        rs->migration_dirty_pages--;
    }
    return ret;
}

static void migration_bitmap_sync_range(RAMState *rs, RAMBlock *rb,
                                        ram_addr_t start, ram_addr_t length)
{
    rs->migration_dirty_pages +=
        cpu_physical_memory_sync_dirty_bitmap(rb, start, length,
                                              &rs->num_dirty_pages_period);
}

/**
 * ram_pagesize_summary: calculate all the pagesizes of a VM
 *
 * Returns a summary bitmap of the page sizes of all RAMBlocks
 *
 * For VMs with just normal pages this is equivalent to the host page
 * size. If it's got some huge pages then it's the OR of all the
 * different page sizes.
 */
uint64_t ram_pagesize_summary(void)
{
    RAMBlock *block;
    uint64_t summary = 0;

    RAMBLOCK_FOREACH(block) {
        summary |= block->page_size;
    }

    return summary;
}

static void migration_bitmap_sync(RAMState *rs)
{
    RAMBlock *block;
    int64_t end_time;
    uint64_t bytes_xfer_now;

    ram_counters.dirty_sync_count++;

    if (!rs->time_last_bitmap_sync) {
        rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    }

    trace_migration_bitmap_sync_start();
    memory_global_dirty_log_sync();

    qemu_mutex_lock(&rs->bitmap_mutex);
    rcu_read_lock();
    RAMBLOCK_FOREACH(block) {
        migration_bitmap_sync_range(rs, block, 0, block->used_length);
    }
    rcu_read_unlock();
    qemu_mutex_unlock(&rs->bitmap_mutex);

    trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);

    end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

    /* more than 1 second = 1000 milliseconds */
    if (end_time > rs->time_last_bitmap_sync + 1000) {
        /* calculate period counters */
        ram_counters.dirty_pages_rate = rs->num_dirty_pages_period * 1000
            / (end_time - rs->time_last_bitmap_sync);
        bytes_xfer_now = ram_counters.transferred;

        /* During block migration the auto-converge logic incorrectly detects
         * that ram migration makes no progress. Avoid this by disabling the
         * throttling logic during the bulk phase of block migration. */
        if (migrate_auto_converge() && !blk_mig_bulk_active()) {
            /* The following detection logic can be refined later. For now:
               Check to see if the dirtied bytes is 50% more than the approx.
               amount of bytes that just got transferred since the last time we
               were in this routine. If that happens twice, start or increase
               throttling */

            if ((rs->num_dirty_pages_period * TARGET_PAGE_SIZE >
                   (bytes_xfer_now - rs->bytes_xfer_prev) / 2) &&
                (++rs->dirty_rate_high_cnt >= 2)) {
                    trace_migration_throttle();
                    rs->dirty_rate_high_cnt = 0;
                    mig_throttle_guest_down();
            }
        }

        if (migrate_use_xbzrle()) {
            if (rs->iterations_prev != rs->iterations) {
                xbzrle_counters.cache_miss_rate =
                   (double)(xbzrle_counters.cache_miss -
                            rs->xbzrle_cache_miss_prev) /
                   (rs->iterations - rs->iterations_prev);
            }
            rs->iterations_prev = rs->iterations;
            rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
        }

        /* reset period counters */
        rs->time_last_bitmap_sync = end_time;
        rs->num_dirty_pages_period = 0;
        rs->bytes_xfer_prev = bytes_xfer_now;
    }
    if (migrate_use_events()) {
        qapi_event_send_migration_pass(ram_counters.dirty_sync_count, NULL);
    }
}

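/*
 * Worked example (illustrative): with 4 KiB target pages, a sync period
 * that dirtied 300000 pages (~1.2 GB) while only ~1.6 GB were transferred
 * satisfies "dirtied bytes > transferred bytes / 2" (1.2 GB > 0.8 GB); two
 * such periods in a row make the code above call mig_throttle_guest_down().
 */
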
/**
 * save_zero_page: send the zero page to the stream
 *
 * Returns the number of pages written.
 *
 * @rs: current RAM state
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 */
static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
{
    uint8_t *p = block->host + offset;
    int pages = -1;

    if (is_zero_range(p, TARGET_PAGE_SIZE)) {
        ram_counters.duplicate++;
        ram_counters.transferred +=
            save_page_header(rs, rs->f, block, offset | RAM_SAVE_FLAG_ZERO);
        qemu_put_byte(rs->f, 0);
        ram_counters.transferred += 1;
        pages = 1;
    }

    return pages;
}

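/*
 * A zero page therefore costs only the page header plus a single fill byte
 * on the wire, regardless of TARGET_PAGE_SIZE.
 */
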
static void ram_release_pages(const char *rbname, uint64_t offset, int pages)
{
    if (!migrate_release_ram() || !migration_in_postcopy()) {
        return;
    }

    ram_discard_range(rbname, offset, pages << TARGET_PAGE_BITS);
}

/*
 * @pages: the number of pages written by the control path,
 *        < 0 - error
 *        > 0 - number of pages written
 *
 * Return true if the page has been saved, otherwise false is returned.
 */
static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
                              int *pages)
{
    uint64_t bytes_xmit = 0;
    int ret;

    *pages = -1;
    ret = ram_control_save_page(rs->f, block->offset, offset, TARGET_PAGE_SIZE,
                                &bytes_xmit);
    if (ret == RAM_SAVE_CONTROL_NOT_SUPP) {
        return false;
    }

    if (bytes_xmit) {
        ram_counters.transferred += bytes_xmit;
        *pages = 1;
    }

    if (ret == RAM_SAVE_CONTROL_DELAYED) {
        return true;
    }

    if (bytes_xmit > 0) {
        ram_counters.normal++;
    } else if (bytes_xmit == 0) {
        ram_counters.duplicate++;
    }

    return true;
}

/*
 * directly send the page to the stream
 *
 * Returns the number of pages written.
 *
 * @rs: current RAM state
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @buf: the page to be sent
 * @async: send the page asynchronously
 */
static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
                            uint8_t *buf, bool async)
{
    ram_counters.transferred += save_page_header(rs, rs->f, block,
                                                 offset | RAM_SAVE_FLAG_PAGE);
    if (async) {
        qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE,
                              migrate_release_ram() &
                              migration_in_postcopy());
    } else {
        qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE);
    }
    ram_counters.transferred += TARGET_PAGE_SIZE;
    ram_counters.normal++;
    return 1;
}

/**
 * ram_save_page: send the given page to the stream
 *
 * Returns the number of pages written.
 *          < 0 - error
 *          >=0 - Number of pages written - this might legally be 0
 *                if xbzrle noticed the page was the same.
 *
 * @rs: current RAM state
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @last_stage: if we are at the completion stage
 */
static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
{
    int pages = -1;
    uint8_t *p;
    bool send_async = true;
    RAMBlock *block = pss->block;
    ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
    ram_addr_t current_addr = block->offset + offset;

    p = block->host + offset;
    trace_ram_save_page(block->idstr, (uint64_t)offset, p);

    XBZRLE_cache_lock();
    if (!rs->ram_bulk_stage && !migration_in_postcopy() &&
        migrate_use_xbzrle()) {
        pages = save_xbzrle_page(rs, &p, current_addr, block,
                                 offset, last_stage);
        if (!last_stage) {
            /* Can't send this cached data async, since the cache page
             * might get updated before it gets to the wire
             */
            send_async = false;
        }
    }

    /* XBZRLE overflow or normal page */
    if (pages == -1) {
        pages = save_normal_page(rs, block, offset, p, send_async);
    }

    XBZRLE_cache_unlock();

    return pages;
}

Xiao Guangrongdcaf4462018-03-30 15:51:20 +08001291static int do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08001292 ram_addr_t offset, uint8_t *source_buf)
Juan Quintela56e93d22015-05-07 19:33:31 +02001293{
Juan Quintela53518d92017-05-04 11:46:24 +02001294 RAMState *rs = ram_state;
Juan Quintela56e93d22015-05-07 19:33:31 +02001295 int bytes_sent, blen;
Liang Lia7a9a882016-05-05 15:32:57 +08001296 uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);
Juan Quintela56e93d22015-05-07 19:33:31 +02001297
Juan Quintela2bf3aa82017-05-10 13:28:13 +02001298 bytes_sent = save_page_header(rs, f, block, offset |
Juan Quintela56e93d22015-05-07 19:33:31 +02001299 RAM_SAVE_FLAG_COMPRESS_PAGE);
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08001300
1301 /*
 1302 * copy it to an internal buffer to avoid it being modified by the VM,
 1303 * so that we can catch any error during compression and
 1304 * decompression
1305 */
1306 memcpy(source_buf, p, TARGET_PAGE_SIZE);
1307 blen = qemu_put_compression_data(f, stream, source_buf, TARGET_PAGE_SIZE);
Liang Lib3be2892016-05-05 15:32:54 +08001308 if (blen < 0) {
1309 bytes_sent = 0;
1310 qemu_file_set_error(migrate_get_current()->to_dst_file, blen);
1311 error_report("compressed data failed!");
1312 } else {
1313 bytes_sent += blen;
Juan Quintela57273092017-03-20 22:25:28 +01001314 ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1);
Liang Lib3be2892016-05-05 15:32:54 +08001315 }
Juan Quintela56e93d22015-05-07 19:33:31 +02001316
1317 return bytes_sent;
1318}
1319
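/**
 * flush_compressed_data: flush all pending output of the compression threads
 *
 * Waits for every compression thread to finish its current page and then
 * moves each thread's buffered output into the migration stream.
 *
 * @rs: current RAM state
 */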
Juan Quintelace25d332017-03-15 11:00:51 +01001320static void flush_compressed_data(RAMState *rs)
Juan Quintela56e93d22015-05-07 19:33:31 +02001321{
1322 int idx, len, thread_count;
1323
1324 if (!migrate_use_compression()) {
1325 return;
1326 }
1327 thread_count = migrate_compress_threads();
Liang Lia7a9a882016-05-05 15:32:57 +08001328
Liang Li0d9f9a52016-05-05 15:32:59 +08001329 qemu_mutex_lock(&comp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02001330 for (idx = 0; idx < thread_count; idx++) {
Liang Lia7a9a882016-05-05 15:32:57 +08001331 while (!comp_param[idx].done) {
Liang Li0d9f9a52016-05-05 15:32:59 +08001332 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02001333 }
Liang Lia7a9a882016-05-05 15:32:57 +08001334 }
Liang Li0d9f9a52016-05-05 15:32:59 +08001335 qemu_mutex_unlock(&comp_done_lock);
Liang Lia7a9a882016-05-05 15:32:57 +08001336
1337 for (idx = 0; idx < thread_count; idx++) {
1338 qemu_mutex_lock(&comp_param[idx].mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08001339 if (!comp_param[idx].quit) {
Juan Quintelace25d332017-03-15 11:00:51 +01001340 len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
Juan Quintela93604472017-06-06 19:49:03 +02001341 ram_counters.transferred += len;
Juan Quintela56e93d22015-05-07 19:33:31 +02001342 }
Liang Lia7a9a882016-05-05 15:32:57 +08001343 qemu_mutex_unlock(&comp_param[idx].mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02001344 }
1345}
1346
1347static inline void set_compress_params(CompressParam *param, RAMBlock *block,
1348 ram_addr_t offset)
1349{
1350 param->block = block;
1351 param->offset = offset;
1352}
1353
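/**
 * compress_page_with_multi_thread: queue a page on an idle compression thread
 *
 * Waits until one of the compression threads is done, flushes that thread's
 * previous output into the migration stream and hands it the new page.
 *
 * Returns the number of pages written (always 1)
 *
 * @rs: current RAM state
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 */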
Juan Quintelace25d332017-03-15 11:00:51 +01001354static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block,
1355 ram_addr_t offset)
Juan Quintela56e93d22015-05-07 19:33:31 +02001356{
1357 int idx, thread_count, bytes_xmit = -1, pages = -1;
1358
1359 thread_count = migrate_compress_threads();
Liang Li0d9f9a52016-05-05 15:32:59 +08001360 qemu_mutex_lock(&comp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02001361 while (true) {
1362 for (idx = 0; idx < thread_count; idx++) {
1363 if (comp_param[idx].done) {
Liang Lia7a9a882016-05-05 15:32:57 +08001364 comp_param[idx].done = false;
Juan Quintelace25d332017-03-15 11:00:51 +01001365 bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file);
Liang Lia7a9a882016-05-05 15:32:57 +08001366 qemu_mutex_lock(&comp_param[idx].mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02001367 set_compress_params(&comp_param[idx], block, offset);
Liang Lia7a9a882016-05-05 15:32:57 +08001368 qemu_cond_signal(&comp_param[idx].cond);
1369 qemu_mutex_unlock(&comp_param[idx].mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02001370 pages = 1;
Juan Quintela93604472017-06-06 19:49:03 +02001371 ram_counters.normal++;
1372 ram_counters.transferred += bytes_xmit;
Juan Quintela56e93d22015-05-07 19:33:31 +02001373 break;
1374 }
1375 }
1376 if (pages > 0) {
1377 break;
1378 } else {
Liang Li0d9f9a52016-05-05 15:32:59 +08001379 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02001380 }
1381 }
Liang Li0d9f9a52016-05-05 15:32:59 +08001382 qemu_mutex_unlock(&comp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02001383
1384 return pages;
1385}
1386
1387/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001388 * find_dirty_block: find the next dirty page and update any state
1389 * associated with the search process.
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001390 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001391 * Returns whether a page was found
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001392 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001393 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001394 * @pss: data about the state of the current dirty page scan
1395 * @again: set to false if the search has scanned the whole of RAM
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001396 */
Juan Quintelaf20e2862017-03-21 16:19:05 +01001397static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001398{
Juan Quintelaf20e2862017-03-21 16:19:05 +01001399 pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
Juan Quintela6f37bb82017-03-13 19:26:29 +01001400 if (pss->complete_round && pss->block == rs->last_seen_block &&
Juan Quintelaa935e302017-03-21 15:36:51 +01001401 pss->page >= rs->last_page) {
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001402 /*
1403 * We've been once around the RAM and haven't found anything.
1404 * Give up.
1405 */
1406 *again = false;
1407 return false;
1408 }
Juan Quintelaa935e302017-03-21 15:36:51 +01001409 if ((pss->page << TARGET_PAGE_BITS) >= pss->block->used_length) {
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001410 /* Didn't find anything in this RAM Block */
Juan Quintelaa935e302017-03-21 15:36:51 +01001411 pss->page = 0;
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001412 pss->block = QLIST_NEXT_RCU(pss->block, next);
1413 if (!pss->block) {
1414 /* Hit the end of the list */
1415 pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
1416 /* Flag that we've looped */
1417 pss->complete_round = true;
Juan Quintela6f37bb82017-03-13 19:26:29 +01001418 rs->ram_bulk_stage = false;
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001419 if (migrate_use_xbzrle()) {
1420 /* If xbzrle is on, stop using the data compression at this
1421 * point. In theory, xbzrle can do better than compression.
1422 */
Juan Quintelace25d332017-03-15 11:00:51 +01001423 flush_compressed_data(rs);
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001424 }
1425 }
1426 /* Didn't find anything this time, but try again on the new block */
1427 *again = true;
1428 return false;
1429 } else {
1430 /* Can go around again, but... */
1431 *again = true;
1432 /* We've found something so probably don't need to */
1433 return true;
1434 }
1435}
1436
Juan Quintela3d0684b2017-03-23 15:06:39 +01001437/**
 1438 * unqueue_page: gets a page off the queue
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001439 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001440 * Helper for 'get_queued_page' - gets a page off the queue
1441 *
1442 * Returns the block of the page (or NULL if none available)
1443 *
Juan Quintelaec481c62017-03-20 22:12:40 +01001444 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001445 * @offset: used to return the offset within the RAMBlock
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001446 */
Juan Quintelaf20e2862017-03-21 16:19:05 +01001447static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001448{
1449 RAMBlock *block = NULL;
1450
Juan Quintelaec481c62017-03-20 22:12:40 +01001451 qemu_mutex_lock(&rs->src_page_req_mutex);
1452 if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
1453 struct RAMSrcPageRequest *entry =
1454 QSIMPLEQ_FIRST(&rs->src_page_requests);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001455 block = entry->rb;
1456 *offset = entry->offset;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001457
1458 if (entry->len > TARGET_PAGE_SIZE) {
1459 entry->len -= TARGET_PAGE_SIZE;
1460 entry->offset += TARGET_PAGE_SIZE;
1461 } else {
1462 memory_region_unref(block->mr);
Juan Quintelaec481c62017-03-20 22:12:40 +01001463 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001464 g_free(entry);
1465 }
1466 }
Juan Quintelaec481c62017-03-20 22:12:40 +01001467 qemu_mutex_unlock(&rs->src_page_req_mutex);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001468
1469 return block;
1470}
1471
Juan Quintela3d0684b2017-03-23 15:06:39 +01001472/**
 1473 * get_queued_page: unqueue a page from the postcopy requests
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001474 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001475 * Skips pages that are already sent (!dirty)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001476 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001477 * Returns whether a queued page was found
1478 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001479 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001480 * @pss: data about the state of the current dirty page scan
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001481 */
Juan Quintelaf20e2862017-03-21 16:19:05 +01001482static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001483{
1484 RAMBlock *block;
1485 ram_addr_t offset;
1486 bool dirty;
1487
1488 do {
Juan Quintelaf20e2862017-03-21 16:19:05 +01001489 block = unqueue_page(rs, &offset);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001490 /*
1491 * We're sending this page, and since it's postcopy nothing else
1492 * will dirty it, and we must make sure it doesn't get sent again
1493 * even if this queue request was received after the background
1494 * search already sent it.
1495 */
1496 if (block) {
Juan Quintelaf20e2862017-03-21 16:19:05 +01001497 unsigned long page;
1498
Juan Quintela6b6712e2017-03-22 15:18:04 +01001499 page = offset >> TARGET_PAGE_BITS;
1500 dirty = test_bit(page, block->bmap);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001501 if (!dirty) {
Juan Quintela06b10682017-03-21 15:18:05 +01001502 trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
Juan Quintela6b6712e2017-03-22 15:18:04 +01001503 page, test_bit(page, block->unsentmap));
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001504 } else {
Juan Quintelaf20e2862017-03-21 16:19:05 +01001505 trace_get_queued_page(block->idstr, (uint64_t)offset, page);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001506 }
1507 }
1508
1509 } while (block && !dirty);
1510
1511 if (block) {
1512 /*
1513 * As soon as we start servicing pages out of order, then we have
1514 * to kill the bulk stage, since the bulk stage assumes
1515 * in (migration_bitmap_find_and_reset_dirty) that every page is
 1516 * dirty, which is no longer true.
1517 */
Juan Quintela6f37bb82017-03-13 19:26:29 +01001518 rs->ram_bulk_stage = false;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001519
1520 /*
1521 * We want the background search to continue from the queued page
1522 * since the guest is likely to want other pages near to the page
1523 * it just requested.
1524 */
1525 pss->block = block;
Juan Quintelaa935e302017-03-21 15:36:51 +01001526 pss->page = offset >> TARGET_PAGE_BITS;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001527 }
1528
1529 return !!block;
1530}
1531
Juan Quintela56e93d22015-05-07 19:33:31 +02001532/**
Juan Quintela5e58f962017-04-03 22:06:54 +02001533 * migration_page_queue_free: drop any remaining pages in the ram
1534 * request queue
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001535 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001536 * It should be empty at the end anyway, but in error cases there may
 1537 * be some left. In case any page is left, we drop it.
1538 *
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001539 */
Juan Quintela83c13382017-05-04 11:45:01 +02001540static void migration_page_queue_free(RAMState *rs)
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001541{
Juan Quintelaec481c62017-03-20 22:12:40 +01001542 struct RAMSrcPageRequest *mspr, *next_mspr;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001543 /* This queue generally should be empty - but in the case of a failed
 1544 * migration it might have some droppings in.
1545 */
1546 rcu_read_lock();
Juan Quintelaec481c62017-03-20 22:12:40 +01001547 QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001548 memory_region_unref(mspr->rb->mr);
Juan Quintelaec481c62017-03-20 22:12:40 +01001549 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001550 g_free(mspr);
1551 }
1552 rcu_read_unlock();
1553}
1554
1555/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001556 * ram_save_queue_pages: queue the page for transmission
1557 *
1558 * A request from postcopy destination for example.
1559 *
1560 * Returns zero on success or negative on error
1561 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001562 * @rbname: Name of the RAMBlock of the request. NULL means the
 1563 * same as the last one.
1564 * @start: starting address from the start of the RAMBlock
1565 * @len: length (in bytes) to send
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001566 */
Juan Quintela96506892017-03-14 18:41:03 +01001567int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001568{
1569 RAMBlock *ramblock;
Juan Quintela53518d92017-05-04 11:46:24 +02001570 RAMState *rs = ram_state;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001571
Juan Quintela93604472017-06-06 19:49:03 +02001572 ram_counters.postcopy_requests++;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001573 rcu_read_lock();
1574 if (!rbname) {
1575 /* Reuse last RAMBlock */
Juan Quintela68a098f2017-03-14 13:48:42 +01001576 ramblock = rs->last_req_rb;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001577
1578 if (!ramblock) {
1579 /*
1580 * Shouldn't happen, we can't reuse the last RAMBlock if
1581 * it's the 1st request.
1582 */
1583 error_report("ram_save_queue_pages no previous block");
1584 goto err;
1585 }
1586 } else {
1587 ramblock = qemu_ram_block_by_name(rbname);
1588
1589 if (!ramblock) {
1590 /* We shouldn't be asked for a non-existent RAMBlock */
1591 error_report("ram_save_queue_pages no block '%s'", rbname);
1592 goto err;
1593 }
Juan Quintela68a098f2017-03-14 13:48:42 +01001594 rs->last_req_rb = ramblock;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001595 }
1596 trace_ram_save_queue_pages(ramblock->idstr, start, len);
1597 if (start+len > ramblock->used_length) {
Juan Quintela9458ad62015-11-10 17:42:05 +01001598 error_report("%s request overrun start=" RAM_ADDR_FMT " len="
1599 RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001600 __func__, start, len, ramblock->used_length);
1601 goto err;
1602 }
1603
Juan Quintelaec481c62017-03-20 22:12:40 +01001604 struct RAMSrcPageRequest *new_entry =
1605 g_malloc0(sizeof(struct RAMSrcPageRequest));
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001606 new_entry->rb = ramblock;
1607 new_entry->offset = start;
1608 new_entry->len = len;
1609
1610 memory_region_ref(ramblock->mr);
Juan Quintelaec481c62017-03-20 22:12:40 +01001611 qemu_mutex_lock(&rs->src_page_req_mutex);
1612 QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
1613 qemu_mutex_unlock(&rs->src_page_req_mutex);
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001614 rcu_read_unlock();
1615
1616 return 0;
1617
1618err:
1619 rcu_read_unlock();
1620 return -1;
1621}
1622
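/**
 * save_page_use_compression: decide whether compression should be used
 *
 * Returns true if compression is enabled and xbzrle has not yet taken
 * over (i.e. we are still in the bulk stage or xbzrle is disabled).
 *
 * @rs: current RAM state
 */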
Xiao Guangrongd7400a32018-03-30 15:51:26 +08001623static bool save_page_use_compression(RAMState *rs)
1624{
1625 if (!migrate_use_compression()) {
1626 return false;
1627 }
1628
1629 /*
 1630 * If xbzrle is on, stop using the data compression after the first
1631 * round of migration even if compression is enabled. In theory,
1632 * xbzrle can do better than compression.
1633 */
1634 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
1635 return true;
1636 }
1637
1638 return false;
1639}
1640
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001641/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001642 * ram_save_target_page: save one target page
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001643 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001644 * Returns the number of pages written
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001645 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001646 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001647 * @pss: data about the page we want to send
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001648 * @last_stage: if we are at the completion stage
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001649 */
Juan Quintelaa0a8aa12017-03-20 22:29:07 +01001650static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
Juan Quintelaf20e2862017-03-21 16:19:05 +01001651 bool last_stage)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001652{
Xiao Guangronga8ec91f2018-03-30 15:51:25 +08001653 RAMBlock *block = pss->block;
1654 ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
1655 int res;
1656
1657 if (control_save_page(rs, block, offset, &res)) {
1658 return res;
1659 }
1660
Xiao Guangrong1faa5662018-03-30 15:51:24 +08001661 /*
Xiao Guangrongd7400a32018-03-30 15:51:26 +08001662 * When starting to process a new block, the first page of
 1663 * the block should be sent out before other pages in the same
 1664 * block, and all the pages in the last block should have been sent
 1665 * out. Keeping this order is important, because the 'cont' flag
 1666 * is used to avoid resending the block name.
Xiao Guangrong1faa5662018-03-30 15:51:24 +08001667 */
Xiao Guangrongd7400a32018-03-30 15:51:26 +08001668 if (block != rs->last_sent_block && save_page_use_compression(rs)) {
1669 flush_compressed_data(rs);
1670 }
1671
1672 res = save_zero_page(rs, block, offset);
1673 if (res > 0) {
1674 /* Must let xbzrle know, otherwise a previous (now 0'd) cached
1675 * page would be stale
1676 */
1677 if (!save_page_use_compression(rs)) {
1678 XBZRLE_cache_lock();
1679 xbzrle_cache_zero_page(rs, block->offset + offset);
1680 XBZRLE_cache_unlock();
1681 }
1682 ram_release_pages(block->idstr, offset, res);
1683 return res;
1684 }
1685
Xiao Guangrongda3f56c2018-03-30 15:51:28 +08001686 /*
1687 * Make sure the first page is sent out before other pages.
1688 *
 1689 * We post it as a normal page as compression will take a lot of
 1690 * CPU resource.
1691 */
1692 if (block == rs->last_sent_block && save_page_use_compression(rs)) {
Xiao Guangrong701b1872018-04-28 16:10:45 +08001693 return compress_page_with_multi_thread(rs, block, offset);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001694 }
1695
Xiao Guangrong1faa5662018-03-30 15:51:24 +08001696 return ram_save_page(rs, pss, last_stage);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001697}
1698
1699/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001700 * ram_save_host_page: save a whole host page
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001701 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001702 * Starting at *offset send pages up to the end of the current host
1703 * page. It's valid for the initial offset to point into the middle of
1704 * a host page in which case the remainder of the hostpage is sent.
1705 * Only dirty target pages are sent. Note that the host page size may
1706 * be a huge page for this block.
Dr. David Alan Gilbert1eb3fc02017-05-17 17:58:09 +01001707 * The saving stops at the boundary of the used_length of the block
1708 * if the RAMBlock isn't a multiple of the host page size.
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001709 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001710 * Returns the number of pages written or negative on error
1711 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001712 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001714 * @pss: data about the page we want to send
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001715 * @last_stage: if we are at the completion stage
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001716 */
Juan Quintelaa0a8aa12017-03-20 22:29:07 +01001717static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
Juan Quintelaf20e2862017-03-21 16:19:05 +01001718 bool last_stage)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001719{
1720 int tmppages, pages = 0;
Juan Quintelaa935e302017-03-21 15:36:51 +01001721 size_t pagesize_bits =
1722 qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
Dr. David Alan Gilbert4c011c32017-02-24 18:28:39 +00001723
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001724 do {
Xiao Guangrong1faa5662018-03-30 15:51:24 +08001725 /* Check whether the page is dirty and if so send it */
1726 if (!migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
1727 pss->page++;
1728 continue;
1729 }
1730
Juan Quintelaf20e2862017-03-21 16:19:05 +01001731 tmppages = ram_save_target_page(rs, pss, last_stage);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001732 if (tmppages < 0) {
1733 return tmppages;
1734 }
1735
1736 pages += tmppages;
Xiao Guangrong1faa5662018-03-30 15:51:24 +08001737 if (pss->block->unsentmap) {
1738 clear_bit(pss->page, pss->block->unsentmap);
1739 }
1740
Juan Quintelaa935e302017-03-21 15:36:51 +01001741 pss->page++;
Dr. David Alan Gilbert1eb3fc02017-05-17 17:58:09 +01001742 } while ((pss->page & (pagesize_bits - 1)) &&
1743 offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS));
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001744
1745 /* The offset we leave with is the last one we looked at */
Juan Quintelaa935e302017-03-21 15:36:51 +01001746 pss->page--;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001747 return pages;
1748}
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001749
1750/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001751 * ram_find_and_save_block: finds a dirty page and sends it to f
Juan Quintela56e93d22015-05-07 19:33:31 +02001752 *
1753 * Called within an RCU critical section.
1754 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001755 * Returns the number of pages written where zero means no dirty pages
Juan Quintela56e93d22015-05-07 19:33:31 +02001756 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001757 * @rs: current RAM state
Juan Quintela56e93d22015-05-07 19:33:31 +02001758 * @last_stage: if we are at the completion stage
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001759 *
1760 * On systems where host-page-size > target-page-size it will send all the
1761 * pages in a host page that are dirty.
Juan Quintela56e93d22015-05-07 19:33:31 +02001762 */
1763
Juan Quintelace25d332017-03-15 11:00:51 +01001764static int ram_find_and_save_block(RAMState *rs, bool last_stage)
Juan Quintela56e93d22015-05-07 19:33:31 +02001765{
Dr. David Alan Gilbertb8fb8cb2015-09-23 15:27:10 +01001766 PageSearchStatus pss;
Juan Quintela56e93d22015-05-07 19:33:31 +02001767 int pages = 0;
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001768 bool again, found;
Juan Quintela56e93d22015-05-07 19:33:31 +02001769
Ashijeet Acharya0827b9e2017-02-08 19:58:45 +05301770 /* No dirty page as there is zero RAM */
1771 if (!ram_bytes_total()) {
1772 return pages;
1773 }
1774
Juan Quintela6f37bb82017-03-13 19:26:29 +01001775 pss.block = rs->last_seen_block;
Juan Quintelaa935e302017-03-21 15:36:51 +01001776 pss.page = rs->last_page;
Dr. David Alan Gilbertb8fb8cb2015-09-23 15:27:10 +01001777 pss.complete_round = false;
1778
1779 if (!pss.block) {
1780 pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
1781 }
Juan Quintela56e93d22015-05-07 19:33:31 +02001782
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001783 do {
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001784 again = true;
Juan Quintelaf20e2862017-03-21 16:19:05 +01001785 found = get_queued_page(rs, &pss);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001786
1787 if (!found) {
1788 /* priority queue empty, so just search for something dirty */
Juan Quintelaf20e2862017-03-21 16:19:05 +01001789 found = find_dirty_block(rs, &pss, &again);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001790 }
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001791
1792 if (found) {
Juan Quintelaf20e2862017-03-21 16:19:05 +01001793 pages = ram_save_host_page(rs, &pss, last_stage);
Juan Quintela56e93d22015-05-07 19:33:31 +02001794 }
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001795 } while (!pages && again);
Juan Quintela56e93d22015-05-07 19:33:31 +02001796
Juan Quintela6f37bb82017-03-13 19:26:29 +01001797 rs->last_seen_block = pss.block;
Juan Quintelaa935e302017-03-21 15:36:51 +01001798 rs->last_page = pss.page;
Juan Quintela56e93d22015-05-07 19:33:31 +02001799
1800 return pages;
1801}
1802
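/**
 * acct_update_position: update RAM counters for externally transferred data
 *
 * Adds @size bytes worth of pages to the duplicate counter (for zero pages)
 * or to the normal/transferred counters, updating the file position in the
 * latter case.
 *
 * @f: QEMUFile the data was sent on
 * @size: number of bytes to account for
 * @zero: whether the pages were zero pages
 */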
1803void acct_update_position(QEMUFile *f, size_t size, bool zero)
1804{
1805 uint64_t pages = size / TARGET_PAGE_SIZE;
Juan Quintelaf7ccd612017-03-13 20:30:21 +01001806
Juan Quintela56e93d22015-05-07 19:33:31 +02001807 if (zero) {
Juan Quintela93604472017-06-06 19:49:03 +02001808 ram_counters.duplicate += pages;
Juan Quintela56e93d22015-05-07 19:33:31 +02001809 } else {
Juan Quintela93604472017-06-06 19:49:03 +02001810 ram_counters.normal += pages;
1811 ram_counters.transferred += size;
Juan Quintela56e93d22015-05-07 19:33:31 +02001812 qemu_update_position(f, size);
1813 }
1814}
1815
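/**
 * ram_bytes_total: returns the total used size of all RAM blocks, in bytes
 */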
Juan Quintela56e93d22015-05-07 19:33:31 +02001816uint64_t ram_bytes_total(void)
1817{
1818 RAMBlock *block;
1819 uint64_t total = 0;
1820
1821 rcu_read_lock();
Peter Xu99e15582017-05-12 12:17:39 +08001822 RAMBLOCK_FOREACH(block) {
Juan Quintela56e93d22015-05-07 19:33:31 +02001823 total += block->used_length;
Peter Xu99e15582017-05-12 12:17:39 +08001824 }
Juan Quintela56e93d22015-05-07 19:33:31 +02001825 rcu_read_unlock();
1826 return total;
1827}
1828
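/* Allocate the buffer used to decode xbzrle pages on the destination side;
 * freed again in xbzrle_load_cleanup(). */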
Juan Quintelaf265e0e2017-06-28 11:52:27 +02001829static void xbzrle_load_setup(void)
Juan Quintela56e93d22015-05-07 19:33:31 +02001830{
Juan Quintelaf265e0e2017-06-28 11:52:27 +02001831 XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
Juan Quintela56e93d22015-05-07 19:33:31 +02001832}
1833
Juan Quintelaf265e0e2017-06-28 11:52:27 +02001834static void xbzrle_load_cleanup(void)
1835{
1836 g_free(XBZRLE.decoded_buf);
1837 XBZRLE.decoded_buf = NULL;
1838}
1839
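/* Free the RAMState: drop any queued page requests and destroy its mutexes. */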
Peter Xu7d7c96b2017-10-19 14:31:58 +08001840static void ram_state_cleanup(RAMState **rsp)
1841{
Dr. David Alan Gilbertb9ccaf62018-02-12 16:03:39 +00001842 if (*rsp) {
1843 migration_page_queue_free(*rsp);
1844 qemu_mutex_destroy(&(*rsp)->bitmap_mutex);
1845 qemu_mutex_destroy(&(*rsp)->src_page_req_mutex);
1846 g_free(*rsp);
1847 *rsp = NULL;
1848 }
Peter Xu7d7c96b2017-10-19 14:31:58 +08001849}
1850
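/* Tear down the xbzrle cache and the buffers used on the source side. */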
Peter Xu84593a02017-10-19 14:31:59 +08001851static void xbzrle_cleanup(void)
1852{
1853 XBZRLE_cache_lock();
1854 if (XBZRLE.cache) {
1855 cache_fini(XBZRLE.cache);
1856 g_free(XBZRLE.encoded_buf);
1857 g_free(XBZRLE.current_buf);
1858 g_free(XBZRLE.zero_target_page);
1859 XBZRLE.cache = NULL;
1860 XBZRLE.encoded_buf = NULL;
1861 XBZRLE.current_buf = NULL;
1862 XBZRLE.zero_target_page = NULL;
1863 }
1864 XBZRLE_cache_unlock();
1865}
1866
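/**
 * ram_save_cleanup: free the resources used while saving RAM
 *
 * Stops dirty log tracking, frees the per-block bitmaps and releases the
 * xbzrle cache, the compression threads and the RAMState itself.
 *
 * @opaque: pointer to the RAMState pointer
 */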
Juan Quintelaf265e0e2017-06-28 11:52:27 +02001867static void ram_save_cleanup(void *opaque)
Juan Quintela56e93d22015-05-07 19:33:31 +02001868{
Juan Quintela53518d92017-05-04 11:46:24 +02001869 RAMState **rsp = opaque;
Juan Quintela6b6712e2017-03-22 15:18:04 +01001870 RAMBlock *block;
Juan Quintelaeb859c52017-03-13 21:51:55 +01001871
Li Zhijian2ff64032015-07-02 20:18:05 +08001872 /* The caller must hold the iothread lock or be in a bh, so there is
 1873 * no writing race against this migration_bitmap
1874 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01001875 memory_global_dirty_log_stop();
1876
1877 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1878 g_free(block->bmap);
1879 block->bmap = NULL;
1880 g_free(block->unsentmap);
1881 block->unsentmap = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02001882 }
1883
Peter Xu84593a02017-10-19 14:31:59 +08001884 xbzrle_cleanup();
Juan Quintelaf0afa332017-06-28 11:52:28 +02001885 compress_threads_save_cleanup();
Peter Xu7d7c96b2017-10-19 14:31:58 +08001886 ram_state_cleanup(rsp);
Juan Quintela56e93d22015-05-07 19:33:31 +02001887}
1888
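/* Restart the page search from the beginning of RAM and re-enter the bulk
 * stage. */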
Juan Quintela6f37bb82017-03-13 19:26:29 +01001889static void ram_state_reset(RAMState *rs)
Juan Quintela56e93d22015-05-07 19:33:31 +02001890{
Juan Quintela6f37bb82017-03-13 19:26:29 +01001891 rs->last_seen_block = NULL;
1892 rs->last_sent_block = NULL;
Juan Quintela269ace22017-03-21 15:23:31 +01001893 rs->last_page = 0;
Juan Quintela6f37bb82017-03-13 19:26:29 +01001894 rs->last_version = ram_list.version;
1895 rs->ram_bulk_stage = true;
Juan Quintela56e93d22015-05-07 19:33:31 +02001896}
1897
1898#define MAX_WAIT 50 /* ms, half buffered_file limit */
1899
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00001900/*
1901 * 'expected' is the value you expect the bitmap mostly to be full
1902 * of; it won't bother printing lines that are all this value.
1903 * If 'todump' is null the migration bitmap is dumped.
1904 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01001905void ram_debug_dump_bitmap(unsigned long *todump, bool expected,
1906 unsigned long pages)
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00001907{
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00001908 int64_t cur;
1909 int64_t linelen = 128;
1910 char linebuf[129];
1911
Juan Quintela6b6712e2017-03-22 15:18:04 +01001912 for (cur = 0; cur < pages; cur += linelen) {
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00001913 int64_t curb;
1914 bool found = false;
1915 /*
1916 * Last line; catch the case where the line length
1917 * is longer than remaining ram
1918 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01001919 if (cur + linelen > pages) {
1920 linelen = pages - cur;
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00001921 }
1922 for (curb = 0; curb < linelen; curb++) {
1923 bool thisbit = test_bit(cur + curb, todump);
1924 linebuf[curb] = thisbit ? '1' : '.';
1925 found = found || (thisbit != expected);
1926 }
1927 if (found) {
1928 linebuf[curb] = '\0';
1929 fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
1930 }
1931 }
1932}
1933
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001934/* **** functions for postcopy ***** */
1935
Pavel Butsykinced1c612017-02-03 18:23:21 +03001936void ram_postcopy_migrated_memory_release(MigrationState *ms)
1937{
1938 struct RAMBlock *block;
Pavel Butsykinced1c612017-02-03 18:23:21 +03001939
Peter Xu99e15582017-05-12 12:17:39 +08001940 RAMBLOCK_FOREACH(block) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01001941 unsigned long *bitmap = block->bmap;
1942 unsigned long range = block->used_length >> TARGET_PAGE_BITS;
1943 unsigned long run_start = find_next_zero_bit(bitmap, range, 0);
Pavel Butsykinced1c612017-02-03 18:23:21 +03001944
1945 while (run_start < range) {
1946 unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
Juan Quintelaaaa20642017-03-21 11:35:24 +01001947 ram_discard_range(block->idstr, run_start << TARGET_PAGE_BITS,
Pavel Butsykinced1c612017-02-03 18:23:21 +03001948 (run_end - run_start) << TARGET_PAGE_BITS);
1949 run_start = find_next_zero_bit(bitmap, range, run_end + 1);
1950 }
1951 }
1952}
1953
Juan Quintela3d0684b2017-03-23 15:06:39 +01001954/**
1955 * postcopy_send_discard_bm_ram: discard a RAMBlock
1956 *
1957 * Returns zero on success
1958 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001959 * Callback from postcopy_each_ram_send_discard for each RAMBlock
1960 * Note: At this point the 'unsentmap' is the processed bitmap combined
1961 * with the dirtymap; so a '1' means it's either dirty or unsent.
Juan Quintela3d0684b2017-03-23 15:06:39 +01001962 *
1963 * @ms: current migration state
1964 * @pds: state for postcopy
 1965 * @block: RAMBlock to send the discards for (its unsentmap is
 1966 * walked to find the ranges to discard)
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001967 */
1968static int postcopy_send_discard_bm_ram(MigrationState *ms,
1969 PostcopyDiscardState *pds,
Juan Quintela6b6712e2017-03-22 15:18:04 +01001970 RAMBlock *block)
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001971{
Juan Quintela6b6712e2017-03-22 15:18:04 +01001972 unsigned long end = block->used_length >> TARGET_PAGE_BITS;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001973 unsigned long current;
Juan Quintela6b6712e2017-03-22 15:18:04 +01001974 unsigned long *unsentmap = block->unsentmap;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001975
Juan Quintela6b6712e2017-03-22 15:18:04 +01001976 for (current = 0; current < end; ) {
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001977 unsigned long one = find_next_bit(unsentmap, end, current);
1978
1979 if (one <= end) {
1980 unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1);
1981 unsigned long discard_length;
1982
1983 if (zero >= end) {
1984 discard_length = end - one;
1985 } else {
1986 discard_length = zero - one;
1987 }
Dr. David Alan Gilbertd688c622016-06-13 12:16:40 +01001988 if (discard_length) {
1989 postcopy_discard_send_range(ms, pds, one, discard_length);
1990 }
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001991 current = one + discard_length;
1992 } else {
1993 current = one;
1994 }
1995 }
1996
1997 return 0;
1998}
1999
Juan Quintela3d0684b2017-03-23 15:06:39 +01002000/**
2001 * postcopy_each_ram_send_discard: discard all RAMBlocks
2002 *
2003 * Returns 0 for success or negative for error
2004 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002005 * Utility for the outgoing postcopy code.
2006 * Calls postcopy_send_discard_bm_ram for each RAMBlock
2007 * passing it bitmap indexes and name.
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002008 * (qemu_ram_foreach_block ends up passing unscaled lengths
2009 * which would mean postcopy code would have to deal with target page)
Juan Quintela3d0684b2017-03-23 15:06:39 +01002010 *
2011 * @ms: current migration state
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002012 */
2013static int postcopy_each_ram_send_discard(MigrationState *ms)
2014{
2015 struct RAMBlock *block;
2016 int ret;
2017
Peter Xu99e15582017-05-12 12:17:39 +08002018 RAMBLOCK_FOREACH(block) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01002019 PostcopyDiscardState *pds =
2020 postcopy_discard_send_init(ms, block->idstr);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002021
2022 /*
2023 * Postcopy sends chunks of bitmap over the wire, but it
2024 * just needs indexes at this point, avoids it having
2025 * target page specific code.
2026 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002027 ret = postcopy_send_discard_bm_ram(ms, pds, block);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002028 postcopy_discard_send_finish(ms, pds);
2029 if (ret) {
2030 return ret;
2031 }
2032 }
2033
2034 return 0;
2035}
2036
Juan Quintela3d0684b2017-03-23 15:06:39 +01002037/**
 2038 * postcopy_chunk_hostpages_pass: canonicalize bitmap in hostpages
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002039 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002040 * Helper for postcopy_chunk_hostpages; it's called twice to
2041 * canonicalize the two bitmaps, that are similar, but one is
2042 * inverted.
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002043 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002044 * Postcopy requires that all target pages in a hostpage are dirty or
2045 * clean, not a mix. This function canonicalizes the bitmaps.
2046 *
2047 * @ms: current migration state
2048 * @unsent_pass: if true we need to canonicalize partially unsent host pages
2049 * otherwise we need to canonicalize partially dirty host pages
2050 * @block: block that contains the page we want to canonicalize
2051 * @pds: state for postcopy
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002052 */
2053static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
2054 RAMBlock *block,
2055 PostcopyDiscardState *pds)
2056{
Juan Quintela53518d92017-05-04 11:46:24 +02002057 RAMState *rs = ram_state;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002058 unsigned long *bitmap = block->bmap;
2059 unsigned long *unsentmap = block->unsentmap;
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00002060 unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002061 unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002062 unsigned long run_start;
2063
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00002064 if (block->page_size == TARGET_PAGE_SIZE) {
2065 /* Easy case - TPS==HPS for a non-huge page RAMBlock */
2066 return;
2067 }
2068
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002069 if (unsent_pass) {
2070 /* Find a sent page */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002071 run_start = find_next_zero_bit(unsentmap, pages, 0);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002072 } else {
2073 /* Find a dirty page */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002074 run_start = find_next_bit(bitmap, pages, 0);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002075 }
2076
Juan Quintela6b6712e2017-03-22 15:18:04 +01002077 while (run_start < pages) {
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002078 bool do_fixup = false;
2079 unsigned long fixup_start_addr;
2080 unsigned long host_offset;
2081
2082 /*
2083 * If the start of this run of pages is in the middle of a host
2084 * page, then we need to fixup this host page.
2085 */
2086 host_offset = run_start % host_ratio;
2087 if (host_offset) {
2088 do_fixup = true;
2089 run_start -= host_offset;
2090 fixup_start_addr = run_start;
2091 /* For the next pass */
2092 run_start = run_start + host_ratio;
2093 } else {
2094 /* Find the end of this run */
2095 unsigned long run_end;
2096 if (unsent_pass) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01002097 run_end = find_next_bit(unsentmap, pages, run_start + 1);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002098 } else {
Juan Quintela6b6712e2017-03-22 15:18:04 +01002099 run_end = find_next_zero_bit(bitmap, pages, run_start + 1);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002100 }
2101 /*
2102 * If the end isn't at the start of a host page, then the
2103 * run doesn't finish at the end of a host page
2104 * and we need to discard.
2105 */
2106 host_offset = run_end % host_ratio;
2107 if (host_offset) {
2108 do_fixup = true;
2109 fixup_start_addr = run_end - host_offset;
2110 /*
2111 * This host page has gone, the next loop iteration starts
2112 * from after the fixup
2113 */
2114 run_start = fixup_start_addr + host_ratio;
2115 } else {
2116 /*
2117 * No discards on this iteration, next loop starts from
2118 * next sent/dirty page
2119 */
2120 run_start = run_end + 1;
2121 }
2122 }
2123
2124 if (do_fixup) {
2125 unsigned long page;
2126
2127 /* Tell the destination to discard this page */
2128 if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
2129 /* For the unsent_pass we:
2130 * discard partially sent pages
2131 * For the !unsent_pass (dirty) we:
2132 * discard partially dirty pages that were sent
2133 * (any partially sent pages were already discarded
2134 * by the previous unsent_pass)
2135 */
2136 postcopy_discard_send_range(ms, pds, fixup_start_addr,
2137 host_ratio);
2138 }
2139
2140 /* Clean up the bitmap */
2141 for (page = fixup_start_addr;
2142 page < fixup_start_addr + host_ratio; page++) {
2143 /* All pages in this host page are now not sent */
2144 set_bit(page, unsentmap);
2145
2146 /*
2147 * Remark them as dirty, updating the count for any pages
2148 * that weren't previously dirty.
2149 */
Juan Quintela0d8ec882017-03-13 21:21:41 +01002150 rs->migration_dirty_pages += !test_and_set_bit(page, bitmap);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002151 }
2152 }
2153
2154 if (unsent_pass) {
2155 /* Find the next sent page for the next iteration */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002156 run_start = find_next_zero_bit(unsentmap, pages, run_start);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002157 } else {
2158 /* Find the next dirty page for the next iteration */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002159 run_start = find_next_bit(bitmap, pages, run_start);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002160 }
2161 }
2162}
2163
Juan Quintela3d0684b2017-03-23 15:06:39 +01002164/**
 2165 * postcopy_chunk_hostpages: discard any partially sent host page
2166 *
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002167 * Utility for the outgoing postcopy code.
2168 *
2169 * Discard any partially sent host-page size chunks, mark any partially
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00002170 * dirty host-page size chunks as all dirty. Here the host page size
 2171 * is that of the particular RAMBlock, i.e. it might be a huge page.
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002172 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002173 * Returns zero on success
2174 *
2175 * @ms: current migration state
Juan Quintela6b6712e2017-03-22 15:18:04 +01002176 * @block: block we want to work with
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002177 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002178static int postcopy_chunk_hostpages(MigrationState *ms, RAMBlock *block)
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002179{
Juan Quintela6b6712e2017-03-22 15:18:04 +01002180 PostcopyDiscardState *pds =
2181 postcopy_discard_send_init(ms, block->idstr);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002182
Juan Quintela6b6712e2017-03-22 15:18:04 +01002183 /* First pass: Discard all partially sent host pages */
2184 postcopy_chunk_hostpages_pass(ms, true, block, pds);
2185 /*
2186 * Second pass: Ensure that all partially dirty host pages are made
2187 * fully dirty.
2188 */
2189 postcopy_chunk_hostpages_pass(ms, false, block, pds);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002190
Juan Quintela6b6712e2017-03-22 15:18:04 +01002191 postcopy_discard_send_finish(ms, pds);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002192 return 0;
2193}
2194
Juan Quintela3d0684b2017-03-23 15:06:39 +01002195/**
2196 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
2197 *
2198 * Returns zero on success
2199 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002200 * Transmit the set of pages to be discarded after precopy to the target;
 2201 * these are pages that:
2202 * a) Have been previously transmitted but are now dirty again
2203 * b) Pages that have never been transmitted, this ensures that
2204 * any pages on the destination that have been mapped by background
2205 * tasks get discarded (transparent huge pages is the specific concern)
2206 * Hopefully this is pretty sparse
Juan Quintela3d0684b2017-03-23 15:06:39 +01002207 *
2208 * @ms: current migration state
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002209 */
2210int ram_postcopy_send_discard_bitmap(MigrationState *ms)
2211{
Juan Quintela53518d92017-05-04 11:46:24 +02002212 RAMState *rs = ram_state;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002213 RAMBlock *block;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002214 int ret;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002215
2216 rcu_read_lock();
2217
2218 /* This should be our last sync, the src is now paused */
Juan Quintelaeb859c52017-03-13 21:51:55 +01002219 migration_bitmap_sync(rs);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002220
Juan Quintela6b6712e2017-03-22 15:18:04 +01002221 /* Easiest way to make sure we don't resume in the middle of a host-page */
2222 rs->last_seen_block = NULL;
2223 rs->last_sent_block = NULL;
2224 rs->last_page = 0;
2225
2226 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2227 unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
2228 unsigned long *bitmap = block->bmap;
2229 unsigned long *unsentmap = block->unsentmap;
2230
2231 if (!unsentmap) {
2232 /* We don't have a safe way to resize the sentmap, so
2233 * if the bitmap was resized it will be NULL at this
2234 * point.
2235 */
2236 error_report("migration ram resized during precopy phase");
2237 rcu_read_unlock();
2238 return -EINVAL;
2239 }
2240 /* Deal with TPS != HPS and huge pages */
2241 ret = postcopy_chunk_hostpages(ms, block);
2242 if (ret) {
2243 rcu_read_unlock();
2244 return ret;
2245 }
2246
2247 /*
2248 * Update the unsentmap to be unsentmap = unsentmap | dirty
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002249 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002250 bitmap_or(unsentmap, unsentmap, bitmap, pages);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002251#ifdef DEBUG_POSTCOPY
Juan Quintela6b6712e2017-03-22 15:18:04 +01002252 ram_debug_dump_bitmap(unsentmap, true, pages);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002253#endif
Juan Quintela6b6712e2017-03-22 15:18:04 +01002254 }
2255 trace_ram_postcopy_send_discard_bitmap();
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002256
2257 ret = postcopy_each_ram_send_discard(ms);
2258 rcu_read_unlock();
2259
2260 return ret;
2261}
2262
Juan Quintela3d0684b2017-03-23 15:06:39 +01002263/**
2264 * ram_discard_range: discard dirtied pages at the beginning of postcopy
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002265 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002266 * Returns zero on success
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002267 *
Juan Quintela36449152017-03-23 15:11:59 +01002268 * @rbname: name of the RAMBlock of the request. NULL means the
2269 * same that last one.
Juan Quintela3d0684b2017-03-23 15:06:39 +01002270 * @start: RAMBlock starting page
2271 * @length: RAMBlock size
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002272 */
Juan Quintelaaaa20642017-03-21 11:35:24 +01002273int ram_discard_range(const char *rbname, uint64_t start, size_t length)
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002274{
2275 int ret = -1;
2276
Juan Quintela36449152017-03-23 15:11:59 +01002277 trace_ram_discard_range(rbname, start, length);
Dr. David Alan Gilbertd3a50382017-02-24 18:28:32 +00002278
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002279 rcu_read_lock();
Juan Quintela36449152017-03-23 15:11:59 +01002280 RAMBlock *rb = qemu_ram_block_by_name(rbname);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002281
2282 if (!rb) {
Juan Quintela36449152017-03-23 15:11:59 +01002283 error_report("ram_discard_range: Failed to find block '%s'", rbname);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002284 goto err;
2285 }
2286
Alexey Perevalovf9494612017-10-05 14:13:20 +03002287 bitmap_clear(rb->receivedmap, start >> qemu_target_page_bits(),
2288 length >> qemu_target_page_bits());
Dr. David Alan Gilbertd3a50382017-02-24 18:28:32 +00002289 ret = ram_block_discard_range(rb, start, length);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002290
2291err:
2292 rcu_read_unlock();
2293
2294 return ret;
2295}
2296
Peter Xu84593a02017-10-19 14:31:59 +08002297/*
2298 * For every allocation, we will try not to crash the VM if the
 2299 * allocation fails.
2300 */
2301static int xbzrle_init(void)
2302{
2303 Error *local_err = NULL;
2304
2305 if (!migrate_use_xbzrle()) {
2306 return 0;
2307 }
2308
2309 XBZRLE_cache_lock();
2310
2311 XBZRLE.zero_target_page = g_try_malloc0(TARGET_PAGE_SIZE);
2312 if (!XBZRLE.zero_target_page) {
2313 error_report("%s: Error allocating zero page", __func__);
2314 goto err_out;
2315 }
2316
2317 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(),
2318 TARGET_PAGE_SIZE, &local_err);
2319 if (!XBZRLE.cache) {
2320 error_report_err(local_err);
2321 goto free_zero_page;
2322 }
2323
2324 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
2325 if (!XBZRLE.encoded_buf) {
2326 error_report("%s: Error allocating encoded_buf", __func__);
2327 goto free_cache;
2328 }
2329
2330 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
2331 if (!XBZRLE.current_buf) {
2332 error_report("%s: Error allocating current_buf", __func__);
2333 goto free_encoded_buf;
2334 }
2335
2336 /* We are all good */
2337 XBZRLE_cache_unlock();
2338 return 0;
2339
2340free_encoded_buf:
2341 g_free(XBZRLE.encoded_buf);
2342 XBZRLE.encoded_buf = NULL;
2343free_cache:
2344 cache_fini(XBZRLE.cache);
2345 XBZRLE.cache = NULL;
2346free_zero_page:
2347 g_free(XBZRLE.zero_target_page);
2348 XBZRLE.zero_target_page = NULL;
2349err_out:
2350 XBZRLE_cache_unlock();
2351 return -ENOMEM;
2352}
2353
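/**
 * ram_state_init: allocate and initialize the global RAMState
 *
 * Returns zero on success and -1 on allocation failure
 *
 * @rsp: where to store the pointer to the new RAMState
 */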
Juan Quintela53518d92017-05-04 11:46:24 +02002354static int ram_state_init(RAMState **rsp)
Juan Quintela56e93d22015-05-07 19:33:31 +02002355{
Peter Xu7d00ee62017-10-19 14:31:57 +08002356 *rsp = g_try_new0(RAMState, 1);
2357
2358 if (!*rsp) {
2359 error_report("%s: Init ramstate fail", __func__);
2360 return -1;
2361 }
Juan Quintela53518d92017-05-04 11:46:24 +02002362
2363 qemu_mutex_init(&(*rsp)->bitmap_mutex);
2364 qemu_mutex_init(&(*rsp)->src_page_req_mutex);
2365 QSIMPLEQ_INIT(&(*rsp)->src_page_requests);
Juan Quintela56e93d22015-05-07 19:33:31 +02002366
Peter Xu7d00ee62017-10-19 14:31:57 +08002367 /*
2368 * Count the total number of pages used by ram blocks not including any
2369 * gaps due to alignment or unplugs.
2370 */
2371 (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
2372
2373 ram_state_reset(*rsp);
2374
2375 return 0;
2376}
2377
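/* Allocate the dirty bitmap (and, for postcopy, the unsent bitmap) of every
 * RAMBlock and start with all pages marked dirty/unsent. */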
Peter Xud6eff5d2017-10-19 14:32:00 +08002378static void ram_list_init_bitmaps(void)
2379{
2380 RAMBlock *block;
2381 unsigned long pages;
2382
2383 /* Skip setting bitmap if there is no RAM */
2384 if (ram_bytes_total()) {
2385 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2386 pages = block->max_length >> TARGET_PAGE_BITS;
2387 block->bmap = bitmap_new(pages);
2388 bitmap_set(block->bmap, 0, pages);
2389 if (migrate_postcopy_ram()) {
2390 block->unsentmap = bitmap_new(pages);
2391 bitmap_set(block->unsentmap, 0, pages);
2392 }
2393 }
2394 }
2395}
2396
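/* Take the locks needed to start dirty logging, allocate the block bitmaps
 * and do the initial dirty bitmap sync. */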
2397static void ram_init_bitmaps(RAMState *rs)
2398{
2399 /* For memory_global_dirty_log_start below. */
2400 qemu_mutex_lock_iothread();
2401 qemu_mutex_lock_ramlist();
2402 rcu_read_lock();
2403
2404 ram_list_init_bitmaps();
2405 memory_global_dirty_log_start();
2406 migration_bitmap_sync(rs);
2407
2408 rcu_read_unlock();
2409 qemu_mutex_unlock_ramlist();
2410 qemu_mutex_unlock_iothread();
2411}
2412
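/**
 * ram_init_all: set up everything needed before the first pass over RAM
 *
 * Returns zero on success and negative on error
 *
 * Initializes the RAMState, the xbzrle cache and the dirty bitmaps.
 *
 * @rsp: where to store the pointer to the new RAMState
 */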
Peter Xu7d00ee62017-10-19 14:31:57 +08002413static int ram_init_all(RAMState **rsp)
2414{
Peter Xu7d00ee62017-10-19 14:31:57 +08002415 if (ram_state_init(rsp)) {
2416 return -1;
2417 }
2418
Peter Xu84593a02017-10-19 14:31:59 +08002419 if (xbzrle_init()) {
2420 ram_state_cleanup(rsp);
2421 return -1;
Juan Quintela56e93d22015-05-07 19:33:31 +02002422 }
2423
Peter Xud6eff5d2017-10-19 14:32:00 +08002424 ram_init_bitmaps(*rsp);
zhanghailianga91246c2016-10-27 14:42:59 +08002425
2426 return 0;
2427}
2428
Juan Quintela3d0684b2017-03-23 15:06:39 +01002429/*
2430 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
zhanghailianga91246c2016-10-27 14:42:59 +08002431 * a long-running RCU critical section. When rcu-reclaims in the code
2432 * start to become numerous it will be necessary to reduce the
2433 * granularity of these critical sections.
2434 */
2435
Juan Quintela3d0684b2017-03-23 15:06:39 +01002436/**
2437 * ram_save_setup: Setup RAM for migration
2438 *
2439 * Returns zero to indicate success and negative for error
2440 *
2441 * @f: QEMUFile where to send the data
2442 * @opaque: RAMState pointer
2443 */
zhanghailianga91246c2016-10-27 14:42:59 +08002444static int ram_save_setup(QEMUFile *f, void *opaque)
2445{
Juan Quintela53518d92017-05-04 11:46:24 +02002446 RAMState **rsp = opaque;
zhanghailianga91246c2016-10-27 14:42:59 +08002447 RAMBlock *block;
2448
Xiao Guangrongdcaf4462018-03-30 15:51:20 +08002449 if (compress_threads_save_setup()) {
2450 return -1;
2451 }
2452
zhanghailianga91246c2016-10-27 14:42:59 +08002453 /* migration has already set up the bitmap, reuse it. */
2454 if (!migration_in_colo_state()) {
Peter Xu7d00ee62017-10-19 14:31:57 +08002455 if (ram_init_all(rsp) != 0) {
Xiao Guangrongdcaf4462018-03-30 15:51:20 +08002456 compress_threads_save_cleanup();
zhanghailianga91246c2016-10-27 14:42:59 +08002457 return -1;
Juan Quintela53518d92017-05-04 11:46:24 +02002458 }
zhanghailianga91246c2016-10-27 14:42:59 +08002459 }
Juan Quintela53518d92017-05-04 11:46:24 +02002460 (*rsp)->f = f;
zhanghailianga91246c2016-10-27 14:42:59 +08002461
2462 rcu_read_lock();
Juan Quintela56e93d22015-05-07 19:33:31 +02002463
2464 qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
2465
Peter Xu99e15582017-05-12 12:17:39 +08002466 RAMBLOCK_FOREACH(block) {
Juan Quintela56e93d22015-05-07 19:33:31 +02002467 qemu_put_byte(f, strlen(block->idstr));
2468 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
2469 qemu_put_be64(f, block->used_length);
Dr. David Alan Gilbertef08fb32017-02-24 18:28:30 +00002470 if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
2471 qemu_put_be64(f, block->page_size);
2472 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002473 }
2474
2475 rcu_read_unlock();
2476
2477 ram_control_before_iterate(f, RAM_CONTROL_SETUP);
2478 ram_control_after_iterate(f, RAM_CONTROL_SETUP);
2479
2480 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2481
2482 return 0;
2483}
2484
Juan Quintela3d0684b2017-03-23 15:06:39 +01002485/**
2486 * ram_save_iterate: iterative stage for migration
2487 *
2488 * Returns zero to indicate success and negative for error
2489 *
2490 * @f: QEMUFile where to send the data
2491 * @opaque: RAMState pointer
2492 */
Juan Quintela56e93d22015-05-07 19:33:31 +02002493static int ram_save_iterate(QEMUFile *f, void *opaque)
2494{
Juan Quintela53518d92017-05-04 11:46:24 +02002495 RAMState **temp = opaque;
2496 RAMState *rs = *temp;
Juan Quintela56e93d22015-05-07 19:33:31 +02002497 int ret;
2498 int i;
2499 int64_t t0;
Thomas Huth5c903082016-11-04 14:10:17 +01002500 int done = 0;
Juan Quintela56e93d22015-05-07 19:33:31 +02002501
Peter Lievenb2557342018-03-08 12:18:24 +01002502 if (blk_mig_bulk_active()) {
2503 /* Avoid transferring ram during bulk phase of block migration as
2504 * the bulk phase will usually take a long time and transferring
2505 * ram updates during that time is pointless. */
2506 goto out;
2507 }
2508
Juan Quintela56e93d22015-05-07 19:33:31 +02002509 rcu_read_lock();
Juan Quintela6f37bb82017-03-13 19:26:29 +01002510 if (ram_list.version != rs->last_version) {
2511 ram_state_reset(rs);
Juan Quintela56e93d22015-05-07 19:33:31 +02002512 }
2513
2514 /* Read version before ram_list.blocks */
2515 smp_rmb();
2516
2517 ram_control_before_iterate(f, RAM_CONTROL_ROUND);
2518
2519 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2520 i = 0;
2521 while ((ret = qemu_file_rate_limit(f)) == 0) {
2522 int pages;
2523
Juan Quintelace25d332017-03-15 11:00:51 +01002524 pages = ram_find_and_save_block(rs, false);
Juan Quintela56e93d22015-05-07 19:33:31 +02002525 /* no more pages to send */
2526 if (pages == 0) {
Thomas Huth5c903082016-11-04 14:10:17 +01002527 done = 1;
Juan Quintela56e93d22015-05-07 19:33:31 +02002528 break;
2529 }
Juan Quintela23b28c32017-03-13 20:51:34 +01002530 rs->iterations++;
Jason J. Herne070afca2015-09-08 13:12:35 -04002531
Juan Quintela56e93d22015-05-07 19:33:31 +02002532 /* we want to check in the 1st loop, just in case it was the 1st time
2533 and we had to sync the dirty bitmap.
 2534 qemu_get_clock_ns() is a bit expensive, so we only check every few
 2535 iterations
2536 */
2537 if ((i & 63) == 0) {
2538 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
2539 if (t1 > MAX_WAIT) {
Juan Quintela55c44462017-01-23 22:32:05 +01002540 trace_ram_save_iterate_big_wait(t1, i);
Juan Quintela56e93d22015-05-07 19:33:31 +02002541 break;
2542 }
2543 }
2544 i++;
2545 }
Juan Quintelace25d332017-03-15 11:00:51 +01002546 flush_compressed_data(rs);
Juan Quintela56e93d22015-05-07 19:33:31 +02002547 rcu_read_unlock();
2548
2549 /*
2550 * Must occur before EOS (or any QEMUFile operation)
2551 * because of RDMA protocol.
2552 */
2553 ram_control_after_iterate(f, RAM_CONTROL_ROUND);
2554
Peter Lievenb2557342018-03-08 12:18:24 +01002555out:
Juan Quintela56e93d22015-05-07 19:33:31 +02002556 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
Juan Quintela93604472017-06-06 19:49:03 +02002557 ram_counters.transferred += 8;
Juan Quintela56e93d22015-05-07 19:33:31 +02002558
2559 ret = qemu_file_get_error(f);
2560 if (ret < 0) {
2561 return ret;
2562 }
2563
Thomas Huth5c903082016-11-04 14:10:17 +01002564 return done;
Juan Quintela56e93d22015-05-07 19:33:31 +02002565}
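
/*
 * Rough sketch of how the callback above is consumed: the generic
 * migration code calls it repeatedly through the save_live_iterate hook
 * registered in savevm_ram_handlers below, and interprets the result as
 *
 *     < 0   error, abort the migration
 *     == 0  rate limit hit or dirty pages remain, call again
 *     > 0   no dirty pages were found in this round
 *
 * The "done" value is only a hint: the guest keeps dirtying memory, so
 * a later iteration may find new work until the completion stage runs.
 */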
2566
Juan Quintela3d0684b2017-03-23 15:06:39 +01002567/**
2568 * ram_save_complete: function called to send the remaining amount of ram
2569 *
2570 * Returns zero to indicate success
2571 *
2572 * Called with the iothread lock held
2573 *
2574 * @f: QEMUFile where to send the data
2575 * @opaque: RAMState pointer
2576 */
Juan Quintela56e93d22015-05-07 19:33:31 +02002577static int ram_save_complete(QEMUFile *f, void *opaque)
2578{
Juan Quintela53518d92017-05-04 11:46:24 +02002579 RAMState **temp = opaque;
2580 RAMState *rs = *temp;
Juan Quintela6f37bb82017-03-13 19:26:29 +01002581
Juan Quintela56e93d22015-05-07 19:33:31 +02002582 rcu_read_lock();
2583
Juan Quintela57273092017-03-20 22:25:28 +01002584 if (!migration_in_postcopy()) {
Juan Quintela8d820d62017-03-13 19:35:50 +01002585 migration_bitmap_sync(rs);
Dr. David Alan Gilbert663e6c12015-11-05 18:11:13 +00002586 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002587
2588 ram_control_before_iterate(f, RAM_CONTROL_FINISH);
2589
2590 /* try transferring iterative blocks of memory */
2591
2592 /* flush all remaining blocks regardless of rate limiting */
2593 while (true) {
2594 int pages;
2595
Juan Quintelace25d332017-03-15 11:00:51 +01002596 pages = ram_find_and_save_block(rs, !migration_in_colo_state());
Juan Quintela56e93d22015-05-07 19:33:31 +02002597 /* no more blocks to send */
2598 if (pages == 0) {
2599 break;
2600 }
2601 }
2602
Juan Quintelace25d332017-03-15 11:00:51 +01002603 flush_compressed_data(rs);
Juan Quintela56e93d22015-05-07 19:33:31 +02002604 ram_control_after_iterate(f, RAM_CONTROL_FINISH);
Juan Quintela56e93d22015-05-07 19:33:31 +02002605
2606 rcu_read_unlock();
Paolo Bonzinid09a6fd2015-07-09 08:47:58 +02002607
Juan Quintela56e93d22015-05-07 19:33:31 +02002608 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2609
2610 return 0;
2611}
2612
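/**
 * ram_save_pending: estimate how much data is left to send
 *
 * The estimate is reported through the three buckets supplied by the
 * caller: all remaining RAM is postcopiable, so it is accounted as
 * res_compatible when postcopy-ram is enabled and as res_precopy_only
 * otherwise.  When not yet in postcopy and the estimate has dropped
 * below @max_size, the dirty bitmap is re-synced under the iothread
 * lock to refine it before reporting.
 *
 * @f: QEMUFile where to send the data
 * @opaque: RAMState pointer
 * @max_size: threshold below which the dirty bitmap is re-synced
 * @res_precopy_only: data that can only be migrated during precopy
 * @res_compatible: data that can be migrated in either phase
 * @res_postcopy_only: data that can only be migrated during postcopy
 */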
Dr. David Alan Gilbertc31b0982015-11-05 18:10:54 +00002613static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
Vladimir Sementsov-Ogievskiy47995022018-03-13 15:34:00 -04002614 uint64_t *res_precopy_only,
2615 uint64_t *res_compatible,
2616 uint64_t *res_postcopy_only)
Juan Quintela56e93d22015-05-07 19:33:31 +02002617{
Juan Quintela53518d92017-05-04 11:46:24 +02002618 RAMState **temp = opaque;
2619 RAMState *rs = *temp;
Juan Quintela56e93d22015-05-07 19:33:31 +02002620 uint64_t remaining_size;
2621
Juan Quintela9edabd42017-03-14 12:02:16 +01002622 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
Juan Quintela56e93d22015-05-07 19:33:31 +02002623
Juan Quintela57273092017-03-20 22:25:28 +01002624 if (!migration_in_postcopy() &&
Dr. David Alan Gilbert663e6c12015-11-05 18:11:13 +00002625 remaining_size < max_size) {
Juan Quintela56e93d22015-05-07 19:33:31 +02002626 qemu_mutex_lock_iothread();
2627 rcu_read_lock();
Juan Quintela8d820d62017-03-13 19:35:50 +01002628 migration_bitmap_sync(rs);
Juan Quintela56e93d22015-05-07 19:33:31 +02002629 rcu_read_unlock();
2630 qemu_mutex_unlock_iothread();
Juan Quintela9edabd42017-03-14 12:02:16 +01002631 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
Juan Quintela56e93d22015-05-07 19:33:31 +02002632 }
Dr. David Alan Gilbertc31b0982015-11-05 18:10:54 +00002633
Vladimir Sementsov-Ogievskiy86e11672017-07-10 19:30:15 +03002634 if (migrate_postcopy_ram()) {
2635 /* We can do postcopy, and all the data is postcopiable */
Vladimir Sementsov-Ogievskiy47995022018-03-13 15:34:00 -04002636 *res_compatible += remaining_size;
Vladimir Sementsov-Ogievskiy86e11672017-07-10 19:30:15 +03002637 } else {
Vladimir Sementsov-Ogievskiy47995022018-03-13 15:34:00 -04002638 *res_precopy_only += remaining_size;
Vladimir Sementsov-Ogievskiy86e11672017-07-10 19:30:15 +03002639 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002640}
2641
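/**
 * load_xbzrle: decode one XBZRLE-compressed page from the stream
 *
 * Returns zero on success and -1 on any error
 *
 * The record read here is the receive side of RAM_SAVE_FLAG_XBZRLE: a
 * flags byte that must equal ENCODING_FLAG_XBZRLE, a big-endian 16 bit
 * encoded length, then that many bytes of XBZRLE data which are decoded
 * on top of the current page contents at @host.
 *
 * @f: QEMUFile to read the record from
 * @addr: address of the page; not used here, the caller reports it on error
 * @host: host address of the page being updated
 */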
2642static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
2643{
2644 unsigned int xh_len;
2645 int xh_flags;
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00002646 uint8_t *loaded_data;
Juan Quintela56e93d22015-05-07 19:33:31 +02002647
Juan Quintela56e93d22015-05-07 19:33:31 +02002648 /* extract RLE header */
2649 xh_flags = qemu_get_byte(f);
2650 xh_len = qemu_get_be16(f);
2651
2652 if (xh_flags != ENCODING_FLAG_XBZRLE) {
2653 error_report("Failed to load XBZRLE page - wrong compression!");
2654 return -1;
2655 }
2656
2657 if (xh_len > TARGET_PAGE_SIZE) {
2658 error_report("Failed to load XBZRLE page - len overflow!");
2659 return -1;
2660 }
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002661 loaded_data = XBZRLE.decoded_buf;
Juan Quintela56e93d22015-05-07 19:33:31 +02002662 /* load data and decode */
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002663 /* it can change loaded_data to point to an internal buffer */
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00002664 qemu_get_buffer_in_place(f, &loaded_data, xh_len);
Juan Quintela56e93d22015-05-07 19:33:31 +02002665
2666 /* decode RLE */
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00002667 if (xbzrle_decode_buffer(loaded_data, xh_len, host,
Juan Quintela56e93d22015-05-07 19:33:31 +02002668 TARGET_PAGE_SIZE) == -1) {
2669 error_report("Failed to load XBZRLE page - decode error!");
2670 return -1;
2671 }
2672
2673 return 0;
2674}
2675
Juan Quintela3d0684b2017-03-23 15:06:39 +01002676/**
2677 * ram_block_from_stream: read a RAMBlock id from the migration stream
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002678 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002679 * Must be called from within an RCU critical section.
2680 *
2681 * Returns a pointer from within the RCU-protected ram_list.
2682 *
2683 * @f: QEMUFile where to read the data from
2684 * @flags: Page flags (mostly to see if it's a continuation of previous block)
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002685 */
Juan Quintela3d0684b2017-03-23 15:06:39 +01002686static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
Juan Quintela56e93d22015-05-07 19:33:31 +02002687{
2688 static RAMBlock *block = NULL;
2689 char id[256];
2690 uint8_t len;
2691
2692 if (flags & RAM_SAVE_FLAG_CONTINUE) {
zhanghailiang4c4bad42016-01-15 11:37:41 +08002693 if (!block) {
Juan Quintela56e93d22015-05-07 19:33:31 +02002694 error_report("Ack, bad migration stream!");
2695 return NULL;
2696 }
zhanghailiang4c4bad42016-01-15 11:37:41 +08002697 return block;
Juan Quintela56e93d22015-05-07 19:33:31 +02002698 }
2699
2700 len = qemu_get_byte(f);
2701 qemu_get_buffer(f, (uint8_t *)id, len);
2702 id[len] = 0;
2703
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00002704 block = qemu_ram_block_by_name(id);
zhanghailiang4c4bad42016-01-15 11:37:41 +08002705 if (!block) {
2706 error_report("Can't find block %s", id);
2707 return NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02002708 }
2709
zhanghailiang4c4bad42016-01-15 11:37:41 +08002710 return block;
2711}
2712
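/*
 * Return the host address corresponding to @offset within @block, or
 * NULL when the offset fails the offset_in_ramblock() check.
 */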
2713static inline void *host_from_ram_block_offset(RAMBlock *block,
2714 ram_addr_t offset)
2715{
2716 if (!offset_in_ramblock(block, offset)) {
2717 return NULL;
2718 }
2719
2720 return block->host + offset;
Juan Quintela56e93d22015-05-07 19:33:31 +02002721}
2722
Juan Quintela3d0684b2017-03-23 15:06:39 +01002723/**
2724 * ram_handle_compressed: handle the zero page case
2725 *
Juan Quintela56e93d22015-05-07 19:33:31 +02002726 * If a page (or a whole RDMA chunk) has been
2727 * determined to be zero, then zap it.
Juan Quintela3d0684b2017-03-23 15:06:39 +01002728 *
2729 * @host: host address for the zero page
2730 * @ch: what the page is filled with; we only support zero
2731 * @size: size of the zero page
Juan Quintela56e93d22015-05-07 19:33:31 +02002732 */
2733void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
2734{
2735 if (ch != 0 || !is_zero_range(host, size)) {
2736 memset(host, ch, size);
2737 }
2738}
2739
Xiao Guangrong797ca152018-03-30 15:51:21 +08002740/* return the size after decompression, or a negative value on error */
2741static int
2742qemu_uncompress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
2743 const uint8_t *source, size_t source_len)
2744{
2745 int err;
2746
2747 err = inflateReset(stream);
2748 if (err != Z_OK) {
2749 return -1;
2750 }
2751
2752 stream->avail_in = source_len;
2753 stream->next_in = (uint8_t *)source;
2754 stream->avail_out = dest_len;
2755 stream->next_out = dest;
2756
2757 err = inflate(stream, Z_NO_FLUSH);
2758 if (err != Z_STREAM_END) {
2759 return -1;
2760 }
2761
2762 return stream->total_out;
2763}
2764
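/*
 * Decompression worker thread body; one instance runs per thread
 * created by compress_threads_load_setup().  It sleeps on param->cond
 * until decompress_data_with_multi_threads() hands it a request
 * (param->des and param->len), inflates param->compbuf into the
 * destination page with qemu_uncompress_data() above, then marks
 * itself idle again under decomp_done_lock and signals
 * decomp_done_cond.  An inflate failure is propagated by setting the
 * error on decomp_file.
 */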
Juan Quintela56e93d22015-05-07 19:33:31 +02002765static void *do_data_decompress(void *opaque)
2766{
2767 DecompressParam *param = opaque;
2768 unsigned long pagesize;
Liang Li33d151f2016-05-05 15:32:58 +08002769 uint8_t *des;
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08002770 int len, ret;
Juan Quintela56e93d22015-05-07 19:33:31 +02002771
Liang Li33d151f2016-05-05 15:32:58 +08002772 qemu_mutex_lock(&param->mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08002773 while (!param->quit) {
Liang Li33d151f2016-05-05 15:32:58 +08002774 if (param->des) {
2775 des = param->des;
2776 len = param->len;
2777 param->des = 0;
2778 qemu_mutex_unlock(&param->mutex);
2779
Liang Li73a89122016-05-05 15:32:51 +08002780 pagesize = TARGET_PAGE_SIZE;
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08002781
2782 ret = qemu_uncompress_data(&param->stream, des, pagesize,
2783 param->compbuf, len);
2784 if (ret < 0) {
2785 error_report("decompress data failed");
2786 qemu_file_set_error(decomp_file, ret);
2787 }
Liang Li73a89122016-05-05 15:32:51 +08002788
Liang Li33d151f2016-05-05 15:32:58 +08002789 qemu_mutex_lock(&decomp_done_lock);
2790 param->done = true;
2791 qemu_cond_signal(&decomp_done_cond);
2792 qemu_mutex_unlock(&decomp_done_lock);
2793
2794 qemu_mutex_lock(&param->mutex);
2795 } else {
2796 qemu_cond_wait(&param->cond, &param->mutex);
2797 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002798 }
Liang Li33d151f2016-05-05 15:32:58 +08002799 qemu_mutex_unlock(&param->mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02002800
2801 return NULL;
2802}
2803
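/*
 * Wait until every decompression thread has finished its current page,
 * then return any error the threads have recorded on decomp_file.
 * A no-op returning zero when compression is not in use.
 */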
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08002804static int wait_for_decompress_done(void)
Liang Li5533b2e2016-05-05 15:32:52 +08002805{
2806 int idx, thread_count;
2807
2808 if (!migrate_use_compression()) {
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08002809 return 0;
Liang Li5533b2e2016-05-05 15:32:52 +08002810 }
2811
2812 thread_count = migrate_decompress_threads();
2813 qemu_mutex_lock(&decomp_done_lock);
2814 for (idx = 0; idx < thread_count; idx++) {
2815 while (!decomp_param[idx].done) {
2816 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2817 }
2818 }
2819 qemu_mutex_unlock(&decomp_done_lock);
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08002820 return qemu_file_get_error(decomp_file);
Liang Li5533b2e2016-05-05 15:32:52 +08002821}
2822
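/*
 * Tear down the decompression thread pool: ask every properly
 * initialised thread (those with a non-NULL compbuf) to quit, join it
 * and release its mutex, condition variable, zlib stream and bounce
 * buffer.  Safe on a partially initialised pool, which is how
 * compress_threads_load_setup() unwinds its own failures.
 */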
Juan Quintelaf0afa332017-06-28 11:52:28 +02002823static void compress_threads_load_cleanup(void)
Juan Quintela56e93d22015-05-07 19:33:31 +02002824{
2825 int i, thread_count;
2826
Juan Quintela3416ab52016-04-20 11:56:01 +02002827 if (!migrate_use_compression()) {
2828 return;
2829 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002830 thread_count = migrate_decompress_threads();
2831 for (i = 0; i < thread_count; i++) {
Xiao Guangrong797ca152018-03-30 15:51:21 +08002832 /*
2833 * we use compbuf as an indicator of whether the thread was
2834 * properly initialized or not
2835 */
2836 if (!decomp_param[i].compbuf) {
2837 break;
2838 }
2839
Juan Quintela56e93d22015-05-07 19:33:31 +02002840 qemu_mutex_lock(&decomp_param[i].mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08002841 decomp_param[i].quit = true;
Juan Quintela56e93d22015-05-07 19:33:31 +02002842 qemu_cond_signal(&decomp_param[i].cond);
2843 qemu_mutex_unlock(&decomp_param[i].mutex);
2844 }
2845 for (i = 0; i < thread_count; i++) {
Xiao Guangrong797ca152018-03-30 15:51:21 +08002846 if (!decomp_param[i].compbuf) {
2847 break;
2848 }
2849
Juan Quintela56e93d22015-05-07 19:33:31 +02002850 qemu_thread_join(decompress_threads + i);
2851 qemu_mutex_destroy(&decomp_param[i].mutex);
2852 qemu_cond_destroy(&decomp_param[i].cond);
Xiao Guangrong797ca152018-03-30 15:51:21 +08002853 inflateEnd(&decomp_param[i].stream);
Juan Quintela56e93d22015-05-07 19:33:31 +02002854 g_free(decomp_param[i].compbuf);
Xiao Guangrong797ca152018-03-30 15:51:21 +08002855 decomp_param[i].compbuf = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02002856 }
2857 g_free(decompress_threads);
2858 g_free(decomp_param);
Juan Quintela56e93d22015-05-07 19:33:31 +02002859 decompress_threads = NULL;
2860 decomp_param = NULL;
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08002861 decomp_file = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02002862}
2863
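/*
 * Create the decompression thread pool for the incoming side.  Returns
 * zero on success and -1 on failure, after undoing any partial
 * initialisation via compress_threads_load_cleanup().
 *
 * Lifecycle, as used in this file:
 *
 *     ram_load_setup()        -> compress_threads_load_setup(f)
 *     ram_load(), on each RAM_SAVE_FLAG_COMPRESS_PAGE record
 *                             -> decompress_data_with_multi_threads()
 *     ram_load(), at the end  -> wait_for_decompress_done()
 *     ram_load_cleanup()      -> compress_threads_load_cleanup()
 */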
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08002864static int compress_threads_load_setup(QEMUFile *f)
Xiao Guangrong797ca152018-03-30 15:51:21 +08002865{
2866 int i, thread_count;
2867
2868 if (!migrate_use_compression()) {
2869 return 0;
2870 }
2871
2872 thread_count = migrate_decompress_threads();
2873 decompress_threads = g_new0(QemuThread, thread_count);
2874 decomp_param = g_new0(DecompressParam, thread_count);
2875 qemu_mutex_init(&decomp_done_lock);
2876 qemu_cond_init(&decomp_done_cond);
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08002877 decomp_file = f;
Xiao Guangrong797ca152018-03-30 15:51:21 +08002878 for (i = 0; i < thread_count; i++) {
2879 if (inflateInit(&decomp_param[i].stream) != Z_OK) {
2880 goto exit;
2881 }
2882
2883 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
2884 qemu_mutex_init(&decomp_param[i].mutex);
2885 qemu_cond_init(&decomp_param[i].cond);
2886 decomp_param[i].done = true;
2887 decomp_param[i].quit = false;
2888 qemu_thread_create(decompress_threads + i, "decompress",
2889 do_data_decompress, decomp_param + i,
2890 QEMU_THREAD_JOINABLE);
2891 }
2892 return 0;
2893exit:
2894 compress_threads_load_cleanup();
2895 return -1;
2896}
2897
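/*
 * Hand one compressed page to an idle decompression thread: @len bytes
 * are read from @f into that thread's compbuf and the thread is woken
 * to inflate them into @host.  If every thread is busy we wait on
 * decomp_done_cond until one becomes idle.  Completion is
 * asynchronous; callers that need all pages in place must use
 * wait_for_decompress_done().
 */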
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00002898static void decompress_data_with_multi_threads(QEMUFile *f,
Juan Quintela56e93d22015-05-07 19:33:31 +02002899 void *host, int len)
2900{
2901 int idx, thread_count;
2902
2903 thread_count = migrate_decompress_threads();
Liang Li73a89122016-05-05 15:32:51 +08002904 qemu_mutex_lock(&decomp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02002905 while (true) {
2906 for (idx = 0; idx < thread_count; idx++) {
Liang Li73a89122016-05-05 15:32:51 +08002907 if (decomp_param[idx].done) {
Liang Li33d151f2016-05-05 15:32:58 +08002908 decomp_param[idx].done = false;
2909 qemu_mutex_lock(&decomp_param[idx].mutex);
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00002910 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
Juan Quintela56e93d22015-05-07 19:33:31 +02002911 decomp_param[idx].des = host;
2912 decomp_param[idx].len = len;
Liang Li33d151f2016-05-05 15:32:58 +08002913 qemu_cond_signal(&decomp_param[idx].cond);
2914 qemu_mutex_unlock(&decomp_param[idx].mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02002915 break;
2916 }
2917 }
2918 if (idx < thread_count) {
2919 break;
Liang Li73a89122016-05-05 15:32:51 +08002920 } else {
2921 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02002922 }
2923 }
Liang Li73a89122016-05-05 15:32:51 +08002924 qemu_mutex_unlock(&decomp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02002925}
2926
Juan Quintela3d0684b2017-03-23 15:06:39 +01002927/**
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002928 * ram_load_setup: Setup RAM for migration incoming side
2929 *
2930 * Returns zero to indicate success and negative for error
2931 *
2932 * @f: QEMUFile where to receive the data
2933 * @opaque: RAMState pointer
2934 */
2935static int ram_load_setup(QEMUFile *f, void *opaque)
2936{
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08002937 if (compress_threads_load_setup(f)) {
Xiao Guangrong797ca152018-03-30 15:51:21 +08002938 return -1;
2939 }
2940
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002941 xbzrle_load_setup();
Alexey Perevalovf9494612017-10-05 14:13:20 +03002942 ramblock_recv_map_init();
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002943 return 0;
2944}
2945
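/**
 * ram_load_cleanup: free resources used by the incoming side
 *
 * Releases every RAMBlock's receivedmap, the XBZRLE load state and
 * the decompression thread pool.  Always returns zero.
 *
 * @opaque: RAMState pointer
 */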
2946static int ram_load_cleanup(void *opaque)
2947{
Alexey Perevalovf9494612017-10-05 14:13:20 +03002948 RAMBlock *rb;
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002949 xbzrle_load_cleanup();
Juan Quintelaf0afa332017-06-28 11:52:28 +02002950 compress_threads_load_cleanup();
Alexey Perevalovf9494612017-10-05 14:13:20 +03002951
2952 RAMBLOCK_FOREACH(rb) {
2953 g_free(rb->receivedmap);
2954 rb->receivedmap = NULL;
2955 }
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002956 return 0;
2957}
2958
2959/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002960 * ram_postcopy_incoming_init: allocate postcopy data structures
2961 *
2962 * Returns 0 for success and negative if there was one error
2963 *
2964 * @mis: current migration incoming state
2965 *
2966 * Allocate data structures etc needed by incoming migration with
2967 * postcopy-ram. postcopy-ram's similarly named
2968 * postcopy_ram_incoming_init does the work.
Dr. David Alan Gilbert1caddf82015-11-05 18:11:03 +00002969 */
2970int ram_postcopy_incoming_init(MigrationIncomingState *mis)
2971{
Juan Quintelab8c48992017-03-21 17:44:30 +01002972 unsigned long ram_pages = last_ram_page();
Dr. David Alan Gilbert1caddf82015-11-05 18:11:03 +00002973
2974 return postcopy_ram_incoming_init(mis, ram_pages);
2975}
2976
Juan Quintela3d0684b2017-03-23 15:06:39 +01002977/**
2978 * ram_load_postcopy: load a page in postcopy case
2979 *
2980 * Returns 0 for success or -errno in case of error
2981 *
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002982 * Called in postcopy mode by ram_load().
2983 * rcu_read_lock is taken prior to this being called.
Juan Quintela3d0684b2017-03-23 15:06:39 +01002984 *
2985 * @f: QEMUFile to receive the data from
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002986 */
2987static int ram_load_postcopy(QEMUFile *f)
2988{
2989 int flags = 0, ret = 0;
2990 bool place_needed = false;
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00002991 bool matching_page_sizes = false;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002992 MigrationIncomingState *mis = migration_incoming_get_current();
2993 /* Temporary page that is later 'placed' */
2994 void *postcopy_host_page = postcopy_get_tmp_page(mis);
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00002995 void *last_host = NULL;
Dr. David Alan Gilberta3b6ff62015-11-11 14:02:28 +00002996 bool all_zero = false;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002997
2998 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
2999 ram_addr_t addr;
3000 void *host = NULL;
3001 void *page_buffer = NULL;
3002 void *place_source = NULL;
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00003003 RAMBlock *block = NULL;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003004 uint8_t ch;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003005
3006 addr = qemu_get_be64(f);
Peter Xu7a9ddfb2018-02-08 18:31:05 +08003007
3008 /*
3009 * If qemu file error, we should stop here, and then "addr"
3010 * may be invalid
3011 */
3012 ret = qemu_file_get_error(f);
3013 if (ret) {
3014 break;
3015 }
3016
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003017 flags = addr & ~TARGET_PAGE_MASK;
3018 addr &= TARGET_PAGE_MASK;
3019
3020 trace_ram_load_postcopy_loop((uint64_t)addr, flags);
3021 place_needed = false;
Juan Quintelabb890ed2017-04-28 09:39:55 +02003022 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE)) {
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00003023 block = ram_block_from_stream(f, flags);
zhanghailiang4c4bad42016-01-15 11:37:41 +08003024
3025 host = host_from_ram_block_offset(block, addr);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003026 if (!host) {
3027 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
3028 ret = -EINVAL;
3029 break;
3030 }
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00003031 matching_page_sizes = block->page_size == TARGET_PAGE_SIZE;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003032 /*
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00003033 * Postcopy requires that we place whole host pages atomically;
3034 * these may be huge pages for RAMBlocks that are backed by
3035 * hugetlbfs.
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003036 * To make it atomic, the data is read into a temporary page
3037 * that's moved into place later.
3038 * The migration protocol uses, possibly smaller, target pages;
3039 * however, the source ensures it always sends all the components
3040 * of a host page in order.
3041 */
3042 page_buffer = postcopy_host_page +
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00003043 ((uintptr_t)host & (block->page_size - 1));
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003044 /* If all target pages are zero then we can optimise the placement */
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00003045 if (!((uintptr_t)host & (block->page_size - 1))) {
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003046 all_zero = true;
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00003047 } else {
3048 /* not the 1st target page within the host page */
3049 if (host != (last_host + TARGET_PAGE_SIZE)) {
Markus Armbruster9af9e0f2015-12-18 16:35:19 +01003050 error_report("Non-sequential target page %p/%p",
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00003051 host, last_host);
3052 ret = -EINVAL;
3053 break;
3054 }
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003055 }
3056
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00003057
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003058 /*
3059 * If it's the last part of a host page then we place the host
3060 * page
3061 */
3062 place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00003063 (block->page_size - 1)) == 0;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003064 place_source = postcopy_host_page;
3065 }
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00003066 last_host = host;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003067
3068 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
Juan Quintelabb890ed2017-04-28 09:39:55 +02003069 case RAM_SAVE_FLAG_ZERO:
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003070 ch = qemu_get_byte(f);
3071 memset(page_buffer, ch, TARGET_PAGE_SIZE);
3072 if (ch) {
3073 all_zero = false;
3074 }
3075 break;
3076
3077 case RAM_SAVE_FLAG_PAGE:
3078 all_zero = false;
3079 if (!place_needed || !matching_page_sizes) {
3080 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
3081 } else {
3082 /* Avoids the qemu_file copy into the temporary page; the data
3083 * is copied again when the page is placed, and we can only
3084 * do this read in one go (matching page sizes)
3085 */
3086 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
3087 TARGET_PAGE_SIZE);
3088 }
3089 break;
3090 case RAM_SAVE_FLAG_EOS:
3091 /* normal exit */
3092 break;
3093 default:
3094 error_report("Unknown combination of migration flags: %#x"
3095 " (postcopy mode)", flags);
3096 ret = -EINVAL;
Peter Xu7a9ddfb2018-02-08 18:31:05 +08003097 break;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003098 }
3099
Peter Xu7a9ddfb2018-02-08 18:31:05 +08003100 /* Detect for any possible file errors */
3101 if (!ret && qemu_file_get_error(f)) {
3102 ret = qemu_file_get_error(f);
3103 }
3104
3105 if (!ret && place_needed) {
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003106 /* This gets called at the last target page in the host page */
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00003107 void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;
3108
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003109 if (all_zero) {
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00003110 ret = postcopy_place_page_zero(mis, place_dest,
Alexey Perevalov8be46202017-10-05 14:13:18 +03003111 block);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003112 } else {
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00003113 ret = postcopy_place_page(mis, place_dest,
Alexey Perevalov8be46202017-10-05 14:13:18 +03003114 place_source, block);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003115 }
3116 }
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003117 }
3118
3119 return ret;
3120}
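
/*
 * In outline, the loop above reassembles host pages from target-page
 * sized records: each target page is copied (or, when page sizes match,
 * read in place) into the temporary postcopy_host_page, and only once
 * the last target page of a host page has arrived is the whole page
 * placed atomically with postcopy_place_page() or, for all-zero pages,
 * postcopy_place_page_zero().  That satisfies the "place whole host
 * pages atomically" requirement noted in the loop, which also covers
 * hugetlbfs-backed RAMBlocks whose page size exceeds TARGET_PAGE_SIZE.
 */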
3121
Daniel Henrique Barbozaacab30b2017-11-16 20:35:26 -02003122static bool postcopy_is_advised(void)
3123{
3124 PostcopyState ps = postcopy_state_get();
3125 return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END;
3126}
3127
3128static bool postcopy_is_running(void)
3129{
3130 PostcopyState ps = postcopy_state_get();
3131 return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END;
3132}
3133
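/**
 * ram_load: load RAM from the migration stream
 *
 * Returns zero on success and a negative value on error
 *
 * Only stream version 4 is accepted.  When the incoming side is already
 * in postcopy listening mode, the work is delegated to
 * ram_load_postcopy().  Otherwise each record's flags select the
 * action: RAM_SAVE_FLAG_MEM_SIZE cross-checks the RAMBlock list sent by
 * ram_save_setup(), ZERO/PAGE/COMPRESS_PAGE/XBZRLE restore a single
 * page, and EOS ends the section.
 *
 * @f: QEMUFile to receive the data from
 * @opaque: RAMState pointer
 * @version_id: stream version, must be 4
 */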
Juan Quintela56e93d22015-05-07 19:33:31 +02003134static int ram_load(QEMUFile *f, void *opaque, int version_id)
3135{
Juan Quintelaedc60122016-11-02 12:40:46 +01003136 int flags = 0, ret = 0, invalid_flags = 0;
Juan Quintela56e93d22015-05-07 19:33:31 +02003137 static uint64_t seq_iter;
3138 int len = 0;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003139 /*
3140 * If system is running in postcopy mode, page inserts to host memory must
3141 * be atomic
3142 */
Daniel Henrique Barbozaacab30b2017-11-16 20:35:26 -02003143 bool postcopy_running = postcopy_is_running();
Dr. David Alan Gilbertef08fb32017-02-24 18:28:30 +00003144 /* ADVISE is earlier; it shows that the source has the postcopy capability enabled */
Daniel Henrique Barbozaacab30b2017-11-16 20:35:26 -02003145 bool postcopy_advised = postcopy_is_advised();
Juan Quintela56e93d22015-05-07 19:33:31 +02003146
3147 seq_iter++;
3148
3149 if (version_id != 4) {
3150 ret = -EINVAL;
3151 }
3152
Juan Quintelaedc60122016-11-02 12:40:46 +01003153 if (!migrate_use_compression()) {
3154 invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE;
3155 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003156 /* This RCU critical section can be very long running.
3157 * When the RCU reclamations pending in the code start to become
3158 * numerous, it will be necessary to reduce the granularity of this
3159 * critical section.
3160 */
3161 rcu_read_lock();
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003162
3163 if (postcopy_running) {
3164 ret = ram_load_postcopy(f);
3165 }
3166
3167 while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02003168 ram_addr_t addr, total_ram_bytes;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00003169 void *host = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02003170 uint8_t ch;
3171
3172 addr = qemu_get_be64(f);
3173 flags = addr & ~TARGET_PAGE_MASK;
3174 addr &= TARGET_PAGE_MASK;
3175
Juan Quintelaedc60122016-11-02 12:40:46 +01003176 if (flags & invalid_flags) {
3177 if (flags & invalid_flags & RAM_SAVE_FLAG_COMPRESS_PAGE) {
3178 error_report("Received an unexpected compressed page");
3179 }
3180
3181 ret = -EINVAL;
3182 break;
3183 }
3184
Juan Quintelabb890ed2017-04-28 09:39:55 +02003185 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00003186 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
zhanghailiang4c4bad42016-01-15 11:37:41 +08003187 RAMBlock *block = ram_block_from_stream(f, flags);
3188
3189 host = host_from_ram_block_offset(block, addr);
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00003190 if (!host) {
3191 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
3192 ret = -EINVAL;
3193 break;
3194 }
Alexey Perevalovf9494612017-10-05 14:13:20 +03003195 ramblock_recv_bitmap_set(block, host);
Dr. David Alan Gilbert1db9d8e2017-04-26 19:37:21 +01003196 trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00003197 }
3198
Juan Quintela56e93d22015-05-07 19:33:31 +02003199 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
3200 case RAM_SAVE_FLAG_MEM_SIZE:
3201 /* Synchronize RAM block list */
3202 total_ram_bytes = addr;
3203 while (!ret && total_ram_bytes) {
3204 RAMBlock *block;
Juan Quintela56e93d22015-05-07 19:33:31 +02003205 char id[256];
3206 ram_addr_t length;
3207
3208 len = qemu_get_byte(f);
3209 qemu_get_buffer(f, (uint8_t *)id, len);
3210 id[len] = 0;
3211 length = qemu_get_be64(f);
3212
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00003213 block = qemu_ram_block_by_name(id);
3214 if (block) {
3215 if (length != block->used_length) {
3216 Error *local_err = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02003217
Gongleifa53a0e2016-05-10 10:04:59 +08003218 ret = qemu_ram_resize(block, length,
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00003219 &local_err);
3220 if (local_err) {
3221 error_report_err(local_err);
Juan Quintela56e93d22015-05-07 19:33:31 +02003222 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003223 }
Dr. David Alan Gilbertef08fb32017-02-24 18:28:30 +00003224 /* For postcopy we need to check hugepage sizes match */
3225 if (postcopy_advised &&
3226 block->page_size != qemu_host_page_size) {
3227 uint64_t remote_page_size = qemu_get_be64(f);
3228 if (remote_page_size != block->page_size) {
3229 error_report("Mismatched RAM page size %s "
3230 "(local) %zd != %" PRId64,
3231 id, block->page_size,
3232 remote_page_size);
3233 ret = -EINVAL;
3234 }
3235 }
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00003236 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
3237 block->idstr);
3238 } else {
Juan Quintela56e93d22015-05-07 19:33:31 +02003239 error_report("Unknown ramblock \"%s\", cannot "
3240 "accept migration", id);
3241 ret = -EINVAL;
3242 }
3243
3244 total_ram_bytes -= length;
3245 }
3246 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00003247
Juan Quintelabb890ed2017-04-28 09:39:55 +02003248 case RAM_SAVE_FLAG_ZERO:
Juan Quintela56e93d22015-05-07 19:33:31 +02003249 ch = qemu_get_byte(f);
3250 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
3251 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00003252
Juan Quintela56e93d22015-05-07 19:33:31 +02003253 case RAM_SAVE_FLAG_PAGE:
Juan Quintela56e93d22015-05-07 19:33:31 +02003254 qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
3255 break;
Juan Quintela56e93d22015-05-07 19:33:31 +02003256
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00003257 case RAM_SAVE_FLAG_COMPRESS_PAGE:
Juan Quintela56e93d22015-05-07 19:33:31 +02003258 len = qemu_get_be32(f);
3259 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
3260 error_report("Invalid compressed data length: %d", len);
3261 ret = -EINVAL;
3262 break;
3263 }
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00003264 decompress_data_with_multi_threads(f, host, len);
Juan Quintela56e93d22015-05-07 19:33:31 +02003265 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00003266
Juan Quintela56e93d22015-05-07 19:33:31 +02003267 case RAM_SAVE_FLAG_XBZRLE:
Juan Quintela56e93d22015-05-07 19:33:31 +02003268 if (load_xbzrle(f, addr, host) < 0) {
3269 error_report("Failed to decompress XBZRLE page at "
3270 RAM_ADDR_FMT, addr);
3271 ret = -EINVAL;
3272 break;
3273 }
3274 break;
3275 case RAM_SAVE_FLAG_EOS:
3276 /* normal exit */
3277 break;
3278 default:
3279 if (flags & RAM_SAVE_FLAG_HOOK) {
Dr. David Alan Gilbert632e3a52015-06-11 18:17:23 +01003280 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
Juan Quintela56e93d22015-05-07 19:33:31 +02003281 } else {
3282 error_report("Unknown combination of migration flags: %#x",
3283 flags);
3284 ret = -EINVAL;
3285 }
3286 }
3287 if (!ret) {
3288 ret = qemu_file_get_error(f);
3289 }
3290 }
3291
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003292 ret |= wait_for_decompress_done();
Juan Quintela56e93d22015-05-07 19:33:31 +02003293 rcu_read_unlock();
Juan Quintela55c44462017-01-23 22:32:05 +01003294 trace_ram_load_complete(ret, seq_iter);
Juan Quintela56e93d22015-05-07 19:33:31 +02003295 return ret;
3296}
3297
Vladimir Sementsov-Ogievskiyc6467622017-07-10 19:30:14 +03003298static bool ram_has_postcopy(void *opaque)
3299{
3300 return migrate_postcopy_ram();
3301}
3302
Juan Quintela56e93d22015-05-07 19:33:31 +02003303static SaveVMHandlers savevm_ram_handlers = {
Juan Quintela9907e842017-06-28 11:52:24 +02003304 .save_setup = ram_save_setup,
Juan Quintela56e93d22015-05-07 19:33:31 +02003305 .save_live_iterate = ram_save_iterate,
Dr. David Alan Gilbert763c9062015-11-05 18:11:00 +00003306 .save_live_complete_postcopy = ram_save_complete,
Dr. David Alan Gilberta3e06c32015-11-05 18:10:41 +00003307 .save_live_complete_precopy = ram_save_complete,
Vladimir Sementsov-Ogievskiyc6467622017-07-10 19:30:14 +03003308 .has_postcopy = ram_has_postcopy,
Juan Quintela56e93d22015-05-07 19:33:31 +02003309 .save_live_pending = ram_save_pending,
3310 .load_state = ram_load,
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003311 .save_cleanup = ram_save_cleanup,
3312 .load_setup = ram_load_setup,
3313 .load_cleanup = ram_load_cleanup,
Juan Quintela56e93d22015-05-07 19:33:31 +02003314};
3315
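/*
 * Hook RAM migration into the generic savevm machinery: initialise the
 * XBZRLE lock and register the "ram" section (version 4, matching the
 * check in ram_load()) with the savevm_ram_handlers above, passing
 * &ram_state as the opaque pointer that every handler receives.
 */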
3316void ram_mig_init(void)
3317{
3318 qemu_mutex_init(&XBZRLE.lock);
Juan Quintela6f37bb82017-03-13 19:26:29 +01003319 register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state);
Juan Quintela56e93d22015-05-07 19:33:31 +02003320}