/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2011-2015 Red Hat Inc
 *
 * Authors:
 *  Juan Quintela <quintela@redhat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include <zlib.h>
#include "qemu/cutils.h"
#include "qemu/bitops.h"
#include "qemu/bitmap.h"
#include "qemu/main-loop.h"
#include "qemu/pmem.h"
#include "xbzrle.h"
#include "ram.h"
#include "migration.h"
#include "socket.h"
#include "migration/register.h"
#include "migration/misc.h"
#include "qemu-file.h"
#include "postcopy-ram.h"
#include "page_cache.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "qapi/qapi-events-migration.h"
#include "qapi/qmp/qerror.h"
#include "trace.h"
#include "exec/ram_addr.h"
#include "exec/target_page.h"
#include "qemu/rcu_queue.h"
#include "migration/colo.h"
#include "block.h"
#include "sysemu/sysemu.h"
#include "qemu/uuid.h"
#include "savevm.h"
#include "qemu/iov.h"

/***********************************************************/
/* ram save/restore */

/* RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS; it
 * worked for pages that were filled with the same char.  We switched
 * it to only search for the zero value.  And to avoid confusion with
 * RAM_SAVE_FLAG_COMPRESS_PAGE we just renamed it.
 */

#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
#define RAM_SAVE_FLAG_ZERO     0x02
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
#define RAM_SAVE_FLAG_PAGE     0x08
#define RAM_SAVE_FLAG_EOS      0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE   0x40
/* 0x80 is reserved in migration.h start with 0x100 next */
#define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100
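/*
 * These flag bits travel in the low bits of the page offset written by
 * save_page_header(); offsets are target-page aligned, so the low bits
 * are free to carry them.
 */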

static inline bool is_zero_range(uint8_t *p, uint64_t size)
{
    return buffer_is_zero(p, size);
}

XBZRLECacheStats xbzrle_counters;

/* struct contains XBZRLE cache and a static page
   used by the compression */
static struct {
    /* buffer used for XBZRLE encoding */
    uint8_t *encoded_buf;
    /* buffer for storing page content */
    uint8_t *current_buf;
    /* Cache for XBZRLE, Protected by lock. */
    PageCache *cache;
    QemuMutex lock;
    /* it will store a page full of zeros */
    uint8_t *zero_target_page;
    /* buffer used for XBZRLE decoding */
    uint8_t *decoded_buf;
} XBZRLE;

static void XBZRLE_cache_lock(void)
{
    if (migrate_use_xbzrle())
        qemu_mutex_lock(&XBZRLE.lock);
}

static void XBZRLE_cache_unlock(void)
{
    if (migrate_use_xbzrle())
        qemu_mutex_unlock(&XBZRLE.lock);
}

/**
 * xbzrle_cache_resize: resize the xbzrle cache
 *
 * This function is called from qmp_migrate_set_cache_size in the main
 * thread, possibly while a migration is in progress. A running
 * migration may be using the cache and might finish during this call,
 * hence changes to the cache are protected by XBZRLE.lock().
 *
 * Returns 0 for success or -1 for error
 *
 * @new_size: new cache size
 * @errp: set *errp if the check failed, with reason
 */
int xbzrle_cache_resize(int64_t new_size, Error **errp)
{
    PageCache *new_cache;
    int64_t ret = 0;

    /* Check for truncation */
    if (new_size != (size_t)new_size) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
                   "exceeding address space");
        return -1;
    }

    if (new_size == migrate_xbzrle_cache_size()) {
        /* nothing to do */
        return 0;
    }

    XBZRLE_cache_lock();

    if (XBZRLE.cache != NULL) {
        new_cache = cache_init(new_size, TARGET_PAGE_SIZE, errp);
        if (!new_cache) {
            ret = -1;
            goto out;
        }

        cache_fini(XBZRLE.cache);
        XBZRLE.cache = new_cache;
    }
out:
    XBZRLE_cache_unlock();
    return ret;
}

/* Should be holding either ram_list.mutex, or the RCU lock. */
#define RAMBLOCK_FOREACH_MIGRATABLE(block) \
    INTERNAL_RAMBLOCK_FOREACH(block)       \
        if (!qemu_ram_is_migratable(block)) {} else

#undef RAMBLOCK_FOREACH

static void ramblock_recv_map_init(void)
{
    RAMBlock *rb;

    RAMBLOCK_FOREACH_MIGRATABLE(rb) {
        assert(!rb->receivedmap);
        rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits());
    }
}

int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr)
{
    return test_bit(ramblock_recv_bitmap_offset(host_addr, rb),
                    rb->receivedmap);
}

bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset)
{
    return test_bit(byte_offset >> TARGET_PAGE_BITS, rb->receivedmap);
}

void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr)
{
    set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb), rb->receivedmap);
}

void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr,
                                    size_t nr)
{
    bitmap_set_atomic(rb->receivedmap,
                      ramblock_recv_bitmap_offset(host_addr, rb),
                      nr);
}

#define RAMBLOCK_RECV_BITMAP_ENDING  (0x0123456789abcdefULL)

/*
 * Format: bitmap_size (8 bytes) + whole_bitmap (N bytes).
 *
 * Returns >0 if success with sent bytes, or <0 if error.
 */
int64_t ramblock_recv_bitmap_send(QEMUFile *file,
                                  const char *block_name)
{
    RAMBlock *block = qemu_ram_block_by_name(block_name);
    unsigned long *le_bitmap, nbits;
    uint64_t size;

    if (!block) {
        error_report("%s: invalid block name: %s", __func__, block_name);
        return -1;
    }

    nbits = block->used_length >> TARGET_PAGE_BITS;

    /*
     * Make sure the tmp bitmap buffer is big enough, e.g., on 32bit
     * machines we may need 4 more bytes for padding (see below
     * comment). So extend it a bit beforehand.
     */
    le_bitmap = bitmap_new(nbits + BITS_PER_LONG);

    /*
     * Always use little endian when sending the bitmap. This is
     * required when source and destination VMs are not using the
     * same endianness. (Note: big endian won't work.)
     */
    bitmap_to_le(le_bitmap, block->receivedmap, nbits);

    /* Size of the bitmap, in bytes */
    size = DIV_ROUND_UP(nbits, 8);

    /*
     * size is always aligned to 8 bytes for 64bit machines, but it
     * may not be true for 32bit machines. We need this padding to
     * make sure the migration can survive even between 32bit and
     * 64bit machines.
     */
    size = ROUND_UP(size, 8);

    qemu_put_be64(file, size);
    qemu_put_buffer(file, (const uint8_t *)le_bitmap, size);
    /*
     * Mark as an end, in case the middle part is screwed up due to
     * some "mysterious" reason.
     */
    qemu_put_be64(file, RAMBLOCK_RECV_BITMAP_ENDING);
    qemu_fflush(file);

    g_free(le_bitmap);

    if (qemu_file_get_error(file)) {
        return qemu_file_get_error(file);
    }

    return size + sizeof(size);
}
266
Juan Quintelaec481c62017-03-20 22:12:40 +0100267/*
268 * An outstanding page request, on the source, having been received
269 * and queued
270 */
271struct RAMSrcPageRequest {
272 RAMBlock *rb;
273 hwaddr offset;
274 hwaddr len;
275
276 QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
277};
278
/* State of RAM for migration */
struct RAMState {
    /* QEMUFile used for this migration */
    QEMUFile *f;
    /* Last block that we have visited searching for dirty pages */
    RAMBlock *last_seen_block;
    /* Last block from where we have sent data */
    RAMBlock *last_sent_block;
    /* Last dirty target page we have sent */
    ram_addr_t last_page;
    /* last ram version we have seen */
    uint32_t last_version;
    /* We are in the first round */
    bool ram_bulk_stage;
    /* How many times we have had too many dirty pages */
    int dirty_rate_high_cnt;
    /* these variables are used for bitmap sync */
    /* last time we did a full bitmap_sync */
    int64_t time_last_bitmap_sync;
    /* bytes transferred at start_time */
    uint64_t bytes_xfer_prev;
    /* number of dirty pages since start_time */
    uint64_t num_dirty_pages_period;
    /* xbzrle misses since the beginning of the period */
    uint64_t xbzrle_cache_miss_prev;

    /* compression statistics since the beginning of the period */
    /* number of times there was no free thread to compress data */
    uint64_t compress_thread_busy_prev;
    /* amount of bytes after compression */
    uint64_t compressed_size_prev;
    /* amount of compressed pages */
    uint64_t compress_pages_prev;

    /* total handled target pages at the beginning of period */
    uint64_t target_page_count_prev;
    /* total handled target pages since start */
    uint64_t target_page_count;
    /* number of dirty bits in the bitmap */
    uint64_t migration_dirty_pages;
    /* protects modification of the bitmap */
    QemuMutex bitmap_mutex;
    /* The RAMBlock used in the last src_page_requests */
    RAMBlock *last_req_rb;
    /* Queue of outstanding page requests from the destination */
    QemuMutex src_page_req_mutex;
    QSIMPLEQ_HEAD(src_page_requests, RAMSrcPageRequest) src_page_requests;
};
typedef struct RAMState RAMState;

static RAMState *ram_state;

uint64_t ram_bytes_remaining(void)
{
    return ram_state ? (ram_state->migration_dirty_pages * TARGET_PAGE_SIZE) :
                       0;
}

MigrationStats ram_counters;

/* used by the search for pages to send */
struct PageSearchStatus {
    /* Current block being searched */
    RAMBlock    *block;
    /* Current page to search from */
    unsigned long page;
    /* Set once we wrap around */
    bool         complete_round;
};
typedef struct PageSearchStatus PageSearchStatus;

CompressionStats compression_counters;

struct CompressParam {
    bool done;
    bool quit;
    bool zero_page;
    QEMUFile *file;
    QemuMutex mutex;
    QemuCond cond;
    RAMBlock *block;
    ram_addr_t offset;

    /* internally used fields */
    z_stream stream;
    uint8_t *originbuf;
};
typedef struct CompressParam CompressParam;

struct DecompressParam {
    bool done;
    bool quit;
    QemuMutex mutex;
    QemuCond cond;
    void *des;
    uint8_t *compbuf;
    int len;
    z_stream stream;
};
typedef struct DecompressParam DecompressParam;

static CompressParam *comp_param;
static QemuThread *compress_threads;
/* comp_done_cond is used to wake up the migration thread when
 * one of the compression threads has finished the compression.
 * comp_done_lock is used to co-work with comp_done_cond.
 */
static QemuMutex comp_done_lock;
static QemuCond comp_done_cond;
/* The empty QEMUFileOps will be used by file in CompressParam */
static const QEMUFileOps empty_ops = { };

static QEMUFile *decomp_file;
static DecompressParam *decomp_param;
static QemuThread *decompress_threads;
static QemuMutex decomp_done_lock;
static QemuCond decomp_done_cond;

static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
                                 ram_addr_t offset, uint8_t *source_buf);

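/*
 * Worker body for each compression thread: wait until the migration
 * thread hands over a page via param->block/offset, compress it into
 * param->file with do_compress_ram_page(), then mark the request done
 * under comp_done_lock so the migration thread waiting on
 * comp_done_cond can pick up the result.
 */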
static void *do_data_compress(void *opaque)
{
    CompressParam *param = opaque;
    RAMBlock *block;
    ram_addr_t offset;
    bool zero_page;

    qemu_mutex_lock(&param->mutex);
    while (!param->quit) {
        if (param->block) {
            block = param->block;
            offset = param->offset;
            param->block = NULL;
            qemu_mutex_unlock(&param->mutex);

            zero_page = do_compress_ram_page(param->file, &param->stream,
                                             block, offset, param->originbuf);

            qemu_mutex_lock(&comp_done_lock);
            param->done = true;
            param->zero_page = zero_page;
            qemu_cond_signal(&comp_done_cond);
            qemu_mutex_unlock(&comp_done_lock);

            qemu_mutex_lock(&param->mutex);
        } else {
            qemu_cond_wait(&param->cond, &param->mutex);
        }
    }
    qemu_mutex_unlock(&param->mutex);

    return NULL;
}

static void compress_threads_save_cleanup(void)
{
    int i, thread_count;

    if (!migrate_use_compression() || !comp_param) {
        return;
    }

    thread_count = migrate_compress_threads();
    for (i = 0; i < thread_count; i++) {
        /*
         * we use it as an indicator which shows if the thread is
         * properly init'd or not
         */
        if (!comp_param[i].file) {
            break;
        }

        qemu_mutex_lock(&comp_param[i].mutex);
        comp_param[i].quit = true;
        qemu_cond_signal(&comp_param[i].cond);
        qemu_mutex_unlock(&comp_param[i].mutex);

        qemu_thread_join(compress_threads + i);
        qemu_mutex_destroy(&comp_param[i].mutex);
        qemu_cond_destroy(&comp_param[i].cond);
        deflateEnd(&comp_param[i].stream);
        g_free(comp_param[i].originbuf);
        qemu_fclose(comp_param[i].file);
        comp_param[i].file = NULL;
    }
    qemu_mutex_destroy(&comp_done_lock);
    qemu_cond_destroy(&comp_done_cond);
    g_free(compress_threads);
    g_free(comp_param);
    compress_threads = NULL;
    comp_param = NULL;
}

static int compress_threads_save_setup(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return 0;
    }
    thread_count = migrate_compress_threads();
    compress_threads = g_new0(QemuThread, thread_count);
    comp_param = g_new0(CompressParam, thread_count);
    qemu_cond_init(&comp_done_cond);
    qemu_mutex_init(&comp_done_lock);
    for (i = 0; i < thread_count; i++) {
        comp_param[i].originbuf = g_try_malloc(TARGET_PAGE_SIZE);
        if (!comp_param[i].originbuf) {
            goto exit;
        }

        if (deflateInit(&comp_param[i].stream,
                        migrate_compress_level()) != Z_OK) {
            g_free(comp_param[i].originbuf);
            goto exit;
        }

        /* comp_param[i].file is just used as a dummy buffer to save data,
         * set its ops to empty.
         */
        comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
        comp_param[i].done = true;
        comp_param[i].quit = false;
        qemu_mutex_init(&comp_param[i].mutex);
        qemu_cond_init(&comp_param[i].cond);
        qemu_thread_create(compress_threads + i, "compress",
                           do_data_compress, comp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
    return 0;

exit:
    compress_threads_save_cleanup();
    return -1;
}

/* Multiple fd's */

#define MULTIFD_MAGIC 0x11223344U
#define MULTIFD_VERSION 1

#define MULTIFD_FLAG_SYNC (1 << 0)

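/*
 * The very first packet sent on every multifd channel; the receiving
 * side uses the magic/version/uuid to sanity-check the connection and
 * the id to associate the channel with its MultiFDRecvParams slot.
 */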
typedef struct {
    uint32_t magic;
    uint32_t version;
    unsigned char uuid[16]; /* QemuUUID */
    uint8_t id;
} __attribute__((packed)) MultiFDInit_t;

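/*
 * On-wire header that precedes every batch of pages on a channel; all
 * multi-byte fields are stored big-endian (see multifd_send_fill_packet()
 * and multifd_recv_unfill_packet()).
 */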
typedef struct {
    uint32_t magic;
    uint32_t version;
    uint32_t flags;
    uint32_t size;
    uint32_t used;
    uint64_t packet_num;
    char ramblock[256];
    uint64_t offset[];
} __attribute__((packed)) MultiFDPacket_t;

typedef struct {
    /* number of used pages */
    uint32_t used;
    /* number of allocated pages */
    uint32_t allocated;
    /* global number of generated multifd packets */
    uint64_t packet_num;
    /* offset of each page */
    ram_addr_t *offset;
    /* pointer to each page */
    struct iovec *iov;
    RAMBlock *block;
} MultiFDPages_t;

typedef struct {
    /* these fields are not changed once the thread is created */
    /* channel number */
    uint8_t id;
    /* channel thread name */
    char *name;
    /* channel thread id */
    QemuThread thread;
    /* communication channel */
    QIOChannel *c;
    /* sem where to wait for more work */
    QemuSemaphore sem;
    /* this mutex protects the following parameters */
    QemuMutex mutex;
    /* is this channel thread running */
    bool running;
    /* should this thread finish */
    bool quit;
    /* thread has work to do */
    int pending_job;
    /* array of pages to be sent */
    MultiFDPages_t *pages;
    /* packet allocated len */
    uint32_t packet_len;
    /* pointer to the packet */
    MultiFDPacket_t *packet;
    /* multifd flags for each packet */
    uint32_t flags;
    /* global number of generated multifd packets */
    uint64_t packet_num;
    /* thread local variables */
    /* packets sent through this channel */
    uint64_t num_packets;
    /* pages sent through this channel */
    uint64_t num_pages;
    /* syncs main thread and channels */
    QemuSemaphore sem_sync;
} MultiFDSendParams;

typedef struct {
    /* these fields are not changed once the thread is created */
    /* channel number */
    uint8_t id;
    /* channel thread name */
    char *name;
    /* channel thread id */
    QemuThread thread;
    /* communication channel */
    QIOChannel *c;
    /* this mutex protects the following parameters */
    QemuMutex mutex;
    /* is this channel thread running */
    bool running;
    /* array of pages to receive */
    MultiFDPages_t *pages;
    /* packet allocated len */
    uint32_t packet_len;
    /* pointer to the packet */
    MultiFDPacket_t *packet;
    /* multifd flags for each packet */
    uint32_t flags;
    /* global number of generated multifd packets */
    uint64_t packet_num;
    /* thread local variables */
    /* packets received through this channel */
    uint64_t num_packets;
    /* pages received through this channel */
    uint64_t num_pages;
    /* syncs main thread and channels */
    QemuSemaphore sem_sync;
} MultiFDRecvParams;

static int multifd_send_initial_packet(MultiFDSendParams *p, Error **errp)
{
    MultiFDInit_t msg;
    int ret;

    msg.magic = cpu_to_be32(MULTIFD_MAGIC);
    msg.version = cpu_to_be32(MULTIFD_VERSION);
    msg.id = p->id;
    memcpy(msg.uuid, &qemu_uuid.data, sizeof(msg.uuid));

    ret = qio_channel_write_all(p->c, (char *)&msg, sizeof(msg), errp);
    if (ret != 0) {
        return -1;
    }
    return 0;
}

static int multifd_recv_initial_packet(QIOChannel *c, Error **errp)
{
    MultiFDInit_t msg;
    int ret;

    ret = qio_channel_read_all(c, (char *)&msg, sizeof(msg), errp);
    if (ret != 0) {
        return -1;
    }

    msg.magic = be32_to_cpu(msg.magic);
    msg.version = be32_to_cpu(msg.version);

    if (msg.magic != MULTIFD_MAGIC) {
        error_setg(errp, "multifd: received packet magic %x "
                   "expected %x", msg.magic, MULTIFD_MAGIC);
        return -1;
    }

    if (msg.version != MULTIFD_VERSION) {
        error_setg(errp, "multifd: received packet version %d "
                   "expected %d", msg.version, MULTIFD_VERSION);
        return -1;
    }

    if (memcmp(msg.uuid, &qemu_uuid, sizeof(qemu_uuid))) {
        char *uuid = qemu_uuid_unparse_strdup(&qemu_uuid);
        char *msg_uuid = qemu_uuid_unparse_strdup((const QemuUUID *)msg.uuid);

        error_setg(errp, "multifd: received uuid '%s' and expected "
                   "uuid '%s' for channel %hhd", msg_uuid, uuid, msg.id);
        g_free(uuid);
        g_free(msg_uuid);
        return -1;
    }

    if (msg.id >= migrate_multifd_channels()) {
        error_setg(errp, "multifd: received channel id %d "
                   "expected a value below %d", msg.id,
                   migrate_multifd_channels());
        return -1;
    }

    return msg.id;
}

static MultiFDPages_t *multifd_pages_init(size_t size)
{
    MultiFDPages_t *pages = g_new0(MultiFDPages_t, 1);

    pages->allocated = size;
    pages->iov = g_new0(struct iovec, size);
    pages->offset = g_new0(ram_addr_t, size);

    return pages;
}

static void multifd_pages_clear(MultiFDPages_t *pages)
{
    pages->used = 0;
    pages->allocated = 0;
    pages->packet_num = 0;
    pages->block = NULL;
    g_free(pages->iov);
    pages->iov = NULL;
    g_free(pages->offset);
    pages->offset = NULL;
    g_free(pages);
}

static void multifd_send_fill_packet(MultiFDSendParams *p)
{
    MultiFDPacket_t *packet = p->packet;
    int i;

    packet->magic = cpu_to_be32(MULTIFD_MAGIC);
    packet->version = cpu_to_be32(MULTIFD_VERSION);
    packet->flags = cpu_to_be32(p->flags);
    packet->size = cpu_to_be32(migrate_multifd_page_count());
    packet->used = cpu_to_be32(p->pages->used);
    packet->packet_num = cpu_to_be64(p->packet_num);

    if (p->pages->block) {
        strncpy(packet->ramblock, p->pages->block->idstr, 256);
    }

    for (i = 0; i < p->pages->used; i++) {
        packet->offset[i] = cpu_to_be64(p->pages->offset[i]);
    }
}

static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
{
    MultiFDPacket_t *packet = p->packet;
    RAMBlock *block;
    int i;

    packet->magic = be32_to_cpu(packet->magic);
    if (packet->magic != MULTIFD_MAGIC) {
        error_setg(errp, "multifd: received packet "
                   "magic %x and expected magic %x",
                   packet->magic, MULTIFD_MAGIC);
        return -1;
    }

    packet->version = be32_to_cpu(packet->version);
    if (packet->version != MULTIFD_VERSION) {
        error_setg(errp, "multifd: received packet "
                   "version %d and expected version %d",
                   packet->version, MULTIFD_VERSION);
        return -1;
    }

    p->flags = be32_to_cpu(packet->flags);

    packet->size = be32_to_cpu(packet->size);
    if (packet->size > migrate_multifd_page_count()) {
        error_setg(errp, "multifd: received packet "
                   "with size %d and expected maximum size %d",
                   packet->size, migrate_multifd_page_count());
        return -1;
    }

    p->pages->used = be32_to_cpu(packet->used);
    if (p->pages->used > packet->size) {
        error_setg(errp, "multifd: received packet "
                   "with %d pages and expected maximum of %d pages",
                   p->pages->used, packet->size);
        return -1;
    }

    p->packet_num = be64_to_cpu(packet->packet_num);

    if (p->pages->used) {
        /* make sure that ramblock is 0 terminated */
        packet->ramblock[255] = 0;
        block = qemu_ram_block_by_name(packet->ramblock);
        if (!block) {
            error_setg(errp, "multifd: unknown ram block %s",
                       packet->ramblock);
            return -1;
        }
    }

    for (i = 0; i < p->pages->used; i++) {
        ram_addr_t offset = be64_to_cpu(packet->offset[i]);

        if (offset > (block->used_length - TARGET_PAGE_SIZE)) {
            error_setg(errp, "multifd: offset too long " RAM_ADDR_FMT
                       " (max " RAM_ADDR_FMT ")",
                       offset, block->used_length);
            return -1;
        }
        p->pages->iov[i].iov_base = block->host + offset;
        p->pages->iov[i].iov_len = TARGET_PAGE_SIZE;
    }

    return 0;
}

struct {
    MultiFDSendParams *params;
    /* number of created threads */
    int count;
    /* array of pages to be sent */
    MultiFDPages_t *pages;
    /* syncs main thread and channels */
    QemuSemaphore sem_sync;
    /* global number of generated multifd packets */
    uint64_t packet_num;
    /* send channels ready */
    QemuSemaphore channels_ready;
} *multifd_send_state;

/*
 * How we use multifd_send_state->pages and channel->pages?
 *
 * We create a pages array for each channel, and a main one.  Each time
 * that we need to send a batch of pages we interchange the ones between
 * multifd_send_state and the channel that is sending it.  There are
 * two reasons for that:
 *    - to not have to do so many mallocs during migration
 *    - to make it easier to know what to free at the end of migration
 *
 * This way we always know who is the owner of each "pages" struct,
 * and we don't need any locking.  It belongs to the migration thread
 * or to the channel thread.  Switching is safe because the migration
 * thread is using the channel mutex when changing it, and the channel
 * has to have finished with its own, otherwise pending_job can't be
 * false.
 */

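/*
 * Hand the current multifd_send_state->pages batch to an idle channel:
 * scan the channels round-robin starting at next_channel, take the
 * first one without a pending job, swap its pages array with the
 * global one, account the transferred bytes and kick its thread.
 */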
static void multifd_send_pages(void)
{
    int i;
    static int next_channel;
    MultiFDSendParams *p = NULL; /* make happy gcc */
    MultiFDPages_t *pages = multifd_send_state->pages;
    uint64_t transferred;

    qemu_sem_wait(&multifd_send_state->channels_ready);
    for (i = next_channel;; i = (i + 1) % migrate_multifd_channels()) {
        p = &multifd_send_state->params[i];

        qemu_mutex_lock(&p->mutex);
        if (!p->pending_job) {
            p->pending_job++;
            next_channel = (i + 1) % migrate_multifd_channels();
            break;
        }
        qemu_mutex_unlock(&p->mutex);
    }
    p->pages->used = 0;

    p->packet_num = multifd_send_state->packet_num++;
    p->pages->block = NULL;
    multifd_send_state->pages = p->pages;
    p->pages = pages;
    transferred = ((uint64_t) pages->used) * TARGET_PAGE_SIZE + p->packet_len;
    ram_counters.multifd_bytes += transferred;
    ram_counters.transferred += transferred;
    qemu_mutex_unlock(&p->mutex);
    qemu_sem_post(&p->sem);
}

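/*
 * Queue one page for multifd transmission.  When the batch is full, or
 * the page belongs to a different RAMBlock than the batch, the current
 * batch is flushed with multifd_send_pages(); in the different-block
 * case the page is then re-queued into the freshly swapped-in (empty)
 * batch, so the recursion is at most one level deep.
 */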
static void multifd_queue_page(RAMBlock *block, ram_addr_t offset)
{
    MultiFDPages_t *pages = multifd_send_state->pages;

    if (!pages->block) {
        pages->block = block;
    }

    if (pages->block == block) {
        pages->offset[pages->used] = offset;
        pages->iov[pages->used].iov_base = block->host + offset;
        pages->iov[pages->used].iov_len = TARGET_PAGE_SIZE;
        pages->used++;

        if (pages->used < pages->allocated) {
            return;
        }
    }

    multifd_send_pages();

    if (pages->block != block) {
        multifd_queue_page(block, offset);
    }
}

static void multifd_send_terminate_threads(Error *err)
{
    int i;

    if (err) {
        MigrationState *s = migrate_get_current();
        migrate_set_error(s, err);
        if (s->state == MIGRATION_STATUS_SETUP ||
            s->state == MIGRATION_STATUS_PRE_SWITCHOVER ||
            s->state == MIGRATION_STATUS_DEVICE ||
            s->state == MIGRATION_STATUS_ACTIVE) {
            migrate_set_state(&s->state, s->state,
                              MIGRATION_STATUS_FAILED);
        }
    }

    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        qemu_mutex_lock(&p->mutex);
        p->quit = true;
        qemu_sem_post(&p->sem);
        qemu_mutex_unlock(&p->mutex);
    }
}

int multifd_save_cleanup(Error **errp)
{
    int i;
    int ret = 0;

    if (!migrate_use_multifd()) {
        return 0;
    }
    multifd_send_terminate_threads(NULL);
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        if (p->running) {
            qemu_thread_join(&p->thread);
        }
        socket_send_channel_destroy(p->c);
        p->c = NULL;
        qemu_mutex_destroy(&p->mutex);
        qemu_sem_destroy(&p->sem);
        qemu_sem_destroy(&p->sem_sync);
        g_free(p->name);
        p->name = NULL;
        multifd_pages_clear(p->pages);
        p->pages = NULL;
        p->packet_len = 0;
        g_free(p->packet);
        p->packet = NULL;
    }
    qemu_sem_destroy(&multifd_send_state->channels_ready);
    qemu_sem_destroy(&multifd_send_state->sem_sync);
    g_free(multifd_send_state->params);
    multifd_send_state->params = NULL;
    multifd_pages_clear(multifd_send_state->pages);
    multifd_send_state->pages = NULL;
    g_free(multifd_send_state);
    multifd_send_state = NULL;
    return ret;
}

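/*
 * Flush any partially filled batch, then make every send channel emit a
 * packet flagged MULTIFD_FLAG_SYNC and wait until each channel has
 * posted sem_sync, so all pages queued so far have been handed to the
 * channels before we continue.
 */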
static void multifd_send_sync_main(void)
{
    int i;

    if (!migrate_use_multifd()) {
        return;
    }
    if (multifd_send_state->pages->used) {
        multifd_send_pages();
    }
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        trace_multifd_send_sync_main_signal(p->id);

        qemu_mutex_lock(&p->mutex);

        p->packet_num = multifd_send_state->packet_num++;
        p->flags |= MULTIFD_FLAG_SYNC;
        p->pending_job++;
        qemu_mutex_unlock(&p->mutex);
        qemu_sem_post(&p->sem);
    }
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        trace_multifd_send_sync_main_wait(p->id);
        qemu_sem_wait(&multifd_send_state->sem_sync);
    }
    trace_multifd_send_sync_main(multifd_send_state->packet_num);
}

static void *multifd_send_thread(void *opaque)
{
    MultiFDSendParams *p = opaque;
    Error *local_err = NULL;
    int ret;

    trace_multifd_send_thread_start(p->id);
    rcu_register_thread();

    if (multifd_send_initial_packet(p, &local_err) < 0) {
        goto out;
    }
    /* initial packet */
    p->num_packets = 1;

    while (true) {
        qemu_sem_wait(&p->sem);
        qemu_mutex_lock(&p->mutex);

        if (p->pending_job) {
            uint32_t used = p->pages->used;
            uint64_t packet_num = p->packet_num;
            uint32_t flags = p->flags;

            multifd_send_fill_packet(p);
            p->flags = 0;
            p->num_packets++;
            p->num_pages += used;
            p->pages->used = 0;
            qemu_mutex_unlock(&p->mutex);

            trace_multifd_send(p->id, packet_num, used, flags);

            ret = qio_channel_write_all(p->c, (void *)p->packet,
                                        p->packet_len, &local_err);
            if (ret != 0) {
                break;
            }

            ret = qio_channel_writev_all(p->c, p->pages->iov, used, &local_err);
            if (ret != 0) {
                break;
            }

            qemu_mutex_lock(&p->mutex);
            p->pending_job--;
            qemu_mutex_unlock(&p->mutex);

            if (flags & MULTIFD_FLAG_SYNC) {
                qemu_sem_post(&multifd_send_state->sem_sync);
            }
            qemu_sem_post(&multifd_send_state->channels_ready);
        } else if (p->quit) {
            qemu_mutex_unlock(&p->mutex);
            break;
        } else {
            qemu_mutex_unlock(&p->mutex);
            /* sometimes there are spurious wakeups */
        }
    }

out:
    if (local_err) {
        multifd_send_terminate_threads(local_err);
    }

    qemu_mutex_lock(&p->mutex);
    p->running = false;
    qemu_mutex_unlock(&p->mutex);

    rcu_unregister_thread();
    trace_multifd_send_thread_end(p->id, p->num_packets, p->num_pages);

    return NULL;
}

static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
{
    MultiFDSendParams *p = opaque;
    QIOChannel *sioc = QIO_CHANNEL(qio_task_get_source(task));
    Error *local_err = NULL;

    if (qio_task_propagate_error(task, &local_err)) {
        if (multifd_save_cleanup(&local_err) != 0) {
            migrate_set_error(migrate_get_current(), local_err);
        }
    } else {
        p->c = QIO_CHANNEL(sioc);
        qio_channel_set_delay(p->c, false);
        p->running = true;
        qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
                           QEMU_THREAD_JOINABLE);

        atomic_inc(&multifd_send_state->count);
    }
}

int multifd_save_setup(void)
{
    int thread_count;
    uint32_t page_count = migrate_multifd_page_count();
    uint8_t i;

    if (!migrate_use_multifd()) {
        return 0;
    }
    thread_count = migrate_multifd_channels();
    multifd_send_state = g_malloc0(sizeof(*multifd_send_state));
    multifd_send_state->params = g_new0(MultiFDSendParams, thread_count);
    atomic_set(&multifd_send_state->count, 0);
    multifd_send_state->pages = multifd_pages_init(page_count);
    qemu_sem_init(&multifd_send_state->sem_sync, 0);
    qemu_sem_init(&multifd_send_state->channels_ready, 0);

    for (i = 0; i < thread_count; i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        qemu_mutex_init(&p->mutex);
        qemu_sem_init(&p->sem, 0);
        qemu_sem_init(&p->sem_sync, 0);
        p->quit = false;
        p->pending_job = 0;
        p->id = i;
        p->pages = multifd_pages_init(page_count);
        p->packet_len = sizeof(MultiFDPacket_t)
                      + sizeof(ram_addr_t) * page_count;
        p->packet = g_malloc0(p->packet_len);
        p->name = g_strdup_printf("multifdsend_%d", i);
        socket_send_channel_create(multifd_new_send_channel_async, p);
    }
    return 0;
}

struct {
    MultiFDRecvParams *params;
    /* number of created threads */
    int count;
    /* syncs main thread and channels */
    QemuSemaphore sem_sync;
    /* global number of generated multifd packets */
    uint64_t packet_num;
} *multifd_recv_state;

static void multifd_recv_terminate_threads(Error *err)
{
    int i;

    if (err) {
        MigrationState *s = migrate_get_current();
        migrate_set_error(s, err);
        if (s->state == MIGRATION_STATUS_SETUP ||
            s->state == MIGRATION_STATUS_ACTIVE) {
            migrate_set_state(&s->state, s->state,
                              MIGRATION_STATUS_FAILED);
        }
    }

    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

        qemu_mutex_lock(&p->mutex);
        /* We could arrive here for two reasons:
           - normal quit, i.e. everything went fine, just finished
           - error quit: We close the channels so the channel threads
             finish the qio_channel_read_all_eof() */
        qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
        qemu_mutex_unlock(&p->mutex);
    }
}

int multifd_load_cleanup(Error **errp)
{
    int i;
    int ret = 0;

    if (!migrate_use_multifd()) {
        return 0;
    }
    multifd_recv_terminate_threads(NULL);
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

        if (p->running) {
            qemu_thread_join(&p->thread);
        }
        object_unref(OBJECT(p->c));
        p->c = NULL;
        qemu_mutex_destroy(&p->mutex);
        qemu_sem_destroy(&p->sem_sync);
        g_free(p->name);
        p->name = NULL;
        multifd_pages_clear(p->pages);
        p->pages = NULL;
        p->packet_len = 0;
        g_free(p->packet);
        p->packet = NULL;
    }
    qemu_sem_destroy(&multifd_recv_state->sem_sync);
    g_free(multifd_recv_state->params);
    multifd_recv_state->params = NULL;
    g_free(multifd_recv_state);
    multifd_recv_state = NULL;

    return ret;
}

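/*
 * Counterpart of multifd_send_sync_main() on the destination: wait for
 * every recv channel to reach a MULTIFD_FLAG_SYNC packet, record the
 * highest packet_num seen, then release the channels via their
 * per-channel sem_sync.
 */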
static void multifd_recv_sync_main(void)
{
    int i;

    if (!migrate_use_multifd()) {
        return;
    }
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

        trace_multifd_recv_sync_main_wait(p->id);
        qemu_sem_wait(&multifd_recv_state->sem_sync);
        qemu_mutex_lock(&p->mutex);
        if (multifd_recv_state->packet_num < p->packet_num) {
            multifd_recv_state->packet_num = p->packet_num;
        }
        qemu_mutex_unlock(&p->mutex);
    }
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

        trace_multifd_recv_sync_main_signal(p->id);
        qemu_sem_post(&p->sem_sync);
    }
    trace_multifd_recv_sync_main(multifd_recv_state->packet_num);
}

static void *multifd_recv_thread(void *opaque)
{
    MultiFDRecvParams *p = opaque;
    Error *local_err = NULL;
    int ret;

    trace_multifd_recv_thread_start(p->id);
    rcu_register_thread();

    while (true) {
        uint32_t used;
        uint32_t flags;

        ret = qio_channel_read_all_eof(p->c, (void *)p->packet,
                                       p->packet_len, &local_err);
        if (ret == 0) {   /* EOF */
            break;
        }
        if (ret == -1) {   /* Error */
            break;
        }

        qemu_mutex_lock(&p->mutex);
        ret = multifd_recv_unfill_packet(p, &local_err);
        if (ret) {
            qemu_mutex_unlock(&p->mutex);
            break;
        }

        used = p->pages->used;
        flags = p->flags;
        trace_multifd_recv(p->id, p->packet_num, used, flags);
        p->num_packets++;
        p->num_pages += used;
        qemu_mutex_unlock(&p->mutex);

        ret = qio_channel_readv_all(p->c, p->pages->iov, used, &local_err);
        if (ret != 0) {
            break;
        }

        if (flags & MULTIFD_FLAG_SYNC) {
            qemu_sem_post(&multifd_recv_state->sem_sync);
            qemu_sem_wait(&p->sem_sync);
        }
    }

    if (local_err) {
        multifd_recv_terminate_threads(local_err);
    }
    qemu_mutex_lock(&p->mutex);
    p->running = false;
    qemu_mutex_unlock(&p->mutex);

    rcu_unregister_thread();
    trace_multifd_recv_thread_end(p->id, p->num_packets, p->num_pages);

    return NULL;
}

int multifd_load_setup(void)
{
    int thread_count;
    uint32_t page_count = migrate_multifd_page_count();
    uint8_t i;

    if (!migrate_use_multifd()) {
        return 0;
    }
    thread_count = migrate_multifd_channels();
    multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state));
    multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count);
    atomic_set(&multifd_recv_state->count, 0);
    qemu_sem_init(&multifd_recv_state->sem_sync, 0);

    for (i = 0; i < thread_count; i++) {
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

        qemu_mutex_init(&p->mutex);
        qemu_sem_init(&p->sem_sync, 0);
        p->id = i;
        p->pages = multifd_pages_init(page_count);
        p->packet_len = sizeof(MultiFDPacket_t)
                      + sizeof(ram_addr_t) * page_count;
        p->packet = g_malloc0(p->packet_len);
        p->name = g_strdup_printf("multifdrecv_%d", i);
    }
    return 0;
}

bool multifd_recv_all_channels_created(void)
{
    int thread_count = migrate_multifd_channels();

    if (!migrate_use_multifd()) {
        return true;
    }

    return thread_count == atomic_read(&multifd_recv_state->count);
}

/* Return true if multifd is ready for the migration, otherwise false */
bool multifd_recv_new_channel(QIOChannel *ioc)
{
    MultiFDRecvParams *p;
    Error *local_err = NULL;
    int id;

    id = multifd_recv_initial_packet(ioc, &local_err);
    if (id < 0) {
        multifd_recv_terminate_threads(local_err);
        return false;
    }

    p = &multifd_recv_state->params[id];
    if (p->c != NULL) {
        error_setg(&local_err, "multifd: received id '%d' already setup",
                   id);
        multifd_recv_terminate_threads(local_err);
        return false;
    }
    p->c = ioc;
    object_ref(OBJECT(ioc));
    /* initial packet */
    p->num_packets = 1;

    p->running = true;
    qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
                       QEMU_THREAD_JOINABLE);
    atomic_inc(&multifd_recv_state->count);
    return multifd_recv_state->count == migrate_multifd_channels();
}

Juan Quintela56e93d22015-05-07 19:33:31 +02001357/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001358 * save_page_header: write page header to wire
Juan Quintela56e93d22015-05-07 19:33:31 +02001359 *
1360 * If this is the 1st block, it also writes the block identification
1361 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001362 * Returns the number of bytes written
Juan Quintela56e93d22015-05-07 19:33:31 +02001363 *
1364 * @f: QEMUFile where to send the data
1365 * @block: block that contains the page we want to send
1366 * @offset: offset inside the block for the page
1367 * in the lower bits, it contains flags
1368 */
Juan Quintela2bf3aa82017-05-10 13:28:13 +02001369static size_t save_page_header(RAMState *rs, QEMUFile *f, RAMBlock *block,
1370 ram_addr_t offset)
Juan Quintela56e93d22015-05-07 19:33:31 +02001371{
Liang Li9f5f3802015-07-13 17:34:10 +08001372 size_t size, len;
Juan Quintela56e93d22015-05-07 19:33:31 +02001373
Juan Quintela24795692017-03-21 11:45:01 +01001374 if (block == rs->last_sent_block) {
1375 offset |= RAM_SAVE_FLAG_CONTINUE;
1376 }
Juan Quintela2bf3aa82017-05-10 13:28:13 +02001377 qemu_put_be64(f, offset);
Juan Quintela56e93d22015-05-07 19:33:31 +02001378 size = 8;
1379
1380 if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
Liang Li9f5f3802015-07-13 17:34:10 +08001381 len = strlen(block->idstr);
Juan Quintela2bf3aa82017-05-10 13:28:13 +02001382 qemu_put_byte(f, len);
1383 qemu_put_buffer(f, (uint8_t *)block->idstr, len);
Liang Li9f5f3802015-07-13 17:34:10 +08001384 size += 1 + len;
Juan Quintela24795692017-03-21 11:45:01 +01001385 rs->last_sent_block = block;
Juan Quintela56e93d22015-05-07 19:33:31 +02001386 }
1387 return size;
1388}
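/*
 * A rough sketch (for orientation only) of what save_page_header() above
 * puts on the wire, reconstructed from the qemu_put_* calls:
 *
 *   qemu_put_be64:   offset | RAM_SAVE_FLAG_*     8 bytes (flags in low bits)
 *   qemu_put_byte:   strlen(block->idstr)         1 byte   \ only when this is
 *   qemu_put_buffer: block->idstr                 len bytes / a new block (no
 *                                                  RAM_SAVE_FLAG_CONTINUE set)
 *
 * which matches the returned size of 8, or 8 + 1 + len for a new block.
 */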
1389
Juan Quintela3d0684b2017-03-23 15:06:39 +01001390/**
 1391 * mig_throttle_guest_down: throttle down the guest
1392 *
1393 * Reduce amount of guest cpu execution to hopefully slow down memory
1394 * writes. If guest dirty memory rate is reduced below the rate at
1395 * which we can transfer pages to the destination then we should be
1396 * able to complete migration. Some workloads dirty memory way too
1397 * fast and will not effectively converge, even with auto-converge.
Jason J. Herne070afca2015-09-08 13:12:35 -04001398 */
1399static void mig_throttle_guest_down(void)
1400{
1401 MigrationState *s = migrate_get_current();
Daniel P. Berrange2594f562016-04-27 11:05:14 +01001402 uint64_t pct_initial = s->parameters.cpu_throttle_initial;
1403 uint64_t pct_icrement = s->parameters.cpu_throttle_increment;
Li Qiang4cbc9c72018-08-01 06:00:20 -07001404 int pct_max = s->parameters.max_cpu_throttle;
Jason J. Herne070afca2015-09-08 13:12:35 -04001405
1406 /* We have not started throttling yet. Let's start it. */
1407 if (!cpu_throttle_active()) {
1408 cpu_throttle_set(pct_initial);
1409 } else {
1410 /* Throttling already on, just increase the rate */
Li Qiang4cbc9c72018-08-01 06:00:20 -07001411 cpu_throttle_set(MIN(cpu_throttle_get_percentage() + pct_icrement,
1412 pct_max));
Jason J. Herne070afca2015-09-08 13:12:35 -04001413 }
1414}
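/*
 * Illustration with hypothetical parameter values (not necessarily the
 * configured defaults): if cpu_throttle_initial were 20,
 * cpu_throttle_increment 10 and max_cpu_throttle 99, repeated calls to the
 * function above would set the throttle to 20, 30, 40, ... and then stay
 * clamped at 99 by MIN().
 */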
1415
Juan Quintela3d0684b2017-03-23 15:06:39 +01001416/**
1417 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
1418 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001419 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001420 * @current_addr: address for the zero page
1421 *
1422 * Update the xbzrle cache to reflect a page that's been sent as all 0.
Juan Quintela56e93d22015-05-07 19:33:31 +02001423 * The important thing is that a stale (not-yet-0'd) page be replaced
1424 * by the new data.
1425 * As a bonus, if the page wasn't in the cache it gets added so that
Juan Quintela3d0684b2017-03-23 15:06:39 +01001426 * when a small write is made into the 0'd page it gets XBZRLE sent.
Juan Quintela56e93d22015-05-07 19:33:31 +02001427 */
Juan Quintela6f37bb82017-03-13 19:26:29 +01001428static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
Juan Quintela56e93d22015-05-07 19:33:31 +02001429{
Juan Quintela6f37bb82017-03-13 19:26:29 +01001430 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
Juan Quintela56e93d22015-05-07 19:33:31 +02001431 return;
1432 }
1433
1434 /* We don't care if this fails to allocate a new cache page
 1435 * as long as it updates an old one */
Juan Quintelac00e0922017-05-09 16:22:01 +02001436 cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page,
Juan Quintela93604472017-06-06 19:49:03 +02001437 ram_counters.dirty_sync_count);
Juan Quintela56e93d22015-05-07 19:33:31 +02001438}
1439
1440#define ENCODING_FLAG_XBZRLE 0x1
1441
1442/**
1443 * save_xbzrle_page: compress and send current page
1444 *
1445 * Returns: 1 means that we wrote the page
1446 * 0 means that page is identical to the one already sent
1447 * -1 means that xbzrle would be longer than normal
1448 *
Juan Quintela5a987732017-03-13 19:39:02 +01001449 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001450 * @current_data: pointer to the address of the page contents
1451 * @current_addr: addr of the page
Juan Quintela56e93d22015-05-07 19:33:31 +02001452 * @block: block that contains the page we want to send
1453 * @offset: offset inside the block for the page
1454 * @last_stage: if we are at the completion stage
Juan Quintela56e93d22015-05-07 19:33:31 +02001455 */
Juan Quintela204b88b2017-03-15 09:16:57 +01001456static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
Juan Quintela56e93d22015-05-07 19:33:31 +02001457 ram_addr_t current_addr, RAMBlock *block,
Juan Quintela072c2512017-03-14 10:27:31 +01001458 ram_addr_t offset, bool last_stage)
Juan Quintela56e93d22015-05-07 19:33:31 +02001459{
1460 int encoded_len = 0, bytes_xbzrle;
1461 uint8_t *prev_cached_page;
1462
Juan Quintela93604472017-06-06 19:49:03 +02001463 if (!cache_is_cached(XBZRLE.cache, current_addr,
1464 ram_counters.dirty_sync_count)) {
1465 xbzrle_counters.cache_miss++;
Juan Quintela56e93d22015-05-07 19:33:31 +02001466 if (!last_stage) {
1467 if (cache_insert(XBZRLE.cache, current_addr, *current_data,
Juan Quintela93604472017-06-06 19:49:03 +02001468 ram_counters.dirty_sync_count) == -1) {
Juan Quintela56e93d22015-05-07 19:33:31 +02001469 return -1;
1470 } else {
1471 /* update *current_data when the page has been
1472 inserted into cache */
1473 *current_data = get_cached_data(XBZRLE.cache, current_addr);
1474 }
1475 }
1476 return -1;
1477 }
1478
1479 prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
1480
1481 /* save current buffer into memory */
1482 memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
1483
1484 /* XBZRLE encoding (if there is no overflow) */
1485 encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
1486 TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
1487 TARGET_PAGE_SIZE);
1488 if (encoded_len == 0) {
Juan Quintela55c44462017-01-23 22:32:05 +01001489 trace_save_xbzrle_page_skipping();
Juan Quintela56e93d22015-05-07 19:33:31 +02001490 return 0;
1491 } else if (encoded_len == -1) {
Juan Quintela55c44462017-01-23 22:32:05 +01001492 trace_save_xbzrle_page_overflow();
Juan Quintela93604472017-06-06 19:49:03 +02001493 xbzrle_counters.overflow++;
Juan Quintela56e93d22015-05-07 19:33:31 +02001494 /* update data in the cache */
1495 if (!last_stage) {
1496 memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
1497 *current_data = prev_cached_page;
1498 }
1499 return -1;
1500 }
1501
1502 /* we need to update the data in the cache, in order to get the same data */
1503 if (!last_stage) {
1504 memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
1505 }
1506
1507 /* Send XBZRLE based compressed page */
Juan Quintela2bf3aa82017-05-10 13:28:13 +02001508 bytes_xbzrle = save_page_header(rs, rs->f, block,
Juan Quintela204b88b2017-03-15 09:16:57 +01001509 offset | RAM_SAVE_FLAG_XBZRLE);
1510 qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE);
1511 qemu_put_be16(rs->f, encoded_len);
1512 qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len);
Juan Quintela56e93d22015-05-07 19:33:31 +02001513 bytes_xbzrle += encoded_len + 1 + 2;
Juan Quintela93604472017-06-06 19:49:03 +02001514 xbzrle_counters.pages++;
1515 xbzrle_counters.bytes += bytes_xbzrle;
1516 ram_counters.transferred += bytes_xbzrle;
Juan Quintela56e93d22015-05-07 19:33:31 +02001517
1518 return 1;
1519}
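/*
 * Sketch of the stream layout produced by the RAM_SAVE_FLAG_XBZRLE branch
 * above (derived from the qemu_put_* calls):
 *
 *   page header (offset | RAM_SAVE_FLAG_XBZRLE)
 *   ENCODING_FLAG_XBZRLE          1 byte
 *   encoded_len                   2 bytes, big endian
 *   XBZRLE.encoded_buf            encoded_len bytes
 *
 * hence bytes_xbzrle = header size + encoded_len + 1 + 2.
 */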
1520
Juan Quintela3d0684b2017-03-23 15:06:39 +01001521/**
1522 * migration_bitmap_find_dirty: find the next dirty page from start
Dr. David Alan Gilbertf3f491f2015-11-05 18:11:01 +00001523 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001524 * Called with rcu_read_lock() to protect migration_bitmap
1525 *
1526 * Returns the byte offset within memory region of the start of a dirty page
1527 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001528 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001529 * @rb: RAMBlock where to search for dirty pages
Juan Quintelaa935e302017-03-21 15:36:51 +01001530 * @start: page where we start the search
Dr. David Alan Gilbertf3f491f2015-11-05 18:11:01 +00001531 */
Juan Quintela56e93d22015-05-07 19:33:31 +02001532static inline
Juan Quintelaa935e302017-03-21 15:36:51 +01001533unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
Juan Quintelaf20e2862017-03-21 16:19:05 +01001534 unsigned long start)
Juan Quintela56e93d22015-05-07 19:33:31 +02001535{
Juan Quintela6b6712e2017-03-22 15:18:04 +01001536 unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
1537 unsigned long *bitmap = rb->bmap;
Juan Quintela56e93d22015-05-07 19:33:31 +02001538 unsigned long next;
1539
Cédric Le Goaterb895de52018-05-14 08:57:00 +02001540 if (!qemu_ram_is_migratable(rb)) {
1541 return size;
1542 }
1543
Juan Quintela6b6712e2017-03-22 15:18:04 +01001544 if (rs->ram_bulk_stage && start > 0) {
1545 next = start + 1;
Juan Quintela56e93d22015-05-07 19:33:31 +02001546 } else {
Juan Quintela6b6712e2017-03-22 15:18:04 +01001547 next = find_next_bit(bitmap, size, start);
Juan Quintela56e93d22015-05-07 19:33:31 +02001548 }
1549
Juan Quintela6b6712e2017-03-22 15:18:04 +01001550 return next;
Juan Quintela56e93d22015-05-07 19:33:31 +02001551}
1552
Juan Quintela06b10682017-03-21 15:18:05 +01001553static inline bool migration_bitmap_clear_dirty(RAMState *rs,
Juan Quintelaf20e2862017-03-21 16:19:05 +01001554 RAMBlock *rb,
1555 unsigned long page)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001556{
1557 bool ret;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001558
Juan Quintela6b6712e2017-03-22 15:18:04 +01001559 ret = test_and_clear_bit(page, rb->bmap);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001560
1561 if (ret) {
Juan Quintela0d8ec882017-03-13 21:21:41 +01001562 rs->migration_dirty_pages--;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001563 }
1564 return ret;
1565}
1566
Juan Quintela15440dd2017-03-21 09:35:04 +01001567static void migration_bitmap_sync_range(RAMState *rs, RAMBlock *rb,
1568 ram_addr_t start, ram_addr_t length)
Juan Quintela56e93d22015-05-07 19:33:31 +02001569{
Juan Quintela0d8ec882017-03-13 21:21:41 +01001570 rs->migration_dirty_pages +=
Juan Quintela6b6712e2017-03-22 15:18:04 +01001571 cpu_physical_memory_sync_dirty_bitmap(rb, start, length,
Juan Quintela0d8ec882017-03-13 21:21:41 +01001572 &rs->num_dirty_pages_period);
Juan Quintela56e93d22015-05-07 19:33:31 +02001573}
1574
Juan Quintela3d0684b2017-03-23 15:06:39 +01001575/**
1576 * ram_pagesize_summary: calculate all the pagesizes of a VM
1577 *
1578 * Returns a summary bitmap of the page sizes of all RAMBlocks
1579 *
1580 * For VMs with just normal pages this is equivalent to the host page
1581 * size. If it's got some huge pages then it's the OR of all the
1582 * different page sizes.
Dr. David Alan Gilberte8ca1db2017-02-24 18:28:29 +00001583 */
1584uint64_t ram_pagesize_summary(void)
1585{
1586 RAMBlock *block;
1587 uint64_t summary = 0;
1588
Cédric Le Goaterb895de52018-05-14 08:57:00 +02001589 RAMBLOCK_FOREACH_MIGRATABLE(block) {
Dr. David Alan Gilberte8ca1db2017-02-24 18:28:29 +00001590 summary |= block->page_size;
1591 }
1592
1593 return summary;
1594}
1595
Xiao Guangrongb7340352018-06-04 17:55:12 +08001596static void migration_update_rates(RAMState *rs, int64_t end_time)
1597{
Xiao Guangrongbe8b02e2018-09-03 17:26:42 +08001598 uint64_t page_count = rs->target_page_count - rs->target_page_count_prev;
Xiao Guangrong76e03002018-09-06 15:01:00 +08001599 double compressed_size;
Xiao Guangrongb7340352018-06-04 17:55:12 +08001600
1601 /* calculate period counters */
1602 ram_counters.dirty_pages_rate = rs->num_dirty_pages_period * 1000
1603 / (end_time - rs->time_last_bitmap_sync);
1604
Xiao Guangrongbe8b02e2018-09-03 17:26:42 +08001605 if (!page_count) {
Xiao Guangrongb7340352018-06-04 17:55:12 +08001606 return;
1607 }
1608
1609 if (migrate_use_xbzrle()) {
1610 xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss -
Xiao Guangrongbe8b02e2018-09-03 17:26:42 +08001611 rs->xbzrle_cache_miss_prev) / page_count;
Xiao Guangrongb7340352018-06-04 17:55:12 +08001612 rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
1613 }
Xiao Guangrong76e03002018-09-06 15:01:00 +08001614
1615 if (migrate_use_compression()) {
1616 compression_counters.busy_rate = (double)(compression_counters.busy -
1617 rs->compress_thread_busy_prev) / page_count;
1618 rs->compress_thread_busy_prev = compression_counters.busy;
1619
1620 compressed_size = compression_counters.compressed_size -
1621 rs->compressed_size_prev;
1622 if (compressed_size) {
1623 double uncompressed_size = (compression_counters.pages -
1624 rs->compress_pages_prev) * TARGET_PAGE_SIZE;
1625
1626 /* Compression-Ratio = Uncompressed-size / Compressed-size */
1627 compression_counters.compression_rate =
1628 uncompressed_size / compressed_size;
1629
1630 rs->compress_pages_prev = compression_counters.pages;
1631 rs->compressed_size_prev = compression_counters.compressed_size;
1632 }
1633 }
Xiao Guangrongb7340352018-06-04 17:55:12 +08001634}
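/*
 * Worked example with made-up numbers: if the compression threads handled
 * 1000 pages of 4 KiB during the period and emitted 1 MiB of compressed
 * payload, then compression_rate = (1000 * 4096) / 1048576 ~= 3.9.
 */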
1635
Juan Quintela8d820d62017-03-13 19:35:50 +01001636static void migration_bitmap_sync(RAMState *rs)
Juan Quintela56e93d22015-05-07 19:33:31 +02001637{
1638 RAMBlock *block;
Juan Quintela56e93d22015-05-07 19:33:31 +02001639 int64_t end_time;
Juan Quintelac4bdf0c2017-03-28 14:59:54 +02001640 uint64_t bytes_xfer_now;
Juan Quintela56e93d22015-05-07 19:33:31 +02001641
Juan Quintela93604472017-06-06 19:49:03 +02001642 ram_counters.dirty_sync_count++;
Juan Quintela56e93d22015-05-07 19:33:31 +02001643
Juan Quintelaf664da82017-03-13 19:44:57 +01001644 if (!rs->time_last_bitmap_sync) {
1645 rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
Juan Quintela56e93d22015-05-07 19:33:31 +02001646 }
1647
1648 trace_migration_bitmap_sync_start();
Paolo Bonzini9c1f8f42016-09-22 16:08:31 +02001649 memory_global_dirty_log_sync();
Juan Quintela56e93d22015-05-07 19:33:31 +02001650
Juan Quintela108cfae2017-03-13 21:38:09 +01001651 qemu_mutex_lock(&rs->bitmap_mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02001652 rcu_read_lock();
Cédric Le Goaterb895de52018-05-14 08:57:00 +02001653 RAMBLOCK_FOREACH_MIGRATABLE(block) {
Juan Quintela15440dd2017-03-21 09:35:04 +01001654 migration_bitmap_sync_range(rs, block, 0, block->used_length);
Juan Quintela56e93d22015-05-07 19:33:31 +02001655 }
Balamuruhan S650af892018-06-12 14:20:09 +05301656 ram_counters.remaining = ram_bytes_remaining();
Juan Quintela56e93d22015-05-07 19:33:31 +02001657 rcu_read_unlock();
Juan Quintela108cfae2017-03-13 21:38:09 +01001658 qemu_mutex_unlock(&rs->bitmap_mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02001659
Juan Quintelaa66cd902017-03-28 15:02:43 +02001660 trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);
Chao Fan1ffb5df2017-03-14 09:55:07 +08001661
Juan Quintela56e93d22015-05-07 19:33:31 +02001662 end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
1663
 1664 /* more than 1 second = 1000 milliseconds */
Juan Quintelaf664da82017-03-13 19:44:57 +01001665 if (end_time > rs->time_last_bitmap_sync + 1000) {
Juan Quintela93604472017-06-06 19:49:03 +02001666 bytes_xfer_now = ram_counters.transferred;
Felipe Franciosid693c6f2017-05-24 17:10:01 +01001667
Peter Lieven9ac78b62017-09-26 12:33:16 +02001668 /* During block migration the auto-converge logic incorrectly detects
1669 * that ram migration makes no progress. Avoid this by disabling the
1670 * throttling logic during the bulk phase of block migration. */
1671 if (migrate_auto_converge() && !blk_mig_bulk_active()) {
Juan Quintela56e93d22015-05-07 19:33:31 +02001672 /* The following detection logic can be refined later. For now:
 1673 check whether the bytes dirtied in this period are more than half
 1674 of the bytes transferred since the last time we were in this
Jason J. Herne070afca2015-09-08 13:12:35 -04001675 routine. If that happens twice in a row, start or increase
 1676 throttling */
Jason J. Herne070afca2015-09-08 13:12:35 -04001677
Felipe Franciosid693c6f2017-05-24 17:10:01 +01001678 if ((rs->num_dirty_pages_period * TARGET_PAGE_SIZE >
Juan Quintelaeac74152017-03-28 14:59:01 +02001679 (bytes_xfer_now - rs->bytes_xfer_prev) / 2) &&
Felipe Franciosib4a3c642017-05-24 17:10:03 +01001680 (++rs->dirty_rate_high_cnt >= 2)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02001681 trace_migration_throttle();
Juan Quintela8d820d62017-03-13 19:35:50 +01001682 rs->dirty_rate_high_cnt = 0;
Jason J. Herne070afca2015-09-08 13:12:35 -04001683 mig_throttle_guest_down();
Felipe Franciosid693c6f2017-05-24 17:10:01 +01001684 }
Juan Quintela56e93d22015-05-07 19:33:31 +02001685 }
Jason J. Herne070afca2015-09-08 13:12:35 -04001686
Xiao Guangrongb7340352018-06-04 17:55:12 +08001687 migration_update_rates(rs, end_time);
1688
Xiao Guangrongbe8b02e2018-09-03 17:26:42 +08001689 rs->target_page_count_prev = rs->target_page_count;
Felipe Franciosid693c6f2017-05-24 17:10:01 +01001690
1691 /* reset period counters */
Juan Quintelaf664da82017-03-13 19:44:57 +01001692 rs->time_last_bitmap_sync = end_time;
Juan Quintelaa66cd902017-03-28 15:02:43 +02001693 rs->num_dirty_pages_period = 0;
Felipe Franciosid2a4d852017-05-24 17:10:02 +01001694 rs->bytes_xfer_prev = bytes_xfer_now;
Juan Quintela56e93d22015-05-07 19:33:31 +02001695 }
Dr. David Alan Gilbert4addcd42015-12-16 11:47:36 +00001696 if (migrate_use_events()) {
Peter Xu3ab72382018-08-15 21:37:37 +08001697 qapi_event_send_migration_pass(ram_counters.dirty_sync_count);
Dr. David Alan Gilbert4addcd42015-12-16 11:47:36 +00001698 }
Juan Quintela56e93d22015-05-07 19:33:31 +02001699}
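/*
 * Worked example of the auto-converge trigger above (made-up numbers): if
 * the guest dirtied 300 MiB during the last period while only 100 MiB were
 * transferred, then 300 MiB > 100 MiB / 2 holds and dirty_rate_high_cnt is
 * bumped; once that happens in two consecutive periods,
 * mig_throttle_guest_down() is called.
 */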
1700
1701/**
Xiao Guangrong6c97ec52018-08-21 16:10:22 +08001702 * save_zero_page_to_file: send the zero page to the file
1703 *
1704 * Returns the size of data written to the file, 0 means the page is not
1705 * a zero page
1706 *
1707 * @rs: current RAM state
1708 * @file: the file where the data is saved
1709 * @block: block that contains the page we want to send
1710 * @offset: offset inside the block for the page
1711 */
1712static int save_zero_page_to_file(RAMState *rs, QEMUFile *file,
1713 RAMBlock *block, ram_addr_t offset)
1714{
1715 uint8_t *p = block->host + offset;
1716 int len = 0;
1717
1718 if (is_zero_range(p, TARGET_PAGE_SIZE)) {
1719 len += save_page_header(rs, file, block, offset | RAM_SAVE_FLAG_ZERO);
1720 qemu_put_byte(file, 0);
1721 len += 1;
1722 }
1723 return len;
1724}
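/*
 * Sketch of what the function above emits for a zero page: the usual page
 * header with RAM_SAVE_FLAG_ZERO set, followed by a single 0 byte, i.e.
 * header size + 1 bytes in total; a non-zero page produces no output and
 * the function returns 0.
 */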
1725
1726/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001727 * save_zero_page: send the zero page to the stream
Juan Quintela56e93d22015-05-07 19:33:31 +02001728 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001729 * Returns the number of pages written.
Juan Quintela56e93d22015-05-07 19:33:31 +02001730 *
Juan Quintelaf7ccd612017-03-13 20:30:21 +01001731 * @rs: current RAM state
Juan Quintela56e93d22015-05-07 19:33:31 +02001732 * @block: block that contains the page we want to send
1733 * @offset: offset inside the block for the page
Juan Quintela56e93d22015-05-07 19:33:31 +02001734 */
Juan Quintela7faccdc2018-01-08 18:58:17 +01001735static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
Juan Quintela56e93d22015-05-07 19:33:31 +02001736{
Xiao Guangrong6c97ec52018-08-21 16:10:22 +08001737 int len = save_zero_page_to_file(rs, rs->f, block, offset);
Juan Quintela56e93d22015-05-07 19:33:31 +02001738
Xiao Guangrong6c97ec52018-08-21 16:10:22 +08001739 if (len) {
Juan Quintela93604472017-06-06 19:49:03 +02001740 ram_counters.duplicate++;
Xiao Guangrong6c97ec52018-08-21 16:10:22 +08001741 ram_counters.transferred += len;
1742 return 1;
Juan Quintela56e93d22015-05-07 19:33:31 +02001743 }
Xiao Guangrong6c97ec52018-08-21 16:10:22 +08001744 return -1;
Juan Quintela56e93d22015-05-07 19:33:31 +02001745}
1746
Juan Quintela57273092017-03-20 22:25:28 +01001747static void ram_release_pages(const char *rbname, uint64_t offset, int pages)
Pavel Butsykin53f09a12017-02-03 18:23:20 +03001748{
Juan Quintela57273092017-03-20 22:25:28 +01001749 if (!migrate_release_ram() || !migration_in_postcopy()) {
Pavel Butsykin53f09a12017-02-03 18:23:20 +03001750 return;
1751 }
1752
Juan Quintelaaaa20642017-03-21 11:35:24 +01001753 ram_discard_range(rbname, offset, pages << TARGET_PAGE_BITS);
Pavel Butsykin53f09a12017-02-03 18:23:20 +03001754}
1755
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08001756/*
1757 * @pages: the number of pages written by the control path,
1758 * < 0 - error
1759 * > 0 - number of pages written
1760 *
 1761 * Return true if the page has been saved, otherwise false is returned.
1762 */
1763static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
1764 int *pages)
1765{
1766 uint64_t bytes_xmit = 0;
1767 int ret;
1768
1769 *pages = -1;
1770 ret = ram_control_save_page(rs->f, block->offset, offset, TARGET_PAGE_SIZE,
1771 &bytes_xmit);
1772 if (ret == RAM_SAVE_CONTROL_NOT_SUPP) {
1773 return false;
1774 }
1775
1776 if (bytes_xmit) {
1777 ram_counters.transferred += bytes_xmit;
1778 *pages = 1;
1779 }
1780
1781 if (ret == RAM_SAVE_CONTROL_DELAYED) {
1782 return true;
1783 }
1784
1785 if (bytes_xmit > 0) {
1786 ram_counters.normal++;
1787 } else if (bytes_xmit == 0) {
1788 ram_counters.duplicate++;
1789 }
1790
1791 return true;
1792}
1793
Xiao Guangrong65dacaa2018-03-30 15:51:27 +08001794/*
1795 * directly send the page to the stream
1796 *
1797 * Returns the number of pages written.
1798 *
1799 * @rs: current RAM state
1800 * @block: block that contains the page we want to send
1801 * @offset: offset inside the block for the page
1802 * @buf: the page to be sent
 1803 * @async: send the page asynchronously
1804 */
1805static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
1806 uint8_t *buf, bool async)
1807{
1808 ram_counters.transferred += save_page_header(rs, rs->f, block,
1809 offset | RAM_SAVE_FLAG_PAGE);
1810 if (async) {
1811 qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE,
1812 migrate_release_ram() &
1813 migration_in_postcopy());
1814 } else {
1815 qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE);
1816 }
1817 ram_counters.transferred += TARGET_PAGE_SIZE;
1818 ram_counters.normal++;
1819 return 1;
1820}
1821
Juan Quintela56e93d22015-05-07 19:33:31 +02001822/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001823 * ram_save_page: send the given page to the stream
Juan Quintela56e93d22015-05-07 19:33:31 +02001824 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001825 * Returns the number of pages written.
Dr. David Alan Gilbert3fd3c4b2015-12-10 16:31:46 +00001826 * < 0 - error
1827 * >=0 - Number of pages written - this might legally be 0
1828 * if xbzrle noticed the page was the same.
Juan Quintela56e93d22015-05-07 19:33:31 +02001829 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001830 * @rs: current RAM state
Juan Quintela56e93d22015-05-07 19:33:31 +02001831 * @block: block that contains the page we want to send
1832 * @offset: offset inside the block for the page
1833 * @last_stage: if we are at the completion stage
Juan Quintela56e93d22015-05-07 19:33:31 +02001834 */
Juan Quintelaa0a8aa12017-03-20 22:29:07 +01001835static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
Juan Quintela56e93d22015-05-07 19:33:31 +02001836{
1837 int pages = -1;
Juan Quintela56e93d22015-05-07 19:33:31 +02001838 uint8_t *p;
Juan Quintela56e93d22015-05-07 19:33:31 +02001839 bool send_async = true;
zhanghailianga08f6892016-01-15 11:37:44 +08001840 RAMBlock *block = pss->block;
Juan Quintelaa935e302017-03-21 15:36:51 +01001841 ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08001842 ram_addr_t current_addr = block->offset + offset;
Juan Quintela56e93d22015-05-07 19:33:31 +02001843
Dr. David Alan Gilbert2f68e392015-08-13 11:51:30 +01001844 p = block->host + offset;
Dr. David Alan Gilbert1db9d8e2017-04-26 19:37:21 +01001845 trace_ram_save_page(block->idstr, (uint64_t)offset, p);
Juan Quintela56e93d22015-05-07 19:33:31 +02001846
Juan Quintela56e93d22015-05-07 19:33:31 +02001847 XBZRLE_cache_lock();
Xiao Guangrongd7400a32018-03-30 15:51:26 +08001848 if (!rs->ram_bulk_stage && !migration_in_postcopy() &&
1849 migrate_use_xbzrle()) {
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08001850 pages = save_xbzrle_page(rs, &p, current_addr, block,
1851 offset, last_stage);
1852 if (!last_stage) {
1853 /* Can't send this cached data async, since the cache page
1854 * might get updated before it gets to the wire
Juan Quintela56e93d22015-05-07 19:33:31 +02001855 */
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08001856 send_async = false;
Juan Quintela56e93d22015-05-07 19:33:31 +02001857 }
1858 }
1859
1860 /* XBZRLE overflow or normal page */
1861 if (pages == -1) {
Xiao Guangrong65dacaa2018-03-30 15:51:27 +08001862 pages = save_normal_page(rs, block, offset, p, send_async);
Juan Quintela56e93d22015-05-07 19:33:31 +02001863 }
1864
1865 XBZRLE_cache_unlock();
1866
1867 return pages;
1868}
1869
Juan Quintelab9ee2f72016-01-15 11:40:13 +01001870static int ram_save_multifd_page(RAMState *rs, RAMBlock *block,
1871 ram_addr_t offset)
1872{
Juan Quintelab9ee2f72016-01-15 11:40:13 +01001873 multifd_queue_page(block, offset);
Juan Quintelab9ee2f72016-01-15 11:40:13 +01001874 ram_counters.normal++;
1875
1876 return 1;
1877}
1878
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001879static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
Xiao Guangrong6ef37712018-08-21 16:10:23 +08001880 ram_addr_t offset, uint8_t *source_buf)
Juan Quintela56e93d22015-05-07 19:33:31 +02001881{
Juan Quintela53518d92017-05-04 11:46:24 +02001882 RAMState *rs = ram_state;
Liang Lia7a9a882016-05-05 15:32:57 +08001883 uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001884 bool zero_page = false;
Xiao Guangrong6ef37712018-08-21 16:10:23 +08001885 int ret;
Juan Quintela56e93d22015-05-07 19:33:31 +02001886
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001887 if (save_zero_page_to_file(rs, f, block, offset)) {
1888 zero_page = true;
1889 goto exit;
1890 }
1891
Xiao Guangrong6ef37712018-08-21 16:10:23 +08001892 save_page_header(rs, f, block, offset | RAM_SAVE_FLAG_COMPRESS_PAGE);
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08001893
1894 /*
 1895 * copy it to an internal buffer to avoid it being modified by the VM,
 1896 * so that we can catch errors during compression and
 1897 * decompression
1898 */
1899 memcpy(source_buf, p, TARGET_PAGE_SIZE);
Xiao Guangrong6ef37712018-08-21 16:10:23 +08001900 ret = qemu_put_compression_data(f, stream, source_buf, TARGET_PAGE_SIZE);
1901 if (ret < 0) {
1902 qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
Liang Lib3be2892016-05-05 15:32:54 +08001903 error_report("compressed data failed!");
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001904 return false;
Liang Lib3be2892016-05-05 15:32:54 +08001905 }
Juan Quintela56e93d22015-05-07 19:33:31 +02001906
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001907exit:
Xiao Guangrong6ef37712018-08-21 16:10:23 +08001908 ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1);
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001909 return zero_page;
1910}
1911
1912static void
1913update_compress_thread_counts(const CompressParam *param, int bytes_xmit)
1914{
Xiao Guangrong76e03002018-09-06 15:01:00 +08001915 ram_counters.transferred += bytes_xmit;
1916
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001917 if (param->zero_page) {
1918 ram_counters.duplicate++;
Xiao Guangrong76e03002018-09-06 15:01:00 +08001919 return;
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001920 }
Xiao Guangrong76e03002018-09-06 15:01:00 +08001921
1922 /* 8 means a header with RAM_SAVE_FLAG_CONTINUE. */
1923 compression_counters.compressed_size += bytes_xmit - 8;
1924 compression_counters.pages++;
Juan Quintela56e93d22015-05-07 19:33:31 +02001925}
1926
Xiao Guangrong32b05492018-09-06 15:01:01 +08001927static bool save_page_use_compression(RAMState *rs);
1928
Juan Quintelace25d332017-03-15 11:00:51 +01001929static void flush_compressed_data(RAMState *rs)
Juan Quintela56e93d22015-05-07 19:33:31 +02001930{
1931 int idx, len, thread_count;
1932
Xiao Guangrong32b05492018-09-06 15:01:01 +08001933 if (!save_page_use_compression(rs)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02001934 return;
1935 }
1936 thread_count = migrate_compress_threads();
Liang Lia7a9a882016-05-05 15:32:57 +08001937
Liang Li0d9f9a52016-05-05 15:32:59 +08001938 qemu_mutex_lock(&comp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02001939 for (idx = 0; idx < thread_count; idx++) {
Liang Lia7a9a882016-05-05 15:32:57 +08001940 while (!comp_param[idx].done) {
Liang Li0d9f9a52016-05-05 15:32:59 +08001941 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02001942 }
Liang Lia7a9a882016-05-05 15:32:57 +08001943 }
Liang Li0d9f9a52016-05-05 15:32:59 +08001944 qemu_mutex_unlock(&comp_done_lock);
Liang Lia7a9a882016-05-05 15:32:57 +08001945
1946 for (idx = 0; idx < thread_count; idx++) {
1947 qemu_mutex_lock(&comp_param[idx].mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08001948 if (!comp_param[idx].quit) {
Juan Quintelace25d332017-03-15 11:00:51 +01001949 len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001950 /*
1951 * it's safe to fetch zero_page without holding comp_done_lock
1952 * as there is no further request submitted to the thread,
1953 * i.e, the thread should be waiting for a request at this point.
1954 */
1955 update_compress_thread_counts(&comp_param[idx], len);
Juan Quintela56e93d22015-05-07 19:33:31 +02001956 }
Liang Lia7a9a882016-05-05 15:32:57 +08001957 qemu_mutex_unlock(&comp_param[idx].mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02001958 }
1959}
1960
1961static inline void set_compress_params(CompressParam *param, RAMBlock *block,
1962 ram_addr_t offset)
1963{
1964 param->block = block;
1965 param->offset = offset;
1966}
1967
Juan Quintelace25d332017-03-15 11:00:51 +01001968static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block,
1969 ram_addr_t offset)
Juan Quintela56e93d22015-05-07 19:33:31 +02001970{
1971 int idx, thread_count, bytes_xmit = -1, pages = -1;
Xiao Guangrong1d588722018-08-21 16:10:20 +08001972 bool wait = migrate_compress_wait_thread();
Juan Quintela56e93d22015-05-07 19:33:31 +02001973
1974 thread_count = migrate_compress_threads();
Liang Li0d9f9a52016-05-05 15:32:59 +08001975 qemu_mutex_lock(&comp_done_lock);
Xiao Guangrong1d588722018-08-21 16:10:20 +08001976retry:
1977 for (idx = 0; idx < thread_count; idx++) {
1978 if (comp_param[idx].done) {
1979 comp_param[idx].done = false;
1980 bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file);
1981 qemu_mutex_lock(&comp_param[idx].mutex);
1982 set_compress_params(&comp_param[idx], block, offset);
1983 qemu_cond_signal(&comp_param[idx].cond);
1984 qemu_mutex_unlock(&comp_param[idx].mutex);
1985 pages = 1;
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001986 update_compress_thread_counts(&comp_param[idx], bytes_xmit);
Juan Quintela56e93d22015-05-07 19:33:31 +02001987 break;
Juan Quintela56e93d22015-05-07 19:33:31 +02001988 }
1989 }
Xiao Guangrong1d588722018-08-21 16:10:20 +08001990
1991 /*
1992 * wait for the free thread if the user specifies 'compress-wait-thread',
1993 * otherwise we will post the page out in the main thread as normal page.
1994 */
1995 if (pages < 0 && wait) {
1996 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
1997 goto retry;
1998 }
Liang Li0d9f9a52016-05-05 15:32:59 +08001999 qemu_mutex_unlock(&comp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02002000
2001 return pages;
2002}
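/*
 * Summary of the dispatch above: the page is handed to the first idle
 * compression thread (after flushing that thread's previous output) and 1
 * is returned; if every thread is busy the function either blocks on
 * comp_done_cond (compress-wait-thread on) or returns -1 so the caller
 * falls back to sending the page uncompressed.
 */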
2003
2004/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002005 * find_dirty_block: find the next dirty page and update any state
2006 * associated with the search process.
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002007 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002008 * Returns whether a page is found
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002009 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002010 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002011 * @pss: data about the state of the current dirty page scan
2012 * @again: set to false if the search has scanned the whole of RAM
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002013 */
Juan Quintelaf20e2862017-03-21 16:19:05 +01002014static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002015{
Juan Quintelaf20e2862017-03-21 16:19:05 +01002016 pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
Juan Quintela6f37bb82017-03-13 19:26:29 +01002017 if (pss->complete_round && pss->block == rs->last_seen_block &&
Juan Quintelaa935e302017-03-21 15:36:51 +01002018 pss->page >= rs->last_page) {
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002019 /*
2020 * We've been once around the RAM and haven't found anything.
2021 * Give up.
2022 */
2023 *again = false;
2024 return false;
2025 }
Juan Quintelaa935e302017-03-21 15:36:51 +01002026 if ((pss->page << TARGET_PAGE_BITS) >= pss->block->used_length) {
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002027 /* Didn't find anything in this RAM Block */
Juan Quintelaa935e302017-03-21 15:36:51 +01002028 pss->page = 0;
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002029 pss->block = QLIST_NEXT_RCU(pss->block, next);
2030 if (!pss->block) {
Xiao Guangrong48df9d82018-09-06 15:00:59 +08002031 /*
2032 * If memory migration starts over, we will meet a dirtied page
 2033 * which may still exist in the compression threads' ring, so we
2034 * should flush the compressed data to make sure the new page
2035 * is not overwritten by the old one in the destination.
2036 *
 2037 * Also, if xbzrle is on, stop using data compression at this
2038 * point. In theory, xbzrle can do better than compression.
2039 */
2040 flush_compressed_data(rs);
2041
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002042 /* Hit the end of the list */
2043 pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
2044 /* Flag that we've looped */
2045 pss->complete_round = true;
Juan Quintela6f37bb82017-03-13 19:26:29 +01002046 rs->ram_bulk_stage = false;
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002047 }
2048 /* Didn't find anything this time, but try again on the new block */
2049 *again = true;
2050 return false;
2051 } else {
2052 /* Can go around again, but... */
2053 *again = true;
2054 /* We've found something so probably don't need to */
2055 return true;
2056 }
2057}
2058
Juan Quintela3d0684b2017-03-23 15:06:39 +01002059/**
 2060 * unqueue_page: gets a page off the queue
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002061 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002062 * Helper for 'get_queued_page' - gets a page off the queue
2063 *
2064 * Returns the block of the page (or NULL if none available)
2065 *
Juan Quintelaec481c62017-03-20 22:12:40 +01002066 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002067 * @offset: used to return the offset within the RAMBlock
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002068 */
Juan Quintelaf20e2862017-03-21 16:19:05 +01002069static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002070{
2071 RAMBlock *block = NULL;
2072
Xiao Guangrongae526e32018-08-21 16:10:25 +08002073 if (QSIMPLEQ_EMPTY_ATOMIC(&rs->src_page_requests)) {
2074 return NULL;
2075 }
2076
Juan Quintelaec481c62017-03-20 22:12:40 +01002077 qemu_mutex_lock(&rs->src_page_req_mutex);
2078 if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
2079 struct RAMSrcPageRequest *entry =
2080 QSIMPLEQ_FIRST(&rs->src_page_requests);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002081 block = entry->rb;
2082 *offset = entry->offset;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002083
2084 if (entry->len > TARGET_PAGE_SIZE) {
2085 entry->len -= TARGET_PAGE_SIZE;
2086 entry->offset += TARGET_PAGE_SIZE;
2087 } else {
2088 memory_region_unref(block->mr);
Juan Quintelaec481c62017-03-20 22:12:40 +01002089 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002090 g_free(entry);
Dr. David Alan Gilberte03a34f2018-06-13 11:26:42 +01002091 migration_consume_urgent_request();
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002092 }
2093 }
Juan Quintelaec481c62017-03-20 22:12:40 +01002094 qemu_mutex_unlock(&rs->src_page_req_mutex);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002095
2096 return block;
2097}
2098
Juan Quintela3d0684b2017-03-23 15:06:39 +01002099/**
 2100 * get_queued_page: unqueue a page from the postcopy requests
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002101 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002102 * Skips pages that are already sent (!dirty)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002103 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002104 * Returns whether a queued page is found
2105 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002106 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002107 * @pss: data about the state of the current dirty page scan
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002108 */
Juan Quintelaf20e2862017-03-21 16:19:05 +01002109static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002110{
2111 RAMBlock *block;
2112 ram_addr_t offset;
2113 bool dirty;
2114
2115 do {
Juan Quintelaf20e2862017-03-21 16:19:05 +01002116 block = unqueue_page(rs, &offset);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002117 /*
2118 * We're sending this page, and since it's postcopy nothing else
2119 * will dirty it, and we must make sure it doesn't get sent again
2120 * even if this queue request was received after the background
2121 * search already sent it.
2122 */
2123 if (block) {
Juan Quintelaf20e2862017-03-21 16:19:05 +01002124 unsigned long page;
2125
Juan Quintela6b6712e2017-03-22 15:18:04 +01002126 page = offset >> TARGET_PAGE_BITS;
2127 dirty = test_bit(page, block->bmap);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002128 if (!dirty) {
Juan Quintela06b10682017-03-21 15:18:05 +01002129 trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
Juan Quintela6b6712e2017-03-22 15:18:04 +01002130 page, test_bit(page, block->unsentmap));
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002131 } else {
Juan Quintelaf20e2862017-03-21 16:19:05 +01002132 trace_get_queued_page(block->idstr, (uint64_t)offset, page);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002133 }
2134 }
2135
2136 } while (block && !dirty);
2137
2138 if (block) {
2139 /*
2140 * As soon as we start servicing pages out of order, then we have
2141 * to kill the bulk stage, since the bulk stage assumes
2142 * in (migration_bitmap_find_and_reset_dirty) that every page is
2143 * dirty, that's no longer true.
2144 */
Juan Quintela6f37bb82017-03-13 19:26:29 +01002145 rs->ram_bulk_stage = false;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002146
2147 /*
2148 * We want the background search to continue from the queued page
2149 * since the guest is likely to want other pages near to the page
2150 * it just requested.
2151 */
2152 pss->block = block;
Juan Quintelaa935e302017-03-21 15:36:51 +01002153 pss->page = offset >> TARGET_PAGE_BITS;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002154 }
2155
2156 return !!block;
2157}
2158
Juan Quintela56e93d22015-05-07 19:33:31 +02002159/**
Juan Quintela5e58f962017-04-03 22:06:54 +02002160 * migration_page_queue_free: drop any remaining pages in the ram
2161 * request queue
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002162 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002163 * It should be empty at the end anyway, but in error cases there may
 2164 * be some left. In case any pages are left, we drop them.
2165 *
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002166 */
Juan Quintela83c13382017-05-04 11:45:01 +02002167static void migration_page_queue_free(RAMState *rs)
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002168{
Juan Quintelaec481c62017-03-20 22:12:40 +01002169 struct RAMSrcPageRequest *mspr, *next_mspr;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002170 /* This queue generally should be empty - but in the case of a failed
2171 * migration might have some droppings in.
2172 */
2173 rcu_read_lock();
Juan Quintelaec481c62017-03-20 22:12:40 +01002174 QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002175 memory_region_unref(mspr->rb->mr);
Juan Quintelaec481c62017-03-20 22:12:40 +01002176 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002177 g_free(mspr);
2178 }
2179 rcu_read_unlock();
2180}
2181
2182/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002183 * ram_save_queue_pages: queue the page for transmission
2184 *
2185 * A request from postcopy destination for example.
2186 *
2187 * Returns zero on success or negative on error
2188 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002189 * @rbname: Name of the RAMBLock of the request. NULL means the
2190 * same that last one.
2191 * @start: starting address from the start of the RAMBlock
2192 * @len: length (in bytes) to send
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002193 */
Juan Quintela96506892017-03-14 18:41:03 +01002194int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002195{
2196 RAMBlock *ramblock;
Juan Quintela53518d92017-05-04 11:46:24 +02002197 RAMState *rs = ram_state;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002198
Juan Quintela93604472017-06-06 19:49:03 +02002199 ram_counters.postcopy_requests++;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002200 rcu_read_lock();
2201 if (!rbname) {
2202 /* Reuse last RAMBlock */
Juan Quintela68a098f2017-03-14 13:48:42 +01002203 ramblock = rs->last_req_rb;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002204
2205 if (!ramblock) {
2206 /*
2207 * Shouldn't happen, we can't reuse the last RAMBlock if
2208 * it's the 1st request.
2209 */
2210 error_report("ram_save_queue_pages no previous block");
2211 goto err;
2212 }
2213 } else {
2214 ramblock = qemu_ram_block_by_name(rbname);
2215
2216 if (!ramblock) {
2217 /* We shouldn't be asked for a non-existent RAMBlock */
2218 error_report("ram_save_queue_pages no block '%s'", rbname);
2219 goto err;
2220 }
Juan Quintela68a098f2017-03-14 13:48:42 +01002221 rs->last_req_rb = ramblock;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002222 }
2223 trace_ram_save_queue_pages(ramblock->idstr, start, len);
2224 if (start+len > ramblock->used_length) {
Juan Quintela9458ad62015-11-10 17:42:05 +01002225 error_report("%s request overrun start=" RAM_ADDR_FMT " len="
2226 RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002227 __func__, start, len, ramblock->used_length);
2228 goto err;
2229 }
2230
Juan Quintelaec481c62017-03-20 22:12:40 +01002231 struct RAMSrcPageRequest *new_entry =
2232 g_malloc0(sizeof(struct RAMSrcPageRequest));
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002233 new_entry->rb = ramblock;
2234 new_entry->offset = start;
2235 new_entry->len = len;
2236
2237 memory_region_ref(ramblock->mr);
Juan Quintelaec481c62017-03-20 22:12:40 +01002238 qemu_mutex_lock(&rs->src_page_req_mutex);
2239 QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
Dr. David Alan Gilberte03a34f2018-06-13 11:26:42 +01002240 migration_make_urgent_request();
Juan Quintelaec481c62017-03-20 22:12:40 +01002241 qemu_mutex_unlock(&rs->src_page_req_mutex);
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002242 rcu_read_unlock();
2243
2244 return 0;
2245
2246err:
2247 rcu_read_unlock();
2248 return -1;
2249}
2250
Xiao Guangrongd7400a32018-03-30 15:51:26 +08002251static bool save_page_use_compression(RAMState *rs)
2252{
2253 if (!migrate_use_compression()) {
2254 return false;
2255 }
2256
2257 /*
2258 * If xbzrle is on, stop using the data compression after first
2259 * round of migration even if compression is enabled. In theory,
2260 * xbzrle can do better than compression.
2261 */
2262 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
2263 return true;
2264 }
2265
2266 return false;
2267}
2268
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002269/*
2270 * try to compress the page before posting it out, return true if the page
 2271 * has been properly handled by compression, otherwise it needs to be
 2272 * handled by other paths
2273 */
2274static bool save_compress_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
2275{
2276 if (!save_page_use_compression(rs)) {
2277 return false;
2278 }
2279
2280 /*
2281 * When starting the process of a new block, the first page of
2282 * the block should be sent out before other pages in the same
2283 * block, and all the pages in last block should have been sent
2284 * out, keeping this order is important, because the 'cont' flag
2285 * is used to avoid resending the block name.
2286 *
 2287 * We post the first page as a normal page as compression will take
2288 * much CPU resource.
2289 */
2290 if (block != rs->last_sent_block) {
2291 flush_compressed_data(rs);
2292 return false;
2293 }
2294
2295 if (compress_page_with_multi_thread(rs, block, offset) > 0) {
2296 return true;
2297 }
2298
Xiao Guangrong76e03002018-09-06 15:01:00 +08002299 compression_counters.busy++;
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002300 return false;
2301}
2302
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002303/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002304 * ram_save_target_page: save one target page
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002305 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002306 * Returns the number of pages written
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002307 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002308 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002309 * @pss: data about the page we want to send
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002310 * @last_stage: if we are at the completion stage
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002311 */
Juan Quintelaa0a8aa12017-03-20 22:29:07 +01002312static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
Juan Quintelaf20e2862017-03-21 16:19:05 +01002313 bool last_stage)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002314{
Xiao Guangronga8ec91f2018-03-30 15:51:25 +08002315 RAMBlock *block = pss->block;
2316 ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
2317 int res;
2318
2319 if (control_save_page(rs, block, offset, &res)) {
2320 return res;
2321 }
2322
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002323 if (save_compress_page(rs, block, offset)) {
2324 return 1;
Xiao Guangrongd7400a32018-03-30 15:51:26 +08002325 }
2326
2327 res = save_zero_page(rs, block, offset);
2328 if (res > 0) {
2329 /* Must let xbzrle know, otherwise a previous (now 0'd) cached
2330 * page would be stale
2331 */
2332 if (!save_page_use_compression(rs)) {
2333 XBZRLE_cache_lock();
2334 xbzrle_cache_zero_page(rs, block->offset + offset);
2335 XBZRLE_cache_unlock();
2336 }
2337 ram_release_pages(block->idstr, offset, res);
2338 return res;
2339 }
2340
Xiao Guangrongda3f56c2018-03-30 15:51:28 +08002341 /*
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002342 * do not use multifd for compression as the first page in the new
2343 * block should be posted out before sending the compressed page
Xiao Guangrongda3f56c2018-03-30 15:51:28 +08002344 */
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002345 if (!save_page_use_compression(rs) && migrate_use_multifd()) {
Juan Quintelab9ee2f72016-01-15 11:40:13 +01002346 return ram_save_multifd_page(rs, block, offset);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002347 }
2348
Xiao Guangrong1faa5662018-03-30 15:51:24 +08002349 return ram_save_page(rs, pss, last_stage);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002350}
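/*
 * Order of the strategies tried above, for reference: 1) the control path
 * (e.g. RDMA) via control_save_page(), 2) multi-threaded compression via
 * save_compress_page(), 3) the zero-page shortcut, 4) multifd when
 * compression is not in use, and finally 5) ram_save_page(), which itself
 * prefers XBZRLE over a plain copy when possible.
 */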
2351
2352/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002353 * ram_save_host_page: save a whole host page
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002354 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002355 * Starting at *offset send pages up to the end of the current host
2356 * page. It's valid for the initial offset to point into the middle of
2357 * a host page in which case the remainder of the hostpage is sent.
2358 * Only dirty target pages are sent. Note that the host page size may
2359 * be a huge page for this block.
Dr. David Alan Gilbert1eb3fc02017-05-17 17:58:09 +01002360 * The saving stops at the boundary of the used_length of the block
2361 * if the RAMBlock isn't a multiple of the host page size.
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002362 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002363 * Returns the number of pages written or negative on error
2364 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002365 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002367 * @pss: data about the page we want to send
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002368 * @last_stage: if we are at the completion stage
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002369 */
Juan Quintelaa0a8aa12017-03-20 22:29:07 +01002370static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
Juan Quintelaf20e2862017-03-21 16:19:05 +01002371 bool last_stage)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002372{
2373 int tmppages, pages = 0;
Juan Quintelaa935e302017-03-21 15:36:51 +01002374 size_t pagesize_bits =
2375 qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
Dr. David Alan Gilbert4c011c32017-02-24 18:28:39 +00002376
Cédric Le Goaterb895de52018-05-14 08:57:00 +02002377 if (!qemu_ram_is_migratable(pss->block)) {
2378 error_report("block %s should not be migrated !", pss->block->idstr);
2379 return 0;
2380 }
2381
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002382 do {
Xiao Guangrong1faa5662018-03-30 15:51:24 +08002383 /* Check whether the page is dirty and, if it is, send it */
2384 if (!migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
2385 pss->page++;
2386 continue;
2387 }
2388
Juan Quintelaf20e2862017-03-21 16:19:05 +01002389 tmppages = ram_save_target_page(rs, pss, last_stage);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002390 if (tmppages < 0) {
2391 return tmppages;
2392 }
2393
2394 pages += tmppages;
Xiao Guangrong1faa5662018-03-30 15:51:24 +08002395 if (pss->block->unsentmap) {
2396 clear_bit(pss->page, pss->block->unsentmap);
2397 }
2398
Juan Quintelaa935e302017-03-21 15:36:51 +01002399 pss->page++;
Dr. David Alan Gilbert1eb3fc02017-05-17 17:58:09 +01002400 } while ((pss->page & (pagesize_bits - 1)) &&
2401 offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS));
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002402
2403 /* The offset we leave with is the last one we looked at */
Juan Quintelaa935e302017-03-21 15:36:51 +01002404 pss->page--;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002405 return pages;
2406}
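/*
 * Example with made-up sizes: for a RAMBlock backed by 2 MiB huge pages
 * and a 4 KiB target page, pagesize_bits is 512, so one call to the
 * function above walks up to 512 target pages (skipping clean ones) before
 * stopping at the host-page boundary.
 */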
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002407
2408/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002409 * ram_find_and_save_block: finds a dirty page and sends it to f
Juan Quintela56e93d22015-05-07 19:33:31 +02002410 *
2411 * Called within an RCU critical section.
2412 *
Xiao Guangronge8f37352018-09-03 17:26:44 +08002413 * Returns the number of pages written where zero means no dirty pages,
2414 * or negative on error
Juan Quintela56e93d22015-05-07 19:33:31 +02002415 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002416 * @rs: current RAM state
Juan Quintela56e93d22015-05-07 19:33:31 +02002417 * @last_stage: if we are at the completion stage
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002418 *
2419 * On systems where host-page-size > target-page-size it will send all the
2420 * pages in a host page that are dirty.
Juan Quintela56e93d22015-05-07 19:33:31 +02002421 */
2422
Juan Quintelace25d332017-03-15 11:00:51 +01002423static int ram_find_and_save_block(RAMState *rs, bool last_stage)
Juan Quintela56e93d22015-05-07 19:33:31 +02002424{
Dr. David Alan Gilbertb8fb8cb2015-09-23 15:27:10 +01002425 PageSearchStatus pss;
Juan Quintela56e93d22015-05-07 19:33:31 +02002426 int pages = 0;
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002427 bool again, found;
Juan Quintela56e93d22015-05-07 19:33:31 +02002428
Ashijeet Acharya0827b9e2017-02-08 19:58:45 +05302429 /* No dirty page as there is zero RAM */
2430 if (!ram_bytes_total()) {
2431 return pages;
2432 }
2433
Juan Quintela6f37bb82017-03-13 19:26:29 +01002434 pss.block = rs->last_seen_block;
Juan Quintelaa935e302017-03-21 15:36:51 +01002435 pss.page = rs->last_page;
Dr. David Alan Gilbertb8fb8cb2015-09-23 15:27:10 +01002436 pss.complete_round = false;
2437
2438 if (!pss.block) {
2439 pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
2440 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002441
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002442 do {
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002443 again = true;
Juan Quintelaf20e2862017-03-21 16:19:05 +01002444 found = get_queued_page(rs, &pss);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002445
2446 if (!found) {
2447 /* priority queue empty, so just search for something dirty */
Juan Quintelaf20e2862017-03-21 16:19:05 +01002448 found = find_dirty_block(rs, &pss, &again);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002449 }
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002450
2451 if (found) {
Juan Quintelaf20e2862017-03-21 16:19:05 +01002452 pages = ram_save_host_page(rs, &pss, last_stage);
Juan Quintela56e93d22015-05-07 19:33:31 +02002453 }
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002454 } while (!pages && again);
Juan Quintela56e93d22015-05-07 19:33:31 +02002455
Juan Quintela6f37bb82017-03-13 19:26:29 +01002456 rs->last_seen_block = pss.block;
Juan Quintelaa935e302017-03-21 15:36:51 +01002457 rs->last_page = pss.page;
Juan Quintela56e93d22015-05-07 19:33:31 +02002458
2459 return pages;
2460}
2461
2462void acct_update_position(QEMUFile *f, size_t size, bool zero)
2463{
2464 uint64_t pages = size / TARGET_PAGE_SIZE;
Juan Quintelaf7ccd612017-03-13 20:30:21 +01002465
Juan Quintela56e93d22015-05-07 19:33:31 +02002466 if (zero) {
Juan Quintela93604472017-06-06 19:49:03 +02002467 ram_counters.duplicate += pages;
Juan Quintela56e93d22015-05-07 19:33:31 +02002468 } else {
Juan Quintela93604472017-06-06 19:49:03 +02002469 ram_counters.normal += pages;
2470 ram_counters.transferred += size;
Juan Quintela56e93d22015-05-07 19:33:31 +02002471 qemu_update_position(f, size);
2472 }
2473}
2474
Juan Quintela56e93d22015-05-07 19:33:31 +02002475uint64_t ram_bytes_total(void)
2476{
2477 RAMBlock *block;
2478 uint64_t total = 0;
2479
2480 rcu_read_lock();
Cédric Le Goaterb895de52018-05-14 08:57:00 +02002481 RAMBLOCK_FOREACH_MIGRATABLE(block) {
Juan Quintela56e93d22015-05-07 19:33:31 +02002482 total += block->used_length;
Peter Xu99e15582017-05-12 12:17:39 +08002483 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002484 rcu_read_unlock();
2485 return total;
2486}
2487
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002488static void xbzrle_load_setup(void)
Juan Quintela56e93d22015-05-07 19:33:31 +02002489{
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002490 XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
Juan Quintela56e93d22015-05-07 19:33:31 +02002491}
2492
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002493static void xbzrle_load_cleanup(void)
2494{
2495 g_free(XBZRLE.decoded_buf);
2496 XBZRLE.decoded_buf = NULL;
2497}
2498
Peter Xu7d7c96b2017-10-19 14:31:58 +08002499static void ram_state_cleanup(RAMState **rsp)
2500{
Dr. David Alan Gilbertb9ccaf62018-02-12 16:03:39 +00002501 if (*rsp) {
2502 migration_page_queue_free(*rsp);
2503 qemu_mutex_destroy(&(*rsp)->bitmap_mutex);
2504 qemu_mutex_destroy(&(*rsp)->src_page_req_mutex);
2505 g_free(*rsp);
2506 *rsp = NULL;
2507 }
Peter Xu7d7c96b2017-10-19 14:31:58 +08002508}
2509
Peter Xu84593a02017-10-19 14:31:59 +08002510static void xbzrle_cleanup(void)
2511{
2512 XBZRLE_cache_lock();
2513 if (XBZRLE.cache) {
2514 cache_fini(XBZRLE.cache);
2515 g_free(XBZRLE.encoded_buf);
2516 g_free(XBZRLE.current_buf);
2517 g_free(XBZRLE.zero_target_page);
2518 XBZRLE.cache = NULL;
2519 XBZRLE.encoded_buf = NULL;
2520 XBZRLE.current_buf = NULL;
2521 XBZRLE.zero_target_page = NULL;
2522 }
2523 XBZRLE_cache_unlock();
2524}
2525
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002526static void ram_save_cleanup(void *opaque)
Juan Quintela56e93d22015-05-07 19:33:31 +02002527{
Juan Quintela53518d92017-05-04 11:46:24 +02002528 RAMState **rsp = opaque;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002529 RAMBlock *block;
Juan Quintelaeb859c52017-03-13 21:51:55 +01002530
Li Zhijian2ff64032015-07-02 20:18:05 +08002531    /* the caller must hold the iothread lock or be in a bh, so there is
 2532     * no write race against this migration_bitmap
2533 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002534 memory_global_dirty_log_stop();
2535
Cédric Le Goaterb895de52018-05-14 08:57:00 +02002536 RAMBLOCK_FOREACH_MIGRATABLE(block) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01002537 g_free(block->bmap);
2538 block->bmap = NULL;
2539 g_free(block->unsentmap);
2540 block->unsentmap = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02002541 }
2542
Peter Xu84593a02017-10-19 14:31:59 +08002543 xbzrle_cleanup();
Juan Quintelaf0afa332017-06-28 11:52:28 +02002544 compress_threads_save_cleanup();
Peter Xu7d7c96b2017-10-19 14:31:58 +08002545 ram_state_cleanup(rsp);
Juan Quintela56e93d22015-05-07 19:33:31 +02002546}
2547
Juan Quintela6f37bb82017-03-13 19:26:29 +01002548static void ram_state_reset(RAMState *rs)
Juan Quintela56e93d22015-05-07 19:33:31 +02002549{
Juan Quintela6f37bb82017-03-13 19:26:29 +01002550 rs->last_seen_block = NULL;
2551 rs->last_sent_block = NULL;
Juan Quintela269ace22017-03-21 15:23:31 +01002552 rs->last_page = 0;
Juan Quintela6f37bb82017-03-13 19:26:29 +01002553 rs->last_version = ram_list.version;
2554 rs->ram_bulk_stage = true;
Juan Quintela56e93d22015-05-07 19:33:31 +02002555}
2556
2557#define MAX_WAIT 50 /* ms, half buffered_file limit */
2558
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00002559/*
2560 * 'expected' is the value you expect the bitmap mostly to be full
2561 * of; it won't bother printing lines that are all this value.
 2562 * 'todump' is the bitmap to dump and 'pages' is its length in pages.
2563 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002564void ram_debug_dump_bitmap(unsigned long *todump, bool expected,
2565 unsigned long pages)
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00002566{
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00002567 int64_t cur;
2568 int64_t linelen = 128;
2569 char linebuf[129];
2570
Juan Quintela6b6712e2017-03-22 15:18:04 +01002571 for (cur = 0; cur < pages; cur += linelen) {
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00002572 int64_t curb;
2573 bool found = false;
2574 /*
2575 * Last line; catch the case where the line length
2576 * is longer than remaining ram
2577 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002578 if (cur + linelen > pages) {
2579 linelen = pages - cur;
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00002580 }
2581 for (curb = 0; curb < linelen; curb++) {
2582 bool thisbit = test_bit(cur + curb, todump);
2583 linebuf[curb] = thisbit ? '1' : '.';
2584 found = found || (thisbit != expected);
2585 }
2586 if (found) {
2587 linebuf[curb] = '\0';
2588 fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
2589 }
2590 }
2591}
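/*
 * Illustrative sketch only (not part of the original file, guarded out
 * of the build): one way a caller could dump the dirty bitmap of every
 * migratable RAMBlock using the helper above.  During the bulk stage
 * most bits are expected to be '1', so that is passed as 'expected'.
 */
#if 0
static void ram_debug_dump_all_bitmaps(void)
{
    RAMBlock *block;

    RAMBLOCK_FOREACH_MIGRATABLE(block) {
        fprintf(stderr, "dirty bitmap for %s:\n", block->idstr);
        ram_debug_dump_bitmap(block->bmap, true,
                              block->used_length >> TARGET_PAGE_BITS);
    }
}
#endif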
2592
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002593/* **** functions for postcopy ***** */
2594
Pavel Butsykinced1c612017-02-03 18:23:21 +03002595void ram_postcopy_migrated_memory_release(MigrationState *ms)
2596{
2597 struct RAMBlock *block;
Pavel Butsykinced1c612017-02-03 18:23:21 +03002598
Cédric Le Goaterb895de52018-05-14 08:57:00 +02002599 RAMBLOCK_FOREACH_MIGRATABLE(block) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01002600 unsigned long *bitmap = block->bmap;
2601 unsigned long range = block->used_length >> TARGET_PAGE_BITS;
2602 unsigned long run_start = find_next_zero_bit(bitmap, range, 0);
Pavel Butsykinced1c612017-02-03 18:23:21 +03002603
2604 while (run_start < range) {
2605 unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
Juan Quintelaaaa20642017-03-21 11:35:24 +01002606 ram_discard_range(block->idstr, run_start << TARGET_PAGE_BITS,
Pavel Butsykinced1c612017-02-03 18:23:21 +03002607 (run_end - run_start) << TARGET_PAGE_BITS);
2608 run_start = find_next_zero_bit(bitmap, range, run_end + 1);
2609 }
2610 }
2611}
2612
Juan Quintela3d0684b2017-03-23 15:06:39 +01002613/**
2614 * postcopy_send_discard_bm_ram: discard a RAMBlock
2615 *
2616 * Returns zero on success
2617 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002618 * Callback from postcopy_each_ram_send_discard for each RAMBlock
2619 * Note: At this point the 'unsentmap' is the processed bitmap combined
2620 * with the dirtymap; so a '1' means it's either dirty or unsent.
Juan Quintela3d0684b2017-03-23 15:06:39 +01002621 *
2622 * @ms: current migration state
2623 * @pds: state for postcopy
 2624 * @block: RAMBlock whose unsent pages should be turned into
 2625 *         discard ranges
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002626 */
2627static int postcopy_send_discard_bm_ram(MigrationState *ms,
2628 PostcopyDiscardState *pds,
Juan Quintela6b6712e2017-03-22 15:18:04 +01002629 RAMBlock *block)
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002630{
Juan Quintela6b6712e2017-03-22 15:18:04 +01002631 unsigned long end = block->used_length >> TARGET_PAGE_BITS;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002632 unsigned long current;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002633 unsigned long *unsentmap = block->unsentmap;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002634
Juan Quintela6b6712e2017-03-22 15:18:04 +01002635 for (current = 0; current < end; ) {
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002636 unsigned long one = find_next_bit(unsentmap, end, current);
2637
2638 if (one <= end) {
2639 unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1);
2640 unsigned long discard_length;
2641
2642 if (zero >= end) {
2643 discard_length = end - one;
2644 } else {
2645 discard_length = zero - one;
2646 }
Dr. David Alan Gilbertd688c622016-06-13 12:16:40 +01002647 if (discard_length) {
2648 postcopy_discard_send_range(ms, pds, one, discard_length);
2649 }
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002650 current = one + discard_length;
2651 } else {
2652 current = one;
2653 }
2654 }
2655
2656 return 0;
2657}
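/*
 * Worked example for the loop above (illustration only): for an 8-page
 * block whose unsentmap bits are 0,1,1,1,0,0,1,0 the code emits two
 * discard ranges, (start=1, length=3) and (start=6, length=1); the
 * final find_next_bit() that returns 'end' produces a zero-length
 * range which is skipped by the 'if (discard_length)' check.
 */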
2658
Juan Quintela3d0684b2017-03-23 15:06:39 +01002659/**
2660 * postcopy_each_ram_send_discard: discard all RAMBlocks
2661 *
2662 * Returns 0 for success or negative for error
2663 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002664 * Utility for the outgoing postcopy code.
2665 * Calls postcopy_send_discard_bm_ram for each RAMBlock
2666 * passing it bitmap indexes and name.
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002667 * (qemu_ram_foreach_block ends up passing unscaled lengths
2668 * which would mean postcopy code would have to deal with target page)
Juan Quintela3d0684b2017-03-23 15:06:39 +01002669 *
2670 * @ms: current migration state
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002671 */
2672static int postcopy_each_ram_send_discard(MigrationState *ms)
2673{
2674 struct RAMBlock *block;
2675 int ret;
2676
Cédric Le Goaterb895de52018-05-14 08:57:00 +02002677 RAMBLOCK_FOREACH_MIGRATABLE(block) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01002678 PostcopyDiscardState *pds =
2679 postcopy_discard_send_init(ms, block->idstr);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002680
2681 /*
2682 * Postcopy sends chunks of bitmap over the wire, but it
 2683             * just needs indexes at this point, which avoids it having
2684 * target page specific code.
2685 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002686 ret = postcopy_send_discard_bm_ram(ms, pds, block);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002687 postcopy_discard_send_finish(ms, pds);
2688 if (ret) {
2689 return ret;
2690 }
2691 }
2692
2693 return 0;
2694}
2695
Juan Quintela3d0684b2017-03-23 15:06:39 +01002696/**
 2697 * postcopy_chunk_hostpages_pass: canonicalize bitmap in host pages
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002698 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002699 * Helper for postcopy_chunk_hostpages; it's called twice to
 2700 * canonicalize the two bitmaps, which are similar but one is
2701 * inverted.
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002702 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002703 * Postcopy requires that all target pages in a hostpage are dirty or
2704 * clean, not a mix. This function canonicalizes the bitmaps.
2705 *
2706 * @ms: current migration state
2707 * @unsent_pass: if true we need to canonicalize partially unsent host pages
2708 * otherwise we need to canonicalize partially dirty host pages
2709 * @block: block that contains the page we want to canonicalize
2710 * @pds: state for postcopy
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002711 */
2712static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
2713 RAMBlock *block,
2714 PostcopyDiscardState *pds)
2715{
Juan Quintela53518d92017-05-04 11:46:24 +02002716 RAMState *rs = ram_state;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002717 unsigned long *bitmap = block->bmap;
2718 unsigned long *unsentmap = block->unsentmap;
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00002719 unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002720 unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002721 unsigned long run_start;
2722
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00002723 if (block->page_size == TARGET_PAGE_SIZE) {
2724 /* Easy case - TPS==HPS for a non-huge page RAMBlock */
2725 return;
2726 }
2727
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002728 if (unsent_pass) {
2729 /* Find a sent page */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002730 run_start = find_next_zero_bit(unsentmap, pages, 0);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002731 } else {
2732 /* Find a dirty page */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002733 run_start = find_next_bit(bitmap, pages, 0);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002734 }
2735
Juan Quintela6b6712e2017-03-22 15:18:04 +01002736 while (run_start < pages) {
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002737 bool do_fixup = false;
2738 unsigned long fixup_start_addr;
2739 unsigned long host_offset;
2740
2741 /*
2742 * If the start of this run of pages is in the middle of a host
2743 * page, then we need to fixup this host page.
2744 */
2745 host_offset = run_start % host_ratio;
2746 if (host_offset) {
2747 do_fixup = true;
2748 run_start -= host_offset;
2749 fixup_start_addr = run_start;
2750 /* For the next pass */
2751 run_start = run_start + host_ratio;
2752 } else {
2753 /* Find the end of this run */
2754 unsigned long run_end;
2755 if (unsent_pass) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01002756 run_end = find_next_bit(unsentmap, pages, run_start + 1);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002757 } else {
Juan Quintela6b6712e2017-03-22 15:18:04 +01002758 run_end = find_next_zero_bit(bitmap, pages, run_start + 1);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002759 }
2760 /*
2761 * If the end isn't at the start of a host page, then the
2762 * run doesn't finish at the end of a host page
2763 * and we need to discard.
2764 */
2765 host_offset = run_end % host_ratio;
2766 if (host_offset) {
2767 do_fixup = true;
2768 fixup_start_addr = run_end - host_offset;
2769 /*
2770 * This host page has gone, the next loop iteration starts
2771 * from after the fixup
2772 */
2773 run_start = fixup_start_addr + host_ratio;
2774 } else {
2775 /*
2776 * No discards on this iteration, next loop starts from
2777 * next sent/dirty page
2778 */
2779 run_start = run_end + 1;
2780 }
2781 }
2782
2783 if (do_fixup) {
2784 unsigned long page;
2785
2786 /* Tell the destination to discard this page */
2787 if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
2788 /* For the unsent_pass we:
2789 * discard partially sent pages
2790 * For the !unsent_pass (dirty) we:
2791 * discard partially dirty pages that were sent
2792 * (any partially sent pages were already discarded
2793 * by the previous unsent_pass)
2794 */
2795 postcopy_discard_send_range(ms, pds, fixup_start_addr,
2796 host_ratio);
2797 }
2798
2799 /* Clean up the bitmap */
2800 for (page = fixup_start_addr;
2801 page < fixup_start_addr + host_ratio; page++) {
2802 /* All pages in this host page are now not sent */
2803 set_bit(page, unsentmap);
2804
2805 /*
2806 * Remark them as dirty, updating the count for any pages
2807 * that weren't previously dirty.
2808 */
Juan Quintela0d8ec882017-03-13 21:21:41 +01002809 rs->migration_dirty_pages += !test_and_set_bit(page, bitmap);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002810 }
2811 }
2812
2813 if (unsent_pass) {
2814 /* Find the next sent page for the next iteration */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002815 run_start = find_next_zero_bit(unsentmap, pages, run_start);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002816 } else {
2817 /* Find the next dirty page for the next iteration */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002818 run_start = find_next_bit(bitmap, pages, run_start);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002819 }
2820 }
2821}
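/*
 * Worked example for the fixup logic above (illustration only, assuming
 * 2MB host pages and 4KB target pages, i.e. host_ratio == 512): in the
 * first (unsent) pass, if a run starts at target-page index 700, then
 * host_offset is 700 % 512 = 188, so the run is pulled back to index
 * 512, the whole host page [512, 1024) is sent as one discard range,
 * every bit in it is set in the unsentmap and re-marked dirty, and the
 * next iteration resumes at index 1024.  The destination therefore
 * never ends up with a partially populated huge page.
 */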
2822
Juan Quintela3d0684b2017-03-23 15:06:39 +01002823/**
 2824 * postcopy_chunk_hostpages: discard any partially sent host page
2825 *
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002826 * Utility for the outgoing postcopy code.
2827 *
 2828 * Discard any partially sent host-page size chunks, and mark any partially
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00002829 * dirty host-page size chunks as all dirty. In this case the host-page
2830 * is the host-page for the particular RAMBlock, i.e. it might be a huge page
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002831 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002832 * Returns zero on success
2833 *
2834 * @ms: current migration state
Juan Quintela6b6712e2017-03-22 15:18:04 +01002835 * @block: block we want to work with
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002836 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002837static int postcopy_chunk_hostpages(MigrationState *ms, RAMBlock *block)
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002838{
Juan Quintela6b6712e2017-03-22 15:18:04 +01002839 PostcopyDiscardState *pds =
2840 postcopy_discard_send_init(ms, block->idstr);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002841
Juan Quintela6b6712e2017-03-22 15:18:04 +01002842 /* First pass: Discard all partially sent host pages */
2843 postcopy_chunk_hostpages_pass(ms, true, block, pds);
2844 /*
2845 * Second pass: Ensure that all partially dirty host pages are made
2846 * fully dirty.
2847 */
2848 postcopy_chunk_hostpages_pass(ms, false, block, pds);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002849
Juan Quintela6b6712e2017-03-22 15:18:04 +01002850 postcopy_discard_send_finish(ms, pds);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002851 return 0;
2852}
2853
Juan Quintela3d0684b2017-03-23 15:06:39 +01002854/**
2855 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
2856 *
2857 * Returns zero on success
2858 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002859 * Transmit the set of pages to be discarded after precopy to the target;
2860 * these are pages that:
2861 * a) Have been previously transmitted but are now dirty again
2862 * b) Pages that have never been transmitted, this ensures that
2863 * any pages on the destination that have been mapped by background
2864 * tasks get discarded (transparent huge pages is the specific concern)
2865 * Hopefully this is pretty sparse
Juan Quintela3d0684b2017-03-23 15:06:39 +01002866 *
2867 * @ms: current migration state
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002868 */
2869int ram_postcopy_send_discard_bitmap(MigrationState *ms)
2870{
Juan Quintela53518d92017-05-04 11:46:24 +02002871 RAMState *rs = ram_state;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002872 RAMBlock *block;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002873 int ret;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002874
2875 rcu_read_lock();
2876
2877 /* This should be our last sync, the src is now paused */
Juan Quintelaeb859c52017-03-13 21:51:55 +01002878 migration_bitmap_sync(rs);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002879
Juan Quintela6b6712e2017-03-22 15:18:04 +01002880 /* Easiest way to make sure we don't resume in the middle of a host-page */
2881 rs->last_seen_block = NULL;
2882 rs->last_sent_block = NULL;
2883 rs->last_page = 0;
2884
Cédric Le Goaterb895de52018-05-14 08:57:00 +02002885 RAMBLOCK_FOREACH_MIGRATABLE(block) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01002886 unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
2887 unsigned long *bitmap = block->bmap;
2888 unsigned long *unsentmap = block->unsentmap;
2889
2890 if (!unsentmap) {
2891 /* We don't have a safe way to resize the sentmap, so
2892 * if the bitmap was resized it will be NULL at this
2893 * point.
2894 */
2895 error_report("migration ram resized during precopy phase");
2896 rcu_read_unlock();
2897 return -EINVAL;
2898 }
2899 /* Deal with TPS != HPS and huge pages */
2900 ret = postcopy_chunk_hostpages(ms, block);
2901 if (ret) {
2902 rcu_read_unlock();
2903 return ret;
2904 }
2905
2906 /*
2907 * Update the unsentmap to be unsentmap = unsentmap | dirty
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002908 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01002909 bitmap_or(unsentmap, unsentmap, bitmap, pages);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002910#ifdef DEBUG_POSTCOPY
Juan Quintela6b6712e2017-03-22 15:18:04 +01002911 ram_debug_dump_bitmap(unsentmap, true, pages);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002912#endif
Juan Quintela6b6712e2017-03-22 15:18:04 +01002913 }
2914 trace_ram_postcopy_send_discard_bitmap();
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002915
2916 ret = postcopy_each_ram_send_discard(ms);
2917 rcu_read_unlock();
2918
2919 return ret;
2920}
2921
Juan Quintela3d0684b2017-03-23 15:06:39 +01002922/**
2923 * ram_discard_range: discard dirtied pages at the beginning of postcopy
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002924 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002925 * Returns zero on success
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002926 *
Juan Quintela36449152017-03-23 15:11:59 +01002927 * @rbname: name of the RAMBlock of the request. NULL means the
 2928 *          same as the last one.
Juan Quintela3d0684b2017-03-23 15:06:39 +01002929 * @start: byte offset into the RAMBlock
 2930 * @length: number of bytes to discard
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002931 */
Juan Quintelaaaa20642017-03-21 11:35:24 +01002932int ram_discard_range(const char *rbname, uint64_t start, size_t length)
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002933{
2934 int ret = -1;
2935
Juan Quintela36449152017-03-23 15:11:59 +01002936 trace_ram_discard_range(rbname, start, length);
Dr. David Alan Gilbertd3a50382017-02-24 18:28:32 +00002937
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002938 rcu_read_lock();
Juan Quintela36449152017-03-23 15:11:59 +01002939 RAMBlock *rb = qemu_ram_block_by_name(rbname);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002940
2941 if (!rb) {
Juan Quintela36449152017-03-23 15:11:59 +01002942 error_report("ram_discard_range: Failed to find block '%s'", rbname);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002943 goto err;
2944 }
2945
Peter Xu814bb082018-07-23 20:33:02 +08002946 /*
2947 * On source VM, we don't need to update the received bitmap since
2948 * we don't even have one.
2949 */
2950 if (rb->receivedmap) {
2951 bitmap_clear(rb->receivedmap, start >> qemu_target_page_bits(),
2952 length >> qemu_target_page_bits());
2953 }
2954
Dr. David Alan Gilbertd3a50382017-02-24 18:28:32 +00002955 ret = ram_block_discard_range(rb, start, length);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002956
2957err:
2958 rcu_read_unlock();
2959
2960 return ret;
2961}
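/*
 * Usage sketch (assumption, guarded out of the build): discarding the
 * first 64KB of a block named "pc.ram".  The block name, offset and
 * length are made up for the example; offset and length are in bytes
 * and should be host-page aligned.
 */
#if 0
static void example_discard_pc_ram_head(void)
{
    if (ram_discard_range("pc.ram", 0, 64 * 1024)) {
        error_report("example: discard failed");
    }
}
#endif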
2962
Peter Xu84593a02017-10-19 14:31:59 +08002963/*
2964 * For every allocation, we will try not to crash the VM if the
2965 * allocation failed.
2966 */
2967static int xbzrle_init(void)
2968{
2969 Error *local_err = NULL;
2970
2971 if (!migrate_use_xbzrle()) {
2972 return 0;
2973 }
2974
2975 XBZRLE_cache_lock();
2976
2977 XBZRLE.zero_target_page = g_try_malloc0(TARGET_PAGE_SIZE);
2978 if (!XBZRLE.zero_target_page) {
2979 error_report("%s: Error allocating zero page", __func__);
2980 goto err_out;
2981 }
2982
2983 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(),
2984 TARGET_PAGE_SIZE, &local_err);
2985 if (!XBZRLE.cache) {
2986 error_report_err(local_err);
2987 goto free_zero_page;
2988 }
2989
2990 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
2991 if (!XBZRLE.encoded_buf) {
2992 error_report("%s: Error allocating encoded_buf", __func__);
2993 goto free_cache;
2994 }
2995
2996 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
2997 if (!XBZRLE.current_buf) {
2998 error_report("%s: Error allocating current_buf", __func__);
2999 goto free_encoded_buf;
3000 }
3001
3002 /* We are all good */
3003 XBZRLE_cache_unlock();
3004 return 0;
3005
3006free_encoded_buf:
3007 g_free(XBZRLE.encoded_buf);
3008 XBZRLE.encoded_buf = NULL;
3009free_cache:
3010 cache_fini(XBZRLE.cache);
3011 XBZRLE.cache = NULL;
3012free_zero_page:
3013 g_free(XBZRLE.zero_target_page);
3014 XBZRLE.zero_target_page = NULL;
3015err_out:
3016 XBZRLE_cache_unlock();
3017 return -ENOMEM;
3018}
3019
Juan Quintela53518d92017-05-04 11:46:24 +02003020static int ram_state_init(RAMState **rsp)
Juan Quintela56e93d22015-05-07 19:33:31 +02003021{
Peter Xu7d00ee62017-10-19 14:31:57 +08003022 *rsp = g_try_new0(RAMState, 1);
3023
3024 if (!*rsp) {
3025 error_report("%s: Init ramstate fail", __func__);
3026 return -1;
3027 }
Juan Quintela53518d92017-05-04 11:46:24 +02003028
3029 qemu_mutex_init(&(*rsp)->bitmap_mutex);
3030 qemu_mutex_init(&(*rsp)->src_page_req_mutex);
3031 QSIMPLEQ_INIT(&(*rsp)->src_page_requests);
Juan Quintela56e93d22015-05-07 19:33:31 +02003032
Peter Xu7d00ee62017-10-19 14:31:57 +08003033 /*
3034 * Count the total number of pages used by ram blocks not including any
3035 * gaps due to alignment or unplugs.
3036 */
3037 (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
3038
3039 ram_state_reset(*rsp);
3040
3041 return 0;
3042}
3043
Peter Xud6eff5d2017-10-19 14:32:00 +08003044static void ram_list_init_bitmaps(void)
3045{
3046 RAMBlock *block;
3047 unsigned long pages;
3048
3049 /* Skip setting bitmap if there is no RAM */
3050 if (ram_bytes_total()) {
Cédric Le Goaterb895de52018-05-14 08:57:00 +02003051 RAMBLOCK_FOREACH_MIGRATABLE(block) {
Peter Xud6eff5d2017-10-19 14:32:00 +08003052 pages = block->max_length >> TARGET_PAGE_BITS;
3053 block->bmap = bitmap_new(pages);
3054 bitmap_set(block->bmap, 0, pages);
3055 if (migrate_postcopy_ram()) {
3056 block->unsentmap = bitmap_new(pages);
3057 bitmap_set(block->unsentmap, 0, pages);
3058 }
3059 }
3060 }
3061}
3062
3063static void ram_init_bitmaps(RAMState *rs)
3064{
3065 /* For memory_global_dirty_log_start below. */
3066 qemu_mutex_lock_iothread();
3067 qemu_mutex_lock_ramlist();
3068 rcu_read_lock();
3069
3070 ram_list_init_bitmaps();
3071 memory_global_dirty_log_start();
3072 migration_bitmap_sync(rs);
3073
3074 rcu_read_unlock();
3075 qemu_mutex_unlock_ramlist();
3076 qemu_mutex_unlock_iothread();
3077}
3078
Peter Xu7d00ee62017-10-19 14:31:57 +08003079static int ram_init_all(RAMState **rsp)
3080{
Peter Xu7d00ee62017-10-19 14:31:57 +08003081 if (ram_state_init(rsp)) {
3082 return -1;
3083 }
3084
Peter Xu84593a02017-10-19 14:31:59 +08003085 if (xbzrle_init()) {
3086 ram_state_cleanup(rsp);
3087 return -1;
Juan Quintela56e93d22015-05-07 19:33:31 +02003088 }
3089
Peter Xud6eff5d2017-10-19 14:32:00 +08003090 ram_init_bitmaps(*rsp);
zhanghailianga91246c2016-10-27 14:42:59 +08003091
3092 return 0;
3093}
3094
Peter Xu08614f32018-05-02 18:47:33 +08003095static void ram_state_resume_prepare(RAMState *rs, QEMUFile *out)
3096{
3097 RAMBlock *block;
3098 uint64_t pages = 0;
3099
3100 /*
3101 * Postcopy is not using xbzrle/compression, so no need for that.
 3102     * Also, since the source is already halted, we don't need to care
3103 * about dirty page logging as well.
3104 */
3105
Dr. David Alan Gilbertff0769a2018-06-05 17:25:44 +01003106 RAMBLOCK_FOREACH_MIGRATABLE(block) {
Peter Xu08614f32018-05-02 18:47:33 +08003107 pages += bitmap_count_one(block->bmap,
3108 block->used_length >> TARGET_PAGE_BITS);
3109 }
3110
3111 /* This may not be aligned with current bitmaps. Recalculate. */
3112 rs->migration_dirty_pages = pages;
3113
3114 rs->last_seen_block = NULL;
3115 rs->last_sent_block = NULL;
3116 rs->last_page = 0;
3117 rs->last_version = ram_list.version;
3118 /*
3119 * Disable the bulk stage, otherwise we'll resend the whole RAM no
3120 * matter what we have sent.
3121 */
3122 rs->ram_bulk_stage = false;
3123
3124 /* Update RAMState cache of output QEMUFile */
3125 rs->f = out;
3126
3127 trace_ram_state_resume_prepare(pages);
3128}
3129
Juan Quintela3d0684b2017-03-23 15:06:39 +01003130/*
3131 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
zhanghailianga91246c2016-10-27 14:42:59 +08003132 * a long-running RCU critical section. When RCU reclaims in the code
3133 * start to become numerous it will be necessary to reduce the
3134 * granularity of these critical sections.
3135 */
3136
Juan Quintela3d0684b2017-03-23 15:06:39 +01003137/**
3138 * ram_save_setup: Setup RAM for migration
3139 *
3140 * Returns zero to indicate success and negative for error
3141 *
3142 * @f: QEMUFile where to send the data
3143 * @opaque: RAMState pointer
3144 */
zhanghailianga91246c2016-10-27 14:42:59 +08003145static int ram_save_setup(QEMUFile *f, void *opaque)
3146{
Juan Quintela53518d92017-05-04 11:46:24 +02003147 RAMState **rsp = opaque;
zhanghailianga91246c2016-10-27 14:42:59 +08003148 RAMBlock *block;
3149
Xiao Guangrongdcaf4462018-03-30 15:51:20 +08003150 if (compress_threads_save_setup()) {
3151 return -1;
3152 }
3153
zhanghailianga91246c2016-10-27 14:42:59 +08003154 /* migration has already setup the bitmap, reuse it. */
3155 if (!migration_in_colo_state()) {
Peter Xu7d00ee62017-10-19 14:31:57 +08003156 if (ram_init_all(rsp) != 0) {
Xiao Guangrongdcaf4462018-03-30 15:51:20 +08003157 compress_threads_save_cleanup();
zhanghailianga91246c2016-10-27 14:42:59 +08003158 return -1;
Juan Quintela53518d92017-05-04 11:46:24 +02003159 }
zhanghailianga91246c2016-10-27 14:42:59 +08003160 }
Juan Quintela53518d92017-05-04 11:46:24 +02003161 (*rsp)->f = f;
zhanghailianga91246c2016-10-27 14:42:59 +08003162
3163 rcu_read_lock();
Juan Quintela56e93d22015-05-07 19:33:31 +02003164
3165 qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
3166
Cédric Le Goaterb895de52018-05-14 08:57:00 +02003167 RAMBLOCK_FOREACH_MIGRATABLE(block) {
Juan Quintela56e93d22015-05-07 19:33:31 +02003168 qemu_put_byte(f, strlen(block->idstr));
3169 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
3170 qemu_put_be64(f, block->used_length);
Dr. David Alan Gilbertef08fb32017-02-24 18:28:30 +00003171 if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
3172 qemu_put_be64(f, block->page_size);
3173 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003174 }
3175
3176 rcu_read_unlock();
3177
3178 ram_control_before_iterate(f, RAM_CONTROL_SETUP);
3179 ram_control_after_iterate(f, RAM_CONTROL_SETUP);
3180
Juan Quintela6df264a2018-02-28 09:10:07 +01003181 multifd_send_sync_main();
Juan Quintela56e93d22015-05-07 19:33:31 +02003182 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
Juan Quintela35374cb2018-04-18 10:13:21 +02003183 qemu_fflush(f);
Juan Quintela56e93d22015-05-07 19:33:31 +02003184
3185 return 0;
3186}
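/*
 * Informal sketch of what ram_save_setup() above puts on the wire
 * (derived from the code, not a normative protocol description):
 *
 *   be64 : ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE
 *   for each migratable RAMBlock:
 *     u8    : strlen(idstr)
 *     bytes : idstr (no NUL terminator)
 *     be64  : used_length
 *     be64  : page_size   (only when postcopy-ram is enabled and the
 *                          block page size differs from the host page size)
 *   be64 : RAM_SAVE_FLAG_EOS
 */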
3187
Juan Quintela3d0684b2017-03-23 15:06:39 +01003188/**
3189 * ram_save_iterate: iterative stage for migration
3190 *
3191 * Returns zero to indicate success and negative for error
3192 *
3193 * @f: QEMUFile where to send the data
3194 * @opaque: RAMState pointer
3195 */
Juan Quintela56e93d22015-05-07 19:33:31 +02003196static int ram_save_iterate(QEMUFile *f, void *opaque)
3197{
Juan Quintela53518d92017-05-04 11:46:24 +02003198 RAMState **temp = opaque;
3199 RAMState *rs = *temp;
Juan Quintela56e93d22015-05-07 19:33:31 +02003200 int ret;
3201 int i;
3202 int64_t t0;
Thomas Huth5c903082016-11-04 14:10:17 +01003203 int done = 0;
Juan Quintela56e93d22015-05-07 19:33:31 +02003204
Peter Lievenb2557342018-03-08 12:18:24 +01003205 if (blk_mig_bulk_active()) {
3206 /* Avoid transferring ram during bulk phase of block migration as
3207 * the bulk phase will usually take a long time and transferring
3208 * ram updates during that time is pointless. */
3209 goto out;
3210 }
3211
Juan Quintela56e93d22015-05-07 19:33:31 +02003212 rcu_read_lock();
Juan Quintela6f37bb82017-03-13 19:26:29 +01003213 if (ram_list.version != rs->last_version) {
3214 ram_state_reset(rs);
Juan Quintela56e93d22015-05-07 19:33:31 +02003215 }
3216
3217 /* Read version before ram_list.blocks */
3218 smp_rmb();
3219
3220 ram_control_before_iterate(f, RAM_CONTROL_ROUND);
3221
3222 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
3223 i = 0;
Dr. David Alan Gilberte03a34f2018-06-13 11:26:42 +01003224 while ((ret = qemu_file_rate_limit(f)) == 0 ||
3225 !QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02003226 int pages;
3227
Dr. David Alan Gilberte03a34f2018-06-13 11:26:42 +01003228 if (qemu_file_get_error(f)) {
3229 break;
3230 }
3231
Juan Quintelace25d332017-03-15 11:00:51 +01003232 pages = ram_find_and_save_block(rs, false);
Juan Quintela56e93d22015-05-07 19:33:31 +02003233        /* no more pages to send */
3234 if (pages == 0) {
Thomas Huth5c903082016-11-04 14:10:17 +01003235 done = 1;
Juan Quintela56e93d22015-05-07 19:33:31 +02003236 break;
3237 }
Xiao Guangronge8f37352018-09-03 17:26:44 +08003238
3239 if (pages < 0) {
3240 qemu_file_set_error(f, pages);
3241 break;
3242 }
3243
Xiao Guangrongbe8b02e2018-09-03 17:26:42 +08003244 rs->target_page_count += pages;
Jason J. Herne070afca2015-09-08 13:12:35 -04003245
Juan Quintela56e93d22015-05-07 19:33:31 +02003246 /* we want to check in the 1st loop, just in case it was the 1st time
3247 and we had to sync the dirty bitmap.
 3248           qemu_get_clock_ns() is a bit expensive, so we only check once every
 3249           few iterations
3250 */
3251 if ((i & 63) == 0) {
3252 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
3253 if (t1 > MAX_WAIT) {
Juan Quintela55c44462017-01-23 22:32:05 +01003254 trace_ram_save_iterate_big_wait(t1, i);
Juan Quintela56e93d22015-05-07 19:33:31 +02003255 break;
3256 }
3257 }
3258 i++;
3259 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003260 rcu_read_unlock();
3261
3262 /*
3263 * Must occur before EOS (or any QEMUFile operation)
3264 * because of RDMA protocol.
3265 */
3266 ram_control_after_iterate(f, RAM_CONTROL_ROUND);
3267
Juan Quintela6df264a2018-02-28 09:10:07 +01003268 multifd_send_sync_main();
Peter Lievenb2557342018-03-08 12:18:24 +01003269out:
Juan Quintela56e93d22015-05-07 19:33:31 +02003270 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
Juan Quintela35374cb2018-04-18 10:13:21 +02003271 qemu_fflush(f);
Juan Quintela93604472017-06-06 19:49:03 +02003272 ram_counters.transferred += 8;
Juan Quintela56e93d22015-05-07 19:33:31 +02003273
3274 ret = qemu_file_get_error(f);
3275 if (ret < 0) {
3276 return ret;
3277 }
3278
Thomas Huth5c903082016-11-04 14:10:17 +01003279 return done;
Juan Quintela56e93d22015-05-07 19:33:31 +02003280}
3281
Juan Quintela3d0684b2017-03-23 15:06:39 +01003282/**
3283 * ram_save_complete: function called to send the remaining amount of ram
3284 *
Xiao Guangronge8f37352018-09-03 17:26:44 +08003285 * Returns zero to indicate success or negative on error
Juan Quintela3d0684b2017-03-23 15:06:39 +01003286 *
3287 * Called with iothread lock
3288 *
3289 * @f: QEMUFile where to send the data
3290 * @opaque: RAMState pointer
3291 */
Juan Quintela56e93d22015-05-07 19:33:31 +02003292static int ram_save_complete(QEMUFile *f, void *opaque)
3293{
Juan Quintela53518d92017-05-04 11:46:24 +02003294 RAMState **temp = opaque;
3295 RAMState *rs = *temp;
Xiao Guangronge8f37352018-09-03 17:26:44 +08003296 int ret = 0;
Juan Quintela6f37bb82017-03-13 19:26:29 +01003297
Juan Quintela56e93d22015-05-07 19:33:31 +02003298 rcu_read_lock();
3299
Juan Quintela57273092017-03-20 22:25:28 +01003300 if (!migration_in_postcopy()) {
Juan Quintela8d820d62017-03-13 19:35:50 +01003301 migration_bitmap_sync(rs);
Dr. David Alan Gilbert663e6c12015-11-05 18:11:13 +00003302 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003303
3304 ram_control_before_iterate(f, RAM_CONTROL_FINISH);
3305
3306 /* try transferring iterative blocks of memory */
3307
3308 /* flush all remaining blocks regardless of rate limiting */
3309 while (true) {
3310 int pages;
3311
Juan Quintelace25d332017-03-15 11:00:51 +01003312 pages = ram_find_and_save_block(rs, !migration_in_colo_state());
Juan Quintela56e93d22015-05-07 19:33:31 +02003313        /* no more blocks to send */
3314 if (pages == 0) {
3315 break;
3316 }
Xiao Guangronge8f37352018-09-03 17:26:44 +08003317 if (pages < 0) {
3318 ret = pages;
3319 break;
3320 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003321 }
3322
Juan Quintelace25d332017-03-15 11:00:51 +01003323 flush_compressed_data(rs);
Juan Quintela56e93d22015-05-07 19:33:31 +02003324 ram_control_after_iterate(f, RAM_CONTROL_FINISH);
Juan Quintela56e93d22015-05-07 19:33:31 +02003325
3326 rcu_read_unlock();
Paolo Bonzinid09a6fd2015-07-09 08:47:58 +02003327
Juan Quintela6df264a2018-02-28 09:10:07 +01003328 multifd_send_sync_main();
Juan Quintela56e93d22015-05-07 19:33:31 +02003329 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
Juan Quintela35374cb2018-04-18 10:13:21 +02003330 qemu_fflush(f);
Juan Quintela56e93d22015-05-07 19:33:31 +02003331
Xiao Guangronge8f37352018-09-03 17:26:44 +08003332 return ret;
Juan Quintela56e93d22015-05-07 19:33:31 +02003333}
3334
Dr. David Alan Gilbertc31b0982015-11-05 18:10:54 +00003335static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
Vladimir Sementsov-Ogievskiy47995022018-03-13 15:34:00 -04003336 uint64_t *res_precopy_only,
3337 uint64_t *res_compatible,
3338 uint64_t *res_postcopy_only)
Juan Quintela56e93d22015-05-07 19:33:31 +02003339{
Juan Quintela53518d92017-05-04 11:46:24 +02003340 RAMState **temp = opaque;
3341 RAMState *rs = *temp;
Juan Quintela56e93d22015-05-07 19:33:31 +02003342 uint64_t remaining_size;
3343
Juan Quintela9edabd42017-03-14 12:02:16 +01003344 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
Juan Quintela56e93d22015-05-07 19:33:31 +02003345
Juan Quintela57273092017-03-20 22:25:28 +01003346 if (!migration_in_postcopy() &&
Dr. David Alan Gilbert663e6c12015-11-05 18:11:13 +00003347 remaining_size < max_size) {
Juan Quintela56e93d22015-05-07 19:33:31 +02003348 qemu_mutex_lock_iothread();
3349 rcu_read_lock();
Juan Quintela8d820d62017-03-13 19:35:50 +01003350 migration_bitmap_sync(rs);
Juan Quintela56e93d22015-05-07 19:33:31 +02003351 rcu_read_unlock();
3352 qemu_mutex_unlock_iothread();
Juan Quintela9edabd42017-03-14 12:02:16 +01003353 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
Juan Quintela56e93d22015-05-07 19:33:31 +02003354 }
Dr. David Alan Gilbertc31b0982015-11-05 18:10:54 +00003355
Vladimir Sementsov-Ogievskiy86e11672017-07-10 19:30:15 +03003356 if (migrate_postcopy_ram()) {
3357 /* We can do postcopy, and all the data is postcopiable */
Vladimir Sementsov-Ogievskiy47995022018-03-13 15:34:00 -04003358 *res_compatible += remaining_size;
Vladimir Sementsov-Ogievskiy86e11672017-07-10 19:30:15 +03003359 } else {
Vladimir Sementsov-Ogievskiy47995022018-03-13 15:34:00 -04003360 *res_precopy_only += remaining_size;
Vladimir Sementsov-Ogievskiy86e11672017-07-10 19:30:15 +03003361 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003362}
3363
3364static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
3365{
3366 unsigned int xh_len;
3367 int xh_flags;
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00003368 uint8_t *loaded_data;
Juan Quintela56e93d22015-05-07 19:33:31 +02003369
Juan Quintela56e93d22015-05-07 19:33:31 +02003370 /* extract RLE header */
3371 xh_flags = qemu_get_byte(f);
3372 xh_len = qemu_get_be16(f);
3373
3374 if (xh_flags != ENCODING_FLAG_XBZRLE) {
3375 error_report("Failed to load XBZRLE page - wrong compression!");
3376 return -1;
3377 }
3378
3379 if (xh_len > TARGET_PAGE_SIZE) {
3380 error_report("Failed to load XBZRLE page - len overflow!");
3381 return -1;
3382 }
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003383 loaded_data = XBZRLE.decoded_buf;
Juan Quintela56e93d22015-05-07 19:33:31 +02003384 /* load data and decode */
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003385 /* it can change loaded_data to point to an internal buffer */
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00003386 qemu_get_buffer_in_place(f, &loaded_data, xh_len);
Juan Quintela56e93d22015-05-07 19:33:31 +02003387
3388 /* decode RLE */
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00003389 if (xbzrle_decode_buffer(loaded_data, xh_len, host,
Juan Quintela56e93d22015-05-07 19:33:31 +02003390 TARGET_PAGE_SIZE) == -1) {
3391 error_report("Failed to load XBZRLE page - decode error!");
3392 return -1;
3393 }
3394
3395 return 0;
3396}
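/*
 * Informal sketch of the per-page XBZRLE wire format consumed by
 * load_xbzrle() above (derived from the reads in the function; the
 * sending side writes the matching fields):
 *
 *   u8   : xh_flags, must be ENCODING_FLAG_XBZRLE
 *   be16 : xh_len, length of the encoded data, at most TARGET_PAGE_SIZE
 *   bytes: xh_len bytes of XBZRLE-encoded delta, decoded in place over
 *          the previously loaded copy of the page at 'host'
 */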
3397
Juan Quintela3d0684b2017-03-23 15:06:39 +01003398/**
3399 * ram_block_from_stream: read a RAMBlock id from the migration stream
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003400 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01003401 * Must be called from within a rcu critical section.
3402 *
3403 * Returns a pointer from within the RCU-protected ram_list.
3404 *
3405 * @f: QEMUFile where to read the data from
3406 * @flags: Page flags (mostly to see if it's a continuation of previous block)
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003407 */
Juan Quintela3d0684b2017-03-23 15:06:39 +01003408static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
Juan Quintela56e93d22015-05-07 19:33:31 +02003409{
3410 static RAMBlock *block = NULL;
3411 char id[256];
3412 uint8_t len;
3413
3414 if (flags & RAM_SAVE_FLAG_CONTINUE) {
zhanghailiang4c4bad42016-01-15 11:37:41 +08003415 if (!block) {
Juan Quintela56e93d22015-05-07 19:33:31 +02003416 error_report("Ack, bad migration stream!");
3417 return NULL;
3418 }
zhanghailiang4c4bad42016-01-15 11:37:41 +08003419 return block;
Juan Quintela56e93d22015-05-07 19:33:31 +02003420 }
3421
3422 len = qemu_get_byte(f);
3423 qemu_get_buffer(f, (uint8_t *)id, len);
3424 id[len] = 0;
3425
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00003426 block = qemu_ram_block_by_name(id);
zhanghailiang4c4bad42016-01-15 11:37:41 +08003427 if (!block) {
3428 error_report("Can't find block %s", id);
3429 return NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02003430 }
3431
Cédric Le Goaterb895de52018-05-14 08:57:00 +02003432 if (!qemu_ram_is_migratable(block)) {
3433 error_report("block %s should not be migrated !", id);
3434 return NULL;
3435 }
3436
zhanghailiang4c4bad42016-01-15 11:37:41 +08003437 return block;
3438}
3439
3440static inline void *host_from_ram_block_offset(RAMBlock *block,
3441 ram_addr_t offset)
3442{
3443 if (!offset_in_ramblock(block, offset)) {
3444 return NULL;
3445 }
3446
3447 return block->host + offset;
Juan Quintela56e93d22015-05-07 19:33:31 +02003448}
3449
Zhang Chen13af18f2018-09-03 12:38:48 +08003450static inline void *colo_cache_from_block_offset(RAMBlock *block,
3451 ram_addr_t offset)
3452{
3453 if (!offset_in_ramblock(block, offset)) {
3454 return NULL;
3455 }
3456 if (!block->colo_cache) {
3457 error_report("%s: colo_cache is NULL in block :%s",
3458 __func__, block->idstr);
3459 return NULL;
3460 }
3461 return block->colo_cache + offset;
3462}
3463
Juan Quintela3d0684b2017-03-23 15:06:39 +01003464/**
3465 * ram_handle_compressed: handle the zero page case
3466 *
Juan Quintela56e93d22015-05-07 19:33:31 +02003467 * If a page (or a whole RDMA chunk) has been
3468 * determined to be zero, then zap it.
Juan Quintela3d0684b2017-03-23 15:06:39 +01003469 *
3470 * @host: host address for the zero page
3471 * @ch: what the page is filled from. We only support zero
3472 * @size: size of the zero page
Juan Quintela56e93d22015-05-07 19:33:31 +02003473 */
3474void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
3475{
3476 if (ch != 0 || !is_zero_range(host, size)) {
3477 memset(host, ch, size);
3478 }
3479}
3480
Xiao Guangrong797ca152018-03-30 15:51:21 +08003481/* return the size after decompression, or negative value on error */
3482static int
3483qemu_uncompress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
3484 const uint8_t *source, size_t source_len)
3485{
3486 int err;
3487
3488 err = inflateReset(stream);
3489 if (err != Z_OK) {
3490 return -1;
3491 }
3492
3493 stream->avail_in = source_len;
3494 stream->next_in = (uint8_t *)source;
3495 stream->avail_out = dest_len;
3496 stream->next_out = dest;
3497
3498 err = inflate(stream, Z_NO_FLUSH);
3499 if (err != Z_STREAM_END) {
3500 return -1;
3501 }
3502
3503 return stream->total_out;
3504}
3505
Juan Quintela56e93d22015-05-07 19:33:31 +02003506static void *do_data_decompress(void *opaque)
3507{
3508 DecompressParam *param = opaque;
3509 unsigned long pagesize;
Liang Li33d151f2016-05-05 15:32:58 +08003510 uint8_t *des;
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003511 int len, ret;
Juan Quintela56e93d22015-05-07 19:33:31 +02003512
Liang Li33d151f2016-05-05 15:32:58 +08003513 qemu_mutex_lock(&param->mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08003514 while (!param->quit) {
Liang Li33d151f2016-05-05 15:32:58 +08003515 if (param->des) {
3516 des = param->des;
3517 len = param->len;
3518 param->des = 0;
3519 qemu_mutex_unlock(&param->mutex);
3520
Liang Li73a89122016-05-05 15:32:51 +08003521 pagesize = TARGET_PAGE_SIZE;
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003522
3523 ret = qemu_uncompress_data(&param->stream, des, pagesize,
3524 param->compbuf, len);
Xiao Guangrongf5482222018-05-03 16:06:11 +08003525 if (ret < 0 && migrate_get_current()->decompress_error_check) {
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003526 error_report("decompress data failed");
3527 qemu_file_set_error(decomp_file, ret);
3528 }
Liang Li73a89122016-05-05 15:32:51 +08003529
Liang Li33d151f2016-05-05 15:32:58 +08003530 qemu_mutex_lock(&decomp_done_lock);
3531 param->done = true;
3532 qemu_cond_signal(&decomp_done_cond);
3533 qemu_mutex_unlock(&decomp_done_lock);
3534
3535 qemu_mutex_lock(&param->mutex);
3536 } else {
3537 qemu_cond_wait(&param->cond, &param->mutex);
3538 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003539 }
Liang Li33d151f2016-05-05 15:32:58 +08003540 qemu_mutex_unlock(&param->mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02003541
3542 return NULL;
3543}
3544
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003545static int wait_for_decompress_done(void)
Liang Li5533b2e2016-05-05 15:32:52 +08003546{
3547 int idx, thread_count;
3548
3549 if (!migrate_use_compression()) {
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003550 return 0;
Liang Li5533b2e2016-05-05 15:32:52 +08003551 }
3552
3553 thread_count = migrate_decompress_threads();
3554 qemu_mutex_lock(&decomp_done_lock);
3555 for (idx = 0; idx < thread_count; idx++) {
3556 while (!decomp_param[idx].done) {
3557 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
3558 }
3559 }
3560 qemu_mutex_unlock(&decomp_done_lock);
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003561 return qemu_file_get_error(decomp_file);
Liang Li5533b2e2016-05-05 15:32:52 +08003562}
3563
Juan Quintelaf0afa332017-06-28 11:52:28 +02003564static void compress_threads_load_cleanup(void)
Juan Quintela56e93d22015-05-07 19:33:31 +02003565{
3566 int i, thread_count;
3567
Juan Quintela3416ab52016-04-20 11:56:01 +02003568 if (!migrate_use_compression()) {
3569 return;
3570 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003571 thread_count = migrate_decompress_threads();
3572 for (i = 0; i < thread_count; i++) {
Xiao Guangrong797ca152018-03-30 15:51:21 +08003573 /*
 3574         * we use it as an indicator of whether the thread was
 3575         * properly initialized or not
3576 */
3577 if (!decomp_param[i].compbuf) {
3578 break;
3579 }
3580
Juan Quintela56e93d22015-05-07 19:33:31 +02003581 qemu_mutex_lock(&decomp_param[i].mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08003582 decomp_param[i].quit = true;
Juan Quintela56e93d22015-05-07 19:33:31 +02003583 qemu_cond_signal(&decomp_param[i].cond);
3584 qemu_mutex_unlock(&decomp_param[i].mutex);
3585 }
3586 for (i = 0; i < thread_count; i++) {
Xiao Guangrong797ca152018-03-30 15:51:21 +08003587 if (!decomp_param[i].compbuf) {
3588 break;
3589 }
3590
Juan Quintela56e93d22015-05-07 19:33:31 +02003591 qemu_thread_join(decompress_threads + i);
3592 qemu_mutex_destroy(&decomp_param[i].mutex);
3593 qemu_cond_destroy(&decomp_param[i].cond);
Xiao Guangrong797ca152018-03-30 15:51:21 +08003594 inflateEnd(&decomp_param[i].stream);
Juan Quintela56e93d22015-05-07 19:33:31 +02003595 g_free(decomp_param[i].compbuf);
Xiao Guangrong797ca152018-03-30 15:51:21 +08003596 decomp_param[i].compbuf = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02003597 }
3598 g_free(decompress_threads);
3599 g_free(decomp_param);
Juan Quintela56e93d22015-05-07 19:33:31 +02003600 decompress_threads = NULL;
3601 decomp_param = NULL;
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003602 decomp_file = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02003603}
3604
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003605static int compress_threads_load_setup(QEMUFile *f)
Xiao Guangrong797ca152018-03-30 15:51:21 +08003606{
3607 int i, thread_count;
3608
3609 if (!migrate_use_compression()) {
3610 return 0;
3611 }
3612
3613 thread_count = migrate_decompress_threads();
3614 decompress_threads = g_new0(QemuThread, thread_count);
3615 decomp_param = g_new0(DecompressParam, thread_count);
3616 qemu_mutex_init(&decomp_done_lock);
3617 qemu_cond_init(&decomp_done_cond);
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003618 decomp_file = f;
Xiao Guangrong797ca152018-03-30 15:51:21 +08003619 for (i = 0; i < thread_count; i++) {
3620 if (inflateInit(&decomp_param[i].stream) != Z_OK) {
3621 goto exit;
3622 }
3623
3624 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
3625 qemu_mutex_init(&decomp_param[i].mutex);
3626 qemu_cond_init(&decomp_param[i].cond);
3627 decomp_param[i].done = true;
3628 decomp_param[i].quit = false;
3629 qemu_thread_create(decompress_threads + i, "decompress",
3630 do_data_decompress, decomp_param + i,
3631 QEMU_THREAD_JOINABLE);
3632 }
3633 return 0;
3634exit:
3635 compress_threads_load_cleanup();
3636 return -1;
3637}
3638
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00003639static void decompress_data_with_multi_threads(QEMUFile *f,
Juan Quintela56e93d22015-05-07 19:33:31 +02003640 void *host, int len)
3641{
3642 int idx, thread_count;
3643
3644 thread_count = migrate_decompress_threads();
Liang Li73a89122016-05-05 15:32:51 +08003645 qemu_mutex_lock(&decomp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02003646 while (true) {
3647 for (idx = 0; idx < thread_count; idx++) {
Liang Li73a89122016-05-05 15:32:51 +08003648 if (decomp_param[idx].done) {
Liang Li33d151f2016-05-05 15:32:58 +08003649 decomp_param[idx].done = false;
3650 qemu_mutex_lock(&decomp_param[idx].mutex);
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00003651 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
Juan Quintela56e93d22015-05-07 19:33:31 +02003652 decomp_param[idx].des = host;
3653 decomp_param[idx].len = len;
Liang Li33d151f2016-05-05 15:32:58 +08003654 qemu_cond_signal(&decomp_param[idx].cond);
3655 qemu_mutex_unlock(&decomp_param[idx].mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02003656 break;
3657 }
3658 }
3659 if (idx < thread_count) {
3660 break;
Liang Li73a89122016-05-05 15:32:51 +08003661 } else {
3662 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02003663 }
3664 }
Liang Li73a89122016-05-05 15:32:51 +08003665 qemu_mutex_unlock(&decomp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02003666}
3667
Zhang Chen13af18f2018-09-03 12:38:48 +08003668/*
 3669 * colo cache: this is for the secondary VM; we cache the whole
 3670 * memory of the secondary VM. The global lock must be held
 3671 * when calling this helper.
3672 */
3673int colo_init_ram_cache(void)
3674{
3675 RAMBlock *block;
3676
3677 rcu_read_lock();
3678 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3679 block->colo_cache = qemu_anon_ram_alloc(block->used_length,
3680 NULL,
3681 false);
3682 if (!block->colo_cache) {
3683 error_report("%s: Can't alloc memory for COLO cache of block %s,"
3684 "size 0x" RAM_ADDR_FMT, __func__, block->idstr,
3685 block->used_length);
3686 goto out_locked;
3687 }
3688 memcpy(block->colo_cache, block->host, block->used_length);
3689 }
3690 rcu_read_unlock();
3691 return 0;
3692
3693out_locked:
3694 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3695 if (block->colo_cache) {
3696 qemu_anon_ram_free(block->colo_cache, block->used_length);
3697 block->colo_cache = NULL;
3698 }
3699 }
3700
3701 rcu_read_unlock();
3702 return -errno;
3703}
3704
 3705/* The global lock must be held when calling this helper */
3706void colo_release_ram_cache(void)
3707{
3708 RAMBlock *block;
3709
3710 rcu_read_lock();
3711 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3712 if (block->colo_cache) {
3713 qemu_anon_ram_free(block->colo_cache, block->used_length);
3714 block->colo_cache = NULL;
3715 }
3716 }
3717 rcu_read_unlock();
3718}
3719
Juan Quintela3d0684b2017-03-23 15:06:39 +01003720/**
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003721 * ram_load_setup: Setup RAM for migration incoming side
3722 *
3723 * Returns zero to indicate success and negative for error
3724 *
3725 * @f: QEMUFile where to receive the data
3726 * @opaque: RAMState pointer
3727 */
3728static int ram_load_setup(QEMUFile *f, void *opaque)
3729{
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003730 if (compress_threads_load_setup(f)) {
Xiao Guangrong797ca152018-03-30 15:51:21 +08003731 return -1;
3732 }
3733
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003734 xbzrle_load_setup();
Alexey Perevalovf9494612017-10-05 14:13:20 +03003735 ramblock_recv_map_init();
Zhang Chen13af18f2018-09-03 12:38:48 +08003736
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003737 return 0;
3738}
3739
3740static int ram_load_cleanup(void *opaque)
3741{
Alexey Perevalovf9494612017-10-05 14:13:20 +03003742 RAMBlock *rb;
Junyan He56eb90a2018-07-18 15:48:03 +08003743
3744 RAMBLOCK_FOREACH_MIGRATABLE(rb) {
3745 if (ramblock_is_pmem(rb)) {
3746 pmem_persist(rb->host, rb->used_length);
3747 }
3748 }
3749
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003750 xbzrle_load_cleanup();
Juan Quintelaf0afa332017-06-28 11:52:28 +02003751 compress_threads_load_cleanup();
Alexey Perevalovf9494612017-10-05 14:13:20 +03003752
Cédric Le Goaterb895de52018-05-14 08:57:00 +02003753 RAMBLOCK_FOREACH_MIGRATABLE(rb) {
Alexey Perevalovf9494612017-10-05 14:13:20 +03003754 g_free(rb->receivedmap);
3755 rb->receivedmap = NULL;
3756 }
Zhang Chen13af18f2018-09-03 12:38:48 +08003757
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003758 return 0;
3759}
3760
3761/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01003762 * ram_postcopy_incoming_init: allocate postcopy data structures
3763 *
 3764 * Returns 0 for success and negative if there was an error
3765 *
3766 * @mis: current migration incoming state
3767 *
3768 * Allocate data structures etc needed by incoming migration with
 3769 * postcopy-ram. postcopy-ram's similarly named
3770 * postcopy_ram_incoming_init does the work.
Dr. David Alan Gilbert1caddf82015-11-05 18:11:03 +00003771 */
3772int ram_postcopy_incoming_init(MigrationIncomingState *mis)
3773{
David Hildenbrandc1361802018-06-20 22:27:36 +02003774 return postcopy_ram_incoming_init(mis);
Dr. David Alan Gilbert1caddf82015-11-05 18:11:03 +00003775}
3776
Juan Quintela3d0684b2017-03-23 15:06:39 +01003777/**
3778 * ram_load_postcopy: load a page in postcopy case
3779 *
3780 * Returns 0 for success or -errno in case of error
3781 *
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003782 * Called in postcopy mode by ram_load().
3783 * rcu_read_lock is taken prior to this being called.
Juan Quintela3d0684b2017-03-23 15:06:39 +01003784 *
3785 * @f: QEMUFile where to send the data
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003786 */
3787static int ram_load_postcopy(QEMUFile *f)
3788{
3789 int flags = 0, ret = 0;
3790 bool place_needed = false;
Peter Xu1aa83672018-07-10 17:18:53 +08003791 bool matches_target_page_size = false;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003792 MigrationIncomingState *mis = migration_incoming_get_current();
3793 /* Temporary page that is later 'placed' */
3794 void *postcopy_host_page = postcopy_get_tmp_page(mis);
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00003795 void *last_host = NULL;
Dr. David Alan Gilberta3b6ff62015-11-11 14:02:28 +00003796 bool all_zero = false;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003797
3798 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
3799 ram_addr_t addr;
3800 void *host = NULL;
3801 void *page_buffer = NULL;
3802 void *place_source = NULL;
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00003803 RAMBlock *block = NULL;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003804 uint8_t ch;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003805
3806 addr = qemu_get_be64(f);
Peter Xu7a9ddfb2018-02-08 18:31:05 +08003807
3808 /*
3809 * If qemu file error, we should stop here, and then "addr"
3810 * may be invalid
3811 */
3812 ret = qemu_file_get_error(f);
3813 if (ret) {
3814 break;
3815 }
3816
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003817 flags = addr & ~TARGET_PAGE_MASK;
3818 addr &= TARGET_PAGE_MASK;
3819
3820 trace_ram_load_postcopy_loop((uint64_t)addr, flags);
3821 place_needed = false;
Juan Quintelabb890ed2017-04-28 09:39:55 +02003822 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE)) {
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00003823 block = ram_block_from_stream(f, flags);
zhanghailiang4c4bad42016-01-15 11:37:41 +08003824
3825 host = host_from_ram_block_offset(block, addr);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003826 if (!host) {
3827 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
3828 ret = -EINVAL;
3829 break;
3830 }
Peter Xu1aa83672018-07-10 17:18:53 +08003831 matches_target_page_size = block->page_size == TARGET_PAGE_SIZE;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003832 /*
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00003833 * Postcopy requires that we place whole host pages atomically;
3834 * these may be huge pages for RAMBlocks that are backed by
3835 * hugetlbfs.
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003836 * To make it atomic, the data is read into a temporary page
3837 * that's moved into place later.
3838 * The migration protocol uses, possibly smaller, target-pages
3839 * however the source ensures it always sends all the components
3840 * of a host page in order.
3841 */
3842 page_buffer = postcopy_host_page +
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00003843 ((uintptr_t)host & (block->page_size - 1));
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003844 /* If all TP are zero then we can optimise the place */
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00003845 if (!((uintptr_t)host & (block->page_size - 1))) {
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003846 all_zero = true;
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00003847 } else {
3848 /* not the 1st TP within the HP */
3849 if (host != (last_host + TARGET_PAGE_SIZE)) {
Markus Armbruster9af9e0f2015-12-18 16:35:19 +01003850 error_report("Non-sequential target page %p/%p",
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00003851 host, last_host);
3852 ret = -EINVAL;
3853 break;
3854 }
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003855 }
3856
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00003857
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003858 /*
3859 * If it's the last part of a host page then we place the host
3860 * page
3861 */
3862 place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00003863 (block->page_size - 1)) == 0;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003864 place_source = postcopy_host_page;
3865 }
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00003866 last_host = host;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003867
3868 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
Juan Quintelabb890ed2017-04-28 09:39:55 +02003869 case RAM_SAVE_FLAG_ZERO:
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003870 ch = qemu_get_byte(f);
3871 memset(page_buffer, ch, TARGET_PAGE_SIZE);
3872 if (ch) {
3873 all_zero = false;
3874 }
3875 break;
3876
3877 case RAM_SAVE_FLAG_PAGE:
3878 all_zero = false;
Peter Xu1aa83672018-07-10 17:18:53 +08003879 if (!matches_target_page_size) {
3880                 /* For huge pages, we always use a temporary buffer */
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003881 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
3882 } else {
Peter Xu1aa83672018-07-10 17:18:53 +08003883 /*
3884                  * For small pages that match the target page size, we
3885                  * avoid the qemu_file copy. Instead we directly use
3886                  * the QEMUFile buffer to place the page. Note: we must
3887                  * not do any other QEMUFile operation before using that
3888                  * buffer, so that it is still valid when the page is
3889                  * placed.
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003890 */
3891 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
3892 TARGET_PAGE_SIZE);
3893 }
3894 break;
3895 case RAM_SAVE_FLAG_EOS:
3896 /* normal exit */
Juan Quintela6df264a2018-02-28 09:10:07 +01003897 multifd_recv_sync_main();
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003898 break;
3899 default:
3900 error_report("Unknown combination of migration flags: %#x"
3901 " (postcopy mode)", flags);
3902 ret = -EINVAL;
Peter Xu7a9ddfb2018-02-08 18:31:05 +08003903 break;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003904 }
3905
Peter Xu7a9ddfb2018-02-08 18:31:05 +08003906         /* Detect any possible file errors */
3907 if (!ret && qemu_file_get_error(f)) {
3908 ret = qemu_file_get_error(f);
3909 }
3910
3911 if (!ret && place_needed) {
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003912             /* We only reach here for the last target page in the host page */
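            /*
             * "host" points at that last target page, so host + TARGET_PAGE_SIZE
             * is the end of the host page; stepping back by the host page size
             * gives the start of the whole host page to place.
             */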
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00003913 void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;
3914
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003915 if (all_zero) {
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00003916 ret = postcopy_place_page_zero(mis, place_dest,
Alexey Perevalov8be46202017-10-05 14:13:18 +03003917 block);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003918 } else {
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00003919 ret = postcopy_place_page(mis, place_dest,
Alexey Perevalov8be46202017-10-05 14:13:18 +03003920 place_source, block);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003921 }
3922 }
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003923 }
3924
3925 return ret;
3926}
3927
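/*
 * Returns true once the incoming postcopy state has reached ADVISE and
 * until it reaches END, i.e. the source announced the postcopy capability.
 */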
Daniel Henrique Barbozaacab30b2017-11-16 20:35:26 -02003928static bool postcopy_is_advised(void)
3929{
3930 PostcopyState ps = postcopy_state_get();
3931 return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END;
3932}
3933
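/*
 * Returns true from the point the incoming side enters the LISTENING state
 * until the postcopy state reaches END.
 */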
3934static bool postcopy_is_running(void)
3935{
3936 PostcopyState ps = postcopy_state_get();
3937 return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END;
3938}
3939
Juan Quintela56e93d22015-05-07 19:33:31 +02003940static int ram_load(QEMUFile *f, void *opaque, int version_id)
3941{
Juan Quintelaedc60122016-11-02 12:40:46 +01003942 int flags = 0, ret = 0, invalid_flags = 0;
Juan Quintela56e93d22015-05-07 19:33:31 +02003943 static uint64_t seq_iter;
3944 int len = 0;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003945 /*
3946      * If the system is running in postcopy mode, pages must be inserted
3947      * into host memory atomically.
3948 */
Daniel Henrique Barbozaacab30b2017-11-16 20:35:26 -02003949 bool postcopy_running = postcopy_is_running();
Dr. David Alan Gilbertef08fb32017-02-24 18:28:30 +00003950     /* ADVISE arrives earlier; it indicates that the source has the postcopy capability enabled */
Daniel Henrique Barbozaacab30b2017-11-16 20:35:26 -02003951 bool postcopy_advised = postcopy_is_advised();
Juan Quintela56e93d22015-05-07 19:33:31 +02003952
3953 seq_iter++;
3954
3955 if (version_id != 4) {
3956 ret = -EINVAL;
3957 }
3958
Juan Quintelaedc60122016-11-02 12:40:46 +01003959 if (!migrate_use_compression()) {
3960 invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE;
3961 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003962 /* This RCU critical section can be very long running.
3963      * If the RCU reclamations triggered by this code become numerous,
3964 * it will be necessary to reduce the granularity of this
3965 * critical section.
3966 */
3967 rcu_read_lock();
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003968
3969 if (postcopy_running) {
3970 ret = ram_load_postcopy(f);
3971 }
3972
3973 while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02003974 ram_addr_t addr, total_ram_bytes;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00003975 void *host = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02003976 uint8_t ch;
3977
3978 addr = qemu_get_be64(f);
3979 flags = addr & ~TARGET_PAGE_MASK;
3980 addr &= TARGET_PAGE_MASK;
3981
Juan Quintelaedc60122016-11-02 12:40:46 +01003982 if (flags & invalid_flags) {
3983 if (flags & invalid_flags & RAM_SAVE_FLAG_COMPRESS_PAGE) {
3984 error_report("Received an unexpected compressed page");
3985 }
3986
3987 ret = -EINVAL;
3988 break;
3989 }
3990
Juan Quintelabb890ed2017-04-28 09:39:55 +02003991 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00003992 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
zhanghailiang4c4bad42016-01-15 11:37:41 +08003993 RAMBlock *block = ram_block_from_stream(f, flags);
3994
Zhang Chen13af18f2018-09-03 12:38:48 +08003995 /*
3996              * After going into COLO state, we should load the page into colo_cache.
3997 */
3998 if (migration_incoming_in_colo_state()) {
3999 host = colo_cache_from_block_offset(block, addr);
4000 } else {
4001 host = host_from_ram_block_offset(block, addr);
4002 }
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004003 if (!host) {
4004 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
4005 ret = -EINVAL;
4006 break;
4007 }
Zhang Chen13af18f2018-09-03 12:38:48 +08004008
4009 if (!migration_incoming_in_colo_state()) {
4010 ramblock_recv_bitmap_set(block, host);
4011 }
4012
Dr. David Alan Gilbert1db9d8e2017-04-26 19:37:21 +01004013 trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004014 }
4015
Juan Quintela56e93d22015-05-07 19:33:31 +02004016 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
4017 case RAM_SAVE_FLAG_MEM_SIZE:
4018 /* Synchronize RAM block list */
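            /*
             * For this flag, "addr" carries the total RAM size. The stream
             * then holds, for each block: a one-byte id length, the id
             * string, a be64 block length and, when postcopy was advised and
             * the block's page size differs from the host page size, a be64
             * page size to cross-check.
             */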
4019 total_ram_bytes = addr;
4020 while (!ret && total_ram_bytes) {
4021 RAMBlock *block;
Juan Quintela56e93d22015-05-07 19:33:31 +02004022 char id[256];
4023 ram_addr_t length;
4024
4025 len = qemu_get_byte(f);
4026 qemu_get_buffer(f, (uint8_t *)id, len);
4027 id[len] = 0;
4028 length = qemu_get_be64(f);
4029
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00004030 block = qemu_ram_block_by_name(id);
Cédric Le Goaterb895de52018-05-14 08:57:00 +02004031 if (block && !qemu_ram_is_migratable(block)) {
4032                     error_report("block %s should not be migrated!", id);
4033 ret = -EINVAL;
4034 } else if (block) {
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00004035 if (length != block->used_length) {
4036 Error *local_err = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02004037
Gongleifa53a0e2016-05-10 10:04:59 +08004038 ret = qemu_ram_resize(block, length,
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00004039 &local_err);
4040 if (local_err) {
4041 error_report_err(local_err);
Juan Quintela56e93d22015-05-07 19:33:31 +02004042 }
Juan Quintela56e93d22015-05-07 19:33:31 +02004043 }
Dr. David Alan Gilbertef08fb32017-02-24 18:28:30 +00004044                 /* For postcopy we need to check that hugepage sizes match */
4045 if (postcopy_advised &&
4046 block->page_size != qemu_host_page_size) {
4047 uint64_t remote_page_size = qemu_get_be64(f);
4048 if (remote_page_size != block->page_size) {
4049 error_report("Mismatched RAM page size %s "
4050 "(local) %zd != %" PRId64,
4051 id, block->page_size,
4052 remote_page_size);
4053 ret = -EINVAL;
4054 }
4055 }
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00004056 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
4057 block->idstr);
4058 } else {
Juan Quintela56e93d22015-05-07 19:33:31 +02004059 error_report("Unknown ramblock \"%s\", cannot "
4060 "accept migration", id);
4061 ret = -EINVAL;
4062 }
4063
4064 total_ram_bytes -= length;
4065 }
4066 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004067
Juan Quintelabb890ed2017-04-28 09:39:55 +02004068 case RAM_SAVE_FLAG_ZERO:
Juan Quintela56e93d22015-05-07 19:33:31 +02004069 ch = qemu_get_byte(f);
4070 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
4071 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004072
Juan Quintela56e93d22015-05-07 19:33:31 +02004073 case RAM_SAVE_FLAG_PAGE:
Juan Quintela56e93d22015-05-07 19:33:31 +02004074 qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
4075 break;
Juan Quintela56e93d22015-05-07 19:33:31 +02004076
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004077 case RAM_SAVE_FLAG_COMPRESS_PAGE:
Juan Quintela56e93d22015-05-07 19:33:31 +02004078 len = qemu_get_be32(f);
4079 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
4080 error_report("Invalid compressed data length: %d", len);
4081 ret = -EINVAL;
4082 break;
4083 }
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00004084 decompress_data_with_multi_threads(f, host, len);
Juan Quintela56e93d22015-05-07 19:33:31 +02004085 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004086
Juan Quintela56e93d22015-05-07 19:33:31 +02004087 case RAM_SAVE_FLAG_XBZRLE:
Juan Quintela56e93d22015-05-07 19:33:31 +02004088 if (load_xbzrle(f, addr, host) < 0) {
4089 error_report("Failed to decompress XBZRLE page at "
4090 RAM_ADDR_FMT, addr);
4091 ret = -EINVAL;
4092 break;
4093 }
4094 break;
4095 case RAM_SAVE_FLAG_EOS:
4096 /* normal exit */
Juan Quintela6df264a2018-02-28 09:10:07 +01004097 multifd_recv_sync_main();
Juan Quintela56e93d22015-05-07 19:33:31 +02004098 break;
4099 default:
4100 if (flags & RAM_SAVE_FLAG_HOOK) {
Dr. David Alan Gilbert632e3a52015-06-11 18:17:23 +01004101 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
Juan Quintela56e93d22015-05-07 19:33:31 +02004102 } else {
4103 error_report("Unknown combination of migration flags: %#x",
4104 flags);
4105 ret = -EINVAL;
4106 }
4107 }
4108 if (!ret) {
4109 ret = qemu_file_get_error(f);
4110 }
4111 }
4112
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08004113 ret |= wait_for_decompress_done();
Juan Quintela56e93d22015-05-07 19:33:31 +02004114 rcu_read_unlock();
Juan Quintela55c44462017-01-23 22:32:05 +01004115 trace_ram_load_complete(ret, seq_iter);
Juan Quintela56e93d22015-05-07 19:33:31 +02004116 return ret;
4117}
4118
Vladimir Sementsov-Ogievskiyc6467622017-07-10 19:30:14 +03004119static bool ram_has_postcopy(void *opaque)
4120{
Junyan He469dd512018-07-18 15:48:02 +08004121 RAMBlock *rb;
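    /*
     * Postcopy is refused when any migratable block lives on persistent
     * memory: placing pages into such blocks is not supported yet.
     */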
4122 RAMBLOCK_FOREACH_MIGRATABLE(rb) {
4123 if (ramblock_is_pmem(rb)) {
4124             info_report("Block: %s, host: %p is an nvdimm memory, postcopy "
4125                         "is not supported now!", rb->idstr, rb->host);
4126 return false;
4127 }
4128 }
4129
Vladimir Sementsov-Ogievskiyc6467622017-07-10 19:30:14 +03004130 return migrate_postcopy_ram();
4131}
4132
Peter Xuedd090c2018-05-02 18:47:32 +08004133/* Sync all the dirty bitmaps with the destination VM. */
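/*
 * One recv-bitmap request is sent for every migratable RAMBlock; we then
 * wait on rp_state.rp_sem once per block, which ram_dirty_bitmap_reload()
 * posts as each reply is processed.
 */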
4134static int ram_dirty_bitmap_sync_all(MigrationState *s, RAMState *rs)
4135{
4136 RAMBlock *block;
4137 QEMUFile *file = s->to_dst_file;
4138 int ramblock_count = 0;
4139
4140 trace_ram_dirty_bitmap_sync_start();
4141
Dr. David Alan Gilbertff0769a2018-06-05 17:25:44 +01004142 RAMBLOCK_FOREACH_MIGRATABLE(block) {
Peter Xuedd090c2018-05-02 18:47:32 +08004143 qemu_savevm_send_recv_bitmap(file, block->idstr);
4144 trace_ram_dirty_bitmap_request(block->idstr);
4145 ramblock_count++;
4146 }
4147
4148 trace_ram_dirty_bitmap_sync_wait();
4149
4150     /* Wait until all the ramblocks' dirty bitmaps are synced */
4151 while (ramblock_count--) {
4152 qemu_sem_wait(&s->rp_state.rp_sem);
4153 }
4154
4155 trace_ram_dirty_bitmap_sync_complete();
4156
4157 return 0;
4158}
4159
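/*
 * Wake up ram_dirty_bitmap_sync_all(), which waits on rp_sem once for
 * every ramblock whose received bitmap has been reloaded.
 */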
4160static void ram_dirty_bitmap_reload_notify(MigrationState *s)
4161{
4162 qemu_sem_post(&s->rp_state.rp_sem);
4163}
4164
Peter Xua335deb2018-05-02 18:47:28 +08004165/*
4166 * Read the received bitmap, revert it as the initial dirty bitmap.
4167 * This is only used when the postcopy migration is paused but wants
4168 * to resume from a middle point.
4169 */
4170int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block)
4171{
4172 int ret = -EINVAL;
4173 QEMUFile *file = s->rp_state.from_dst_file;
4174 unsigned long *le_bitmap, nbits = block->used_length >> TARGET_PAGE_BITS;
Peter Xua725ef92018-07-10 17:18:55 +08004175 uint64_t local_size = DIV_ROUND_UP(nbits, 8);
Peter Xua335deb2018-05-02 18:47:28 +08004176 uint64_t size, end_mark;
4177
4178 trace_ram_dirty_bitmap_reload_begin(block->idstr);
4179
4180 if (s->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
4181 error_report("%s: incorrect state %s", __func__,
4182 MigrationStatus_str(s->state));
4183 return -EINVAL;
4184 }
4185
4186 /*
4187 * Note: see comments in ramblock_recv_bitmap_send() on why we
4188      * need the endianness conversion and the padding.
4189 */
4190 local_size = ROUND_UP(local_size, 8);
4191
4192     /* Add padding */
4193 le_bitmap = bitmap_new(nbits + BITS_PER_LONG);
4194
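    /*
     * The stream for one bitmap is: a be64 size, the little-endian bitmap
     * data (padded to a multiple of 8 bytes), then a be64 end marker
     * (RAMBLOCK_RECV_BITMAP_ENDING).
     */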
4195 size = qemu_get_be64(file);
4196
4197     /* The size of the bitmap should match our ramblock */
4198 if (size != local_size) {
4199 error_report("%s: ramblock '%s' bitmap size mismatch "
4200 "(0x%"PRIx64" != 0x%"PRIx64")", __func__,
4201 block->idstr, size, local_size);
4202 ret = -EINVAL;
4203 goto out;
4204 }
4205
4206 size = qemu_get_buffer(file, (uint8_t *)le_bitmap, local_size);
4207 end_mark = qemu_get_be64(file);
4208
4209 ret = qemu_file_get_error(file);
4210 if (ret || size != local_size) {
4211 error_report("%s: read bitmap failed for ramblock '%s': %d"
4212 " (size 0x%"PRIx64", got: 0x%"PRIx64")",
4213 __func__, block->idstr, ret, local_size, size);
4214 ret = -EIO;
4215 goto out;
4216 }
4217
4218 if (end_mark != RAMBLOCK_RECV_BITMAP_ENDING) {
4219         error_report("%s: ramblock '%s' end mark incorrect: 0x%"PRIx64,
4220 __func__, block->idstr, end_mark);
4221 ret = -EINVAL;
4222 goto out;
4223 }
4224
4225 /*
4226      * Endianness conversion. We are in postcopy (though paused).
4227 * The dirty bitmap won't change. We can directly modify it.
4228 */
4229 bitmap_from_le(block->bmap, le_bitmap, nbits);
4230
4231 /*
4232      * What we received is the "received bitmap". Invert it to form the
4233      * initial dirty bitmap for this ramblock.
4234 */
4235 bitmap_complement(block->bmap, block->bmap, nbits);
4236
4237 trace_ram_dirty_bitmap_reload_complete(block->idstr);
4238
Peter Xuedd090c2018-05-02 18:47:32 +08004239 /*
4240      * We succeeded in syncing the bitmap for the current ramblock. If this
4241      * is the last one to sync, we need to notify the main send thread.
4242 */
4243 ram_dirty_bitmap_reload_notify(s);
4244
Peter Xua335deb2018-05-02 18:47:28 +08004245 ret = 0;
4246out:
Peter Xubf269902018-05-25 09:50:42 +08004247 g_free(le_bitmap);
Peter Xua335deb2018-05-02 18:47:28 +08004248 return ret;
4249}
4250
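/*
 * Prepare RAM migration for resuming a paused postcopy: pull the
 * destination's received bitmaps back as our dirty bitmaps, then let
 * ram_state_resume_prepare() set up the RAMState for the resumed stream.
 */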
Peter Xuedd090c2018-05-02 18:47:32 +08004251static int ram_resume_prepare(MigrationState *s, void *opaque)
4252{
4253 RAMState *rs = *(RAMState **)opaque;
Peter Xu08614f32018-05-02 18:47:33 +08004254 int ret;
Peter Xuedd090c2018-05-02 18:47:32 +08004255
Peter Xu08614f32018-05-02 18:47:33 +08004256 ret = ram_dirty_bitmap_sync_all(s, rs);
4257 if (ret) {
4258 return ret;
4259 }
4260
4261 ram_state_resume_prepare(rs, s->to_dst_file);
4262
4263 return 0;
Peter Xuedd090c2018-05-02 18:47:32 +08004264}
4265
Juan Quintela56e93d22015-05-07 19:33:31 +02004266static SaveVMHandlers savevm_ram_handlers = {
Juan Quintela9907e842017-06-28 11:52:24 +02004267 .save_setup = ram_save_setup,
Juan Quintela56e93d22015-05-07 19:33:31 +02004268 .save_live_iterate = ram_save_iterate,
Dr. David Alan Gilbert763c9062015-11-05 18:11:00 +00004269 .save_live_complete_postcopy = ram_save_complete,
Dr. David Alan Gilberta3e06c32015-11-05 18:10:41 +00004270 .save_live_complete_precopy = ram_save_complete,
Vladimir Sementsov-Ogievskiyc6467622017-07-10 19:30:14 +03004271 .has_postcopy = ram_has_postcopy,
Juan Quintela56e93d22015-05-07 19:33:31 +02004272 .save_live_pending = ram_save_pending,
4273 .load_state = ram_load,
Juan Quintelaf265e0e2017-06-28 11:52:27 +02004274 .save_cleanup = ram_save_cleanup,
4275 .load_setup = ram_load_setup,
4276 .load_cleanup = ram_load_cleanup,
Peter Xuedd090c2018-05-02 18:47:32 +08004277 .resume_prepare = ram_resume_prepare,
Juan Quintela56e93d22015-05-07 19:33:31 +02004278};
4279
4280void ram_mig_init(void)
4281{
4282 qemu_mutex_init(&XBZRLE.lock);
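    /* The stream version registered here (4) must match the check in ram_load(). */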
Juan Quintela6f37bb82017-03-13 19:26:29 +01004283 register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state);
Juan Quintela56e93d22015-05-07 19:33:31 +02004284}