/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2011-2015 Red Hat Inc
 *
 * Authors:
 *  Juan Quintela <quintela@redhat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "cpu.h"
#include <zlib.h>
#include "qapi-event.h"
#include "qemu/cutils.h"
#include "qemu/bitops.h"
#include "qemu/bitmap.h"
#include "qemu/timer.h"
#include "qemu/main-loop.h"
#include "migration/migration.h"
#include "migration/postcopy-ram.h"
#include "exec/address-spaces.h"
#include "migration/page_cache.h"
#include "qemu/error-report.h"
#include "trace.h"
#include "exec/ram_addr.h"
#include "qemu/rcu_queue.h"
#include "migration/colo.h"

/***********************************************************/
/* ram save/restore */

#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
#define RAM_SAVE_FLAG_COMPRESS 0x02
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
#define RAM_SAVE_FLAG_PAGE     0x08
#define RAM_SAVE_FLAG_EOS      0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE   0x40
/* 0x80 is reserved in migration.h start with 0x100 next */
#define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100

static uint8_t *ZERO_TARGET_PAGE;

static inline bool is_zero_range(uint8_t *p, uint64_t size)
{
    return buffer_is_zero(p, size);
}

/* struct contains XBZRLE cache and a static page
   used by the compression */
static struct {
    /* buffer used for XBZRLE encoding */
    uint8_t *encoded_buf;
    /* buffer for storing page content */
    uint8_t *current_buf;
    /* Cache for XBZRLE, Protected by lock. */
    PageCache *cache;
    QemuMutex lock;
} XBZRLE;

/* buffer used for XBZRLE decoding */
static uint8_t *xbzrle_decoded_buf;

static void XBZRLE_cache_lock(void)
{
    if (migrate_use_xbzrle())
        qemu_mutex_lock(&XBZRLE.lock);
}

static void XBZRLE_cache_unlock(void)
{
    if (migrate_use_xbzrle())
        qemu_mutex_unlock(&XBZRLE.lock);
}

/**
 * xbzrle_cache_resize: resize the xbzrle cache
 *
 * This function is called from qmp_migrate_set_cache_size in main
 * thread, possibly while a migration is in progress. A running
 * migration may be using the cache and might finish during this call,
 * hence changes to the cache are protected by XBZRLE.lock().
 *
 * Returns the new_size or negative in case of error.
 *
 * @new_size: new cache size
 */
int64_t xbzrle_cache_resize(int64_t new_size)
{
    PageCache *new_cache;
    int64_t ret;

    if (new_size < TARGET_PAGE_SIZE) {
        return -1;
    }

    XBZRLE_cache_lock();

    if (XBZRLE.cache != NULL) {
        if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
            goto out_new_size;
        }
        new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
                               TARGET_PAGE_SIZE);
        if (!new_cache) {
            error_report("Error creating cache");
            ret = -1;
            goto out;
        }

        cache_fini(XBZRLE.cache);
        XBZRLE.cache = new_cache;
    }

out_new_size:
    ret = pow2floor(new_size);
out:
    XBZRLE_cache_unlock();
    return ret;
}

struct RAMBitmap {
    struct rcu_head rcu;
    /* Main migration bitmap */
    unsigned long *bmap;
    /* bitmap of pages that haven't been sent even once
     * only maintained and used in postcopy at the moment
     * where it's used to send the dirtymap at the start
     * of the postcopy phase
     */
    unsigned long *unsentmap;
};
typedef struct RAMBitmap RAMBitmap;

/*
 * An outstanding page request, on the source, having been received
 * and queued
 */
struct RAMSrcPageRequest {
    RAMBlock *rb;
    hwaddr    offset;
    hwaddr    len;

    QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
};

/* State of RAM for migration */
struct RAMState {
    /* QEMUFile used for this migration */
    QEMUFile *f;
    /* Last block that we have visited searching for dirty pages */
    RAMBlock *last_seen_block;
    /* Last block from where we have sent data */
    RAMBlock *last_sent_block;
    /* Last dirty target page we have sent */
    ram_addr_t last_page;
    /* last ram version we have seen */
    uint32_t last_version;
    /* We are in the first round */
    bool ram_bulk_stage;
    /* How many times we have dirtied too many pages */
    int dirty_rate_high_cnt;
    /* How many times we have synchronized the bitmap */
    uint64_t bitmap_sync_count;
    /* these variables are used for bitmap sync */
    /* last time we did a full bitmap_sync */
    int64_t time_last_bitmap_sync;
    /* bytes transferred at start_time */
    uint64_t bytes_xfer_prev;
    /* number of dirty pages since start_time */
    uint64_t num_dirty_pages_period;
    /* xbzrle misses since the beginning of the period */
    uint64_t xbzrle_cache_miss_prev;
    /* number of iterations at the beginning of period */
    uint64_t iterations_prev;
    /* Accounting fields */
    /* number of zero pages.  It used to be pages filled by the same char. */
    uint64_t zero_pages;
    /* number of normal transferred pages */
    uint64_t norm_pages;
    /* Iterations since start */
    uint64_t iterations;
    /* xbzrle transmitted bytes.  Note that this is with compression
     * applied, so it can't be derived from the page count */
    uint64_t xbzrle_bytes;
    /* xbzrle transmitted pages */
    uint64_t xbzrle_pages;
    /* xbzrle number of cache misses */
    uint64_t xbzrle_cache_miss;
    /* xbzrle miss rate */
    double xbzrle_cache_miss_rate;
    /* xbzrle number of overflows */
    uint64_t xbzrle_overflows;
    /* number of dirty bits in the bitmap */
    uint64_t migration_dirty_pages;
    /* total number of bytes transferred */
    uint64_t bytes_transferred;
    /* number of dirtied pages in the last second */
    uint64_t dirty_pages_rate;
    /* Count of requests incoming from destination */
    uint64_t postcopy_requests;
    /* protects modification of the bitmap */
    QemuMutex bitmap_mutex;
    /* Ram Bitmap protected by RCU */
    RAMBitmap *ram_bitmap;
    /* The RAMBlock used in the last src_page_requests */
    RAMBlock *last_req_rb;
    /* Queue of outstanding page requests from the destination */
    QemuMutex src_page_req_mutex;
    QSIMPLEQ_HEAD(src_page_requests, RAMSrcPageRequest) src_page_requests;
};
typedef struct RAMState RAMState;

static RAMState ram_state;

uint64_t dup_mig_pages_transferred(void)
{
    return ram_state.zero_pages;
}

uint64_t norm_mig_pages_transferred(void)
{
    return ram_state.norm_pages;
}

uint64_t xbzrle_mig_bytes_transferred(void)
{
    return ram_state.xbzrle_bytes;
}

uint64_t xbzrle_mig_pages_transferred(void)
{
    return ram_state.xbzrle_pages;
}

uint64_t xbzrle_mig_pages_cache_miss(void)
{
    return ram_state.xbzrle_cache_miss;
}

double xbzrle_mig_cache_miss_rate(void)
{
    return ram_state.xbzrle_cache_miss_rate;
}

uint64_t xbzrle_mig_pages_overflow(void)
{
    return ram_state.xbzrle_overflows;
}

uint64_t ram_bytes_transferred(void)
{
    return ram_state.bytes_transferred;
}

uint64_t ram_bytes_remaining(void)
{
    return ram_state.migration_dirty_pages * TARGET_PAGE_SIZE;
}

uint64_t ram_dirty_sync_count(void)
{
    return ram_state.bitmap_sync_count;
}

uint64_t ram_dirty_pages_rate(void)
{
    return ram_state.dirty_pages_rate;
}

uint64_t ram_postcopy_requests(void)
{
    return ram_state.postcopy_requests;
}

/* used by the search for pages to send */
struct PageSearchStatus {
    /* Current block being searched */
    RAMBlock    *block;
    /* Current page to search from */
    unsigned long page;
    /* Set once we wrap around */
    bool         complete_round;
};
typedef struct PageSearchStatus PageSearchStatus;

struct CompressParam {
    bool done;
    bool quit;
    QEMUFile *file;
    QemuMutex mutex;
    QemuCond cond;
    RAMBlock *block;
    ram_addr_t offset;
};
typedef struct CompressParam CompressParam;

struct DecompressParam {
    bool done;
    bool quit;
    QemuMutex mutex;
    QemuCond cond;
    void *des;
    uint8_t *compbuf;
    int len;
};
typedef struct DecompressParam DecompressParam;

static CompressParam *comp_param;
static QemuThread *compress_threads;
/* comp_done_cond is used to wake up the migration thread when
 * one of the compression threads has finished the compression.
 * comp_done_lock is used to co-work with comp_done_cond.
 */
static QemuMutex comp_done_lock;
static QemuCond comp_done_cond;
/* The empty QEMUFileOps will be used by file in CompressParam */
static const QEMUFileOps empty_ops = { };

static DecompressParam *decomp_param;
static QemuThread *decompress_threads;
static QemuMutex decomp_done_lock;
static QemuCond decomp_done_cond;

static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
                                ram_addr_t offset);

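/*
 * Body of each compression thread: wait for a (block, offset) pair to be
 * posted in its CompressParam, compress that page into the per-thread
 * buffer file, then mark itself done and signal comp_done_cond.  The loop
 * exits once 'quit' is set by terminate_compression_threads().
 */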
static void *do_data_compress(void *opaque)
{
    CompressParam *param = opaque;
    RAMBlock *block;
    ram_addr_t offset;

    qemu_mutex_lock(&param->mutex);
    while (!param->quit) {
        if (param->block) {
            block = param->block;
            offset = param->offset;
            param->block = NULL;
            qemu_mutex_unlock(&param->mutex);

            do_compress_ram_page(param->file, block, offset);

            qemu_mutex_lock(&comp_done_lock);
            param->done = true;
            qemu_cond_signal(&comp_done_cond);
            qemu_mutex_unlock(&comp_done_lock);

            qemu_mutex_lock(&param->mutex);
        } else {
            qemu_cond_wait(&param->cond, &param->mutex);
        }
    }
    qemu_mutex_unlock(&param->mutex);

    return NULL;
}

static inline void terminate_compression_threads(void)
{
    int idx, thread_count;

    thread_count = migrate_compress_threads();

    for (idx = 0; idx < thread_count; idx++) {
        qemu_mutex_lock(&comp_param[idx].mutex);
        comp_param[idx].quit = true;
        qemu_cond_signal(&comp_param[idx].cond);
        qemu_mutex_unlock(&comp_param[idx].mutex);
    }
}

void migrate_compress_threads_join(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    terminate_compression_threads();
    thread_count = migrate_compress_threads();
    for (i = 0; i < thread_count; i++) {
        qemu_thread_join(compress_threads + i);
        qemu_fclose(comp_param[i].file);
        qemu_mutex_destroy(&comp_param[i].mutex);
        qemu_cond_destroy(&comp_param[i].cond);
    }
    qemu_mutex_destroy(&comp_done_lock);
    qemu_cond_destroy(&comp_done_cond);
    g_free(compress_threads);
    g_free(comp_param);
    compress_threads = NULL;
    comp_param = NULL;
}

void migrate_compress_threads_create(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    thread_count = migrate_compress_threads();
    compress_threads = g_new0(QemuThread, thread_count);
    comp_param = g_new0(CompressParam, thread_count);
    qemu_cond_init(&comp_done_cond);
    qemu_mutex_init(&comp_done_lock);
    for (i = 0; i < thread_count; i++) {
        /* comp_param[i].file is just used as a dummy buffer to save data,
         * set its ops to empty.
         */
        comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
        comp_param[i].done = true;
        comp_param[i].quit = false;
        qemu_mutex_init(&comp_param[i].mutex);
        qemu_cond_init(&comp_param[i].cond);
        qemu_thread_create(compress_threads + i, "compress",
                           do_data_compress, comp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
}

/**
 * save_page_header: write page header to wire
 *
 * If this is the 1st block, it also writes the block identification
 *
 * Returns the number of bytes written
 *
 * @rs: current RAM state
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 *          in the lower bits, it contains flags
 */
static size_t save_page_header(RAMState *rs, RAMBlock *block, ram_addr_t offset)
{
    size_t size, len;

    if (block == rs->last_sent_block) {
        offset |= RAM_SAVE_FLAG_CONTINUE;
    }
    qemu_put_be64(rs->f, offset);
    size = 8;

    if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
        len = strlen(block->idstr);
        qemu_put_byte(rs->f, len);
        qemu_put_buffer(rs->f, (uint8_t *)block->idstr, len);
        size += 1 + len;
        rs->last_sent_block = block;
    }
    return size;
}

/**
 * mig_throttle_guest_down: throttle down the guest
 *
 * Reduce amount of guest cpu execution to hopefully slow down memory
 * writes. If guest dirty memory rate is reduced below the rate at
 * which we can transfer pages to the destination then we should be
 * able to complete migration. Some workloads dirty memory way too
 * fast and will not effectively converge, even with auto-converge.
 */
static void mig_throttle_guest_down(void)
{
    MigrationState *s = migrate_get_current();
    uint64_t pct_initial = s->parameters.cpu_throttle_initial;
    uint64_t pct_increment = s->parameters.cpu_throttle_increment;

    /* We have not started throttling yet. Let's start it. */
    if (!cpu_throttle_active()) {
        cpu_throttle_set(pct_initial);
    } else {
        /* Throttling already on, just increase the rate */
        cpu_throttle_set(cpu_throttle_get_percentage() + pct_increment);
    }
}

/**
 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
 *
 * @rs: current RAM state
 * @current_addr: address for the zero page
 *
 * Update the xbzrle cache to reflect a page that's been sent as all 0.
 * The important thing is that a stale (not-yet-0'd) page be replaced
 * by the new data.
 * As a bonus, if the page wasn't in the cache it gets added so that
 * when a small write is made into the 0'd page it gets XBZRLE sent.
 */
static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
{
    if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
        return;
    }

    /* We don't care if this fails to allocate a new cache page
     * as long as it updated an old one */
    cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE,
                 rs->bitmap_sync_count);
}

#define ENCODING_FLAG_XBZRLE 0x1

/**
 * save_xbzrle_page: compress and send current page
 *
 * Returns: 1 means that we wrote the page
 *          0 means that page is identical to the one already sent
 *          -1 means that xbzrle would be longer than normal
 *
 * @rs: current RAM state
 * @current_data: pointer to the address of the page contents
 * @current_addr: addr of the page
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @last_stage: if we are at the completion stage
 */
static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
                            ram_addr_t current_addr, RAMBlock *block,
                            ram_addr_t offset, bool last_stage)
{
    int encoded_len = 0, bytes_xbzrle;
    uint8_t *prev_cached_page;

    if (!cache_is_cached(XBZRLE.cache, current_addr, rs->bitmap_sync_count)) {
        rs->xbzrle_cache_miss++;
        if (!last_stage) {
            if (cache_insert(XBZRLE.cache, current_addr, *current_data,
                             rs->bitmap_sync_count) == -1) {
                return -1;
            } else {
                /* update *current_data when the page has been
                   inserted into cache */
                *current_data = get_cached_data(XBZRLE.cache, current_addr);
            }
        }
        return -1;
    }

    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);

    /* save current buffer into memory */
    memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);

    /* XBZRLE encoding (if there is no overflow) */
    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
                                       TARGET_PAGE_SIZE);
    if (encoded_len == 0) {
        trace_save_xbzrle_page_skipping();
        return 0;
    } else if (encoded_len == -1) {
        trace_save_xbzrle_page_overflow();
        rs->xbzrle_overflows++;
        /* update data in the cache */
        if (!last_stage) {
            memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
            *current_data = prev_cached_page;
        }
        return -1;
    }

    /* we need to update the data in the cache, in order to get the same data */
    if (!last_stage) {
        memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
    }

    /* Send XBZRLE based compressed page */
    bytes_xbzrle = save_page_header(rs, block,
                                    offset | RAM_SAVE_FLAG_XBZRLE);
    qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE);
    qemu_put_be16(rs->f, encoded_len);
    qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len);
    bytes_xbzrle += encoded_len + 1 + 2;
    rs->xbzrle_pages++;
    rs->xbzrle_bytes += bytes_xbzrle;
    rs->bytes_transferred += bytes_xbzrle;

    return 1;
}

/**
 * migration_bitmap_find_dirty: find the next dirty page from start
 *
 * Called with rcu_read_lock() to protect migration_bitmap
 *
 * Returns the page offset within the RAMBlock of the next dirty page
 *
 * @rs: current RAM state
 * @rb: RAMBlock where to search for dirty pages
 * @start: page where we start the search
 * @page_abs: pointer into where to store the dirty page
 */
static inline
unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
                                          unsigned long start,
                                          unsigned long *page_abs)
{
    unsigned long base = rb->offset >> TARGET_PAGE_BITS;
    unsigned long nr = base + start;
    uint64_t rb_size = rb->used_length;
    unsigned long size = base + (rb_size >> TARGET_PAGE_BITS);
    unsigned long *bitmap;

    unsigned long next;

    bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap;
    if (rs->ram_bulk_stage && nr > base) {
        next = nr + 1;
    } else {
        next = find_next_bit(bitmap, size, nr);
    }

    *page_abs = next;
    return next - base;
}

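/*
 * Clear the dirty bit for the given absolute page number and, if it was
 * set, account for one less outstanding dirty page.
 */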
static inline bool migration_bitmap_clear_dirty(RAMState *rs,
                                                unsigned long page_abs)
{
    bool ret;
    unsigned long *bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap;

    ret = test_and_clear_bit(page_abs, bitmap);

    if (ret) {
        rs->migration_dirty_pages--;
    }
    return ret;
}

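/*
 * Fold the dirty memory log for [start, start + length) of a RAMBlock into
 * the migration bitmap, adding any newly dirtied pages to
 * migration_dirty_pages and num_dirty_pages_period.
 */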
static void migration_bitmap_sync_range(RAMState *rs, RAMBlock *rb,
                                        ram_addr_t start, ram_addr_t length)
{
    unsigned long *bitmap;
    bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap;
    rs->migration_dirty_pages +=
        cpu_physical_memory_sync_dirty_bitmap(bitmap, rb, start, length,
                                              &rs->num_dirty_pages_period);
}

/**
 * ram_pagesize_summary: calculate all the pagesizes of a VM
 *
 * Returns a summary bitmap of the page sizes of all RAMBlocks
 *
 * For VMs with just normal pages this is equivalent to the host page
 * size. If it's got some huge pages then it's the OR of all the
 * different page sizes.
 */
uint64_t ram_pagesize_summary(void)
{
    RAMBlock *block;
    uint64_t summary = 0;

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        summary |= block->page_size;
    }

    return summary;
}

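/*
 * Synchronize the dirty log into the migration bitmap for every RAMBlock
 * and update the rate counters.  At most once per second this also
 * recomputes dirty_pages_rate, feeds the auto-converge throttle and
 * refreshes the XBZRLE cache miss rate.
 */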
static void migration_bitmap_sync(RAMState *rs)
{
    RAMBlock *block;
    int64_t end_time;
    uint64_t bytes_xfer_now;

    rs->bitmap_sync_count++;

    if (!rs->bytes_xfer_prev) {
        rs->bytes_xfer_prev = ram_bytes_transferred();
    }

    if (!rs->time_last_bitmap_sync) {
        rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    }

    trace_migration_bitmap_sync_start();
    memory_global_dirty_log_sync();

    qemu_mutex_lock(&rs->bitmap_mutex);
    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        migration_bitmap_sync_range(rs, block, 0, block->used_length);
    }
    rcu_read_unlock();
    qemu_mutex_unlock(&rs->bitmap_mutex);

    trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);

    end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

    /* more than 1 second = 1000 milliseconds */
    if (end_time > rs->time_last_bitmap_sync + 1000) {
        if (migrate_auto_converge()) {
            /* The following detection logic can be refined later. For now:
               Check to see if the dirtied bytes is 50% more than the approx.
               amount of bytes that just got transferred since the last time we
               were in this routine. If that happens twice, start or increase
               throttling */
            bytes_xfer_now = ram_bytes_transferred();

            if (rs->dirty_pages_rate &&
               (rs->num_dirty_pages_period * TARGET_PAGE_SIZE >
                   (bytes_xfer_now - rs->bytes_xfer_prev) / 2) &&
                (rs->dirty_rate_high_cnt++ >= 2)) {
                    trace_migration_throttle();
                    rs->dirty_rate_high_cnt = 0;
                    mig_throttle_guest_down();
            }
            rs->bytes_xfer_prev = bytes_xfer_now;
        }

        if (migrate_use_xbzrle()) {
            if (rs->iterations_prev != rs->iterations) {
                rs->xbzrle_cache_miss_rate =
                   (double)(rs->xbzrle_cache_miss -
                            rs->xbzrle_cache_miss_prev) /
                   (rs->iterations - rs->iterations_prev);
            }
            rs->iterations_prev = rs->iterations;
            rs->xbzrle_cache_miss_prev = rs->xbzrle_cache_miss;
        }
        rs->dirty_pages_rate = rs->num_dirty_pages_period * 1000
            / (end_time - rs->time_last_bitmap_sync);
        rs->time_last_bitmap_sync = end_time;
        rs->num_dirty_pages_period = 0;
    }
    if (migrate_use_events()) {
        qapi_event_send_migration_pass(rs->bitmap_sync_count, NULL);
    }
}

/**
 * save_zero_page: send the zero page to the stream
 *
 * Returns the number of pages written.
 *
 * @rs: current RAM state
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @p: pointer to the page
 */
static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
                          uint8_t *p)
{
    int pages = -1;

    if (is_zero_range(p, TARGET_PAGE_SIZE)) {
        rs->zero_pages++;
        rs->bytes_transferred +=
            save_page_header(rs, block, offset | RAM_SAVE_FLAG_COMPRESS);
        qemu_put_byte(rs->f, 0);
        rs->bytes_transferred += 1;
        pages = 1;
    }

    return pages;
}

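/*
 * When the release-ram capability is in use during postcopy, discard the
 * host memory backing the pages that have just been sent so the source
 * frees them as it goes; otherwise this is a no-op.
 */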
static void ram_release_pages(const char *rbname, uint64_t offset, int pages)
{
    if (!migrate_release_ram() || !migration_in_postcopy()) {
        return;
    }

    ram_discard_range(rbname, offset, pages << TARGET_PAGE_BITS);
}

/**
 * ram_save_page: send the given page to the stream
 *
 * Returns the number of pages written.
 *          < 0 - error
 *          >=0 - Number of pages written - this might legally be 0
 *                if xbzrle noticed the page was the same.
 *
 * @rs: current RAM state
 * @pss: data about the page we want to send
 * @last_stage: if we are at the completion stage
 */
static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
{
    int pages = -1;
    uint64_t bytes_xmit;
    ram_addr_t current_addr;
    uint8_t *p;
    int ret;
    bool send_async = true;
    RAMBlock *block = pss->block;
    ram_addr_t offset = pss->page << TARGET_PAGE_BITS;

    p = block->host + offset;

    /* If in doubt, send the page as normal */
    bytes_xmit = 0;
    ret = ram_control_save_page(rs->f, block->offset,
                                offset, TARGET_PAGE_SIZE, &bytes_xmit);
    if (bytes_xmit) {
        rs->bytes_transferred += bytes_xmit;
        pages = 1;
    }

    XBZRLE_cache_lock();

    current_addr = block->offset + offset;

    if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
        if (ret != RAM_SAVE_CONTROL_DELAYED) {
            if (bytes_xmit > 0) {
                rs->norm_pages++;
            } else if (bytes_xmit == 0) {
                rs->zero_pages++;
            }
        }
    } else {
        pages = save_zero_page(rs, block, offset, p);
        if (pages > 0) {
            /* Must let xbzrle know, otherwise a previous (now 0'd) cached
             * page would be stale
             */
            xbzrle_cache_zero_page(rs, current_addr);
            ram_release_pages(block->idstr, offset, pages);
        } else if (!rs->ram_bulk_stage &&
                   !migration_in_postcopy() && migrate_use_xbzrle()) {
            pages = save_xbzrle_page(rs, &p, current_addr, block,
                                     offset, last_stage);
            if (!last_stage) {
                /* Can't send this cached data async, since the cache page
                 * might get updated before it gets to the wire
                 */
                send_async = false;
            }
        }
    }

    /* XBZRLE overflow or normal page */
    if (pages == -1) {
        rs->bytes_transferred += save_page_header(rs, block,
                                                  offset | RAM_SAVE_FLAG_PAGE);
        if (send_async) {
            qemu_put_buffer_async(rs->f, p, TARGET_PAGE_SIZE,
                                  migrate_release_ram() &
                                  migration_in_postcopy());
        } else {
            qemu_put_buffer(rs->f, p, TARGET_PAGE_SIZE);
        }
        rs->bytes_transferred += TARGET_PAGE_SIZE;
        pages = 1;
        rs->norm_pages++;
    }

    XBZRLE_cache_unlock();

    return pages;
}

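/*
 * Called from a compression thread: emit the page header, compress the page
 * body into the thread's buffer file with qemu_put_compression_data() and
 * return the number of bytes produced (0 if the compression itself failed).
 */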
static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
                                ram_addr_t offset)
{
    RAMState *rs = &ram_state;
    int bytes_sent, blen;
    uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);

    bytes_sent = save_page_header(rs, block, offset |
                                  RAM_SAVE_FLAG_COMPRESS_PAGE);
    blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
                                     migrate_compress_level());
    if (blen < 0) {
        bytes_sent = 0;
        qemu_file_set_error(migrate_get_current()->to_dst_file, blen);
        error_report("compressed data failed!");
    } else {
        bytes_sent += blen;
        ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1);
    }

    return bytes_sent;
}

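/*
 * Wait until every compression thread has finished its current page, then
 * move each thread's buffered output into the migration stream and account
 * for the bytes transferred.
 */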
static void flush_compressed_data(RAMState *rs)
{
    int idx, len, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    thread_count = migrate_compress_threads();

    qemu_mutex_lock(&comp_done_lock);
    for (idx = 0; idx < thread_count; idx++) {
        while (!comp_param[idx].done) {
            qemu_cond_wait(&comp_done_cond, &comp_done_lock);
        }
    }
    qemu_mutex_unlock(&comp_done_lock);

    for (idx = 0; idx < thread_count; idx++) {
        qemu_mutex_lock(&comp_param[idx].mutex);
        if (!comp_param[idx].quit) {
            len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
            rs->bytes_transferred += len;
        }
        qemu_mutex_unlock(&comp_param[idx].mutex);
    }
}

static inline void set_compress_params(CompressParam *param, RAMBlock *block,
                                       ram_addr_t offset)
{
    param->block = block;
    param->offset = offset;
}

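/*
 * Hand the page at (block, offset) to an idle compression thread.  The
 * chosen thread's previously buffered output is flushed into the migration
 * stream before the new work is posted; if no thread is idle, wait on
 * comp_done_cond until one becomes free.
 */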
static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block,
                                           ram_addr_t offset)
{
    int idx, thread_count, bytes_xmit = -1, pages = -1;

    thread_count = migrate_compress_threads();
    qemu_mutex_lock(&comp_done_lock);
    while (true) {
        for (idx = 0; idx < thread_count; idx++) {
            if (comp_param[idx].done) {
                comp_param[idx].done = false;
                bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file);
                qemu_mutex_lock(&comp_param[idx].mutex);
                set_compress_params(&comp_param[idx], block, offset);
                qemu_cond_signal(&comp_param[idx].cond);
                qemu_mutex_unlock(&comp_param[idx].mutex);
                pages = 1;
                rs->norm_pages++;
                rs->bytes_transferred += bytes_xmit;
                break;
            }
        }
        if (pages > 0) {
            break;
        } else {
            qemu_cond_wait(&comp_done_cond, &comp_done_lock);
        }
    }
    qemu_mutex_unlock(&comp_done_lock);

    return pages;
}

/**
 * ram_save_compressed_page: compress the given page and send it to the stream
 *
 * Returns the number of pages written.
 *
 * @rs: current RAM state
 * @pss: data about the page we want to send
 * @last_stage: if we are at the completion stage
 */
static int ram_save_compressed_page(RAMState *rs, PageSearchStatus *pss,
                                    bool last_stage)
{
    int pages = -1;
    uint64_t bytes_xmit = 0;
    uint8_t *p;
    int ret, blen;
    RAMBlock *block = pss->block;
    ram_addr_t offset = pss->page << TARGET_PAGE_BITS;

    p = block->host + offset;

    ret = ram_control_save_page(rs->f, block->offset,
                                offset, TARGET_PAGE_SIZE, &bytes_xmit);
    if (bytes_xmit) {
        rs->bytes_transferred += bytes_xmit;
        pages = 1;
    }
    if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
        if (ret != RAM_SAVE_CONTROL_DELAYED) {
            if (bytes_xmit > 0) {
                rs->norm_pages++;
            } else if (bytes_xmit == 0) {
                rs->zero_pages++;
            }
        }
    } else {
        /* When starting the process of a new block, the first page of
         * the block should be sent out before other pages in the same
         * block, and all the pages in last block should have been sent
         * out, keeping this order is important, because the 'cont' flag
         * is used to avoid resending the block name.
         */
        if (block != rs->last_sent_block) {
            flush_compressed_data(rs);
            pages = save_zero_page(rs, block, offset, p);
            if (pages == -1) {
                /* Make sure the first page is sent out before other pages */
                bytes_xmit = save_page_header(rs, block, offset |
                                              RAM_SAVE_FLAG_COMPRESS_PAGE);
                blen = qemu_put_compression_data(rs->f, p, TARGET_PAGE_SIZE,
                                                 migrate_compress_level());
                if (blen > 0) {
                    rs->bytes_transferred += bytes_xmit + blen;
                    rs->norm_pages++;
                    pages = 1;
                } else {
                    qemu_file_set_error(rs->f, blen);
                    error_report("compressed data failed!");
                }
            }
            if (pages > 0) {
                ram_release_pages(block->idstr, offset, pages);
            }
        } else {
            pages = save_zero_page(rs, block, offset, p);
            if (pages == -1) {
                pages = compress_page_with_multi_thread(rs, block, offset);
            } else {
                ram_release_pages(block->idstr, offset, pages);
            }
        }
    }

    return pages;
}

/**
 * find_dirty_block: find the next dirty page and update any state
 * associated with the search process.
 *
 * Returns if a page is found
 *
 * @rs: current RAM state
 * @pss: data about the state of the current dirty page scan
 * @again: set to false if the search has scanned the whole of RAM
 * @page_abs: pointer into where to store the dirty page
 */
static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss,
                             bool *again, unsigned long *page_abs)
{
    pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page,
                                            page_abs);
    if (pss->complete_round && pss->block == rs->last_seen_block &&
        pss->page >= rs->last_page) {
        /*
         * We've been once around the RAM and haven't found anything.
         * Give up.
         */
        *again = false;
        return false;
    }
    if ((pss->page << TARGET_PAGE_BITS) >= pss->block->used_length) {
        /* Didn't find anything in this RAM Block */
        pss->page = 0;
        pss->block = QLIST_NEXT_RCU(pss->block, next);
        if (!pss->block) {
            /* Hit the end of the list */
            pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
            /* Flag that we've looped */
            pss->complete_round = true;
            rs->ram_bulk_stage = false;
            if (migrate_use_xbzrle()) {
                /* If xbzrle is on, stop using the data compression at this
                 * point. In theory, xbzrle can do better than compression.
                 */
                flush_compressed_data(rs);
            }
        }
        /* Didn't find anything this time, but try again on the new block */
        *again = true;
        return false;
    } else {
        /* Can go around again, but... */
        *again = true;
        /* We've found something so probably don't need to */
        return true;
    }
}

/**
 * unqueue_page: gets a page off the queue
 *
 * Helper for 'get_queued_page' - gets a page off the queue
 *
 * Returns the block of the page (or NULL if none available)
 *
 * @rs: current RAM state
 * @offset: used to return the offset within the RAMBlock
 * @page_abs: pointer into where to store the dirty page
 */
static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset,
                              unsigned long *page_abs)
{
    RAMBlock *block = NULL;

    qemu_mutex_lock(&rs->src_page_req_mutex);
    if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
        struct RAMSrcPageRequest *entry =
                                QSIMPLEQ_FIRST(&rs->src_page_requests);
        block = entry->rb;
        *offset = entry->offset;
        *page_abs = (entry->offset + entry->rb->offset) >> TARGET_PAGE_BITS;

        if (entry->len > TARGET_PAGE_SIZE) {
            entry->len -= TARGET_PAGE_SIZE;
            entry->offset += TARGET_PAGE_SIZE;
        } else {
            memory_region_unref(block->mr);
            QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
            g_free(entry);
        }
    }
    qemu_mutex_unlock(&rs->src_page_req_mutex);

    return block;
}

/**
 * get_queued_page: unqueue a page from the postcopy requests
 *
 * Skips pages that are already sent (!dirty)
 *
 * Returns if a queued page is found
 *
 * @rs: current RAM state
 * @pss: data about the state of the current dirty page scan
 * @page_abs: pointer into where to store the dirty page
 */
static bool get_queued_page(RAMState *rs, PageSearchStatus *pss,
                            unsigned long *page_abs)
{
    RAMBlock  *block;
    ram_addr_t offset;
    bool dirty;

    do {
        block = unqueue_page(rs, &offset, page_abs);
        /*
         * We're sending this page, and since it's postcopy nothing else
         * will dirty it, and we must make sure it doesn't get sent again
         * even if this queue request was received after the background
         * search already sent it.
         */
        if (block) {
            unsigned long *bitmap;
            bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap;
            dirty = test_bit(*page_abs, bitmap);
            if (!dirty) {
                trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
                    *page_abs,
                    test_bit(*page_abs,
                             atomic_rcu_read(&rs->ram_bitmap)->unsentmap));
            } else {
                trace_get_queued_page(block->idstr, (uint64_t)offset,
                                      *page_abs);
            }
        }

    } while (block && !dirty);

    if (block) {
        /*
         * As soon as we start servicing pages out of order, then we have
         * to kill the bulk stage, since the bulk stage assumes
         * in (migration_bitmap_find_and_reset_dirty) that every page is
         * dirty, that's no longer true.
         */
        rs->ram_bulk_stage = false;

        /*
         * We want the background search to continue from the queued page
         * since the guest is likely to want other pages near to the page
         * it just requested.
         */
        pss->block = block;
        pss->page = offset >> TARGET_PAGE_BITS;
    }

    return !!block;
}

Juan Quintela56e93d22015-05-07 19:33:31 +02001202/**
Juan Quintela5e58f962017-04-03 22:06:54 +02001203 * migration_page_queue_free: drop any remaining pages in the ram
1204 * request queue
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001205 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001206 * It should be empty at the end anyway, but in error cases there may
1207 * be some left; if any pages are left, we drop them.
1208 *
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001209 */
Juan Quintelaec481c62017-03-20 22:12:40 +01001210void migration_page_queue_free(void)
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001211{
Juan Quintelaec481c62017-03-20 22:12:40 +01001212 struct RAMSrcPageRequest *mspr, *next_mspr;
1213 RAMState *rs = &ram_state;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001214 /* This queue generally should be empty - but in the case of a failed
 1215 * migration it might have some entries left over.
1216 */
1217 rcu_read_lock();
Juan Quintelaec481c62017-03-20 22:12:40 +01001218 QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001219 memory_region_unref(mspr->rb->mr);
Juan Quintelaec481c62017-03-20 22:12:40 +01001220 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001221 g_free(mspr);
1222 }
1223 rcu_read_unlock();
1224}
1225
1226/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001227 * ram_save_queue_pages: queue the page for transmission
1228 *
1229 * A request from postcopy destination for example.
1230 *
1231 * Returns zero on success or negative on error
1232 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001233 * @rbname: Name of the RAMBlock of the request. NULL means the
 1234 * same as the last one.
1235 * @start: starting address from the start of the RAMBlock
1236 * @len: length (in bytes) to send
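 *
 * A minimal illustrative call (the block name and values are examples only):
 *   ram_save_queue_pages("pc.ram", 0x100000, TARGET_PAGE_SIZE);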
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001237 */
Juan Quintela96506892017-03-14 18:41:03 +01001238int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001239{
1240 RAMBlock *ramblock;
Juan Quintela68a098f2017-03-14 13:48:42 +01001241 RAMState *rs = &ram_state;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001242
Juan Quintela96506892017-03-14 18:41:03 +01001243 rs->postcopy_requests++;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001244 rcu_read_lock();
1245 if (!rbname) {
1246 /* Reuse last RAMBlock */
Juan Quintela68a098f2017-03-14 13:48:42 +01001247 ramblock = rs->last_req_rb;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001248
1249 if (!ramblock) {
1250 /*
1251 * Shouldn't happen, we can't reuse the last RAMBlock if
1252 * it's the 1st request.
1253 */
1254 error_report("ram_save_queue_pages no previous block");
1255 goto err;
1256 }
1257 } else {
1258 ramblock = qemu_ram_block_by_name(rbname);
1259
1260 if (!ramblock) {
1261 /* We shouldn't be asked for a non-existent RAMBlock */
1262 error_report("ram_save_queue_pages no block '%s'", rbname);
1263 goto err;
1264 }
Juan Quintela68a098f2017-03-14 13:48:42 +01001265 rs->last_req_rb = ramblock;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001266 }
1267 trace_ram_save_queue_pages(ramblock->idstr, start, len);
1268 if (start+len > ramblock->used_length) {
Juan Quintela9458ad62015-11-10 17:42:05 +01001269 error_report("%s request overrun start=" RAM_ADDR_FMT " len="
1270 RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001271 __func__, start, len, ramblock->used_length);
1272 goto err;
1273 }
1274
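    /* Queue the request; it is consumed page by page by unqueue_page()
     * in the RAM save loop. */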
Juan Quintelaec481c62017-03-20 22:12:40 +01001275 struct RAMSrcPageRequest *new_entry =
1276 g_malloc0(sizeof(struct RAMSrcPageRequest));
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001277 new_entry->rb = ramblock;
1278 new_entry->offset = start;
1279 new_entry->len = len;
1280
1281 memory_region_ref(ramblock->mr);
Juan Quintelaec481c62017-03-20 22:12:40 +01001282 qemu_mutex_lock(&rs->src_page_req_mutex);
1283 QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
1284 qemu_mutex_unlock(&rs->src_page_req_mutex);
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001285 rcu_read_unlock();
1286
1287 return 0;
1288
1289err:
1290 rcu_read_unlock();
1291 return -1;
1292}
1293
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001294/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001295 * ram_save_target_page: save one target page
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001296 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001297 * Returns the number of pages written
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001298 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001299 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001301 * @pss: data about the page we want to send
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001302 * @last_stage: if we are at the completion stage
Juan Quintela06b10682017-03-21 15:18:05 +01001303 * @page_abs: page number of the dirty page
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001304 */
Juan Quintelaa0a8aa12017-03-20 22:29:07 +01001305static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
Juan Quintela06b10682017-03-21 15:18:05 +01001306 bool last_stage, unsigned long page_abs)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001307{
1308 int res = 0;
1309
1310 /* Check if the page is dirty and if so, send it */
Juan Quintela06b10682017-03-21 15:18:05 +01001311 if (migration_bitmap_clear_dirty(rs, page_abs)) {
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001312 unsigned long *unsentmap;
Juan Quintela6d358d92017-03-16 21:29:34 +01001313 /*
1314 * If xbzrle is on, stop using the data compression after the first
1315 * round of migration even if compression is enabled. In theory,
1316 * xbzrle can do better than compression.
1317 */
1318
1319 if (migrate_use_compression()
1320 && (rs->ram_bulk_stage || !migrate_use_xbzrle())) {
Juan Quintelaa0a8aa12017-03-20 22:29:07 +01001321 res = ram_save_compressed_page(rs, pss, last_stage);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001322 } else {
Juan Quintelaa0a8aa12017-03-20 22:29:07 +01001323 res = ram_save_page(rs, pss, last_stage);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001324 }
1325
1326 if (res < 0) {
1327 return res;
1328 }
Juan Quintelaeb859c52017-03-13 21:51:55 +01001329 unsentmap = atomic_rcu_read(&rs->ram_bitmap)->unsentmap;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001330 if (unsentmap) {
Juan Quintela06b10682017-03-21 15:18:05 +01001331 clear_bit(page_abs, unsentmap);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001332 }
1333 }
1334
1335 return res;
1336}
1337
1338/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001339 * ram_save_host_page: save a whole host page
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001340 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001341 * Starting at pss->page, send pages up to the end of the current host
 1342 * page. It's valid for the starting page to point into the middle of
 1343 * a host page, in which case the remainder of the host page is sent.
1344 * Only dirty target pages are sent. Note that the host page size may
1345 * be a huge page for this block.
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001346 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001347 * Returns the number of pages written or negative on error
1348 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001349 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001351 * @pss: data about the page we want to send
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001352 * @last_stage: if we are at the completion stage
Juan Quintela06b10682017-03-21 15:18:05 +01001353 * @page_abs: Page number of the dirty page
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001354 */
Juan Quintelaa0a8aa12017-03-20 22:29:07 +01001355static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
zhanghailianga08f6892016-01-15 11:37:44 +08001356 bool last_stage,
Juan Quintela06b10682017-03-21 15:18:05 +01001357 unsigned long page_abs)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001358{
1359 int tmppages, pages = 0;
Juan Quintelaa935e302017-03-21 15:36:51 +01001360 size_t pagesize_bits =
1361 qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
Dr. David Alan Gilbert4c011c32017-02-24 18:28:39 +00001362
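    /* Send each target page in this host page; stop once pss->page crosses
     * a host-page boundary. */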
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001363 do {
Juan Quintela06b10682017-03-21 15:18:05 +01001364 tmppages = ram_save_target_page(rs, pss, last_stage, page_abs);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001365 if (tmppages < 0) {
1366 return tmppages;
1367 }
1368
1369 pages += tmppages;
Juan Quintelaa935e302017-03-21 15:36:51 +01001370 pss->page++;
Juan Quintela06b10682017-03-21 15:18:05 +01001371 page_abs++;
Juan Quintelaa935e302017-03-21 15:36:51 +01001372 } while (pss->page & (pagesize_bits - 1));
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001373
1374 /* The offset we leave with is the last one we looked at */
Juan Quintelaa935e302017-03-21 15:36:51 +01001375 pss->page--;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001376 return pages;
1377}
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001378
1379/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001380 * ram_find_and_save_block: finds a dirty page and sends it to the stream
Juan Quintela56e93d22015-05-07 19:33:31 +02001381 *
1382 * Called within an RCU critical section.
1383 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001384 * Returns the number of pages written where zero means no dirty pages
Juan Quintela56e93d22015-05-07 19:33:31 +02001385 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001386 * @rs: current RAM state
Juan Quintela56e93d22015-05-07 19:33:31 +02001387 * @last_stage: if we are at the completion stage
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001388 *
1389 * On systems where host-page-size > target-page-size it will send all the
1390 * pages in a host page that are dirty.
Juan Quintela56e93d22015-05-07 19:33:31 +02001391 */
1392
Juan Quintelace25d332017-03-15 11:00:51 +01001393static int ram_find_and_save_block(RAMState *rs, bool last_stage)
Juan Quintela56e93d22015-05-07 19:33:31 +02001394{
Dr. David Alan Gilbertb8fb8cb2015-09-23 15:27:10 +01001395 PageSearchStatus pss;
Juan Quintela56e93d22015-05-07 19:33:31 +02001396 int pages = 0;
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001397 bool again, found;
Juan Quintela06b10682017-03-21 15:18:05 +01001398 unsigned long page_abs; /* Page number of the dirty page */
Juan Quintela56e93d22015-05-07 19:33:31 +02001399
Ashijeet Acharya0827b9e2017-02-08 19:58:45 +05301400 /* No dirty pages as there is zero RAM */
1401 if (!ram_bytes_total()) {
1402 return pages;
1403 }
1404
Juan Quintela6f37bb82017-03-13 19:26:29 +01001405 pss.block = rs->last_seen_block;
Juan Quintelaa935e302017-03-21 15:36:51 +01001406 pss.page = rs->last_page;
Dr. David Alan Gilbertb8fb8cb2015-09-23 15:27:10 +01001407 pss.complete_round = false;
1408
1409 if (!pss.block) {
1410 pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
1411 }
Juan Quintela56e93d22015-05-07 19:33:31 +02001412
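    /* Alternate between servicing queued postcopy requests and scanning the
     * bitmap for dirty pages, until something is sent or nothing is left. */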
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001413 do {
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001414 again = true;
Juan Quintela06b10682017-03-21 15:18:05 +01001415 found = get_queued_page(rs, &pss, &page_abs);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001416
1417 if (!found) {
1418 /* priority queue empty, so just search for something dirty */
Juan Quintela06b10682017-03-21 15:18:05 +01001419 found = find_dirty_block(rs, &pss, &again, &page_abs);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001420 }
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001421
1422 if (found) {
Juan Quintela06b10682017-03-21 15:18:05 +01001423 pages = ram_save_host_page(rs, &pss, last_stage, page_abs);
Juan Quintela56e93d22015-05-07 19:33:31 +02001424 }
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001425 } while (!pages && again);
Juan Quintela56e93d22015-05-07 19:33:31 +02001426
Juan Quintela6f37bb82017-03-13 19:26:29 +01001427 rs->last_seen_block = pss.block;
Juan Quintelaa935e302017-03-21 15:36:51 +01001428 rs->last_page = pss.page;
Juan Quintela56e93d22015-05-07 19:33:31 +02001429
1430 return pages;
1431}
1432
1433void acct_update_position(QEMUFile *f, size_t size, bool zero)
1434{
1435 uint64_t pages = size / TARGET_PAGE_SIZE;
Juan Quintelaf7ccd612017-03-13 20:30:21 +01001436 RAMState *rs = &ram_state;
1437
Juan Quintela56e93d22015-05-07 19:33:31 +02001438 if (zero) {
Juan Quintelaf7ccd612017-03-13 20:30:21 +01001439 rs->zero_pages += pages;
Juan Quintela56e93d22015-05-07 19:33:31 +02001440 } else {
Juan Quintelab4d1c6e2017-03-13 20:40:53 +01001441 rs->norm_pages += pages;
Juan Quintela2f4fde92017-03-13 21:58:11 +01001442 rs->bytes_transferred += size;
Juan Quintela56e93d22015-05-07 19:33:31 +02001443 qemu_update_position(f, size);
1444 }
1445}
1446
Juan Quintela56e93d22015-05-07 19:33:31 +02001447uint64_t ram_bytes_total(void)
1448{
1449 RAMBlock *block;
1450 uint64_t total = 0;
1451
1452 rcu_read_lock();
1453 QLIST_FOREACH_RCU(block, &ram_list.blocks, next)
1454 total += block->used_length;
1455 rcu_read_unlock();
1456 return total;
1457}
1458
1459void free_xbzrle_decoded_buf(void)
1460{
1461 g_free(xbzrle_decoded_buf);
1462 xbzrle_decoded_buf = NULL;
1463}
1464
Juan Quintelaeb859c52017-03-13 21:51:55 +01001465static void migration_bitmap_free(struct RAMBitmap *bmap)
Denis V. Lunev60be6342015-09-28 14:41:58 +03001466{
1467 g_free(bmap->bmap);
Dr. David Alan Gilbertf3f491f2015-11-05 18:11:01 +00001468 g_free(bmap->unsentmap);
Denis V. Lunev60be6342015-09-28 14:41:58 +03001469 g_free(bmap);
1470}
1471
Liang Li6ad2a212015-11-02 15:37:03 +08001472static void ram_migration_cleanup(void *opaque)
Juan Quintela56e93d22015-05-07 19:33:31 +02001473{
Juan Quintelaeb859c52017-03-13 21:51:55 +01001474 RAMState *rs = opaque;
1475
Li Zhijian2ff64032015-07-02 20:18:05 +08001476 /* the caller must hold the iothread lock or be in a bh, so there is
 1477 * no write race against this migration_bitmap
1478 */
Juan Quintelaeb859c52017-03-13 21:51:55 +01001479 struct RAMBitmap *bitmap = rs->ram_bitmap;
1480 atomic_rcu_set(&rs->ram_bitmap, NULL);
Li Zhijian2ff64032015-07-02 20:18:05 +08001481 if (bitmap) {
Juan Quintela56e93d22015-05-07 19:33:31 +02001482 memory_global_dirty_log_stop();
Denis V. Lunev60be6342015-09-28 14:41:58 +03001483 call_rcu(bitmap, migration_bitmap_free, rcu);
Juan Quintela56e93d22015-05-07 19:33:31 +02001484 }
1485
1486 XBZRLE_cache_lock();
1487 if (XBZRLE.cache) {
1488 cache_fini(XBZRLE.cache);
1489 g_free(XBZRLE.encoded_buf);
1490 g_free(XBZRLE.current_buf);
Vijaya Kumar Kadb65de2016-10-24 16:26:49 +01001491 g_free(ZERO_TARGET_PAGE);
Juan Quintela56e93d22015-05-07 19:33:31 +02001492 XBZRLE.cache = NULL;
1493 XBZRLE.encoded_buf = NULL;
1494 XBZRLE.current_buf = NULL;
1495 }
1496 XBZRLE_cache_unlock();
1497}
1498
Juan Quintela6f37bb82017-03-13 19:26:29 +01001499static void ram_state_reset(RAMState *rs)
Juan Quintela56e93d22015-05-07 19:33:31 +02001500{
Juan Quintela6f37bb82017-03-13 19:26:29 +01001501 rs->last_seen_block = NULL;
1502 rs->last_sent_block = NULL;
Juan Quintela269ace22017-03-21 15:23:31 +01001503 rs->last_page = 0;
Juan Quintela6f37bb82017-03-13 19:26:29 +01001504 rs->last_version = ram_list.version;
1505 rs->ram_bulk_stage = true;
Juan Quintela56e93d22015-05-07 19:33:31 +02001506}
1507
1508#define MAX_WAIT 50 /* ms, half buffered_file limit */
1509
Li Zhijiandd631692015-07-02 20:18:06 +08001510void migration_bitmap_extend(ram_addr_t old, ram_addr_t new)
1511{
Juan Quintela0d8ec882017-03-13 21:21:41 +01001512 RAMState *rs = &ram_state;
Juan Quintela108cfae2017-03-13 21:38:09 +01001513
Li Zhijiandd631692015-07-02 20:18:06 +08001514 /* called from the QEMU main thread, so there is
 1515 * no write race against this migration_bitmap
1516 */
Juan Quintelaeb859c52017-03-13 21:51:55 +01001517 if (rs->ram_bitmap) {
1518 struct RAMBitmap *old_bitmap = rs->ram_bitmap, *bitmap;
1519 bitmap = g_new(struct RAMBitmap, 1);
Denis V. Lunev60be6342015-09-28 14:41:58 +03001520 bitmap->bmap = bitmap_new(new);
Li Zhijiandd631692015-07-02 20:18:06 +08001521
1522 /* prevent bits in migration_bitmap from being set
 1523 * by migration_bitmap_sync_range() at the same time.
 1524 * It is safe for migration if a bit in migration_bitmap is
 1525 * cleared at the same time.
1526 */
Juan Quintela108cfae2017-03-13 21:38:09 +01001527 qemu_mutex_lock(&rs->bitmap_mutex);
Denis V. Lunev60be6342015-09-28 14:41:58 +03001528 bitmap_copy(bitmap->bmap, old_bitmap->bmap, old);
1529 bitmap_set(bitmap->bmap, old, new - old);
Dr. David Alan Gilbertf3f491f2015-11-05 18:11:01 +00001530
1531 /* We don't have a way to safely extend the sentmap
1532 * with RCU; so mark it as missing, entry to postcopy
1533 * will fail.
1534 */
1535 bitmap->unsentmap = NULL;
1536
Juan Quintelaeb859c52017-03-13 21:51:55 +01001537 atomic_rcu_set(&rs->ram_bitmap, bitmap);
Juan Quintela108cfae2017-03-13 21:38:09 +01001538 qemu_mutex_unlock(&rs->bitmap_mutex);
Juan Quintela0d8ec882017-03-13 21:21:41 +01001539 rs->migration_dirty_pages += new - old;
Denis V. Lunev60be6342015-09-28 14:41:58 +03001540 call_rcu(old_bitmap, migration_bitmap_free, rcu);
Li Zhijiandd631692015-07-02 20:18:06 +08001541 }
1542}
Juan Quintela56e93d22015-05-07 19:33:31 +02001543
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00001544/*
1545 * 'expected' is the value you expect the bitmap mostly to be full
1546 * of; it won't bother printing lines that are all this value.
1547 * If 'todump' is null the migration bitmap is dumped.
1548 */
1549void ram_debug_dump_bitmap(unsigned long *todump, bool expected)
1550{
1551 int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
Juan Quintelaeb859c52017-03-13 21:51:55 +01001552 RAMState *rs = &ram_state;
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00001553 int64_t cur;
1554 int64_t linelen = 128;
1555 char linebuf[129];
1556
1557 if (!todump) {
Juan Quintelaeb859c52017-03-13 21:51:55 +01001558 todump = atomic_rcu_read(&rs->ram_bitmap)->bmap;
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00001559 }
1560
1561 for (cur = 0; cur < ram_pages; cur += linelen) {
1562 int64_t curb;
1563 bool found = false;
1564 /*
1565 * Last line; catch the case where the line length
1566 * is longer than remaining ram
1567 */
1568 if (cur + linelen > ram_pages) {
1569 linelen = ram_pages - cur;
1570 }
1571 for (curb = 0; curb < linelen; curb++) {
1572 bool thisbit = test_bit(cur + curb, todump);
1573 linebuf[curb] = thisbit ? '1' : '.';
1574 found = found || (thisbit != expected);
1575 }
1576 if (found) {
1577 linebuf[curb] = '\0';
1578 fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
1579 }
1580 }
1581}
1582
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001583/* **** functions for postcopy ***** */
1584
Pavel Butsykinced1c612017-02-03 18:23:21 +03001585void ram_postcopy_migrated_memory_release(MigrationState *ms)
1586{
Juan Quintelaeb859c52017-03-13 21:51:55 +01001587 RAMState *rs = &ram_state;
Pavel Butsykinced1c612017-02-03 18:23:21 +03001588 struct RAMBlock *block;
Juan Quintelaeb859c52017-03-13 21:51:55 +01001589 unsigned long *bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap;
Pavel Butsykinced1c612017-02-03 18:23:21 +03001590
1591 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1592 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1593 unsigned long range = first + (block->used_length >> TARGET_PAGE_BITS);
1594 unsigned long run_start = find_next_zero_bit(bitmap, range, first);
1595
1596 while (run_start < range) {
1597 unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
Juan Quintelaaaa20642017-03-21 11:35:24 +01001598 ram_discard_range(block->idstr, run_start << TARGET_PAGE_BITS,
Pavel Butsykinced1c612017-02-03 18:23:21 +03001599 (run_end - run_start) << TARGET_PAGE_BITS);
1600 run_start = find_next_zero_bit(bitmap, range, run_end + 1);
1601 }
1602 }
1603}
1604
Juan Quintela3d0684b2017-03-23 15:06:39 +01001605/**
1606 * postcopy_send_discard_bm_ram: discard a RAMBlock
1607 *
1608 * Returns zero on success
1609 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001610 * Callback from postcopy_each_ram_send_discard for each RAMBlock
1611 * Note: At this point the 'unsentmap' is the processed bitmap combined
1612 * with the dirtymap; so a '1' means it's either dirty or unsent.
Juan Quintela3d0684b2017-03-23 15:06:39 +01001613 *
1614 * @ms: current migration state
1615 * @pds: state for postcopy
1616 * @start: RAMBlock starting page
1617 * @length: RAMBlock size
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001618 */
1619static int postcopy_send_discard_bm_ram(MigrationState *ms,
1620 PostcopyDiscardState *pds,
1621 unsigned long start,
1622 unsigned long length)
1623{
Juan Quintelaeb859c52017-03-13 21:51:55 +01001624 RAMState *rs = &ram_state;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001625 unsigned long end = start + length; /* one after the end */
1626 unsigned long current;
1627 unsigned long *unsentmap;
1628
Juan Quintelaeb859c52017-03-13 21:51:55 +01001629 unsentmap = atomic_rcu_read(&rs->ram_bitmap)->unsentmap;
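    /* Walk the runs of set bits in the unsentmap and send one discard
     * request per contiguous run. */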
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001630 for (current = start; current < end; ) {
1631 unsigned long one = find_next_bit(unsentmap, end, current);
1632
1633 if (one <= end) {
1634 unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1);
1635 unsigned long discard_length;
1636
1637 if (zero >= end) {
1638 discard_length = end - one;
1639 } else {
1640 discard_length = zero - one;
1641 }
Dr. David Alan Gilbertd688c622016-06-13 12:16:40 +01001642 if (discard_length) {
1643 postcopy_discard_send_range(ms, pds, one, discard_length);
1644 }
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001645 current = one + discard_length;
1646 } else {
1647 current = one;
1648 }
1649 }
1650
1651 return 0;
1652}
1653
Juan Quintela3d0684b2017-03-23 15:06:39 +01001654/**
1655 * postcopy_each_ram_send_discard: discard all RAMBlocks
1656 *
1657 * Returns 0 for success or negative for error
1658 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001659 * Utility for the outgoing postcopy code.
1660 * Calls postcopy_send_discard_bm_ram for each RAMBlock
1661 * passing it bitmap indexes and name.
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001662 * (qemu_ram_foreach_block ends up passing unscaled lengths
1663 * which would mean postcopy code would have to deal with target page)
Juan Quintela3d0684b2017-03-23 15:06:39 +01001664 *
1665 * @ms: current migration state
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001666 */
1667static int postcopy_each_ram_send_discard(MigrationState *ms)
1668{
1669 struct RAMBlock *block;
1670 int ret;
1671
1672 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1673 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1674 PostcopyDiscardState *pds = postcopy_discard_send_init(ms,
1675 first,
1676 block->idstr);
1677
1678 /*
1679 * Postcopy sends chunks of bitmap over the wire, but it
1680 * just needs indexes at this point, avoids it having
1681 * target page specific code.
1682 */
1683 ret = postcopy_send_discard_bm_ram(ms, pds, first,
1684 block->used_length >> TARGET_PAGE_BITS);
1685 postcopy_discard_send_finish(ms, pds);
1686 if (ret) {
1687 return ret;
1688 }
1689 }
1690
1691 return 0;
1692}
1693
Juan Quintela3d0684b2017-03-23 15:06:39 +01001694/**
1695 * postcopy_chunk_hostpages_pass: canonicalize bitmap in host pages
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001696 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001697 * Helper for postcopy_chunk_hostpages; it's called twice to
1698 * canonicalize the two bitmaps, that are similar, but one is
1699 * inverted.
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001700 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001701 * Postcopy requires that all target pages in a hostpage are dirty or
1702 * clean, not a mix. This function canonicalizes the bitmaps.
1703 *
1704 * @ms: current migration state
1705 * @unsent_pass: if true we need to canonicalize partially unsent host pages
1706 * otherwise we need to canonicalize partially dirty host pages
1707 * @block: block that contains the page we want to canonicalize
1708 * @pds: state for postcopy
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001709 */
1710static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
1711 RAMBlock *block,
1712 PostcopyDiscardState *pds)
1713{
Juan Quintela0d8ec882017-03-13 21:21:41 +01001714 RAMState *rs = &ram_state;
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001715 unsigned long *bitmap;
1716 unsigned long *unsentmap;
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00001717 unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001718 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1719 unsigned long len = block->used_length >> TARGET_PAGE_BITS;
1720 unsigned long last = first + (len - 1);
1721 unsigned long run_start;
1722
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00001723 if (block->page_size == TARGET_PAGE_SIZE) {
1724 /* Easy case - TPS==HPS for a non-huge page RAMBlock */
1725 return;
1726 }
1727
Juan Quintelaeb859c52017-03-13 21:51:55 +01001728 bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap;
1729 unsentmap = atomic_rcu_read(&rs->ram_bitmap)->unsentmap;
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001730
1731 if (unsent_pass) {
1732 /* Find a sent page */
1733 run_start = find_next_zero_bit(unsentmap, last + 1, first);
1734 } else {
1735 /* Find a dirty page */
1736 run_start = find_next_bit(bitmap, last + 1, first);
1737 }
1738
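    /* Walk each run of sent/dirty pages and fix up any run that starts or
     * ends in the middle of a host page. */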
1739 while (run_start <= last) {
1740 bool do_fixup = false;
1741 unsigned long fixup_start_addr;
1742 unsigned long host_offset;
1743
1744 /*
1745 * If the start of this run of pages is in the middle of a host
1746 * page, then we need to fixup this host page.
1747 */
1748 host_offset = run_start % host_ratio;
1749 if (host_offset) {
1750 do_fixup = true;
1751 run_start -= host_offset;
1752 fixup_start_addr = run_start;
1753 /* For the next pass */
1754 run_start = run_start + host_ratio;
1755 } else {
1756 /* Find the end of this run */
1757 unsigned long run_end;
1758 if (unsent_pass) {
1759 run_end = find_next_bit(unsentmap, last + 1, run_start + 1);
1760 } else {
1761 run_end = find_next_zero_bit(bitmap, last + 1, run_start + 1);
1762 }
1763 /*
1764 * If the end isn't at the start of a host page, then the
1765 * run doesn't finish at the end of a host page
1766 * and we need to discard.
1767 */
1768 host_offset = run_end % host_ratio;
1769 if (host_offset) {
1770 do_fixup = true;
1771 fixup_start_addr = run_end - host_offset;
1772 /*
1773 * This host page has gone, the next loop iteration starts
1774 * from after the fixup
1775 */
1776 run_start = fixup_start_addr + host_ratio;
1777 } else {
1778 /*
1779 * No discards on this iteration, next loop starts from
1780 * next sent/dirty page
1781 */
1782 run_start = run_end + 1;
1783 }
1784 }
1785
1786 if (do_fixup) {
1787 unsigned long page;
1788
1789 /* Tell the destination to discard this page */
1790 if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
1791 /* For the unsent_pass we:
1792 * discard partially sent pages
1793 * For the !unsent_pass (dirty) we:
1794 * discard partially dirty pages that were sent
1795 * (any partially sent pages were already discarded
1796 * by the previous unsent_pass)
1797 */
1798 postcopy_discard_send_range(ms, pds, fixup_start_addr,
1799 host_ratio);
1800 }
1801
1802 /* Clean up the bitmap */
1803 for (page = fixup_start_addr;
1804 page < fixup_start_addr + host_ratio; page++) {
1805 /* All pages in this host page are now not sent */
1806 set_bit(page, unsentmap);
1807
1808 /*
1809 * Remark them as dirty, updating the count for any pages
1810 * that weren't previously dirty.
1811 */
Juan Quintela0d8ec882017-03-13 21:21:41 +01001812 rs->migration_dirty_pages += !test_and_set_bit(page, bitmap);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001813 }
1814 }
1815
1816 if (unsent_pass) {
1817 /* Find the next sent page for the next iteration */
1818 run_start = find_next_zero_bit(unsentmap, last + 1,
1819 run_start);
1820 } else {
1821 /* Find the next dirty page for the next iteration */
1822 run_start = find_next_bit(bitmap, last + 1, run_start);
1823 }
1824 }
1825}
1826
Juan Quintela3d0684b2017-03-23 15:06:39 +01001827/**
1828 * postcopy_chunk_hostpages: discard any partially sent host page
1829 *
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001830 * Utility for the outgoing postcopy code.
1831 *
1832 * Discard any partially sent host-page size chunks, mark any partially
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00001833 * dirty host-page size chunks as all dirty. In this case the host-page
1834 * is the host-page for the particular RAMBlock, i.e. it might be a huge page
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001835 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001836 * Returns zero on success
1837 *
1838 * @ms: current migration state
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001839 */
1840static int postcopy_chunk_hostpages(MigrationState *ms)
1841{
Juan Quintela6f37bb82017-03-13 19:26:29 +01001842 RAMState *rs = &ram_state;
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001843 struct RAMBlock *block;
1844
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001845 /* Easiest way to make sure we don't resume in the middle of a host-page */
Juan Quintela6f37bb82017-03-13 19:26:29 +01001846 rs->last_seen_block = NULL;
1847 rs->last_sent_block = NULL;
Juan Quintela269ace22017-03-21 15:23:31 +01001848 rs->last_page = 0;
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001849
1850 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1851 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1852
1853 PostcopyDiscardState *pds =
1854 postcopy_discard_send_init(ms, first, block->idstr);
1855
1856 /* First pass: Discard all partially sent host pages */
1857 postcopy_chunk_hostpages_pass(ms, true, block, pds);
1858 /*
1859 * Second pass: Ensure that all partially dirty host pages are made
1860 * fully dirty.
1861 */
1862 postcopy_chunk_hostpages_pass(ms, false, block, pds);
1863
1864 postcopy_discard_send_finish(ms, pds);
1865 } /* ram_list loop */
1866
1867 return 0;
1868}
1869
Juan Quintela3d0684b2017-03-23 15:06:39 +01001870/**
1871 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
1872 *
1873 * Returns zero on success
1874 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001875 * Transmit the set of pages to be discarded after precopy to the target;
 1876 * these are pages that:
1877 * a) Have been previously transmitted but are now dirty again
1878 * b) Pages that have never been transmitted, this ensures that
1879 * any pages on the destination that have been mapped by background
1880 * tasks get discarded (transparent huge pages is the specific concern)
1881 * Hopefully this is pretty sparse
Juan Quintela3d0684b2017-03-23 15:06:39 +01001882 *
1883 * @ms: current migration state
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001884 */
1885int ram_postcopy_send_discard_bitmap(MigrationState *ms)
1886{
Juan Quintelaeb859c52017-03-13 21:51:55 +01001887 RAMState *rs = &ram_state;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001888 int ret;
1889 unsigned long *bitmap, *unsentmap;
1890
1891 rcu_read_lock();
1892
1893 /* This should be our last sync, the src is now paused */
Juan Quintelaeb859c52017-03-13 21:51:55 +01001894 migration_bitmap_sync(rs);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001895
Juan Quintelaeb859c52017-03-13 21:51:55 +01001896 unsentmap = atomic_rcu_read(&rs->ram_bitmap)->unsentmap;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001897 if (!unsentmap) {
1898 /* We don't have a safe way to resize the unsentmap, so
1899 * if the bitmap was resized it will be NULL at this
1900 * point.
1901 */
1902 error_report("migration ram resized during precopy phase");
1903 rcu_read_unlock();
1904 return -EINVAL;
1905 }
1906
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00001907 /* Deal with TPS != HPS and huge pages */
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001908 ret = postcopy_chunk_hostpages(ms);
1909 if (ret) {
1910 rcu_read_unlock();
1911 return ret;
1912 }
1913
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001914 /*
1915 * Update the unsentmap to be unsentmap = unsentmap | dirty
1916 */
Juan Quintelaeb859c52017-03-13 21:51:55 +01001917 bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001918 bitmap_or(unsentmap, unsentmap, bitmap,
1919 last_ram_offset() >> TARGET_PAGE_BITS);
1920
1921
1922 trace_ram_postcopy_send_discard_bitmap();
1923#ifdef DEBUG_POSTCOPY
1924 ram_debug_dump_bitmap(unsentmap, true);
1925#endif
1926
1927 ret = postcopy_each_ram_send_discard(ms);
1928 rcu_read_unlock();
1929
1930 return ret;
1931}
1932
Juan Quintela3d0684b2017-03-23 15:06:39 +01001933/**
1934 * ram_discard_range: discard dirtied pages at the beginning of postcopy
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001935 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001936 * Returns zero on success
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001937 *
Juan Quintela36449152017-03-23 15:11:59 +01001938 * @rbname: name of the RAMBlock of the request. NULL means the
 1939 * same as the last one.
Juan Quintela3d0684b2017-03-23 15:06:39 +01001940 * @start: starting offset (in bytes) within the RAMBlock
 1941 * @length: length (in bytes) to discard
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001942 */
Juan Quintelaaaa20642017-03-21 11:35:24 +01001943int ram_discard_range(const char *rbname, uint64_t start, size_t length)
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001944{
1945 int ret = -1;
1946
Juan Quintela36449152017-03-23 15:11:59 +01001947 trace_ram_discard_range(rbname, start, length);
Dr. David Alan Gilbertd3a50382017-02-24 18:28:32 +00001948
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001949 rcu_read_lock();
Juan Quintela36449152017-03-23 15:11:59 +01001950 RAMBlock *rb = qemu_ram_block_by_name(rbname);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001951
1952 if (!rb) {
Juan Quintela36449152017-03-23 15:11:59 +01001953 error_report("ram_discard_range: Failed to find block '%s'", rbname);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001954 goto err;
1955 }
1956
Dr. David Alan Gilbertd3a50382017-02-24 18:28:32 +00001957 ret = ram_block_discard_range(rb, start, length);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001958
1959err:
1960 rcu_read_unlock();
1961
1962 return ret;
1963}
1964
Juan Quintelaceb4d162017-03-13 21:29:54 +01001965static int ram_state_init(RAMState *rs)
Juan Quintela56e93d22015-05-07 19:33:31 +02001966{
Juan Quintela56e93d22015-05-07 19:33:31 +02001967 int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */
1968
Juan Quintelaceb4d162017-03-13 21:29:54 +01001969 memset(rs, 0, sizeof(*rs));
Juan Quintela108cfae2017-03-13 21:38:09 +01001970 qemu_mutex_init(&rs->bitmap_mutex);
Juan Quintelaec481c62017-03-20 22:12:40 +01001971 qemu_mutex_init(&rs->src_page_req_mutex);
1972 QSIMPLEQ_INIT(&rs->src_page_requests);
Juan Quintela56e93d22015-05-07 19:33:31 +02001973
1974 if (migrate_use_xbzrle()) {
1975 XBZRLE_cache_lock();
Vijaya Kumar Kadb65de2016-10-24 16:26:49 +01001976 ZERO_TARGET_PAGE = g_malloc0(TARGET_PAGE_SIZE);
Juan Quintela56e93d22015-05-07 19:33:31 +02001977 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
1978 TARGET_PAGE_SIZE,
1979 TARGET_PAGE_SIZE);
1980 if (!XBZRLE.cache) {
1981 XBZRLE_cache_unlock();
1982 error_report("Error creating cache");
1983 return -1;
1984 }
1985 XBZRLE_cache_unlock();
1986
1987 /* We prefer not to abort if there is no memory */
1988 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
1989 if (!XBZRLE.encoded_buf) {
1990 error_report("Error allocating encoded_buf");
1991 return -1;
1992 }
1993
1994 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
1995 if (!XBZRLE.current_buf) {
1996 error_report("Error allocating current_buf");
1997 g_free(XBZRLE.encoded_buf);
1998 XBZRLE.encoded_buf = NULL;
1999 return -1;
2000 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002001 }
2002
Paolo Bonzini49877832016-02-15 19:57:57 +01002003 /* For memory_global_dirty_log_start below. */
2004 qemu_mutex_lock_iothread();
2005
Juan Quintela56e93d22015-05-07 19:33:31 +02002006 qemu_mutex_lock_ramlist();
2007 rcu_read_lock();
Juan Quintela6f37bb82017-03-13 19:26:29 +01002008 ram_state_reset(rs);
Juan Quintela56e93d22015-05-07 19:33:31 +02002009
Juan Quintelaeb859c52017-03-13 21:51:55 +01002010 rs->ram_bitmap = g_new0(struct RAMBitmap, 1);
Ashijeet Acharya0827b9e2017-02-08 19:58:45 +05302011 /* Skip setting bitmap if there is no RAM */
2012 if (ram_bytes_total()) {
2013 ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
Juan Quintelaeb859c52017-03-13 21:51:55 +01002014 rs->ram_bitmap->bmap = bitmap_new(ram_bitmap_pages);
2015 bitmap_set(rs->ram_bitmap->bmap, 0, ram_bitmap_pages);
Juan Quintela56e93d22015-05-07 19:33:31 +02002016
Ashijeet Acharya0827b9e2017-02-08 19:58:45 +05302017 if (migrate_postcopy_ram()) {
Juan Quintelaeb859c52017-03-13 21:51:55 +01002018 rs->ram_bitmap->unsentmap = bitmap_new(ram_bitmap_pages);
2019 bitmap_set(rs->ram_bitmap->unsentmap, 0, ram_bitmap_pages);
Ashijeet Acharya0827b9e2017-02-08 19:58:45 +05302020 }
Dr. David Alan Gilbertf3f491f2015-11-05 18:11:01 +00002021 }
2022
Juan Quintela56e93d22015-05-07 19:33:31 +02002023 /*
2024 * Count the total number of pages used by ram blocks not including any
2025 * gaps due to alignment or unplugs.
2026 */
Juan Quintela0d8ec882017-03-13 21:21:41 +01002027 rs->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
Juan Quintela56e93d22015-05-07 19:33:31 +02002028
2029 memory_global_dirty_log_start();
Juan Quintela8d820d62017-03-13 19:35:50 +01002030 migration_bitmap_sync(rs);
Juan Quintela56e93d22015-05-07 19:33:31 +02002031 qemu_mutex_unlock_ramlist();
Paolo Bonzini49877832016-02-15 19:57:57 +01002032 qemu_mutex_unlock_iothread();
zhanghailianga91246c2016-10-27 14:42:59 +08002033 rcu_read_unlock();
2034
2035 return 0;
2036}
2037
Juan Quintela3d0684b2017-03-23 15:06:39 +01002038/*
2039 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
zhanghailianga91246c2016-10-27 14:42:59 +08002040 * a long-running RCU critical section. When RCU reclaims in the code
2041 * start to become numerous it will be necessary to reduce the
2042 * granularity of these critical sections.
2043 */
2044
Juan Quintela3d0684b2017-03-23 15:06:39 +01002045/**
2046 * ram_save_setup: Setup RAM for migration
2047 *
2048 * Returns zero to indicate success and negative for error
2049 *
2050 * @f: QEMUFile where to send the data
2051 * @opaque: RAMState pointer
2052 */
zhanghailianga91246c2016-10-27 14:42:59 +08002053static int ram_save_setup(QEMUFile *f, void *opaque)
2054{
Juan Quintela6f37bb82017-03-13 19:26:29 +01002055 RAMState *rs = opaque;
zhanghailianga91246c2016-10-27 14:42:59 +08002056 RAMBlock *block;
2057
2058 /* migration has already setup the bitmap, reuse it. */
2059 if (!migration_in_colo_state()) {
Juan Quintelaceb4d162017-03-13 21:29:54 +01002060 if (ram_state_init(rs) < 0) {
zhanghailianga91246c2016-10-27 14:42:59 +08002061 return -1;
2062 }
2063 }
Juan Quintela204b88b2017-03-15 09:16:57 +01002064 rs->f = f;
zhanghailianga91246c2016-10-27 14:42:59 +08002065
2066 rcu_read_lock();
Juan Quintela56e93d22015-05-07 19:33:31 +02002067
2068 qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
2069
2070 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2071 qemu_put_byte(f, strlen(block->idstr));
2072 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
2073 qemu_put_be64(f, block->used_length);
Dr. David Alan Gilbertef08fb32017-02-24 18:28:30 +00002074 if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
2075 qemu_put_be64(f, block->page_size);
2076 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002077 }
2078
2079 rcu_read_unlock();
2080
2081 ram_control_before_iterate(f, RAM_CONTROL_SETUP);
2082 ram_control_after_iterate(f, RAM_CONTROL_SETUP);
2083
2084 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2085
2086 return 0;
2087}
2088
Juan Quintela3d0684b2017-03-23 15:06:39 +01002089/**
2090 * ram_save_iterate: iterative stage for migration
2091 *
2092 * Returns zero to indicate success and negative for error
2093 *
2094 * @f: QEMUFile where to send the data
2095 * @opaque: RAMState pointer
2096 */
Juan Quintela56e93d22015-05-07 19:33:31 +02002097static int ram_save_iterate(QEMUFile *f, void *opaque)
2098{
Juan Quintela6f37bb82017-03-13 19:26:29 +01002099 RAMState *rs = opaque;
Juan Quintela56e93d22015-05-07 19:33:31 +02002100 int ret;
2101 int i;
2102 int64_t t0;
Thomas Huth5c903082016-11-04 14:10:17 +01002103 int done = 0;
Juan Quintela56e93d22015-05-07 19:33:31 +02002104
2105 rcu_read_lock();
Juan Quintela6f37bb82017-03-13 19:26:29 +01002106 if (ram_list.version != rs->last_version) {
2107 ram_state_reset(rs);
Juan Quintela56e93d22015-05-07 19:33:31 +02002108 }
2109
2110 /* Read version before ram_list.blocks */
2111 smp_rmb();
2112
2113 ram_control_before_iterate(f, RAM_CONTROL_ROUND);
2114
2115 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2116 i = 0;
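    /* Send pages until the rate limit for this iteration is hit or there is
     * nothing left to send. */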
2117 while ((ret = qemu_file_rate_limit(f)) == 0) {
2118 int pages;
2119
Juan Quintelace25d332017-03-15 11:00:51 +01002120 pages = ram_find_and_save_block(rs, false);
Juan Quintela56e93d22015-05-07 19:33:31 +02002121 /* no more pages to send */
2122 if (pages == 0) {
Thomas Huth5c903082016-11-04 14:10:17 +01002123 done = 1;
Juan Quintela56e93d22015-05-07 19:33:31 +02002124 break;
2125 }
Juan Quintela23b28c32017-03-13 20:51:34 +01002126 rs->iterations++;
Jason J. Herne070afca2015-09-08 13:12:35 -04002127
Juan Quintela56e93d22015-05-07 19:33:31 +02002128 /* we want to check in the 1st loop, just in case it was the 1st time
2129 and we had to sync the dirty bitmap.
2130 qemu_get_clock_ns() is a bit expensive, so we only check every few
 2131 iterations
2132 */
2133 if ((i & 63) == 0) {
2134 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
2135 if (t1 > MAX_WAIT) {
Juan Quintela55c44462017-01-23 22:32:05 +01002136 trace_ram_save_iterate_big_wait(t1, i);
Juan Quintela56e93d22015-05-07 19:33:31 +02002137 break;
2138 }
2139 }
2140 i++;
2141 }
Juan Quintelace25d332017-03-15 11:00:51 +01002142 flush_compressed_data(rs);
Juan Quintela56e93d22015-05-07 19:33:31 +02002143 rcu_read_unlock();
2144
2145 /*
2146 * Must occur before EOS (or any QEMUFile operation)
2147 * because of RDMA protocol.
2148 */
2149 ram_control_after_iterate(f, RAM_CONTROL_ROUND);
2150
2151 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
Juan Quintela2f4fde92017-03-13 21:58:11 +01002152 rs->bytes_transferred += 8;
Juan Quintela56e93d22015-05-07 19:33:31 +02002153
2154 ret = qemu_file_get_error(f);
2155 if (ret < 0) {
2156 return ret;
2157 }
2158
Thomas Huth5c903082016-11-04 14:10:17 +01002159 return done;
Juan Quintela56e93d22015-05-07 19:33:31 +02002160}
2161
Juan Quintela3d0684b2017-03-23 15:06:39 +01002162/**
2163 * ram_save_complete: function called to send the remaining amount of ram
2164 *
2165 * Returns zero to indicate success
2166 *
2167 * Called with iothread lock
2168 *
2169 * @f: QEMUFile where to send the data
2170 * @opaque: RAMState pointer
2171 */
Juan Quintela56e93d22015-05-07 19:33:31 +02002172static int ram_save_complete(QEMUFile *f, void *opaque)
2173{
Juan Quintela6f37bb82017-03-13 19:26:29 +01002174 RAMState *rs = opaque;
2175
Juan Quintela56e93d22015-05-07 19:33:31 +02002176 rcu_read_lock();
2177
Juan Quintela57273092017-03-20 22:25:28 +01002178 if (!migration_in_postcopy()) {
Juan Quintela8d820d62017-03-13 19:35:50 +01002179 migration_bitmap_sync(rs);
Dr. David Alan Gilbert663e6c12015-11-05 18:11:13 +00002180 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002181
2182 ram_control_before_iterate(f, RAM_CONTROL_FINISH);
2183
2184 /* try transferring iterative blocks of memory */
2185
2186 /* flush all remaining blocks regardless of rate limiting */
2187 while (true) {
2188 int pages;
2189
Juan Quintelace25d332017-03-15 11:00:51 +01002190 pages = ram_find_and_save_block(rs, !migration_in_colo_state());
Juan Quintela56e93d22015-05-07 19:33:31 +02002191 /* no more blocks to send */
2192 if (pages == 0) {
2193 break;
2194 }
2195 }
2196
Juan Quintelace25d332017-03-15 11:00:51 +01002197 flush_compressed_data(rs);
Juan Quintela56e93d22015-05-07 19:33:31 +02002198 ram_control_after_iterate(f, RAM_CONTROL_FINISH);
Juan Quintela56e93d22015-05-07 19:33:31 +02002199
2200 rcu_read_unlock();
Paolo Bonzinid09a6fd2015-07-09 08:47:58 +02002201
Juan Quintela56e93d22015-05-07 19:33:31 +02002202 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2203
2204 return 0;
2205}
2206
Dr. David Alan Gilbertc31b0982015-11-05 18:10:54 +00002207static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
2208 uint64_t *non_postcopiable_pending,
2209 uint64_t *postcopiable_pending)
Juan Quintela56e93d22015-05-07 19:33:31 +02002210{
Juan Quintela8d820d62017-03-13 19:35:50 +01002211 RAMState *rs = opaque;
Juan Quintela56e93d22015-05-07 19:33:31 +02002212 uint64_t remaining_size;
2213
Juan Quintela9edabd42017-03-14 12:02:16 +01002214 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
Juan Quintela56e93d22015-05-07 19:33:31 +02002215
Juan Quintela57273092017-03-20 22:25:28 +01002216 if (!migration_in_postcopy() &&
Dr. David Alan Gilbert663e6c12015-11-05 18:11:13 +00002217 remaining_size < max_size) {
Juan Quintela56e93d22015-05-07 19:33:31 +02002218 qemu_mutex_lock_iothread();
2219 rcu_read_lock();
Juan Quintela8d820d62017-03-13 19:35:50 +01002220 migration_bitmap_sync(rs);
Juan Quintela56e93d22015-05-07 19:33:31 +02002221 rcu_read_unlock();
2222 qemu_mutex_unlock_iothread();
Juan Quintela9edabd42017-03-14 12:02:16 +01002223 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
Juan Quintela56e93d22015-05-07 19:33:31 +02002224 }
Dr. David Alan Gilbertc31b0982015-11-05 18:10:54 +00002225
2226 /* We can do postcopy, and all the data is postcopiable */
2227 *postcopiable_pending += remaining_size;
Juan Quintela56e93d22015-05-07 19:33:31 +02002228}
2229
2230static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
2231{
2232 unsigned int xh_len;
2233 int xh_flags;
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00002234 uint8_t *loaded_data;
Juan Quintela56e93d22015-05-07 19:33:31 +02002235
2236 if (!xbzrle_decoded_buf) {
2237 xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
2238 }
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00002239 loaded_data = xbzrle_decoded_buf;
Juan Quintela56e93d22015-05-07 19:33:31 +02002240
2241 /* extract RLE header */
2242 xh_flags = qemu_get_byte(f);
2243 xh_len = qemu_get_be16(f);
2244
2245 if (xh_flags != ENCODING_FLAG_XBZRLE) {
2246 error_report("Failed to load XBZRLE page - wrong compression!");
2247 return -1;
2248 }
2249
2250 if (xh_len > TARGET_PAGE_SIZE) {
2251 error_report("Failed to load XBZRLE page - len overflow!");
2252 return -1;
2253 }
2254 /* load data and decode */
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00002255 qemu_get_buffer_in_place(f, &loaded_data, xh_len);
Juan Quintela56e93d22015-05-07 19:33:31 +02002256
2257 /* decode RLE */
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00002258 if (xbzrle_decode_buffer(loaded_data, xh_len, host,
Juan Quintela56e93d22015-05-07 19:33:31 +02002259 TARGET_PAGE_SIZE) == -1) {
2260 error_report("Failed to load XBZRLE page - decode error!");
2261 return -1;
2262 }
2263
2264 return 0;
2265}
2266
Juan Quintela3d0684b2017-03-23 15:06:39 +01002267/**
2268 * ram_block_from_stream: read a RAMBlock id from the migration stream
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002269 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002270 * Must be called from within a rcu critical section.
2271 *
2272 * Returns a pointer from within the RCU-protected ram_list.
2273 *
2274 * @f: QEMUFile where to read the data from
2275 * @flags: Page flags (mostly to see if it's a continuation of previous block)
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002276 */
Juan Quintela3d0684b2017-03-23 15:06:39 +01002277static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
Juan Quintela56e93d22015-05-07 19:33:31 +02002278{
2279 static RAMBlock *block = NULL;
2280 char id[256];
2281 uint8_t len;
2282
2283 if (flags & RAM_SAVE_FLAG_CONTINUE) {
zhanghailiang4c4bad42016-01-15 11:37:41 +08002284 if (!block) {
Juan Quintela56e93d22015-05-07 19:33:31 +02002285 error_report("Ack, bad migration stream!");
2286 return NULL;
2287 }
zhanghailiang4c4bad42016-01-15 11:37:41 +08002288 return block;
Juan Quintela56e93d22015-05-07 19:33:31 +02002289 }
2290
2291 len = qemu_get_byte(f);
2292 qemu_get_buffer(f, (uint8_t *)id, len);
2293 id[len] = 0;
2294
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00002295 block = qemu_ram_block_by_name(id);
zhanghailiang4c4bad42016-01-15 11:37:41 +08002296 if (!block) {
2297 error_report("Can't find block %s", id);
2298 return NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02002299 }
2300
zhanghailiang4c4bad42016-01-15 11:37:41 +08002301 return block;
2302}
2303
2304static inline void *host_from_ram_block_offset(RAMBlock *block,
2305 ram_addr_t offset)
2306{
2307 if (!offset_in_ramblock(block, offset)) {
2308 return NULL;
2309 }
2310
2311 return block->host + offset;
Juan Quintela56e93d22015-05-07 19:33:31 +02002312}
2313
Juan Quintela3d0684b2017-03-23 15:06:39 +01002314/**
2315 * ram_handle_compressed: handle the zero page case
2316 *
Juan Quintela56e93d22015-05-07 19:33:31 +02002317 * If a page (or a whole RDMA chunk) has been
2318 * determined to be zero, then zap it.
Juan Quintela3d0684b2017-03-23 15:06:39 +01002319 *
2320 * @host: host address for the zero page
2321 * @ch: what the page is filled from. We only support zero
2322 * @size: size of the zero page
Juan Quintela56e93d22015-05-07 19:33:31 +02002323 */
2324void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
2325{
2326 if (ch != 0 || !is_zero_range(host, size)) {
2327 memset(host, ch, size);
2328 }
2329}
2330
2331static void *do_data_decompress(void *opaque)
2332{
2333 DecompressParam *param = opaque;
2334 unsigned long pagesize;
Liang Li33d151f2016-05-05 15:32:58 +08002335 uint8_t *des;
2336 int len;
Juan Quintela56e93d22015-05-07 19:33:31 +02002337
Liang Li33d151f2016-05-05 15:32:58 +08002338 qemu_mutex_lock(&param->mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08002339 while (!param->quit) {
Liang Li33d151f2016-05-05 15:32:58 +08002340 if (param->des) {
2341 des = param->des;
2342 len = param->len;
2343 param->des = 0;
2344 qemu_mutex_unlock(&param->mutex);
2345
Liang Li73a89122016-05-05 15:32:51 +08002346 pagesize = TARGET_PAGE_SIZE;
2347 /* uncompress() can fail in some cases, especially
 2348 * when the page was dirtied while being compressed; that's
 2349 * not a problem because the dirty page will be retransferred
 2350 * and uncompress() won't break the data in other pages.
2351 */
Liang Li33d151f2016-05-05 15:32:58 +08002352 uncompress((Bytef *)des, &pagesize,
2353 (const Bytef *)param->compbuf, len);
Liang Li73a89122016-05-05 15:32:51 +08002354
Liang Li33d151f2016-05-05 15:32:58 +08002355 qemu_mutex_lock(&decomp_done_lock);
2356 param->done = true;
2357 qemu_cond_signal(&decomp_done_cond);
2358 qemu_mutex_unlock(&decomp_done_lock);
2359
2360 qemu_mutex_lock(&param->mutex);
2361 } else {
2362 qemu_cond_wait(&param->cond, &param->mutex);
2363 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002364 }
Liang Li33d151f2016-05-05 15:32:58 +08002365 qemu_mutex_unlock(&param->mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02002366
2367 return NULL;
2368}
2369
Liang Li5533b2e2016-05-05 15:32:52 +08002370static void wait_for_decompress_done(void)
2371{
2372 int idx, thread_count;
2373
2374 if (!migrate_use_compression()) {
2375 return;
2376 }
2377
2378 thread_count = migrate_decompress_threads();
2379 qemu_mutex_lock(&decomp_done_lock);
2380 for (idx = 0; idx < thread_count; idx++) {
2381 while (!decomp_param[idx].done) {
2382 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2383 }
2384 }
2385 qemu_mutex_unlock(&decomp_done_lock);
2386}
2387
Juan Quintela56e93d22015-05-07 19:33:31 +02002388void migrate_decompress_threads_create(void)
2389{
2390 int i, thread_count;
2391
2392 thread_count = migrate_decompress_threads();
2393 decompress_threads = g_new0(QemuThread, thread_count);
2394 decomp_param = g_new0(DecompressParam, thread_count);
Liang Li73a89122016-05-05 15:32:51 +08002395 qemu_mutex_init(&decomp_done_lock);
2396 qemu_cond_init(&decomp_done_cond);
Juan Quintela56e93d22015-05-07 19:33:31 +02002397 for (i = 0; i < thread_count; i++) {
2398 qemu_mutex_init(&decomp_param[i].mutex);
2399 qemu_cond_init(&decomp_param[i].cond);
2400 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
Liang Li73a89122016-05-05 15:32:51 +08002401 decomp_param[i].done = true;
Liang Li90e56fb2016-05-05 15:32:56 +08002402 decomp_param[i].quit = false;
Juan Quintela56e93d22015-05-07 19:33:31 +02002403 qemu_thread_create(decompress_threads + i, "decompress",
2404 do_data_decompress, decomp_param + i,
2405 QEMU_THREAD_JOINABLE);
2406 }
2407}
2408
2409void migrate_decompress_threads_join(void)
2410{
2411 int i, thread_count;
2412
Juan Quintela56e93d22015-05-07 19:33:31 +02002413 thread_count = migrate_decompress_threads();
2414 for (i = 0; i < thread_count; i++) {
2415 qemu_mutex_lock(&decomp_param[i].mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08002416 decomp_param[i].quit = true;
Juan Quintela56e93d22015-05-07 19:33:31 +02002417 qemu_cond_signal(&decomp_param[i].cond);
2418 qemu_mutex_unlock(&decomp_param[i].mutex);
2419 }
2420 for (i = 0; i < thread_count; i++) {
2421 qemu_thread_join(decompress_threads + i);
2422 qemu_mutex_destroy(&decomp_param[i].mutex);
2423 qemu_cond_destroy(&decomp_param[i].cond);
2424 g_free(decomp_param[i].compbuf);
2425 }
2426 g_free(decompress_threads);
2427 g_free(decomp_param);
Juan Quintela56e93d22015-05-07 19:33:31 +02002428 decompress_threads = NULL;
2429 decomp_param = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02002430}
2431
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00002432static void decompress_data_with_multi_threads(QEMUFile *f,
Juan Quintela56e93d22015-05-07 19:33:31 +02002433 void *host, int len)
2434{
2435 int idx, thread_count;
2436
2437 thread_count = migrate_decompress_threads();
Liang Li73a89122016-05-05 15:32:51 +08002438 qemu_mutex_lock(&decomp_done_lock);
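/*
 * Hand this compressed page to an idle decompression thread; if all
 * threads are busy, wait on decomp_done_cond until one signals
 * completion and then rescan.
 */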
Juan Quintela56e93d22015-05-07 19:33:31 +02002439 while (true) {
2440 for (idx = 0; idx < thread_count; idx++) {
Liang Li73a89122016-05-05 15:32:51 +08002441 if (decomp_param[idx].done) {
Liang Li33d151f2016-05-05 15:32:58 +08002442 decomp_param[idx].done = false;
2443 qemu_mutex_lock(&decomp_param[idx].mutex);
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00002444 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
Juan Quintela56e93d22015-05-07 19:33:31 +02002445 decomp_param[idx].des = host;
2446 decomp_param[idx].len = len;
Liang Li33d151f2016-05-05 15:32:58 +08002447 qemu_cond_signal(&decomp_param[idx].cond);
2448 qemu_mutex_unlock(&decomp_param[idx].mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02002449 break;
2450 }
2451 }
2452 if (idx < thread_count) {
2453 break;
Liang Li73a89122016-05-05 15:32:51 +08002454 } else {
2455 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02002456 }
2457 }
Liang Li73a89122016-05-05 15:32:51 +08002458 qemu_mutex_unlock(&decomp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02002459}
2460
Juan Quintela3d0684b2017-03-23 15:06:39 +01002461/**
2462 * ram_postcopy_incoming_init: allocate postcopy data structures
2463 *
2464 * Returns 0 for success and negative if there was an error
2465 *
2466 * @mis: current migration incoming state
2467 *
2468 * Allocate data structures etc. needed by incoming migration with
2469 * postcopy-ram. postcopy-ram's similarly named
2470 * postcopy_ram_incoming_init does the work.
Dr. David Alan Gilbert1caddf82015-11-05 18:11:03 +00002471 */
2472int ram_postcopy_incoming_init(MigrationIncomingState *mis)
2473{
2474 size_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
2475
2476 return postcopy_ram_incoming_init(mis, ram_pages);
2477}
2478
Juan Quintela3d0684b2017-03-23 15:06:39 +01002479/**
2480 * ram_load_postcopy: load a page in the postcopy case
2481 *
2482 * Returns 0 for success or -errno in case of error
2483 *
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002484 * Called in postcopy mode by ram_load().
2485 * rcu_read_lock is taken prior to this being called.
Juan Quintela3d0684b2017-03-23 15:06:39 +01002486 *
2487 * @f: QEMUFile to read the data from
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002488 */
2489static int ram_load_postcopy(QEMUFile *f)
2490{
2491 int flags = 0, ret = 0;
2492 bool place_needed = false;
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00002493 bool matching_page_sizes = false;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002494 MigrationIncomingState *mis = migration_incoming_get_current();
2495 /* Temporary page that is later 'placed' */
2496 void *postcopy_host_page = postcopy_get_tmp_page(mis);
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00002497 void *last_host = NULL;
Dr. David Alan Gilberta3b6ff62015-11-11 14:02:28 +00002498 bool all_zero = false;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002499
2500 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
2501 ram_addr_t addr;
2502 void *host = NULL;
2503 void *page_buffer = NULL;
2504 void *place_source = NULL;
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00002505 RAMBlock *block = NULL;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002506 uint8_t ch;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002507
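/*
 * Each chunk of the stream starts with a 64-bit word: the page
 * address with the RAM_SAVE_FLAG_* bits packed into its low,
 * sub-page bits.
 */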
2508 addr = qemu_get_be64(f);
2509 flags = addr & ~TARGET_PAGE_MASK;
2510 addr &= TARGET_PAGE_MASK;
2511
2512 trace_ram_load_postcopy_loop((uint64_t)addr, flags);
2513 place_needed = false;
2514 if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE)) {
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00002515 block = ram_block_from_stream(f, flags);
zhanghailiang4c4bad42016-01-15 11:37:41 +08002516
2517 host = host_from_ram_block_offset(block, addr);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002518 if (!host) {
2519 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2520 ret = -EINVAL;
2521 break;
2522 }
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00002523 matching_page_sizes = block->page_size == TARGET_PAGE_SIZE;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002524 /*
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00002525 * Postcopy requires that we place whole host pages atomically;
2526 * these may be huge pages for RAMBlocks that are backed by
2527 * hugetlbfs.
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002528 * To make it atomic, the data is read into a temporary page
2529 * that's moved into place later.
2530 * The migration protocol uses, possibly smaller, target pages;
2531 * however, the source ensures it always sends all the components
2532 * of a host page in order.
2533 */
2534 page_buffer = postcopy_host_page +
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00002535 ((uintptr_t)host & (block->page_size - 1));
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002536 /* If all TP are zero then we can optimise the place */
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00002537 if (!((uintptr_t)host & (block->page_size - 1))) {
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002538 all_zero = true;
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00002539 } else {
2540 /* not the 1st TP within the HP */
2541 if (host != (last_host + TARGET_PAGE_SIZE)) {
Markus Armbruster9af9e0f2015-12-18 16:35:19 +01002542 error_report("Non-sequential target page %p/%p",
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00002543 host, last_host);
2544 ret = -EINVAL;
2545 break;
2546 }
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002547 }
2548
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00002549
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002550 /*
2551 * If it's the last part of a host page then we place the host
2552 * page
2553 */
2554 place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00002555 (block->page_size - 1)) == 0;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002556 place_source = postcopy_host_page;
2557 }
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00002558 last_host = host;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002559
2560 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2561 case RAM_SAVE_FLAG_COMPRESS:
2562 ch = qemu_get_byte(f);
2563 memset(page_buffer, ch, TARGET_PAGE_SIZE);
2564 if (ch) {
2565 all_zero = false;
2566 }
2567 break;
2568
2569 case RAM_SAVE_FLAG_PAGE:
2570 all_zero = false;
2571 if (!place_needed || !matching_page_sizes) {
2572 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
2573 } else {
2574 /* Avoid the copy out of the QEMUFile buffer; the postcopy
2575 * placement code will copy the data later anyway. Only
2576 * possible when the read is done in one go (matching page sizes)
2577 */
2578 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
2579 TARGET_PAGE_SIZE);
2580 }
2581 break;
2582 case RAM_SAVE_FLAG_EOS:
2583 /* normal exit */
2584 break;
2585 default:
2586 error_report("Unknown combination of migration flags: %#x"
2587 " (postcopy mode)", flags);
2588 ret = -EINVAL;
2589 }
2590
2591 if (place_needed) {
2592 /* This gets called at the last target page in the host page */
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00002593 void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;
2594
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002595 if (all_zero) {
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00002596 ret = postcopy_place_page_zero(mis, place_dest,
2597 block->page_size);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002598 } else {
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00002599 ret = postcopy_place_page(mis, place_dest,
2600 place_source, block->page_size);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002601 }
2602 }
2603 if (!ret) {
2604 ret = qemu_file_get_error(f);
2605 }
2606 }
2607
2608 return ret;
2609}
2610
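/**
 * ram_load: load a RAM section from the migration stream
 *
 * The stream is a sequence of chunks; each starts with a 64-bit word
 * combining the page address and RAM_SAVE_FLAG_* bits, followed by a
 * flag-specific payload, and the section ends with RAM_SAVE_FLAG_EOS.
 *
 * Returns 0 for success or a negative value in case of error
 *
 * @f: QEMUFile to read the data from
 * @opaque: registration opaque, unused by this function
 * @version_id: stream format version; only version 4 is accepted
 */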
Juan Quintela56e93d22015-05-07 19:33:31 +02002611static int ram_load(QEMUFile *f, void *opaque, int version_id)
2612{
2613 int flags = 0, ret = 0;
2614 static uint64_t seq_iter;
2615 int len = 0;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002616 /*
2617 * If system is running in postcopy mode, page inserts to host memory must
2618 * be atomic
2619 */
2620 bool postcopy_running = postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING;
Dr. David Alan Gilbertef08fb32017-02-24 18:28:30 +00002621 /* ADVISE is earlier; it shows the source has the postcopy capability enabled */
2622 bool postcopy_advised = postcopy_state_get() >= POSTCOPY_INCOMING_ADVISE;
Juan Quintela56e93d22015-05-07 19:33:31 +02002623
2624 seq_iter++;
2625
2626 if (version_id != 4) {
2627 ret = -EINVAL;
2628 }
2629
2630 /* This RCU critical section can be very long running.
2631 * When RCU reclaims in the code start to become numerous,
2632 * it will be necessary to reduce the granularity of this
2633 * critical section.
2634 */
2635 rcu_read_lock();
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002636
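/*
 * In postcopy mode the stream is consumed entirely by
 * ram_load_postcopy(); the precopy loop below is skipped.
 */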
2637 if (postcopy_running) {
2638 ret = ram_load_postcopy(f);
2639 }
2640
2641 while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02002642 ram_addr_t addr, total_ram_bytes;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00002643 void *host = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02002644 uint8_t ch;
2645
2646 addr = qemu_get_be64(f);
2647 flags = addr & ~TARGET_PAGE_MASK;
2648 addr &= TARGET_PAGE_MASK;
2649
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00002650 if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE |
2651 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
zhanghailiang4c4bad42016-01-15 11:37:41 +08002652 RAMBlock *block = ram_block_from_stream(f, flags);
2653
2654 host = host_from_ram_block_offset(block, addr);
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00002655 if (!host) {
2656 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2657 ret = -EINVAL;
2658 break;
2659 }
2660 }
2661
Juan Quintela56e93d22015-05-07 19:33:31 +02002662 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2663 case RAM_SAVE_FLAG_MEM_SIZE:
2664 /* Synchronize RAM block list */
2665 total_ram_bytes = addr;
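/*
 * The payload is one record per RAMBlock: a length-prefixed idstr
 * and the block's used length, plus the block's page size when
 * postcopy has been advised and the block is backed by huge pages.
 */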
2666 while (!ret && total_ram_bytes) {
2667 RAMBlock *block;
Juan Quintela56e93d22015-05-07 19:33:31 +02002668 char id[256];
2669 ram_addr_t length;
2670
2671 len = qemu_get_byte(f);
2672 qemu_get_buffer(f, (uint8_t *)id, len);
2673 id[len] = 0;
2674 length = qemu_get_be64(f);
2675
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00002676 block = qemu_ram_block_by_name(id);
2677 if (block) {
2678 if (length != block->used_length) {
2679 Error *local_err = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02002680
Gongleifa53a0e2016-05-10 10:04:59 +08002681 ret = qemu_ram_resize(block, length,
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00002682 &local_err);
2683 if (local_err) {
2684 error_report_err(local_err);
Juan Quintela56e93d22015-05-07 19:33:31 +02002685 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002686 }
Dr. David Alan Gilbertef08fb32017-02-24 18:28:30 +00002687 /* For postcopy we need to check hugepage sizes match */
2688 if (postcopy_advised &&
2689 block->page_size != qemu_host_page_size) {
2690 uint64_t remote_page_size = qemu_get_be64(f);
2691 if (remote_page_size != block->page_size) {
2692 error_report("Mismatched RAM page size %s "
2693 "(local) %zd != %" PRId64,
2694 id, block->page_size,
2695 remote_page_size);
2696 ret = -EINVAL;
2697 }
2698 }
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00002699 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
2700 block->idstr);
2701 } else {
Juan Quintela56e93d22015-05-07 19:33:31 +02002702 error_report("Unknown ramblock \"%s\", cannot "
2703 "accept migration", id);
2704 ret = -EINVAL;
2705 }
2706
2707 total_ram_bytes -= length;
2708 }
2709 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00002710
Juan Quintela56e93d22015-05-07 19:33:31 +02002711 case RAM_SAVE_FLAG_COMPRESS:
Juan Quintela56e93d22015-05-07 19:33:31 +02002712 ch = qemu_get_byte(f);
2713 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
2714 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00002715
Juan Quintela56e93d22015-05-07 19:33:31 +02002716 case RAM_SAVE_FLAG_PAGE:
Juan Quintela56e93d22015-05-07 19:33:31 +02002717 qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
2718 break;
Juan Quintela56e93d22015-05-07 19:33:31 +02002719
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00002720 case RAM_SAVE_FLAG_COMPRESS_PAGE:
Juan Quintela56e93d22015-05-07 19:33:31 +02002721 len = qemu_get_be32(f);
2722 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
2723 error_report("Invalid compressed data length: %d", len);
2724 ret = -EINVAL;
2725 break;
2726 }
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00002727 decompress_data_with_multi_threads(f, host, len);
Juan Quintela56e93d22015-05-07 19:33:31 +02002728 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00002729
Juan Quintela56e93d22015-05-07 19:33:31 +02002730 case RAM_SAVE_FLAG_XBZRLE:
Juan Quintela56e93d22015-05-07 19:33:31 +02002731 if (load_xbzrle(f, addr, host) < 0) {
2732 error_report("Failed to decompress XBZRLE page at "
2733 RAM_ADDR_FMT, addr);
2734 ret = -EINVAL;
2735 break;
2736 }
2737 break;
2738 case RAM_SAVE_FLAG_EOS:
2739 /* normal exit */
2740 break;
2741 default:
2742 if (flags & RAM_SAVE_FLAG_HOOK) {
Dr. David Alan Gilbert632e3a52015-06-11 18:17:23 +01002743 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
Juan Quintela56e93d22015-05-07 19:33:31 +02002744 } else {
2745 error_report("Unknown combination of migration flags: %#x",
2746 flags);
2747 ret = -EINVAL;
2748 }
2749 }
2750 if (!ret) {
2751 ret = qemu_file_get_error(f);
2752 }
2753 }
2754
Liang Li5533b2e2016-05-05 15:32:52 +08002755 wait_for_decompress_done();
Juan Quintela56e93d22015-05-07 19:33:31 +02002756 rcu_read_unlock();
Juan Quintela55c44462017-01-23 22:32:05 +01002757 trace_ram_load_complete(ret, seq_iter);
Juan Quintela56e93d22015-05-07 19:33:31 +02002758 return ret;
2759}
2760
2761static SaveVMHandlers savevm_ram_handlers = {
2762 .save_live_setup = ram_save_setup,
2763 .save_live_iterate = ram_save_iterate,
Dr. David Alan Gilbert763c9062015-11-05 18:11:00 +00002764 .save_live_complete_postcopy = ram_save_complete,
Dr. David Alan Gilberta3e06c32015-11-05 18:10:41 +00002765 .save_live_complete_precopy = ram_save_complete,
Juan Quintela56e93d22015-05-07 19:33:31 +02002766 .save_live_pending = ram_save_pending,
2767 .load_state = ram_load,
Liang Li6ad2a212015-11-02 15:37:03 +08002768 .cleanup = ram_migration_cleanup,
Juan Quintela56e93d22015-05-07 19:33:31 +02002769};
2770
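/*
 * Register guest RAM as a live-migration section named "ram",
 * stream format version 4, handled by savevm_ram_handlers above.
 */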
2771void ram_mig_init(void)
2772{
2773 qemu_mutex_init(&XBZRLE.lock);
Juan Quintela6f37bb82017-03-13 19:26:29 +01002774 register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state);
Juan Quintela56e93d22015-05-07 19:33:31 +02002775}