/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2011-2015 Red Hat Inc
 *
 * Authors:
 *  Juan Quintela <quintela@redhat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "cpu.h"
#include <zlib.h>
#include "qapi-event.h"
#include "qemu/cutils.h"
#include "qemu/bitops.h"
#include "qemu/bitmap.h"
#include "qemu/timer.h"
#include "qemu/main-loop.h"
#include "xbzrle.h"
#include "ram.h"
#include "migration/migration.h"
#include "migration/register.h"
#include "migration/misc.h"
#include "qemu-file.h"
#include "migration/vmstate.h"
#include "postcopy-ram.h"
#include "exec/address-spaces.h"
#include "migration/page_cache.h"
#include "qemu/error-report.h"
#include "trace.h"
#include "exec/ram_addr.h"
#include "qemu/rcu_queue.h"
#include "migration/colo.h"

/***********************************************************/
/* ram save/restore */

/* RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS, which
 * worked for pages that were filled with the same char.  We switched
 * it to only search for the zero value.  And to avoid confusion with
 * RAM_SAVE_FLAG_COMPRESS_PAGE just rename it.
 */

#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
#define RAM_SAVE_FLAG_ZERO     0x02
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
#define RAM_SAVE_FLAG_PAGE     0x08
#define RAM_SAVE_FLAG_EOS      0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE   0x40
/* 0x80 is reserved in migration.h start with 0x100 next */
#define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100

static inline bool is_zero_range(uint8_t *p, uint64_t size)
{
    return buffer_is_zero(p, size);
}

XBZRLECacheStats xbzrle_counters;

/* struct contains XBZRLE cache and a static page
   used by the compression */
static struct {
    /* buffer used for XBZRLE encoding */
    uint8_t *encoded_buf;
    /* buffer for storing page content */
    uint8_t *current_buf;
    /* Cache for XBZRLE, Protected by lock. */
    PageCache *cache;
    QemuMutex lock;
    /* it will store a page full of zeros */
    uint8_t *zero_target_page;
} XBZRLE;

/* buffer used for XBZRLE decoding */
static uint8_t *xbzrle_decoded_buf;

static void XBZRLE_cache_lock(void)
{
    if (migrate_use_xbzrle()) {
        qemu_mutex_lock(&XBZRLE.lock);
    }
}

static void XBZRLE_cache_unlock(void)
{
    if (migrate_use_xbzrle()) {
        qemu_mutex_unlock(&XBZRLE.lock);
    }
}

/**
 * xbzrle_cache_resize: resize the xbzrle cache
 *
 * This function is called from qmp_migrate_set_cache_size in main
 * thread, possibly while a migration is in progress. A running
 * migration may be using the cache and might finish during this call,
 * hence changes to the cache are protected by XBZRLE.lock().
 *
 * Returns the new_size or negative in case of error.
 *
 * @new_size: new cache size
 */
int64_t xbzrle_cache_resize(int64_t new_size)
{
    PageCache *new_cache;
    int64_t ret;

    if (new_size < TARGET_PAGE_SIZE) {
        return -1;
    }

    XBZRLE_cache_lock();

    if (XBZRLE.cache != NULL) {
        if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
            goto out_new_size;
        }
        new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
                               TARGET_PAGE_SIZE);
        if (!new_cache) {
            error_report("Error creating cache");
            ret = -1;
            goto out;
        }

        cache_fini(XBZRLE.cache);
        XBZRLE.cache = new_cache;
    }

out_new_size:
    ret = pow2floor(new_size);
out:
    XBZRLE_cache_unlock();
    return ret;
}

/*
 * An outstanding page request, on the source, having been received
 * and queued
 */
struct RAMSrcPageRequest {
    RAMBlock *rb;
    hwaddr    offset;
    hwaddr    len;

    QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
};

/* State of RAM for migration */
struct RAMState {
    /* QEMUFile used for this migration */
    QEMUFile *f;
    /* Last block that we have visited searching for dirty pages */
    RAMBlock *last_seen_block;
    /* Last block from where we have sent data */
    RAMBlock *last_sent_block;
    /* Last dirty target page we have sent */
    ram_addr_t last_page;
    /* last ram version we have seen */
    uint32_t last_version;
    /* We are in the first round */
    bool ram_bulk_stage;
    /* How many times we have dirty too many pages */
    int dirty_rate_high_cnt;
    /* these variables are used for bitmap sync */
    /* last time we did a full bitmap_sync */
    int64_t time_last_bitmap_sync;
    /* bytes transferred at start_time */
    uint64_t bytes_xfer_prev;
    /* number of dirty pages since start_time */
    uint64_t num_dirty_pages_period;
    /* xbzrle misses since the beginning of the period */
    uint64_t xbzrle_cache_miss_prev;
    /* number of iterations at the beginning of period */
    uint64_t iterations_prev;
    /* Iterations since start */
    uint64_t iterations;
    /* number of dirty bits in the bitmap */
    uint64_t migration_dirty_pages;
    /* protects modification of the bitmap */
    QemuMutex bitmap_mutex;
    /* The RAMBlock used in the last src_page_requests */
    RAMBlock *last_req_rb;
    /* Queue of outstanding page requests from the destination */
    QemuMutex src_page_req_mutex;
    QSIMPLEQ_HEAD(src_page_requests, RAMSrcPageRequest) src_page_requests;
};
typedef struct RAMState RAMState;

static RAMState *ram_state;

uint64_t ram_bytes_remaining(void)
{
    return ram_state->migration_dirty_pages * TARGET_PAGE_SIZE;
}

MigrationStats ram_counters;

/* used by the search for pages to send */
struct PageSearchStatus {
    /* Current block being searched */
    RAMBlock    *block;
    /* Current page to search from */
    unsigned long page;
    /* Set once we wrap around */
    bool         complete_round;
};
typedef struct PageSearchStatus PageSearchStatus;

struct CompressParam {
    bool done;
    bool quit;
    QEMUFile *file;
    QemuMutex mutex;
    QemuCond cond;
    RAMBlock *block;
    ram_addr_t offset;
};
typedef struct CompressParam CompressParam;

struct DecompressParam {
    bool done;
    bool quit;
    QemuMutex mutex;
    QemuCond cond;
    void *des;
    uint8_t *compbuf;
    int len;
};
typedef struct DecompressParam DecompressParam;

static CompressParam *comp_param;
static QemuThread *compress_threads;
/* comp_done_cond is used to wake up the migration thread when
 * one of the compression threads has finished the compression.
 * comp_done_lock is used to co-work with comp_done_cond.
 */
static QemuMutex comp_done_lock;
static QemuCond comp_done_cond;
/* The empty QEMUFileOps will be used by file in CompressParam */
static const QEMUFileOps empty_ops = { };

static DecompressParam *decomp_param;
static QemuThread *decompress_threads;
static QemuMutex decomp_done_lock;
static QemuCond decomp_done_cond;

static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
                                ram_addr_t offset);

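/**
 * do_data_compress: body of a compression worker thread
 *
 * Loops until ->quit is set: whenever a (block, offset) pair has been
 * posted in this thread's CompressParam, compress that page into the
 * per-thread QEMUFile buffer and signal comp_done_cond so the
 * migration thread can collect the result.
 *
 * @opaque: pointer to this thread's CompressParam
 */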
static void *do_data_compress(void *opaque)
{
    CompressParam *param = opaque;
    RAMBlock *block;
    ram_addr_t offset;

    qemu_mutex_lock(&param->mutex);
    while (!param->quit) {
        if (param->block) {
            block = param->block;
            offset = param->offset;
            param->block = NULL;
            qemu_mutex_unlock(&param->mutex);

            do_compress_ram_page(param->file, block, offset);

            qemu_mutex_lock(&comp_done_lock);
            param->done = true;
            qemu_cond_signal(&comp_done_cond);
            qemu_mutex_unlock(&comp_done_lock);

            qemu_mutex_lock(&param->mutex);
        } else {
            qemu_cond_wait(&param->cond, &param->mutex);
        }
    }
    qemu_mutex_unlock(&param->mutex);

    return NULL;
}

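/**
 * terminate_compression_threads: ask every compression thread to quit
 *
 * Sets ->quit on each CompressParam and wakes the thread up so that
 * do_data_compress() can exit its loop.
 */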
static inline void terminate_compression_threads(void)
{
    int idx, thread_count;

    thread_count = migrate_compress_threads();

    for (idx = 0; idx < thread_count; idx++) {
        qemu_mutex_lock(&comp_param[idx].mutex);
        comp_param[idx].quit = true;
        qemu_cond_signal(&comp_param[idx].cond);
        qemu_mutex_unlock(&comp_param[idx].mutex);
    }
}

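/**
 * migrate_compress_threads_join: tear down the compression threads
 *
 * Asks the threads to quit, joins them and frees the per-thread
 * state.  A no-op when compression is not in use.
 */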
void migrate_compress_threads_join(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    terminate_compression_threads();
    thread_count = migrate_compress_threads();
    for (i = 0; i < thread_count; i++) {
        qemu_thread_join(compress_threads + i);
        qemu_fclose(comp_param[i].file);
        qemu_mutex_destroy(&comp_param[i].mutex);
        qemu_cond_destroy(&comp_param[i].cond);
    }
    qemu_mutex_destroy(&comp_done_lock);
    qemu_cond_destroy(&comp_done_cond);
    g_free(compress_threads);
    g_free(comp_param);
    compress_threads = NULL;
    comp_param = NULL;
}

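/**
 * migrate_compress_threads_create: start the compression threads
 *
 * Allocates one CompressParam and one worker thread per configured
 * compression thread.  A no-op when compression is not in use.
 */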
void migrate_compress_threads_create(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    thread_count = migrate_compress_threads();
    compress_threads = g_new0(QemuThread, thread_count);
    comp_param = g_new0(CompressParam, thread_count);
    qemu_cond_init(&comp_done_cond);
    qemu_mutex_init(&comp_done_lock);
    for (i = 0; i < thread_count; i++) {
        /* comp_param[i].file is just used as a dummy buffer to save data,
         * set its ops to empty.
         */
        comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
        comp_param[i].done = true;
        comp_param[i].quit = false;
        qemu_mutex_init(&comp_param[i].mutex);
        qemu_cond_init(&comp_param[i].cond);
        qemu_thread_create(compress_threads + i, "compress",
                           do_data_compress, comp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
}

/**
 * save_page_header: write page header to wire
 *
 * If this is the 1st block, it also writes the block identification
 *
 * Returns the number of bytes written
 *
 * @rs: current RAM state
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 *          in the lower bits, it contains flags
 */
static size_t save_page_header(RAMState *rs, QEMUFile *f, RAMBlock *block,
                               ram_addr_t offset)
{
    size_t size, len;

    if (block == rs->last_sent_block) {
        offset |= RAM_SAVE_FLAG_CONTINUE;
    }
    qemu_put_be64(f, offset);
    size = 8;

    if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
        len = strlen(block->idstr);
        qemu_put_byte(f, len);
        qemu_put_buffer(f, (uint8_t *)block->idstr, len);
        size += 1 + len;
        rs->last_sent_block = block;
    }
    return size;
}

/**
 * mig_throttle_guest_down: throttle down the guest
 *
 * Reduce amount of guest cpu execution to hopefully slow down memory
 * writes. If guest dirty memory rate is reduced below the rate at
 * which we can transfer pages to the destination then we should be
 * able to complete migration. Some workloads dirty memory way too
 * fast and will not effectively converge, even with auto-converge.
 */
static void mig_throttle_guest_down(void)
{
    MigrationState *s = migrate_get_current();
    uint64_t pct_initial = s->parameters.cpu_throttle_initial;
    uint64_t pct_increment = s->parameters.cpu_throttle_increment;

    /* We have not started throttling yet. Let's start it. */
    if (!cpu_throttle_active()) {
        cpu_throttle_set(pct_initial);
    } else {
        /* Throttling already on, just increase the rate */
        cpu_throttle_set(cpu_throttle_get_percentage() + pct_increment);
    }
}

/**
 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
 *
 * @rs: current RAM state
 * @current_addr: address for the zero page
 *
 * Update the xbzrle cache to reflect a page that's been sent as all 0.
 * The important thing is that a stale (not-yet-0'd) page be replaced
 * by the new data.
 * As a bonus, if the page wasn't in the cache it gets added so that
 * when a small write is made into the 0'd page it gets XBZRLE sent.
 */
static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
{
    if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
        return;
    }

    /* We don't care if this fails to allocate a new cache page
     * as long as it updated an old one */
    cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page,
                 ram_counters.dirty_sync_count);
}

#define ENCODING_FLAG_XBZRLE 0x1

/**
 * save_xbzrle_page: compress and send current page
 *
 * Returns: 1 means that we wrote the page
 *          0 means that page is identical to the one already sent
 *          -1 means that xbzrle would be longer than normal
 *
 * @rs: current RAM state
 * @current_data: pointer to the address of the page contents
 * @current_addr: addr of the page
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @last_stage: if we are at the completion stage
 */
static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
                            ram_addr_t current_addr, RAMBlock *block,
                            ram_addr_t offset, bool last_stage)
{
    int encoded_len = 0, bytes_xbzrle;
    uint8_t *prev_cached_page;

    if (!cache_is_cached(XBZRLE.cache, current_addr,
                         ram_counters.dirty_sync_count)) {
        xbzrle_counters.cache_miss++;
        if (!last_stage) {
            if (cache_insert(XBZRLE.cache, current_addr, *current_data,
                             ram_counters.dirty_sync_count) == -1) {
                return -1;
            } else {
                /* update *current_data when the page has been
                   inserted into cache */
                *current_data = get_cached_data(XBZRLE.cache, current_addr);
            }
        }
        return -1;
    }

    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);

    /* save current buffer into memory */
    memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);

    /* XBZRLE encoding (if there is no overflow) */
    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
                                       TARGET_PAGE_SIZE);
    if (encoded_len == 0) {
        trace_save_xbzrle_page_skipping();
        return 0;
    } else if (encoded_len == -1) {
        trace_save_xbzrle_page_overflow();
        xbzrle_counters.overflow++;
        /* update data in the cache */
        if (!last_stage) {
            memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
            *current_data = prev_cached_page;
        }
        return -1;
    }

    /* we need to update the data in the cache, in order to get the same data */
    if (!last_stage) {
        memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
    }

    /* Send XBZRLE based compressed page */
    bytes_xbzrle = save_page_header(rs, rs->f, block,
                                    offset | RAM_SAVE_FLAG_XBZRLE);
    qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE);
    qemu_put_be16(rs->f, encoded_len);
    qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len);
    bytes_xbzrle += encoded_len + 1 + 2;
    xbzrle_counters.pages++;
    xbzrle_counters.bytes += bytes_xbzrle;
    ram_counters.transferred += bytes_xbzrle;

    return 1;
}

/**
 * migration_bitmap_find_dirty: find the next dirty page from start
 *
 * Called with rcu_read_lock() to protect migration_bitmap
 *
 * Returns the page number (within the RAMBlock) of the next dirty page
 *
 * @rs: current RAM state
 * @rb: RAMBlock where to search for dirty pages
 * @start: page where we start the search
 */
static inline
unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
                                          unsigned long start)
{
    unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
    unsigned long *bitmap = rb->bmap;
    unsigned long next;

    if (rs->ram_bulk_stage && start > 0) {
        next = start + 1;
    } else {
        next = find_next_bit(bitmap, size, start);
    }

    return next;
}

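/**
 * migration_bitmap_clear_dirty: clear the dirty bit for one page
 *
 * Returns whether the bit was actually set; when it was, the page is
 * also subtracted from rs->migration_dirty_pages.
 *
 * @rs: current RAM state
 * @rb: RAMBlock that contains the page
 * @page: page number within @rb
 */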
static inline bool migration_bitmap_clear_dirty(RAMState *rs,
                                                RAMBlock *rb,
                                                unsigned long page)
{
    bool ret;

    ret = test_and_clear_bit(page, rb->bmap);

    if (ret) {
        rs->migration_dirty_pages--;
    }
    return ret;
}

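/*
 * Pull the dirty log for [start, start + length) of @rb into the
 * migration bitmap and account the newly dirtied pages in
 * rs->migration_dirty_pages and rs->num_dirty_pages_period.
 */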
static void migration_bitmap_sync_range(RAMState *rs, RAMBlock *rb,
                                        ram_addr_t start, ram_addr_t length)
{
    rs->migration_dirty_pages +=
        cpu_physical_memory_sync_dirty_bitmap(rb, start, length,
                                              &rs->num_dirty_pages_period);
}

/**
 * ram_pagesize_summary: calculate all the pagesizes of a VM
 *
 * Returns a summary bitmap of the page sizes of all RAMBlocks
 *
 * For VMs with just normal pages this is equivalent to the host page
 * size. If it's got some huge pages then it's the OR of all the
 * different page sizes.
 */
uint64_t ram_pagesize_summary(void)
{
    RAMBlock *block;
    uint64_t summary = 0;

    RAMBLOCK_FOREACH(block) {
        summary |= block->page_size;
    }

    return summary;
}

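/**
 * migration_bitmap_sync: sync the dirty bitmap with the guest
 *
 * Syncs the dirty log of every RAMBlock into the migration bitmap and,
 * at most once per second, recomputes the dirty page rate, drives
 * auto-converge throttling and updates the xbzrle cache-miss rate.
 *
 * @rs: current RAM state
 */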
static void migration_bitmap_sync(RAMState *rs)
{
    RAMBlock *block;
    int64_t end_time;
    uint64_t bytes_xfer_now;

    ram_counters.dirty_sync_count++;

    if (!rs->time_last_bitmap_sync) {
        rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    }

    trace_migration_bitmap_sync_start();
    memory_global_dirty_log_sync();

    qemu_mutex_lock(&rs->bitmap_mutex);
    rcu_read_lock();
    RAMBLOCK_FOREACH(block) {
        migration_bitmap_sync_range(rs, block, 0, block->used_length);
    }
    rcu_read_unlock();
    qemu_mutex_unlock(&rs->bitmap_mutex);

    trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);

    end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

    /* more than 1 second = 1000 milliseconds */
    if (end_time > rs->time_last_bitmap_sync + 1000) {
        /* calculate period counters */
        ram_counters.dirty_pages_rate = rs->num_dirty_pages_period * 1000
            / (end_time - rs->time_last_bitmap_sync);
        bytes_xfer_now = ram_counters.transferred;

        if (migrate_auto_converge()) {
            /* The following detection logic can be refined later. For now:
               Check to see if the dirtied bytes is 50% more than the approx.
               amount of bytes that just got transferred since the last time we
               were in this routine. If that happens twice, start or increase
               throttling */

            if ((rs->num_dirty_pages_period * TARGET_PAGE_SIZE >
                   (bytes_xfer_now - rs->bytes_xfer_prev) / 2) &&
                (++rs->dirty_rate_high_cnt >= 2)) {
                    trace_migration_throttle();
                    rs->dirty_rate_high_cnt = 0;
                    mig_throttle_guest_down();
            }
        }

        if (migrate_use_xbzrle()) {
            if (rs->iterations_prev != rs->iterations) {
                xbzrle_counters.cache_miss_rate =
                   (double)(xbzrle_counters.cache_miss -
                            rs->xbzrle_cache_miss_prev) /
                   (rs->iterations - rs->iterations_prev);
            }
            rs->iterations_prev = rs->iterations;
            rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
        }

        /* reset period counters */
        rs->time_last_bitmap_sync = end_time;
        rs->num_dirty_pages_period = 0;
        rs->bytes_xfer_prev = bytes_xfer_now;
    }
    if (migrate_use_events()) {
        qapi_event_send_migration_pass(ram_counters.dirty_sync_count, NULL);
    }
}

/**
 * save_zero_page: send the zero page to the stream
 *
 * Returns the number of pages written.
 *
 * @rs: current RAM state
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @p: pointer to the page
 */
static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
                          uint8_t *p)
{
    int pages = -1;

    if (is_zero_range(p, TARGET_PAGE_SIZE)) {
        ram_counters.duplicate++;
        ram_counters.transferred +=
            save_page_header(rs, rs->f, block, offset | RAM_SAVE_FLAG_ZERO);
        qemu_put_byte(rs->f, 0);
        ram_counters.transferred += 1;
        pages = 1;
    }

    return pages;
}

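/*
 * When RAM release is enabled and we are in postcopy, discard the
 * already-sent pages from the source so their memory can be given
 * back to the host.
 */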
static void ram_release_pages(const char *rbname, uint64_t offset, int pages)
{
    if (!migrate_release_ram() || !migration_in_postcopy()) {
        return;
    }

    ram_discard_range(rbname, offset, pages << TARGET_PAGE_BITS);
}

/**
 * ram_save_page: send the given page to the stream
 *
 * Returns the number of pages written.
 *          < 0 - error
 *          >=0 - Number of pages written - this might legally be 0
 *                if xbzrle noticed the page was the same.
 *
 * @rs: current RAM state
 * @pss: data about the page we want to send
 * @last_stage: if we are at the completion stage
 */
static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
{
    int pages = -1;
    uint64_t bytes_xmit;
    ram_addr_t current_addr;
    uint8_t *p;
    int ret;
    bool send_async = true;
    RAMBlock *block = pss->block;
    ram_addr_t offset = pss->page << TARGET_PAGE_BITS;

    p = block->host + offset;
    trace_ram_save_page(block->idstr, (uint64_t)offset, p);

    /* In doubt sent page as normal */
    bytes_xmit = 0;
    ret = ram_control_save_page(rs->f, block->offset,
                                offset, TARGET_PAGE_SIZE, &bytes_xmit);
    if (bytes_xmit) {
        ram_counters.transferred += bytes_xmit;
        pages = 1;
    }

    XBZRLE_cache_lock();

    current_addr = block->offset + offset;

    if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
        if (ret != RAM_SAVE_CONTROL_DELAYED) {
            if (bytes_xmit > 0) {
                ram_counters.normal++;
            } else if (bytes_xmit == 0) {
                ram_counters.duplicate++;
            }
        }
    } else {
        pages = save_zero_page(rs, block, offset, p);
        if (pages > 0) {
            /* Must let xbzrle know, otherwise a previous (now 0'd) cached
             * page would be stale
             */
            xbzrle_cache_zero_page(rs, current_addr);
            ram_release_pages(block->idstr, offset, pages);
        } else if (!rs->ram_bulk_stage &&
                   !migration_in_postcopy() && migrate_use_xbzrle()) {
            pages = save_xbzrle_page(rs, &p, current_addr, block,
                                     offset, last_stage);
            if (!last_stage) {
                /* Can't send this cached data async, since the cache page
                 * might get updated before it gets to the wire
                 */
                send_async = false;
            }
        }
    }

    /* XBZRLE overflow or normal page */
    if (pages == -1) {
        ram_counters.transferred +=
            save_page_header(rs, rs->f, block, offset | RAM_SAVE_FLAG_PAGE);
        if (send_async) {
            qemu_put_buffer_async(rs->f, p, TARGET_PAGE_SIZE,
                                  migrate_release_ram() &
                                  migration_in_postcopy());
        } else {
            qemu_put_buffer(rs->f, p, TARGET_PAGE_SIZE);
        }
        ram_counters.transferred += TARGET_PAGE_SIZE;
        pages = 1;
        ram_counters.normal++;
    }

    XBZRLE_cache_unlock();

    return pages;
}

static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
                                ram_addr_t offset)
{
    RAMState *rs = ram_state;
    int bytes_sent, blen;
    uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);

    bytes_sent = save_page_header(rs, f, block, offset |
                                  RAM_SAVE_FLAG_COMPRESS_PAGE);
    blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
                                     migrate_compress_level());
    if (blen < 0) {
        bytes_sent = 0;
        qemu_file_set_error(migrate_get_current()->to_dst_file, blen);
        error_report("compressed data failed!");
    } else {
        bytes_sent += blen;
        ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1);
    }

    return bytes_sent;
}

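/*
 * flush_compressed_data: wait for every compression thread to finish
 * its current page and flush the per-thread buffers into the
 * migration stream.
 */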
static void flush_compressed_data(RAMState *rs)
{
    int idx, len, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    thread_count = migrate_compress_threads();

    qemu_mutex_lock(&comp_done_lock);
    for (idx = 0; idx < thread_count; idx++) {
        while (!comp_param[idx].done) {
            qemu_cond_wait(&comp_done_cond, &comp_done_lock);
        }
    }
    qemu_mutex_unlock(&comp_done_lock);

    for (idx = 0; idx < thread_count; idx++) {
        qemu_mutex_lock(&comp_param[idx].mutex);
        if (!comp_param[idx].quit) {
            len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
            ram_counters.transferred += len;
        }
        qemu_mutex_unlock(&comp_param[idx].mutex);
    }
}

static inline void set_compress_params(CompressParam *param, RAMBlock *block,
                                       ram_addr_t offset)
{
    param->block = block;
    param->offset = offset;
}

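/*
 * compress_page_with_multi_thread: hand one page to an idle
 * compression thread, flushing whatever that thread had already
 * compressed.  Blocks until a thread becomes available.
 *
 * Returns the number of pages written (1).
 */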
static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block,
                                           ram_addr_t offset)
{
    int idx, thread_count, bytes_xmit = -1, pages = -1;

    thread_count = migrate_compress_threads();
    qemu_mutex_lock(&comp_done_lock);
    while (true) {
        for (idx = 0; idx < thread_count; idx++) {
            if (comp_param[idx].done) {
                comp_param[idx].done = false;
                bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file);
                qemu_mutex_lock(&comp_param[idx].mutex);
                set_compress_params(&comp_param[idx], block, offset);
                qemu_cond_signal(&comp_param[idx].cond);
                qemu_mutex_unlock(&comp_param[idx].mutex);
                pages = 1;
                ram_counters.normal++;
                ram_counters.transferred += bytes_xmit;
                break;
            }
        }
        if (pages > 0) {
            break;
        } else {
            qemu_cond_wait(&comp_done_cond, &comp_done_lock);
        }
    }
    qemu_mutex_unlock(&comp_done_lock);

    return pages;
}

/**
 * ram_save_compressed_page: compress the given page and send it to the stream
 *
 * Returns the number of pages written.
 *
 * @rs: current RAM state
 * @pss: data about the page we want to send
 * @last_stage: if we are at the completion stage
 */
static int ram_save_compressed_page(RAMState *rs, PageSearchStatus *pss,
                                    bool last_stage)
{
    int pages = -1;
    uint64_t bytes_xmit = 0;
    uint8_t *p;
    int ret, blen;
    RAMBlock *block = pss->block;
    ram_addr_t offset = pss->page << TARGET_PAGE_BITS;

    p = block->host + offset;

    ret = ram_control_save_page(rs->f, block->offset,
                                offset, TARGET_PAGE_SIZE, &bytes_xmit);
    if (bytes_xmit) {
        ram_counters.transferred += bytes_xmit;
        pages = 1;
    }
    if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
        if (ret != RAM_SAVE_CONTROL_DELAYED) {
            if (bytes_xmit > 0) {
                ram_counters.normal++;
            } else if (bytes_xmit == 0) {
                ram_counters.duplicate++;
            }
        }
    } else {
        /* When starting the process of a new block, the first page of
         * the block should be sent out before other pages in the same
         * block, and all the pages in last block should have been sent
         * out, keeping this order is important, because the 'cont' flag
         * is used to avoid resending the block name.
         */
        if (block != rs->last_sent_block) {
            flush_compressed_data(rs);
            pages = save_zero_page(rs, block, offset, p);
            if (pages == -1) {
                /* Make sure the first page is sent out before other pages */
                bytes_xmit = save_page_header(rs, rs->f, block, offset |
                                              RAM_SAVE_FLAG_COMPRESS_PAGE);
                blen = qemu_put_compression_data(rs->f, p, TARGET_PAGE_SIZE,
                                                 migrate_compress_level());
                if (blen > 0) {
                    ram_counters.transferred += bytes_xmit + blen;
                    ram_counters.normal++;
                    pages = 1;
                } else {
                    qemu_file_set_error(rs->f, blen);
                    error_report("compressed data failed!");
                }
            }
            if (pages > 0) {
                ram_release_pages(block->idstr, offset, pages);
            }
        } else {
            pages = save_zero_page(rs, block, offset, p);
            if (pages == -1) {
                pages = compress_page_with_multi_thread(rs, block, offset);
            } else {
                ram_release_pages(block->idstr, offset, pages);
            }
        }
    }

    return pages;
}

/**
 * find_dirty_block: find the next dirty page and update any state
 * associated with the search process.
 *
 * Returns if a page is found
 *
 * @rs: current RAM state
 * @pss: data about the state of the current dirty page scan
 * @again: set to false if the search has scanned the whole of RAM
 */
static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
{
    pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
    if (pss->complete_round && pss->block == rs->last_seen_block &&
        pss->page >= rs->last_page) {
        /*
         * We've been once around the RAM and haven't found anything.
         * Give up.
         */
        *again = false;
        return false;
    }
    if ((pss->page << TARGET_PAGE_BITS) >= pss->block->used_length) {
        /* Didn't find anything in this RAM Block */
        pss->page = 0;
        pss->block = QLIST_NEXT_RCU(pss->block, next);
        if (!pss->block) {
            /* Hit the end of the list */
            pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
            /* Flag that we've looped */
            pss->complete_round = true;
            rs->ram_bulk_stage = false;
            if (migrate_use_xbzrle()) {
                /* If xbzrle is on, stop using the data compression at this
                 * point. In theory, xbzrle can do better than compression.
                 */
                flush_compressed_data(rs);
            }
        }
        /* Didn't find anything this time, but try again on the new block */
        *again = true;
        return false;
    } else {
        /* Can go around again, but... */
        *again = true;
        /* We've found something so probably don't need to */
        return true;
    }
}

/**
 * unqueue_page: gets a page of the queue
 *
 * Helper for 'get_queued_page' - gets a page off the queue
 *
 * Returns the block of the page (or NULL if none available)
 *
 * @rs: current RAM state
 * @offset: used to return the offset within the RAMBlock
 */
static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
{
    RAMBlock *block = NULL;

    qemu_mutex_lock(&rs->src_page_req_mutex);
    if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
        struct RAMSrcPageRequest *entry =
                                QSIMPLEQ_FIRST(&rs->src_page_requests);
        block = entry->rb;
        *offset = entry->offset;

        if (entry->len > TARGET_PAGE_SIZE) {
            entry->len -= TARGET_PAGE_SIZE;
            entry->offset += TARGET_PAGE_SIZE;
        } else {
            memory_region_unref(block->mr);
            QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
            g_free(entry);
        }
    }
    qemu_mutex_unlock(&rs->src_page_req_mutex);

    return block;
}

/**
 * get_queued_page: unqueue a page from the postcopy requests
 *
 * Skips pages that are already sent (!dirty)
 *
 * Returns if a queued page is found
 *
 * @rs: current RAM state
 * @pss: data about the state of the current dirty page scan
 */
static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
{
    RAMBlock  *block;
    ram_addr_t offset;
    bool dirty;

    do {
        block = unqueue_page(rs, &offset);
        /*
         * We're sending this page, and since it's postcopy nothing else
         * will dirty it, and we must make sure it doesn't get sent again
         * even if this queue request was received after the background
         * search already sent it.
         */
        if (block) {
            unsigned long page;

            page = offset >> TARGET_PAGE_BITS;
            dirty = test_bit(page, block->bmap);
            if (!dirty) {
                trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
                       page, test_bit(page, block->unsentmap));
            } else {
                trace_get_queued_page(block->idstr, (uint64_t)offset, page);
            }
        }

    } while (block && !dirty);

    if (block) {
        /*
         * As soon as we start servicing pages out of order, then we have
         * to kill the bulk stage, since the bulk stage assumes
         * in (migration_bitmap_find_and_reset_dirty) that every page is
         * dirty, that's no longer true.
         */
        rs->ram_bulk_stage = false;

        /*
         * We want the background search to continue from the queued page
         * since the guest is likely to want other pages near to the page
         * it just requested.
         */
        pss->block = block;
        pss->page = offset >> TARGET_PAGE_BITS;
    }

    return !!block;
}

/**
 * migration_page_queue_free: drop any remaining pages in the ram
 *                            request queue
 *
 * It should be empty at the end anyway, but in error cases there may
 * be some left.  In case there is any page left, we drop it.
 *
 * @rs: current RAM state
 */
static void migration_page_queue_free(RAMState *rs)
{
    struct RAMSrcPageRequest *mspr, *next_mspr;
    /* This queue generally should be empty - but in the case of a failed
     * migration might have some droppings in.
     */
    rcu_read_lock();
    QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
        memory_region_unref(mspr->rb->mr);
        QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
        g_free(mspr);
    }
    rcu_read_unlock();
}
1128
1129/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001130 * ram_save_queue_pages: queue the page for transmission
1131 *
1132 * A request from postcopy destination for example.
1133 *
1134 * Returns zero on success or negative on error
1135 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001136 * @rbname: Name of the RAMBlock of the request. NULL means the
1137 * same as the last one.
1138 * @start: starting address from the start of the RAMBlock
1139 * @len: length (in bytes) to send
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001140 */
Juan Quintela96506892017-03-14 18:41:03 +01001141int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001142{
1143 RAMBlock *ramblock;
Juan Quintela53518d92017-05-04 11:46:24 +02001144 RAMState *rs = ram_state;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001145
Juan Quintela93604472017-06-06 19:49:03 +02001146 ram_counters.postcopy_requests++;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001147 rcu_read_lock();
1148 if (!rbname) {
1149 /* Reuse last RAMBlock */
Juan Quintela68a098f2017-03-14 13:48:42 +01001150 ramblock = rs->last_req_rb;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001151
1152 if (!ramblock) {
1153 /*
1154 * Shouldn't happen, we can't reuse the last RAMBlock if
1155 * it's the 1st request.
1156 */
1157 error_report("ram_save_queue_pages no previous block");
1158 goto err;
1159 }
1160 } else {
1161 ramblock = qemu_ram_block_by_name(rbname);
1162
1163 if (!ramblock) {
1164 /* We shouldn't be asked for a non-existent RAMBlock */
1165 error_report("ram_save_queue_pages no block '%s'", rbname);
1166 goto err;
1167 }
Juan Quintela68a098f2017-03-14 13:48:42 +01001168 rs->last_req_rb = ramblock;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001169 }
1170 trace_ram_save_queue_pages(ramblock->idstr, start, len);
1171    if (start + len > ramblock->used_length) {
Juan Quintela9458ad62015-11-10 17:42:05 +01001172 error_report("%s request overrun start=" RAM_ADDR_FMT " len="
1173 RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001174 __func__, start, len, ramblock->used_length);
1175 goto err;
1176 }
1177
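    /*
     * Queue the request for the migration thread to service; take a
     * reference on the memory region so the RAMBlock can't go away while
     * the request is still pending (migration_page_queue_free() drops it
     * again).
     */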
Juan Quintelaec481c62017-03-20 22:12:40 +01001178 struct RAMSrcPageRequest *new_entry =
1179 g_malloc0(sizeof(struct RAMSrcPageRequest));
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001180 new_entry->rb = ramblock;
1181 new_entry->offset = start;
1182 new_entry->len = len;
1183
1184 memory_region_ref(ramblock->mr);
Juan Quintelaec481c62017-03-20 22:12:40 +01001185 qemu_mutex_lock(&rs->src_page_req_mutex);
1186 QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
1187 qemu_mutex_unlock(&rs->src_page_req_mutex);
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001188 rcu_read_unlock();
1189
1190 return 0;
1191
1192err:
1193 rcu_read_unlock();
1194 return -1;
1195}
1196
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001197/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001198 * ram_save_target_page: save one target page
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001199 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001200 * Returns the number of pages written
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001201 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001202 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001204 * @pss: data about the page we want to send
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001205 * @last_stage: if we are at the completion stage
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001206 */
Juan Quintelaa0a8aa12017-03-20 22:29:07 +01001207static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
Juan Quintelaf20e2862017-03-21 16:19:05 +01001208 bool last_stage)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001209{
1210 int res = 0;
1211
1212    /* Check whether the page is dirty and, if it is, send it */
Juan Quintelaf20e2862017-03-21 16:19:05 +01001213 if (migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
Juan Quintela6d358d92017-03-16 21:29:34 +01001214 /*
1215         * If xbzrle is on, stop using data compression after the first
1216         * round of migration even if compression is enabled. In theory,
1217 * xbzrle can do better than compression.
1218 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01001219 if (migrate_use_compression() &&
1220 (rs->ram_bulk_stage || !migrate_use_xbzrle())) {
Juan Quintelaa0a8aa12017-03-20 22:29:07 +01001221 res = ram_save_compressed_page(rs, pss, last_stage);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001222 } else {
Juan Quintelaa0a8aa12017-03-20 22:29:07 +01001223 res = ram_save_page(rs, pss, last_stage);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001224 }
1225
1226 if (res < 0) {
1227 return res;
1228 }
Juan Quintela6b6712e2017-03-22 15:18:04 +01001229 if (pss->block->unsentmap) {
1230 clear_bit(pss->page, pss->block->unsentmap);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001231 }
1232 }
1233
1234 return res;
1235}
1236
1237/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001238 * ram_save_host_page: save a whole host page
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001239 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001240 * Starting at the page described by @pss, send pages up to the end of the
1241 * current host page. It's valid for the initial offset to point into the
1242 * middle of a host page, in which case the remainder of the host page is sent.
1243 * Only dirty target pages are sent. Note that the host page size may
1244 * be a huge page for this block.
Dr. David Alan Gilbert1eb3fc02017-05-17 17:58:09 +01001245 * The saving stops at the boundary of the used_length of the block
1246 * if the RAMBlock isn't a multiple of the host page size.
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001247 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001248 * Returns the number of pages written or negative on error
1249 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001250 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001252 * @pss: data about the page we want to send
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001253 * @last_stage: if we are at the completion stage
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001254 */
Juan Quintelaa0a8aa12017-03-20 22:29:07 +01001255static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
Juan Quintelaf20e2862017-03-21 16:19:05 +01001256 bool last_stage)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001257{
1258 int tmppages, pages = 0;
Juan Quintelaa935e302017-03-21 15:36:51 +01001259 size_t pagesize_bits =
1260 qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
Dr. David Alan Gilbert4c011c32017-02-24 18:28:39 +00001261
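    /*
     * Send each target page in turn; stop once we step past the current
     * host-page boundary or run off the end of the RAMBlock's used length.
     */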
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001262 do {
Juan Quintelaf20e2862017-03-21 16:19:05 +01001263 tmppages = ram_save_target_page(rs, pss, last_stage);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001264 if (tmppages < 0) {
1265 return tmppages;
1266 }
1267
1268 pages += tmppages;
Juan Quintelaa935e302017-03-21 15:36:51 +01001269 pss->page++;
Dr. David Alan Gilbert1eb3fc02017-05-17 17:58:09 +01001270 } while ((pss->page & (pagesize_bits - 1)) &&
1271 offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS));
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001272
1273 /* The offset we leave with is the last one we looked at */
Juan Quintelaa935e302017-03-21 15:36:51 +01001274 pss->page--;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001275 return pages;
1276}
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001277
1278/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001279 * ram_find_and_save_block: finds a dirty page and sends it to f
Juan Quintela56e93d22015-05-07 19:33:31 +02001280 *
1281 * Called within an RCU critical section.
1282 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001283 * Returns the number of pages written where zero means no dirty pages
Juan Quintela56e93d22015-05-07 19:33:31 +02001284 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001285 * @rs: current RAM state
Juan Quintela56e93d22015-05-07 19:33:31 +02001286 * @last_stage: if we are at the completion stage
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001287 *
1288 * On systems where host-page-size > target-page-size it will send all the
1289 * pages in a host page that are dirty.
Juan Quintela56e93d22015-05-07 19:33:31 +02001290 */
1291
Juan Quintelace25d332017-03-15 11:00:51 +01001292static int ram_find_and_save_block(RAMState *rs, bool last_stage)
Juan Quintela56e93d22015-05-07 19:33:31 +02001293{
Dr. David Alan Gilbertb8fb8cb2015-09-23 15:27:10 +01001294 PageSearchStatus pss;
Juan Quintela56e93d22015-05-07 19:33:31 +02001295 int pages = 0;
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001296 bool again, found;
Juan Quintela56e93d22015-05-07 19:33:31 +02001297
Ashijeet Acharya0827b9e2017-02-08 19:58:45 +05301298    /* No dirty pages, as there is zero RAM */
1299 if (!ram_bytes_total()) {
1300 return pages;
1301 }
1302
Juan Quintela6f37bb82017-03-13 19:26:29 +01001303 pss.block = rs->last_seen_block;
Juan Quintelaa935e302017-03-21 15:36:51 +01001304 pss.page = rs->last_page;
Dr. David Alan Gilbertb8fb8cb2015-09-23 15:27:10 +01001305 pss.complete_round = false;
1306
1307 if (!pss.block) {
1308 pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
1309 }
Juan Quintela56e93d22015-05-07 19:33:31 +02001310
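    /*
     * Prefer pages that were explicitly queued (e.g. postcopy requests);
     * if the queue is empty, fall back to scanning for the next dirty page.
     */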
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001311 do {
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001312 again = true;
Juan Quintelaf20e2862017-03-21 16:19:05 +01001313 found = get_queued_page(rs, &pss);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001314
1315 if (!found) {
1316 /* priority queue empty, so just search for something dirty */
Juan Quintelaf20e2862017-03-21 16:19:05 +01001317 found = find_dirty_block(rs, &pss, &again);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001318 }
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001319
1320 if (found) {
Juan Quintelaf20e2862017-03-21 16:19:05 +01001321 pages = ram_save_host_page(rs, &pss, last_stage);
Juan Quintela56e93d22015-05-07 19:33:31 +02001322 }
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001323 } while (!pages && again);
Juan Quintela56e93d22015-05-07 19:33:31 +02001324
Juan Quintela6f37bb82017-03-13 19:26:29 +01001325 rs->last_seen_block = pss.block;
Juan Quintelaa935e302017-03-21 15:36:51 +01001326 rs->last_page = pss.page;
Juan Quintela56e93d22015-05-07 19:33:31 +02001327
1328 return pages;
1329}
1330
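/*
 * acct_update_position: account for @size bytes of pages: zero pages are
 * counted as duplicates, anything else is added to the normal and
 * transferred counters and advances the QEMUFile position.
 */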
1331void acct_update_position(QEMUFile *f, size_t size, bool zero)
1332{
1333 uint64_t pages = size / TARGET_PAGE_SIZE;
Juan Quintelaf7ccd612017-03-13 20:30:21 +01001334
Juan Quintela56e93d22015-05-07 19:33:31 +02001335 if (zero) {
Juan Quintela93604472017-06-06 19:49:03 +02001336 ram_counters.duplicate += pages;
Juan Quintela56e93d22015-05-07 19:33:31 +02001337 } else {
Juan Quintela93604472017-06-06 19:49:03 +02001338 ram_counters.normal += pages;
1339 ram_counters.transferred += size;
Juan Quintela56e93d22015-05-07 19:33:31 +02001340 qemu_update_position(f, size);
1341 }
1342}
1343
Juan Quintela56e93d22015-05-07 19:33:31 +02001344uint64_t ram_bytes_total(void)
1345{
1346 RAMBlock *block;
1347 uint64_t total = 0;
1348
1349 rcu_read_lock();
Peter Xu99e15582017-05-12 12:17:39 +08001350 RAMBLOCK_FOREACH(block) {
Juan Quintela56e93d22015-05-07 19:33:31 +02001351 total += block->used_length;
Peter Xu99e15582017-05-12 12:17:39 +08001352 }
Juan Quintela56e93d22015-05-07 19:33:31 +02001353 rcu_read_unlock();
1354 return total;
1355}
1356
1357void free_xbzrle_decoded_buf(void)
1358{
1359 g_free(xbzrle_decoded_buf);
1360 xbzrle_decoded_buf = NULL;
1361}
1362
Liang Li6ad2a212015-11-02 15:37:03 +08001363static void ram_migration_cleanup(void *opaque)
Juan Quintela56e93d22015-05-07 19:33:31 +02001364{
Juan Quintela53518d92017-05-04 11:46:24 +02001365 RAMState **rsp = opaque;
Juan Quintela6b6712e2017-03-22 15:18:04 +01001366 RAMBlock *block;
Juan Quintelaeb859c52017-03-13 21:51:55 +01001367
Li Zhijian2ff64032015-07-02 20:18:05 +08001368    /* The caller must hold the iothread lock or be in a bh, so there is
1369     * no write race against this migration bitmap
1370 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01001371 memory_global_dirty_log_stop();
1372
1373 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1374 g_free(block->bmap);
1375 block->bmap = NULL;
1376 g_free(block->unsentmap);
1377 block->unsentmap = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02001378 }
1379
1380 XBZRLE_cache_lock();
1381 if (XBZRLE.cache) {
1382 cache_fini(XBZRLE.cache);
1383 g_free(XBZRLE.encoded_buf);
1384 g_free(XBZRLE.current_buf);
Juan Quintelac00e0922017-05-09 16:22:01 +02001385 g_free(XBZRLE.zero_target_page);
Juan Quintela56e93d22015-05-07 19:33:31 +02001386 XBZRLE.cache = NULL;
1387 XBZRLE.encoded_buf = NULL;
1388 XBZRLE.current_buf = NULL;
Juan Quintelac00e0922017-05-09 16:22:01 +02001389 XBZRLE.zero_target_page = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02001390 }
1391 XBZRLE_cache_unlock();
Juan Quintela53518d92017-05-04 11:46:24 +02001392 migration_page_queue_free(*rsp);
1393 g_free(*rsp);
1394 *rsp = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02001395}
1396
Juan Quintela6f37bb82017-03-13 19:26:29 +01001397static void ram_state_reset(RAMState *rs)
Juan Quintela56e93d22015-05-07 19:33:31 +02001398{
Juan Quintela6f37bb82017-03-13 19:26:29 +01001399 rs->last_seen_block = NULL;
1400 rs->last_sent_block = NULL;
Juan Quintela269ace22017-03-21 15:23:31 +01001401 rs->last_page = 0;
Juan Quintela6f37bb82017-03-13 19:26:29 +01001402 rs->last_version = ram_list.version;
1403 rs->ram_bulk_stage = true;
Juan Quintela56e93d22015-05-07 19:33:31 +02001404}
1405
1406#define MAX_WAIT 50 /* ms, half buffered_file limit */
1407
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00001408/*
1409 * 'expected' is the value you expect the bitmap mostly to be full
1410 * of; it won't bother printing lines that are all this value.
1412 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01001413void ram_debug_dump_bitmap(unsigned long *todump, bool expected,
1414 unsigned long pages)
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00001415{
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00001416 int64_t cur;
1417 int64_t linelen = 128;
1418 char linebuf[129];
1419
Juan Quintela6b6712e2017-03-22 15:18:04 +01001420 for (cur = 0; cur < pages; cur += linelen) {
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00001421 int64_t curb;
1422 bool found = false;
1423 /*
1424 * Last line; catch the case where the line length
1425 * is longer than remaining ram
1426 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01001427 if (cur + linelen > pages) {
1428 linelen = pages - cur;
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00001429 }
1430 for (curb = 0; curb < linelen; curb++) {
1431 bool thisbit = test_bit(cur + curb, todump);
1432 linebuf[curb] = thisbit ? '1' : '.';
1433 found = found || (thisbit != expected);
1434 }
1435 if (found) {
1436 linebuf[curb] = '\0';
1437 fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
1438 }
1439 }
1440}
1441
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001442/* **** functions for postcopy ***** */
1443
Pavel Butsykinced1c612017-02-03 18:23:21 +03001444void ram_postcopy_migrated_memory_release(MigrationState *ms)
1445{
1446 struct RAMBlock *block;
Pavel Butsykinced1c612017-02-03 18:23:21 +03001447
Peter Xu99e15582017-05-12 12:17:39 +08001448 RAMBLOCK_FOREACH(block) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01001449 unsigned long *bitmap = block->bmap;
1450 unsigned long range = block->used_length >> TARGET_PAGE_BITS;
1451 unsigned long run_start = find_next_zero_bit(bitmap, range, 0);
Pavel Butsykinced1c612017-02-03 18:23:21 +03001452
1453 while (run_start < range) {
1454 unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
Juan Quintelaaaa20642017-03-21 11:35:24 +01001455 ram_discard_range(block->idstr, run_start << TARGET_PAGE_BITS,
Pavel Butsykinced1c612017-02-03 18:23:21 +03001456 (run_end - run_start) << TARGET_PAGE_BITS);
1457 run_start = find_next_zero_bit(bitmap, range, run_end + 1);
1458 }
1459 }
1460}
1461
Juan Quintela3d0684b2017-03-23 15:06:39 +01001462/**
1463 * postcopy_send_discard_bm_ram: discard a RAMBlock
1464 *
1465 * Returns zero on success
1466 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001467 * Callback from postcopy_each_ram_send_discard for each RAMBlock
1468 * Note: At this point the 'unsentmap' is the processed bitmap combined
1469 * with the dirtymap; so a '1' means it's either dirty or unsent.
Juan Quintela3d0684b2017-03-23 15:06:39 +01001470 *
1471 * @ms: current migration state
1472 * @pds: state for postcopy
1473 * @block: RAMBlock to send discards for
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001475 */
1476static int postcopy_send_discard_bm_ram(MigrationState *ms,
1477 PostcopyDiscardState *pds,
Juan Quintela6b6712e2017-03-22 15:18:04 +01001478 RAMBlock *block)
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001479{
Juan Quintela6b6712e2017-03-22 15:18:04 +01001480 unsigned long end = block->used_length >> TARGET_PAGE_BITS;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001481 unsigned long current;
Juan Quintela6b6712e2017-03-22 15:18:04 +01001482 unsigned long *unsentmap = block->unsentmap;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001483
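    /*
     * Walk the unsentmap looking for runs of set bits; each run is a range
     * of pages that is still unsent (or dirty) and is sent to the
     * destination as a single discard request.
     */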
Juan Quintela6b6712e2017-03-22 15:18:04 +01001484 for (current = 0; current < end; ) {
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001485 unsigned long one = find_next_bit(unsentmap, end, current);
1486
1487 if (one <= end) {
1488 unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1);
1489 unsigned long discard_length;
1490
1491 if (zero >= end) {
1492 discard_length = end - one;
1493 } else {
1494 discard_length = zero - one;
1495 }
Dr. David Alan Gilbertd688c622016-06-13 12:16:40 +01001496 if (discard_length) {
1497 postcopy_discard_send_range(ms, pds, one, discard_length);
1498 }
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001499 current = one + discard_length;
1500 } else {
1501 current = one;
1502 }
1503 }
1504
1505 return 0;
1506}
1507
Juan Quintela3d0684b2017-03-23 15:06:39 +01001508/**
1509 * postcopy_each_ram_send_discard: discard all RAMBlocks
1510 *
1511 * Returns 0 for success or negative for error
1512 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001513 * Utility for the outgoing postcopy code.
1514 * Calls postcopy_send_discard_bm_ram for each RAMBlock
1515 * passing it bitmap indexes and name.
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001516 * (qemu_ram_foreach_block ends up passing unscaled lengths
1517 * which would mean postcopy code would have to deal with target page)
Juan Quintela3d0684b2017-03-23 15:06:39 +01001518 *
1519 * @ms: current migration state
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001520 */
1521static int postcopy_each_ram_send_discard(MigrationState *ms)
1522{
1523 struct RAMBlock *block;
1524 int ret;
1525
Peter Xu99e15582017-05-12 12:17:39 +08001526 RAMBLOCK_FOREACH(block) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01001527 PostcopyDiscardState *pds =
1528 postcopy_discard_send_init(ms, block->idstr);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001529
1530 /*
1531 * Postcopy sends chunks of bitmap over the wire, but it
1532 * just needs indexes at this point, avoids it having
1533 * target page specific code.
1534 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01001535 ret = postcopy_send_discard_bm_ram(ms, pds, block);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001536 postcopy_discard_send_finish(ms, pds);
1537 if (ret) {
1538 return ret;
1539 }
1540 }
1541
1542 return 0;
1543}
1544
Juan Quintela3d0684b2017-03-23 15:06:39 +01001545/**
1546 * postcopy_chunk_hostpages_pass: canonicalize bitmap in host pages
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001547 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001548 * Helper for postcopy_chunk_hostpages; it's called twice to
1549 * canonicalize the two bitmaps, that are similar, but one is
1550 * inverted.
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001551 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001552 * Postcopy requires that all target pages in a hostpage are dirty or
1553 * clean, not a mix. This function canonicalizes the bitmaps.
1554 *
1555 * @ms: current migration state
1556 * @unsent_pass: if true we need to canonicalize partially unsent host pages
1557 * otherwise we need to canonicalize partially dirty host pages
1558 * @block: block that contains the page we want to canonicalize
1559 * @pds: state for postcopy
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001560 */
1561static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
1562 RAMBlock *block,
1563 PostcopyDiscardState *pds)
1564{
Juan Quintela53518d92017-05-04 11:46:24 +02001565 RAMState *rs = ram_state;
Juan Quintela6b6712e2017-03-22 15:18:04 +01001566 unsigned long *bitmap = block->bmap;
1567 unsigned long *unsentmap = block->unsentmap;
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00001568 unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
Juan Quintela6b6712e2017-03-22 15:18:04 +01001569 unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001570 unsigned long run_start;
1571
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00001572 if (block->page_size == TARGET_PAGE_SIZE) {
1573 /* Easy case - TPS==HPS for a non-huge page RAMBlock */
1574 return;
1575 }
1576
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001577 if (unsent_pass) {
1578 /* Find a sent page */
Juan Quintela6b6712e2017-03-22 15:18:04 +01001579 run_start = find_next_zero_bit(unsentmap, pages, 0);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001580 } else {
1581 /* Find a dirty page */
Juan Quintela6b6712e2017-03-22 15:18:04 +01001582 run_start = find_next_bit(bitmap, pages, 0);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001583 }
1584
Juan Quintela6b6712e2017-03-22 15:18:04 +01001585 while (run_start < pages) {
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001586 bool do_fixup = false;
1587 unsigned long fixup_start_addr;
1588 unsigned long host_offset;
1589
1590 /*
1591 * If the start of this run of pages is in the middle of a host
1592 * page, then we need to fixup this host page.
1593 */
1594 host_offset = run_start % host_ratio;
1595 if (host_offset) {
1596 do_fixup = true;
1597 run_start -= host_offset;
1598 fixup_start_addr = run_start;
1599 /* For the next pass */
1600 run_start = run_start + host_ratio;
1601 } else {
1602 /* Find the end of this run */
1603 unsigned long run_end;
1604 if (unsent_pass) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01001605 run_end = find_next_bit(unsentmap, pages, run_start + 1);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001606 } else {
Juan Quintela6b6712e2017-03-22 15:18:04 +01001607 run_end = find_next_zero_bit(bitmap, pages, run_start + 1);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001608 }
1609 /*
1610 * If the end isn't at the start of a host page, then the
1611 * run doesn't finish at the end of a host page
1612 * and we need to discard.
1613 */
1614 host_offset = run_end % host_ratio;
1615 if (host_offset) {
1616 do_fixup = true;
1617 fixup_start_addr = run_end - host_offset;
1618 /*
1619 * This host page has gone, the next loop iteration starts
1620 * from after the fixup
1621 */
1622 run_start = fixup_start_addr + host_ratio;
1623 } else {
1624 /*
1625 * No discards on this iteration, next loop starts from
1626 * next sent/dirty page
1627 */
1628 run_start = run_end + 1;
1629 }
1630 }
1631
1632 if (do_fixup) {
1633 unsigned long page;
1634
1635 /* Tell the destination to discard this page */
1636 if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
1637 /* For the unsent_pass we:
1638 * discard partially sent pages
1639 * For the !unsent_pass (dirty) we:
1640 * discard partially dirty pages that were sent
1641 * (any partially sent pages were already discarded
1642 * by the previous unsent_pass)
1643 */
1644 postcopy_discard_send_range(ms, pds, fixup_start_addr,
1645 host_ratio);
1646 }
1647
1648 /* Clean up the bitmap */
1649 for (page = fixup_start_addr;
1650 page < fixup_start_addr + host_ratio; page++) {
1651 /* All pages in this host page are now not sent */
1652 set_bit(page, unsentmap);
1653
1654 /*
1655 * Remark them as dirty, updating the count for any pages
1656 * that weren't previously dirty.
1657 */
Juan Quintela0d8ec882017-03-13 21:21:41 +01001658 rs->migration_dirty_pages += !test_and_set_bit(page, bitmap);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001659 }
1660 }
1661
1662 if (unsent_pass) {
1663 /* Find the next sent page for the next iteration */
Juan Quintela6b6712e2017-03-22 15:18:04 +01001664 run_start = find_next_zero_bit(unsentmap, pages, run_start);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001665 } else {
1666 /* Find the next dirty page for the next iteration */
Juan Quintela6b6712e2017-03-22 15:18:04 +01001667 run_start = find_next_bit(bitmap, pages, run_start);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001668 }
1669 }
1670}
1671
Juan Quintela3d0684b2017-03-23 15:06:39 +01001672/**
1673 * postcopy_chunk_hostpages: discard any partially sent host page
1674 *
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001675 * Utility for the outgoing postcopy code.
1676 *
1677 * Discard any partially sent host-page size chunks, mark any partially
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00001678 * dirty host-page size chunks as all dirty. In this case the host-page
1679 * is the host-page for the particular RAMBlock, i.e. it might be a huge page
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001680 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001681 * Returns zero on success
1682 *
1683 * @ms: current migration state
Juan Quintela6b6712e2017-03-22 15:18:04 +01001684 * @block: block we want to work with
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001685 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01001686static int postcopy_chunk_hostpages(MigrationState *ms, RAMBlock *block)
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001687{
Juan Quintela6b6712e2017-03-22 15:18:04 +01001688 PostcopyDiscardState *pds =
1689 postcopy_discard_send_init(ms, block->idstr);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001690
Juan Quintela6b6712e2017-03-22 15:18:04 +01001691 /* First pass: Discard all partially sent host pages */
1692 postcopy_chunk_hostpages_pass(ms, true, block, pds);
1693 /*
1694 * Second pass: Ensure that all partially dirty host pages are made
1695 * fully dirty.
1696 */
1697 postcopy_chunk_hostpages_pass(ms, false, block, pds);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001698
Juan Quintela6b6712e2017-03-22 15:18:04 +01001699 postcopy_discard_send_finish(ms, pds);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001700 return 0;
1701}
1702
Juan Quintela3d0684b2017-03-23 15:06:39 +01001703/**
1704 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
1705 *
1706 * Returns zero on success
1707 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001708 * Transmit the set of pages to be discarded after precopy to the target;
1709 * these are pages that:
1710 * a) Have been previously transmitted but are now dirty again
1711 * b) Pages that have never been transmitted, this ensures that
1712 * any pages on the destination that have been mapped by background
1713 * tasks get discarded (transparent huge pages is the specific concern)
1714 * Hopefully this is pretty sparse
Juan Quintela3d0684b2017-03-23 15:06:39 +01001715 *
1716 * @ms: current migration state
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001717 */
1718int ram_postcopy_send_discard_bitmap(MigrationState *ms)
1719{
Juan Quintela53518d92017-05-04 11:46:24 +02001720 RAMState *rs = ram_state;
Juan Quintela6b6712e2017-03-22 15:18:04 +01001721 RAMBlock *block;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001722 int ret;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001723
1724 rcu_read_lock();
1725
1726 /* This should be our last sync, the src is now paused */
Juan Quintelaeb859c52017-03-13 21:51:55 +01001727 migration_bitmap_sync(rs);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001728
Juan Quintela6b6712e2017-03-22 15:18:04 +01001729 /* Easiest way to make sure we don't resume in the middle of a host-page */
1730 rs->last_seen_block = NULL;
1731 rs->last_sent_block = NULL;
1732 rs->last_page = 0;
1733
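    /*
     * For each block: make every host page uniformly sent/unsent and
     * dirty/clean, then fold the dirty bitmap into the unsentmap so that
     * both kinds of page end up discarded on the destination.
     */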
1734 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1735 unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
1736 unsigned long *bitmap = block->bmap;
1737 unsigned long *unsentmap = block->unsentmap;
1738
1739 if (!unsentmap) {
1740            /* We don't have a safe way to resize the unsentmap, so
1741 * if the bitmap was resized it will be NULL at this
1742 * point.
1743 */
1744 error_report("migration ram resized during precopy phase");
1745 rcu_read_unlock();
1746 return -EINVAL;
1747 }
1748 /* Deal with TPS != HPS and huge pages */
1749 ret = postcopy_chunk_hostpages(ms, block);
1750 if (ret) {
1751 rcu_read_unlock();
1752 return ret;
1753 }
1754
1755 /*
1756 * Update the unsentmap to be unsentmap = unsentmap | dirty
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001757 */
Juan Quintela6b6712e2017-03-22 15:18:04 +01001758 bitmap_or(unsentmap, unsentmap, bitmap, pages);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001759#ifdef DEBUG_POSTCOPY
Juan Quintela6b6712e2017-03-22 15:18:04 +01001760 ram_debug_dump_bitmap(unsentmap, true, pages);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001761#endif
Juan Quintela6b6712e2017-03-22 15:18:04 +01001762 }
1763 trace_ram_postcopy_send_discard_bitmap();
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001764
1765 ret = postcopy_each_ram_send_discard(ms);
1766 rcu_read_unlock();
1767
1768 return ret;
1769}
1770
Juan Quintela3d0684b2017-03-23 15:06:39 +01001771/**
1772 * ram_discard_range: discard dirtied pages at the beginning of postcopy
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001773 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001774 * Returns zero on success
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001775 *
Juan Quintela36449152017-03-23 15:11:59 +01001776 * @rbname: name of the RAMBlock of the request
Juan Quintela3d0684b2017-03-23 15:06:39 +01001778 * @start: byte offset into the RAMBlock at which to start discarding
1779 * @length: number of bytes to discard
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001780 */
Juan Quintelaaaa20642017-03-21 11:35:24 +01001781int ram_discard_range(const char *rbname, uint64_t start, size_t length)
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001782{
1783 int ret = -1;
1784
Juan Quintela36449152017-03-23 15:11:59 +01001785 trace_ram_discard_range(rbname, start, length);
Dr. David Alan Gilbertd3a50382017-02-24 18:28:32 +00001786
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001787 rcu_read_lock();
Juan Quintela36449152017-03-23 15:11:59 +01001788 RAMBlock *rb = qemu_ram_block_by_name(rbname);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001789
1790 if (!rb) {
Juan Quintela36449152017-03-23 15:11:59 +01001791 error_report("ram_discard_range: Failed to find block '%s'", rbname);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001792 goto err;
1793 }
1794
Dr. David Alan Gilbertd3a50382017-02-24 18:28:32 +00001795 ret = ram_block_discard_range(rb, start, length);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001796
1797err:
1798 rcu_read_unlock();
1799
1800 return ret;
1801}
1802
Juan Quintela53518d92017-05-04 11:46:24 +02001803static int ram_state_init(RAMState **rsp)
Juan Quintela56e93d22015-05-07 19:33:31 +02001804{
Juan Quintela53518d92017-05-04 11:46:24 +02001805 *rsp = g_new0(RAMState, 1);
1806
1807 qemu_mutex_init(&(*rsp)->bitmap_mutex);
1808 qemu_mutex_init(&(*rsp)->src_page_req_mutex);
1809 QSIMPLEQ_INIT(&(*rsp)->src_page_requests);
Juan Quintela56e93d22015-05-07 19:33:31 +02001810
1811 if (migrate_use_xbzrle()) {
1812 XBZRLE_cache_lock();
Juan Quintelac00e0922017-05-09 16:22:01 +02001813 XBZRLE.zero_target_page = g_malloc0(TARGET_PAGE_SIZE);
Juan Quintela56e93d22015-05-07 19:33:31 +02001814 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
1815 TARGET_PAGE_SIZE,
1816 TARGET_PAGE_SIZE);
1817 if (!XBZRLE.cache) {
1818 XBZRLE_cache_unlock();
1819 error_report("Error creating cache");
Juan Quintela53518d92017-05-04 11:46:24 +02001820 g_free(*rsp);
1821 *rsp = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02001822 return -1;
1823 }
1824 XBZRLE_cache_unlock();
1825
1826 /* We prefer not to abort if there is no memory */
1827 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
1828 if (!XBZRLE.encoded_buf) {
1829 error_report("Error allocating encoded_buf");
Juan Quintela53518d92017-05-04 11:46:24 +02001830 g_free(*rsp);
1831 *rsp = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02001832 return -1;
1833 }
1834
1835 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
1836 if (!XBZRLE.current_buf) {
1837 error_report("Error allocating current_buf");
1838 g_free(XBZRLE.encoded_buf);
1839 XBZRLE.encoded_buf = NULL;
Juan Quintela53518d92017-05-04 11:46:24 +02001840 g_free(*rsp);
1841 *rsp = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02001842 return -1;
1843 }
Juan Quintela56e93d22015-05-07 19:33:31 +02001844 }
1845
Paolo Bonzini49877832016-02-15 19:57:57 +01001846 /* For memory_global_dirty_log_start below. */
1847 qemu_mutex_lock_iothread();
1848
Juan Quintela56e93d22015-05-07 19:33:31 +02001849 qemu_mutex_lock_ramlist();
1850 rcu_read_lock();
Juan Quintela53518d92017-05-04 11:46:24 +02001851 ram_state_reset(*rsp);
Juan Quintela56e93d22015-05-07 19:33:31 +02001852
Ashijeet Acharya0827b9e2017-02-08 19:58:45 +05301853 /* Skip setting bitmap if there is no RAM */
1854 if (ram_bytes_total()) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01001855 RAMBlock *block;
Juan Quintela56e93d22015-05-07 19:33:31 +02001856
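        /*
         * Start off with every page marked dirty so the whole of RAM is
         * sent at least once; postcopy additionally keeps an unsentmap so
         * pages that were never transmitted can be discarded later.
         */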
Juan Quintela6b6712e2017-03-22 15:18:04 +01001857 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1858 unsigned long pages = block->max_length >> TARGET_PAGE_BITS;
1859
1860 block->bmap = bitmap_new(pages);
1861 bitmap_set(block->bmap, 0, pages);
1862 if (migrate_postcopy_ram()) {
1863 block->unsentmap = bitmap_new(pages);
1864 bitmap_set(block->unsentmap, 0, pages);
1865 }
Ashijeet Acharya0827b9e2017-02-08 19:58:45 +05301866 }
Dr. David Alan Gilbertf3f491f2015-11-05 18:11:01 +00001867 }
1868
Juan Quintela56e93d22015-05-07 19:33:31 +02001869 /*
1870 * Count the total number of pages used by ram blocks not including any
1871 * gaps due to alignment or unplugs.
1872 */
Juan Quintela53518d92017-05-04 11:46:24 +02001873 (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
Juan Quintela56e93d22015-05-07 19:33:31 +02001874
1875 memory_global_dirty_log_start();
Juan Quintela53518d92017-05-04 11:46:24 +02001876 migration_bitmap_sync(*rsp);
Juan Quintela56e93d22015-05-07 19:33:31 +02001877 qemu_mutex_unlock_ramlist();
Paolo Bonzini49877832016-02-15 19:57:57 +01001878 qemu_mutex_unlock_iothread();
zhanghailianga91246c2016-10-27 14:42:59 +08001879 rcu_read_unlock();
1880
1881 return 0;
1882}
1883
Juan Quintela3d0684b2017-03-23 15:06:39 +01001884/*
1885 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
zhanghailianga91246c2016-10-27 14:42:59 +08001886 * a long-running RCU critical section. When rcu-reclaims in the code
1887 * start to become numerous it will be necessary to reduce the
1888 * granularity of these critical sections.
1889 */
1890
Juan Quintela3d0684b2017-03-23 15:06:39 +01001891/**
1892 * ram_save_setup: Setup RAM for migration
1893 *
1894 * Returns zero to indicate success and negative for error
1895 *
1896 * @f: QEMUFile where to send the data
1897 * @opaque: RAMState pointer
1898 */
zhanghailianga91246c2016-10-27 14:42:59 +08001899static int ram_save_setup(QEMUFile *f, void *opaque)
1900{
Juan Quintela53518d92017-05-04 11:46:24 +02001901 RAMState **rsp = opaque;
zhanghailianga91246c2016-10-27 14:42:59 +08001902 RAMBlock *block;
1903
1904 /* migration has already setup the bitmap, reuse it. */
1905 if (!migration_in_colo_state()) {
Juan Quintela53518d92017-05-04 11:46:24 +02001906 if (ram_state_init(rsp) != 0) {
zhanghailianga91246c2016-10-27 14:42:59 +08001907 return -1;
Juan Quintela53518d92017-05-04 11:46:24 +02001908 }
zhanghailianga91246c2016-10-27 14:42:59 +08001909 }
Juan Quintela53518d92017-05-04 11:46:24 +02001910 (*rsp)->f = f;
zhanghailianga91246c2016-10-27 14:42:59 +08001911
1912 rcu_read_lock();
Juan Quintela56e93d22015-05-07 19:33:31 +02001913
1914 qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
1915
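    /*
     * Describe every RAMBlock to the destination: its id string and used
     * length, plus its page size when postcopy is enabled and the block
     * doesn't use the normal host page size.
     */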
Peter Xu99e15582017-05-12 12:17:39 +08001916 RAMBLOCK_FOREACH(block) {
Juan Quintela56e93d22015-05-07 19:33:31 +02001917 qemu_put_byte(f, strlen(block->idstr));
1918 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
1919 qemu_put_be64(f, block->used_length);
Dr. David Alan Gilbertef08fb32017-02-24 18:28:30 +00001920 if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
1921 qemu_put_be64(f, block->page_size);
1922 }
Juan Quintela56e93d22015-05-07 19:33:31 +02001923 }
1924
1925 rcu_read_unlock();
1926
1927 ram_control_before_iterate(f, RAM_CONTROL_SETUP);
1928 ram_control_after_iterate(f, RAM_CONTROL_SETUP);
1929
1930 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
1931
1932 return 0;
1933}
1934
Juan Quintela3d0684b2017-03-23 15:06:39 +01001935/**
1936 * ram_save_iterate: iterative stage for migration
1937 *
1938 * Returns zero to indicate success and negative for error
1939 *
1940 * @f: QEMUFile where to send the data
1941 * @opaque: RAMState pointer
1942 */
Juan Quintela56e93d22015-05-07 19:33:31 +02001943static int ram_save_iterate(QEMUFile *f, void *opaque)
1944{
Juan Quintela53518d92017-05-04 11:46:24 +02001945 RAMState **temp = opaque;
1946 RAMState *rs = *temp;
Juan Quintela56e93d22015-05-07 19:33:31 +02001947 int ret;
1948 int i;
1949 int64_t t0;
Thomas Huth5c903082016-11-04 14:10:17 +01001950 int done = 0;
Juan Quintela56e93d22015-05-07 19:33:31 +02001951
1952 rcu_read_lock();
Juan Quintela6f37bb82017-03-13 19:26:29 +01001953 if (ram_list.version != rs->last_version) {
1954 ram_state_reset(rs);
Juan Quintela56e93d22015-05-07 19:33:31 +02001955 }
1956
1957 /* Read version before ram_list.blocks */
1958 smp_rmb();
1959
1960 ram_control_before_iterate(f, RAM_CONTROL_ROUND);
1961
1962 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
1963 i = 0;
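    /*
     * Keep sending pages until the rate limiter says stop, we run out of
     * dirty pages, or we have spent too long in this iteration.
     */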
1964 while ((ret = qemu_file_rate_limit(f)) == 0) {
1965 int pages;
1966
Juan Quintelace25d332017-03-15 11:00:51 +01001967 pages = ram_find_and_save_block(rs, false);
Juan Quintela56e93d22015-05-07 19:33:31 +02001968        /* no more pages to send */
1969 if (pages == 0) {
Thomas Huth5c903082016-11-04 14:10:17 +01001970 done = 1;
Juan Quintela56e93d22015-05-07 19:33:31 +02001971 break;
1972 }
Juan Quintela23b28c32017-03-13 20:51:34 +01001973 rs->iterations++;
Jason J. Herne070afca2015-09-08 13:12:35 -04001974
Juan Quintela56e93d22015-05-07 19:33:31 +02001975 /* we want to check in the 1st loop, just in case it was the 1st time
1976 and we had to sync the dirty bitmap.
1977           qemu_clock_get_ns() is a bit expensive, so we only check every
1978           few iterations
1979 */
1980 if ((i & 63) == 0) {
1981 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
1982 if (t1 > MAX_WAIT) {
Juan Quintela55c44462017-01-23 22:32:05 +01001983 trace_ram_save_iterate_big_wait(t1, i);
Juan Quintela56e93d22015-05-07 19:33:31 +02001984 break;
1985 }
1986 }
1987 i++;
1988 }
Juan Quintelace25d332017-03-15 11:00:51 +01001989 flush_compressed_data(rs);
Juan Quintela56e93d22015-05-07 19:33:31 +02001990 rcu_read_unlock();
1991
1992 /*
1993 * Must occur before EOS (or any QEMUFile operation)
1994 * because of RDMA protocol.
1995 */
1996 ram_control_after_iterate(f, RAM_CONTROL_ROUND);
1997
1998 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
Juan Quintela93604472017-06-06 19:49:03 +02001999 ram_counters.transferred += 8;
Juan Quintela56e93d22015-05-07 19:33:31 +02002000
2001 ret = qemu_file_get_error(f);
2002 if (ret < 0) {
2003 return ret;
2004 }
2005
Thomas Huth5c903082016-11-04 14:10:17 +01002006 return done;
Juan Quintela56e93d22015-05-07 19:33:31 +02002007}
2008
Juan Quintela3d0684b2017-03-23 15:06:39 +01002009/**
2010 * ram_save_complete: function called to send the remaining amount of ram
2011 *
2012 * Returns zero to indicate success
2013 *
2014 * Called with iothread lock
2015 *
2016 * @f: QEMUFile where to send the data
2017 * @opaque: RAMState pointer
2018 */
Juan Quintela56e93d22015-05-07 19:33:31 +02002019static int ram_save_complete(QEMUFile *f, void *opaque)
2020{
Juan Quintela53518d92017-05-04 11:46:24 +02002021 RAMState **temp = opaque;
2022 RAMState *rs = *temp;
Juan Quintela6f37bb82017-03-13 19:26:29 +01002023
Juan Quintela56e93d22015-05-07 19:33:31 +02002024 rcu_read_lock();
2025
Juan Quintela57273092017-03-20 22:25:28 +01002026 if (!migration_in_postcopy()) {
Juan Quintela8d820d62017-03-13 19:35:50 +01002027 migration_bitmap_sync(rs);
Dr. David Alan Gilbert663e6c12015-11-05 18:11:13 +00002028 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002029
2030 ram_control_before_iterate(f, RAM_CONTROL_FINISH);
2031
2032 /* try transferring iterative blocks of memory */
2033
2034 /* flush all remaining blocks regardless of rate limiting */
2035 while (true) {
2036 int pages;
2037
Juan Quintelace25d332017-03-15 11:00:51 +01002038 pages = ram_find_and_save_block(rs, !migration_in_colo_state());
Juan Quintela56e93d22015-05-07 19:33:31 +02002039        /* no more blocks to send */
2040 if (pages == 0) {
2041 break;
2042 }
2043 }
2044
Juan Quintelace25d332017-03-15 11:00:51 +01002045 flush_compressed_data(rs);
Juan Quintela56e93d22015-05-07 19:33:31 +02002046 ram_control_after_iterate(f, RAM_CONTROL_FINISH);
Juan Quintela56e93d22015-05-07 19:33:31 +02002047
2048 rcu_read_unlock();
Paolo Bonzinid09a6fd2015-07-09 08:47:58 +02002049
Juan Quintela56e93d22015-05-07 19:33:31 +02002050 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2051
2052 return 0;
2053}
2054
Dr. David Alan Gilbertc31b0982015-11-05 18:10:54 +00002055static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
2056 uint64_t *non_postcopiable_pending,
2057 uint64_t *postcopiable_pending)
Juan Quintela56e93d22015-05-07 19:33:31 +02002058{
Juan Quintela53518d92017-05-04 11:46:24 +02002059 RAMState **temp = opaque;
2060 RAMState *rs = *temp;
Juan Quintela56e93d22015-05-07 19:33:31 +02002061 uint64_t remaining_size;
2062
Juan Quintela9edabd42017-03-14 12:02:16 +01002063 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
Juan Quintela56e93d22015-05-07 19:33:31 +02002064
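    /*
     * If what is left is small enough that we might be about to finish,
     * take the iothread lock and re-sync the dirty bitmap to get an
     * up-to-date estimate before reporting it.
     */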
Juan Quintela57273092017-03-20 22:25:28 +01002065 if (!migration_in_postcopy() &&
Dr. David Alan Gilbert663e6c12015-11-05 18:11:13 +00002066 remaining_size < max_size) {
Juan Quintela56e93d22015-05-07 19:33:31 +02002067 qemu_mutex_lock_iothread();
2068 rcu_read_lock();
Juan Quintela8d820d62017-03-13 19:35:50 +01002069 migration_bitmap_sync(rs);
Juan Quintela56e93d22015-05-07 19:33:31 +02002070 rcu_read_unlock();
2071 qemu_mutex_unlock_iothread();
Juan Quintela9edabd42017-03-14 12:02:16 +01002072 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
Juan Quintela56e93d22015-05-07 19:33:31 +02002073 }
Dr. David Alan Gilbertc31b0982015-11-05 18:10:54 +00002074
2075 /* We can do postcopy, and all the data is postcopiable */
2076 *postcopiable_pending += remaining_size;
Juan Quintela56e93d22015-05-07 19:33:31 +02002077}
2078
2079static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
2080{
2081 unsigned int xh_len;
2082 int xh_flags;
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00002083 uint8_t *loaded_data;
Juan Quintela56e93d22015-05-07 19:33:31 +02002084
2085 if (!xbzrle_decoded_buf) {
2086 xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
2087 }
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00002088 loaded_data = xbzrle_decoded_buf;
Juan Quintela56e93d22015-05-07 19:33:31 +02002089
2090 /* extract RLE header */
2091 xh_flags = qemu_get_byte(f);
2092 xh_len = qemu_get_be16(f);
2093
2094 if (xh_flags != ENCODING_FLAG_XBZRLE) {
2095 error_report("Failed to load XBZRLE page - wrong compression!");
2096 return -1;
2097 }
2098
2099 if (xh_len > TARGET_PAGE_SIZE) {
2100 error_report("Failed to load XBZRLE page - len overflow!");
2101 return -1;
2102 }
2103 /* load data and decode */
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00002104 qemu_get_buffer_in_place(f, &loaded_data, xh_len);
Juan Quintela56e93d22015-05-07 19:33:31 +02002105
2106 /* decode RLE */
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00002107 if (xbzrle_decode_buffer(loaded_data, xh_len, host,
Juan Quintela56e93d22015-05-07 19:33:31 +02002108 TARGET_PAGE_SIZE) == -1) {
2109 error_report("Failed to load XBZRLE page - decode error!");
2110 return -1;
2111 }
2112
2113 return 0;
2114}
2115
Juan Quintela3d0684b2017-03-23 15:06:39 +01002116/**
2117 * ram_block_from_stream: read a RAMBlock id from the migration stream
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002118 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002119 * Must be called from within an RCU critical section.
2120 *
2121 * Returns a pointer from within the RCU-protected ram_list.
2122 *
2123 * @f: QEMUFile where to read the data from
2124 * @flags: Page flags (mostly to see if it's a continuation of previous block)
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002125 */
Juan Quintela3d0684b2017-03-23 15:06:39 +01002126static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
Juan Quintela56e93d22015-05-07 19:33:31 +02002127{
2128 static RAMBlock *block = NULL;
2129 char id[256];
2130 uint8_t len;
2131
2132 if (flags & RAM_SAVE_FLAG_CONTINUE) {
zhanghailiang4c4bad42016-01-15 11:37:41 +08002133 if (!block) {
Juan Quintela56e93d22015-05-07 19:33:31 +02002134 error_report("Ack, bad migration stream!");
2135 return NULL;
2136 }
zhanghailiang4c4bad42016-01-15 11:37:41 +08002137 return block;
Juan Quintela56e93d22015-05-07 19:33:31 +02002138 }
2139
2140 len = qemu_get_byte(f);
2141 qemu_get_buffer(f, (uint8_t *)id, len);
2142 id[len] = 0;
2143
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00002144 block = qemu_ram_block_by_name(id);
zhanghailiang4c4bad42016-01-15 11:37:41 +08002145 if (!block) {
2146 error_report("Can't find block %s", id);
2147 return NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02002148 }
2149
zhanghailiang4c4bad42016-01-15 11:37:41 +08002150 return block;
2151}
2152
2153static inline void *host_from_ram_block_offset(RAMBlock *block,
2154 ram_addr_t offset)
2155{
2156 if (!offset_in_ramblock(block, offset)) {
2157 return NULL;
2158 }
2159
2160 return block->host + offset;
Juan Quintela56e93d22015-05-07 19:33:31 +02002161}
2162
Juan Quintela3d0684b2017-03-23 15:06:39 +01002163/**
2164 * ram_handle_compressed: handle the zero page case
2165 *
Juan Quintela56e93d22015-05-07 19:33:31 +02002166 * If a page (or a whole RDMA chunk) has been
2167 * determined to be zero, then zap it.
Juan Quintela3d0684b2017-03-23 15:06:39 +01002168 *
2169 * @host: host address for the zero page
2170 * @ch: what the page is filled from. We only support zero
2171 * @size: size of the zero page
Juan Quintela56e93d22015-05-07 19:33:31 +02002172 */
2173void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
2174{
2175 if (ch != 0 || !is_zero_range(host, size)) {
2176 memset(host, ch, size);
2177 }
2178}
2179
2180static void *do_data_decompress(void *opaque)
2181{
2182 DecompressParam *param = opaque;
2183 unsigned long pagesize;
Liang Li33d151f2016-05-05 15:32:58 +08002184 uint8_t *des;
2185 int len;
Juan Quintela56e93d22015-05-07 19:33:31 +02002186
Liang Li33d151f2016-05-05 15:32:58 +08002187 qemu_mutex_lock(&param->mutex);
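    /*
     * Work loop: the feeder thread sets param->des and param->len when a
     * compressed page is ready for us; param->quit asks this thread to
     * exit.
     */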
Liang Li90e56fb2016-05-05 15:32:56 +08002188 while (!param->quit) {
Liang Li33d151f2016-05-05 15:32:58 +08002189 if (param->des) {
2190 des = param->des;
2191 len = param->len;
2192 param->des = 0;
2193 qemu_mutex_unlock(&param->mutex);
2194
Liang Li73a89122016-05-05 15:32:51 +08002195 pagesize = TARGET_PAGE_SIZE;
2196            /* uncompress() can fail in some cases, especially
2197             * when the page was dirtied while it was being compressed. That's
2198             * not a problem, because the dirty page will be retransferred
2199             * and uncompress() won't corrupt the data in other pages.
2200 */
Liang Li33d151f2016-05-05 15:32:58 +08002201 uncompress((Bytef *)des, &pagesize,
2202 (const Bytef *)param->compbuf, len);
Liang Li73a89122016-05-05 15:32:51 +08002203
Liang Li33d151f2016-05-05 15:32:58 +08002204 qemu_mutex_lock(&decomp_done_lock);
2205 param->done = true;
2206 qemu_cond_signal(&decomp_done_cond);
2207 qemu_mutex_unlock(&decomp_done_lock);
2208
2209 qemu_mutex_lock(&param->mutex);
2210 } else {
2211 qemu_cond_wait(&param->cond, &param->mutex);
2212 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002213 }
Liang Li33d151f2016-05-05 15:32:58 +08002214 qemu_mutex_unlock(&param->mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02002215
2216 return NULL;
2217}
2218
Liang Li5533b2e2016-05-05 15:32:52 +08002219static void wait_for_decompress_done(void)
2220{
2221 int idx, thread_count;
2222
2223 if (!migrate_use_compression()) {
2224 return;
2225 }
2226
2227 thread_count = migrate_decompress_threads();
2228 qemu_mutex_lock(&decomp_done_lock);
2229 for (idx = 0; idx < thread_count; idx++) {
2230 while (!decomp_param[idx].done) {
2231 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2232 }
2233 }
2234 qemu_mutex_unlock(&decomp_done_lock);
2235}
2236
Juan Quintela56e93d22015-05-07 19:33:31 +02002237void migrate_decompress_threads_create(void)
2238{
2239 int i, thread_count;
2240
2241 thread_count = migrate_decompress_threads();
2242 decompress_threads = g_new0(QemuThread, thread_count);
2243 decomp_param = g_new0(DecompressParam, thread_count);
Liang Li73a89122016-05-05 15:32:51 +08002244 qemu_mutex_init(&decomp_done_lock);
2245 qemu_cond_init(&decomp_done_cond);
Juan Quintela56e93d22015-05-07 19:33:31 +02002246 for (i = 0; i < thread_count; i++) {
2247 qemu_mutex_init(&decomp_param[i].mutex);
2248 qemu_cond_init(&decomp_param[i].cond);
2249 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
Liang Li73a89122016-05-05 15:32:51 +08002250 decomp_param[i].done = true;
Liang Li90e56fb2016-05-05 15:32:56 +08002251 decomp_param[i].quit = false;
Juan Quintela56e93d22015-05-07 19:33:31 +02002252 qemu_thread_create(decompress_threads + i, "decompress",
2253 do_data_decompress, decomp_param + i,
2254 QEMU_THREAD_JOINABLE);
2255 }
2256}
2257
2258void migrate_decompress_threads_join(void)
2259{
2260 int i, thread_count;
2261
Juan Quintela56e93d22015-05-07 19:33:31 +02002262 thread_count = migrate_decompress_threads();
2263 for (i = 0; i < thread_count; i++) {
2264 qemu_mutex_lock(&decomp_param[i].mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08002265 decomp_param[i].quit = true;
Juan Quintela56e93d22015-05-07 19:33:31 +02002266 qemu_cond_signal(&decomp_param[i].cond);
2267 qemu_mutex_unlock(&decomp_param[i].mutex);
2268 }
2269 for (i = 0; i < thread_count; i++) {
2270 qemu_thread_join(decompress_threads + i);
2271 qemu_mutex_destroy(&decomp_param[i].mutex);
2272 qemu_cond_destroy(&decomp_param[i].cond);
2273 g_free(decomp_param[i].compbuf);
2274 }
2275 g_free(decompress_threads);
2276 g_free(decomp_param);
Juan Quintela56e93d22015-05-07 19:33:31 +02002277 decompress_threads = NULL;
2278 decomp_param = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02002279}
2280
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00002281static void decompress_data_with_multi_threads(QEMUFile *f,
Juan Quintela56e93d22015-05-07 19:33:31 +02002282 void *host, int len)
2283{
2284 int idx, thread_count;
2285
2286 thread_count = migrate_decompress_threads();
Liang Li73a89122016-05-05 15:32:51 +08002287 qemu_mutex_lock(&decomp_done_lock);
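    /*
     * Find an idle decompression thread and hand it this compressed page;
     * if every thread is busy, wait for one to signal completion and then
     * retry.
     */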
Juan Quintela56e93d22015-05-07 19:33:31 +02002288 while (true) {
2289 for (idx = 0; idx < thread_count; idx++) {
Liang Li73a89122016-05-05 15:32:51 +08002290 if (decomp_param[idx].done) {
Liang Li33d151f2016-05-05 15:32:58 +08002291 decomp_param[idx].done = false;
2292 qemu_mutex_lock(&decomp_param[idx].mutex);
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00002293 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
Juan Quintela56e93d22015-05-07 19:33:31 +02002294 decomp_param[idx].des = host;
2295 decomp_param[idx].len = len;
Liang Li33d151f2016-05-05 15:32:58 +08002296 qemu_cond_signal(&decomp_param[idx].cond);
2297 qemu_mutex_unlock(&decomp_param[idx].mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02002298 break;
2299 }
2300 }
2301 if (idx < thread_count) {
2302 break;
Liang Li73a89122016-05-05 15:32:51 +08002303 } else {
2304 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02002305 }
2306 }
Liang Li73a89122016-05-05 15:32:51 +08002307 qemu_mutex_unlock(&decomp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02002308}
2309
Juan Quintela3d0684b2017-03-23 15:06:39 +01002310/**
2311 * ram_postcopy_incoming_init: allocate postcopy data structures
2312 *
2313 * Returns 0 for success and negative if there was one error
2314 *
2315 * @mis: current migration incoming state
2316 *
2317 * Allocate data structures etc needed by incoming migration with
2318 * postcopy-ram. postcopy-ram's similarly named
2319 * postcopy_ram_incoming_init does the work.
Dr. David Alan Gilbert1caddf82015-11-05 18:11:03 +00002320 */
2321int ram_postcopy_incoming_init(MigrationIncomingState *mis)
2322{
Juan Quintelab8c48992017-03-21 17:44:30 +01002323 unsigned long ram_pages = last_ram_page();
Dr. David Alan Gilbert1caddf82015-11-05 18:11:03 +00002324
2325 return postcopy_ram_incoming_init(mis, ram_pages);
2326}
2327
Juan Quintela3d0684b2017-03-23 15:06:39 +01002328/**
2329 * ram_load_postcopy: load a page in postcopy case
2330 *
2331 * Returns 0 for success or -errno in case of error
2332 *
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002333 * Called in postcopy mode by ram_load().
2334 * rcu_read_lock is taken prior to this being called.
Juan Quintela3d0684b2017-03-23 15:06:39 +01002335 *
2336 * @f: QEMUFile to read the data from
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002337 */
2338static int ram_load_postcopy(QEMUFile *f)
2339{
2340 int flags = 0, ret = 0;
2341 bool place_needed = false;
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00002342 bool matching_page_sizes = false;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002343 MigrationIncomingState *mis = migration_incoming_get_current();
2344 /* Temporary page that is later 'placed' */
2345 void *postcopy_host_page = postcopy_get_tmp_page(mis);
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00002346 void *last_host = NULL;
Dr. David Alan Gilberta3b6ff62015-11-11 14:02:28 +00002347 bool all_zero = false;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002348
2349 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
2350 ram_addr_t addr;
2351 void *host = NULL;
2352 void *page_buffer = NULL;
2353 void *place_source = NULL;
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00002354 RAMBlock *block = NULL;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002355 uint8_t ch;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002356
2357 addr = qemu_get_be64(f);
2358 flags = addr & ~TARGET_PAGE_MASK;
2359 addr &= TARGET_PAGE_MASK;
2360
2361 trace_ram_load_postcopy_loop((uint64_t)addr, flags);
2362 place_needed = false;
Juan Quintelabb890ed2017-04-28 09:39:55 +02002363 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE)) {
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00002364 block = ram_block_from_stream(f, flags);
zhanghailiang4c4bad42016-01-15 11:37:41 +08002365
2366 host = host_from_ram_block_offset(block, addr);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002367 if (!host) {
2368 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2369 ret = -EINVAL;
2370 break;
2371 }
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00002372 matching_page_sizes = block->page_size == TARGET_PAGE_SIZE;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002373 /*
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00002374 * Postcopy requires that we place whole host pages atomically;
2375 * these may be huge pages for RAMBlocks that are backed by
2376 * hugetlbfs.
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002377 * To make it atomic, the data is read into a temporary page
2378 * that's moved into place later.
2379 * The migration protocol uses, possibly smaller, target-pages
2380 * however the source ensures it always sends all the components
2381 * of a host page in order.
2382 */
2383 page_buffer = postcopy_host_page +
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00002384 ((uintptr_t)host & (block->page_size - 1));
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002385 /* If all TP are zero then we can optimise the place */
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00002386 if (!((uintptr_t)host & (block->page_size - 1))) {
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002387 all_zero = true;
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00002388 } else {
2389             /* not the first target page within the host page */
2390 if (host != (last_host + TARGET_PAGE_SIZE)) {
Markus Armbruster9af9e0f2015-12-18 16:35:19 +01002391 error_report("Non-sequential target page %p/%p",
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00002392 host, last_host);
2393 ret = -EINVAL;
2394 break;
2395 }
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002396 }
2397
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00002398
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002399 /*
2400 * If it's the last part of a host page then we place the host
2401 * page
2402 */
2403 place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00002404 (block->page_size - 1)) == 0;
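            /*
             * Editor's note (same illustrative sizes as above): with 2MiB
             * host pages and 4KiB target pages, place_needed becomes true
             * only on the 512th target page of each host page, i.e. when
             * host + TARGET_PAGE_SIZE is host-page aligned.
             */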
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002405 place_source = postcopy_host_page;
2406 }
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00002407 last_host = host;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002408
2409 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
Juan Quintelabb890ed2017-04-28 09:39:55 +02002410 case RAM_SAVE_FLAG_ZERO:
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002411 ch = qemu_get_byte(f);
2412 memset(page_buffer, ch, TARGET_PAGE_SIZE);
2413 if (ch) {
2414 all_zero = false;
2415 }
2416 break;
2417
2418 case RAM_SAVE_FLAG_PAGE:
2419 all_zero = false;
2420 if (!place_needed || !matching_page_sizes) {
2421 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
2422 } else {
2423                 /* Avoid copying out of the qemu_file buffer: postcopy is
2424                  * going to copy the page into place later anyway, and we
2425                  * can only read directly when the whole host page is read
2426                  * in one go (matching page sizes).  */
2427 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
2428 TARGET_PAGE_SIZE);
2429 }
2430 break;
2431 case RAM_SAVE_FLAG_EOS:
2432 /* normal exit */
2433 break;
2434 default:
2435 error_report("Unknown combination of migration flags: %#x"
2436 " (postcopy mode)", flags);
2437 ret = -EINVAL;
2438 }
2439
2440 if (place_needed) {
2441             /* We have reached the last target page in the host page; place it */
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00002442 void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;
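            /*
             * Editor's note: host points at the last target page of the host
             * page here (place_needed was computed above), so stepping back
             * by block->page_size - TARGET_PAGE_SIZE yields the start of the
             * host page.  With the illustrative 2MiB/4KiB sizes, place_dest
             * is host - 0x1ff000.
             */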
2443
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002444 if (all_zero) {
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00002445 ret = postcopy_place_page_zero(mis, place_dest,
2446 block->page_size);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002447 } else {
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00002448 ret = postcopy_place_page(mis, place_dest,
2449 place_source, block->page_size);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002450 }
2451 }
2452 if (!ret) {
2453 ret = qemu_file_get_error(f);
2454 }
2455 }
2456
2457 return ret;
2458}
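
/*
 * Editor's sketch (not part of the QEMU sources): the accumulate-then-place
 * pattern used by ram_load_postcopy() above, reduced to its essentials.  The
 * helper below and all of its parameter names are hypothetical, and it is
 * guarded with #if 0 so it never takes part in the build.
 */
#if 0
static int place_one_host_page_sketch(void *tmp_page, void *host_page_start,
                                      size_t host_page_size,
                                      size_t target_page_size,
                                      void (*read_target_page)(void *dst),
                                      int (*place)(void *dst, const void *src,
                                                   size_t len))
{
    size_t off;

    /* Stage every target page of the host page in a private buffer... */
    for (off = 0; off < host_page_size; off += target_page_size) {
        read_target_page((char *)tmp_page + off);
    }
    /* ...and only then publish the whole host page in one atomic step. */
    return place(host_page_start, tmp_page, host_page_size);
}
#endif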
2459
Juan Quintela56e93d22015-05-07 19:33:31 +02002460static int ram_load(QEMUFile *f, void *opaque, int version_id)
2461{
2462 int flags = 0, ret = 0;
2463 static uint64_t seq_iter;
2464 int len = 0;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002465 /*
2466      * If the system is running in postcopy mode, insertions into host memory must
2467 * be atomic
2468 */
2469 bool postcopy_running = postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING;
Dr. David Alan Gilbertef08fb32017-02-24 18:28:30 +00002470     /* ADVISE is earlier than LISTENING; it shows the source has postcopy enabled */
2471 bool postcopy_advised = postcopy_state_get() >= POSTCOPY_INCOMING_ADVISE;
Juan Quintela56e93d22015-05-07 19:33:31 +02002472
2473 seq_iter++;
2474
2475 if (version_id != 4) {
2476 ret = -EINVAL;
2477 }
2478
2479 /* This RCU critical section can be very long running.
2480      * When the RCU reclamations delayed by this section become numerous,
2481 * it will be necessary to reduce the granularity of this
2482 * critical section.
2483 */
2484 rcu_read_lock();
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002485
2486 if (postcopy_running) {
2487 ret = ram_load_postcopy(f);
2488 }
2489
2490 while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02002491 ram_addr_t addr, total_ram_bytes;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00002492 void *host = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02002493 uint8_t ch;
2494
2495 addr = qemu_get_be64(f);
2496 flags = addr & ~TARGET_PAGE_MASK;
2497 addr &= TARGET_PAGE_MASK;
2498
Juan Quintelabb890ed2017-04-28 09:39:55 +02002499 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00002500 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
zhanghailiang4c4bad42016-01-15 11:37:41 +08002501 RAMBlock *block = ram_block_from_stream(f, flags);
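            /*
             * Editor's note (behaviour assumed from the flag name and from
             * ram_block_from_stream(), please verify against that helper):
             * RAM_SAVE_FLAG_CONTINUE means "same RAMBlock as the previous
             * page", so the block id string only travels in the stream when
             * that flag is clear.
             */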
2502
2503 host = host_from_ram_block_offset(block, addr);
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00002504 if (!host) {
2505 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2506 ret = -EINVAL;
2507 break;
2508 }
Dr. David Alan Gilbert1db9d8e2017-04-26 19:37:21 +01002509 trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00002510 }
2511
Juan Quintela56e93d22015-05-07 19:33:31 +02002512 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2513 case RAM_SAVE_FLAG_MEM_SIZE:
2514 /* Synchronize RAM block list */
2515 total_ram_bytes = addr;
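            /*
             * Editor's note, summarising the parsing below: addr carries the
             * total RAM size and each block record consists of a one-byte id
             * length, the id string, and a be64 used length, optionally
             * followed by a be64 page size when the source advised postcopy.
             */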
2516 while (!ret && total_ram_bytes) {
2517 RAMBlock *block;
Juan Quintela56e93d22015-05-07 19:33:31 +02002518 char id[256];
2519 ram_addr_t length;
2520
2521 len = qemu_get_byte(f);
2522 qemu_get_buffer(f, (uint8_t *)id, len);
2523 id[len] = 0;
2524 length = qemu_get_be64(f);
2525
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00002526 block = qemu_ram_block_by_name(id);
2527 if (block) {
2528 if (length != block->used_length) {
2529 Error *local_err = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02002530
Gongleifa53a0e2016-05-10 10:04:59 +08002531 ret = qemu_ram_resize(block, length,
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00002532 &local_err);
2533 if (local_err) {
2534 error_report_err(local_err);
Juan Quintela56e93d22015-05-07 19:33:31 +02002535 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002536 }
Dr. David Alan Gilbertef08fb32017-02-24 18:28:30 +00002537             /* For postcopy we need to check that the hugepage sizes match */
2538 if (postcopy_advised &&
2539 block->page_size != qemu_host_page_size) {
2540 uint64_t remote_page_size = qemu_get_be64(f);
2541 if (remote_page_size != block->page_size) {
2542 error_report("Mismatched RAM page size %s "
2543                                      "(local) %zu != %" PRIu64,
2544 id, block->page_size,
2545 remote_page_size);
2546 ret = -EINVAL;
2547 }
2548 }
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00002549 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
2550 block->idstr);
2551 } else {
Juan Quintela56e93d22015-05-07 19:33:31 +02002552 error_report("Unknown ramblock \"%s\", cannot "
2553 "accept migration", id);
2554 ret = -EINVAL;
2555 }
2556
2557 total_ram_bytes -= length;
2558 }
2559 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00002560
Juan Quintelabb890ed2017-04-28 09:39:55 +02002561 case RAM_SAVE_FLAG_ZERO:
Juan Quintela56e93d22015-05-07 19:33:31 +02002562 ch = qemu_get_byte(f);
2563 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
2564 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00002565
Juan Quintela56e93d22015-05-07 19:33:31 +02002566 case RAM_SAVE_FLAG_PAGE:
Juan Quintela56e93d22015-05-07 19:33:31 +02002567 qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
2568 break;
Juan Quintela56e93d22015-05-07 19:33:31 +02002569
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00002570 case RAM_SAVE_FLAG_COMPRESS_PAGE:
Juan Quintela56e93d22015-05-07 19:33:31 +02002571 len = qemu_get_be32(f);
2572 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
2573 error_report("Invalid compressed data length: %d", len);
2574 ret = -EINVAL;
2575 break;
2576 }
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00002577 decompress_data_with_multi_threads(f, host, len);
Juan Quintela56e93d22015-05-07 19:33:31 +02002578 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00002579
Juan Quintela56e93d22015-05-07 19:33:31 +02002580 case RAM_SAVE_FLAG_XBZRLE:
Juan Quintela56e93d22015-05-07 19:33:31 +02002581 if (load_xbzrle(f, addr, host) < 0) {
2582 error_report("Failed to decompress XBZRLE page at "
2583 RAM_ADDR_FMT, addr);
2584 ret = -EINVAL;
2585 break;
2586 }
2587 break;
2588 case RAM_SAVE_FLAG_EOS:
2589 /* normal exit */
2590 break;
2591 default:
2592 if (flags & RAM_SAVE_FLAG_HOOK) {
Dr. David Alan Gilbert632e3a52015-06-11 18:17:23 +01002593 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
Juan Quintela56e93d22015-05-07 19:33:31 +02002594 } else {
2595 error_report("Unknown combination of migration flags: %#x",
2596 flags);
2597 ret = -EINVAL;
2598 }
2599 }
2600 if (!ret) {
2601 ret = qemu_file_get_error(f);
2602 }
2603 }
2604
Liang Li5533b2e2016-05-05 15:32:52 +08002605 wait_for_decompress_done();
Juan Quintela56e93d22015-05-07 19:33:31 +02002606 rcu_read_unlock();
Juan Quintela55c44462017-01-23 22:32:05 +01002607 trace_ram_load_complete(ret, seq_iter);
Juan Quintela56e93d22015-05-07 19:33:31 +02002608 return ret;
2609}
2610
2611static SaveVMHandlers savevm_ram_handlers = {
2612 .save_live_setup = ram_save_setup,
2613 .save_live_iterate = ram_save_iterate,
Dr. David Alan Gilbert763c9062015-11-05 18:11:00 +00002614 .save_live_complete_postcopy = ram_save_complete,
Dr. David Alan Gilberta3e06c32015-11-05 18:10:41 +00002615 .save_live_complete_precopy = ram_save_complete,
Juan Quintela56e93d22015-05-07 19:33:31 +02002616 .save_live_pending = ram_save_pending,
2617 .load_state = ram_load,
Liang Li6ad2a212015-11-02 15:37:03 +08002618 .cleanup = ram_migration_cleanup,
Juan Quintela56e93d22015-05-07 19:33:31 +02002619};
2620
2621void ram_mig_init(void)
2622{
2623 qemu_mutex_init(&XBZRLE.lock);
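    /*
     * Editor's note on the call below: "ram" is the section name, 0 the
     * instance id, and 4 the section version, which matches the
     * "version_id != 4" check in ram_load(); &ram_state is handed back to
     * the handlers as their opaque pointer.
     */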
Juan Quintela6f37bb82017-03-13 19:26:29 +01002624 register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state);
Juan Quintela56e93d22015-05-07 19:33:31 +02002625}