Juan Quintela56e93d22015-05-07 19:33:31 +02001/*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
Juan Quintela76cc7b52015-05-08 13:20:21 +02005 * Copyright (c) 2011-2015 Red Hat Inc
6 *
7 * Authors:
8 * Juan Quintela <quintela@redhat.com>
Juan Quintela56e93d22015-05-07 19:33:31 +02009 *
10 * Permission is hereby granted, free of charge, to any person obtaining a copy
11 * of this software and associated documentation files (the "Software"), to deal
12 * in the Software without restriction, including without limitation the rights
13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the Software is
15 * furnished to do so, subject to the following conditions:
16 *
17 * The above copyright notice and this permission notice shall be included in
18 * all copies or substantial portions of the Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 * THE SOFTWARE.
27 */
Markus Armbrustere688df62018-02-01 12:18:31 +010028
Peter Maydell1393a482016-01-26 18:16:54 +000029#include "qemu/osdep.h"
Veronia Bahaaf348b6d2016-03-20 19:16:19 +020030#include "qemu/cutils.h"
Juan Quintela56e93d22015-05-07 19:33:31 +020031#include "qemu/bitops.h"
32#include "qemu/bitmap.h"
Peter Maydellb85ea5f2022-02-08 20:08:52 +000033#include "qemu/madvise.h"
Juan Quintela7205c9e2015-05-08 13:54:36 +020034#include "qemu/main-loop.h"
Daniel P. Berrangéc0e08252022-06-20 12:01:46 +010035#include "io/channel-null.h"
Juan Quintela709e3fe2017-04-05 21:47:50 +020036#include "xbzrle.h"
Juan Quintela7b1e1a22017-04-17 20:26:27 +020037#include "ram.h"
Juan Quintela6666c962017-04-24 20:07:27 +020038#include "migration.h"
Juan Quintelaf2a8f0a2017-04-24 13:42:55 +020039#include "migration/register.h"
Juan Quintela7b1e1a22017-04-17 20:26:27 +020040#include "migration/misc.h"
Juan Quintela08a0aee2017-04-20 18:52:18 +020041#include "qemu-file.h"
Juan Quintelabe07b0a2017-04-20 13:12:24 +020042#include "postcopy-ram.h"
Michael S. Tsirkin53d37d32018-05-03 22:50:51 +030043#include "page_cache.h"
Juan Quintela56e93d22015-05-07 19:33:31 +020044#include "qemu/error-report.h"
Markus Armbrustere688df62018-02-01 12:18:31 +010045#include "qapi/error.h"
Juan Quintelaab7cbb02019-05-15 13:37:46 +020046#include "qapi/qapi-types-migration.h"
Markus Armbruster9af23982018-02-11 10:36:01 +010047#include "qapi/qapi-events-migration.h"
Juan Quintela8acabf62017-10-05 22:00:31 +020048#include "qapi/qmp/qerror.h"
Juan Quintela56e93d22015-05-07 19:33:31 +020049#include "trace.h"
Juan Quintela56e93d22015-05-07 19:33:31 +020050#include "exec/ram_addr.h"
Alexey Perevalovf9494612017-10-05 14:13:20 +030051#include "exec/target_page.h"
Juan Quintela56e93d22015-05-07 19:33:31 +020052#include "qemu/rcu_queue.h"
zhanghailianga91246c2016-10-27 14:42:59 +080053#include "migration/colo.h"
Michael S. Tsirkin53d37d32018-05-03 22:50:51 +030054#include "block.h"
Claudio Fontanab0c3cf92020-06-29 11:35:03 +020055#include "sysemu/cpu-throttle.h"
Peter Xuedd090c2018-05-02 18:47:32 +080056#include "savevm.h"
Juan Quintelab9ee2f72016-01-15 11:40:13 +010057#include "qemu/iov.h"
Juan Quintelad32ca5a2020-01-22 16:16:07 +010058#include "multifd.h"
Andrey Gruzdev278e2f52021-01-29 13:14:05 +030059#include "sysemu/runstate.h"
60
Lukas Straube5fdf922021-07-04 18:14:44 +020061#include "hw/boards.h" /* for machine_dump_guest_core() */
62
Andrey Gruzdev278e2f52021-01-29 13:14:05 +030063#if defined(__linux__)
64#include "qemu/userfaultfd.h"
65#endif /* defined(__linux__) */
Juan Quintela56e93d22015-05-07 19:33:31 +020066
Juan Quintela56e93d22015-05-07 19:33:31 +020067/***********************************************************/
68/* ram save/restore */
69
Juan Quintelabb890ed2017-04-28 09:39:55 +020070/* RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS; it
71 * worked for pages that were filled with the same char. We switched
72 * it to only search for the zero value, and renamed it to avoid
73 * confusion with RAM_SAVE_FLAG_COMPRESS_PAGE.
74 */
75
Juan Quintela56e93d22015-05-07 19:33:31 +020076#define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */
Juan Quintelabb890ed2017-04-28 09:39:55 +020077#define RAM_SAVE_FLAG_ZERO 0x02
Juan Quintela56e93d22015-05-07 19:33:31 +020078#define RAM_SAVE_FLAG_MEM_SIZE 0x04
79#define RAM_SAVE_FLAG_PAGE 0x08
80#define RAM_SAVE_FLAG_EOS 0x10
81#define RAM_SAVE_FLAG_CONTINUE 0x20
82#define RAM_SAVE_FLAG_XBZRLE 0x40
83/* 0x80 is reserved in migration.h; start with 0x100 for the next flag */
84#define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100
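
/*
 * Illustration (a sketch; it assumes 4K target pages, so the exact bit
 * positions are illustrative): the flags above are OR'ed into the low bits
 * of the page-aligned offset and sent as one be64 word, as done later in
 * save_page_header():
 *
 *     offset |= RAM_SAVE_FLAG_CONTINUE;   (same block as the last page)
 *     qemu_put_be64(f, offset);           (bits 63..12 offset, 11..0 flags)
 *
 * Any bit below TARGET_PAGE_BITS is therefore free to carry a flag.
 */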
85
Juan Quintela93604472017-06-06 19:49:03 +020086XBZRLECacheStats xbzrle_counters;
87
Peter Xuf1668762022-10-11 17:55:55 -040088/* used by the search for pages to send */
89struct PageSearchStatus {
90 /* The migration channel used for a specific host page */
91 QEMUFile *pss_channel;
Peter Xuec6f3ab2022-10-11 17:55:56 -040092 /* Last block from where we have sent data */
93 RAMBlock *last_sent_block;
Peter Xuf1668762022-10-11 17:55:55 -040094 /* Current block being searched */
95 RAMBlock *block;
96 /* Current page to search from */
97 unsigned long page;
98 /* Set once we wrap around */
99 bool complete_round;
Peter Xuf1668762022-10-11 17:55:55 -0400100 /* Whether we're sending a host page */
101 bool host_page_sending;
102 /* The start/end of current host page. Invalid if host_page_sending==false */
103 unsigned long host_page_start;
104 unsigned long host_page_end;
105};
106typedef struct PageSearchStatus PageSearchStatus;
107
Juan Quintela56e93d22015-05-07 19:33:31 +0200108/* This struct contains the XBZRLE cache and a static page
109 used by the compression */
110static struct {
111 /* buffer used for XBZRLE encoding */
112 uint8_t *encoded_buf;
113 /* buffer for storing page content */
114 uint8_t *current_buf;
115 /* Cache for XBZRLE, Protected by lock. */
116 PageCache *cache;
117 QemuMutex lock;
Juan Quintelac00e0922017-05-09 16:22:01 +0200118 /* it will store a page full of zeros */
119 uint8_t *zero_target_page;
Juan Quintelaf265e0e2017-06-28 11:52:27 +0200120 /* buffer used for XBZRLE decoding */
121 uint8_t *decoded_buf;
Juan Quintela56e93d22015-05-07 19:33:31 +0200122} XBZRLE;
123
Juan Quintela56e93d22015-05-07 19:33:31 +0200124static void XBZRLE_cache_lock(void)
125{
Bihong Yuf4c51a62020-10-20 11:10:45 +0800126 if (migrate_use_xbzrle()) {
Juan Quintela56e93d22015-05-07 19:33:31 +0200127 qemu_mutex_lock(&XBZRLE.lock);
Bihong Yuf4c51a62020-10-20 11:10:45 +0800128 }
Juan Quintela56e93d22015-05-07 19:33:31 +0200129}
130
131static void XBZRLE_cache_unlock(void)
132{
Bihong Yuf4c51a62020-10-20 11:10:45 +0800133 if (migrate_use_xbzrle()) {
Juan Quintela56e93d22015-05-07 19:33:31 +0200134 qemu_mutex_unlock(&XBZRLE.lock);
Bihong Yuf4c51a62020-10-20 11:10:45 +0800135 }
Juan Quintela56e93d22015-05-07 19:33:31 +0200136}
137
Juan Quintela3d0684b2017-03-23 15:06:39 +0100138/**
139 * xbzrle_cache_resize: resize the xbzrle cache
140 *
Daniel P. Berrangécbde7be2021-02-19 18:40:12 +0000141 * This function is called from migrate_params_apply in the main
Juan Quintela3d0684b2017-03-23 15:06:39 +0100142 * thread, possibly while a migration is in progress. A running
143 * migration may be using the cache and might finish during this call,
144 * hence changes to the cache are protected by the XBZRLE.lock mutex.
145 *
Juan Quintelac9dede22017-10-06 23:03:55 +0200146 * Returns 0 for success or -1 for error
Juan Quintela3d0684b2017-03-23 15:06:39 +0100147 *
148 * @new_size: new cache size
Juan Quintela8acabf62017-10-05 22:00:31 +0200149 * @errp: set *errp with the reason if the resize fails
Juan Quintela56e93d22015-05-07 19:33:31 +0200150 */
Markus Armbruster8b9407a2021-02-02 15:17:32 +0100151int xbzrle_cache_resize(uint64_t new_size, Error **errp)
Juan Quintela56e93d22015-05-07 19:33:31 +0200152{
153 PageCache *new_cache;
Juan Quintelac9dede22017-10-06 23:03:55 +0200154 int64_t ret = 0;
Juan Quintela56e93d22015-05-07 19:33:31 +0200155
Juan Quintela8acabf62017-10-05 22:00:31 +0200156 /* Check for truncation */
157 if (new_size != (size_t)new_size) {
158 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
159 "exceeding address space");
160 return -1;
161 }
162
Juan Quintela2a313e52017-10-06 23:00:12 +0200163 if (new_size == migrate_xbzrle_cache_size()) {
164 /* nothing to do */
Juan Quintelac9dede22017-10-06 23:03:55 +0200165 return 0;
Juan Quintela2a313e52017-10-06 23:00:12 +0200166 }
167
Juan Quintela56e93d22015-05-07 19:33:31 +0200168 XBZRLE_cache_lock();
169
170 if (XBZRLE.cache != NULL) {
Juan Quintela80f8dfd2017-10-06 22:30:45 +0200171 new_cache = cache_init(new_size, TARGET_PAGE_SIZE, errp);
Juan Quintela56e93d22015-05-07 19:33:31 +0200172 if (!new_cache) {
Juan Quintela56e93d22015-05-07 19:33:31 +0200173 ret = -1;
174 goto out;
175 }
176
177 cache_fini(XBZRLE.cache);
178 XBZRLE.cache = new_cache;
179 }
Juan Quintela56e93d22015-05-07 19:33:31 +0200180out:
181 XBZRLE_cache_unlock();
182 return ret;
183}
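
/*
 * A minimal caller sketch (the 256MB value is illustrative only):
 *
 *     Error *err = NULL;
 *
 *     if (xbzrle_cache_resize(256 * 1024 * 1024, &err) < 0) {
 *         error_report_err(err);
 *     }
 */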
184
Peter Xu20123ee2022-10-11 17:55:46 -0400185static bool postcopy_preempt_active(void)
186{
187 return migrate_postcopy_preempt() && migration_in_postcopy();
188}
189
Chuan Zheng3ded54b2020-09-16 14:22:00 +0800190bool ramblock_is_ignored(RAMBlock *block)
Yury Kotovfbd162e2019-02-15 20:45:46 +0300191{
192 return !qemu_ram_is_migratable(block) ||
193 (migrate_ignore_shared() && qemu_ram_is_shared(block));
194}
195
Dr. David Alan Gilbert343f6322018-06-05 17:25:45 +0100196#undef RAMBLOCK_FOREACH
197
Yury Kotovfbd162e2019-02-15 20:45:46 +0300198int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque)
199{
200 RAMBlock *block;
201 int ret = 0;
202
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +0100203 RCU_READ_LOCK_GUARD();
204
Yury Kotovfbd162e2019-02-15 20:45:46 +0300205 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
206 ret = func(block, opaque);
207 if (ret) {
208 break;
209 }
210 }
Yury Kotovfbd162e2019-02-15 20:45:46 +0300211 return ret;
212}
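
/*
 * A minimal usage sketch (the "count_used" callback is hypothetical and
 * only illustrates the RAMBlockIterFunc contract): accumulate the used
 * length of every migratable, non-ignored block.
 *
 *     static int count_used(RAMBlock *rb, void *opaque)
 *     {
 *         *(uint64_t *)opaque += rb->used_length;
 *         return 0;   (non-zero would stop the iteration)
 *     }
 *
 *     uint64_t total = 0;
 *     foreach_not_ignored_block(count_used, &total);
 */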
213
Alexey Perevalovf9494612017-10-05 14:13:20 +0300214static void ramblock_recv_map_init(void)
215{
216 RAMBlock *rb;
217
Yury Kotovfbd162e2019-02-15 20:45:46 +0300218 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
Alexey Perevalovf9494612017-10-05 14:13:20 +0300219 assert(!rb->receivedmap);
220 rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits());
221 }
222}
223
224int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr)
225{
226 return test_bit(ramblock_recv_bitmap_offset(host_addr, rb),
227 rb->receivedmap);
228}
229
Dr. David Alan Gilbert1cba9f62018-03-12 17:21:08 +0000230bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset)
231{
232 return test_bit(byte_offset >> TARGET_PAGE_BITS, rb->receivedmap);
233}
234
Alexey Perevalovf9494612017-10-05 14:13:20 +0300235void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr)
236{
237 set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb), rb->receivedmap);
238}
239
240void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr,
241 size_t nr)
242{
243 bitmap_set_atomic(rb->receivedmap,
244 ramblock_recv_bitmap_offset(host_addr, rb),
245 nr);
246}
247
Peter Xua335deb2018-05-02 18:47:28 +0800248#define RAMBLOCK_RECV_BITMAP_ENDING (0x0123456789abcdefULL)
249
250/*
251 * Format: bitmap_size (8 bytes) + whole_bitmap (N bytes).
252 *
253 * Returns >0 if success with sent bytes, or <0 if error.
254 */
255int64_t ramblock_recv_bitmap_send(QEMUFile *file,
256 const char *block_name)
257{
258 RAMBlock *block = qemu_ram_block_by_name(block_name);
259 unsigned long *le_bitmap, nbits;
260 uint64_t size;
261
262 if (!block) {
263 error_report("%s: invalid block name: %s", __func__, block_name);
264 return -1;
265 }
266
David Hildenbrand898ba902021-04-29 13:27:06 +0200267 nbits = block->postcopy_length >> TARGET_PAGE_BITS;
Peter Xua335deb2018-05-02 18:47:28 +0800268
269 /*
270 * Make sure the tmp bitmap buffer is big enough, e.g., on 32bit
271 * machines we may need 4 more bytes for padding (see below
272 * comment). So extend it a bit beforehand.
273 */
274 le_bitmap = bitmap_new(nbits + BITS_PER_LONG);
275
276 /*
277 * Always use little endian when sending the bitmap. This is
278 * required when source and destination VMs are not using the
zhaolichang3a4452d2020-09-17 15:50:21 +0800279 * same endianness. (Note: big endian won't work.)
Peter Xua335deb2018-05-02 18:47:28 +0800280 */
281 bitmap_to_le(le_bitmap, block->receivedmap, nbits);
282
283 /* Size of the bitmap, in bytes */
Peter Xua725ef92018-07-10 17:18:55 +0800284 size = DIV_ROUND_UP(nbits, 8);
Peter Xua335deb2018-05-02 18:47:28 +0800285
286 /*
287 * size is always aligned to 8 bytes for 64bit machines, but it
288 * may not be true for 32bit machines. We need this padding to
289 * make sure the migration can survive even between 32bit and
290 * 64bit machines.
291 */
292 size = ROUND_UP(size, 8);
293
294 qemu_put_be64(file, size);
295 qemu_put_buffer(file, (const uint8_t *)le_bitmap, size);
296 /*
297 * Mark as an end, in case the middle part is screwed up due to
zhaolichang3a4452d2020-09-17 15:50:21 +0800298 * some "mysterious" reason.
Peter Xua335deb2018-05-02 18:47:28 +0800299 */
300 qemu_put_be64(file, RAMBLOCK_RECV_BITMAP_ENDING);
301 qemu_fflush(file);
302
Peter Xubf269902018-05-25 09:50:42 +0800303 g_free(le_bitmap);
Peter Xua335deb2018-05-02 18:47:28 +0800304
305 if (qemu_file_get_error(file)) {
306 return qemu_file_get_error(file);
307 }
308
309 return size + sizeof(size);
310}
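
/*
 * For reference, the stream produced by ramblock_recv_bitmap_send() is:
 *
 *     be64   size                        (bitmap bytes, rounded up to 8)
 *     u8     le_bitmap[size]             (receivedmap in little endian)
 *     be64   RAMBLOCK_RECV_BITMAP_ENDING
 *
 * and the function returns size + 8 on success.
 */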
311
Juan Quintelaec481c62017-03-20 22:12:40 +0100312/*
313 * An outstanding page request, on the source, having been received
314 * and queued
315 */
316struct RAMSrcPageRequest {
317 RAMBlock *rb;
318 hwaddr offset;
319 hwaddr len;
320
321 QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
322};
323
Juan Quintela6f37bb82017-03-13 19:26:29 +0100324/* State of RAM for migration */
325struct RAMState {
Peter Xuf1668762022-10-11 17:55:55 -0400326 /*
327 * PageSearchStatus structures for the channels when send pages.
328 * Protected by the bitmap_mutex.
329 */
330 PageSearchStatus pss[RAM_CHANNEL_MAX];
Andrey Gruzdev278e2f52021-01-29 13:14:05 +0300331 /* UFFD file descriptor, used in 'write-tracking' migration */
332 int uffdio_fd;
Juan Quintela6f37bb82017-03-13 19:26:29 +0100333 /* Last block that we have visited searching for dirty pages */
334 RAMBlock *last_seen_block;
Juan Quintela269ace22017-03-21 15:23:31 +0100335 /* Last dirty target page we have sent */
336 ram_addr_t last_page;
Juan Quintela6f37bb82017-03-13 19:26:29 +0100337 /* last ram version we have seen */
338 uint32_t last_version;
Juan Quintela8d820d62017-03-13 19:35:50 +0100339 /* How many times we have dirty too many pages */
340 int dirty_rate_high_cnt;
Juan Quintelaf664da82017-03-13 19:44:57 +0100341 /* these variables are used for bitmap sync */
342 /* last time we did a full bitmap_sync */
343 int64_t time_last_bitmap_sync;
Juan Quintelaeac74152017-03-28 14:59:01 +0200344 /* bytes transferred at start_time */
Juan Quintelac4bdf0c2017-03-28 14:59:54 +0200345 uint64_t bytes_xfer_prev;
Juan Quintelaa66cd902017-03-28 15:02:43 +0200346 /* number of dirty pages since start_time */
Juan Quintela68908ed2017-03-28 15:05:53 +0200347 uint64_t num_dirty_pages_period;
Juan Quintelab5833fd2017-03-13 19:49:19 +0100348 /* xbzrle misses since the beginning of the period */
349 uint64_t xbzrle_cache_miss_prev;
Wei Wange460a4b2020-04-30 08:59:35 +0800350 /* Amount of xbzrle pages since the beginning of the period */
351 uint64_t xbzrle_pages_prev;
352 /* Amount of xbzrle encoded bytes since the beginning of the period */
353 uint64_t xbzrle_bytes_prev;
David Hildenbrand1a373522021-02-16 11:50:39 +0100354 /* Start using XBZRLE (e.g., after the first round). */
355 bool xbzrle_enabled;
Juan Quintela05931ec2021-12-15 19:01:21 +0100356 /* Are we on the last stage of migration */
357 bool last_stage;
Xiao Guangrong76e03002018-09-06 15:01:00 +0800358 /* compression statistics since the beginning of the period */
359 /* amount of count that no free thread to compress data */
360 uint64_t compress_thread_busy_prev;
361 /* amount bytes after compression */
362 uint64_t compressed_size_prev;
363 /* amount of compressed pages */
364 uint64_t compress_pages_prev;
365
Xiao Guangrongbe8b02e2018-09-03 17:26:42 +0800366 /* total handled target pages at the beginning of period */
367 uint64_t target_page_count_prev;
368 /* total handled target pages since start */
369 uint64_t target_page_count;
Juan Quintela93604472017-06-06 19:49:03 +0200370 /* number of dirty bits in the bitmap */
Peter Xu2dfaf122017-08-02 17:41:19 +0800371 uint64_t migration_dirty_pages;
Peter Xuf1668762022-10-11 17:55:55 -0400372 /*
373 * Protects:
374 * - dirty/clear bitmap
375 * - migration_dirty_pages
376 * - pss structures
377 */
Juan Quintela108cfae2017-03-13 21:38:09 +0100378 QemuMutex bitmap_mutex;
Juan Quintela68a098f2017-03-14 13:48:42 +0100379 /* The RAMBlock used in the last src_page_requests */
380 RAMBlock *last_req_rb;
Juan Quintelaec481c62017-03-20 22:12:40 +0100381 /* Queue of outstanding page requests from the destination */
382 QemuMutex src_page_req_mutex;
Paolo Bonzinib58deb32018-12-06 11:58:10 +0100383 QSIMPLEQ_HEAD(, RAMSrcPageRequest) src_page_requests;
Juan Quintela6f37bb82017-03-13 19:26:29 +0100384};
385typedef struct RAMState RAMState;
386
Juan Quintela53518d92017-05-04 11:46:24 +0200387static RAMState *ram_state;
Juan Quintela6f37bb82017-03-13 19:26:29 +0100388
Wei Wangbd227062018-12-11 16:24:51 +0800389static NotifierWithReturnList precopy_notifier_list;
390
Peter Xua1fe28d2022-01-19 16:09:18 +0800391/* Whether postcopy has queued requests? */
392static bool postcopy_has_request(RAMState *rs)
393{
394 return !QSIMPLEQ_EMPTY_ATOMIC(&rs->src_page_requests);
395}
396
Wei Wangbd227062018-12-11 16:24:51 +0800397void precopy_infrastructure_init(void)
398{
399 notifier_with_return_list_init(&precopy_notifier_list);
400}
401
402void precopy_add_notifier(NotifierWithReturn *n)
403{
404 notifier_with_return_list_add(&precopy_notifier_list, n);
405}
406
407void precopy_remove_notifier(NotifierWithReturn *n)
408{
409 notifier_with_return_remove(n);
410}
411
412int precopy_notify(PrecopyNotifyReason reason, Error **errp)
413{
414 PrecopyNotifyData pnd;
415 pnd.reason = reason;
416 pnd.errp = errp;
417
418 return notifier_with_return_list_notify(&precopy_notifier_list, &pnd);
419}
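
/*
 * A minimal sketch of a notifier user; the "my_precopy_notify" callback and
 * its registration below are hypothetical and only illustrate the
 * PrecopyNotifyData contract:
 *
 *     static int my_precopy_notify(NotifierWithReturn *n, void *data)
 *     {
 *         PrecopyNotifyData *pnd = data;
 *
 *         if (pnd->reason == PRECOPY_NOTIFY_SETUP) {
 *             ... prepare resources ...
 *         }
 *         return 0;   (return non-zero and set an error via pnd->errp
 *                      to report a problem)
 *     }
 *
 *     static NotifierWithReturn my_notifier = { .notify = my_precopy_notify };
 *     precopy_add_notifier(&my_notifier);
 */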
420
Juan Quintela9edabd42017-03-14 12:02:16 +0100421uint64_t ram_bytes_remaining(void)
422{
Dr. David Alan Gilbertbae416e2017-12-15 11:51:23 +0000423 return ram_state ? (ram_state->migration_dirty_pages * TARGET_PAGE_SIZE) :
424 0;
Juan Quintela9edabd42017-03-14 12:02:16 +0100425}
426
Peter Xu23b75762022-10-11 17:55:51 -0400427/*
428 * NOTE: not all stats in ram_counters are used in reality. See comments
429 * for struct MigrationAtomicStats. The ultimate result of ram migration
430 * counters will be a merged version with both ram_counters and the atomic
431 * fields in ram_atomic_counters.
432 */
Juan Quintela93604472017-06-06 19:49:03 +0200433MigrationStats ram_counters;
Peter Xu23b75762022-10-11 17:55:51 -0400434MigrationAtomicStats ram_atomic_counters;
Juan Quintela96506892017-03-14 18:41:03 +0100435
Juan Quintela26a26062022-02-22 21:02:03 +0100436void ram_transferred_add(uint64_t bytes)
David Edmondson4c2d0f62021-12-21 09:34:40 +0000437{
David Edmondsonae680662021-12-21 09:34:41 +0000438 if (runstate_is_running()) {
439 ram_counters.precopy_bytes += bytes;
440 } else if (migration_in_postcopy()) {
Peter Xu23b75762022-10-11 17:55:51 -0400441 stat64_add(&ram_atomic_counters.postcopy_bytes, bytes);
David Edmondsonae680662021-12-21 09:34:41 +0000442 } else {
443 ram_counters.downtime_bytes += bytes;
444 }
Peter Xu23b75762022-10-11 17:55:51 -0400445 stat64_add(&ram_atomic_counters.transferred, bytes);
David Edmondson4c2d0f62021-12-21 09:34:40 +0000446}
447
Leonardo Brasd59c40c2022-07-11 18:11:13 -0300448void dirty_sync_missed_zero_copy(void)
449{
450 ram_counters.dirty_sync_missed_zero_copy++;
451}
452
Xiao Guangrong76e03002018-09-06 15:01:00 +0800453CompressionStats compression_counters;
454
Juan Quintela56e93d22015-05-07 19:33:31 +0200455struct CompressParam {
Juan Quintela56e93d22015-05-07 19:33:31 +0200456 bool done;
Liang Li90e56fb2016-05-05 15:32:56 +0800457 bool quit;
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +0800458 bool zero_page;
Juan Quintela56e93d22015-05-07 19:33:31 +0200459 QEMUFile *file;
460 QemuMutex mutex;
461 QemuCond cond;
462 RAMBlock *block;
463 ram_addr_t offset;
Xiao Guangrong34ab9e92018-03-30 15:51:22 +0800464
465 /* internally used fields */
Xiao Guangrongdcaf4462018-03-30 15:51:20 +0800466 z_stream stream;
Xiao Guangrong34ab9e92018-03-30 15:51:22 +0800467 uint8_t *originbuf;
Juan Quintela56e93d22015-05-07 19:33:31 +0200468};
469typedef struct CompressParam CompressParam;
470
471struct DecompressParam {
Liang Li73a89122016-05-05 15:32:51 +0800472 bool done;
Liang Li90e56fb2016-05-05 15:32:56 +0800473 bool quit;
Juan Quintela56e93d22015-05-07 19:33:31 +0200474 QemuMutex mutex;
475 QemuCond cond;
476 void *des;
Peter Maydelld341d9f2016-01-22 15:09:21 +0000477 uint8_t *compbuf;
Juan Quintela56e93d22015-05-07 19:33:31 +0200478 int len;
Xiao Guangrong797ca152018-03-30 15:51:21 +0800479 z_stream stream;
Juan Quintela56e93d22015-05-07 19:33:31 +0200480};
481typedef struct DecompressParam DecompressParam;
482
483static CompressParam *comp_param;
484static QemuThread *compress_threads;
485/* comp_done_cond is used to wake up the migration thread when
486 * one of the compression threads has finished the compression.
487 * comp_done_lock is used together with comp_done_cond.
488 */
Liang Li0d9f9a52016-05-05 15:32:59 +0800489static QemuMutex comp_done_lock;
490static QemuCond comp_done_cond;
Juan Quintela56e93d22015-05-07 19:33:31 +0200491
Xiao Guangrong34ab9e92018-03-30 15:51:22 +0800492static QEMUFile *decomp_file;
Juan Quintela56e93d22015-05-07 19:33:31 +0200493static DecompressParam *decomp_param;
494static QemuThread *decompress_threads;
Liang Li73a89122016-05-05 15:32:51 +0800495static QemuMutex decomp_done_lock;
496static QemuCond decomp_done_cond;
Juan Quintela56e93d22015-05-07 19:33:31 +0200497
Peter Xu93589822022-10-11 17:55:57 -0400498static int ram_save_host_page_urgent(PageSearchStatus *pss);
499
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +0800500static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
Xiao Guangrong6ef37712018-08-21 16:10:23 +0800501 ram_addr_t offset, uint8_t *source_buf);
Juan Quintela56e93d22015-05-07 19:33:31 +0200502
Peter Xuebd88a42022-10-11 17:55:54 -0400503/* NOTE: page is the PFN, not the real ram_addr_t. */
504static void pss_init(PageSearchStatus *pss, RAMBlock *rb, ram_addr_t page)
505{
506 pss->block = rb;
507 pss->page = page;
508 pss->complete_round = false;
509}
510
Peter Xu93589822022-10-11 17:55:57 -0400511/*
512 * Check whether two PSSs are actively sending the same page. Return true
513 * if it is, false otherwise.
514 */
515static bool pss_overlap(PageSearchStatus *pss1, PageSearchStatus *pss2)
516{
517 return pss1->host_page_sending && pss2->host_page_sending &&
518 (pss1->host_page_start == pss2->host_page_start);
519}
520
Juan Quintela56e93d22015-05-07 19:33:31 +0200521static void *do_data_compress(void *opaque)
522{
523 CompressParam *param = opaque;
Liang Lia7a9a882016-05-05 15:32:57 +0800524 RAMBlock *block;
525 ram_addr_t offset;
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +0800526 bool zero_page;
Juan Quintela56e93d22015-05-07 19:33:31 +0200527
Liang Lia7a9a882016-05-05 15:32:57 +0800528 qemu_mutex_lock(&param->mutex);
Liang Li90e56fb2016-05-05 15:32:56 +0800529 while (!param->quit) {
Liang Lia7a9a882016-05-05 15:32:57 +0800530 if (param->block) {
531 block = param->block;
532 offset = param->offset;
533 param->block = NULL;
534 qemu_mutex_unlock(&param->mutex);
535
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +0800536 zero_page = do_compress_ram_page(param->file, &param->stream,
537 block, offset, param->originbuf);
Liang Lia7a9a882016-05-05 15:32:57 +0800538
Liang Li0d9f9a52016-05-05 15:32:59 +0800539 qemu_mutex_lock(&comp_done_lock);
Liang Lia7a9a882016-05-05 15:32:57 +0800540 param->done = true;
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +0800541 param->zero_page = zero_page;
Liang Li0d9f9a52016-05-05 15:32:59 +0800542 qemu_cond_signal(&comp_done_cond);
543 qemu_mutex_unlock(&comp_done_lock);
Liang Lia7a9a882016-05-05 15:32:57 +0800544
545 qemu_mutex_lock(&param->mutex);
546 } else {
Juan Quintela56e93d22015-05-07 19:33:31 +0200547 qemu_cond_wait(&param->cond, &param->mutex);
548 }
Juan Quintela56e93d22015-05-07 19:33:31 +0200549 }
Liang Lia7a9a882016-05-05 15:32:57 +0800550 qemu_mutex_unlock(&param->mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +0200551
552 return NULL;
553}
554
Juan Quintelaf0afa332017-06-28 11:52:28 +0200555static void compress_threads_save_cleanup(void)
Juan Quintela56e93d22015-05-07 19:33:31 +0200556{
557 int i, thread_count;
558
Fei Li05306932018-09-25 17:14:40 +0800559 if (!migrate_use_compression() || !comp_param) {
Juan Quintela56e93d22015-05-07 19:33:31 +0200560 return;
561 }
Fei Li05306932018-09-25 17:14:40 +0800562
Juan Quintela56e93d22015-05-07 19:33:31 +0200563 thread_count = migrate_compress_threads();
564 for (i = 0; i < thread_count; i++) {
Xiao Guangrongdcaf4462018-03-30 15:51:20 +0800565 /*
566 * we use it as an indicator of whether the thread is
567 * properly initialized or not
568 */
569 if (!comp_param[i].file) {
570 break;
571 }
Fei Li05306932018-09-25 17:14:40 +0800572
573 qemu_mutex_lock(&comp_param[i].mutex);
574 comp_param[i].quit = true;
575 qemu_cond_signal(&comp_param[i].cond);
576 qemu_mutex_unlock(&comp_param[i].mutex);
577
Juan Quintela56e93d22015-05-07 19:33:31 +0200578 qemu_thread_join(compress_threads + i);
Juan Quintela56e93d22015-05-07 19:33:31 +0200579 qemu_mutex_destroy(&comp_param[i].mutex);
580 qemu_cond_destroy(&comp_param[i].cond);
Xiao Guangrongdcaf4462018-03-30 15:51:20 +0800581 deflateEnd(&comp_param[i].stream);
Xiao Guangrong34ab9e92018-03-30 15:51:22 +0800582 g_free(comp_param[i].originbuf);
Xiao Guangrongdcaf4462018-03-30 15:51:20 +0800583 qemu_fclose(comp_param[i].file);
584 comp_param[i].file = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +0200585 }
Liang Li0d9f9a52016-05-05 15:32:59 +0800586 qemu_mutex_destroy(&comp_done_lock);
587 qemu_cond_destroy(&comp_done_cond);
Juan Quintela56e93d22015-05-07 19:33:31 +0200588 g_free(compress_threads);
589 g_free(comp_param);
Juan Quintela56e93d22015-05-07 19:33:31 +0200590 compress_threads = NULL;
591 comp_param = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +0200592}
593
Xiao Guangrongdcaf4462018-03-30 15:51:20 +0800594static int compress_threads_save_setup(void)
Juan Quintela56e93d22015-05-07 19:33:31 +0200595{
596 int i, thread_count;
597
598 if (!migrate_use_compression()) {
Xiao Guangrongdcaf4462018-03-30 15:51:20 +0800599 return 0;
Juan Quintela56e93d22015-05-07 19:33:31 +0200600 }
Juan Quintela56e93d22015-05-07 19:33:31 +0200601 thread_count = migrate_compress_threads();
602 compress_threads = g_new0(QemuThread, thread_count);
603 comp_param = g_new0(CompressParam, thread_count);
Liang Li0d9f9a52016-05-05 15:32:59 +0800604 qemu_cond_init(&comp_done_cond);
605 qemu_mutex_init(&comp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +0200606 for (i = 0; i < thread_count; i++) {
Xiao Guangrong34ab9e92018-03-30 15:51:22 +0800607 comp_param[i].originbuf = g_try_malloc(TARGET_PAGE_SIZE);
608 if (!comp_param[i].originbuf) {
609 goto exit;
610 }
611
Xiao Guangrongdcaf4462018-03-30 15:51:20 +0800612 if (deflateInit(&comp_param[i].stream,
613 migrate_compress_level()) != Z_OK) {
Xiao Guangrong34ab9e92018-03-30 15:51:22 +0800614 g_free(comp_param[i].originbuf);
Xiao Guangrongdcaf4462018-03-30 15:51:20 +0800615 goto exit;
616 }
617
Cao jine110aa92016-07-29 15:10:31 +0800618 /* comp_param[i].file is just used as a dummy buffer to save data,
619 * so back it with a null channel that discards all output.
Juan Quintela56e93d22015-05-07 19:33:31 +0200620 */
Daniel P. Berrangé77ef2dc2022-06-20 12:02:05 +0100621 comp_param[i].file = qemu_file_new_output(
Daniel P. Berrangéc0e08252022-06-20 12:01:46 +0100622 QIO_CHANNEL(qio_channel_null_new()));
Juan Quintela56e93d22015-05-07 19:33:31 +0200623 comp_param[i].done = true;
Liang Li90e56fb2016-05-05 15:32:56 +0800624 comp_param[i].quit = false;
Juan Quintela56e93d22015-05-07 19:33:31 +0200625 qemu_mutex_init(&comp_param[i].mutex);
626 qemu_cond_init(&comp_param[i].cond);
627 qemu_thread_create(compress_threads + i, "compress",
628 do_data_compress, comp_param + i,
629 QEMU_THREAD_JOINABLE);
630 }
Xiao Guangrongdcaf4462018-03-30 15:51:20 +0800631 return 0;
632
633exit:
634 compress_threads_save_cleanup();
635 return -1;
Juan Quintela56e93d22015-05-07 19:33:31 +0200636}
637
638/**
Juan Quintela3d0684b2017-03-23 15:06:39 +0100639 * save_page_header: write page header to wire
Juan Quintela56e93d22015-05-07 19:33:31 +0200640 *
641 * If the block differs from the last block sent, it also writes the block identification
642 *
Juan Quintela3d0684b2017-03-23 15:06:39 +0100643 * Returns the number of bytes written
Juan Quintela56e93d22015-05-07 19:33:31 +0200644 *
Peter Xuec6f3ab2022-10-11 17:55:56 -0400645 * @pss: current PSS channel status
Juan Quintela56e93d22015-05-07 19:33:31 +0200646 * @block: block that contains the page we want to send
647 * @offset: offset inside the block for the page
648 * in the lower bits, it contains flags
649 */
Peter Xuec6f3ab2022-10-11 17:55:56 -0400650static size_t save_page_header(PageSearchStatus *pss, RAMBlock *block,
Juan Quintela2bf3aa82017-05-10 13:28:13 +0200651 ram_addr_t offset)
Juan Quintela56e93d22015-05-07 19:33:31 +0200652{
Liang Li9f5f3802015-07-13 17:34:10 +0800653 size_t size, len;
Peter Xuec6f3ab2022-10-11 17:55:56 -0400654 bool same_block = (block == pss->last_sent_block);
655 QEMUFile *f = pss->pss_channel;
Juan Quintela56e93d22015-05-07 19:33:31 +0200656
Peter Xu10661f12022-10-11 17:55:48 -0400657 if (same_block) {
Juan Quintela24795692017-03-21 11:45:01 +0100658 offset |= RAM_SAVE_FLAG_CONTINUE;
659 }
Juan Quintela2bf3aa82017-05-10 13:28:13 +0200660 qemu_put_be64(f, offset);
Juan Quintela56e93d22015-05-07 19:33:31 +0200661 size = 8;
662
Peter Xu10661f12022-10-11 17:55:48 -0400663 if (!same_block) {
Liang Li9f5f3802015-07-13 17:34:10 +0800664 len = strlen(block->idstr);
Juan Quintela2bf3aa82017-05-10 13:28:13 +0200665 qemu_put_byte(f, len);
666 qemu_put_buffer(f, (uint8_t *)block->idstr, len);
Liang Li9f5f3802015-07-13 17:34:10 +0800667 size += 1 + len;
Peter Xuec6f3ab2022-10-11 17:55:56 -0400668 pss->last_sent_block = block;
Juan Quintela56e93d22015-05-07 19:33:31 +0200669 }
670 return size;
671}
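
/*
 * For reference, the header written by save_page_header() is:
 *
 *     be64   offset | flags                 (always)
 *     u8     len = strlen(block->idstr)     (only when the block changes)
 *     u8     idstr[len]                     (only when the block changes)
 *
 * i.e. the block name is only re-sent when the page belongs to a different
 * block than the last page sent on this channel (no RAM_SAVE_FLAG_CONTINUE).
 */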
672
Juan Quintela3d0684b2017-03-23 15:06:39 +0100673/**
Olaf Hering179a8082021-07-08 18:21:59 +0200674 * mig_throttle_guest_down: throttle down the guest
Juan Quintela3d0684b2017-03-23 15:06:39 +0100675 *
676 * Reduce the amount of guest CPU execution to hopefully slow down memory
677 * writes. If guest dirty memory rate is reduced below the rate at
678 * which we can transfer pages to the destination then we should be
679 * able to complete migration. Some workloads dirty memory way too
680 * fast and will not effectively converge, even with auto-converge.
Jason J. Herne070afca2015-09-08 13:12:35 -0400681 */
Keqian Zhucbbf8182020-04-13 18:15:08 +0800682static void mig_throttle_guest_down(uint64_t bytes_dirty_period,
683 uint64_t bytes_dirty_threshold)
Jason J. Herne070afca2015-09-08 13:12:35 -0400684{
685 MigrationState *s = migrate_get_current();
Daniel P. Berrange2594f562016-04-27 11:05:14 +0100686 uint64_t pct_initial = s->parameters.cpu_throttle_initial;
Keqian Zhucbbf8182020-04-13 18:15:08 +0800687 uint64_t pct_increment = s->parameters.cpu_throttle_increment;
688 bool pct_tailslow = s->parameters.cpu_throttle_tailslow;
Li Qiang4cbc9c72018-08-01 06:00:20 -0700689 int pct_max = s->parameters.max_cpu_throttle;
Jason J. Herne070afca2015-09-08 13:12:35 -0400690
Keqian Zhucbbf8182020-04-13 18:15:08 +0800691 uint64_t throttle_now = cpu_throttle_get_percentage();
692 uint64_t cpu_now, cpu_ideal, throttle_inc;
693
Jason J. Herne070afca2015-09-08 13:12:35 -0400694 /* We have not started throttling yet. Let's start it. */
695 if (!cpu_throttle_active()) {
696 cpu_throttle_set(pct_initial);
697 } else {
698 /* Throttling already on, just increase the rate */
Keqian Zhucbbf8182020-04-13 18:15:08 +0800699 if (!pct_tailslow) {
700 throttle_inc = pct_increment;
701 } else {
702 /* Compute the ideal CPU percentage used by Guest, which may
703 * make the dirty rate match the dirty rate threshold. */
704 cpu_now = 100 - throttle_now;
705 cpu_ideal = cpu_now * (bytes_dirty_threshold * 1.0 /
706 bytes_dirty_period);
707 throttle_inc = MIN(cpu_now - cpu_ideal, pct_increment);
708 }
709 cpu_throttle_set(MIN(throttle_now + throttle_inc, pct_max));
Jason J. Herne070afca2015-09-08 13:12:35 -0400710 }
711}
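
/*
 * Worked example for the tailslow path above (numbers are illustrative
 * only): with throttle_now = 60 the guest keeps cpu_now = 40. If the
 * period dirtied twice what could be transferred, then
 * bytes_dirty_threshold / bytes_dirty_period = 0.5, cpu_ideal = 40 * 0.5
 * = 20 and throttle_inc = MIN(40 - 20, pct_increment), so the throttle is
 * raised towards 80, still capped by pct_increment and pct_max.
 */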
712
Rao, Lei91fe9a82021-11-09 11:04:54 +0800713void mig_throttle_counter_reset(void)
714{
715 RAMState *rs = ram_state;
716
717 rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
718 rs->num_dirty_pages_period = 0;
Peter Xu23b75762022-10-11 17:55:51 -0400719 rs->bytes_xfer_prev = stat64_get(&ram_atomic_counters.transferred);
Rao, Lei91fe9a82021-11-09 11:04:54 +0800720}
721
Juan Quintela3d0684b2017-03-23 15:06:39 +0100722/**
723 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
724 *
Juan Quintela6f37bb82017-03-13 19:26:29 +0100725 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +0100726 * @current_addr: address for the zero page
727 *
728 * Update the xbzrle cache to reflect a page that's been sent as all 0.
Juan Quintela56e93d22015-05-07 19:33:31 +0200729 * The important thing is that a stale (not-yet-0'd) page be replaced
730 * by the new data.
731 * As a bonus, if the page wasn't in the cache it gets added so that
Juan Quintela3d0684b2017-03-23 15:06:39 +0100732 * when a small write is made into the 0'd page it gets XBZRLE sent.
Juan Quintela56e93d22015-05-07 19:33:31 +0200733 */
Juan Quintela6f37bb82017-03-13 19:26:29 +0100734static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
Juan Quintela56e93d22015-05-07 19:33:31 +0200735{
Juan Quintela56e93d22015-05-07 19:33:31 +0200736 /* We don't care if this fails to allocate a new cache page
737 * as long as it updated an old one */
Juan Quintelac00e0922017-05-09 16:22:01 +0200738 cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page,
Juan Quintela93604472017-06-06 19:49:03 +0200739 ram_counters.dirty_sync_count);
Juan Quintela56e93d22015-05-07 19:33:31 +0200740}
741
742#define ENCODING_FLAG_XBZRLE 0x1
743
744/**
745 * save_xbzrle_page: compress and send current page
746 *
747 * Returns: 1 means that we wrote the page
748 * 0 means that page is identical to the one already sent
749 * -1 means that xbzrle would be longer than normal
750 *
Juan Quintela5a987732017-03-13 19:39:02 +0100751 * @rs: current RAM state
Peter Xuec6f3ab2022-10-11 17:55:56 -0400752 * @pss: current PSS channel
Juan Quintela3d0684b2017-03-23 15:06:39 +0100753 * @current_data: pointer to the address of the page contents
754 * @current_addr: addr of the page
Juan Quintela56e93d22015-05-07 19:33:31 +0200755 * @block: block that contains the page we want to send
756 * @offset: offset inside the block for the page
Juan Quintela56e93d22015-05-07 19:33:31 +0200757 */
Peter Xuec6f3ab2022-10-11 17:55:56 -0400758static int save_xbzrle_page(RAMState *rs, PageSearchStatus *pss,
Peter Xu61717ea2022-10-11 17:55:53 -0400759 uint8_t **current_data, ram_addr_t current_addr,
760 RAMBlock *block, ram_addr_t offset)
Juan Quintela56e93d22015-05-07 19:33:31 +0200761{
762 int encoded_len = 0, bytes_xbzrle;
763 uint8_t *prev_cached_page;
Peter Xuec6f3ab2022-10-11 17:55:56 -0400764 QEMUFile *file = pss->pss_channel;
Juan Quintela56e93d22015-05-07 19:33:31 +0200765
Juan Quintela93604472017-06-06 19:49:03 +0200766 if (!cache_is_cached(XBZRLE.cache, current_addr,
767 ram_counters.dirty_sync_count)) {
768 xbzrle_counters.cache_miss++;
Juan Quintela05931ec2021-12-15 19:01:21 +0100769 if (!rs->last_stage) {
Juan Quintela56e93d22015-05-07 19:33:31 +0200770 if (cache_insert(XBZRLE.cache, current_addr, *current_data,
Juan Quintela93604472017-06-06 19:49:03 +0200771 ram_counters.dirty_sync_count) == -1) {
Juan Quintela56e93d22015-05-07 19:33:31 +0200772 return -1;
773 } else {
774 /* update *current_data when the page has been
775 inserted into cache */
776 *current_data = get_cached_data(XBZRLE.cache, current_addr);
777 }
778 }
779 return -1;
780 }
781
Wei Wange460a4b2020-04-30 08:59:35 +0800782 /*
783 * Reaching here means the page has hit the xbzrle cache, no matter what
784 * encoding result it is (normal encoding, overflow or skipping the page),
zhaolichang3a4452d2020-09-17 15:50:21 +0800785 * count the page as encoded. This is used to calculate the encoding rate.
Wei Wange460a4b2020-04-30 08:59:35 +0800786 *
787 * Example: 2 pages (8KB) being encoded, first page encoding generates 2KB,
788 * 2nd page turns out to be skipped (i.e. no new bytes written to the
789 * page), the overall encoding rate will be 8KB / 2KB = 4, which has the
790 * skipped page included. In this way, the encoding rate can tell if the
791 * guest page is good for xbzrle encoding.
792 */
793 xbzrle_counters.pages++;
Juan Quintela56e93d22015-05-07 19:33:31 +0200794 prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
795
796 /* save current buffer into memory */
797 memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
798
799 /* XBZRLE encoding (if there is no overflow) */
800 encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
801 TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
802 TARGET_PAGE_SIZE);
Wei Yangca353802019-06-10 08:41:59 +0800803
804 /*
805 * Update the cache contents, so that it corresponds to the data
806 * sent, in all cases except where we skip the page.
807 */
Juan Quintela05931ec2021-12-15 19:01:21 +0100808 if (!rs->last_stage && encoded_len != 0) {
Wei Yangca353802019-06-10 08:41:59 +0800809 memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
810 /*
811 * In the case where we couldn't compress, ensure that the caller
812 * sends the data from the cache, since the guest might have
813 * changed the RAM since we copied it.
814 */
815 *current_data = prev_cached_page;
816 }
817
Juan Quintela56e93d22015-05-07 19:33:31 +0200818 if (encoded_len == 0) {
Juan Quintela55c44462017-01-23 22:32:05 +0100819 trace_save_xbzrle_page_skipping();
Juan Quintela56e93d22015-05-07 19:33:31 +0200820 return 0;
821 } else if (encoded_len == -1) {
Juan Quintela55c44462017-01-23 22:32:05 +0100822 trace_save_xbzrle_page_overflow();
Juan Quintela93604472017-06-06 19:49:03 +0200823 xbzrle_counters.overflow++;
Wei Wange460a4b2020-04-30 08:59:35 +0800824 xbzrle_counters.bytes += TARGET_PAGE_SIZE;
Juan Quintela56e93d22015-05-07 19:33:31 +0200825 return -1;
826 }
827
Juan Quintela56e93d22015-05-07 19:33:31 +0200828 /* Send XBZRLE based compressed page */
Peter Xuec6f3ab2022-10-11 17:55:56 -0400829 bytes_xbzrle = save_page_header(pss, block,
Juan Quintela204b88b2017-03-15 09:16:57 +0100830 offset | RAM_SAVE_FLAG_XBZRLE);
Peter Xu61717ea2022-10-11 17:55:53 -0400831 qemu_put_byte(file, ENCODING_FLAG_XBZRLE);
832 qemu_put_be16(file, encoded_len);
833 qemu_put_buffer(file, XBZRLE.encoded_buf, encoded_len);
Juan Quintela56e93d22015-05-07 19:33:31 +0200834 bytes_xbzrle += encoded_len + 1 + 2;
Wei Wange460a4b2020-04-30 08:59:35 +0800835 /*
836 * Like compressed_size (please see update_compress_thread_counts),
837 * the xbzrle encoded bytes don't count the 8 byte header with
838 * RAM_SAVE_FLAG_CONTINUE.
839 */
840 xbzrle_counters.bytes += bytes_xbzrle - 8;
David Edmondson4c2d0f62021-12-21 09:34:40 +0000841 ram_transferred_add(bytes_xbzrle);
Juan Quintela56e93d22015-05-07 19:33:31 +0200842
843 return 1;
844}
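
/*
 * For reference, an XBZRLE page on the wire (as produced above) is:
 *
 *     save_page_header()  with RAM_SAVE_FLAG_XBZRLE set
 *     u8     ENCODING_FLAG_XBZRLE
 *     be16   encoded_len
 *     u8     encoded_buf[encoded_len]
 */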
845
Juan Quintela3d0684b2017-03-23 15:06:39 +0100846/**
Peter Xud9e474e2022-10-11 17:55:52 -0400847 * pss_find_next_dirty: find the next dirty page of current ramblock
Dr. David Alan Gilbertf3f491f2015-11-05 18:11:01 +0000848 *
Peter Xud9e474e2022-10-11 17:55:52 -0400849 * This function updates pss->page to point to the next dirty page index
850 * within the ramblock to migrate, or the end of ramblock when nothing
851 * found. Note that when pss->host_page_sending==true it means we're
852 * during sending a host page, so we won't look for dirty page that is
853 * outside the host page boundary.
Juan Quintela3d0684b2017-03-23 15:06:39 +0100854 *
Peter Xud9e474e2022-10-11 17:55:52 -0400855 * @pss: the current page search status
Dr. David Alan Gilbertf3f491f2015-11-05 18:11:01 +0000856 */
Peter Xud9e474e2022-10-11 17:55:52 -0400857static void pss_find_next_dirty(PageSearchStatus *pss)
Juan Quintela56e93d22015-05-07 19:33:31 +0200858{
Peter Xud9e474e2022-10-11 17:55:52 -0400859 RAMBlock *rb = pss->block;
Juan Quintela6b6712e2017-03-22 15:18:04 +0100860 unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
861 unsigned long *bitmap = rb->bmap;
Juan Quintela56e93d22015-05-07 19:33:31 +0200862
Yury Kotovfbd162e2019-02-15 20:45:46 +0300863 if (ramblock_is_ignored(rb)) {
Peter Xud9e474e2022-10-11 17:55:52 -0400864 /* Points directly to the end, so we know no dirty page */
865 pss->page = size;
866 return;
CĂ©dric Le Goaterb895de52018-05-14 08:57:00 +0200867 }
868
Peter Xud9e474e2022-10-11 17:55:52 -0400869 /*
870 * If we are in the middle of sending a host page, only look for dirty
871 * pages within the current host page being sent.
872 */
873 if (pss->host_page_sending) {
874 assert(pss->host_page_end);
875 size = MIN(size, pss->host_page_end);
876 }
877
878 pss->page = find_next_bit(bitmap, size, pss->page);
Juan Quintela56e93d22015-05-07 19:33:31 +0200879}
880
David Hildenbrand1230a252021-09-04 18:09:07 +0200881static void migration_clear_memory_region_dirty_bitmap(RAMBlock *rb,
Wei Wang3143577d2021-07-22 04:30:55 -0400882 unsigned long page)
883{
884 uint8_t shift;
885 hwaddr size, start;
886
887 if (!rb->clear_bmap || !clear_bmap_test_and_clear(rb, page)) {
888 return;
889 }
890
891 shift = rb->clear_bmap_shift;
892 /*
893 * CLEAR_BITMAP_SHIFT_MIN should always guarantee this... this
894 * can make things easier sometimes since then start address
895 * of the small chunk will always be 64 pages aligned so the
896 * bitmap will always be aligned to unsigned long. We should
897 * even be able to remove this restriction but I'm simply
898 * keeping it.
899 */
900 assert(shift >= 6);
901
902 size = 1ULL << (TARGET_PAGE_BITS + shift);
David Hildenbrand76482972021-10-11 19:53:44 +0200903 start = QEMU_ALIGN_DOWN((ram_addr_t)page << TARGET_PAGE_BITS, size);
Wei Wang3143577d2021-07-22 04:30:55 -0400904 trace_migration_bitmap_clear_dirty(rb->idstr, start, size, page);
905 memory_region_clear_dirty_bitmap(rb->mr, start, size);
906}
907
908static void
David Hildenbrand1230a252021-09-04 18:09:07 +0200909migration_clear_memory_region_dirty_bitmap_range(RAMBlock *rb,
Wei Wang3143577d2021-07-22 04:30:55 -0400910 unsigned long start,
911 unsigned long npages)
912{
913 unsigned long i, chunk_pages = 1UL << rb->clear_bmap_shift;
914 unsigned long chunk_start = QEMU_ALIGN_DOWN(start, chunk_pages);
915 unsigned long chunk_end = QEMU_ALIGN_UP(start + npages, chunk_pages);
916
917 /*
918 * Clear pages from start to start + npages - 1, so the end boundary is
919 * exclusive.
920 */
921 for (i = chunk_start; i < chunk_end; i += chunk_pages) {
David Hildenbrand1230a252021-09-04 18:09:07 +0200922 migration_clear_memory_region_dirty_bitmap(rb, i);
Wei Wang3143577d2021-07-22 04:30:55 -0400923 }
924}
925
Rao, Leia6a83ce2021-11-09 11:04:55 +0800926/*
927 * colo_bitmap_find_dirty: find contiguous dirty pages from start
928 *
929 * Returns the page offset within the memory region of the start of the
930 * contiguous dirty page run
931 *
932 * @rs: current RAM state
933 * @rb: RAMBlock where to search for dirty pages
934 * @start: page where we start the search
935 * @num: the number of contiguous dirty pages
936 */
937static inline
938unsigned long colo_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
939 unsigned long start, unsigned long *num)
940{
941 unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
942 unsigned long *bitmap = rb->bmap;
943 unsigned long first, next;
944
945 *num = 0;
946
947 if (ramblock_is_ignored(rb)) {
948 return size;
949 }
950
951 first = find_next_bit(bitmap, size, start);
952 if (first >= size) {
953 return first;
954 }
955 next = find_next_zero_bit(bitmap, size, first + 1);
956 assert(next >= first);
957 *num = next - first;
958 return first;
959}
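
/*
 * A minimal usage sketch (loop variables are illustrative only): walk every
 * contiguous dirty range of a block.
 *
 *     unsigned long num, offset = 0;
 *     unsigned long pages = rb->used_length >> TARGET_PAGE_BITS;
 *
 *     while ((offset = colo_bitmap_find_dirty(rs, rb, offset, &num)) < pages) {
 *         ... process pages [offset, offset + num) ...
 *         offset += num;
 *     }
 */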
960
Juan Quintela06b10682017-03-21 15:18:05 +0100961static inline bool migration_bitmap_clear_dirty(RAMState *rs,
Juan Quintelaf20e2862017-03-21 16:19:05 +0100962 RAMBlock *rb,
963 unsigned long page)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +0000964{
965 bool ret;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +0000966
Peter Xu002cad62019-06-03 14:50:56 +0800967 /*
968 * Clear dirty bitmap if needed. This _must_ be called before we
969 * send any of the page in the chunk because we need to make sure
970 * we can capture further page content changes when we sync dirty
971 * log the next time. So as long as we are going to send any of
972 * the page in the chunk we clear the remote dirty bitmap for all.
973 * Clearing it earlier won't be a problem, but too late will.
974 */
David Hildenbrand1230a252021-09-04 18:09:07 +0200975 migration_clear_memory_region_dirty_bitmap(rb, page);
Peter Xu002cad62019-06-03 14:50:56 +0800976
Juan Quintela6b6712e2017-03-22 15:18:04 +0100977 ret = test_and_clear_bit(page, rb->bmap);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +0000978 if (ret) {
Juan Quintela0d8ec882017-03-13 21:21:41 +0100979 rs->migration_dirty_pages--;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +0000980 }
Wei Wang386a9072018-12-11 16:24:49 +0800981
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +0000982 return ret;
983}
984
David Hildenbrandbe39b4c2021-10-11 19:53:41 +0200985static void dirty_bitmap_clear_section(MemoryRegionSection *section,
986 void *opaque)
987{
988 const hwaddr offset = section->offset_within_region;
989 const hwaddr size = int128_get64(section->size);
990 const unsigned long start = offset >> TARGET_PAGE_BITS;
991 const unsigned long npages = size >> TARGET_PAGE_BITS;
992 RAMBlock *rb = section->mr->ram_block;
993 uint64_t *cleared_bits = opaque;
994
995 /*
996 * We don't grab ram_state->bitmap_mutex because we expect to run
997 * only when starting migration or during postcopy recovery where
998 * we don't have concurrent access.
999 */
1000 if (!migration_in_postcopy() && !migrate_background_snapshot()) {
1001 migration_clear_memory_region_dirty_bitmap_range(rb, start, npages);
1002 }
1003 *cleared_bits += bitmap_count_one_with_offset(rb->bmap, start, npages);
1004 bitmap_clear(rb->bmap, start, npages);
1005}
1006
1007/*
1008 * Exclude all dirty pages from migration that fall into a discarded range as
1009 * managed by a RamDiscardManager responsible for the mapped memory region of
1010 * the RAMBlock. Clear the corresponding bits in the dirty bitmaps.
1011 *
1012 * Discarded pages ("logically unplugged") have undefined content and must
1013 * not get migrated, because even reading these pages for migration might
1014 * result in undesired behavior.
1015 *
1016 * Returns the number of cleared bits in the RAMBlock dirty bitmap.
1017 *
1018 * Note: The result is only stable while migrating (precopy/postcopy).
1019 */
1020static uint64_t ramblock_dirty_bitmap_clear_discarded_pages(RAMBlock *rb)
1021{
1022 uint64_t cleared_bits = 0;
1023
1024 if (rb->mr && rb->bmap && memory_region_has_ram_discard_manager(rb->mr)) {
1025 RamDiscardManager *rdm = memory_region_get_ram_discard_manager(rb->mr);
1026 MemoryRegionSection section = {
1027 .mr = rb->mr,
1028 .offset_within_region = 0,
1029 .size = int128_make64(qemu_ram_get_used_length(rb)),
1030 };
1031
1032 ram_discard_manager_replay_discarded(rdm, &section,
1033 dirty_bitmap_clear_section,
1034 &cleared_bits);
1035 }
1036 return cleared_bits;
1037}
1038
David Hildenbrand9470c5e2021-10-11 19:53:43 +02001039/*
1040 * Check if a host-page aligned page falls into a discarded range as managed by
1041 * a RamDiscardManager responsible for the mapped memory region of the RAMBlock.
1042 *
1043 * Note: The result is only stable while migrating (precopy/postcopy).
1044 */
1045bool ramblock_page_is_discarded(RAMBlock *rb, ram_addr_t start)
1046{
1047 if (rb->mr && memory_region_has_ram_discard_manager(rb->mr)) {
1048 RamDiscardManager *rdm = memory_region_get_ram_discard_manager(rb->mr);
1049 MemoryRegionSection section = {
1050 .mr = rb->mr,
1051 .offset_within_region = start,
1052 .size = int128_make64(qemu_ram_pagesize(rb)),
1053 };
1054
1055 return !ram_discard_manager_is_populated(rdm, &section);
1056 }
1057 return false;
1058}
1059
Peter Xu267691b2019-06-03 14:50:46 +08001060/* Called with RCU critical section */
Wei Yang7a3e9572019-08-08 11:31:55 +08001061static void ramblock_sync_dirty_bitmap(RAMState *rs, RAMBlock *rb)
Juan Quintela56e93d22015-05-07 19:33:31 +02001062{
Keqian Zhufb613582020-06-22 11:20:37 +08001063 uint64_t new_dirty_pages =
1064 cpu_physical_memory_sync_dirty_bitmap(rb, 0, rb->used_length);
1065
1066 rs->migration_dirty_pages += new_dirty_pages;
1067 rs->num_dirty_pages_period += new_dirty_pages;
Juan Quintela56e93d22015-05-07 19:33:31 +02001068}
1069
Juan Quintela3d0684b2017-03-23 15:06:39 +01001070/**
1071 * ram_pagesize_summary: calculate all the pagesizes of a VM
1072 *
1073 * Returns a summary bitmap of the page sizes of all RAMBlocks
1074 *
1075 * For VMs with just normal pages this is equivalent to the host page
1076 * size. If it's got some huge pages then it's the OR of all the
1077 * different page sizes.
Dr. David Alan Gilberte8ca1db2017-02-24 18:28:29 +00001078 */
1079uint64_t ram_pagesize_summary(void)
1080{
1081 RAMBlock *block;
1082 uint64_t summary = 0;
1083
Yury Kotovfbd162e2019-02-15 20:45:46 +03001084 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Dr. David Alan Gilberte8ca1db2017-02-24 18:28:29 +00001085 summary |= block->page_size;
1086 }
1087
1088 return summary;
1089}
1090
Xiao Guangrongaecbfe92019-01-11 14:37:30 +08001091uint64_t ram_get_total_transferred_pages(void)
1092{
Peter Xu23b75762022-10-11 17:55:51 -04001093 return stat64_get(&ram_atomic_counters.normal) +
1094 stat64_get(&ram_atomic_counters.duplicate) +
1095 compression_counters.pages + xbzrle_counters.pages;
Xiao Guangrongaecbfe92019-01-11 14:37:30 +08001096}
1097
Xiao Guangrongb7340352018-06-04 17:55:12 +08001098static void migration_update_rates(RAMState *rs, int64_t end_time)
1099{
Xiao Guangrongbe8b02e2018-09-03 17:26:42 +08001100 uint64_t page_count = rs->target_page_count - rs->target_page_count_prev;
Xiao Guangrong76e03002018-09-06 15:01:00 +08001101 double compressed_size;
Xiao Guangrongb7340352018-06-04 17:55:12 +08001102
1103 /* calculate period counters */
1104 ram_counters.dirty_pages_rate = rs->num_dirty_pages_period * 1000
1105 / (end_time - rs->time_last_bitmap_sync);
1106
Xiao Guangrongbe8b02e2018-09-03 17:26:42 +08001107 if (!page_count) {
Xiao Guangrongb7340352018-06-04 17:55:12 +08001108 return;
1109 }
1110
1111 if (migrate_use_xbzrle()) {
Wei Wange460a4b2020-04-30 08:59:35 +08001112 double encoded_size, unencoded_size;
1113
Xiao Guangrongb7340352018-06-04 17:55:12 +08001114 xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss -
Xiao Guangrongbe8b02e2018-09-03 17:26:42 +08001115 rs->xbzrle_cache_miss_prev) / page_count;
Xiao Guangrongb7340352018-06-04 17:55:12 +08001116 rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
Wei Wange460a4b2020-04-30 08:59:35 +08001117 unencoded_size = (xbzrle_counters.pages - rs->xbzrle_pages_prev) *
1118 TARGET_PAGE_SIZE;
1119 encoded_size = xbzrle_counters.bytes - rs->xbzrle_bytes_prev;
Wei Wang92271402020-06-17 13:13:05 -07001120 if (xbzrle_counters.pages == rs->xbzrle_pages_prev || !encoded_size) {
Wei Wange460a4b2020-04-30 08:59:35 +08001121 xbzrle_counters.encoding_rate = 0;
Wei Wange460a4b2020-04-30 08:59:35 +08001122 } else {
1123 xbzrle_counters.encoding_rate = unencoded_size / encoded_size;
1124 }
1125 rs->xbzrle_pages_prev = xbzrle_counters.pages;
1126 rs->xbzrle_bytes_prev = xbzrle_counters.bytes;
Xiao Guangrongb7340352018-06-04 17:55:12 +08001127 }
Xiao Guangrong76e03002018-09-06 15:01:00 +08001128
1129 if (migrate_use_compression()) {
1130 compression_counters.busy_rate = (double)(compression_counters.busy -
1131 rs->compress_thread_busy_prev) / page_count;
1132 rs->compress_thread_busy_prev = compression_counters.busy;
1133
1134 compressed_size = compression_counters.compressed_size -
1135 rs->compressed_size_prev;
1136 if (compressed_size) {
1137 double uncompressed_size = (compression_counters.pages -
1138 rs->compress_pages_prev) * TARGET_PAGE_SIZE;
1139
1140 /* Compression-Ratio = Uncompressed-size / Compressed-size */
1141 compression_counters.compression_rate =
1142 uncompressed_size / compressed_size;
1143
1144 rs->compress_pages_prev = compression_counters.pages;
1145 rs->compressed_size_prev = compression_counters.compressed_size;
1146 }
1147 }
Xiao Guangrongb7340352018-06-04 17:55:12 +08001148}
1149
Keqian Zhudc14a472020-02-24 10:31:42 +08001150static void migration_trigger_throttle(RAMState *rs)
1151{
1152 MigrationState *s = migrate_get_current();
1153 uint64_t threshold = s->parameters.throttle_trigger_threshold;
Peter Xu23b75762022-10-11 17:55:51 -04001154 uint64_t bytes_xfer_period =
1155 stat64_get(&ram_atomic_counters.transferred) - rs->bytes_xfer_prev;
Keqian Zhudc14a472020-02-24 10:31:42 +08001156 uint64_t bytes_dirty_period = rs->num_dirty_pages_period * TARGET_PAGE_SIZE;
1157 uint64_t bytes_dirty_threshold = bytes_xfer_period * threshold / 100;
1158
1159 /* During block migration the auto-converge logic incorrectly detects
1160 * that ram migration makes no progress. Avoid this by disabling the
1161 * throttling logic during the bulk phase of block migration. */
1162 if (migrate_auto_converge() && !blk_mig_bulk_active()) {
1163 /* The following detection logic can be refined later. For now:
1164 Check to see if the ratio between dirtied bytes and the approx.
1165 amount of bytes that just got transferred since the last time
1166 we were in this routine reaches the threshold. If that happens
1167 twice, start or increase throttling. */
1168
1169 if ((bytes_dirty_period > bytes_dirty_threshold) &&
1170 (++rs->dirty_rate_high_cnt >= 2)) {
1171 trace_migration_throttle();
1172 rs->dirty_rate_high_cnt = 0;
Keqian Zhucbbf8182020-04-13 18:15:08 +08001173 mig_throttle_guest_down(bytes_dirty_period,
1174 bytes_dirty_threshold);
Keqian Zhudc14a472020-02-24 10:31:42 +08001175 }
1176 }
1177}
1178
Juan Quintela8d820d62017-03-13 19:35:50 +01001179static void migration_bitmap_sync(RAMState *rs)
Juan Quintela56e93d22015-05-07 19:33:31 +02001180{
1181 RAMBlock *block;
Juan Quintela56e93d22015-05-07 19:33:31 +02001182 int64_t end_time;
Juan Quintela56e93d22015-05-07 19:33:31 +02001183
Juan Quintela93604472017-06-06 19:49:03 +02001184 ram_counters.dirty_sync_count++;
Juan Quintela56e93d22015-05-07 19:33:31 +02001185
Juan Quintelaf664da82017-03-13 19:44:57 +01001186 if (!rs->time_last_bitmap_sync) {
1187 rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
Juan Quintela56e93d22015-05-07 19:33:31 +02001188 }
1189
1190 trace_migration_bitmap_sync_start();
Paolo Bonzini9c1f8f42016-09-22 16:08:31 +02001191 memory_global_dirty_log_sync();
Juan Quintela56e93d22015-05-07 19:33:31 +02001192
Juan Quintela108cfae2017-03-13 21:38:09 +01001193 qemu_mutex_lock(&rs->bitmap_mutex);
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01001194 WITH_RCU_READ_LOCK_GUARD() {
1195 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
1196 ramblock_sync_dirty_bitmap(rs, block);
1197 }
1198 ram_counters.remaining = ram_bytes_remaining();
Juan Quintela56e93d22015-05-07 19:33:31 +02001199 }
Juan Quintela108cfae2017-03-13 21:38:09 +01001200 qemu_mutex_unlock(&rs->bitmap_mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02001201
Paolo Bonzini9458a9a2018-02-06 18:37:39 +01001202 memory_global_after_dirty_log_sync();
Juan Quintelaa66cd902017-03-28 15:02:43 +02001203 trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);
Chao Fan1ffb5df2017-03-14 09:55:07 +08001204
Juan Quintela56e93d22015-05-07 19:33:31 +02001205 end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
1206
1207 /* more than 1 second = 1000 milliseconds */
Juan Quintelaf664da82017-03-13 19:44:57 +01001208 if (end_time > rs->time_last_bitmap_sync + 1000) {
Keqian Zhudc14a472020-02-24 10:31:42 +08001209 migration_trigger_throttle(rs);
Jason J. Herne070afca2015-09-08 13:12:35 -04001210
Xiao Guangrongb7340352018-06-04 17:55:12 +08001211 migration_update_rates(rs, end_time);
1212
Xiao Guangrongbe8b02e2018-09-03 17:26:42 +08001213 rs->target_page_count_prev = rs->target_page_count;
Felipe Franciosid693c6f2017-05-24 17:10:01 +01001214
1215 /* reset period counters */
Juan Quintelaf664da82017-03-13 19:44:57 +01001216 rs->time_last_bitmap_sync = end_time;
Juan Quintelaa66cd902017-03-28 15:02:43 +02001217 rs->num_dirty_pages_period = 0;
Peter Xu23b75762022-10-11 17:55:51 -04001218 rs->bytes_xfer_prev = stat64_get(&ram_atomic_counters.transferred);
Juan Quintela56e93d22015-05-07 19:33:31 +02001219 }
Dr. David Alan Gilbert4addcd42015-12-16 11:47:36 +00001220 if (migrate_use_events()) {
Peter Xu3ab72382018-08-15 21:37:37 +08001221 qapi_event_send_migration_pass(ram_counters.dirty_sync_count);
Dr. David Alan Gilbert4addcd42015-12-16 11:47:36 +00001222 }
Juan Quintela56e93d22015-05-07 19:33:31 +02001223}
1224
Wei Wangbd227062018-12-11 16:24:51 +08001225static void migration_bitmap_sync_precopy(RAMState *rs)
1226{
1227 Error *local_err = NULL;
1228
1229 /*
1230 * The current notifier usage is just an optimization to migration, so we
1231 * don't stop the normal migration process in the error case.
1232 */
1233 if (precopy_notify(PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC, &local_err)) {
1234 error_report_err(local_err);
Vladimir Sementsov-Ogievskiyb4a17332020-03-24 18:36:29 +03001235 local_err = NULL;
Wei Wangbd227062018-12-11 16:24:51 +08001236 }
1237
1238 migration_bitmap_sync(rs);
1239
1240 if (precopy_notify(PRECOPY_NOTIFY_AFTER_BITMAP_SYNC, &local_err)) {
1241 error_report_err(local_err);
1242 }
1243}
1244
Juan Quintelaa4dbaf82021-12-16 10:19:38 +01001245void ram_release_page(const char *rbname, uint64_t offset)
Juan Quintela47fe16f2021-12-16 09:58:49 +01001246{
1247 if (!migrate_release_ram() || !migration_in_postcopy()) {
1248 return;
1249 }
1250
1251 ram_discard_range(rbname, offset, TARGET_PAGE_SIZE);
1252}
1253
Juan Quintela56e93d22015-05-07 19:33:31 +02001254/**
Xiao Guangrong6c97ec52018-08-21 16:10:22 +08001255 * save_zero_page_to_file: send the zero page to the file
1256 *
1257 * Returns the size of the data written to the file; 0 means the page is
1258 * not a zero page
1259 *
Peter Xuec6f3ab2022-10-11 17:55:56 -04001260 * @pss: current PSS channel
Xiao Guangrong6c97ec52018-08-21 16:10:22 +08001261 * @block: block that contains the page we want to send
1262 * @offset: offset inside the block for the page
1263 */
Peter Xuec6f3ab2022-10-11 17:55:56 -04001264static int save_zero_page_to_file(PageSearchStatus *pss,
Xiao Guangrong6c97ec52018-08-21 16:10:22 +08001265 RAMBlock *block, ram_addr_t offset)
1266{
1267 uint8_t *p = block->host + offset;
Peter Xuec6f3ab2022-10-11 17:55:56 -04001268 QEMUFile *file = pss->pss_channel;
Xiao Guangrong6c97ec52018-08-21 16:10:22 +08001269 int len = 0;
1270
Juan Quintelabad452a2021-11-18 15:56:38 +01001271 if (buffer_is_zero(p, TARGET_PAGE_SIZE)) {
Peter Xuec6f3ab2022-10-11 17:55:56 -04001272 len += save_page_header(pss, block, offset | RAM_SAVE_FLAG_ZERO);
Xiao Guangrong6c97ec52018-08-21 16:10:22 +08001273 qemu_put_byte(file, 0);
1274 len += 1;
Juan Quintela47fe16f2021-12-16 09:58:49 +01001275 ram_release_page(block->idstr, offset);
Xiao Guangrong6c97ec52018-08-21 16:10:22 +08001276 }
1277 return len;
1278}
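/*
 * On-the-wire sketch for a zero page (assuming save_page_header() writes the
 * usual be64 "offset | flags" word, plus the block idstr when
 * RAM_SAVE_FLAG_CONTINUE is not set):
 *
 *   [ be64: offset | RAM_SAVE_FLAG_ZERO ][ idstr, first page of a block only ][ 0x00 ]
 *
 * The returned len is that header size plus the single trailing zero byte;
 * for a non-zero page nothing is written and 0 is returned.
 */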
1279
1280/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001281 * save_zero_page: send the zero page to the stream
Juan Quintela56e93d22015-05-07 19:33:31 +02001282 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001283 * Returns the number of pages written.
Juan Quintela56e93d22015-05-07 19:33:31 +02001284 *
Peter Xuec6f3ab2022-10-11 17:55:56 -04001285 * @pss: current PSS channel
Juan Quintela56e93d22015-05-07 19:33:31 +02001286 * @block: block that contains the page we want to send
1287 * @offset: offset inside the block for the page
Juan Quintela56e93d22015-05-07 19:33:31 +02001288 */
Peter Xuec6f3ab2022-10-11 17:55:56 -04001289static int save_zero_page(PageSearchStatus *pss, RAMBlock *block,
Peter Xu61717ea2022-10-11 17:55:53 -04001290 ram_addr_t offset)
Juan Quintela56e93d22015-05-07 19:33:31 +02001291{
Peter Xuec6f3ab2022-10-11 17:55:56 -04001292 int len = save_zero_page_to_file(pss, block, offset);
Juan Quintela56e93d22015-05-07 19:33:31 +02001293
Xiao Guangrong6c97ec52018-08-21 16:10:22 +08001294 if (len) {
Peter Xu23b75762022-10-11 17:55:51 -04001295 stat64_add(&ram_atomic_counters.duplicate, 1);
David Edmondson4c2d0f62021-12-21 09:34:40 +00001296 ram_transferred_add(len);
Xiao Guangrong6c97ec52018-08-21 16:10:22 +08001297 return 1;
Juan Quintela56e93d22015-05-07 19:33:31 +02001298 }
Xiao Guangrong6c97ec52018-08-21 16:10:22 +08001299 return -1;
Juan Quintela56e93d22015-05-07 19:33:31 +02001300}
1301
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08001302/*
1303 * @pages: the number of pages written by the control path,
1304 * < 0 - error
1305 * > 0 - number of pages written
1306 *
1307 * Return true if the page has been saved, otherwise false is returned.
1308 */
Peter Xu61717ea2022-10-11 17:55:53 -04001309static bool control_save_page(PageSearchStatus *pss, RAMBlock *block,
1310 ram_addr_t offset, int *pages)
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08001311{
1312 uint64_t bytes_xmit = 0;
1313 int ret;
1314
1315 *pages = -1;
Peter Xu61717ea2022-10-11 17:55:53 -04001316 ret = ram_control_save_page(pss->pss_channel, block->offset, offset,
1317 TARGET_PAGE_SIZE, &bytes_xmit);
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08001318 if (ret == RAM_SAVE_CONTROL_NOT_SUPP) {
1319 return false;
1320 }
1321
1322 if (bytes_xmit) {
David Edmondson4c2d0f62021-12-21 09:34:40 +00001323 ram_transferred_add(bytes_xmit);
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08001324 *pages = 1;
1325 }
1326
1327 if (ret == RAM_SAVE_CONTROL_DELAYED) {
1328 return true;
1329 }
1330
1331 if (bytes_xmit > 0) {
Peter Xu23b75762022-10-11 17:55:51 -04001332 stat64_add(&ram_atomic_counters.normal, 1);
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08001333 } else if (bytes_xmit == 0) {
Peter Xu23b75762022-10-11 17:55:51 -04001334 stat64_add(&ram_atomic_counters.duplicate, 1);
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08001335 }
1336
1337 return true;
1338}
1339
Xiao Guangrong65dacaa2018-03-30 15:51:27 +08001340/*
1341 * directly send the page to the stream
1342 *
1343 * Returns the number of pages written.
1344 *
Peter Xuec6f3ab2022-10-11 17:55:56 -04001345 * @pss: current PSS channel
Xiao Guangrong65dacaa2018-03-30 15:51:27 +08001346 * @block: block that contains the page we want to send
1347 * @offset: offset inside the block for the page
1348 * @buf: the page to be sent
1349 * @async: send the page asynchronously
1350 */
Peter Xuec6f3ab2022-10-11 17:55:56 -04001351static int save_normal_page(PageSearchStatus *pss, RAMBlock *block,
Peter Xu61717ea2022-10-11 17:55:53 -04001352 ram_addr_t offset, uint8_t *buf, bool async)
Xiao Guangrong65dacaa2018-03-30 15:51:27 +08001353{
Peter Xuec6f3ab2022-10-11 17:55:56 -04001354 QEMUFile *file = pss->pss_channel;
1355
1356 ram_transferred_add(save_page_header(pss, block,
David Edmondson4c2d0f62021-12-21 09:34:40 +00001357 offset | RAM_SAVE_FLAG_PAGE));
Xiao Guangrong65dacaa2018-03-30 15:51:27 +08001358 if (async) {
Peter Xu61717ea2022-10-11 17:55:53 -04001359 qemu_put_buffer_async(file, buf, TARGET_PAGE_SIZE,
Dr. David Alan Gilbertf912ec52022-04-06 11:25:15 +01001360 migrate_release_ram() &&
Xiao Guangrong65dacaa2018-03-30 15:51:27 +08001361 migration_in_postcopy());
1362 } else {
Peter Xu61717ea2022-10-11 17:55:53 -04001363 qemu_put_buffer(file, buf, TARGET_PAGE_SIZE);
Xiao Guangrong65dacaa2018-03-30 15:51:27 +08001364 }
David Edmondson4c2d0f62021-12-21 09:34:40 +00001365 ram_transferred_add(TARGET_PAGE_SIZE);
Peter Xu23b75762022-10-11 17:55:51 -04001366 stat64_add(&ram_atomic_counters.normal, 1);
Xiao Guangrong65dacaa2018-03-30 15:51:27 +08001367 return 1;
1368}
1369
Juan Quintela56e93d22015-05-07 19:33:31 +02001370/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001371 * ram_save_page: send the given page to the stream
Juan Quintela56e93d22015-05-07 19:33:31 +02001372 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001373 * Returns the number of pages written.
Dr. David Alan Gilbert3fd3c4b2015-12-10 16:31:46 +00001374 * < 0 - error
1375 * >=0 - Number of pages written - this might legally be 0
1376 * if xbzrle noticed the page was the same.
Juan Quintela56e93d22015-05-07 19:33:31 +02001377 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001378 * @rs: current RAM state
Juan Quintela56e93d22015-05-07 19:33:31 +02001379 * @block: block that contains the page we want to send
1380 * @offset: offset inside the block for the page
Juan Quintela56e93d22015-05-07 19:33:31 +02001381 */
Juan Quintela05931ec2021-12-15 19:01:21 +01001382static int ram_save_page(RAMState *rs, PageSearchStatus *pss)
Juan Quintela56e93d22015-05-07 19:33:31 +02001383{
1384 int pages = -1;
Juan Quintela56e93d22015-05-07 19:33:31 +02001385 uint8_t *p;
Juan Quintela56e93d22015-05-07 19:33:31 +02001386 bool send_async = true;
zhanghailianga08f6892016-01-15 11:37:44 +08001387 RAMBlock *block = pss->block;
Alexey Romko8bba0042020-01-10 14:51:34 +01001388 ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08001389 ram_addr_t current_addr = block->offset + offset;
Juan Quintela56e93d22015-05-07 19:33:31 +02001390
Dr. David Alan Gilbert2f68e392015-08-13 11:51:30 +01001391 p = block->host + offset;
Dr. David Alan Gilbert1db9d8e2017-04-26 19:37:21 +01001392 trace_ram_save_page(block->idstr, (uint64_t)offset, p);
Juan Quintela56e93d22015-05-07 19:33:31 +02001393
Juan Quintela56e93d22015-05-07 19:33:31 +02001394 XBZRLE_cache_lock();
David Hildenbrand1a373522021-02-16 11:50:39 +01001395 if (rs->xbzrle_enabled && !migration_in_postcopy()) {
Peter Xuec6f3ab2022-10-11 17:55:56 -04001396 pages = save_xbzrle_page(rs, pss, &p, current_addr,
Peter Xu61717ea2022-10-11 17:55:53 -04001397 block, offset);
Juan Quintela05931ec2021-12-15 19:01:21 +01001398 if (!rs->last_stage) {
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08001399 /* Can't send this cached data async, since the cache page
1400 * might get updated before it gets to the wire
Juan Quintela56e93d22015-05-07 19:33:31 +02001401 */
Xiao Guangrong059ff0f2018-03-30 15:51:23 +08001402 send_async = false;
Juan Quintela56e93d22015-05-07 19:33:31 +02001403 }
1404 }
1405
1406 /* XBZRLE overflow or normal page */
1407 if (pages == -1) {
Peter Xuec6f3ab2022-10-11 17:55:56 -04001408 pages = save_normal_page(pss, block, offset, p, send_async);
Juan Quintela56e93d22015-05-07 19:33:31 +02001409 }
1410
1411 XBZRLE_cache_unlock();
1412
1413 return pages;
1414}
1415
Peter Xu61717ea2022-10-11 17:55:53 -04001416static int ram_save_multifd_page(QEMUFile *file, RAMBlock *block,
Juan Quintelab9ee2f72016-01-15 11:40:13 +01001417 ram_addr_t offset)
1418{
Peter Xu61717ea2022-10-11 17:55:53 -04001419 if (multifd_queue_page(file, block, offset) < 0) {
Ivan Ren713f7622019-06-25 21:18:17 +08001420 return -1;
1421 }
Peter Xu23b75762022-10-11 17:55:51 -04001422 stat64_add(&ram_atomic_counters.normal, 1);
Juan Quintelab9ee2f72016-01-15 11:40:13 +01001423
1424 return 1;
1425}
1426
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001427static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
Xiao Guangrong6ef37712018-08-21 16:10:23 +08001428 ram_addr_t offset, uint8_t *source_buf)
Juan Quintela56e93d22015-05-07 19:33:31 +02001429{
Juan Quintela53518d92017-05-04 11:46:24 +02001430 RAMState *rs = ram_state;
Peter Xuec6f3ab2022-10-11 17:55:56 -04001431 PageSearchStatus *pss = &rs->pss[RAM_CHANNEL_PRECOPY];
Juan Quintela20d549c2021-12-21 10:28:16 +01001432 uint8_t *p = block->host + offset;
Xiao Guangrong6ef37712018-08-21 16:10:23 +08001433 int ret;
Juan Quintela56e93d22015-05-07 19:33:31 +02001434
Peter Xuec6f3ab2022-10-11 17:55:56 -04001435 if (save_zero_page_to_file(pss, block, offset)) {
Juan Quintelae7f2e192021-12-16 09:39:49 +01001436 return true;
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001437 }
1438
Peter Xuec6f3ab2022-10-11 17:55:56 -04001439 save_page_header(pss, block, offset | RAM_SAVE_FLAG_COMPRESS_PAGE);
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08001440
1441 /*
1442 * copy it to an internal buffer to avoid it being modified by the VM
1443 * so that we can catch errors during compression and
1444 * decompression
1445 */
1446 memcpy(source_buf, p, TARGET_PAGE_SIZE);
Xiao Guangrong6ef37712018-08-21 16:10:23 +08001447 ret = qemu_put_compression_data(f, stream, source_buf, TARGET_PAGE_SIZE);
1448 if (ret < 0) {
1449 qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
Liang Lib3be2892016-05-05 15:32:54 +08001450 error_report("compressed data failed!");
Liang Lib3be2892016-05-05 15:32:54 +08001451 }
Juan Quintelae7f2e192021-12-16 09:39:49 +01001452 return false;
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001453}
1454
1455static void
1456update_compress_thread_counts(const CompressParam *param, int bytes_xmit)
1457{
David Edmondson4c2d0f62021-12-21 09:34:40 +00001458 ram_transferred_add(bytes_xmit);
Xiao Guangrong76e03002018-09-06 15:01:00 +08001459
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001460 if (param->zero_page) {
Peter Xu23b75762022-10-11 17:55:51 -04001461 stat64_add(&ram_atomic_counters.duplicate, 1);
Xiao Guangrong76e03002018-09-06 15:01:00 +08001462 return;
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001463 }
Xiao Guangrong76e03002018-09-06 15:01:00 +08001464
1465 /* 8 means a header with RAM_SAVE_FLAG_CONTINUE. */
1466 compression_counters.compressed_size += bytes_xmit - 8;
1467 compression_counters.pages++;
Juan Quintela56e93d22015-05-07 19:33:31 +02001468}
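/*
 * Accounting example for the "- 8" above (illustrative numbers): if a
 * compressed page goes out as the 8 byte page header (the be64
 * offset | flags word sent with RAM_SAVE_FLAG_CONTINUE) followed by N bytes
 * of compressed stream, then bytes_xmit is N + 8 and only N is added to
 * compression_counters.compressed_size, so the compression-rate math in
 * migration_update_rates() sees payload bytes only.
 */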
1469
Xiao Guangrong32b05492018-09-06 15:01:01 +08001470static bool save_page_use_compression(RAMState *rs);
1471
Juan Quintelace25d332017-03-15 11:00:51 +01001472static void flush_compressed_data(RAMState *rs)
Juan Quintela56e93d22015-05-07 19:33:31 +02001473{
Peter Xueaa238a2022-10-11 17:55:49 -04001474 MigrationState *ms = migrate_get_current();
Juan Quintela56e93d22015-05-07 19:33:31 +02001475 int idx, len, thread_count;
1476
Xiao Guangrong32b05492018-09-06 15:01:01 +08001477 if (!save_page_use_compression(rs)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02001478 return;
1479 }
1480 thread_count = migrate_compress_threads();
Liang Lia7a9a882016-05-05 15:32:57 +08001481
Liang Li0d9f9a52016-05-05 15:32:59 +08001482 qemu_mutex_lock(&comp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02001483 for (idx = 0; idx < thread_count; idx++) {
Liang Lia7a9a882016-05-05 15:32:57 +08001484 while (!comp_param[idx].done) {
Liang Li0d9f9a52016-05-05 15:32:59 +08001485 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02001486 }
Liang Lia7a9a882016-05-05 15:32:57 +08001487 }
Liang Li0d9f9a52016-05-05 15:32:59 +08001488 qemu_mutex_unlock(&comp_done_lock);
Liang Lia7a9a882016-05-05 15:32:57 +08001489
1490 for (idx = 0; idx < thread_count; idx++) {
1491 qemu_mutex_lock(&comp_param[idx].mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08001492 if (!comp_param[idx].quit) {
Peter Xueaa238a2022-10-11 17:55:49 -04001493 len = qemu_put_qemu_file(ms->to_dst_file, comp_param[idx].file);
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001494 /*
1495 * it's safe to fetch zero_page without holding comp_done_lock
1496 * as there is no further request submitted to the thread,
1497 * i.e., the thread should be waiting for a request at this point.
1498 */
1499 update_compress_thread_counts(&comp_param[idx], len);
Juan Quintela56e93d22015-05-07 19:33:31 +02001500 }
Liang Lia7a9a882016-05-05 15:32:57 +08001501 qemu_mutex_unlock(&comp_param[idx].mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02001502 }
1503}
1504
1505static inline void set_compress_params(CompressParam *param, RAMBlock *block,
1506 ram_addr_t offset)
1507{
1508 param->block = block;
1509 param->offset = offset;
1510}
1511
Peter Xueaa238a2022-10-11 17:55:49 -04001512static int compress_page_with_multi_thread(RAMBlock *block, ram_addr_t offset)
Juan Quintela56e93d22015-05-07 19:33:31 +02001513{
1514 int idx, thread_count, bytes_xmit = -1, pages = -1;
Xiao Guangrong1d588722018-08-21 16:10:20 +08001515 bool wait = migrate_compress_wait_thread();
Peter Xueaa238a2022-10-11 17:55:49 -04001516 MigrationState *ms = migrate_get_current();
Juan Quintela56e93d22015-05-07 19:33:31 +02001517
1518 thread_count = migrate_compress_threads();
Liang Li0d9f9a52016-05-05 15:32:59 +08001519 qemu_mutex_lock(&comp_done_lock);
Xiao Guangrong1d588722018-08-21 16:10:20 +08001520retry:
1521 for (idx = 0; idx < thread_count; idx++) {
1522 if (comp_param[idx].done) {
1523 comp_param[idx].done = false;
Peter Xueaa238a2022-10-11 17:55:49 -04001524 bytes_xmit = qemu_put_qemu_file(ms->to_dst_file,
1525 comp_param[idx].file);
Xiao Guangrong1d588722018-08-21 16:10:20 +08001526 qemu_mutex_lock(&comp_param[idx].mutex);
1527 set_compress_params(&comp_param[idx], block, offset);
1528 qemu_cond_signal(&comp_param[idx].cond);
1529 qemu_mutex_unlock(&comp_param[idx].mutex);
1530 pages = 1;
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08001531 update_compress_thread_counts(&comp_param[idx], bytes_xmit);
Juan Quintela56e93d22015-05-07 19:33:31 +02001532 break;
Juan Quintela56e93d22015-05-07 19:33:31 +02001533 }
1534 }
Xiao Guangrong1d588722018-08-21 16:10:20 +08001535
1536 /*
1537 * wait for a free thread if the user specifies 'compress-wait-thread',
1538 * otherwise we will post the page out in the main thread as a normal page.
1539 */
1540 if (pages < 0 && wait) {
1541 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
1542 goto retry;
1543 }
Liang Li0d9f9a52016-05-05 15:32:59 +08001544 qemu_mutex_unlock(&comp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02001545
1546 return pages;
1547}
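/*
 * Caller-side sketch (this mirrors save_compress_page() further down; it is
 * illustration only, not new logic): when no compression thread is idle and
 * compress-wait-thread is off, the page simply falls through to the normal
 * path:
 *
 *   if (compress_page_with_multi_thread(block, offset) > 0) {
 *       return true;                  // handed to a compression thread
 *   }
 *   compression_counters.busy++;      // all threads busy, don't wait
 *   return false;                     // caller sends it as a normal page
 */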
1548
1549/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001550 * find_dirty_block: find the next dirty page and update any state
1551 * associated with the search process.
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001552 *
Wei Yanga5f7b1a2019-05-11 07:37:29 +08001553 * Returns true if a page is found
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001554 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001555 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001556 * @pss: data about the state of the current dirty page scan
1557 * @again: set to false if the search has scanned the whole of RAM
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001558 */
Juan Quintelaf20e2862017-03-21 16:19:05 +01001559static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001560{
Peter Xud9e474e2022-10-11 17:55:52 -04001561 /* Update pss->page for the next dirty bit in ramblock */
1562 pss_find_next_dirty(pss);
1563
Juan Quintela6f37bb82017-03-13 19:26:29 +01001564 if (pss->complete_round && pss->block == rs->last_seen_block &&
Juan Quintelaa935e302017-03-21 15:36:51 +01001565 pss->page >= rs->last_page) {
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001566 /*
1567 * We've been once around the RAM and haven't found anything.
1568 * Give up.
1569 */
1570 *again = false;
1571 return false;
1572 }
David Hildenbrand542147f2021-04-29 13:27:08 +02001573 if (!offset_in_ramblock(pss->block,
1574 ((ram_addr_t)pss->page) << TARGET_PAGE_BITS)) {
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001575 /* Didn't find anything in this RAM Block */
Juan Quintelaa935e302017-03-21 15:36:51 +01001576 pss->page = 0;
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001577 pss->block = QLIST_NEXT_RCU(pss->block, next);
1578 if (!pss->block) {
Xiao Guangrong48df9d82018-09-06 15:00:59 +08001579 /*
1580 * If memory migration starts over, we will meet a dirtied page
1581 * which may still exist in the compression threads' ring, so we
1582 * should flush the compressed data to make sure the new page
1583 * is not overwritten by the old one in the destination.
1584 *
1585 * Also, if xbzrle is on, stop using data compression at this
1586 * point. In theory, xbzrle can do better than compression.
1587 */
1588 flush_compressed_data(rs);
1589
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001590 /* Hit the end of the list */
1591 pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
1592 /* Flag that we've looped */
1593 pss->complete_round = true;
David Hildenbrand1a373522021-02-16 11:50:39 +01001594 /* After the first round, enable XBZRLE. */
1595 if (migrate_use_xbzrle()) {
1596 rs->xbzrle_enabled = true;
1597 }
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001598 }
1599 /* Didn't find anything this time, but try again on the new block */
1600 *again = true;
1601 return false;
1602 } else {
1603 /* Can go around again, but... */
1604 *again = true;
1605 /* We've found something so probably don't need to */
1606 return true;
1607 }
1608}
1609
Juan Quintela3d0684b2017-03-23 15:06:39 +01001610/**
1611 * unqueue_page: gets a page off the queue
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001612 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001613 * Helper for 'get_queued_page' - gets a page off the queue
1614 *
1615 * Returns the block of the page (or NULL if none available)
1616 *
Juan Quintelaec481c62017-03-20 22:12:40 +01001617 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001618 * @offset: used to return the offset within the RAMBlock
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001619 */
Juan Quintelaf20e2862017-03-21 16:19:05 +01001620static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001621{
Peter Xua1fe28d2022-01-19 16:09:18 +08001622 struct RAMSrcPageRequest *entry;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001623 RAMBlock *block = NULL;
1624
Peter Xua1fe28d2022-01-19 16:09:18 +08001625 if (!postcopy_has_request(rs)) {
Xiao Guangrongae526e32018-08-21 16:10:25 +08001626 return NULL;
1627 }
1628
Daniel Brodsky6e8a3552020-04-03 21:21:08 -07001629 QEMU_LOCK_GUARD(&rs->src_page_req_mutex);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001630
Peter Xua1fe28d2022-01-19 16:09:18 +08001631 /*
1632 * This should _never_ change even after we take the lock, because no one
1633 * should be taking anything off the request list other than us.
1634 */
1635 assert(postcopy_has_request(rs));
1636
1637 entry = QSIMPLEQ_FIRST(&rs->src_page_requests);
1638 block = entry->rb;
1639 *offset = entry->offset;
1640
Thomas Huth777f53c2022-08-02 08:19:49 +02001641 if (entry->len > TARGET_PAGE_SIZE) {
1642 entry->len -= TARGET_PAGE_SIZE;
1643 entry->offset += TARGET_PAGE_SIZE;
Peter Xua1fe28d2022-01-19 16:09:18 +08001644 } else {
1645 memory_region_unref(block->mr);
1646 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
1647 g_free(entry);
1648 migration_consume_urgent_request();
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001649 }
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001650
1651 return block;
1652}
1653
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001654#if defined(__linux__)
1655/**
1656 * poll_fault_page: try to get next UFFD write fault page and, if pending fault
1657 * is found, return RAM block pointer and page offset
1658 *
1659 * Returns pointer to the RAMBlock containing faulting page,
1660 * NULL if no write faults are pending
1661 *
1662 * @rs: current RAM state
1663 * @offset: page offset from the beginning of the block
1664 */
1665static RAMBlock *poll_fault_page(RAMState *rs, ram_addr_t *offset)
1666{
1667 struct uffd_msg uffd_msg;
1668 void *page_address;
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001669 RAMBlock *block;
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001670 int res;
1671
1672 if (!migrate_background_snapshot()) {
1673 return NULL;
1674 }
1675
1676 res = uffd_read_events(rs->uffdio_fd, &uffd_msg, 1);
1677 if (res <= 0) {
1678 return NULL;
1679 }
1680
1681 page_address = (void *)(uintptr_t) uffd_msg.arg.pagefault.address;
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001682 block = qemu_ram_block_from_host(page_address, false, offset);
1683 assert(block && (block->flags & RAM_UF_WRITEPROTECT) != 0);
1684 return block;
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001685}
1686
1687/**
1688 * ram_save_release_protection: release UFFD write protection after
1689 * a range of pages has been saved
1690 *
1691 * @rs: current RAM state
1692 * @pss: page-search-status structure
1693 * @start_page: index of the first page in the range relative to pss->block
1694 *
1695 * Returns 0 on success, negative value in case of an error
1696 */
1697static int ram_save_release_protection(RAMState *rs, PageSearchStatus *pss,
1698 unsigned long start_page)
1699{
1700 int res = 0;
1701
1702 /* Check if page is from UFFD-managed region. */
1703 if (pss->block->flags & RAM_UF_WRITEPROTECT) {
1704 void *page_address = pss->block->host + (start_page << TARGET_PAGE_BITS);
Peter Xu258f5c982022-01-19 16:09:15 +08001705 uint64_t run_length = (pss->page - start_page) << TARGET_PAGE_BITS;
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001706
1707 /* Flush async buffers before un-protect. */
Peter Xu61717ea2022-10-11 17:55:53 -04001708 qemu_fflush(pss->pss_channel);
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001709 /* Un-protect memory range. */
1710 res = uffd_change_protection(rs->uffdio_fd, page_address, run_length,
1711 false, false);
1712 }
1713
1714 return res;
1715}
1716
1717/* ram_write_tracking_available: check if kernel supports required UFFD features
1718 *
1719 * Returns true if the kernel supports them, false otherwise
1720 */
1721bool ram_write_tracking_available(void)
1722{
1723 uint64_t uffd_features;
1724 int res;
1725
1726 res = uffd_query_features(&uffd_features);
1727 return (res == 0 &&
1728 (uffd_features & UFFD_FEATURE_PAGEFAULT_FLAG_WP) != 0);
1729}
1730
1731/* ram_write_tracking_compatible: check if guest configuration is
1732 * compatible with 'write-tracking'
1733 *
1734 * Returns true if compatible, false otherwise
1735 */
1736bool ram_write_tracking_compatible(void)
1737{
1738 const uint64_t uffd_ioctls_mask = BIT(_UFFDIO_WRITEPROTECT);
1739 int uffd_fd;
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001740 RAMBlock *block;
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001741 bool ret = false;
1742
1743 /* Open UFFD file descriptor */
1744 uffd_fd = uffd_create_fd(UFFD_FEATURE_PAGEFAULT_FLAG_WP, false);
1745 if (uffd_fd < 0) {
1746 return false;
1747 }
1748
1749 RCU_READ_LOCK_GUARD();
1750
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001751 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001752 uint64_t uffd_ioctls;
1753
1754 /* Nothing to do with read-only and MMIO-writable regions */
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001755 if (block->mr->readonly || block->mr->rom_device) {
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001756 continue;
1757 }
1758 /* Try to register block memory via UFFD-IO to track writes */
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001759 if (uffd_register_memory(uffd_fd, block->host, block->max_length,
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001760 UFFDIO_REGISTER_MODE_WP, &uffd_ioctls)) {
1761 goto out;
1762 }
1763 if ((uffd_ioctls & uffd_ioctls_mask) != uffd_ioctls_mask) {
1764 goto out;
1765 }
1766 }
1767 ret = true;
1768
1769out:
1770 uffd_close_fd(uffd_fd);
1771 return ret;
1772}
1773
David Hildenbrandf7b9dcf2021-10-11 19:53:45 +02001774static inline void populate_read_range(RAMBlock *block, ram_addr_t offset,
1775 ram_addr_t size)
1776{
David Hildenbrand5f19a442023-01-05 13:45:24 +01001777 const ram_addr_t end = offset + size;
1778
David Hildenbrandf7b9dcf2021-10-11 19:53:45 +02001779 /*
1780 * We read one byte of each page; this will preallocate page tables if
1781 * required and populate the shared zeropage on MAP_PRIVATE anonymous memory
1782 * where no page was populated yet. This might require adaptation when
1783 * supporting other mappings, like shmem.
1784 */
David Hildenbrand5f19a442023-01-05 13:45:24 +01001785 for (; offset < end; offset += block->page_size) {
David Hildenbrandf7b9dcf2021-10-11 19:53:45 +02001786 char tmp = *((char *)block->host + offset);
1787
1788 /* Don't optimize the read out */
1789 asm volatile("" : "+r" (tmp));
1790 }
1791}
1792
David Hildenbrand6fee3a12021-10-11 19:53:46 +02001793static inline int populate_read_section(MemoryRegionSection *section,
1794 void *opaque)
1795{
1796 const hwaddr size = int128_get64(section->size);
1797 hwaddr offset = section->offset_within_region;
1798 RAMBlock *block = section->mr->ram_block;
1799
1800 populate_read_range(block, offset, size);
1801 return 0;
1802}
1803
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001804/*
David Hildenbrandf7b9dcf2021-10-11 19:53:45 +02001805 * ram_block_populate_read: preallocate page tables and populate pages in the
1806 * RAM block by reading a byte of each page.
Andrey Gruzdeveeccb992021-04-01 12:22:25 +03001807 *
1808 * Since it's solely used for userfault_fd WP feature, here we just
1809 * hardcode page size to qemu_real_host_page_size.
1810 *
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001811 * @block: RAM block to populate
Andrey Gruzdeveeccb992021-04-01 12:22:25 +03001812 */
David Hildenbrand6fee3a12021-10-11 19:53:46 +02001813static void ram_block_populate_read(RAMBlock *rb)
Andrey Gruzdeveeccb992021-04-01 12:22:25 +03001814{
David Hildenbrand6fee3a12021-10-11 19:53:46 +02001815 /*
1816 * Skip populating all pages that fall into a discarded range as managed by
1817 * a RamDiscardManager responsible for the mapped memory region of the
1818 * RAMBlock. Such discarded ("logically unplugged") parts of a RAMBlock
1819 * must not get populated automatically. We don't have to track
1820 * modifications via userfaultfd WP reliably, because these pages will
1821 * not be part of the migration stream either way -- see
1822 * ramblock_dirty_bitmap_exclude_discarded_pages().
1823 *
1824 * Note: The result is only stable while migrating (precopy/postcopy).
1825 */
1826 if (rb->mr && memory_region_has_ram_discard_manager(rb->mr)) {
1827 RamDiscardManager *rdm = memory_region_get_ram_discard_manager(rb->mr);
1828 MemoryRegionSection section = {
1829 .mr = rb->mr,
1830 .offset_within_region = 0,
1831 .size = rb->mr->size,
1832 };
1833
1834 ram_discard_manager_replay_populated(rdm, &section,
1835 populate_read_section, NULL);
1836 } else {
1837 populate_read_range(rb, 0, rb->used_length);
1838 }
Andrey Gruzdeveeccb992021-04-01 12:22:25 +03001839}
1840
1841/*
1842 * ram_write_tracking_prepare: prepare for UFFD-WP memory tracking
1843 */
1844void ram_write_tracking_prepare(void)
1845{
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001846 RAMBlock *block;
Andrey Gruzdeveeccb992021-04-01 12:22:25 +03001847
1848 RCU_READ_LOCK_GUARD();
1849
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001850 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Andrey Gruzdeveeccb992021-04-01 12:22:25 +03001851 /* Nothing to do with read-only and MMIO-writable regions */
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001852 if (block->mr->readonly || block->mr->rom_device) {
Andrey Gruzdeveeccb992021-04-01 12:22:25 +03001853 continue;
1854 }
1855
1856 /*
1857 * Populate pages of the RAM block before enabling userfault_fd
1858 * write protection.
1859 *
1860 * This stage is required since ioctl(UFFDIO_WRITEPROTECT) with
1861 * UFFDIO_WRITEPROTECT_MODE_WP mode setting would silently skip
1862 * pages with pte_none() entries in page table.
1863 */
David Hildenbrandf7b9dcf2021-10-11 19:53:45 +02001864 ram_block_populate_read(block);
Andrey Gruzdeveeccb992021-04-01 12:22:25 +03001865 }
1866}
1867
1868/*
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001869 * ram_write_tracking_start: start UFFD-WP memory tracking
1870 *
1871 * Returns 0 for success or negative value in case of error
1872 */
1873int ram_write_tracking_start(void)
1874{
1875 int uffd_fd;
1876 RAMState *rs = ram_state;
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001877 RAMBlock *block;
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001878
1879 /* Open UFFD file descriptor */
1880 uffd_fd = uffd_create_fd(UFFD_FEATURE_PAGEFAULT_FLAG_WP, true);
1881 if (uffd_fd < 0) {
1882 return uffd_fd;
1883 }
1884 rs->uffdio_fd = uffd_fd;
1885
1886 RCU_READ_LOCK_GUARD();
1887
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001888 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001889 /* Nothing to do with read-only and MMIO-writable regions */
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001890 if (block->mr->readonly || block->mr->rom_device) {
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001891 continue;
1892 }
1893
1894 /* Register block memory with UFFD to track writes */
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001895 if (uffd_register_memory(rs->uffdio_fd, block->host,
1896 block->max_length, UFFDIO_REGISTER_MODE_WP, NULL)) {
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001897 goto fail;
1898 }
1899 /* Apply UFFD write protection to the block memory range */
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001900 if (uffd_change_protection(rs->uffdio_fd, block->host,
1901 block->max_length, true, false)) {
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001902 goto fail;
1903 }
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001904 block->flags |= RAM_UF_WRITEPROTECT;
1905 memory_region_ref(block->mr);
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001906
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001907 trace_ram_write_tracking_ramblock_start(block->idstr, block->page_size,
1908 block->host, block->max_length);
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001909 }
1910
1911 return 0;
1912
1913fail:
1914 error_report("ram_write_tracking_start() failed: restoring initial memory state");
1915
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001916 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
1917 if ((block->flags & RAM_UF_WRITEPROTECT) == 0) {
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001918 continue;
1919 }
1920 /*
1921 * In case some memory block failed to be write-protected
1922 * remove protection and unregister all succeeded RAM blocks
1923 */
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001924 uffd_change_protection(rs->uffdio_fd, block->host, block->max_length,
1925 false, false);
1926 uffd_unregister_memory(rs->uffdio_fd, block->host, block->max_length);
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001927 /* Cleanup flags and remove reference */
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001928 block->flags &= ~RAM_UF_WRITEPROTECT;
1929 memory_region_unref(block->mr);
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001930 }
1931
1932 uffd_close_fd(uffd_fd);
1933 rs->uffdio_fd = -1;
1934 return -1;
1935}
1936
1937/**
1938 * ram_write_tracking_stop: stop UFFD-WP memory tracking and remove protection
1939 */
1940void ram_write_tracking_stop(void)
1941{
1942 RAMState *rs = ram_state;
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001943 RAMBlock *block;
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001944
1945 RCU_READ_LOCK_GUARD();
1946
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001947 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
1948 if ((block->flags & RAM_UF_WRITEPROTECT) == 0) {
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001949 continue;
1950 }
1951 /* Remove protection and unregister all affected RAM blocks */
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001952 uffd_change_protection(rs->uffdio_fd, block->host, block->max_length,
1953 false, false);
1954 uffd_unregister_memory(rs->uffdio_fd, block->host, block->max_length);
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001955
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001956 trace_ram_write_tracking_ramblock_stop(block->idstr, block->page_size,
1957 block->host, block->max_length);
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001958
1959 /* Cleanup flags and remove reference */
Andrey Gruzdev82ea3e32021-04-01 12:22:26 +03001960 block->flags &= ~RAM_UF_WRITEPROTECT;
1961 memory_region_unref(block->mr);
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03001962 }
1963
1964 /* Finally close UFFD file descriptor */
1965 uffd_close_fd(rs->uffdio_fd);
1966 rs->uffdio_fd = -1;
1967}
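/*
 * Putting the pieces above together, a background-snapshot run is expected
 * to drive them roughly like this (a sketch of the call order, not a
 * verbatim copy of the migration-thread code):
 *
 *   if (ram_write_tracking_available() &&
 *       ram_write_tracking_compatible()) {
 *       ram_write_tracking_prepare();   // pre-fault pages before protecting
 *       ram_write_tracking_start();     // uffd register + write-protect RAM
 *       // ... save RAM; poll_fault_page() picks up write-fault events so
 *       // faulting pages are saved first, and ram_save_release_protection()
 *       // removes protection from ranges that were already sent ...
 *       ram_write_tracking_stop();      // unprotect, unregister, close uffd
 *   }
 */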
1968
1969#else
1970/* No target OS support, stubs just fail or ignore */
1971
1972static RAMBlock *poll_fault_page(RAMState *rs, ram_addr_t *offset)
1973{
1974 (void) rs;
1975 (void) offset;
1976
1977 return NULL;
1978}
1979
1980static int ram_save_release_protection(RAMState *rs, PageSearchStatus *pss,
1981 unsigned long start_page)
1982{
1983 (void) rs;
1984 (void) pss;
1985 (void) start_page;
1986
1987 return 0;
1988}
1989
1990bool ram_write_tracking_available(void)
1991{
1992 return false;
1993}
1994
1995bool ram_write_tracking_compatible(void)
1996{
1997 assert(0);
1998 return false;
1999}
2000
2001int ram_write_tracking_start(void)
2002{
2003 assert(0);
2004 return -1;
2005}
2006
2007void ram_write_tracking_stop(void)
2008{
2009 assert(0);
2010}
2011#endif /* defined(__linux__) */
2012
Juan Quintela3d0684b2017-03-23 15:06:39 +01002013/**
Li Qiangff1543a2019-05-24 23:28:32 -07002014 * get_queued_page: unqueue a page from the postcopy requests
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002015 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002016 * Skips pages that are already sent (!dirty)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002017 *
Wei Yanga5f7b1a2019-05-11 07:37:29 +08002018 * Returns true if a queued page is found
Juan Quintela3d0684b2017-03-23 15:06:39 +01002019 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002020 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002021 * @pss: data about the state of the current dirty page scan
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002022 */
Juan Quintelaf20e2862017-03-21 16:19:05 +01002023static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002024{
2025 RAMBlock *block;
2026 ram_addr_t offset;
Thomas Huth777f53c2022-08-02 08:19:49 +02002027 bool dirty;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002028
Thomas Huth777f53c2022-08-02 08:19:49 +02002029 do {
2030 block = unqueue_page(rs, &offset);
2031 /*
2032 * We're sending this page, and since it's postcopy nothing else
2033 * will dirty it, and we must make sure it doesn't get sent again
2034 * even if this queue request was received after the background
2035 * search already sent it.
2036 */
2037 if (block) {
2038 unsigned long page;
2039
2040 page = offset >> TARGET_PAGE_BITS;
2041 dirty = test_bit(page, block->bmap);
2042 if (!dirty) {
2043 trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
2044 page);
2045 } else {
2046 trace_get_queued_page(block->idstr, (uint64_t)offset, page);
2047 }
2048 }
2049
2050 } while (block && !dirty);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002051
Peter Xub0621062022-10-11 17:55:58 -04002052 if (!block) {
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03002053 /*
2054 * Poll write faults too if background snapshot is enabled; that's
2055 * when we have vcpus got blocked by the write protected pages.
2056 */
2057 block = poll_fault_page(rs, &offset);
2058 }
2059
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002060 if (block) {
2061 /*
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002062 * We want the background search to continue from the queued page
2063 * since the guest is likely to want other pages near to the page
2064 * it just requested.
2065 */
2066 pss->block = block;
Juan Quintelaa935e302017-03-21 15:36:51 +01002067 pss->page = offset >> TARGET_PAGE_BITS;
Wei Yang422314e2019-06-05 09:08:28 +08002068
2069 /*
2070 * This unqueued page would break the "one round" check, even if
2071 * it is really rare.
2072 */
2073 pss->complete_round = false;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002074 }
2075
2076 return !!block;
2077}
2078
Juan Quintela56e93d22015-05-07 19:33:31 +02002079/**
Juan Quintela5e58f962017-04-03 22:06:54 +02002080 * migration_page_queue_free: drop any remaining pages in the ram
2081 * request queue
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002082 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002083 * It should be empty at the end anyway, but in error cases there may
2084 * be some left. In case any pages are left, we drop them.
2085 *
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002086 */
Juan Quintela83c13382017-05-04 11:45:01 +02002087static void migration_page_queue_free(RAMState *rs)
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002088{
Juan Quintelaec481c62017-03-20 22:12:40 +01002089 struct RAMSrcPageRequest *mspr, *next_mspr;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002090 /* This queue generally should be empty - but in the case of a failed
2091 * migration it might have some leftovers in it.
2092 */
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01002093 RCU_READ_LOCK_GUARD();
Juan Quintelaec481c62017-03-20 22:12:40 +01002094 QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002095 memory_region_unref(mspr->rb->mr);
Juan Quintelaec481c62017-03-20 22:12:40 +01002096 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002097 g_free(mspr);
2098 }
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002099}
2100
2101/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002102 * ram_save_queue_pages: queue the page for transmission
2103 *
2104 * A request from postcopy destination for example.
2105 *
2106 * Returns zero on success or negative on error
2107 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002108 * @rbname: Name of the RAMBLock of the request. NULL means the
2109 * same that last one.
2110 * @start: starting address from the start of the RAMBlock
2111 * @len: length (in bytes) to send
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002112 */
Juan Quintela96506892017-03-14 18:41:03 +01002113int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002114{
2115 RAMBlock *ramblock;
Juan Quintela53518d92017-05-04 11:46:24 +02002116 RAMState *rs = ram_state;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002117
Juan Quintela93604472017-06-06 19:49:03 +02002118 ram_counters.postcopy_requests++;
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01002119 RCU_READ_LOCK_GUARD();
2120
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002121 if (!rbname) {
2122 /* Reuse last RAMBlock */
Juan Quintela68a098f2017-03-14 13:48:42 +01002123 ramblock = rs->last_req_rb;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002124
2125 if (!ramblock) {
2126 /*
2127 * Shouldn't happen, we can't reuse the last RAMBlock if
2128 * it's the 1st request.
2129 */
2130 error_report("ram_save_queue_pages no previous block");
Daniel Henrique Barboza03acb4e2020-01-06 15:23:31 -03002131 return -1;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002132 }
2133 } else {
2134 ramblock = qemu_ram_block_by_name(rbname);
2135
2136 if (!ramblock) {
2137 /* We shouldn't be asked for a non-existent RAMBlock */
2138 error_report("ram_save_queue_pages no block '%s'", rbname);
Daniel Henrique Barboza03acb4e2020-01-06 15:23:31 -03002139 return -1;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002140 }
Juan Quintela68a098f2017-03-14 13:48:42 +01002141 rs->last_req_rb = ramblock;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002142 }
2143 trace_ram_save_queue_pages(ramblock->idstr, start, len);
David Hildenbrand542147f2021-04-29 13:27:08 +02002144 if (!offset_in_ramblock(ramblock, start + len - 1)) {
Juan Quintela9458ad62015-11-10 17:42:05 +01002145 error_report("%s request overrun start=" RAM_ADDR_FMT " len="
2146 RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002147 __func__, start, len, ramblock->used_length);
Daniel Henrique Barboza03acb4e2020-01-06 15:23:31 -03002148 return -1;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002149 }
2150
Peter Xu93589822022-10-11 17:55:57 -04002151 /*
2152 * When with postcopy preempt, we send back the page directly in the
2153 * rp-return thread.
2154 */
2155 if (postcopy_preempt_active()) {
2156 ram_addr_t page_start = start >> TARGET_PAGE_BITS;
2157 size_t page_size = qemu_ram_pagesize(ramblock);
2158 PageSearchStatus *pss = &ram_state->pss[RAM_CHANNEL_POSTCOPY];
2159 int ret = 0;
2160
2161 qemu_mutex_lock(&rs->bitmap_mutex);
2162
2163 pss_init(pss, ramblock, page_start);
2164 /*
2165 * Always use the preempt channel, and make sure it's there. It's
2166 * safe to access without lock, because when rp-thread is running
2167 * we should be the only one who operates on the qemufile
2168 */
2169 pss->pss_channel = migrate_get_current()->postcopy_qemufile_src;
Peter Xu93589822022-10-11 17:55:57 -04002170 assert(pss->pss_channel);
2171
2172 /*
2173 * It must be either one or multiple of host page size. Just
2174 * assert; if something wrong we're mostly split brain anyway.
2175 */
2176 assert(len % page_size == 0);
2177 while (len) {
2178 if (ram_save_host_page_urgent(pss)) {
2179 error_report("%s: ram_save_host_page_urgent() failed: "
2180 "ramblock=%s, start_addr=0x"RAM_ADDR_FMT,
2181 __func__, ramblock->idstr, start);
2182 ret = -1;
2183 break;
2184 }
2185 /*
2186 * NOTE: after ram_save_host_page_urgent() succeeded, pss->page
2187 * will automatically be moved and point to the next host page
2188 * we're going to send, so no need to update here.
2189 *
2190 * Normally QEMU never sends >1 host page in requests, so
2191 * logically we don't even need that as the loop should only
2192 * run once, but just to be consistent.
2193 */
2194 len -= page_size;
2195 };
2196 qemu_mutex_unlock(&rs->bitmap_mutex);
2197
2198 return ret;
2199 }
2200
Juan Quintelaec481c62017-03-20 22:12:40 +01002201 struct RAMSrcPageRequest *new_entry =
Markus Armbrusterb21e2382022-03-15 15:41:56 +01002202 g_new0(struct RAMSrcPageRequest, 1);
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002203 new_entry->rb = ramblock;
2204 new_entry->offset = start;
2205 new_entry->len = len;
2206
2207 memory_region_ref(ramblock->mr);
Juan Quintelaec481c62017-03-20 22:12:40 +01002208 qemu_mutex_lock(&rs->src_page_req_mutex);
2209 QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
Dr. David Alan Gilberte03a34f2018-06-13 11:26:42 +01002210 migration_make_urgent_request();
Juan Quintelaec481c62017-03-20 22:12:40 +01002211 qemu_mutex_unlock(&rs->src_page_req_mutex);
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002212
2213 return 0;
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002214}
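/*
 * Hypothetical caller sketch (the block name, offset and length are made
 * up): when the destination faults on a page during postcopy, the
 * return-path thread ends up doing something equivalent to
 *
 *     ram_save_queue_pages("pc.ram", 0x200000, TARGET_PAGE_SIZE);
 *
 * which either sends the page immediately on the preempt channel (when
 * postcopy preemption is active) or queues it on rs->src_page_requests and
 * flags an urgent request for the migration thread.
 */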
2215
Xiao Guangrongd7400a32018-03-30 15:51:26 +08002216static bool save_page_use_compression(RAMState *rs)
2217{
2218 if (!migrate_use_compression()) {
2219 return false;
2220 }
2221
2222 /*
David Hildenbrand1a373522021-02-16 11:50:39 +01002223 * If xbzrle is enabled (e.g., after first round of migration), stop
2224 * using the data compression. In theory, xbzrle can do better than
2225 * compression.
Xiao Guangrongd7400a32018-03-30 15:51:26 +08002226 */
David Hildenbrand1a373522021-02-16 11:50:39 +01002227 if (rs->xbzrle_enabled) {
2228 return false;
Xiao Guangrongd7400a32018-03-30 15:51:26 +08002229 }
2230
David Hildenbrand1a373522021-02-16 11:50:39 +01002231 return true;
Xiao Guangrongd7400a32018-03-30 15:51:26 +08002232}
2233
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002234/*
2235 * try to compress the page before posting it out, return true if the page
2236 * has been properly handled by compression, otherwise needs other
2237 * paths to handle it
2238 */
Peter Xuec6f3ab2022-10-11 17:55:56 -04002239static bool save_compress_page(RAMState *rs, PageSearchStatus *pss,
2240 RAMBlock *block, ram_addr_t offset)
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002241{
2242 if (!save_page_use_compression(rs)) {
2243 return false;
2244 }
2245
2246 /*
2247 * When starting the process of a new block, the first page of
2248 * the block should be sent out before other pages in the same
2249 * block, and all the pages in the last block should have been sent
2250 * out. Keeping this order is important, because the 'cont' flag
2251 * is used to avoid resending the block name.
2252 *
2253 * We post the first page as a normal page because compression will
2254 * take a lot of CPU resources.
2255 */
Peter Xuec6f3ab2022-10-11 17:55:56 -04002256 if (block != pss->last_sent_block) {
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002257 flush_compressed_data(rs);
2258 return false;
2259 }
2260
Peter Xueaa238a2022-10-11 17:55:49 -04002261 if (compress_page_with_multi_thread(block, offset) > 0) {
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002262 return true;
2263 }
2264
Xiao Guangrong76e03002018-09-06 15:01:00 +08002265 compression_counters.busy++;
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002266 return false;
2267}
2268
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002269/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002270 * ram_save_target_page: save one target page
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002271 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002272 * Returns the number of pages written
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002273 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002274 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002275 * @pss: data about the page we want to send
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002276 */
Juan Quintela05931ec2021-12-15 19:01:21 +01002277static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002278{
Xiao Guangronga8ec91f2018-03-30 15:51:25 +08002279 RAMBlock *block = pss->block;
Alexey Romko8bba0042020-01-10 14:51:34 +01002280 ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
Xiao Guangronga8ec91f2018-03-30 15:51:25 +08002281 int res;
2282
Peter Xu61717ea2022-10-11 17:55:53 -04002283 if (control_save_page(pss, block, offset, &res)) {
Xiao Guangronga8ec91f2018-03-30 15:51:25 +08002284 return res;
2285 }
2286
Peter Xuec6f3ab2022-10-11 17:55:56 -04002287 if (save_compress_page(rs, pss, block, offset)) {
Xiao Guangrong5e5fdcf2018-08-21 16:10:24 +08002288 return 1;
Xiao Guangrongd7400a32018-03-30 15:51:26 +08002289 }
2290
Peter Xuec6f3ab2022-10-11 17:55:56 -04002291 res = save_zero_page(pss, block, offset);
Xiao Guangrongd7400a32018-03-30 15:51:26 +08002292 if (res > 0) {
2293 /* Must let xbzrle know, otherwise a previous (now 0'd) cached
2294 * page would be stale
2295 */
Peter Xuef5c3d12022-10-11 17:55:47 -04002296 if (rs->xbzrle_enabled) {
Xiao Guangrongd7400a32018-03-30 15:51:26 +08002297 XBZRLE_cache_lock();
2298 xbzrle_cache_zero_page(rs, block->offset + offset);
2299 XBZRLE_cache_unlock();
2300 }
Xiao Guangrongd7400a32018-03-30 15:51:26 +08002301 return res;
2302 }
2303
Xiao Guangrongda3f56c2018-03-30 15:51:28 +08002304 /*
Peter Xu6f39c902022-10-04 14:24:30 -04002305 * Do not use multifd in postcopy as one whole host page should be
2306 * placed. Meanwhile postcopy requires atomic update of pages, so even
2307 * if host page size == guest page size the destination guest may still
2308 * see partially copied pages at runtime, which is data corruption.
Xiao Guangrongda3f56c2018-03-30 15:51:28 +08002309 */
Peter Xu6f39c902022-10-04 14:24:30 -04002310 if (migrate_use_multifd() && !migration_in_postcopy()) {
Peter Xu61717ea2022-10-11 17:55:53 -04002311 return ram_save_multifd_page(pss->pss_channel, block, offset);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002312 }
2313
Juan Quintela05931ec2021-12-15 19:01:21 +01002314 return ram_save_page(rs, pss);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002315}
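/*
 * Dispatch-order sketch for a single target page (a summary of the function
 * above, not new behaviour):
 *
 *   control_save_page()      - e.g. RDMA takes the page, we're done
 *   save_compress_page()     - multi-threaded compression, when usable
 *   save_zero_page()         - short-circuit for all-zero pages
 *   ram_save_multifd_page()  - multifd, precopy only
 *   ram_save_page()          - plain (optionally XBZRLE) fallback
 */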
2316
Peter Xud9e474e2022-10-11 17:55:52 -04002317/* Should be called before sending a host page */
2318static void pss_host_page_prepare(PageSearchStatus *pss)
2319{
2320 /* How many guest pages are there in one host page? */
2321 size_t guest_pfns = qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
2322
2323 pss->host_page_sending = true;
Peter Xu301d7ff2023-01-20 11:31:47 -05002324 if (guest_pfns <= 1) {
2325 /*
2326 * This covers both when guest psize == host psize, or when guest
2327 * has larger psize than the host (guest_pfns==0).
2328 *
2329 * For the latter, we always send one whole guest page per
2330 * iteration of the host page (example: an Alpha VM on x86 host
2331 * will have guest psize 8K while host psize 4K).
2332 */
2333 pss->host_page_start = pss->page;
2334 pss->host_page_end = pss->page + 1;
2335 } else {
2336 /*
2337 * The host page spans over multiple guest pages, we send them
2338 * within the same host page iteration.
2339 */
2340 pss->host_page_start = ROUND_DOWN(pss->page, guest_pfns);
2341 pss->host_page_end = ROUND_UP(pss->page + 1, guest_pfns);
2342 }
Peter Xud9e474e2022-10-11 17:55:52 -04002343}
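/*
 * Worked example for the boundaries above (numbers are illustrative): with
 * 4 KiB target pages and a 2 MiB hugepage-backed block, guest_pfns is 512.
 * If pss->page is 1000, then
 *
 *   host_page_start = ROUND_DOWN(1000, 512) = 512
 *   host_page_end   = ROUND_UP(1001, 512)   = 1024
 *
 * so one host-page iteration covers target pages [512, 1024). With 4 KiB
 * host pages instead, guest_pfns is 1 and the window is just [1000, 1001).
 */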
2344
2345/*
2346 * Whether the page pointed by PSS is within the host page being sent.
2347 * Must be called after a previous pss_host_page_prepare().
2348 */
2349static bool pss_within_range(PageSearchStatus *pss)
2350{
2351 ram_addr_t ram_addr;
2352
2353 assert(pss->host_page_sending);
2354
2355 /* Over host-page boundary? */
2356 if (pss->page >= pss->host_page_end) {
2357 return false;
2358 }
2359
2360 ram_addr = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
2361
2362 return offset_in_ramblock(pss->block, ram_addr);
2363}
2364
2365static void pss_host_page_finish(PageSearchStatus *pss)
2366{
2367 pss->host_page_sending = false;
2368 /* This is not needed, but just to reset it */
2369 pss->host_page_start = pss->host_page_end = 0;
2370}
2371
Peter Xu93589822022-10-11 17:55:57 -04002372/*
2373 * Send an urgent host page specified by `pss'. Needs to be called with
2374 * bitmap_mutex held.
2375 *
2376 * Returns 0 if saving the host page succeeded, negative otherwise.
2377 */
2378static int ram_save_host_page_urgent(PageSearchStatus *pss)
2379{
2380 bool page_dirty, sent = false;
2381 RAMState *rs = ram_state;
2382 int ret = 0;
2383
2384 trace_postcopy_preempt_send_host_page(pss->block->idstr, pss->page);
2385 pss_host_page_prepare(pss);
2386
2387 /*
2388 * If precopy is sending the same page, let it be done in precopy, or
2389 * we could send the same page in two channels and none of them will
2390 * receive the whole page.
2391 */
2392 if (pss_overlap(pss, &ram_state->pss[RAM_CHANNEL_PRECOPY])) {
2393 trace_postcopy_preempt_hit(pss->block->idstr,
2394 pss->page << TARGET_PAGE_BITS);
2395 return 0;
2396 }
2397
2398 do {
2399 page_dirty = migration_bitmap_clear_dirty(rs, pss->block, pss->page);
2400
2401 if (page_dirty) {
2402 /* Be strict to return code; it must be 1, or what else? */
2403 if (ram_save_target_page(rs, pss) != 1) {
2404 error_report_once("%s: ram_save_target_page failed", __func__);
2405 ret = -1;
2406 goto out;
2407 }
2408 sent = true;
2409 }
2410 pss_find_next_dirty(pss);
2411 } while (pss_within_range(pss));
2412out:
2413 pss_host_page_finish(pss);
2414 /* For urgent requests, flush immediately if sent */
2415 if (sent) {
2416 qemu_fflush(pss->pss_channel);
2417 }
2418 return ret;
2419}
2420
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002421/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002422 * ram_save_host_page: save a whole host page
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002423 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002424 * Starting at *offset, send pages up to the end of the current host
2425 * page. It's valid for the initial offset to point into the middle of
2426 * a host page, in which case the remainder of the host page is sent.
2427 * Only dirty target pages are sent. Note that the host page size may
2428 * be a huge page for this block.
Peter Xuf3321552022-10-11 17:55:50 -04002429 *
Dr. David Alan Gilbert1eb3fc02017-05-17 17:58:09 +01002430 * The saving stops at the boundary of the used_length of the block
2431 * if the RAMBlock isn't a multiple of the host page size.
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002432 *
Peter Xuf3321552022-10-11 17:55:50 -04002433 * The caller must hold ram_state.bitmap_mutex when calling this
2434 * function. Note that this function can temporarily release the lock, but
2435 * it will make sure the lock is held again before it returns.
2436 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002437 * Returns the number of pages written or negative on error
2438 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002439 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002440 * @pss: data about the page we want to send
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002441 */
Juan Quintela05931ec2021-12-15 19:01:21 +01002442static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss)
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002443{
Peter Xuf3321552022-10-11 17:55:50 -04002444 bool page_dirty, preempt_active = postcopy_preempt_active();
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002445 int tmppages, pages = 0;
Juan Quintelaa935e302017-03-21 15:36:51 +01002446 size_t pagesize_bits =
2447 qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03002448 unsigned long start_page = pss->page;
2449 int res;
Dr. David Alan Gilbert4c011c32017-02-24 18:28:39 +00002450
Yury Kotovfbd162e2019-02-15 20:45:46 +03002451 if (ramblock_is_ignored(pss->block)) {
Cédric Le Goaterb895de52018-05-14 08:57:00 +02002452 error_report("block %s should not be migrated !", pss->block->idstr);
2453 return 0;
2454 }
2455
Peter Xud9e474e2022-10-11 17:55:52 -04002456 /* Update host page boundary information */
2457 pss_host_page_prepare(pss);
2458
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002459 do {
Peter Xuf3321552022-10-11 17:55:50 -04002460 page_dirty = migration_bitmap_clear_dirty(rs, pss->block, pss->page);
Xiao Guangrong1faa5662018-03-30 15:51:24 +08002461
Peter Xuf3321552022-10-11 17:55:50 -04002462 /* Check whether the page is dirty and, if so, send it */
2463 if (page_dirty) {
Kunkun Jiangba1b7c82021-03-16 20:57:16 +08002464 /*
Peter Xuf3321552022-10-11 17:55:50 -04002465 * Properly yield the lock only in postcopy preempt mode
2466 * because both the migration thread and the rp-return thread can
2467 * operate on the bitmaps.
Kunkun Jiangba1b7c82021-03-16 20:57:16 +08002468 */
Peter Xuf3321552022-10-11 17:55:50 -04002469 if (preempt_active) {
2470 qemu_mutex_unlock(&rs->bitmap_mutex);
Kunkun Jiangba1b7c82021-03-16 20:57:16 +08002471 }
Peter Xuf3321552022-10-11 17:55:50 -04002472 tmppages = ram_save_target_page(rs, pss);
2473 if (tmppages >= 0) {
2474 pages += tmppages;
2475 /*
2476 * Allow rate limiting to happen in the middle of huge pages if
2477 * something is sent in the current iteration.
2478 */
2479 if (pagesize_bits > 1 && tmppages > 0) {
2480 migration_rate_limit();
2481 }
2482 }
2483 if (preempt_active) {
2484 qemu_mutex_lock(&rs->bitmap_mutex);
2485 }
2486 } else {
2487 tmppages = 0;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002488 }
Peter Xuf3321552022-10-11 17:55:50 -04002489
2490 if (tmppages < 0) {
Peter Xud9e474e2022-10-11 17:55:52 -04002491 pss_host_page_finish(pss);
Peter Xuf3321552022-10-11 17:55:50 -04002492 return tmppages;
2493 }
2494
Peter Xud9e474e2022-10-11 17:55:52 -04002495 pss_find_next_dirty(pss);
2496 } while (pss_within_range(pss));
2497
2498 pss_host_page_finish(pss);
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03002499
2500 res = ram_save_release_protection(rs, pss, start_page);
2501 return (res < 0 ? res : pages);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002502}
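/*
 * Note that pagesize_bits above is really a page count despite its name:
 * e.g. a 2MB host page with 4KB target pages gives 2MB >> 12 == 512, which
 * is why migration_rate_limit() may run between target pages of one huge
 * page.
 */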
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00002503
2504/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002505 * ram_find_and_save_block: finds a dirty page and sends it to f
Juan Quintela56e93d22015-05-07 19:33:31 +02002506 *
2507 * Called within an RCU critical section.
2508 *
Xiao Guangronge8f37352018-09-03 17:26:44 +08002509 * Returns the number of pages written where zero means no dirty pages,
2510 * or negative on error
Juan Quintela56e93d22015-05-07 19:33:31 +02002511 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01002512 * @rs: current RAM state
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002513 *
2514 * On systems where host-page-size > target-page-size it will send all the
2515 * pages in a host page that are dirty.
Juan Quintela56e93d22015-05-07 19:33:31 +02002516 */
Juan Quintela05931ec2021-12-15 19:01:21 +01002517static int ram_find_and_save_block(RAMState *rs)
Juan Quintela56e93d22015-05-07 19:33:31 +02002518{
Peter Xuf1668762022-10-11 17:55:55 -04002519 PageSearchStatus *pss = &rs->pss[RAM_CHANNEL_PRECOPY];
Juan Quintela56e93d22015-05-07 19:33:31 +02002520 int pages = 0;
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002521 bool again, found;
Juan Quintela56e93d22015-05-07 19:33:31 +02002522
Ashijeet Acharya0827b9e2017-02-08 19:58:45 +05302523 /* No dirty pages as there is zero RAM */
2524 if (!ram_bytes_total()) {
2525 return pages;
2526 }
2527
Peter Xu4934a5d2022-10-04 14:24:26 -04002528 /*
2529 * Always keep last_seen_block/last_page valid during this procedure,
2530 * because find_dirty_block() relies on these values (e.g., we compare
2531 * last_seen_block with pss.block to see whether we searched all the
2532 * ramblocks) to detect the completion of migration. Having NULL value
2533 * of last_seen_block can conditionally cause below loop to run forever.
2534 */
2535 if (!rs->last_seen_block) {
2536 rs->last_seen_block = QLIST_FIRST_RCU(&ram_list.blocks);
2537 rs->last_page = 0;
2538 }
2539
Peter Xuf1668762022-10-11 17:55:55 -04002540 pss_init(pss, rs->last_seen_block, rs->last_page);
Dr. David Alan Gilbertb8fb8cb2015-09-23 15:27:10 +01002541
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002542 do {
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002543 again = true;
Peter Xuf1668762022-10-11 17:55:55 -04002544 found = get_queued_page(rs, pss);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002545
2546 if (!found) {
Peter Xub0621062022-10-11 17:55:58 -04002547 /* priority queue empty, so just search for something dirty */
2548 found = find_dirty_block(rs, pss, &again);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00002549 }
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002550
2551 if (found) {
Peter Xuf1668762022-10-11 17:55:55 -04002552 pages = ram_save_host_page(rs, pss);
Juan Quintela56e93d22015-05-07 19:33:31 +02002553 }
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01002554 } while (!pages && again);
Juan Quintela56e93d22015-05-07 19:33:31 +02002555
Peter Xuf1668762022-10-11 17:55:55 -04002556 rs->last_seen_block = pss->block;
2557 rs->last_page = pss->page;
Juan Quintela56e93d22015-05-07 19:33:31 +02002558
2559 return pages;
2560}
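/*
 * In short, the search order above is: pages explicitly requested by the
 * destination (the postcopy priority queue) first, then a linear scan of
 * the dirty bitmap resuming from where the previous call left off.
 */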
2561
2562void acct_update_position(QEMUFile *f, size_t size, bool zero)
2563{
2564 uint64_t pages = size / TARGET_PAGE_SIZE;
Juan Quintelaf7ccd612017-03-13 20:30:21 +01002565
Juan Quintela56e93d22015-05-07 19:33:31 +02002566 if (zero) {
Peter Xu23b75762022-10-11 17:55:51 -04002567 stat64_add(&ram_atomic_counters.duplicate, pages);
Juan Quintela56e93d22015-05-07 19:33:31 +02002568 } else {
Peter Xu23b75762022-10-11 17:55:51 -04002569 stat64_add(&ram_atomic_counters.normal, pages);
David Edmondson4c2d0f62021-12-21 09:34:40 +00002570 ram_transferred_add(size);
Daniel P. Berrangé1a93bd22022-06-20 12:01:51 +01002571 qemu_file_credit_transfer(f, size);
Juan Quintela56e93d22015-05-07 19:33:31 +02002572 }
2573}
2574
Yury Kotovfbd162e2019-02-15 20:45:46 +03002575static uint64_t ram_bytes_total_common(bool count_ignored)
Juan Quintela56e93d22015-05-07 19:33:31 +02002576{
2577 RAMBlock *block;
2578 uint64_t total = 0;
2579
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01002580 RCU_READ_LOCK_GUARD();
2581
Yury Kotovfbd162e2019-02-15 20:45:46 +03002582 if (count_ignored) {
2583 RAMBLOCK_FOREACH_MIGRATABLE(block) {
2584 total += block->used_length;
2585 }
2586 } else {
2587 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
2588 total += block->used_length;
2589 }
Peter Xu99e15582017-05-12 12:17:39 +08002590 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002591 return total;
2592}
2593
Yury Kotovfbd162e2019-02-15 20:45:46 +03002594uint64_t ram_bytes_total(void)
2595{
2596 return ram_bytes_total_common(false);
2597}
2598
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002599static void xbzrle_load_setup(void)
Juan Quintela56e93d22015-05-07 19:33:31 +02002600{
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002601 XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
Juan Quintela56e93d22015-05-07 19:33:31 +02002602}
2603
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002604static void xbzrle_load_cleanup(void)
2605{
2606 g_free(XBZRLE.decoded_buf);
2607 XBZRLE.decoded_buf = NULL;
2608}
2609
Peter Xu7d7c96b2017-10-19 14:31:58 +08002610static void ram_state_cleanup(RAMState **rsp)
2611{
Dr. David Alan Gilbertb9ccaf62018-02-12 16:03:39 +00002612 if (*rsp) {
2613 migration_page_queue_free(*rsp);
2614 qemu_mutex_destroy(&(*rsp)->bitmap_mutex);
2615 qemu_mutex_destroy(&(*rsp)->src_page_req_mutex);
2616 g_free(*rsp);
2617 *rsp = NULL;
2618 }
Peter Xu7d7c96b2017-10-19 14:31:58 +08002619}
2620
Peter Xu84593a02017-10-19 14:31:59 +08002621static void xbzrle_cleanup(void)
2622{
2623 XBZRLE_cache_lock();
2624 if (XBZRLE.cache) {
2625 cache_fini(XBZRLE.cache);
2626 g_free(XBZRLE.encoded_buf);
2627 g_free(XBZRLE.current_buf);
2628 g_free(XBZRLE.zero_target_page);
2629 XBZRLE.cache = NULL;
2630 XBZRLE.encoded_buf = NULL;
2631 XBZRLE.current_buf = NULL;
2632 XBZRLE.zero_target_page = NULL;
2633 }
2634 XBZRLE_cache_unlock();
2635}
2636
Juan Quintelaf265e0e2017-06-28 11:52:27 +02002637static void ram_save_cleanup(void *opaque)
Juan Quintela56e93d22015-05-07 19:33:31 +02002638{
Juan Quintela53518d92017-05-04 11:46:24 +02002639 RAMState **rsp = opaque;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002640 RAMBlock *block;
Juan Quintelaeb859c52017-03-13 21:51:55 +01002641
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03002642 /* We don't use dirty log with background snapshots */
2643 if (!migrate_background_snapshot()) {
2644 /* The caller holds the iothread lock or is in a bottom half, so there
2645 * is no write race against the migration bitmap
2646 */
Hyman Huang(黄勇)63b41db2021-06-29 16:01:19 +00002647 if (global_dirty_tracking & GLOBAL_DIRTY_MIGRATION) {
2648 /*
2649 * do not stop dirty logging without having started it, since
2650 * memory_global_dirty_log_stop will assert that
2651 * memory_global_dirty_log_start/stop are used in pairs
2652 */
2653 memory_global_dirty_log_stop(GLOBAL_DIRTY_MIGRATION);
2654 }
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03002655 }
Juan Quintela6b6712e2017-03-22 15:18:04 +01002656
Yury Kotovfbd162e2019-02-15 20:45:46 +03002657 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Peter Xu002cad62019-06-03 14:50:56 +08002658 g_free(block->clear_bmap);
2659 block->clear_bmap = NULL;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002660 g_free(block->bmap);
2661 block->bmap = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02002662 }
2663
Peter Xu84593a02017-10-19 14:31:59 +08002664 xbzrle_cleanup();
Juan Quintelaf0afa332017-06-28 11:52:28 +02002665 compress_threads_save_cleanup();
Peter Xu7d7c96b2017-10-19 14:31:58 +08002666 ram_state_cleanup(rsp);
Juan Quintela56e93d22015-05-07 19:33:31 +02002667}
2668
Juan Quintela6f37bb82017-03-13 19:26:29 +01002669static void ram_state_reset(RAMState *rs)
Juan Quintela56e93d22015-05-07 19:33:31 +02002670{
Peter Xuec6f3ab2022-10-11 17:55:56 -04002671 int i;
2672
2673 for (i = 0; i < RAM_CHANNEL_MAX; i++) {
2674 rs->pss[i].last_sent_block = NULL;
2675 }
2676
Juan Quintela6f37bb82017-03-13 19:26:29 +01002677 rs->last_seen_block = NULL;
Juan Quintela269ace22017-03-21 15:23:31 +01002678 rs->last_page = 0;
Juan Quintela6f37bb82017-03-13 19:26:29 +01002679 rs->last_version = ram_list.version;
David Hildenbrand1a373522021-02-16 11:50:39 +01002680 rs->xbzrle_enabled = false;
Juan Quintela56e93d22015-05-07 19:33:31 +02002681}
2682
2683#define MAX_WAIT 50 /* ms, half buffered_file limit */
2684
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002685/* **** functions for postcopy ***** */
2686
Pavel Butsykinced1c612017-02-03 18:23:21 +03002687void ram_postcopy_migrated_memory_release(MigrationState *ms)
2688{
2689 struct RAMBlock *block;
Pavel Butsykinced1c612017-02-03 18:23:21 +03002690
Yury Kotovfbd162e2019-02-15 20:45:46 +03002691 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Juan Quintela6b6712e2017-03-22 15:18:04 +01002692 unsigned long *bitmap = block->bmap;
2693 unsigned long range = block->used_length >> TARGET_PAGE_BITS;
2694 unsigned long run_start = find_next_zero_bit(bitmap, range, 0);
Pavel Butsykinced1c612017-02-03 18:23:21 +03002695
2696 while (run_start < range) {
2697 unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
Alexey Romko8bba0042020-01-10 14:51:34 +01002698 ram_discard_range(block->idstr,
2699 ((ram_addr_t)run_start) << TARGET_PAGE_BITS,
2700 ((ram_addr_t)(run_end - run_start))
2701 << TARGET_PAGE_BITS);
Pavel Butsykinced1c612017-02-03 18:23:21 +03002702 run_start = find_next_zero_bit(bitmap, range, run_end + 1);
2703 }
2704 }
2705}
2706
Juan Quintela3d0684b2017-03-23 15:06:39 +01002707/**
2708 * postcopy_send_discard_bm_ram: discard a RAMBlock
2709 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002710 * Callback from postcopy_each_ram_send_discard for each RAMBlock
Juan Quintela3d0684b2017-03-23 15:06:39 +01002711 *
2712 * @ms: current migration state
Wei Yang89dab312019-07-15 10:05:49 +08002713 * @block: RAMBlock to discard
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002714 */
Philippe Mathieu-Daudé9e7d1222021-12-30 17:05:25 +01002715static void postcopy_send_discard_bm_ram(MigrationState *ms, RAMBlock *block)
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002716{
Juan Quintela6b6712e2017-03-22 15:18:04 +01002717 unsigned long end = block->used_length >> TARGET_PAGE_BITS;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002718 unsigned long current;
Wei Yang1e7cf8c2019-08-19 14:18:42 +08002719 unsigned long *bitmap = block->bmap;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002720
Juan Quintela6b6712e2017-03-22 15:18:04 +01002721 for (current = 0; current < end; ) {
Wei Yang1e7cf8c2019-08-19 14:18:42 +08002722 unsigned long one = find_next_bit(bitmap, end, current);
Wei Yang33a5cb622019-06-27 10:08:21 +08002723 unsigned long zero, discard_length;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002724
Wei Yang33a5cb622019-06-27 10:08:21 +08002725 if (one >= end) {
2726 break;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002727 }
Wei Yang33a5cb622019-06-27 10:08:21 +08002728
Wei Yang1e7cf8c2019-08-19 14:18:42 +08002729 zero = find_next_zero_bit(bitmap, end, one + 1);
Wei Yang33a5cb622019-06-27 10:08:21 +08002730
2731 if (zero >= end) {
2732 discard_length = end - one;
2733 } else {
2734 discard_length = zero - one;
2735 }
Wei Yang810cf2b2019-07-24 09:07:21 +08002736 postcopy_discard_send_range(ms, one, discard_length);
Wei Yang33a5cb622019-06-27 10:08:21 +08002737 current = one + discard_length;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002738 }
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002739}
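/*
 * Example of the run encoding above: if target pages 2-4 of the block are
 * dirty and the rest clean, the loop emits a single
 * postcopy_discard_send_range(ms, 2, 3).
 */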
2740
Peter Xuf30c2e52021-12-07 19:50:13 +08002741static void postcopy_chunk_hostpages_pass(MigrationState *ms, RAMBlock *block);
2742
Juan Quintela3d0684b2017-03-23 15:06:39 +01002743/**
2744 * postcopy_each_ram_send_discard: discard all RAMBlocks
2745 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002746 * Utility for the outgoing postcopy code.
2747 * Calls postcopy_send_discard_bm_ram for each RAMBlock
2748 * passing it bitmap indexes and name.
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002749 * (qemu_ram_foreach_block ends up passing unscaled lengths
2750 * which would mean postcopy code would have to deal with target page)
Juan Quintela3d0684b2017-03-23 15:06:39 +01002751 *
2752 * @ms: current migration state
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002753 */
Peter Xu739fcc12021-12-07 19:50:14 +08002754static void postcopy_each_ram_send_discard(MigrationState *ms)
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002755{
2756 struct RAMBlock *block;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002757
Yury Kotovfbd162e2019-02-15 20:45:46 +03002758 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Wei Yang810cf2b2019-07-24 09:07:21 +08002759 postcopy_discard_send_init(ms, block->idstr);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002760
2761 /*
Peter Xuf30c2e52021-12-07 19:50:13 +08002762 * Deal with TPS != HPS and huge pages. It discards any partially sent
2763 * host-page size chunks and marks any partially dirty host-page size
2764 * chunks as all dirty. In this case the host page is the host page
2765 * for the particular RAMBlock, i.e. it might be a huge page.
2766 */
2767 postcopy_chunk_hostpages_pass(ms, block);
2768
2769 /*
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002770 * Postcopy sends chunks of bitmap over the wire, but it
2771 * just needs indexes at this point, which avoids it having
2772 * target-page-specific code.
2773 */
Peter Xu739fcc12021-12-07 19:50:14 +08002774 postcopy_send_discard_bm_ram(ms, block);
Wei Yang810cf2b2019-07-24 09:07:21 +08002775 postcopy_discard_send_finish(ms);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002776 }
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002777}
2778
Juan Quintela3d0684b2017-03-23 15:06:39 +01002779/**
Wei Yang8324ef82019-08-19 14:18:41 +08002780 * postcopy_chunk_hostpages_pass: canonicalize bitmap in hostpages
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002781 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002782 * Helper for postcopy_chunk_hostpages; it's called twice to
2783 * canonicalize the two bitmaps, which are similar, but one is
2784 * inverted.
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002785 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002786 * Postcopy requires that all target pages in a host page are dirty or
2787 * clean, not a mix. This function canonicalizes the bitmaps.
2788 *
2789 * @ms: current migration state
Juan Quintela3d0684b2017-03-23 15:06:39 +01002790 * @block: block that contains the page we want to canonicalize
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002791 */
Wei Yang1e7cf8c2019-08-19 14:18:42 +08002792static void postcopy_chunk_hostpages_pass(MigrationState *ms, RAMBlock *block)
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002793{
Juan Quintela53518d92017-05-04 11:46:24 +02002794 RAMState *rs = ram_state;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002795 unsigned long *bitmap = block->bmap;
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00002796 unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002797 unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002798 unsigned long run_start;
2799
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00002800 if (block->page_size == TARGET_PAGE_SIZE) {
2801 /* Easy case - TPS==HPS for a non-huge page RAMBlock */
2802 return;
2803 }
2804
Wei Yang1e7cf8c2019-08-19 14:18:42 +08002805 /* Find a dirty page */
2806 run_start = find_next_bit(bitmap, pages, 0);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002807
Juan Quintela6b6712e2017-03-22 15:18:04 +01002808 while (run_start < pages) {
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002809
2810 /*
2811 * If the start of this run of pages is in the middle of a host
2812 * page, then we need to fixup this host page.
2813 */
Wei Yang9dec3cc2019-08-06 08:46:48 +08002814 if (QEMU_IS_ALIGNED(run_start, host_ratio)) {
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002815 /* Find the end of this run */
Wei Yang1e7cf8c2019-08-19 14:18:42 +08002816 run_start = find_next_zero_bit(bitmap, pages, run_start + 1);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002817 /*
2818 * If the end isn't at the start of a host page, then the
2819 * run doesn't finish at the end of a host page
2820 * and we need to discard.
2821 */
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002822 }
2823
Wei Yang9dec3cc2019-08-06 08:46:48 +08002824 if (!QEMU_IS_ALIGNED(run_start, host_ratio)) {
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002825 unsigned long page;
Wei Yangdad45ab2019-08-06 08:46:47 +08002826 unsigned long fixup_start_addr = QEMU_ALIGN_DOWN(run_start,
2827 host_ratio);
2828 run_start = QEMU_ALIGN_UP(run_start, host_ratio);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002829
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002830 /* Clean up the bitmap */
2831 for (page = fixup_start_addr;
2832 page < fixup_start_addr + host_ratio; page++) {
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002833 /*
2834 * Remark them as dirty, updating the count for any pages
2835 * that weren't previously dirty.
2836 */
Juan Quintela0d8ec882017-03-13 21:21:41 +01002837 rs->migration_dirty_pages += !test_and_set_bit(page, bitmap);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002838 }
2839 }
2840
Wei Yang1e7cf8c2019-08-19 14:18:42 +08002841 /* Find the next dirty page for the next iteration */
2842 run_start = find_next_bit(bitmap, pages, run_start);
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00002843 }
2844}
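/*
 * Example with host_ratio == 4 (e.g. 16KB host pages, 4KB target pages):
 * if only target pages 5-6 are dirty, the pass above re-marks pages 4-7 as
 * dirty so that a whole host page is resent rather than a partial one.
 */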
2845
Juan Quintela3d0684b2017-03-23 15:06:39 +01002846/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01002847 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
2848 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002849 * Transmit the set of pages to be discarded after precopy to the target;
2850 * these are pages that:
2851 * a) Have been previously transmitted but are now dirty again
2852 * b) Pages that have never been transmitted; this ensures that
2853 * any pages on the destination that have been mapped by background
2854 * tasks get discarded (transparent huge pages is the specific concern)
2855 * Hopefully this is pretty sparse
Juan Quintela3d0684b2017-03-23 15:06:39 +01002856 *
2857 * @ms: current migration state
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002858 */
Peter Xu739fcc12021-12-07 19:50:14 +08002859void ram_postcopy_send_discard_bitmap(MigrationState *ms)
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002860{
Juan Quintela53518d92017-05-04 11:46:24 +02002861 RAMState *rs = ram_state;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002862
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01002863 RCU_READ_LOCK_GUARD();
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002864
2865 /* This should be our last sync, the src is now paused */
Juan Quintelaeb859c52017-03-13 21:51:55 +01002866 migration_bitmap_sync(rs);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002867
Juan Quintela6b6712e2017-03-22 15:18:04 +01002868 /* Easiest way to make sure we don't resume in the middle of a host-page */
Peter Xuec6f3ab2022-10-11 17:55:56 -04002869 rs->pss[RAM_CHANNEL_PRECOPY].last_sent_block = NULL;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002870 rs->last_seen_block = NULL;
Juan Quintela6b6712e2017-03-22 15:18:04 +01002871 rs->last_page = 0;
2872
Peter Xu739fcc12021-12-07 19:50:14 +08002873 postcopy_each_ram_send_discard(ms);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002874
Peter Xu739fcc12021-12-07 19:50:14 +08002875 trace_ram_postcopy_send_discard_bitmap();
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002876}
2877
Juan Quintela3d0684b2017-03-23 15:06:39 +01002878/**
2879 * ram_discard_range: discard dirtied pages at the beginning of postcopy
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002880 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002881 * Returns zero on success
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002882 *
Juan Quintela36449152017-03-23 15:11:59 +01002883 * @rbname: name of the RAMBlock of the request. NULL means the
2884 * same as the last one.
Juan Quintela3d0684b2017-03-23 15:06:39 +01002885 * @start: RAMBlock starting page
2886 * @length: RAMBlock size
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002887 */
Juan Quintelaaaa20642017-03-21 11:35:24 +01002888int ram_discard_range(const char *rbname, uint64_t start, size_t length)
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002889{
Juan Quintela36449152017-03-23 15:11:59 +01002890 trace_ram_discard_range(rbname, start, length);
Dr. David Alan Gilbertd3a50382017-02-24 18:28:32 +00002891
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01002892 RCU_READ_LOCK_GUARD();
Juan Quintela36449152017-03-23 15:11:59 +01002893 RAMBlock *rb = qemu_ram_block_by_name(rbname);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002894
2895 if (!rb) {
Juan Quintela36449152017-03-23 15:11:59 +01002896 error_report("ram_discard_range: Failed to find block '%s'", rbname);
Daniel Henrique Barboza03acb4e2020-01-06 15:23:31 -03002897 return -1;
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002898 }
2899
Peter Xu814bb082018-07-23 20:33:02 +08002900 /*
2901 * On source VM, we don't need to update the received bitmap since
2902 * we don't even have one.
2903 */
2904 if (rb->receivedmap) {
2905 bitmap_clear(rb->receivedmap, start >> qemu_target_page_bits(),
2906 length >> qemu_target_page_bits());
2907 }
2908
Daniel Henrique Barboza03acb4e2020-01-06 15:23:31 -03002909 return ram_block_discard_range(rb, start, length);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00002910}
2911
Peter Xu84593a02017-10-19 14:31:59 +08002912/*
2913 * For every allocation, we will try not to crash the VM if the
2914 * allocation fails.
2915 */
2916static int xbzrle_init(void)
2917{
2918 Error *local_err = NULL;
2919
2920 if (!migrate_use_xbzrle()) {
2921 return 0;
2922 }
2923
2924 XBZRLE_cache_lock();
2925
2926 XBZRLE.zero_target_page = g_try_malloc0(TARGET_PAGE_SIZE);
2927 if (!XBZRLE.zero_target_page) {
2928 error_report("%s: Error allocating zero page", __func__);
2929 goto err_out;
2930 }
2931
2932 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(),
2933 TARGET_PAGE_SIZE, &local_err);
2934 if (!XBZRLE.cache) {
2935 error_report_err(local_err);
2936 goto free_zero_page;
2937 }
2938
2939 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
2940 if (!XBZRLE.encoded_buf) {
2941 error_report("%s: Error allocating encoded_buf", __func__);
2942 goto free_cache;
2943 }
2944
2945 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
2946 if (!XBZRLE.current_buf) {
2947 error_report("%s: Error allocating current_buf", __func__);
2948 goto free_encoded_buf;
2949 }
2950
2951 /* We are all good */
2952 XBZRLE_cache_unlock();
2953 return 0;
2954
2955free_encoded_buf:
2956 g_free(XBZRLE.encoded_buf);
2957 XBZRLE.encoded_buf = NULL;
2958free_cache:
2959 cache_fini(XBZRLE.cache);
2960 XBZRLE.cache = NULL;
2961free_zero_page:
2962 g_free(XBZRLE.zero_target_page);
2963 XBZRLE.zero_target_page = NULL;
2964err_out:
2965 XBZRLE_cache_unlock();
2966 return -ENOMEM;
2967}
2968
Juan Quintela53518d92017-05-04 11:46:24 +02002969static int ram_state_init(RAMState **rsp)
Juan Quintela56e93d22015-05-07 19:33:31 +02002970{
Peter Xu7d00ee62017-10-19 14:31:57 +08002971 *rsp = g_try_new0(RAMState, 1);
2972
2973 if (!*rsp) {
2974 error_report("%s: Init ramstate fail", __func__);
2975 return -1;
2976 }
Juan Quintela53518d92017-05-04 11:46:24 +02002977
2978 qemu_mutex_init(&(*rsp)->bitmap_mutex);
2979 qemu_mutex_init(&(*rsp)->src_page_req_mutex);
2980 QSIMPLEQ_INIT(&(*rsp)->src_page_requests);
Juan Quintela56e93d22015-05-07 19:33:31 +02002981
Peter Xu7d00ee62017-10-19 14:31:57 +08002982 /*
Ivan Ren40c4d4a2019-07-14 22:51:19 +08002983 * Count the total number of pages used by ram blocks not including any
2984 * gaps due to alignment or unplugs.
Wei Yang03158512019-06-04 14:17:27 +08002985 * This must match the initial value of the dirty bitmap.
Peter Xu7d00ee62017-10-19 14:31:57 +08002986 */
Ivan Ren40c4d4a2019-07-14 22:51:19 +08002987 (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
Peter Xu7d00ee62017-10-19 14:31:57 +08002988 ram_state_reset(*rsp);
2989
2990 return 0;
2991}
2992
Peter Xud6eff5d2017-10-19 14:32:00 +08002993static void ram_list_init_bitmaps(void)
2994{
Peter Xu002cad62019-06-03 14:50:56 +08002995 MigrationState *ms = migrate_get_current();
Peter Xud6eff5d2017-10-19 14:32:00 +08002996 RAMBlock *block;
2997 unsigned long pages;
Peter Xu002cad62019-06-03 14:50:56 +08002998 uint8_t shift;
Peter Xud6eff5d2017-10-19 14:32:00 +08002999
3000 /* Skip setting bitmap if there is no RAM */
3001 if (ram_bytes_total()) {
Peter Xu002cad62019-06-03 14:50:56 +08003002 shift = ms->clear_bitmap_shift;
3003 if (shift > CLEAR_BITMAP_SHIFT_MAX) {
3004 error_report("clear_bitmap_shift (%u) too big, using "
3005 "max value (%u)", shift, CLEAR_BITMAP_SHIFT_MAX);
3006 shift = CLEAR_BITMAP_SHIFT_MAX;
3007 } else if (shift < CLEAR_BITMAP_SHIFT_MIN) {
3008 error_report("clear_bitmap_shift (%u) too small, using "
3009 "min value (%u)", shift, CLEAR_BITMAP_SHIFT_MIN);
3010 shift = CLEAR_BITMAP_SHIFT_MIN;
3011 }
3012
Yury Kotovfbd162e2019-02-15 20:45:46 +03003013 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Peter Xud6eff5d2017-10-19 14:32:00 +08003014 pages = block->max_length >> TARGET_PAGE_BITS;
Wei Yang03158512019-06-04 14:17:27 +08003015 /*
3016 * The initial dirty bitmap for migration must be set with all
3017 * ones to make sure we'll migrate every guest RAM page to
3018 * destination.
Ivan Ren40c4d4a2019-07-14 22:51:19 +08003019 * Here we set RAMBlock.bmap all to 1 because when we restart a
3020 * new migration after a failed one, ram_list.
3021 * dirty_memory[DIRTY_MEMORY_MIGRATION] doesn't include the whole
3022 * guest memory.
Wei Yang03158512019-06-04 14:17:27 +08003023 */
Peter Xud6eff5d2017-10-19 14:32:00 +08003024 block->bmap = bitmap_new(pages);
Ivan Ren40c4d4a2019-07-14 22:51:19 +08003025 bitmap_set(block->bmap, 0, pages);
Peter Xu002cad62019-06-03 14:50:56 +08003026 block->clear_bmap_shift = shift;
3027 block->clear_bmap = bitmap_new(clear_bmap_size(pages, shift));
Peter Xud6eff5d2017-10-19 14:32:00 +08003028 }
3029 }
3030}
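/*
 * For scale: with clear_bmap_shift == 18 and 4KB target pages, one
 * clear_bmap bit covers 2^18 target pages, i.e. 1GB of guest memory.
 */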
3031
David Hildenbrandbe39b4c2021-10-11 19:53:41 +02003032static void migration_bitmap_clear_discarded_pages(RAMState *rs)
3033{
3034 unsigned long pages;
3035 RAMBlock *rb;
3036
3037 RCU_READ_LOCK_GUARD();
3038
3039 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
3040 pages = ramblock_dirty_bitmap_clear_discarded_pages(rb);
3041 rs->migration_dirty_pages -= pages;
3042 }
3043}
3044
Peter Xud6eff5d2017-10-19 14:32:00 +08003045static void ram_init_bitmaps(RAMState *rs)
3046{
3047 /* For memory_global_dirty_log_start below. */
3048 qemu_mutex_lock_iothread();
3049 qemu_mutex_lock_ramlist();
Peter Xud6eff5d2017-10-19 14:32:00 +08003050
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003051 WITH_RCU_READ_LOCK_GUARD() {
3052 ram_list_init_bitmaps();
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03003053 /* We don't use dirty log with background snapshots */
3054 if (!migrate_background_snapshot()) {
Hyman Huang(黄勇)63b41db2021-06-29 16:01:19 +00003055 memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION);
Andrey Gruzdev278e2f52021-01-29 13:14:05 +03003056 migration_bitmap_sync_precopy(rs);
3057 }
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003058 }
Peter Xud6eff5d2017-10-19 14:32:00 +08003059 qemu_mutex_unlock_ramlist();
3060 qemu_mutex_unlock_iothread();
David Hildenbrandbe39b4c2021-10-11 19:53:41 +02003061
3062 /*
3063 * After an eventual first bitmap sync, fixup the initial bitmap
3064 * containing all 1s to exclude any discarded pages from migration.
3065 */
3066 migration_bitmap_clear_discarded_pages(rs);
Peter Xud6eff5d2017-10-19 14:32:00 +08003067}
3068
Peter Xu7d00ee62017-10-19 14:31:57 +08003069static int ram_init_all(RAMState **rsp)
3070{
Peter Xu7d00ee62017-10-19 14:31:57 +08003071 if (ram_state_init(rsp)) {
3072 return -1;
3073 }
3074
Peter Xu84593a02017-10-19 14:31:59 +08003075 if (xbzrle_init()) {
3076 ram_state_cleanup(rsp);
3077 return -1;
Juan Quintela56e93d22015-05-07 19:33:31 +02003078 }
3079
Peter Xud6eff5d2017-10-19 14:32:00 +08003080 ram_init_bitmaps(*rsp);
zhanghailianga91246c2016-10-27 14:42:59 +08003081
3082 return 0;
3083}
3084
Peter Xu08614f32018-05-02 18:47:33 +08003085static void ram_state_resume_prepare(RAMState *rs, QEMUFile *out)
3086{
3087 RAMBlock *block;
3088 uint64_t pages = 0;
3089
3090 /*
3091 * Postcopy is not using xbzrle/compression, so no need for that.
3092 * Also, since the source is already halted, we don't need to care
3093 * about dirty page logging as well.
3094 */
3095
Yury Kotovfbd162e2019-02-15 20:45:46 +03003096 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Peter Xu08614f32018-05-02 18:47:33 +08003097 pages += bitmap_count_one(block->bmap,
3098 block->used_length >> TARGET_PAGE_BITS);
3099 }
3100
3101 /* This may not be aligned with current bitmaps. Recalculate. */
3102 rs->migration_dirty_pages = pages;
3103
David Hildenbrand1a373522021-02-16 11:50:39 +01003104 ram_state_reset(rs);
Peter Xu08614f32018-05-02 18:47:33 +08003105
3106 /* Update RAMState cache of output QEMUFile */
Peter Xu7f401b82022-10-11 17:55:59 -04003107 rs->pss[RAM_CHANNEL_PRECOPY].pss_channel = out;
Peter Xu08614f32018-05-02 18:47:33 +08003108
3109 trace_ram_state_resume_prepare(pages);
3110}
3111
Juan Quintela3d0684b2017-03-23 15:06:39 +01003112/*
Wei Wang6bcb05f2018-12-11 16:24:50 +08003113 * This function clears bits of the free pages reported by the caller from the
3114 * migration dirty bitmap. @addr is the host address corresponding to the
3115 * start of the continuous guest free pages, and @len is the total bytes of
3116 * those pages.
3117 */
3118void qemu_guest_free_page_hint(void *addr, size_t len)
3119{
3120 RAMBlock *block;
3121 ram_addr_t offset;
3122 size_t used_len, start, npages;
3123 MigrationState *s = migrate_get_current();
3124
3125 /* This function is currently expected to be used during live migration */
3126 if (!migration_is_setup_or_active(s->state)) {
3127 return;
3128 }
3129
3130 for (; len > 0; len -= used_len, addr += used_len) {
3131 block = qemu_ram_block_from_host(addr, false, &offset);
3132 if (unlikely(!block || offset >= block->used_length)) {
3133 /*
3134 * The implementation might not support RAMBlock resize during
3135 * live migration, but it could happen in theory with future
3136 * updates. So we add a check here to capture that case.
3137 */
3138 error_report_once("%s unexpected error", __func__);
3139 return;
3140 }
3141
3142 if (len <= block->used_length - offset) {
3143 used_len = len;
3144 } else {
3145 used_len = block->used_length - offset;
3146 }
3147
3148 start = offset >> TARGET_PAGE_BITS;
3149 npages = used_len >> TARGET_PAGE_BITS;
3150
3151 qemu_mutex_lock(&ram_state->bitmap_mutex);
Wei Wang3143577d2021-07-22 04:30:55 -04003152 /*
3153 * The skipped free pages are equivalent to sent pages from clear_bmap's
3154 * perspective, so clear the bits from the memory region bitmap which
3155 * are initially set. Otherwise those skipped pages will be sent in
3156 * the next round after syncing from the memory region bitmap.
3157 */
David Hildenbrand1230a252021-09-04 18:09:07 +02003158 migration_clear_memory_region_dirty_bitmap_range(block, start, npages);
Wei Wang6bcb05f2018-12-11 16:24:50 +08003159 ram_state->migration_dirty_pages -=
3160 bitmap_count_one_with_offset(block->bmap, start, npages);
3161 bitmap_clear(block->bmap, start, npages);
3162 qemu_mutex_unlock(&ram_state->bitmap_mutex);
3163 }
3164}
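/*
 * Example: a 2MB free range reported by the guest (e.g. via virtio-balloon
 * free page hinting) on a 4KB-page RAMBlock clears 512 bits in block->bmap
 * and drops migration_dirty_pages by however many of them were still set.
 */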
3165
3166/*
Juan Quintela3d0684b2017-03-23 15:06:39 +01003167 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
zhanghailianga91246c2016-10-27 14:42:59 +08003168 * a long-running RCU critical section. When RCU reclaims in the code
3169 * start to become numerous it will be necessary to reduce the
3170 * granularity of these critical sections.
3171 */
3172
Juan Quintela3d0684b2017-03-23 15:06:39 +01003173/**
3174 * ram_save_setup: Setup RAM for migration
3175 *
3176 * Returns zero to indicate success and negative for error
3177 *
3178 * @f: QEMUFile where to send the data
3179 * @opaque: RAMState pointer
3180 */
zhanghailianga91246c2016-10-27 14:42:59 +08003181static int ram_save_setup(QEMUFile *f, void *opaque)
3182{
Juan Quintela53518d92017-05-04 11:46:24 +02003183 RAMState **rsp = opaque;
zhanghailianga91246c2016-10-27 14:42:59 +08003184 RAMBlock *block;
Leonardo Bras33d70972022-05-13 03:28:35 -03003185 int ret;
zhanghailianga91246c2016-10-27 14:42:59 +08003186
Xiao Guangrongdcaf4462018-03-30 15:51:20 +08003187 if (compress_threads_save_setup()) {
3188 return -1;
3189 }
3190
zhanghailianga91246c2016-10-27 14:42:59 +08003191 /* migration has already setup the bitmap, reuse it. */
3192 if (!migration_in_colo_state()) {
Peter Xu7d00ee62017-10-19 14:31:57 +08003193 if (ram_init_all(rsp) != 0) {
Xiao Guangrongdcaf4462018-03-30 15:51:20 +08003194 compress_threads_save_cleanup();
zhanghailianga91246c2016-10-27 14:42:59 +08003195 return -1;
Juan Quintela53518d92017-05-04 11:46:24 +02003196 }
zhanghailianga91246c2016-10-27 14:42:59 +08003197 }
Peter Xu7f401b82022-10-11 17:55:59 -04003198 (*rsp)->pss[RAM_CHANNEL_PRECOPY].pss_channel = f;
zhanghailianga91246c2016-10-27 14:42:59 +08003199
Dr. David Alan Gilbert0e6ebd42019-10-07 15:36:38 +01003200 WITH_RCU_READ_LOCK_GUARD() {
3201 qemu_put_be64(f, ram_bytes_total_common(true) | RAM_SAVE_FLAG_MEM_SIZE);
Juan Quintela56e93d22015-05-07 19:33:31 +02003202
Dr. David Alan Gilbert0e6ebd42019-10-07 15:36:38 +01003203 RAMBLOCK_FOREACH_MIGRATABLE(block) {
3204 qemu_put_byte(f, strlen(block->idstr));
3205 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
3206 qemu_put_be64(f, block->used_length);
3207 if (migrate_postcopy_ram() && block->page_size !=
3208 qemu_host_page_size) {
3209 qemu_put_be64(f, block->page_size);
3210 }
3211 if (migrate_ignore_shared()) {
3212 qemu_put_be64(f, block->mr->addr);
3213 }
Yury Kotovfbd162e2019-02-15 20:45:46 +03003214 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003215 }
3216
Juan Quintela56e93d22015-05-07 19:33:31 +02003217 ram_control_before_iterate(f, RAM_CONTROL_SETUP);
3218 ram_control_after_iterate(f, RAM_CONTROL_SETUP);
3219
Leonardo Bras33d70972022-05-13 03:28:35 -03003220 ret = multifd_send_sync_main(f);
3221 if (ret < 0) {
3222 return ret;
3223 }
3224
Juan Quintela56e93d22015-05-07 19:33:31 +02003225 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
Juan Quintela35374cb2018-04-18 10:13:21 +02003226 qemu_fflush(f);
Juan Quintela56e93d22015-05-07 19:33:31 +02003227
3228 return 0;
3229}
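/*
 * Roughly, the setup stage above puts on the wire: the total RAM size
 * tagged with RAM_SAVE_FLAG_MEM_SIZE, then for each migratable block its
 * idstr length, idstr and used_length (plus page_size and mr->addr when
 * the corresponding features are enabled), and finally RAM_SAVE_FLAG_EOS.
 */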
3230
Juan Quintela3d0684b2017-03-23 15:06:39 +01003231/**
3232 * ram_save_iterate: iterative stage for migration
3233 *
3234 * Returns zero to indicate success and negative for error
3235 *
3236 * @f: QEMUFile where to send the data
3237 * @opaque: RAMState pointer
3238 */
Juan Quintela56e93d22015-05-07 19:33:31 +02003239static int ram_save_iterate(QEMUFile *f, void *opaque)
3240{
Juan Quintela53518d92017-05-04 11:46:24 +02003241 RAMState **temp = opaque;
3242 RAMState *rs = *temp;
Juan Quintela3d4095b2019-12-18 05:12:36 +01003243 int ret = 0;
Juan Quintela56e93d22015-05-07 19:33:31 +02003244 int i;
3245 int64_t t0;
Thomas Huth5c903082016-11-04 14:10:17 +01003246 int done = 0;
Juan Quintela56e93d22015-05-07 19:33:31 +02003247
Peter Lievenb2557342018-03-08 12:18:24 +01003248 if (blk_mig_bulk_active()) {
3249 /* Avoid transferring ram during bulk phase of block migration as
3250 * the bulk phase will usually take a long time and transferring
3251 * ram updates during that time is pointless. */
3252 goto out;
3253 }
3254
Peter Xu63268c42021-06-30 16:08:05 -04003255 /*
3256 * We'll take this lock a little bit long, but it's okay for two reasons.
3257 * Firstly, the only other thread that can take it is the one calling
3258 * qemu_guest_free_page_hint(), which should be rare; secondly, see
3259 * MAX_WAIT (if curious, further see commit 4508bd9ed8053ce) below, which
3260 * guarantees that we'll release it on a regular basis.
3261 */
3262 qemu_mutex_lock(&rs->bitmap_mutex);
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003263 WITH_RCU_READ_LOCK_GUARD() {
3264 if (ram_list.version != rs->last_version) {
3265 ram_state_reset(rs);
Dr. David Alan Gilberte03a34f2018-06-13 11:26:42 +01003266 }
3267
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003268 /* Read version before ram_list.blocks */
3269 smp_rmb();
Xiao Guangronge8f37352018-09-03 17:26:44 +08003270
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003271 ram_control_before_iterate(f, RAM_CONTROL_ROUND);
Xiao Guangronge8f37352018-09-03 17:26:44 +08003272
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003273 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
3274 i = 0;
3275 while ((ret = qemu_file_rate_limit(f)) == 0 ||
Peter Xua1fe28d2022-01-19 16:09:18 +08003276 postcopy_has_request(rs)) {
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003277 int pages;
Jason J. Herne070afca2015-09-08 13:12:35 -04003278
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003279 if (qemu_file_get_error(f)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02003280 break;
3281 }
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003282
Juan Quintela05931ec2021-12-15 19:01:21 +01003283 pages = ram_find_and_save_block(rs);
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003284 /* no more pages to send */
3285 if (pages == 0) {
3286 done = 1;
3287 break;
3288 }
3289
3290 if (pages < 0) {
3291 qemu_file_set_error(f, pages);
3292 break;
3293 }
3294
3295 rs->target_page_count += pages;
3296
3297 /*
Wei Yang644acf92019-11-07 20:39:07 +08003298 * During postcopy, it is necessary to make sure one whole host
3299 * page is sent in one chunk.
3300 */
3301 if (migrate_postcopy_ram()) {
3302 flush_compressed_data(rs);
3303 }
3304
3305 /*
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003306 * we want to check in the 1st loop, just in case it was the 1st
3307 * time and we had to sync the dirty bitmap.
3308 * qemu_clock_get_ns() is a bit expensive, so we only check once
3309 * every few iterations
3310 */
3311 if ((i & 63) == 0) {
3312 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) /
3313 1000000;
3314 if (t1 > MAX_WAIT) {
3315 trace_ram_save_iterate_big_wait(t1, i);
3316 break;
3317 }
3318 }
3319 i++;
Juan Quintela56e93d22015-05-07 19:33:31 +02003320 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003321 }
Peter Xu63268c42021-06-30 16:08:05 -04003322 qemu_mutex_unlock(&rs->bitmap_mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02003323
3324 /*
3325 * Must occur before EOS (or any QEMUFile operation)
3326 * because of RDMA protocol.
3327 */
3328 ram_control_after_iterate(f, RAM_CONTROL_ROUND);
3329
Peter Lievenb2557342018-03-08 12:18:24 +01003330out:
Juan Quintelab69a0222020-01-22 11:36:12 +01003331 if (ret >= 0
3332 && migration_is_setup_or_active(migrate_get_current()->state)) {
Peter Xu7f401b82022-10-11 17:55:59 -04003333 ret = multifd_send_sync_main(rs->pss[RAM_CHANNEL_PRECOPY].pss_channel);
Leonardo Bras33d70972022-05-13 03:28:35 -03003334 if (ret < 0) {
3335 return ret;
3336 }
3337
Juan Quintela3d4095b2019-12-18 05:12:36 +01003338 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
3339 qemu_fflush(f);
David Edmondson4c2d0f62021-12-21 09:34:40 +00003340 ram_transferred_add(8);
Juan Quintela56e93d22015-05-07 19:33:31 +02003341
Juan Quintela3d4095b2019-12-18 05:12:36 +01003342 ret = qemu_file_get_error(f);
3343 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003344 if (ret < 0) {
3345 return ret;
3346 }
3347
Thomas Huth5c903082016-11-04 14:10:17 +01003348 return done;
Juan Quintela56e93d22015-05-07 19:33:31 +02003349}
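/*
 * So ram_save_iterate() returns 1 once no dirty pages were found (done),
 * 0 when rate limiting stopped it with work still pending, and a negative
 * value on stream errors.
 */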
3350
Juan Quintela3d0684b2017-03-23 15:06:39 +01003351/**
3352 * ram_save_complete: function called to send the remaining amount of ram
3353 *
Xiao Guangronge8f37352018-09-03 17:26:44 +08003354 * Returns zero to indicate success or negative on error
Juan Quintela3d0684b2017-03-23 15:06:39 +01003355 *
3356 * Called with iothread lock
3357 *
3358 * @f: QEMUFile where to send the data
3359 * @opaque: RAMState pointer
3360 */
Juan Quintela56e93d22015-05-07 19:33:31 +02003361static int ram_save_complete(QEMUFile *f, void *opaque)
3362{
Juan Quintela53518d92017-05-04 11:46:24 +02003363 RAMState **temp = opaque;
3364 RAMState *rs = *temp;
Xiao Guangronge8f37352018-09-03 17:26:44 +08003365 int ret = 0;
Juan Quintela6f37bb82017-03-13 19:26:29 +01003366
Juan Quintela05931ec2021-12-15 19:01:21 +01003367 rs->last_stage = !migration_in_colo_state();
3368
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003369 WITH_RCU_READ_LOCK_GUARD() {
3370 if (!migration_in_postcopy()) {
3371 migration_bitmap_sync_precopy(rs);
Juan Quintela56e93d22015-05-07 19:33:31 +02003372 }
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003373
3374 ram_control_before_iterate(f, RAM_CONTROL_FINISH);
3375
3376 /* try transferring iterative blocks of memory */
3377
3378 /* flush all remaining blocks regardless of rate limiting */
Peter Xuc13221b2022-10-11 17:55:45 -04003379 qemu_mutex_lock(&rs->bitmap_mutex);
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003380 while (true) {
3381 int pages;
3382
Juan Quintela05931ec2021-12-15 19:01:21 +01003383 pages = ram_find_and_save_block(rs);
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003384 /* no more blocks to send */
3385 if (pages == 0) {
3386 break;
3387 }
3388 if (pages < 0) {
3389 ret = pages;
3390 break;
3391 }
Xiao Guangronge8f37352018-09-03 17:26:44 +08003392 }
Peter Xuc13221b2022-10-11 17:55:45 -04003393 qemu_mutex_unlock(&rs->bitmap_mutex);
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003394
3395 flush_compressed_data(rs);
3396 ram_control_after_iterate(f, RAM_CONTROL_FINISH);
Juan Quintela56e93d22015-05-07 19:33:31 +02003397 }
3398
Leonardo Bras33d70972022-05-13 03:28:35 -03003399 if (ret < 0) {
3400 return ret;
Juan Quintela3d4095b2019-12-18 05:12:36 +01003401 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003402
Peter Xu7f401b82022-10-11 17:55:59 -04003403 ret = multifd_send_sync_main(rs->pss[RAM_CHANNEL_PRECOPY].pss_channel);
Leonardo Bras33d70972022-05-13 03:28:35 -03003404 if (ret < 0) {
3405 return ret;
3406 }
3407
3408 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
3409 qemu_fflush(f);
3410
3411 return 0;
Juan Quintela56e93d22015-05-07 19:33:31 +02003412}
3413
Juan Quintelafd703852022-10-03 02:50:42 +02003414static void ram_state_pending_estimate(void *opaque,
Juan Quintelac8df4a72022-10-03 02:00:03 +02003415 uint64_t *res_precopy_only,
3416 uint64_t *res_compatible,
3417 uint64_t *res_postcopy_only)
Juan Quintela56e93d22015-05-07 19:33:31 +02003418{
Juan Quintela53518d92017-05-04 11:46:24 +02003419 RAMState **temp = opaque;
3420 RAMState *rs = *temp;
Juan Quintela56e93d22015-05-07 19:33:31 +02003421
Juan Quintelac8df4a72022-10-03 02:00:03 +02003422 uint64_t remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
Juan Quintela56e93d22015-05-07 19:33:31 +02003423
Juan Quintelac8df4a72022-10-03 02:00:03 +02003424 if (migrate_postcopy_ram()) {
3425 /* We can do postcopy, and all the data is postcopiable */
3426 *res_postcopy_only += remaining_size;
3427 } else {
3428 *res_precopy_only += remaining_size;
3429 }
3430}
3431
Juan Quintelafd703852022-10-03 02:50:42 +02003432static void ram_state_pending_exact(void *opaque,
Juan Quintelac8df4a72022-10-03 02:00:03 +02003433 uint64_t *res_precopy_only,
3434 uint64_t *res_compatible,
3435 uint64_t *res_postcopy_only)
3436{
3437 RAMState **temp = opaque;
3438 RAMState *rs = *temp;
3439
3440 uint64_t remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
3441
3442 if (!migration_in_postcopy()) {
Juan Quintela56e93d22015-05-07 19:33:31 +02003443 qemu_mutex_lock_iothread();
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003444 WITH_RCU_READ_LOCK_GUARD() {
3445 migration_bitmap_sync_precopy(rs);
3446 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003447 qemu_mutex_unlock_iothread();
Juan Quintela9edabd42017-03-14 12:02:16 +01003448 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
Juan Quintela56e93d22015-05-07 19:33:31 +02003449 }
Dr. David Alan Gilbertc31b0982015-11-05 18:10:54 +00003450
Vladimir Sementsov-Ogievskiy86e11672017-07-10 19:30:15 +03003451 if (migrate_postcopy_ram()) {
3452 /* We can do postcopy, and all the data is postcopiable */
Vladimir Sementsov-Ogievskiy47995022018-03-13 15:34:00 -04003453 *res_compatible += remaining_size;
Vladimir Sementsov-Ogievskiy86e11672017-07-10 19:30:15 +03003454 } else {
Vladimir Sementsov-Ogievskiy47995022018-03-13 15:34:00 -04003455 *res_precopy_only += remaining_size;
Vladimir Sementsov-Ogievskiy86e11672017-07-10 19:30:15 +03003456 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003457}
3458
3459static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
3460{
3461 unsigned int xh_len;
3462 int xh_flags;
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00003463 uint8_t *loaded_data;
Juan Quintela56e93d22015-05-07 19:33:31 +02003464
Juan Quintela56e93d22015-05-07 19:33:31 +02003465 /* extract RLE header */
3466 xh_flags = qemu_get_byte(f);
3467 xh_len = qemu_get_be16(f);
3468
3469 if (xh_flags != ENCODING_FLAG_XBZRLE) {
3470 error_report("Failed to load XBZRLE page - wrong compression!");
3471 return -1;
3472 }
3473
3474 if (xh_len > TARGET_PAGE_SIZE) {
3475 error_report("Failed to load XBZRLE page - len overflow!");
3476 return -1;
3477 }
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003478 loaded_data = XBZRLE.decoded_buf;
Juan Quintela56e93d22015-05-07 19:33:31 +02003479 /* load data and decode */
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003480 /* it can change loaded_data to point to an internal buffer */
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00003481 qemu_get_buffer_in_place(f, &loaded_data, xh_len);
Juan Quintela56e93d22015-05-07 19:33:31 +02003482
3483 /* decode RLE */
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00003484 if (xbzrle_decode_buffer(loaded_data, xh_len, host,
Juan Quintela56e93d22015-05-07 19:33:31 +02003485 TARGET_PAGE_SIZE) == -1) {
3486 error_report("Failed to load XBZRLE page - decode error!");
3487 return -1;
3488 }
3489
3490 return 0;
3491}
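/*
 * Wire format consumed above: one flags byte (must be ENCODING_FLAG_XBZRLE),
 * a big-endian 16-bit encoded length, then the XBZRLE delta, which is
 * decoded on top of the current contents of the destination page.
 */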
3492
Juan Quintela3d0684b2017-03-23 15:06:39 +01003493/**
3494 * ram_block_from_stream: read a RAMBlock id from the migration stream
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003495 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01003496 * Must be called from within an RCU critical section.
3497 *
3498 * Returns a pointer from within the RCU-protected ram_list.
3499 *
Peter Xu755e8d72022-03-01 16:39:07 +08003500 * @mis: the migration incoming state pointer
Juan Quintela3d0684b2017-03-23 15:06:39 +01003501 * @f: QEMUFile where to read the data from
3502 * @flags: Page flags (mostly to see if it's a continuation of previous block)
Peter Xuc01b16e2022-07-07 14:55:04 -04003503 * @channel: the channel we're using
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003504 */
Peter Xu755e8d72022-03-01 16:39:07 +08003505static inline RAMBlock *ram_block_from_stream(MigrationIncomingState *mis,
Peter Xuc01b16e2022-07-07 14:55:04 -04003506 QEMUFile *f, int flags,
3507 int channel)
Juan Quintela56e93d22015-05-07 19:33:31 +02003508{
Peter Xuc01b16e2022-07-07 14:55:04 -04003509 RAMBlock *block = mis->last_recv_block[channel];
Juan Quintela56e93d22015-05-07 19:33:31 +02003510 char id[256];
3511 uint8_t len;
3512
3513 if (flags & RAM_SAVE_FLAG_CONTINUE) {
zhanghailiang4c4bad42016-01-15 11:37:41 +08003514 if (!block) {
Juan Quintela56e93d22015-05-07 19:33:31 +02003515 error_report("Ack, bad migration stream!");
3516 return NULL;
3517 }
zhanghailiang4c4bad42016-01-15 11:37:41 +08003518 return block;
Juan Quintela56e93d22015-05-07 19:33:31 +02003519 }
3520
3521 len = qemu_get_byte(f);
3522 qemu_get_buffer(f, (uint8_t *)id, len);
3523 id[len] = 0;
3524
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00003525 block = qemu_ram_block_by_name(id);
zhanghailiang4c4bad42016-01-15 11:37:41 +08003526 if (!block) {
3527 error_report("Can't find block %s", id);
3528 return NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02003529 }
3530
Yury Kotovfbd162e2019-02-15 20:45:46 +03003531 if (ramblock_is_ignored(block)) {
Cédric Le Goaterb895de52018-05-14 08:57:00 +02003532 error_report("block %s should not be migrated !", id);
3533 return NULL;
3534 }
3535
Peter Xuc01b16e2022-07-07 14:55:04 -04003536 mis->last_recv_block[channel] = block;
Peter Xu755e8d72022-03-01 16:39:07 +08003537
zhanghailiang4c4bad42016-01-15 11:37:41 +08003538 return block;
3539}
3540
3541static inline void *host_from_ram_block_offset(RAMBlock *block,
3542 ram_addr_t offset)
3543{
3544 if (!offset_in_ramblock(block, offset)) {
3545 return NULL;
3546 }
3547
3548 return block->host + offset;
Juan Quintela56e93d22015-05-07 19:33:31 +02003549}
3550
David Hildenbrand6a23f632021-04-29 13:27:05 +02003551static void *host_page_from_ram_block_offset(RAMBlock *block,
3552 ram_addr_t offset)
3553{
3554 /* Note: Explicitly no check against offset_in_ramblock(). */
3555 return (void *)QEMU_ALIGN_DOWN((uintptr_t)(block->host + offset),
3556 block->page_size);
3557}
3558
3559static ram_addr_t host_page_offset_from_ram_block_offset(RAMBlock *block,
3560 ram_addr_t offset)
3561{
3562 return ((uintptr_t)block->host + offset) & (block->page_size - 1);
3563}
3564
Zhang Chen13af18f2018-09-03 12:38:48 +08003565static inline void *colo_cache_from_block_offset(RAMBlock *block,
zhanghailiang8af66372020-02-24 14:54:11 +08003566 ram_addr_t offset, bool record_bitmap)
Zhang Chen13af18f2018-09-03 12:38:48 +08003567{
3568 if (!offset_in_ramblock(block, offset)) {
3569 return NULL;
3570 }
3571 if (!block->colo_cache) {
3572 error_report("%s: colo_cache is NULL in block :%s",
3573 __func__, block->idstr);
3574 return NULL;
3575 }
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003576
3577 /*
3578 * During a COLO checkpoint, we need a bitmap of these migrated pages.
3579 * It helps us decide which pages in the ram cache should be flushed
3580 * into the VM's RAM later.
3581 */
zhanghailiang8af66372020-02-24 14:54:11 +08003582 if (record_bitmap &&
3583 !test_and_set_bit(offset >> TARGET_PAGE_BITS, block->bmap)) {
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003584 ram_state->migration_dirty_pages++;
3585 }
Zhang Chen13af18f2018-09-03 12:38:48 +08003586 return block->colo_cache + offset;
3587}
3588
Juan Quintela3d0684b2017-03-23 15:06:39 +01003589/**
3590 * ram_handle_compressed: handle the zero page case
3591 *
Juan Quintela56e93d22015-05-07 19:33:31 +02003592 * If a page (or a whole RDMA chunk) has been
3593 * determined to be zero, then zap it.
Juan Quintela3d0684b2017-03-23 15:06:39 +01003594 *
3595 * @host: host address for the zero page
3596 * @ch: what the page is filled from. We only support zero
3597 * @size: size of the zero page
Juan Quintela56e93d22015-05-07 19:33:31 +02003598 */
3599void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
3600{
Juan Quintelabad452a2021-11-18 15:56:38 +01003601 if (ch != 0 || !buffer_is_zero(host, size)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02003602 memset(host, ch, size);
3603 }
3604}
3605
Xiao Guangrong797ca152018-03-30 15:51:21 +08003606/* return the size after decompression, or a negative value on error */
3607static int
3608qemu_uncompress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
3609 const uint8_t *source, size_t source_len)
3610{
3611 int err;
3612
3613 err = inflateReset(stream);
3614 if (err != Z_OK) {
3615 return -1;
3616 }
3617
3618 stream->avail_in = source_len;
3619 stream->next_in = (uint8_t *)source;
3620 stream->avail_out = dest_len;
3621 stream->next_out = dest;
3622
3623 err = inflate(stream, Z_NO_FLUSH);
3624 if (err != Z_STREAM_END) {
3625 return -1;
3626 }
3627
3628 return stream->total_out;
3629}
3630
Juan Quintela56e93d22015-05-07 19:33:31 +02003631static void *do_data_decompress(void *opaque)
3632{
3633 DecompressParam *param = opaque;
3634 unsigned long pagesize;
Liang Li33d151f2016-05-05 15:32:58 +08003635 uint8_t *des;
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003636 int len, ret;
Juan Quintela56e93d22015-05-07 19:33:31 +02003637
Liang Li33d151f2016-05-05 15:32:58 +08003638 qemu_mutex_lock(&param->mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08003639 while (!param->quit) {
Liang Li33d151f2016-05-05 15:32:58 +08003640 if (param->des) {
3641 des = param->des;
3642 len = param->len;
3643 param->des = 0;
3644 qemu_mutex_unlock(&param->mutex);
3645
Liang Li73a89122016-05-05 15:32:51 +08003646 pagesize = TARGET_PAGE_SIZE;
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003647
3648 ret = qemu_uncompress_data(&param->stream, des, pagesize,
3649 param->compbuf, len);
Xiao Guangrongf5482222018-05-03 16:06:11 +08003650 if (ret < 0 && migrate_get_current()->decompress_error_check) {
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003651 error_report("decompress data failed");
3652 qemu_file_set_error(decomp_file, ret);
3653 }
Liang Li73a89122016-05-05 15:32:51 +08003654
Liang Li33d151f2016-05-05 15:32:58 +08003655 qemu_mutex_lock(&decomp_done_lock);
3656 param->done = true;
3657 qemu_cond_signal(&decomp_done_cond);
3658 qemu_mutex_unlock(&decomp_done_lock);
3659
3660 qemu_mutex_lock(&param->mutex);
3661 } else {
3662 qemu_cond_wait(&param->cond, &param->mutex);
3663 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003664 }
Liang Li33d151f2016-05-05 15:32:58 +08003665 qemu_mutex_unlock(&param->mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02003666
3667 return NULL;
3668}
3669
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003670static int wait_for_decompress_done(void)
Liang Li5533b2e2016-05-05 15:32:52 +08003671{
3672 int idx, thread_count;
3673
3674 if (!migrate_use_compression()) {
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003675 return 0;
Liang Li5533b2e2016-05-05 15:32:52 +08003676 }
3677
3678 thread_count = migrate_decompress_threads();
3679 qemu_mutex_lock(&decomp_done_lock);
3680 for (idx = 0; idx < thread_count; idx++) {
3681 while (!decomp_param[idx].done) {
3682 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
3683 }
3684 }
3685 qemu_mutex_unlock(&decomp_done_lock);
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003686 return qemu_file_get_error(decomp_file);
Liang Li5533b2e2016-05-05 15:32:52 +08003687}
3688
Juan Quintelaf0afa332017-06-28 11:52:28 +02003689static void compress_threads_load_cleanup(void)
Juan Quintela56e93d22015-05-07 19:33:31 +02003690{
3691 int i, thread_count;
3692
Juan Quintela3416ab52016-04-20 11:56:01 +02003693 if (!migrate_use_compression()) {
3694 return;
3695 }
Juan Quintela56e93d22015-05-07 19:33:31 +02003696 thread_count = migrate_decompress_threads();
3697 for (i = 0; i < thread_count; i++) {
Xiao Guangrong797ca152018-03-30 15:51:21 +08003698 /*
3699 * we use it as an indicator of whether the thread has been
3700 * properly initialized
3701 */
3702 if (!decomp_param[i].compbuf) {
3703 break;
3704 }
3705
Juan Quintela56e93d22015-05-07 19:33:31 +02003706 qemu_mutex_lock(&decomp_param[i].mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08003707 decomp_param[i].quit = true;
Juan Quintela56e93d22015-05-07 19:33:31 +02003708 qemu_cond_signal(&decomp_param[i].cond);
3709 qemu_mutex_unlock(&decomp_param[i].mutex);
3710 }
3711 for (i = 0; i < thread_count; i++) {
Xiao Guangrong797ca152018-03-30 15:51:21 +08003712 if (!decomp_param[i].compbuf) {
3713 break;
3714 }
3715
Juan Quintela56e93d22015-05-07 19:33:31 +02003716 qemu_thread_join(decompress_threads + i);
3717 qemu_mutex_destroy(&decomp_param[i].mutex);
3718 qemu_cond_destroy(&decomp_param[i].cond);
Xiao Guangrong797ca152018-03-30 15:51:21 +08003719 inflateEnd(&decomp_param[i].stream);
Juan Quintela56e93d22015-05-07 19:33:31 +02003720 g_free(decomp_param[i].compbuf);
Xiao Guangrong797ca152018-03-30 15:51:21 +08003721 decomp_param[i].compbuf = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02003722 }
3723 g_free(decompress_threads);
3724 g_free(decomp_param);
Juan Quintela56e93d22015-05-07 19:33:31 +02003725 decompress_threads = NULL;
3726 decomp_param = NULL;
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003727 decomp_file = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02003728}
3729
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003730static int compress_threads_load_setup(QEMUFile *f)
Xiao Guangrong797ca152018-03-30 15:51:21 +08003731{
3732 int i, thread_count;
3733
3734 if (!migrate_use_compression()) {
3735 return 0;
3736 }
3737
3738 thread_count = migrate_decompress_threads();
3739 decompress_threads = g_new0(QemuThread, thread_count);
3740 decomp_param = g_new0(DecompressParam, thread_count);
3741 qemu_mutex_init(&decomp_done_lock);
3742 qemu_cond_init(&decomp_done_cond);
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003743 decomp_file = f;
Xiao Guangrong797ca152018-03-30 15:51:21 +08003744 for (i = 0; i < thread_count; i++) {
3745 if (inflateInit(&decomp_param[i].stream) != Z_OK) {
3746 goto exit;
3747 }
3748
3749 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
3750 qemu_mutex_init(&decomp_param[i].mutex);
3751 qemu_cond_init(&decomp_param[i].cond);
3752 decomp_param[i].done = true;
3753 decomp_param[i].quit = false;
3754 qemu_thread_create(decompress_threads + i, "decompress",
3755 do_data_decompress, decomp_param + i,
3756 QEMU_THREAD_JOINABLE);
3757 }
3758 return 0;
3759exit:
3760 compress_threads_load_cleanup();
3761 return -1;
3762}
3763
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00003764static void decompress_data_with_multi_threads(QEMUFile *f,
Juan Quintela56e93d22015-05-07 19:33:31 +02003765 void *host, int len)
3766{
3767 int idx, thread_count;
3768
3769 thread_count = migrate_decompress_threads();
Mahmoud Mandour37396952021-03-11 05:15:35 +02003770 QEMU_LOCK_GUARD(&decomp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02003771 while (true) {
3772 for (idx = 0; idx < thread_count; idx++) {
Liang Li73a89122016-05-05 15:32:51 +08003773 if (decomp_param[idx].done) {
Liang Li33d151f2016-05-05 15:32:58 +08003774 decomp_param[idx].done = false;
3775 qemu_mutex_lock(&decomp_param[idx].mutex);
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00003776 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
Juan Quintela56e93d22015-05-07 19:33:31 +02003777 decomp_param[idx].des = host;
3778 decomp_param[idx].len = len;
Liang Li33d151f2016-05-05 15:32:58 +08003779 qemu_cond_signal(&decomp_param[idx].cond);
3780 qemu_mutex_unlock(&decomp_param[idx].mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02003781 break;
3782 }
3783 }
3784 if (idx < thread_count) {
3785 break;
Liang Li73a89122016-05-05 15:32:51 +08003786 } else {
3787 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02003788 }
3789 }
3790}
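/*
 * Illustrative sketch (hypothetical helper, not called anywhere): the
 * lifecycle of the decompression thread pool as the load path below
 * uses it - one setup per incoming stream, one dispatch per compressed
 * page, then a final drain and teardown.
 */
static int example_load_compressed_pages(QEMUFile *f, void **hosts,
                                         int *lens, int npages)
{
    int i, ret;

    if (compress_threads_load_setup(f)) {
        return -1;
    }
    for (i = 0; i < npages; i++) {
        /* hands the page to an idle worker; waits until one is free */
        decompress_data_with_multi_threads(f, hosts[i], lens[i]);
    }
    /* wait for every worker to finish and collect any stream error */
    ret = wait_for_decompress_done();
    compress_threads_load_cleanup();
    return ret;
}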
3791
Rao, Leib70cb3b2020-10-16 13:52:01 +08003792static void colo_init_ram_state(void)
3793{
3794 ram_state_init(&ram_state);
Rao, Leib70cb3b2020-10-16 13:52:01 +08003795}
3796
Zhang Chen13af18f2018-09-03 12:38:48 +08003797/*
3798 * colo cache: this is for the secondary VM; we cache the whole
3799 * memory of the secondary VM. The global lock must be held when
3800 * calling this helper.
3801 */
3802int colo_init_ram_cache(void)
3803{
3804 RAMBlock *block;
3805
Paolo Bonzini44901b52019-12-13 15:07:22 +01003806 WITH_RCU_READ_LOCK_GUARD() {
3807 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
3808 block->colo_cache = qemu_anon_ram_alloc(block->used_length,
David Hildenbrand8dbe22c2021-05-10 13:43:21 +02003809 NULL, false, false);
Paolo Bonzini44901b52019-12-13 15:07:22 +01003810 if (!block->colo_cache) {
3811 error_report("%s: Can't alloc memory for COLO cache of block %s,"
3812 "size 0x" RAM_ADDR_FMT, __func__, block->idstr,
3813 block->used_length);
3814 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
3815 if (block->colo_cache) {
3816 qemu_anon_ram_free(block->colo_cache, block->used_length);
3817 block->colo_cache = NULL;
3818 }
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003819 }
Paolo Bonzini44901b52019-12-13 15:07:22 +01003820 return -errno;
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003821 }
Lukas Straube5fdf922021-07-04 18:14:44 +02003822 if (!machine_dump_guest_core(current_machine)) {
3823 qemu_madvise(block->colo_cache, block->used_length,
3824 QEMU_MADV_DONTDUMP);
3825 }
Zhang Chen13af18f2018-09-03 12:38:48 +08003826 }
Zhang Chen13af18f2018-09-03 12:38:48 +08003827 }
Paolo Bonzini44901b52019-12-13 15:07:22 +01003828
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003829 /*
3830 * Record the dirty pages sent by the PVM; we use this dirty bitmap to
3831 * decide which pages in the cache should be flushed into the SVM's RAM. Here
3832 * we use the same name 'ram_bitmap' as for migration.
3833 */
3834 if (ram_bytes_total()) {
3835 RAMBlock *block;
3836
Yury Kotovfbd162e2019-02-15 20:45:46 +03003837 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003838 unsigned long pages = block->max_length >> TARGET_PAGE_BITS;
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003839 block->bmap = bitmap_new(pages);
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003840 }
3841 }
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003842
Rao, Leib70cb3b2020-10-16 13:52:01 +08003843 colo_init_ram_state();
Zhang Chen13af18f2018-09-03 12:38:48 +08003844 return 0;
Zhang Chen13af18f2018-09-03 12:38:48 +08003845}
3846
zhanghailiang03930312020-02-24 14:54:10 +08003847/* TODO: duplicated with ram_init_bitmaps */
3848void colo_incoming_start_dirty_log(void)
3849{
3850 RAMBlock *block = NULL;
3851 /* For memory_global_dirty_log_start below. */
3852 qemu_mutex_lock_iothread();
3853 qemu_mutex_lock_ramlist();
3854
3855 memory_global_dirty_log_sync();
3856 WITH_RCU_READ_LOCK_GUARD() {
3857 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
3858 ramblock_sync_dirty_bitmap(ram_state, block);
3859 /* Discard this dirty bitmap record */
3860 bitmap_zero(block->bmap, block->max_length >> TARGET_PAGE_BITS);
3861 }
Hyman Huang(黄勇)63b41db2021-06-29 16:01:19 +00003862 memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION);
zhanghailiang03930312020-02-24 14:54:10 +08003863 }
3864 ram_state->migration_dirty_pages = 0;
3865 qemu_mutex_unlock_ramlist();
3866 qemu_mutex_unlock_iothread();
3867}
3868
Zhang Chen13af18f2018-09-03 12:38:48 +08003869/* The global lock must be held to call this helper */
3870void colo_release_ram_cache(void)
3871{
3872 RAMBlock *block;
3873
Hyman Huang(黄勇)63b41db2021-06-29 16:01:19 +00003874 memory_global_dirty_log_stop(GLOBAL_DIRTY_MIGRATION);
Yury Kotovfbd162e2019-02-15 20:45:46 +03003875 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Zhang Chen7d9acaf2018-09-03 12:38:49 +08003876 g_free(block->bmap);
3877 block->bmap = NULL;
3878 }
3879
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01003880 WITH_RCU_READ_LOCK_GUARD() {
3881 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
3882 if (block->colo_cache) {
3883 qemu_anon_ram_free(block->colo_cache, block->used_length);
3884 block->colo_cache = NULL;
3885 }
Zhang Chen13af18f2018-09-03 12:38:48 +08003886 }
3887 }
zhanghailiang03930312020-02-24 14:54:10 +08003888 ram_state_cleanup(&ram_state);
Zhang Chen13af18f2018-09-03 12:38:48 +08003889}
3890
Juan Quintela3d0684b2017-03-23 15:06:39 +01003891/**
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003892 * ram_load_setup: Setup RAM for migration incoming side
3893 *
3894 * Returns zero to indicate success and a negative value on error
3895 *
3896 * @f: QEMUFile where to receive the data
3897 * @opaque: RAMState pointer
3898 */
3899static int ram_load_setup(QEMUFile *f, void *opaque)
3900{
Xiao Guangrong34ab9e92018-03-30 15:51:22 +08003901 if (compress_threads_load_setup(f)) {
Xiao Guangrong797ca152018-03-30 15:51:21 +08003902 return -1;
3903 }
3904
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003905 xbzrle_load_setup();
Alexey Perevalovf9494612017-10-05 14:13:20 +03003906 ramblock_recv_map_init();
Zhang Chen13af18f2018-09-03 12:38:48 +08003907
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003908 return 0;
3909}
3910
3911static int ram_load_cleanup(void *opaque)
3912{
Alexey Perevalovf9494612017-10-05 14:13:20 +03003913 RAMBlock *rb;
Junyan He56eb90a2018-07-18 15:48:03 +08003914
Yury Kotovfbd162e2019-02-15 20:45:46 +03003915 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
Beata Michalskabd108a42019-11-21 00:08:42 +00003916 qemu_ram_block_writeback(rb);
Junyan He56eb90a2018-07-18 15:48:03 +08003917 }
3918
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003919 xbzrle_load_cleanup();
Juan Quintelaf0afa332017-06-28 11:52:28 +02003920 compress_threads_load_cleanup();
Alexey Perevalovf9494612017-10-05 14:13:20 +03003921
Yury Kotovfbd162e2019-02-15 20:45:46 +03003922 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
Alexey Perevalovf9494612017-10-05 14:13:20 +03003923 g_free(rb->receivedmap);
3924 rb->receivedmap = NULL;
3925 }
Zhang Chen13af18f2018-09-03 12:38:48 +08003926
Juan Quintelaf265e0e2017-06-28 11:52:27 +02003927 return 0;
3928}
3929
3930/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01003931 * ram_postcopy_incoming_init: allocate postcopy data structures
3932 *
3933 * Returns 0 on success and a negative value on error
3934 *
3935 * @mis: current migration incoming state
3936 *
3937 * Allocate the data structures etc. needed by incoming migration with
3938 * postcopy-ram. postcopy-ram's similarly named
3939 * postcopy_ram_incoming_init does the work.
Dr. David Alan Gilbert1caddf82015-11-05 18:11:03 +00003940 */
3941int ram_postcopy_incoming_init(MigrationIncomingState *mis)
3942{
David Hildenbrandc1361802018-06-20 22:27:36 +02003943 return postcopy_ram_incoming_init(mis);
Dr. David Alan Gilbert1caddf82015-11-05 18:11:03 +00003944}
3945
Juan Quintela3d0684b2017-03-23 15:06:39 +01003946/**
3947 * ram_load_postcopy: load a page in postcopy case
3948 *
3949 * Returns 0 for success or -errno in case of error
3950 *
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003951 * Called in postcopy mode by ram_load().
3952 * rcu_read_lock is taken prior to this being called.
Juan Quintela3d0684b2017-03-23 15:06:39 +01003953 *
3954 * @f: QEMUFile where to receive the data
Peter Xu36f62f12022-07-07 14:55:02 -04003955 * @channel: the channel to use for loading
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003956 */
Peter Xu36f62f12022-07-07 14:55:02 -04003957int ram_load_postcopy(QEMUFile *f, int channel)
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003958{
3959 int flags = 0, ret = 0;
3960 bool place_needed = false;
Peter Xu1aa83672018-07-10 17:18:53 +08003961 bool matches_target_page_size = false;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003962 MigrationIncomingState *mis = migration_incoming_get_current();
Peter Xu36f62f12022-07-07 14:55:02 -04003963 PostcopyTmpPage *tmp_page = &mis->postcopy_tmp_pages[channel];
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003964
3965 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
3966 ram_addr_t addr;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003967 void *page_buffer = NULL;
3968 void *place_source = NULL;
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00003969 RAMBlock *block = NULL;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003970 uint8_t ch;
Wei Yang644acf92019-11-07 20:39:07 +08003971 int len;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003972
3973 addr = qemu_get_be64(f);
Peter Xu7a9ddfb2018-02-08 18:31:05 +08003974
3975 /*
3976 * If qemu file error, we should stop here, and then "addr"
3977 * may be invalid
3978 */
3979 ret = qemu_file_get_error(f);
3980 if (ret) {
3981 break;
3982 }
3983
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00003984 flags = addr & ~TARGET_PAGE_MASK;
3985 addr &= TARGET_PAGE_MASK;
3986
Peter Xu36f62f12022-07-07 14:55:02 -04003987 trace_ram_load_postcopy_loop(channel, (uint64_t)addr, flags);
Wei Yang644acf92019-11-07 20:39:07 +08003988 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
3989 RAM_SAVE_FLAG_COMPRESS_PAGE)) {
Peter Xuc01b16e2022-07-07 14:55:04 -04003990 block = ram_block_from_stream(mis, f, flags, channel);
David Hildenbrand6a23f632021-04-29 13:27:05 +02003991 if (!block) {
3992 ret = -EINVAL;
3993 break;
3994 }
zhanghailiang4c4bad42016-01-15 11:37:41 +08003995
David Hildenbrand898ba902021-04-29 13:27:06 +02003996 /*
3997 * Relying on used_length is racy and can result in false positives.
3998 * We might place pages beyond used_length in case RAM was shrunk
3999 * while in postcopy, which is fine - trying to place via
4000 * UFFDIO_COPY/UFFDIO_ZEROPAGE will never segfault.
4001 */
4002 if (!block->host || addr >= block->postcopy_length) {
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004003 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
4004 ret = -EINVAL;
4005 break;
4006 }
Peter Xu77dadc32022-03-01 16:39:04 +08004007 tmp_page->target_pages++;
Peter Xu1aa83672018-07-10 17:18:53 +08004008 matches_target_page_size = block->page_size == TARGET_PAGE_SIZE;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004009 /*
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00004010 * Postcopy requires that we place whole host pages atomically;
4011 * these may be huge pages for RAMBlocks that are backed by
4012 * hugetlbfs.
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004013 * To make it atomic, the data is read into a temporary page
4014 * that's moved into place later.
4015 * The migration protocol uses, possibly smaller, target pages;
4016 * however, the source ensures it always sends all the components
Wei Yang91ba4422019-11-07 20:39:06 +08004017 * of a host page in one chunk.
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004018 */
Peter Xu77dadc32022-03-01 16:39:04 +08004019 page_buffer = tmp_page->tmp_huge_page +
David Hildenbrand6a23f632021-04-29 13:27:05 +02004020 host_page_offset_from_ram_block_offset(block, addr);
4021 /* If all target pages are zero then we can optimise the placement */
Peter Xu77dadc32022-03-01 16:39:04 +08004022 if (tmp_page->target_pages == 1) {
4023 tmp_page->host_addr =
4024 host_page_from_ram_block_offset(block, addr);
4025 } else if (tmp_page->host_addr !=
4026 host_page_from_ram_block_offset(block, addr)) {
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00004027 /* not the first target page within the host page */
Peter Xu36f62f12022-07-07 14:55:02 -04004028 error_report("Non-same host page detected on channel %d: "
Peter Xucfc7dc82022-03-01 16:39:05 +08004029 "Target host page %p, received host page %p "
4030 "(rb %s offset 0x"RAM_ADDR_FMT" target_pages %d)",
Peter Xu36f62f12022-07-07 14:55:02 -04004031 channel, tmp_page->host_addr,
Peter Xucfc7dc82022-03-01 16:39:05 +08004032 host_page_from_ram_block_offset(block, addr),
4033 block->idstr, addr, tmp_page->target_pages);
David Hildenbrand6a23f632021-04-29 13:27:05 +02004034 ret = -EINVAL;
4035 break;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004036 }
4037
4038 /*
4039 * If it's the last part of a host page then we place the host
4040 * page
4041 */
Peter Xu77dadc32022-03-01 16:39:04 +08004042 if (tmp_page->target_pages ==
4043 (block->page_size / TARGET_PAGE_SIZE)) {
Wei Yang4cbb3c62019-11-07 20:39:04 +08004044 place_needed = true;
Wei Yang4cbb3c62019-11-07 20:39:04 +08004045 }
Peter Xu77dadc32022-03-01 16:39:04 +08004046 place_source = tmp_page->tmp_huge_page;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004047 }
4048
4049 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
Juan Quintelabb890ed2017-04-28 09:39:55 +02004050 case RAM_SAVE_FLAG_ZERO:
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004051 ch = qemu_get_byte(f);
Wei Yang2e36bc12019-11-07 20:39:02 +08004052 /*
4053 * We can skip setting page_buffer when
4054 * this is a zero page and (block->page_size == TARGET_PAGE_SIZE).
4055 */
4056 if (ch || !matches_target_page_size) {
4057 memset(page_buffer, ch, TARGET_PAGE_SIZE);
4058 }
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004059 if (ch) {
Peter Xu77dadc32022-03-01 16:39:04 +08004060 tmp_page->all_zero = false;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004061 }
4062 break;
4063
4064 case RAM_SAVE_FLAG_PAGE:
Peter Xu77dadc32022-03-01 16:39:04 +08004065 tmp_page->all_zero = false;
Peter Xu1aa83672018-07-10 17:18:53 +08004066 if (!matches_target_page_size) {
4067 /* For huge pages, we always use temporary buffer */
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004068 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
4069 } else {
Peter Xu1aa83672018-07-10 17:18:53 +08004070 /*
4071 * For small pages that matches target page size, we
4072 * avoid the qemu_file copy. Instead we directly use
4073 * the buffer of QEMUFile to place the page. Note: we
4074 * cannot do any QEMUFile operation before using that
4075 * buffer to make sure the buffer is valid when
4076 * placing the page.
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004077 */
4078 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
4079 TARGET_PAGE_SIZE);
4080 }
4081 break;
Wei Yang644acf92019-11-07 20:39:07 +08004082 case RAM_SAVE_FLAG_COMPRESS_PAGE:
Peter Xu77dadc32022-03-01 16:39:04 +08004083 tmp_page->all_zero = false;
Wei Yang644acf92019-11-07 20:39:07 +08004084 len = qemu_get_be32(f);
4085 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
4086 error_report("Invalid compressed data length: %d", len);
4087 ret = -EINVAL;
4088 break;
4089 }
4090 decompress_data_with_multi_threads(f, page_buffer, len);
4091 break;
4092
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004093 case RAM_SAVE_FLAG_EOS:
4094 /* normal exit */
Juan Quintela6df264a2018-02-28 09:10:07 +01004095 multifd_recv_sync_main();
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004096 break;
4097 default:
Bihong Yu29fccad2020-10-20 11:10:42 +08004098 error_report("Unknown combination of migration flags: 0x%x"
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004099 " (postcopy mode)", flags);
4100 ret = -EINVAL;
Peter Xu7a9ddfb2018-02-08 18:31:05 +08004101 break;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004102 }
4103
Wei Yang644acf92019-11-07 20:39:07 +08004104 /* Got the whole host page, wait for decompression before placing. */
4105 if (place_needed) {
4106 ret |= wait_for_decompress_done();
4107 }
4108
Peter Xu7a9ddfb2018-02-08 18:31:05 +08004109 /* Detect for any possible file errors */
4110 if (!ret && qemu_file_get_error(f)) {
4111 ret = qemu_file_get_error(f);
4112 }
4113
4114 if (!ret && place_needed) {
Peter Xu77dadc32022-03-01 16:39:04 +08004115 if (tmp_page->all_zero) {
4116 ret = postcopy_place_page_zero(mis, tmp_page->host_addr, block);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004117 } else {
Peter Xu77dadc32022-03-01 16:39:04 +08004118 ret = postcopy_place_page(mis, tmp_page->host_addr,
4119 place_source, block);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004120 }
David Hildenbrandddf35bd2020-04-21 10:52:56 +02004121 place_needed = false;
Peter Xu77dadc32022-03-01 16:39:04 +08004122 postcopy_temp_page_reset(tmp_page);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004123 }
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004124 }
4125
4126 return ret;
4127}
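/*
 * Worked example (assumed sizes, added for illustration): with a
 * 2 MiB hugetlbfs-backed RAMBlock and 4 KiB target pages, the host
 * page is placed only after block->page_size / TARGET_PAGE_SIZE =
 * 2097152 / 4096 = 512 target pages have been copied into the
 * temporary huge page; a single atomic placement then installs all
 * of them at once.
 */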
4128
Daniel Henrique Barbozaacab30b2017-11-16 20:35:26 -02004129static bool postcopy_is_advised(void)
4130{
4131 PostcopyState ps = postcopy_state_get();
4132 return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END;
4133}
4134
4135static bool postcopy_is_running(void)
4136{
4137 PostcopyState ps = postcopy_state_get();
4138 return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END;
4139}
4140
Zhang Chene6f4aa12018-09-03 12:38:50 +08004141/*
4142 * Flush the contents of the RAM cache into the SVM's memory.
4143 * Only flush pages that were dirtied by the PVM, the SVM, or both.
4144 */
Lukas Straub24fa16f2020-05-11 13:10:51 +02004145void colo_flush_ram_cache(void)
Zhang Chene6f4aa12018-09-03 12:38:50 +08004146{
4147 RAMBlock *block = NULL;
4148 void *dst_host;
4149 void *src_host;
4150 unsigned long offset = 0;
4151
zhanghailiangd1955d22018-09-03 12:38:55 +08004152 memory_global_dirty_log_sync();
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01004153 WITH_RCU_READ_LOCK_GUARD() {
4154 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
4155 ramblock_sync_dirty_bitmap(ram_state, block);
Zhang Chene6f4aa12018-09-03 12:38:50 +08004156 }
4157 }
4158
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01004159 trace_colo_flush_ram_cache_begin(ram_state->migration_dirty_pages);
4160 WITH_RCU_READ_LOCK_GUARD() {
4161 block = QLIST_FIRST_RCU(&ram_list.blocks);
4162
4163 while (block) {
Rao, Leia6a83ce2021-11-09 11:04:55 +08004164 unsigned long num = 0;
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01004165
Rao, Leia6a83ce2021-11-09 11:04:55 +08004166 offset = colo_bitmap_find_dirty(ram_state, block, offset, &num);
David Hildenbrand542147f2021-04-29 13:27:08 +02004167 if (!offset_in_ramblock(block,
4168 ((ram_addr_t)offset) << TARGET_PAGE_BITS)) {
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01004169 offset = 0;
Rao, Leia6a83ce2021-11-09 11:04:55 +08004170 num = 0;
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01004171 block = QLIST_NEXT_RCU(block, next);
4172 } else {
Rao, Leia6a83ce2021-11-09 11:04:55 +08004173 unsigned long i = 0;
4174
4175 for (i = 0; i < num; i++) {
4176 migration_bitmap_clear_dirty(ram_state, block, offset + i);
4177 }
Alexey Romko8bba0042020-01-10 14:51:34 +01004178 dst_host = block->host
4179 + (((ram_addr_t)offset) << TARGET_PAGE_BITS);
4180 src_host = block->colo_cache
4181 + (((ram_addr_t)offset) << TARGET_PAGE_BITS);
Rao, Leia6a83ce2021-11-09 11:04:55 +08004182 memcpy(dst_host, src_host, TARGET_PAGE_SIZE * num);
4183 offset += num;
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01004184 }
4185 }
4186 }
Zhang Chene6f4aa12018-09-03 12:38:50 +08004187 trace_colo_flush_ram_cache_end();
4188}
4189
Wei Yang10da4a32019-07-25 08:20:23 +08004190/**
4191 * ram_load_precopy: load pages in precopy case
4192 *
4193 * Returns 0 for success or -errno in case of error
4194 *
4195 * Called in precopy mode by ram_load().
4196 * rcu_read_lock is taken prior to this being called.
4197 *
4198 * @f: QEMUFile where to receive the data
4199 */
4200static int ram_load_precopy(QEMUFile *f)
Juan Quintela56e93d22015-05-07 19:33:31 +02004201{
Peter Xu755e8d72022-03-01 16:39:07 +08004202 MigrationIncomingState *mis = migration_incoming_get_current();
Yury Kotove65cec52019-11-25 16:36:32 +03004203 int flags = 0, ret = 0, invalid_flags = 0, len = 0, i = 0;
Dr. David Alan Gilbertef08fb32017-02-24 18:28:30 +00004204 /* ADVISE is earlier, it shows the source has the postcopy capability on */
Daniel Henrique Barbozaacab30b2017-11-16 20:35:26 -02004205 bool postcopy_advised = postcopy_is_advised();
Juan Quintelaedc60122016-11-02 12:40:46 +01004206 if (!migrate_use_compression()) {
4207 invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE;
4208 }
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00004209
Wei Yang10da4a32019-07-25 08:20:23 +08004210 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02004211 ram_addr_t addr, total_ram_bytes;
zhanghailiang03930312020-02-24 14:54:10 +08004212 void *host = NULL, *host_bak = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02004213 uint8_t ch;
4214
Yury Kotove65cec52019-11-25 16:36:32 +03004215 /*
4216 * Yield periodically to let the main loop run, but an iteration of
4217 * the main loop is expensive, so only do it once every several iterations
4218 */
4219 if ((i & 32767) == 0 && qemu_in_coroutine()) {
4220 aio_co_schedule(qemu_get_current_aio_context(),
4221 qemu_coroutine_self());
4222 qemu_coroutine_yield();
4223 }
4224 i++;
4225
Juan Quintela56e93d22015-05-07 19:33:31 +02004226 addr = qemu_get_be64(f);
4227 flags = addr & ~TARGET_PAGE_MASK;
4228 addr &= TARGET_PAGE_MASK;
4229
Juan Quintelaedc60122016-11-02 12:40:46 +01004230 if (flags & invalid_flags) {
4231 if (flags & invalid_flags & RAM_SAVE_FLAG_COMPRESS_PAGE) {
4232 error_report("Received an unexpected compressed page");
4233 }
4234
4235 ret = -EINVAL;
4236 break;
4237 }
4238
Juan Quintelabb890ed2017-04-28 09:39:55 +02004239 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004240 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
Peter Xuc01b16e2022-07-07 14:55:04 -04004241 RAMBlock *block = ram_block_from_stream(mis, f, flags,
4242 RAM_CHANNEL_PRECOPY);
zhanghailiang4c4bad42016-01-15 11:37:41 +08004243
zhanghailiang03930312020-02-24 14:54:10 +08004244 host = host_from_ram_block_offset(block, addr);
Zhang Chen13af18f2018-09-03 12:38:48 +08004245 /*
zhanghailiang03930312020-02-24 14:54:10 +08004246 * After entering the COLO stage, we should not load pages
4247 * into the SVM's memory directly; we put them into colo_cache first.
4248 * NOTE: We need to keep a copy of the SVM's RAM in colo_cache.
4249 * Previously, we copied all of this memory during the COLO preparation
4250 * stage, which required stopping the VM and was time-consuming.
4251 * Here we optimize it with a trick: back up every page during the
4252 * migration process while COLO is enabled. Although this affects the
4253 * speed of the migration, it clearly reduces the downtime of backing
4254 * up all of the SVM's memory in the COLO preparation stage.
Zhang Chen13af18f2018-09-03 12:38:48 +08004255 */
zhanghailiang03930312020-02-24 14:54:10 +08004256 if (migration_incoming_colo_enabled()) {
4257 if (migration_incoming_in_colo_state()) {
4258 /* In COLO stage, put all pages into cache temporarily */
zhanghailiang8af66372020-02-24 14:54:11 +08004259 host = colo_cache_from_block_offset(block, addr, true);
zhanghailiang03930312020-02-24 14:54:10 +08004260 } else {
4261 /*
4262 * In migration stage but before COLO stage,
4263 * Put all pages into both cache and SVM's memory.
4264 */
zhanghailiang8af66372020-02-24 14:54:11 +08004265 host_bak = colo_cache_from_block_offset(block, addr, false);
zhanghailiang03930312020-02-24 14:54:10 +08004266 }
Zhang Chen13af18f2018-09-03 12:38:48 +08004267 }
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004268 if (!host) {
4269 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
4270 ret = -EINVAL;
4271 break;
4272 }
Zhang Chen13af18f2018-09-03 12:38:48 +08004273 if (!migration_incoming_in_colo_state()) {
4274 ramblock_recv_bitmap_set(block, host);
4275 }
4276
Dr. David Alan Gilbert1db9d8e2017-04-26 19:37:21 +01004277 trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004278 }
4279
Juan Quintela56e93d22015-05-07 19:33:31 +02004280 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
4281 case RAM_SAVE_FLAG_MEM_SIZE:
4282 /* Synchronize RAM block list */
4283 total_ram_bytes = addr;
4284 while (!ret && total_ram_bytes) {
4285 RAMBlock *block;
Juan Quintela56e93d22015-05-07 19:33:31 +02004286 char id[256];
4287 ram_addr_t length;
4288
4289 len = qemu_get_byte(f);
4290 qemu_get_buffer(f, (uint8_t *)id, len);
4291 id[len] = 0;
4292 length = qemu_get_be64(f);
4293
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00004294 block = qemu_ram_block_by_name(id);
CĂ©dric Le Goaterb895de52018-05-14 08:57:00 +02004295 if (block && !qemu_ram_is_migratable(block)) {
4296 error_report("block %s should not be migrated !", id);
4297 ret = -EINVAL;
4298 } else if (block) {
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00004299 if (length != block->used_length) {
4300 Error *local_err = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02004301
Gongleifa53a0e2016-05-10 10:04:59 +08004302 ret = qemu_ram_resize(block, length,
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00004303 &local_err);
4304 if (local_err) {
4305 error_report_err(local_err);
Juan Quintela56e93d22015-05-07 19:33:31 +02004306 }
Juan Quintela56e93d22015-05-07 19:33:31 +02004307 }
Dr. David Alan Gilbertef08fb32017-02-24 18:28:30 +00004308 /* For postcopy we need to check hugepage sizes match */
Stefan Reitere846b742021-02-04 17:35:22 +01004309 if (postcopy_advised && migrate_postcopy_ram() &&
Dr. David Alan Gilbertef08fb32017-02-24 18:28:30 +00004310 block->page_size != qemu_host_page_size) {
4311 uint64_t remote_page_size = qemu_get_be64(f);
4312 if (remote_page_size != block->page_size) {
4313 error_report("Mismatched RAM page size %s "
4314 "(local) %zd != %" PRId64,
4315 id, block->page_size,
4316 remote_page_size);
4317 ret = -EINVAL;
4318 }
4319 }
Yury Kotovfbd162e2019-02-15 20:45:46 +03004320 if (migrate_ignore_shared()) {
4321 hwaddr addr = qemu_get_be64(f);
Yury Kotovfbd162e2019-02-15 20:45:46 +03004322 if (ramblock_is_ignored(block) &&
4323 block->mr->addr != addr) {
4324 error_report("Mismatched GPAs for block %s "
4325 "%" PRId64 "!= %" PRId64,
4326 id, (uint64_t)addr,
4327 (uint64_t)block->mr->addr);
4328 ret = -EINVAL;
4329 }
4330 }
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00004331 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
4332 block->idstr);
4333 } else {
Juan Quintela56e93d22015-05-07 19:33:31 +02004334 error_report("Unknown ramblock \"%s\", cannot "
4335 "accept migration", id);
4336 ret = -EINVAL;
4337 }
4338
4339 total_ram_bytes -= length;
4340 }
4341 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004342
Juan Quintelabb890ed2017-04-28 09:39:55 +02004343 case RAM_SAVE_FLAG_ZERO:
Juan Quintela56e93d22015-05-07 19:33:31 +02004344 ch = qemu_get_byte(f);
4345 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
4346 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004347
Juan Quintela56e93d22015-05-07 19:33:31 +02004348 case RAM_SAVE_FLAG_PAGE:
Juan Quintela56e93d22015-05-07 19:33:31 +02004349 qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
4350 break;
Juan Quintela56e93d22015-05-07 19:33:31 +02004351
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004352 case RAM_SAVE_FLAG_COMPRESS_PAGE:
Juan Quintela56e93d22015-05-07 19:33:31 +02004353 len = qemu_get_be32(f);
4354 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
4355 error_report("Invalid compressed data length: %d", len);
4356 ret = -EINVAL;
4357 break;
4358 }
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00004359 decompress_data_with_multi_threads(f, host, len);
Juan Quintela56e93d22015-05-07 19:33:31 +02004360 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00004361
Juan Quintela56e93d22015-05-07 19:33:31 +02004362 case RAM_SAVE_FLAG_XBZRLE:
Juan Quintela56e93d22015-05-07 19:33:31 +02004363 if (load_xbzrle(f, addr, host) < 0) {
4364 error_report("Failed to decompress XBZRLE page at "
4365 RAM_ADDR_FMT, addr);
4366 ret = -EINVAL;
4367 break;
4368 }
4369 break;
4370 case RAM_SAVE_FLAG_EOS:
4371 /* normal exit */
Juan Quintela6df264a2018-02-28 09:10:07 +01004372 multifd_recv_sync_main();
Juan Quintela56e93d22015-05-07 19:33:31 +02004373 break;
4374 default:
4375 if (flags & RAM_SAVE_FLAG_HOOK) {
Dr. David Alan Gilbert632e3a52015-06-11 18:17:23 +01004376 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
Juan Quintela56e93d22015-05-07 19:33:31 +02004377 } else {
Bihong Yu29fccad2020-10-20 11:10:42 +08004378 error_report("Unknown combination of migration flags: 0x%x",
Juan Quintela56e93d22015-05-07 19:33:31 +02004379 flags);
4380 ret = -EINVAL;
4381 }
4382 }
4383 if (!ret) {
4384 ret = qemu_file_get_error(f);
4385 }
zhanghailiang03930312020-02-24 14:54:10 +08004386 if (!ret && host_bak) {
4387 memcpy(host_bak, host, TARGET_PAGE_SIZE);
4388 }
Juan Quintela56e93d22015-05-07 19:33:31 +02004389 }
4390
Wei Yangca1a6b72019-11-07 20:39:03 +08004391 ret |= wait_for_decompress_done();
Wei Yang10da4a32019-07-25 08:20:23 +08004392 return ret;
4393}
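/*
 * Informal summary of the precopy record layout parsed above, derived
 * from ram_load_precopy() itself and kept here purely as a reading aid:
 *
 *   be64  addr | flags                  (flags live in the low bits of addr)
 *   RAM_SAVE_FLAG_MEM_SIZE:  repeated { u8 len, idstr, be64 block length,
 *                            optional be64 page size / GPA, ... }
 *   RAM_SAVE_FLAG_ZERO:      u8 fill byte
 *   RAM_SAVE_FLAG_PAGE:      TARGET_PAGE_SIZE raw bytes
 *   RAM_SAVE_FLAG_COMPRESS_PAGE: be32 len, then len bytes of zlib data
 *   RAM_SAVE_FLAG_XBZRLE:    xbzrle-encoded page
 *   RAM_SAVE_FLAG_EOS:       end of section
 */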
4394
4395static int ram_load(QEMUFile *f, void *opaque, int version_id)
4396{
4397 int ret = 0;
4398 static uint64_t seq_iter;
4399 /*
4400 * If system is running in postcopy mode, page inserts to host memory must
4401 * be atomic
4402 */
4403 bool postcopy_running = postcopy_is_running();
4404
4405 seq_iter++;
4406
4407 if (version_id != 4) {
4408 return -EINVAL;
4409 }
4410
4411 /*
4412 * This RCU critical section can be very long running.
4413 * When RCU reclaims in the code start to become numerous,
4414 * it will be necessary to reduce the granularity of this
4415 * critical section.
4416 */
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01004417 WITH_RCU_READ_LOCK_GUARD() {
4418 if (postcopy_running) {
Peter Xu36f62f12022-07-07 14:55:02 -04004419 /*
4420 * Note! Here RAM_CHANNEL_PRECOPY is the precopy channel of
4421 * postcopy migration; we have another RAM_CHANNEL_POSTCOPY to
4422 * service fast page faults.
4423 */
4424 ret = ram_load_postcopy(f, RAM_CHANNEL_PRECOPY);
Dr. David Alan Gilbert89ac5a12019-10-07 15:36:39 +01004425 } else {
4426 ret = ram_load_precopy(f);
4427 }
Wei Yang10da4a32019-07-25 08:20:23 +08004428 }
Juan Quintela55c44462017-01-23 22:32:05 +01004429 trace_ram_load_complete(ret, seq_iter);
Zhang Chene6f4aa12018-09-03 12:38:50 +08004430
Juan Quintela56e93d22015-05-07 19:33:31 +02004431 return ret;
4432}
4433
Vladimir Sementsov-Ogievskiyc6467622017-07-10 19:30:14 +03004434static bool ram_has_postcopy(void *opaque)
4435{
Junyan He469dd512018-07-18 15:48:02 +08004436 RAMBlock *rb;
Yury Kotovfbd162e2019-02-15 20:45:46 +03004437 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
Junyan He469dd512018-07-18 15:48:02 +08004438 if (ramblock_is_pmem(rb)) {
4439 info_report("Block: %s, host: %p is a nvdimm memory, postcopy"
4440 "is not supported now!", rb->idstr, rb->host);
4441 return false;
4442 }
4443 }
4444
Vladimir Sementsov-Ogievskiyc6467622017-07-10 19:30:14 +03004445 return migrate_postcopy_ram();
4446}
4447
Peter Xuedd090c2018-05-02 18:47:32 +08004448/* Sync all the dirty bitmaps with the destination VM. */
4449static int ram_dirty_bitmap_sync_all(MigrationState *s, RAMState *rs)
4450{
4451 RAMBlock *block;
4452 QEMUFile *file = s->to_dst_file;
4453 int ramblock_count = 0;
4454
4455 trace_ram_dirty_bitmap_sync_start();
4456
Yury Kotovfbd162e2019-02-15 20:45:46 +03004457 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
Peter Xuedd090c2018-05-02 18:47:32 +08004458 qemu_savevm_send_recv_bitmap(file, block->idstr);
4459 trace_ram_dirty_bitmap_request(block->idstr);
4460 ramblock_count++;
4461 }
4462
4463 trace_ram_dirty_bitmap_sync_wait();
4464
4465 /* Wait until all the ramblocks' dirty bitmaps are synced */
4466 while (ramblock_count--) {
4467 qemu_sem_wait(&s->rp_state.rp_sem);
4468 }
4469
4470 trace_ram_dirty_bitmap_sync_complete();
4471
4472 return 0;
4473}
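/*
 * Reading aid (added note): each qemu_savevm_send_recv_bitmap() above
 * asks the destination for one block's received bitmap; the reply is
 * parsed on the source's return path by ram_dirty_bitmap_reload()
 * below, which posts rp_sem once per block, so the wait loop above
 * wakes exactly ramblock_count times.
 */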
4474
4475static void ram_dirty_bitmap_reload_notify(MigrationState *s)
4476{
4477 qemu_sem_post(&s->rp_state.rp_sem);
4478}
4479
Peter Xua335deb2018-05-02 18:47:28 +08004480/*
4481 * Read the received bitmap, revert it as the initial dirty bitmap.
4482 * This is only used when the postcopy migration is paused but wants
4483 * to resume from a middle point.
4484 */
4485int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block)
4486{
4487 int ret = -EINVAL;
Peter Xu43044ac2021-07-22 13:58:38 -04004488 /* from_dst_file is always valid because we're within rp_thread */
Peter Xua335deb2018-05-02 18:47:28 +08004489 QEMUFile *file = s->rp_state.from_dst_file;
4490 unsigned long *le_bitmap, nbits = block->used_length >> TARGET_PAGE_BITS;
Peter Xua725ef92018-07-10 17:18:55 +08004491 uint64_t local_size = DIV_ROUND_UP(nbits, 8);
Peter Xua335deb2018-05-02 18:47:28 +08004492 uint64_t size, end_mark;
4493
4494 trace_ram_dirty_bitmap_reload_begin(block->idstr);
4495
4496 if (s->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
4497 error_report("%s: incorrect state %s", __func__,
4498 MigrationStatus_str(s->state));
4499 return -EINVAL;
4500 }
4501
4502 /*
4503 * Note: see comments in ramblock_recv_bitmap_send() on why we
zhaolichang3a4452d2020-09-17 15:50:21 +08004504 * need the endianness conversion, and the paddings.
Peter Xua335deb2018-05-02 18:47:28 +08004505 */
4506 local_size = ROUND_UP(local_size, 8);
4507
4508 /* Add paddings */
4509 le_bitmap = bitmap_new(nbits + BITS_PER_LONG);
4510
4511 size = qemu_get_be64(file);
4512
4513 /* The size of the bitmap should match with our ramblock */
4514 if (size != local_size) {
4515 error_report("%s: ramblock '%s' bitmap size mismatch "
4516 "(0x%"PRIx64" != 0x%"PRIx64")", __func__,
4517 block->idstr, size, local_size);
4518 ret = -EINVAL;
4519 goto out;
4520 }
4521
4522 size = qemu_get_buffer(file, (uint8_t *)le_bitmap, local_size);
4523 end_mark = qemu_get_be64(file);
4524
4525 ret = qemu_file_get_error(file);
4526 if (ret || size != local_size) {
4527 error_report("%s: read bitmap failed for ramblock '%s': %d"
4528 " (size 0x%"PRIx64", got: 0x%"PRIx64")",
4529 __func__, block->idstr, ret, local_size, size);
4530 ret = -EIO;
4531 goto out;
4532 }
4533
4534 if (end_mark != RAMBLOCK_RECV_BITMAP_ENDING) {
Philippe Mathieu-Daudéaf3bbbe2020-11-03 12:25:58 +01004535 error_report("%s: ramblock '%s' end mark incorrect: 0x%"PRIx64,
Peter Xua335deb2018-05-02 18:47:28 +08004536 __func__, block->idstr, end_mark);
4537 ret = -EINVAL;
4538 goto out;
4539 }
4540
4541 /*
zhaolichang3a4452d2020-09-17 15:50:21 +08004542 * Endianness conversion. We are during postcopy (though paused).
Peter Xua335deb2018-05-02 18:47:28 +08004543 * The dirty bitmap won't change. We can directly modify it.
4544 */
4545 bitmap_from_le(block->bmap, le_bitmap, nbits);
4546
4547 /*
4548 * What we received is "received bitmap". Revert it as the initial
4549 * dirty bitmap for this ramblock.
4550 */
4551 bitmap_complement(block->bmap, block->bmap, nbits);
4552
David Hildenbrandbe39b4c2021-10-11 19:53:41 +02004553 /* Clear dirty bits of discarded ranges that we don't want to migrate. */
4554 ramblock_dirty_bitmap_clear_discarded_pages(block);
4555
4556 /* We'll recalculate migration_dirty_pages in ram_state_resume_prepare(). */
Peter Xua335deb2018-05-02 18:47:28 +08004557 trace_ram_dirty_bitmap_reload_complete(block->idstr);
4558
Peter Xuedd090c2018-05-02 18:47:32 +08004559 /*
4560 * We succeeded to sync bitmap for current ramblock. If this is
4561 * the last one to sync, we need to notify the main send thread.
4562 */
4563 ram_dirty_bitmap_reload_notify(s);
4564
Peter Xua335deb2018-05-02 18:47:28 +08004565 ret = 0;
4566out:
Peter Xubf269902018-05-25 09:50:42 +08004567 g_free(le_bitmap);
Peter Xua335deb2018-05-02 18:47:28 +08004568 return ret;
4569}
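/*
 * Worked example (assumed sizes, added for illustration) of the bitmap
 * size checked above: a 1 GiB RAMBlock with 4 KiB target pages has
 * nbits = 2^30 >> 12 = 262144 bits, so local_size = 262144 / 8 =
 * 32768 bytes, which is already a multiple of 8; the extra
 * BITS_PER_LONG bits allocated for le_bitmap are just padding headroom
 * for the 8-byte-aligned transfer.
 */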
4570
Peter Xuedd090c2018-05-02 18:47:32 +08004571static int ram_resume_prepare(MigrationState *s, void *opaque)
4572{
4573 RAMState *rs = *(RAMState **)opaque;
Peter Xu08614f32018-05-02 18:47:33 +08004574 int ret;
Peter Xuedd090c2018-05-02 18:47:32 +08004575
Peter Xu08614f32018-05-02 18:47:33 +08004576 ret = ram_dirty_bitmap_sync_all(s, rs);
4577 if (ret) {
4578 return ret;
4579 }
4580
4581 ram_state_resume_prepare(rs, s->to_dst_file);
4582
4583 return 0;
Peter Xuedd090c2018-05-02 18:47:32 +08004584}
4585
Peter Xu36f62f12022-07-07 14:55:02 -04004586void postcopy_preempt_shutdown_file(MigrationState *s)
4587{
4588 qemu_put_be64(s->postcopy_qemufile_src, RAM_SAVE_FLAG_EOS);
4589 qemu_fflush(s->postcopy_qemufile_src);
4590}
4591
Juan Quintela56e93d22015-05-07 19:33:31 +02004592static SaveVMHandlers savevm_ram_handlers = {
Juan Quintela9907e842017-06-28 11:52:24 +02004593 .save_setup = ram_save_setup,
Juan Quintela56e93d22015-05-07 19:33:31 +02004594 .save_live_iterate = ram_save_iterate,
Dr. David Alan Gilbert763c9062015-11-05 18:11:00 +00004595 .save_live_complete_postcopy = ram_save_complete,
Dr. David Alan Gilberta3e06c32015-11-05 18:10:41 +00004596 .save_live_complete_precopy = ram_save_complete,
Vladimir Sementsov-Ogievskiyc6467622017-07-10 19:30:14 +03004597 .has_postcopy = ram_has_postcopy,
Juan Quintelac8df4a72022-10-03 02:00:03 +02004598 .state_pending_exact = ram_state_pending_exact,
4599 .state_pending_estimate = ram_state_pending_estimate,
Juan Quintela56e93d22015-05-07 19:33:31 +02004600 .load_state = ram_load,
Juan Quintelaf265e0e2017-06-28 11:52:27 +02004601 .save_cleanup = ram_save_cleanup,
4602 .load_setup = ram_load_setup,
4603 .load_cleanup = ram_load_cleanup,
Peter Xuedd090c2018-05-02 18:47:32 +08004604 .resume_prepare = ram_resume_prepare,
Juan Quintela56e93d22015-05-07 19:33:31 +02004605};
4606
David Hildenbrandc7c0e722021-04-29 13:27:02 +02004607static void ram_mig_ram_block_resized(RAMBlockNotifier *n, void *host,
4608 size_t old_size, size_t new_size)
4609{
David Hildenbrandcc61c702021-04-29 13:27:04 +02004610 PostcopyState ps = postcopy_state_get();
David Hildenbrandc7c0e722021-04-29 13:27:02 +02004611 ram_addr_t offset;
4612 RAMBlock *rb = qemu_ram_block_from_host(host, false, &offset);
4613 Error *err = NULL;
4614
4615 if (ramblock_is_ignored(rb)) {
4616 return;
4617 }
4618
4619 if (!migration_is_idle()) {
4620 /*
4621 * Precopy code on the source cannot deal with the size of RAM blocks
4622 * changing at random points in time - especially after sending the
4623 * RAM block sizes in the migration stream, they must no longer change.
4624 * Abort and indicate a proper reason.
4625 */
4626 error_setg(&err, "RAM block '%s' resized during precopy.", rb->idstr);
Laurent Vivier458fecc2021-09-29 16:43:10 +02004627 migration_cancel(err);
David Hildenbrandc7c0e722021-04-29 13:27:02 +02004628 error_free(err);
David Hildenbrandc7c0e722021-04-29 13:27:02 +02004629 }
David Hildenbrandcc61c702021-04-29 13:27:04 +02004630
4631 switch (ps) {
4632 case POSTCOPY_INCOMING_ADVISE:
4633 /*
4634 * Update what ram_postcopy_incoming_init()->init_range() does at the
4635 * time postcopy was advised. Syncing RAM blocks with the source will
4636 * result in RAM resizes.
4637 */
4638 if (old_size < new_size) {
4639 if (ram_discard_range(rb->idstr, old_size, new_size - old_size)) {
4640 error_report("RAM block '%s' discard of resized RAM failed",
4641 rb->idstr);
4642 }
4643 }
David Hildenbrand898ba902021-04-29 13:27:06 +02004644 rb->postcopy_length = new_size;
David Hildenbrandcc61c702021-04-29 13:27:04 +02004645 break;
4646 case POSTCOPY_INCOMING_NONE:
4647 case POSTCOPY_INCOMING_RUNNING:
4648 case POSTCOPY_INCOMING_END:
4649 /*
4650 * Once our guest is running, postcopy no longer cares about
4651 * resizes. When growing, the new memory was not available on the
4652 * source, so no handler is needed.
4653 */
4654 break;
4655 default:
4656 error_report("RAM block '%s' resized during postcopy state: %d",
4657 rb->idstr, ps);
4658 exit(-1);
4659 }
David Hildenbrandc7c0e722021-04-29 13:27:02 +02004660}
4661
4662static RAMBlockNotifier ram_mig_ram_notifier = {
4663 .ram_block_resized = ram_mig_ram_block_resized,
4664};
4665
Juan Quintela56e93d22015-05-07 19:33:31 +02004666void ram_mig_init(void)
4667{
4668 qemu_mutex_init(&XBZRLE.lock);
Dr. David Alan Gilbertce62df52019-08-22 12:54:33 +01004669 register_savevm_live("ram", 0, 4, &savevm_ram_handlers, &ram_state);
David Hildenbrandc7c0e722021-04-29 13:27:02 +02004670 ram_block_notifier_add(&ram_mig_ram_notifier);
Juan Quintela56e93d22015-05-07 19:33:31 +02004671}