blob: 015a9038d1aa6b8fbfa19bff2fc1b68a2a2058b8 [file] [log] [blame]
/*
 * Dirty page rate limit implementation code
 *
 * Copyright (c) 2022 CHINA TELECOM CO.,LTD.
 *
 * Authors:
 *  Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
12
13#include "qemu/osdep.h"
Hyman Huang(黄勇)cc2b33e2022-06-26 01:38:33 +080014#include "qemu/main-loop.h"
15#include "qapi/qapi-commands-migration.h"
Hyman Huang(黄勇)f3b2e382022-06-26 01:38:36 +080016#include "qapi/qmp/qdict.h"
17#include "qapi/error.h"
Hyman Huang(黄勇)cc2b33e2022-06-26 01:38:33 +080018#include "sysemu/dirtyrate.h"
19#include "sysemu/dirtylimit.h"
Hyman Huang(黄勇)f3b2e382022-06-26 01:38:36 +080020#include "monitor/hmp.h"
21#include "monitor/monitor.h"
Hyman Huang(黄勇)cc2b33e2022-06-26 01:38:33 +080022#include "exec/memory.h"
Thomas Huth30ee29f2023-04-13 07:45:09 +020023#include "exec/target_page.h"
Hyman Huang(黄勇)cc2b33e2022-06-26 01:38:33 +080024#include "hw/boards.h"
Hyman Huang(黄勇)baa60982022-06-26 01:38:35 +080025#include "sysemu/kvm.h"
26#include "trace.h"
27
/*
 * The throttle of a vCPU is left untouched once the gap between its
 * current dirty page rate and its quota is no larger than
 * DIRTYLIMIT_TOLERANCE_RANGE.
 */
#define DIRTYLIMIT_TOLERANCE_RANGE     25  /* MB/s */
/*
 * When the dirty page rate error, expressed as a percentage, is above
 * DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT the vCPU sleep time is increased or
 * decreased linearly; otherwise it is moved by a fixed step.
 */
#define DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT     50
/*
 * Maximum share (in percent) that vCPU sleep time may take of one
 * cycle made up of a dirty-ring-full period plus the sleep time.
 */
#define DIRTYLIMIT_THROTTLE_PCT_MAX 99
Hyman Huang(黄勇)cc2b33e2022-06-26 01:38:33 +080045
46struct {
47 VcpuStat stat;
48 bool running;
49 QemuThread thread;
50} *vcpu_dirty_rate_stat;
51
/* Dirty page rate limit bookkeeping for a single vCPU. */
typedef struct VcpuDirtyLimitState {
    /* Index of the vCPU this entry describes */
    int cpu_index;
    /* True while a dirty page rate limit is set on the vCPU */
    bool enabled;
    /*
     * Dirty page rate quota in MB/s;
     * zero when the limit is not enabled.
     */
    uint64_t quota;
} VcpuDirtyLimitState;
61
62struct {
63 VcpuDirtyLimitState *states;
64 /* Max cpus number configured by user */
65 int max_cpus;
66 /* Number of vcpu under dirtylimit */
67 int limited_nvcpu;
68} *dirtylimit_state;
69
70/* protect dirtylimit_state */
71static QemuMutex dirtylimit_mutex;
72
73/* dirtylimit thread quit if dirtylimit_quit is true */
74static bool dirtylimit_quit;
75
Hyman Huang(黄勇)cc2b33e2022-06-26 01:38:33 +080076static void vcpu_dirty_rate_stat_collect(void)
77{
78 VcpuStat stat;
79 int i = 0;
80
81 /* calculate vcpu dirtyrate */
82 vcpu_calculate_dirtyrate(DIRTYLIMIT_CALC_TIME_MS,
83 &stat,
84 GLOBAL_DIRTY_LIMIT,
85 false);
86
87 for (i = 0; i < stat.nvcpu; i++) {
88 vcpu_dirty_rate_stat->stat.rates[i].id = i;
89 vcpu_dirty_rate_stat->stat.rates[i].dirty_rate =
90 stat.rates[i].dirty_rate;
91 }
92
93 free(stat.rates);
94}
95
96static void *vcpu_dirty_rate_stat_thread(void *opaque)
97{
98 rcu_register_thread();
99
100 /* start log sync */
101 global_dirty_log_change(GLOBAL_DIRTY_LIMIT, true);
102
103 while (qatomic_read(&vcpu_dirty_rate_stat->running)) {
104 vcpu_dirty_rate_stat_collect();
Hyman Huang(黄勇)baa60982022-06-26 01:38:35 +0800105 if (dirtylimit_in_service()) {
106 dirtylimit_process();
107 }
Hyman Huang(黄勇)cc2b33e2022-06-26 01:38:33 +0800108 }
109
110 /* stop log sync */
111 global_dirty_log_change(GLOBAL_DIRTY_LIMIT, false);
112
113 rcu_unregister_thread();
114 return NULL;
115}
116
117int64_t vcpu_dirty_rate_get(int cpu_index)
118{
119 DirtyRateVcpu *rates = vcpu_dirty_rate_stat->stat.rates;
120 return qatomic_read_i64(&rates[cpu_index].dirty_rate);
121}
122
123void vcpu_dirty_rate_stat_start(void)
124{
125 if (qatomic_read(&vcpu_dirty_rate_stat->running)) {
126 return;
127 }
128
129 qatomic_set(&vcpu_dirty_rate_stat->running, 1);
130 qemu_thread_create(&vcpu_dirty_rate_stat->thread,
131 "dirtyrate-stat",
132 vcpu_dirty_rate_stat_thread,
133 NULL,
134 QEMU_THREAD_JOINABLE);
135}
136
137void vcpu_dirty_rate_stat_stop(void)
138{
139 qatomic_set(&vcpu_dirty_rate_stat->running, 0);
Hyman Huang(黄勇)baa60982022-06-26 01:38:35 +0800140 dirtylimit_state_unlock();
Hyman Huang(黄勇)cc2b33e2022-06-26 01:38:33 +0800141 qemu_mutex_unlock_iothread();
142 qemu_thread_join(&vcpu_dirty_rate_stat->thread);
143 qemu_mutex_lock_iothread();
Hyman Huang(黄勇)baa60982022-06-26 01:38:35 +0800144 dirtylimit_state_lock();
Hyman Huang(黄勇)cc2b33e2022-06-26 01:38:33 +0800145}
146
147void vcpu_dirty_rate_stat_initialize(void)
148{
149 MachineState *ms = MACHINE(qdev_get_machine());
150 int max_cpus = ms->smp.max_cpus;
151
152 vcpu_dirty_rate_stat =
153 g_malloc0(sizeof(*vcpu_dirty_rate_stat));
154
155 vcpu_dirty_rate_stat->stat.nvcpu = max_cpus;
156 vcpu_dirty_rate_stat->stat.rates =
Markus Armbrusterc5e8d512022-09-23 10:42:54 +0200157 g_new0(DirtyRateVcpu, max_cpus);
Hyman Huang(黄勇)cc2b33e2022-06-26 01:38:33 +0800158
159 vcpu_dirty_rate_stat->running = false;
160}
161
162void vcpu_dirty_rate_stat_finalize(void)
163{
164 free(vcpu_dirty_rate_stat->stat.rates);
165 vcpu_dirty_rate_stat->stat.rates = NULL;
166
167 free(vcpu_dirty_rate_stat);
168 vcpu_dirty_rate_stat = NULL;
169}
Hyman Huang(黄勇)baa60982022-06-26 01:38:35 +0800170
171void dirtylimit_state_lock(void)
172{
173 qemu_mutex_lock(&dirtylimit_mutex);
174}
175
176void dirtylimit_state_unlock(void)
177{
178 qemu_mutex_unlock(&dirtylimit_mutex);
179}
180
181static void
182__attribute__((__constructor__)) dirtylimit_mutex_init(void)
183{
184 qemu_mutex_init(&dirtylimit_mutex);
185}
186
187static inline VcpuDirtyLimitState *dirtylimit_vcpu_get_state(int cpu_index)
188{
189 return &dirtylimit_state->states[cpu_index];
190}
191
192void dirtylimit_state_initialize(void)
193{
194 MachineState *ms = MACHINE(qdev_get_machine());
195 int max_cpus = ms->smp.max_cpus;
196 int i;
197
198 dirtylimit_state = g_malloc0(sizeof(*dirtylimit_state));
199
200 dirtylimit_state->states =
Markus Armbrusterc5e8d512022-09-23 10:42:54 +0200201 g_new0(VcpuDirtyLimitState, max_cpus);
Hyman Huang(黄勇)baa60982022-06-26 01:38:35 +0800202
203 for (i = 0; i < max_cpus; i++) {
204 dirtylimit_state->states[i].cpu_index = i;
205 }
206
207 dirtylimit_state->max_cpus = max_cpus;
208 trace_dirtylimit_state_initialize(max_cpus);
209}
210
211void dirtylimit_state_finalize(void)
212{
213 free(dirtylimit_state->states);
214 dirtylimit_state->states = NULL;
215
216 free(dirtylimit_state);
217 dirtylimit_state = NULL;
218
219 trace_dirtylimit_state_finalize();
220}
221
222bool dirtylimit_in_service(void)
223{
224 return !!dirtylimit_state;
225}
226
227bool dirtylimit_vcpu_index_valid(int cpu_index)
228{
229 MachineState *ms = MACHINE(qdev_get_machine());
230
231 return !(cpu_index < 0 ||
232 cpu_index >= ms->smp.max_cpus);
233}
234
/*
 * Estimate how long it takes to fill the KVM dirty ring.
 *
 * The estimate divides the dirty ring size (in MiB, scaled by 1000000)
 * by the highest dirty page rate ever passed to this function, which is
 * remembered in a function-local static.  The caller in this file
 * treats the result as microseconds.
 *
 * @dirtyrate: current dirty page rate; the first call must be non-zero
 *             because the remembered maximum is used as the divisor.
 */
static uint64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate)
{
    static uint64_t max_dirtyrate;
    uint64_t dirty_ring_size_MiB =
        qemu_target_pages_to_MiB(kvm_dirty_ring_size());

    /* Track the peak rate seen so far */
    max_dirtyrate = MAX(max_dirtyrate, dirtyrate);

    return dirty_ring_size_MiB * 1000000 / max_dirtyrate;
}
248
249static inline bool dirtylimit_done(uint64_t quota,
250 uint64_t current)
251{
252 uint64_t min, max;
253
254 min = MIN(quota, current);
255 max = MAX(quota, current);
256
257 return ((max - min) <= DIRTYLIMIT_TOLERANCE_RANGE) ? true : false;
258}
259
260static inline bool
261dirtylimit_need_linear_adjustment(uint64_t quota,
262 uint64_t current)
263{
264 uint64_t min, max;
265
266 min = MIN(quota, current);
267 max = MAX(quota, current);
268
269 return ((max - min) * 100 / max) > DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT;
270}
271
272static void dirtylimit_set_throttle(CPUState *cpu,
273 uint64_t quota,
274 uint64_t current)
275{
276 int64_t ring_full_time_us = 0;
277 uint64_t sleep_pct = 0;
278 uint64_t throttle_us = 0;
279
280 if (current == 0) {
281 cpu->throttle_us_per_full = 0;
282 return;
283 }
284
285 ring_full_time_us = dirtylimit_dirty_ring_full_time(current);
286
287 if (dirtylimit_need_linear_adjustment(quota, current)) {
288 if (quota < current) {
289 sleep_pct = (current - quota) * 100 / current;
290 throttle_us =
291 ring_full_time_us * sleep_pct / (double)(100 - sleep_pct);
292 cpu->throttle_us_per_full += throttle_us;
293 } else {
294 sleep_pct = (quota - current) * 100 / quota;
295 throttle_us =
296 ring_full_time_us * sleep_pct / (double)(100 - sleep_pct);
297 cpu->throttle_us_per_full -= throttle_us;
298 }
299
300 trace_dirtylimit_throttle_pct(cpu->cpu_index,
301 sleep_pct,
302 throttle_us);
303 } else {
304 if (quota < current) {
305 cpu->throttle_us_per_full += ring_full_time_us / 10;
306 } else {
307 cpu->throttle_us_per_full -= ring_full_time_us / 10;
308 }
309 }
310
311 /*
312 * TODO: in the big kvm_dirty_ring_size case (eg: 65536, or other scenario),
313 * current dirty page rate may never reach the quota, we should stop
314 * increasing sleep time?
315 */
316 cpu->throttle_us_per_full = MIN(cpu->throttle_us_per_full,
317 ring_full_time_us * DIRTYLIMIT_THROTTLE_PCT_MAX);
318
319 cpu->throttle_us_per_full = MAX(cpu->throttle_us_per_full, 0);
320}
321
322static void dirtylimit_adjust_throttle(CPUState *cpu)
323{
324 uint64_t quota = 0;
325 uint64_t current = 0;
326 int cpu_index = cpu->cpu_index;
327
328 quota = dirtylimit_vcpu_get_state(cpu_index)->quota;
329 current = vcpu_dirty_rate_get(cpu_index);
330
331 if (!dirtylimit_done(quota, current)) {
332 dirtylimit_set_throttle(cpu, quota, current);
333 }
334
335 return;
336}
337
338void dirtylimit_process(void)
339{
340 CPUState *cpu;
341
342 if (!qatomic_read(&dirtylimit_quit)) {
343 dirtylimit_state_lock();
344
345 if (!dirtylimit_in_service()) {
346 dirtylimit_state_unlock();
347 return;
348 }
349
350 CPU_FOREACH(cpu) {
351 if (!dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled) {
352 continue;
353 }
354 dirtylimit_adjust_throttle(cpu);
355 }
356 dirtylimit_state_unlock();
357 }
358}
359
360void dirtylimit_change(bool start)
361{
362 if (start) {
363 qatomic_set(&dirtylimit_quit, 0);
364 } else {
365 qatomic_set(&dirtylimit_quit, 1);
366 }
367}
368
369void dirtylimit_set_vcpu(int cpu_index,
370 uint64_t quota,
371 bool enable)
372{
373 trace_dirtylimit_set_vcpu(cpu_index, quota);
374
375 if (enable) {
376 dirtylimit_state->states[cpu_index].quota = quota;
377 if (!dirtylimit_vcpu_get_state(cpu_index)->enabled) {
378 dirtylimit_state->limited_nvcpu++;
379 }
380 } else {
381 dirtylimit_state->states[cpu_index].quota = 0;
382 if (dirtylimit_state->states[cpu_index].enabled) {
383 dirtylimit_state->limited_nvcpu--;
384 }
385 }
386
387 dirtylimit_state->states[cpu_index].enabled = enable;
388}
389
390void dirtylimit_set_all(uint64_t quota,
391 bool enable)
392{
393 MachineState *ms = MACHINE(qdev_get_machine());
394 int max_cpus = ms->smp.max_cpus;
395 int i;
396
397 for (i = 0; i < max_cpus; i++) {
398 dirtylimit_set_vcpu(i, quota, enable);
399 }
400}
401
402void dirtylimit_vcpu_execute(CPUState *cpu)
403{
404 if (dirtylimit_in_service() &&
405 dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled &&
406 cpu->throttle_us_per_full) {
407 trace_dirtylimit_vcpu_execute(cpu->cpu_index,
408 cpu->throttle_us_per_full);
409 usleep(cpu->throttle_us_per_full);
410 }
411}
Hyman Huang(黄勇)f3b2e382022-06-26 01:38:36 +0800412
/*
 * Bring up the dirty limit service: allocate the limit state, enable
 * throttle processing, then allocate and start the dirty page rate
 * sampling thread.
 */
static void dirtylimit_init(void)
{
    /* Limit state first, so dirtylimit_in_service() becomes true */
    dirtylimit_state_initialize();
    dirtylimit_change(true);

    /* Then the sampler that feeds it */
    vcpu_dirty_rate_stat_initialize();
    vcpu_dirty_rate_stat_start();
}
420
/*
 * Tear down the dirty limit service: stop and free the sampling
 * service, disable throttle processing, then free the limit state.
 */
static void dirtylimit_cleanup(void)
{
    /* The sampling thread must be joined before its data is freed */
    vcpu_dirty_rate_stat_stop();
    vcpu_dirty_rate_stat_finalize();

    dirtylimit_change(false);
    dirtylimit_state_finalize();
}
428
429void qmp_cancel_vcpu_dirty_limit(bool has_cpu_index,
430 int64_t cpu_index,
431 Error **errp)
432{
433 if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
434 return;
435 }
436
437 if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) {
438 error_setg(errp, "incorrect cpu index specified");
439 return;
440 }
441
442 if (!dirtylimit_in_service()) {
443 return;
444 }
445
446 dirtylimit_state_lock();
447
448 if (has_cpu_index) {
449 dirtylimit_set_vcpu(cpu_index, 0, false);
450 } else {
451 dirtylimit_set_all(0, false);
452 }
453
454 if (!dirtylimit_state->limited_nvcpu) {
455 dirtylimit_cleanup();
456 }
457
458 dirtylimit_state_unlock();
459}
460
461void hmp_cancel_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
462{
463 int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1);
464 Error *err = NULL;
465
466 qmp_cancel_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, &err);
467 if (err) {
468 hmp_handle_error(mon, err);
469 return;
470 }
471
472 monitor_printf(mon, "[Please use 'info vcpu_dirty_limit' to query "
473 "dirty limit for virtual CPU]\n");
474}
475
476void qmp_set_vcpu_dirty_limit(bool has_cpu_index,
477 int64_t cpu_index,
478 uint64_t dirty_rate,
479 Error **errp)
480{
481 if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
482 error_setg(errp, "dirty page limit feature requires KVM with"
483 " accelerator property 'dirty-ring-size' set'");
484 return;
485 }
486
487 if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) {
488 error_setg(errp, "incorrect cpu index specified");
489 return;
490 }
491
492 if (!dirty_rate) {
493 qmp_cancel_vcpu_dirty_limit(has_cpu_index, cpu_index, errp);
494 return;
495 }
496
497 dirtylimit_state_lock();
498
499 if (!dirtylimit_in_service()) {
500 dirtylimit_init();
501 }
502
503 if (has_cpu_index) {
504 dirtylimit_set_vcpu(cpu_index, dirty_rate, true);
505 } else {
506 dirtylimit_set_all(dirty_rate, true);
507 }
508
509 dirtylimit_state_unlock();
510}
511
512void hmp_set_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
513{
514 int64_t dirty_rate = qdict_get_int(qdict, "dirty_rate");
515 int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1);
516 Error *err = NULL;
517
518 qmp_set_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, dirty_rate, &err);
519 if (err) {
520 hmp_handle_error(mon, err);
521 return;
522 }
523
524 monitor_printf(mon, "[Please use 'info vcpu_dirty_limit' to query "
525 "dirty limit for virtual CPU]\n");
526}
527
528static struct DirtyLimitInfo *dirtylimit_query_vcpu(int cpu_index)
529{
530 DirtyLimitInfo *info = NULL;
531
532 info = g_malloc0(sizeof(*info));
533 info->cpu_index = cpu_index;
534 info->limit_rate = dirtylimit_vcpu_get_state(cpu_index)->quota;
535 info->current_rate = vcpu_dirty_rate_get(cpu_index);
536
537 return info;
538}
539
540static struct DirtyLimitInfoList *dirtylimit_query_all(void)
541{
542 int i, index;
543 DirtyLimitInfo *info = NULL;
544 DirtyLimitInfoList *head = NULL, **tail = &head;
545
546 dirtylimit_state_lock();
547
548 if (!dirtylimit_in_service()) {
549 dirtylimit_state_unlock();
550 return NULL;
551 }
552
553 for (i = 0; i < dirtylimit_state->max_cpus; i++) {
554 index = dirtylimit_state->states[i].cpu_index;
555 if (dirtylimit_vcpu_get_state(index)->enabled) {
556 info = dirtylimit_query_vcpu(index);
557 QAPI_LIST_APPEND(tail, info);
558 }
559 }
560
561 dirtylimit_state_unlock();
562
563 return head;
564}
565
566struct DirtyLimitInfoList *qmp_query_vcpu_dirty_limit(Error **errp)
567{
568 if (!dirtylimit_in_service()) {
569 return NULL;
570 }
571
572 return dirtylimit_query_all();
573}
574
575void hmp_info_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
576{
577 DirtyLimitInfoList *limit, *head, *info = NULL;
578 Error *err = NULL;
579
580 if (!dirtylimit_in_service()) {
581 monitor_printf(mon, "Dirty page limit not enabled!\n");
582 return;
583 }
584
585 info = qmp_query_vcpu_dirty_limit(&err);
586 if (err) {
587 hmp_handle_error(mon, err);
588 return;
589 }
590
591 head = info;
592 for (limit = head; limit != NULL; limit = limit->next) {
593 monitor_printf(mon, "vcpu[%"PRIi64"], limit rate %"PRIi64 " (MB/s),"
594 " current rate %"PRIi64 " (MB/s)\n",
595 limit->value->cpu_index,
596 limit->value->limit_rate,
597 limit->value->current_rate);
598 }
599
600 g_free(info);
601}