Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 1 | /* |
| 2 | * QEMU throttling infrastructure |
| 3 | * |
Alberto Garcia | a291d5d | 2015-06-08 18:17:47 +0200 | [diff] [blame] | 4 | * Copyright (C) Nodalink, EURL. 2013-2014 |
| 5 | * Copyright (C) Igalia, S.L. 2015 |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 6 | * |
Alberto Garcia | a291d5d | 2015-06-08 18:17:47 +0200 | [diff] [blame] | 7 | * Authors: |
| 8 | * Benoît Canet <benoit.canet@nodalink.com> |
| 9 | * Alberto Garcia <berto@igalia.com> |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 10 | * |
| 11 | * This program is free software; you can redistribute it and/or |
| 12 | * modify it under the terms of the GNU General Public License as |
| 13 | * published by the Free Software Foundation; either version 2 or |
| 14 | * (at your option) version 3 of the License. |
| 15 | * |
| 16 | * This program is distributed in the hope that it will be useful, |
| 17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 19 | * GNU General Public License for more details. |
| 20 | * |
| 21 | * You should have received a copy of the GNU General Public License |
| 22 | * along with this program; if not, see <http://www.gnu.org/licenses/>. |
| 23 | */ |
| 24 | |
Peter Maydell | aafd758 | 2016-01-29 17:49:55 +0000 | [diff] [blame] | 25 | #include "qemu/osdep.h" |
Markus Armbruster | da34e65 | 2016-03-14 09:01:28 +0100 | [diff] [blame] | 26 | #include "qapi/error.h" |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 27 | #include "qemu/throttle.h" |
| 28 | #include "qemu/timer.h" |
Stefan Hajnoczi | 13af91e | 2014-05-14 16:22:45 +0200 | [diff] [blame] | 29 | #include "block/aio.h" |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 30 | |
| 31 | /* This function make a bucket leak |
| 32 | * |
| 33 | * @bkt: the bucket to make leak |
| 34 | * @delta_ns: the time delta |
| 35 | */ |
| 36 | void throttle_leak_bucket(LeakyBucket *bkt, int64_t delta_ns) |
| 37 | { |
| 38 | double leak; |
| 39 | |
| 40 | /* compute how much to leak */ |
Stefan Hajnoczi | 13566fe | 2015-07-08 15:10:09 +0100 | [diff] [blame] | 41 | leak = (bkt->avg * (double) delta_ns) / NANOSECONDS_PER_SECOND; |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 42 | |
| 43 | /* make the bucket leak */ |
| 44 | bkt->level = MAX(bkt->level - leak, 0); |
Alberto Garcia | 100f8f2 | 2016-02-18 12:27:01 +0200 | [diff] [blame] | 45 | |
| 46 | /* if we allow bursts for more than one second we also need to |
| 47 | * keep track of bkt->burst_level so the bkt->max goal per second |
| 48 | * is attained */ |
| 49 | if (bkt->burst_length > 1) { |
| 50 | leak = (bkt->max * (double) delta_ns) / NANOSECONDS_PER_SECOND; |
| 51 | bkt->burst_level = MAX(bkt->burst_level - leak, 0); |
| 52 | } |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 53 | } |
| 54 | |
| 55 | /* Calculate the time delta since last leak and make proportionals leaks |
| 56 | * |
| 57 | * @now: the current timestamp in ns |
| 58 | */ |
| 59 | static void throttle_do_leak(ThrottleState *ts, int64_t now) |
| 60 | { |
| 61 | /* compute the time elapsed since the last leak */ |
| 62 | int64_t delta_ns = now - ts->previous_leak; |
| 63 | int i; |
| 64 | |
| 65 | ts->previous_leak = now; |
| 66 | |
| 67 | if (delta_ns <= 0) { |
| 68 | return; |
| 69 | } |
| 70 | |
| 71 | /* make each bucket leak */ |
| 72 | for (i = 0; i < BUCKETS_COUNT; i++) { |
| 73 | throttle_leak_bucket(&ts->cfg.buckets[i], delta_ns); |
| 74 | } |
| 75 | } |
| 76 | |
| 77 | /* do the real job of computing the time to wait |
| 78 | * |
| 79 | * @limit: the throttling limit |
| 80 | * @extra: the number of operation to delay |
| 81 | * @ret: the time to wait in ns |
| 82 | */ |
| 83 | static int64_t throttle_do_compute_wait(double limit, double extra) |
| 84 | { |
Stefan Hajnoczi | 13566fe | 2015-07-08 15:10:09 +0100 | [diff] [blame] | 85 | double wait = extra * NANOSECONDS_PER_SECOND; |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 86 | wait /= limit; |
| 87 | return wait; |
| 88 | } |
| 89 | |
| 90 | /* This function compute the wait time in ns that a leaky bucket should trigger |
| 91 | * |
| 92 | * @bkt: the leaky bucket we operate on |
| 93 | * @ret: the resulting wait time in ns or 0 if the operation can go through |
| 94 | */ |
| 95 | int64_t throttle_compute_wait(LeakyBucket *bkt) |
| 96 | { |
| 97 | double extra; /* the number of extra units blocking the io */ |
Alberto Garcia | 2a8be39 | 2017-08-24 16:24:46 +0300 | [diff] [blame] | 98 | double bucket_size; /* I/O before throttling to bkt->avg */ |
| 99 | double burst_bucket_size; /* Before throttling to bkt->max */ |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 100 | |
| 101 | if (!bkt->avg) { |
| 102 | return 0; |
| 103 | } |
| 104 | |
Alberto Garcia | 2a8be39 | 2017-08-24 16:24:46 +0300 | [diff] [blame] | 105 | if (!bkt->max) { |
| 106 | /* If bkt->max is 0 we still want to allow short bursts of I/O |
| 107 | * from the guest, otherwise every other request will be throttled |
| 108 | * and performance will suffer considerably. */ |
Alberto Garcia | d00e692 | 2017-08-24 16:24:47 +0300 | [diff] [blame] | 109 | bucket_size = (double) bkt->avg / 10; |
Alberto Garcia | 2a8be39 | 2017-08-24 16:24:46 +0300 | [diff] [blame] | 110 | burst_bucket_size = 0; |
| 111 | } else { |
| 112 | /* If we have a burst limit then we have to wait until all I/O |
| 113 | * at burst rate has finished before throttling to bkt->avg */ |
| 114 | bucket_size = bkt->max * bkt->burst_length; |
Alberto Garcia | d00e692 | 2017-08-24 16:24:47 +0300 | [diff] [blame] | 115 | burst_bucket_size = (double) bkt->max / 10; |
Alberto Garcia | 2a8be39 | 2017-08-24 16:24:46 +0300 | [diff] [blame] | 116 | } |
| 117 | |
| 118 | /* If the main bucket is full then we have to wait */ |
| 119 | extra = bkt->level - bucket_size; |
Alberto Garcia | 100f8f2 | 2016-02-18 12:27:01 +0200 | [diff] [blame] | 120 | if (extra > 0) { |
| 121 | return throttle_do_compute_wait(bkt->avg, extra); |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 122 | } |
| 123 | |
Alberto Garcia | 2a8be39 | 2017-08-24 16:24:46 +0300 | [diff] [blame] | 124 | /* If the main bucket is not full yet we still have to check the |
| 125 | * burst bucket in order to enforce the burst limit */ |
Alberto Garcia | 100f8f2 | 2016-02-18 12:27:01 +0200 | [diff] [blame] | 126 | if (bkt->burst_length > 1) { |
Alberto Garcia | b580610 | 2017-09-13 11:28:17 +0300 | [diff] [blame] | 127 | assert(bkt->max > 0); /* see throttle_is_valid() */ |
Alberto Garcia | 2a8be39 | 2017-08-24 16:24:46 +0300 | [diff] [blame] | 128 | extra = bkt->burst_level - burst_bucket_size; |
Alberto Garcia | 100f8f2 | 2016-02-18 12:27:01 +0200 | [diff] [blame] | 129 | if (extra > 0) { |
| 130 | return throttle_do_compute_wait(bkt->max, extra); |
| 131 | } |
| 132 | } |
| 133 | |
| 134 | return 0; |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 135 | } |
| 136 | |
| 137 | /* This function compute the time that must be waited while this IO |
| 138 | * |
zhenwei pi | e76f201f | 2023-07-28 10:20:03 +0800 | [diff] [blame] | 139 | * @direction: throttle direction |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 140 | * @ret: time to wait |
| 141 | */ |
| 142 | static int64_t throttle_compute_wait_for(ThrottleState *ts, |
zhenwei pi | e76f201f | 2023-07-28 10:20:03 +0800 | [diff] [blame] | 143 | ThrottleDirection direction) |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 144 | { |
zhenwei pi | 7017313 | 2023-07-28 10:20:04 +0800 | [diff] [blame] | 145 | static const BucketType to_check[THROTTLE_MAX][4] = { |
| 146 | {THROTTLE_BPS_TOTAL, |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 147 | THROTTLE_OPS_TOTAL, |
| 148 | THROTTLE_BPS_READ, |
| 149 | THROTTLE_OPS_READ}, |
| 150 | {THROTTLE_BPS_TOTAL, |
| 151 | THROTTLE_OPS_TOTAL, |
| 152 | THROTTLE_BPS_WRITE, |
| 153 | THROTTLE_OPS_WRITE}, }; |
| 154 | int64_t wait, max_wait = 0; |
| 155 | int i; |
| 156 | |
zhenwei pi | 7017313 | 2023-07-28 10:20:04 +0800 | [diff] [blame] | 157 | for (i = 0; i < ARRAY_SIZE(to_check[THROTTLE_READ]); i++) { |
zhenwei pi | e76f201f | 2023-07-28 10:20:03 +0800 | [diff] [blame] | 158 | BucketType index = to_check[direction][i]; |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 159 | wait = throttle_compute_wait(&ts->cfg.buckets[index]); |
| 160 | if (wait > max_wait) { |
| 161 | max_wait = wait; |
| 162 | } |
| 163 | } |
| 164 | |
| 165 | return max_wait; |
| 166 | } |
| 167 | |
| 168 | /* compute the timer for this type of operation |
| 169 | * |
zhenwei pi | e76f201f | 2023-07-28 10:20:03 +0800 | [diff] [blame] | 170 | * @direction: throttle direction |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 171 | * @now: the current clock timestamp |
| 172 | * @next_timestamp: the resulting timer |
| 173 | * @ret: true if a timer must be set |
| 174 | */ |
Alberto Garcia | 3c9242f | 2016-02-18 12:26:54 +0200 | [diff] [blame] | 175 | static bool throttle_compute_timer(ThrottleState *ts, |
zhenwei pi | e76f201f | 2023-07-28 10:20:03 +0800 | [diff] [blame] | 176 | ThrottleDirection direction, |
Alberto Garcia | 3c9242f | 2016-02-18 12:26:54 +0200 | [diff] [blame] | 177 | int64_t now, |
| 178 | int64_t *next_timestamp) |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 179 | { |
| 180 | int64_t wait; |
| 181 | |
| 182 | /* leak proportionally to the time elapsed */ |
| 183 | throttle_do_leak(ts, now); |
| 184 | |
| 185 | /* compute the wait time if any */ |
zhenwei pi | e76f201f | 2023-07-28 10:20:03 +0800 | [diff] [blame] | 186 | wait = throttle_compute_wait_for(ts, direction); |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 187 | |
| 188 | /* if the code must wait compute when the next timer should fire */ |
| 189 | if (wait) { |
| 190 | *next_timestamp = now + wait; |
| 191 | return true; |
| 192 | } |
| 193 | |
| 194 | /* else no need to wait at all */ |
| 195 | *next_timestamp = now; |
| 196 | return false; |
| 197 | } |
| 198 | |
Stefan Hajnoczi | 13af91e | 2014-05-14 16:22:45 +0200 | [diff] [blame] | 199 | /* Add timers to event loop */ |
Benoît Canet | 0e5b0a2 | 2015-06-08 18:17:41 +0200 | [diff] [blame] | 200 | void throttle_timers_attach_aio_context(ThrottleTimers *tt, |
| 201 | AioContext *new_context) |
Stefan Hajnoczi | 13af91e | 2014-05-14 16:22:45 +0200 | [diff] [blame] | 202 | { |
zhenwei pi | d85b08c | 2023-07-28 10:20:00 +0800 | [diff] [blame] | 203 | ThrottleDirection dir; |
| 204 | |
| 205 | for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) { |
| 206 | if (tt->timer_cb[dir]) { |
| 207 | tt->timers[dir] = |
| 208 | aio_timer_new(new_context, tt->clock_type, SCALE_NS, |
| 209 | tt->timer_cb[dir], tt->timer_opaque); |
| 210 | } |
| 211 | } |
Stefan Hajnoczi | 13af91e | 2014-05-14 16:22:45 +0200 | [diff] [blame] | 212 | } |
| 213 | |
Alberto Garcia | 1588ab5 | 2016-02-18 12:27:00 +0200 | [diff] [blame] | 214 | /* |
| 215 | * Initialize the ThrottleConfig structure to a valid state |
| 216 | * @cfg: the config to initialize |
| 217 | */ |
| 218 | void throttle_config_init(ThrottleConfig *cfg) |
| 219 | { |
Alberto Garcia | 100f8f2 | 2016-02-18 12:27:01 +0200 | [diff] [blame] | 220 | unsigned i; |
Alberto Garcia | 1588ab5 | 2016-02-18 12:27:00 +0200 | [diff] [blame] | 221 | memset(cfg, 0, sizeof(*cfg)); |
Alberto Garcia | 100f8f2 | 2016-02-18 12:27:01 +0200 | [diff] [blame] | 222 | for (i = 0; i < BUCKETS_COUNT; i++) { |
| 223 | cfg->buckets[i].burst_length = 1; |
| 224 | } |
Alberto Garcia | 1588ab5 | 2016-02-18 12:27:00 +0200 | [diff] [blame] | 225 | } |
| 226 | |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 227 | /* To be called first on the ThrottleState */ |
Benoît Canet | 0e5b0a2 | 2015-06-08 18:17:41 +0200 | [diff] [blame] | 228 | void throttle_init(ThrottleState *ts) |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 229 | { |
| 230 | memset(ts, 0, sizeof(ThrottleState)); |
Alberto Garcia | 1588ab5 | 2016-02-18 12:27:00 +0200 | [diff] [blame] | 231 | throttle_config_init(&ts->cfg); |
Benoît Canet | 0e5b0a2 | 2015-06-08 18:17:41 +0200 | [diff] [blame] | 232 | } |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 233 | |
Benoît Canet | 0e5b0a2 | 2015-06-08 18:17:41 +0200 | [diff] [blame] | 234 | /* To be called first on the ThrottleTimers */ |
| 235 | void throttle_timers_init(ThrottleTimers *tt, |
| 236 | AioContext *aio_context, |
| 237 | QEMUClockType clock_type, |
| 238 | QEMUTimerCB *read_timer_cb, |
| 239 | QEMUTimerCB *write_timer_cb, |
| 240 | void *timer_opaque) |
| 241 | { |
zhenwei pi | d85b08c | 2023-07-28 10:20:00 +0800 | [diff] [blame] | 242 | assert(read_timer_cb || write_timer_cb); |
Benoît Canet | 0e5b0a2 | 2015-06-08 18:17:41 +0200 | [diff] [blame] | 243 | memset(tt, 0, sizeof(ThrottleTimers)); |
| 244 | |
| 245 | tt->clock_type = clock_type; |
zhenwei pi | 8ba02c2 | 2023-07-28 10:19:58 +0800 | [diff] [blame] | 246 | tt->timer_cb[THROTTLE_READ] = read_timer_cb; |
| 247 | tt->timer_cb[THROTTLE_WRITE] = write_timer_cb; |
Benoît Canet | 0e5b0a2 | 2015-06-08 18:17:41 +0200 | [diff] [blame] | 248 | tt->timer_opaque = timer_opaque; |
| 249 | throttle_timers_attach_aio_context(tt, aio_context); |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 250 | } |
| 251 | |
| 252 | /* destroy a timer */ |
| 253 | static void throttle_timer_destroy(QEMUTimer **timer) |
| 254 | { |
zhenwei pi | d85b08c | 2023-07-28 10:20:00 +0800 | [diff] [blame] | 255 | if (*timer == NULL) { |
| 256 | return; |
| 257 | } |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 258 | |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 259 | timer_free(*timer); |
| 260 | *timer = NULL; |
| 261 | } |
| 262 | |
Stefan Hajnoczi | 13af91e | 2014-05-14 16:22:45 +0200 | [diff] [blame] | 263 | /* Remove timers from event loop */ |
Benoît Canet | 0e5b0a2 | 2015-06-08 18:17:41 +0200 | [diff] [blame] | 264 | void throttle_timers_detach_aio_context(ThrottleTimers *tt) |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 265 | { |
zhenwei pi | d85b08c | 2023-07-28 10:20:00 +0800 | [diff] [blame] | 266 | ThrottleDirection dir; |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 267 | |
zhenwei pi | d85b08c | 2023-07-28 10:20:00 +0800 | [diff] [blame] | 268 | for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) { |
| 269 | throttle_timer_destroy(&tt->timers[dir]); |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 270 | } |
| 271 | } |
| 272 | |
Benoît Canet | 0e5b0a2 | 2015-06-08 18:17:41 +0200 | [diff] [blame] | 273 | /* To be called last on the ThrottleTimers */ |
| 274 | void throttle_timers_destroy(ThrottleTimers *tt) |
Stefan Hajnoczi | 13af91e | 2014-05-14 16:22:45 +0200 | [diff] [blame] | 275 | { |
Benoît Canet | 0e5b0a2 | 2015-06-08 18:17:41 +0200 | [diff] [blame] | 276 | throttle_timers_detach_aio_context(tt); |
Stefan Hajnoczi | 13af91e | 2014-05-14 16:22:45 +0200 | [diff] [blame] | 277 | } |
| 278 | |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 279 | /* is any throttling timer configured */ |
Benoît Canet | 0e5b0a2 | 2015-06-08 18:17:41 +0200 | [diff] [blame] | 280 | bool throttle_timers_are_initialized(ThrottleTimers *tt) |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 281 | { |
zhenwei pi | d85b08c | 2023-07-28 10:20:00 +0800 | [diff] [blame] | 282 | ThrottleDirection dir; |
| 283 | |
| 284 | for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) { |
| 285 | if (tt->timers[dir]) { |
| 286 | return true; |
| 287 | } |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 288 | } |
| 289 | |
| 290 | return false; |
| 291 | } |
| 292 | |
| 293 | /* Does any throttling must be done |
| 294 | * |
| 295 | * @cfg: the throttling configuration to inspect |
| 296 | * @ret: true if throttling must be done else false |
| 297 | */ |
| 298 | bool throttle_enabled(ThrottleConfig *cfg) |
| 299 | { |
| 300 | int i; |
| 301 | |
| 302 | for (i = 0; i < BUCKETS_COUNT; i++) { |
| 303 | if (cfg->buckets[i].avg > 0) { |
| 304 | return true; |
| 305 | } |
| 306 | } |
| 307 | |
| 308 | return false; |
| 309 | } |
| 310 | |
Alberto Garcia | d585108 | 2016-02-18 12:26:59 +0200 | [diff] [blame] | 311 | /* check if a throttling configuration is valid |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 312 | * @cfg: the throttling configuration to inspect |
Alberto Garcia | d585108 | 2016-02-18 12:26:59 +0200 | [diff] [blame] | 313 | * @ret: true if valid else false |
Alberto Garcia | 6921b18 | 2016-02-18 12:26:55 +0200 | [diff] [blame] | 314 | * @errp: error object |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 315 | */ |
Alberto Garcia | d585108 | 2016-02-18 12:26:59 +0200 | [diff] [blame] | 316 | bool throttle_is_valid(ThrottleConfig *cfg, Error **errp) |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 317 | { |
Alberto Garcia | d585108 | 2016-02-18 12:26:59 +0200 | [diff] [blame] | 318 | int i; |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 319 | bool bps_flag, ops_flag; |
| 320 | bool bps_max_flag, ops_max_flag; |
| 321 | |
| 322 | bps_flag = cfg->buckets[THROTTLE_BPS_TOTAL].avg && |
| 323 | (cfg->buckets[THROTTLE_BPS_READ].avg || |
| 324 | cfg->buckets[THROTTLE_BPS_WRITE].avg); |
| 325 | |
| 326 | ops_flag = cfg->buckets[THROTTLE_OPS_TOTAL].avg && |
| 327 | (cfg->buckets[THROTTLE_OPS_READ].avg || |
| 328 | cfg->buckets[THROTTLE_OPS_WRITE].avg); |
| 329 | |
| 330 | bps_max_flag = cfg->buckets[THROTTLE_BPS_TOTAL].max && |
| 331 | (cfg->buckets[THROTTLE_BPS_READ].max || |
| 332 | cfg->buckets[THROTTLE_BPS_WRITE].max); |
| 333 | |
| 334 | ops_max_flag = cfg->buckets[THROTTLE_OPS_TOTAL].max && |
| 335 | (cfg->buckets[THROTTLE_OPS_READ].max || |
| 336 | cfg->buckets[THROTTLE_OPS_WRITE].max); |
| 337 | |
Alberto Garcia | 6921b18 | 2016-02-18 12:26:55 +0200 | [diff] [blame] | 338 | if (bps_flag || ops_flag || bps_max_flag || ops_max_flag) { |
| 339 | error_setg(errp, "bps/iops/max total values and read/write values" |
| 340 | " cannot be used at the same time"); |
Alberto Garcia | d585108 | 2016-02-18 12:26:59 +0200 | [diff] [blame] | 341 | return false; |
Alberto Garcia | 6921b18 | 2016-02-18 12:26:55 +0200 | [diff] [blame] | 342 | } |
| 343 | |
Stefan Hajnoczi | 8860eab | 2016-06-01 17:40:31 -0700 | [diff] [blame] | 344 | if (cfg->op_size && |
| 345 | !cfg->buckets[THROTTLE_OPS_TOTAL].avg && |
| 346 | !cfg->buckets[THROTTLE_OPS_READ].avg && |
| 347 | !cfg->buckets[THROTTLE_OPS_WRITE].avg) { |
| 348 | error_setg(errp, "iops size requires an iops value to be set"); |
| 349 | return false; |
| 350 | } |
| 351 | |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 352 | for (i = 0; i < BUCKETS_COUNT; i++) { |
Alberto Garcia | fa36f1b | 2017-08-24 16:24:45 +0300 | [diff] [blame] | 353 | LeakyBucket *bkt = &cfg->buckets[i]; |
Alberto Garcia | d00e692 | 2017-08-24 16:24:47 +0300 | [diff] [blame] | 354 | if (bkt->avg > THROTTLE_VALUE_MAX || bkt->max > THROTTLE_VALUE_MAX) { |
Alberto Garcia | 03ba36c | 2016-02-18 12:26:57 +0200 | [diff] [blame] | 355 | error_setg(errp, "bps/iops/max values must be within [0, %lld]", |
| 356 | THROTTLE_VALUE_MAX); |
Fam Zheng | 972606c | 2016-01-20 12:21:20 +0800 | [diff] [blame] | 357 | return false; |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 358 | } |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 359 | |
Alberto Garcia | fa36f1b | 2017-08-24 16:24:45 +0300 | [diff] [blame] | 360 | if (!bkt->burst_length) { |
Alberto Garcia | 100f8f2 | 2016-02-18 12:27:01 +0200 | [diff] [blame] | 361 | error_setg(errp, "the burst length cannot be 0"); |
| 362 | return false; |
| 363 | } |
| 364 | |
Alberto Garcia | fa36f1b | 2017-08-24 16:24:45 +0300 | [diff] [blame] | 365 | if (bkt->burst_length > 1 && !bkt->max) { |
Alberto Garcia | 100f8f2 | 2016-02-18 12:27:01 +0200 | [diff] [blame] | 366 | error_setg(errp, "burst length set without burst rate"); |
| 367 | return false; |
| 368 | } |
| 369 | |
Alberto Garcia | 67335a4 | 2017-08-24 16:24:48 +0300 | [diff] [blame] | 370 | if (bkt->max && bkt->burst_length > THROTTLE_VALUE_MAX / bkt->max) { |
| 371 | error_setg(errp, "burst length too high for this burst rate"); |
| 372 | return false; |
| 373 | } |
| 374 | |
Alberto Garcia | fa36f1b | 2017-08-24 16:24:45 +0300 | [diff] [blame] | 375 | if (bkt->max && !bkt->avg) { |
Alberto Garcia | 45b2d41 | 2016-02-18 12:26:56 +0200 | [diff] [blame] | 376 | error_setg(errp, "bps_max/iops_max require corresponding" |
| 377 | " bps/iops values"); |
Alberto Garcia | d585108 | 2016-02-18 12:26:59 +0200 | [diff] [blame] | 378 | return false; |
Stefan Hajnoczi | ee2bdc3 | 2015-08-04 11:22:12 +0100 | [diff] [blame] | 379 | } |
Alberto Garcia | aaa1e77 | 2016-07-28 11:08:12 +0300 | [diff] [blame] | 380 | |
Alberto Garcia | fa36f1b | 2017-08-24 16:24:45 +0300 | [diff] [blame] | 381 | if (bkt->max && bkt->max < bkt->avg) { |
Alberto Garcia | aaa1e77 | 2016-07-28 11:08:12 +0300 | [diff] [blame] | 382 | error_setg(errp, "bps_max/iops_max cannot be lower than bps/iops"); |
| 383 | return false; |
| 384 | } |
Stefan Hajnoczi | ee2bdc3 | 2015-08-04 11:22:12 +0100 | [diff] [blame] | 385 | } |
Alberto Garcia | d585108 | 2016-02-18 12:26:59 +0200 | [diff] [blame] | 386 | |
| 387 | return true; |
Stefan Hajnoczi | ee2bdc3 | 2015-08-04 11:22:12 +0100 | [diff] [blame] | 388 | } |
| 389 | |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 390 | /* Used to configure the throttle |
| 391 | * |
| 392 | * @ts: the throttle state we are working on |
Manos Pitsidianakis | dbe824c | 2017-07-02 13:06:45 +0300 | [diff] [blame] | 393 | * @clock_type: the group's clock_type |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 394 | * @cfg: the config to set |
| 395 | */ |
Benoît Canet | 0e5b0a2 | 2015-06-08 18:17:41 +0200 | [diff] [blame] | 396 | void throttle_config(ThrottleState *ts, |
Manos Pitsidianakis | dbe824c | 2017-07-02 13:06:45 +0300 | [diff] [blame] | 397 | QEMUClockType clock_type, |
Benoît Canet | 0e5b0a2 | 2015-06-08 18:17:41 +0200 | [diff] [blame] | 398 | ThrottleConfig *cfg) |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 399 | { |
| 400 | int i; |
| 401 | |
| 402 | ts->cfg = *cfg; |
| 403 | |
Alberto Garcia | 2a8be39 | 2017-08-24 16:24:46 +0300 | [diff] [blame] | 404 | /* Zero bucket level */ |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 405 | for (i = 0; i < BUCKETS_COUNT; i++) { |
Alberto Garcia | 2a8be39 | 2017-08-24 16:24:46 +0300 | [diff] [blame] | 406 | ts->cfg.buckets[i].level = 0; |
| 407 | ts->cfg.buckets[i].burst_level = 0; |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 408 | } |
| 409 | |
Manos Pitsidianakis | dbe824c | 2017-07-02 13:06:45 +0300 | [diff] [blame] | 410 | ts->previous_leak = qemu_clock_get_ns(clock_type); |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 411 | } |
| 412 | |
| 413 | /* used to get config |
| 414 | * |
| 415 | * @ts: the throttle state we are working on |
| 416 | * @cfg: the config to write |
| 417 | */ |
| 418 | void throttle_get_config(ThrottleState *ts, ThrottleConfig *cfg) |
| 419 | { |
| 420 | *cfg = ts->cfg; |
| 421 | } |
| 422 | |
| 423 | |
| 424 | /* Schedule the read or write timer if needed |
| 425 | * |
| 426 | * NOTE: this function is not unit tested due to it's usage of timer_mod |
| 427 | * |
Benoît Canet | 0e5b0a2 | 2015-06-08 18:17:41 +0200 | [diff] [blame] | 428 | * @tt: the timers structure |
zhenwei pi | e76f201f | 2023-07-28 10:20:03 +0800 | [diff] [blame] | 429 | * @direction: throttle direction |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 430 | * @ret: true if the timer has been scheduled else false |
| 431 | */ |
Benoît Canet | 0e5b0a2 | 2015-06-08 18:17:41 +0200 | [diff] [blame] | 432 | bool throttle_schedule_timer(ThrottleState *ts, |
| 433 | ThrottleTimers *tt, |
zhenwei pi | e76f201f | 2023-07-28 10:20:03 +0800 | [diff] [blame] | 434 | ThrottleDirection direction) |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 435 | { |
Benoît Canet | 0e5b0a2 | 2015-06-08 18:17:41 +0200 | [diff] [blame] | 436 | int64_t now = qemu_clock_get_ns(tt->clock_type); |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 437 | int64_t next_timestamp; |
zhenwei pi | d85b08c | 2023-07-28 10:20:00 +0800 | [diff] [blame] | 438 | QEMUTimer *timer; |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 439 | bool must_wait; |
| 440 | |
zhenwei pi | e76f201f | 2023-07-28 10:20:03 +0800 | [diff] [blame] | 441 | assert(direction < THROTTLE_MAX); |
| 442 | timer = tt->timers[direction]; |
zhenwei pi | d85b08c | 2023-07-28 10:20:00 +0800 | [diff] [blame] | 443 | assert(timer); |
| 444 | |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 445 | must_wait = throttle_compute_timer(ts, |
zhenwei pi | e76f201f | 2023-07-28 10:20:03 +0800 | [diff] [blame] | 446 | direction, |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 447 | now, |
| 448 | &next_timestamp); |
| 449 | |
| 450 | /* request not throttled */ |
| 451 | if (!must_wait) { |
| 452 | return false; |
| 453 | } |
| 454 | |
| 455 | /* request throttled and timer pending -> do nothing */ |
zhenwei pi | d85b08c | 2023-07-28 10:20:00 +0800 | [diff] [blame] | 456 | if (timer_pending(timer)) { |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 457 | return true; |
| 458 | } |
| 459 | |
| 460 | /* request throttled and timer not pending -> arm timer */ |
zhenwei pi | d85b08c | 2023-07-28 10:20:00 +0800 | [diff] [blame] | 461 | timer_mod(timer, next_timestamp); |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 462 | return true; |
| 463 | } |
| 464 | |
| 465 | /* do the accounting for this operation |
| 466 | * |
zhenwei pi | e76f201f | 2023-07-28 10:20:03 +0800 | [diff] [blame] | 467 | * @direction: throttle direction |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 468 | * @size: the size of the operation |
| 469 | */ |
zhenwei pi | e76f201f | 2023-07-28 10:20:03 +0800 | [diff] [blame] | 470 | void throttle_account(ThrottleState *ts, ThrottleDirection direction, |
| 471 | uint64_t size) |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 472 | { |
zhenwei pi | 7017313 | 2023-07-28 10:20:04 +0800 | [diff] [blame] | 473 | static const BucketType bucket_types_size[THROTTLE_MAX][2] = { |
Alberto Garcia | 100f8f2 | 2016-02-18 12:27:01 +0200 | [diff] [blame] | 474 | { THROTTLE_BPS_TOTAL, THROTTLE_BPS_READ }, |
| 475 | { THROTTLE_BPS_TOTAL, THROTTLE_BPS_WRITE } |
| 476 | }; |
zhenwei pi | 7017313 | 2023-07-28 10:20:04 +0800 | [diff] [blame] | 477 | static const BucketType bucket_types_units[THROTTLE_MAX][2] = { |
Alberto Garcia | 100f8f2 | 2016-02-18 12:27:01 +0200 | [diff] [blame] | 478 | { THROTTLE_OPS_TOTAL, THROTTLE_OPS_READ }, |
| 479 | { THROTTLE_OPS_TOTAL, THROTTLE_OPS_WRITE } |
| 480 | }; |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 481 | double units = 1.0; |
Alberto Garcia | 100f8f2 | 2016-02-18 12:27:01 +0200 | [diff] [blame] | 482 | unsigned i; |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 483 | |
zhenwei pi | e76f201f | 2023-07-28 10:20:03 +0800 | [diff] [blame] | 484 | assert(direction < THROTTLE_MAX); |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 485 | /* if cfg.op_size is defined and smaller than size we compute unit count */ |
| 486 | if (ts->cfg.op_size && size > ts->cfg.op_size) { |
| 487 | units = (double) size / ts->cfg.op_size; |
| 488 | } |
| 489 | |
zhenwei pi | 7017313 | 2023-07-28 10:20:04 +0800 | [diff] [blame] | 490 | for (i = 0; i < ARRAY_SIZE(bucket_types_size[THROTTLE_READ]); i++) { |
Alberto Garcia | 100f8f2 | 2016-02-18 12:27:01 +0200 | [diff] [blame] | 491 | LeakyBucket *bkt; |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 492 | |
zhenwei pi | e76f201f | 2023-07-28 10:20:03 +0800 | [diff] [blame] | 493 | bkt = &ts->cfg.buckets[bucket_types_size[direction][i]]; |
Alberto Garcia | 100f8f2 | 2016-02-18 12:27:01 +0200 | [diff] [blame] | 494 | bkt->level += size; |
| 495 | if (bkt->burst_length > 1) { |
| 496 | bkt->burst_level += size; |
| 497 | } |
| 498 | |
zhenwei pi | e76f201f | 2023-07-28 10:20:03 +0800 | [diff] [blame] | 499 | bkt = &ts->cfg.buckets[bucket_types_units[direction][i]]; |
Alberto Garcia | 100f8f2 | 2016-02-18 12:27:01 +0200 | [diff] [blame] | 500 | bkt->level += units; |
| 501 | if (bkt->burst_length > 1) { |
| 502 | bkt->burst_level += units; |
| 503 | } |
Benoît Canet | 5ddfffb | 2013-09-02 14:14:37 +0200 | [diff] [blame] | 504 | } |
| 505 | } |
| 506 | |
Manos Pitsidianakis | 432d889 | 2017-08-25 16:20:26 +0300 | [diff] [blame] | 507 | /* return a ThrottleConfig based on the options in a ThrottleLimits |
| 508 | * |
| 509 | * @arg: the ThrottleLimits object to read from |
| 510 | * @cfg: the ThrottleConfig to edit |
| 511 | * @errp: error object |
| 512 | */ |
| 513 | void throttle_limits_to_config(ThrottleLimits *arg, ThrottleConfig *cfg, |
| 514 | Error **errp) |
| 515 | { |
| 516 | if (arg->has_bps_total) { |
| 517 | cfg->buckets[THROTTLE_BPS_TOTAL].avg = arg->bps_total; |
| 518 | } |
| 519 | if (arg->has_bps_read) { |
| 520 | cfg->buckets[THROTTLE_BPS_READ].avg = arg->bps_read; |
| 521 | } |
| 522 | if (arg->has_bps_write) { |
| 523 | cfg->buckets[THROTTLE_BPS_WRITE].avg = arg->bps_write; |
| 524 | } |
| 525 | |
| 526 | if (arg->has_iops_total) { |
| 527 | cfg->buckets[THROTTLE_OPS_TOTAL].avg = arg->iops_total; |
| 528 | } |
| 529 | if (arg->has_iops_read) { |
| 530 | cfg->buckets[THROTTLE_OPS_READ].avg = arg->iops_read; |
| 531 | } |
| 532 | if (arg->has_iops_write) { |
| 533 | cfg->buckets[THROTTLE_OPS_WRITE].avg = arg->iops_write; |
| 534 | } |
| 535 | |
| 536 | if (arg->has_bps_total_max) { |
| 537 | cfg->buckets[THROTTLE_BPS_TOTAL].max = arg->bps_total_max; |
| 538 | } |
| 539 | if (arg->has_bps_read_max) { |
| 540 | cfg->buckets[THROTTLE_BPS_READ].max = arg->bps_read_max; |
| 541 | } |
| 542 | if (arg->has_bps_write_max) { |
| 543 | cfg->buckets[THROTTLE_BPS_WRITE].max = arg->bps_write_max; |
| 544 | } |
| 545 | if (arg->has_iops_total_max) { |
| 546 | cfg->buckets[THROTTLE_OPS_TOTAL].max = arg->iops_total_max; |
| 547 | } |
| 548 | if (arg->has_iops_read_max) { |
| 549 | cfg->buckets[THROTTLE_OPS_READ].max = arg->iops_read_max; |
| 550 | } |
| 551 | if (arg->has_iops_write_max) { |
| 552 | cfg->buckets[THROTTLE_OPS_WRITE].max = arg->iops_write_max; |
| 553 | } |
| 554 | |
| 555 | if (arg->has_bps_total_max_length) { |
| 556 | if (arg->bps_total_max_length > UINT_MAX) { |
| 557 | error_setg(errp, "bps-total-max-length value must be in" |
| 558 | " the range [0, %u]", UINT_MAX); |
| 559 | return; |
| 560 | } |
| 561 | cfg->buckets[THROTTLE_BPS_TOTAL].burst_length = arg->bps_total_max_length; |
| 562 | } |
| 563 | if (arg->has_bps_read_max_length) { |
| 564 | if (arg->bps_read_max_length > UINT_MAX) { |
| 565 | error_setg(errp, "bps-read-max-length value must be in" |
| 566 | " the range [0, %u]", UINT_MAX); |
| 567 | return; |
| 568 | } |
| 569 | cfg->buckets[THROTTLE_BPS_READ].burst_length = arg->bps_read_max_length; |
| 570 | } |
| 571 | if (arg->has_bps_write_max_length) { |
| 572 | if (arg->bps_write_max_length > UINT_MAX) { |
| 573 | error_setg(errp, "bps-write-max-length value must be in" |
| 574 | " the range [0, %u]", UINT_MAX); |
| 575 | return; |
| 576 | } |
| 577 | cfg->buckets[THROTTLE_BPS_WRITE].burst_length = arg->bps_write_max_length; |
| 578 | } |
| 579 | if (arg->has_iops_total_max_length) { |
| 580 | if (arg->iops_total_max_length > UINT_MAX) { |
| 581 | error_setg(errp, "iops-total-max-length value must be in" |
| 582 | " the range [0, %u]", UINT_MAX); |
| 583 | return; |
| 584 | } |
| 585 | cfg->buckets[THROTTLE_OPS_TOTAL].burst_length = arg->iops_total_max_length; |
| 586 | } |
| 587 | if (arg->has_iops_read_max_length) { |
| 588 | if (arg->iops_read_max_length > UINT_MAX) { |
| 589 | error_setg(errp, "iops-read-max-length value must be in" |
| 590 | " the range [0, %u]", UINT_MAX); |
| 591 | return; |
| 592 | } |
| 593 | cfg->buckets[THROTTLE_OPS_READ].burst_length = arg->iops_read_max_length; |
| 594 | } |
| 595 | if (arg->has_iops_write_max_length) { |
| 596 | if (arg->iops_write_max_length > UINT_MAX) { |
| 597 | error_setg(errp, "iops-write-max-length value must be in" |
| 598 | " the range [0, %u]", UINT_MAX); |
| 599 | return; |
| 600 | } |
| 601 | cfg->buckets[THROTTLE_OPS_WRITE].burst_length = arg->iops_write_max_length; |
| 602 | } |
| 603 | |
| 604 | if (arg->has_iops_size) { |
| 605 | cfg->op_size = arg->iops_size; |
| 606 | } |
| 607 | |
| 608 | throttle_is_valid(cfg, errp); |
| 609 | } |
| 610 | |
| 611 | /* write the options of a ThrottleConfig to a ThrottleLimits |
| 612 | * |
| 613 | * @cfg: the ThrottleConfig to read from |
| 614 | * @var: the ThrottleLimits to write to |
| 615 | */ |
| 616 | void throttle_config_to_limits(ThrottleConfig *cfg, ThrottleLimits *var) |
| 617 | { |
| 618 | var->bps_total = cfg->buckets[THROTTLE_BPS_TOTAL].avg; |
| 619 | var->bps_read = cfg->buckets[THROTTLE_BPS_READ].avg; |
| 620 | var->bps_write = cfg->buckets[THROTTLE_BPS_WRITE].avg; |
| 621 | var->iops_total = cfg->buckets[THROTTLE_OPS_TOTAL].avg; |
| 622 | var->iops_read = cfg->buckets[THROTTLE_OPS_READ].avg; |
| 623 | var->iops_write = cfg->buckets[THROTTLE_OPS_WRITE].avg; |
| 624 | var->bps_total_max = cfg->buckets[THROTTLE_BPS_TOTAL].max; |
| 625 | var->bps_read_max = cfg->buckets[THROTTLE_BPS_READ].max; |
| 626 | var->bps_write_max = cfg->buckets[THROTTLE_BPS_WRITE].max; |
| 627 | var->iops_total_max = cfg->buckets[THROTTLE_OPS_TOTAL].max; |
| 628 | var->iops_read_max = cfg->buckets[THROTTLE_OPS_READ].max; |
| 629 | var->iops_write_max = cfg->buckets[THROTTLE_OPS_WRITE].max; |
| 630 | var->bps_total_max_length = cfg->buckets[THROTTLE_BPS_TOTAL].burst_length; |
| 631 | var->bps_read_max_length = cfg->buckets[THROTTLE_BPS_READ].burst_length; |
| 632 | var->bps_write_max_length = cfg->buckets[THROTTLE_BPS_WRITE].burst_length; |
| 633 | var->iops_total_max_length = cfg->buckets[THROTTLE_OPS_TOTAL].burst_length; |
| 634 | var->iops_read_max_length = cfg->buckets[THROTTLE_OPS_READ].burst_length; |
| 635 | var->iops_write_max_length = cfg->buckets[THROTTLE_OPS_WRITE].burst_length; |
| 636 | var->iops_size = cfg->op_size; |
| 637 | |
| 638 | var->has_bps_total = true; |
| 639 | var->has_bps_read = true; |
| 640 | var->has_bps_write = true; |
| 641 | var->has_iops_total = true; |
| 642 | var->has_iops_read = true; |
| 643 | var->has_iops_write = true; |
| 644 | var->has_bps_total_max = true; |
| 645 | var->has_bps_read_max = true; |
| 646 | var->has_bps_write_max = true; |
| 647 | var->has_iops_total_max = true; |
| 648 | var->has_iops_read_max = true; |
| 649 | var->has_iops_write_max = true; |
| 650 | var->has_bps_read_max_length = true; |
| 651 | var->has_bps_total_max_length = true; |
| 652 | var->has_bps_write_max_length = true; |
| 653 | var->has_iops_total_max_length = true; |
| 654 | var->has_iops_read_max_length = true; |
| 655 | var->has_iops_write_max_length = true; |
| 656 | var->has_iops_size = true; |
| 657 | } |