| /* |
| * QEMU block throttling group infrastructure |
| * |
| * Copyright (C) Nodalink, EURL. 2014 |
| * Copyright (C) Igalia, S.L. 2015 |
| * |
| * Authors: |
| * Benoît Canet <benoit.canet@nodalink.com> |
| * Alberto Garcia <berto@igalia.com> |
| * |
| * This program is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU General Public License as |
| * published by the Free Software Foundation; either version 2 or |
| * (at your option) version 3 of the License. |
| * |
| * This program is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| * GNU General Public License for more details. |
| * |
| * You should have received a copy of the GNU General Public License |
| * along with this program; if not, see <http://www.gnu.org/licenses/>. |
| */ |
| |
| #include "qemu/osdep.h" |
| #include "sysemu/block-backend.h" |
| #include "block/throttle-groups.h" |
| #include "qemu/queue.h" |
| #include "qemu/thread.h" |
| #include "sysemu/qtest.h" |
| |
| /* The ThrottleGroup structure (with its ThrottleState) is shared |
| * among different BlockBackends and it's independent from |
| * AioContext, so in order to use it from different threads it needs |
| * its own locking. |
| * |
| * This locking is however handled internally in this file, so it's |
| * transparent to outside users. |
| * |
| * The whole ThrottleGroup structure is private and invisible to |
| * outside users, that only use it through its ThrottleState. |
| * |
| * In addition to the ThrottleGroup structure, BlockBackendPublic has |
| * fields that need to be accessed by other members of the group and |
| * therefore also need to be protected by this lock. Once a |
| * BlockBackend is registered in a group those fields can be accessed |
| * by other threads any time. |
| * |
| * Again, all this is handled internally and is mostly transparent to |
| * the outside. The 'throttle_timers' field however has an additional |
| * constraint because it may be temporarily invalid (see for example |
| * bdrv_set_aio_context()). Therefore in this file a thread will |
| * access some other BlockBackend's timers only after verifying that |
| * that BlockBackend has throttled requests in the queue. |
| */ |
| typedef struct ThrottleGroup { |
| char *name; /* This is constant during the lifetime of the group */ |
| |
| QemuMutex lock; /* This lock protects the following four fields */ |
| ThrottleState ts; |
| QLIST_HEAD(, BlockBackendPublic) head; |
| BlockBackend *tokens[2]; |
| bool any_timer_armed[2]; |
| |
| /* These two are protected by the global throttle_groups_lock */ |
| unsigned refcount; |
| QTAILQ_ENTRY(ThrottleGroup) list; |
| } ThrottleGroup; |
| |
| static QemuMutex throttle_groups_lock; |
| static QTAILQ_HEAD(, ThrottleGroup) throttle_groups = |
| QTAILQ_HEAD_INITIALIZER(throttle_groups); |
| |
| /* Increments the reference count of a ThrottleGroup given its name. |
| * |
| * If no ThrottleGroup is found with the given name a new one is |
| * created. |
| * |
| * @name: the name of the ThrottleGroup |
| * @ret: the ThrottleState member of the ThrottleGroup |
| */ |
| ThrottleState *throttle_group_incref(const char *name) |
| { |
| ThrottleGroup *tg = NULL; |
| ThrottleGroup *iter; |
| |
| qemu_mutex_lock(&throttle_groups_lock); |
| |
| /* Look for an existing group with that name */ |
| QTAILQ_FOREACH(iter, &throttle_groups, list) { |
| if (!strcmp(name, iter->name)) { |
| tg = iter; |
| break; |
| } |
| } |
| |
| /* Create a new one if not found */ |
| if (!tg) { |
| tg = g_new0(ThrottleGroup, 1); |
| tg->name = g_strdup(name); |
| qemu_mutex_init(&tg->lock); |
| throttle_init(&tg->ts); |
| QLIST_INIT(&tg->head); |
| |
| QTAILQ_INSERT_TAIL(&throttle_groups, tg, list); |
| } |
| |
| tg->refcount++; |
| |
| qemu_mutex_unlock(&throttle_groups_lock); |
| |
| return &tg->ts; |
| } |
| |
| /* Decrease the reference count of a ThrottleGroup. |
| * |
| * When the reference count reaches zero the ThrottleGroup is |
| * destroyed. |
| * |
| * @ts: The ThrottleGroup to unref, given by its ThrottleState member |
| */ |
| void throttle_group_unref(ThrottleState *ts) |
| { |
| ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); |
| |
| qemu_mutex_lock(&throttle_groups_lock); |
| if (--tg->refcount == 0) { |
| QTAILQ_REMOVE(&throttle_groups, tg, list); |
| qemu_mutex_destroy(&tg->lock); |
| g_free(tg->name); |
| g_free(tg); |
| } |
| qemu_mutex_unlock(&throttle_groups_lock); |
| } |
| |
| /* Get the name from a BlockBackend's ThrottleGroup. The name (and the pointer) |
| * is guaranteed to remain constant during the lifetime of the group. |
| * |
| * @blk: a BlockBackend that is member of a throttling group |
| * @ret: the name of the group. |
| */ |
| const char *throttle_group_get_name(BlockBackend *blk) |
| { |
| BlockBackendPublic *blkp = blk_get_public(blk); |
| ThrottleGroup *tg = container_of(blkp->throttle_state, ThrottleGroup, ts); |
| return tg->name; |
| } |
| |
| /* Return the next BlockBackend in the round-robin sequence, simulating a |
| * circular list. |
| * |
| * This assumes that tg->lock is held. |
| * |
| * @blk: the current BlockBackend |
| * @ret: the next BlockBackend in the sequence |
| */ |
| static BlockBackend *throttle_group_next_blk(BlockBackend *blk) |
| { |
| BlockBackendPublic *blkp = blk_get_public(blk); |
| ThrottleState *ts = blkp->throttle_state; |
| ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); |
| BlockBackendPublic *next = QLIST_NEXT(blkp, round_robin); |
| |
| if (!next) { |
| next = QLIST_FIRST(&tg->head); |
| } |
| |
| return blk_by_public(next); |
| } |
| |
| /* |
| * Return whether a BlockBackend has pending requests. |
| * |
| * This assumes that tg->lock is held. |
| * |
| * @blk: the BlockBackend |
| * @is_write: the type of operation (read/write) |
| * @ret: whether the BlockBackend has pending requests. |
| */ |
| static inline bool blk_has_pending_reqs(BlockBackend *blk, |
| bool is_write) |
| { |
| const BlockBackendPublic *blkp = blk_get_public(blk); |
| return blkp->pending_reqs[is_write]; |
| } |
| |
| /* Return the next BlockBackend in the round-robin sequence with pending I/O |
| * requests. |
| * |
| * This assumes that tg->lock is held. |
| * |
| * @blk: the current BlockBackend |
| * @is_write: the type of operation (read/write) |
| * @ret: the next BlockBackend with pending requests, or blk if there is |
| * none. |
| */ |
| static BlockBackend *next_throttle_token(BlockBackend *blk, bool is_write) |
| { |
| BlockBackendPublic *blkp = blk_get_public(blk); |
| ThrottleGroup *tg = container_of(blkp->throttle_state, ThrottleGroup, ts); |
| BlockBackend *token, *start; |
| |
| start = token = tg->tokens[is_write]; |
| |
| /* get next bs round in round robin style */ |
| token = throttle_group_next_blk(token); |
| while (token != start && !blk_has_pending_reqs(token, is_write)) { |
| token = throttle_group_next_blk(token); |
| } |
| |
| /* If no IO are queued for scheduling on the next round robin token |
| * then decide the token is the current bs because chances are |
| * the current bs get the current request queued. |
| */ |
| if (token == start && !blk_has_pending_reqs(token, is_write)) { |
| token = blk; |
| } |
| |
| /* Either we return the original BB, or one with pending requests */ |
| assert(token == blk || blk_has_pending_reqs(token, is_write)); |
| |
| return token; |
| } |
| |
| /* Check if the next I/O request for a BlockBackend needs to be throttled or |
| * not. If there's no timer set in this group, set one and update the token |
| * accordingly. |
| * |
| * This assumes that tg->lock is held. |
| * |
| * @blk: the current BlockBackend |
| * @is_write: the type of operation (read/write) |
| * @ret: whether the I/O request needs to be throttled or not |
| */ |
| static bool throttle_group_schedule_timer(BlockBackend *blk, bool is_write) |
| { |
| BlockBackendPublic *blkp = blk_get_public(blk); |
| ThrottleState *ts = blkp->throttle_state; |
| ThrottleTimers *tt = &blkp->throttle_timers; |
| ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); |
| bool must_wait; |
| |
| if (blkp->io_limits_disabled) { |
| return false; |
| } |
| |
| /* Check if any of the timers in this group is already armed */ |
| if (tg->any_timer_armed[is_write]) { |
| return true; |
| } |
| |
| must_wait = throttle_schedule_timer(ts, tt, is_write); |
| |
| /* If a timer just got armed, set blk as the current token */ |
| if (must_wait) { |
| tg->tokens[is_write] = blk; |
| tg->any_timer_armed[is_write] = true; |
| } |
| |
| return must_wait; |
| } |
| |
| /* Look for the next pending I/O request and schedule it. |
| * |
| * This assumes that tg->lock is held. |
| * |
| * @blk: the current BlockBackend |
| * @is_write: the type of operation (read/write) |
| */ |
| static void schedule_next_request(BlockBackend *blk, bool is_write) |
| { |
| BlockBackendPublic *blkp = blk_get_public(blk); |
| ThrottleGroup *tg = container_of(blkp->throttle_state, ThrottleGroup, ts); |
| bool must_wait; |
| BlockBackend *token; |
| |
| /* Check if there's any pending request to schedule next */ |
| token = next_throttle_token(blk, is_write); |
| if (!blk_has_pending_reqs(token, is_write)) { |
| return; |
| } |
| |
| /* Set a timer for the request if it needs to be throttled */ |
| must_wait = throttle_group_schedule_timer(token, is_write); |
| |
| /* If it doesn't have to wait, queue it for immediate execution */ |
| if (!must_wait) { |
| /* Give preference to requests from the current blk */ |
| if (qemu_in_coroutine() && |
| qemu_co_queue_next(&blkp->throttled_reqs[is_write])) { |
| token = blk; |
| } else { |
| ThrottleTimers *tt = &blk_get_public(token)->throttle_timers; |
| int64_t now = qemu_clock_get_ns(tt->clock_type); |
| timer_mod(tt->timers[is_write], now + 1); |
| tg->any_timer_armed[is_write] = true; |
| } |
| tg->tokens[is_write] = token; |
| } |
| } |
| |
| /* Check if an I/O request needs to be throttled, wait and set a timer |
| * if necessary, and schedule the next request using a round robin |
| * algorithm. |
| * |
| * @blk: the current BlockBackend |
| * @bytes: the number of bytes for this I/O |
| * @is_write: the type of operation (read/write) |
| */ |
| void coroutine_fn throttle_group_co_io_limits_intercept(BlockBackend *blk, |
| unsigned int bytes, |
| bool is_write) |
| { |
| bool must_wait; |
| BlockBackend *token; |
| |
| BlockBackendPublic *blkp = blk_get_public(blk); |
| ThrottleGroup *tg = container_of(blkp->throttle_state, ThrottleGroup, ts); |
| qemu_mutex_lock(&tg->lock); |
| |
| /* First we check if this I/O has to be throttled. */ |
| token = next_throttle_token(blk, is_write); |
| must_wait = throttle_group_schedule_timer(token, is_write); |
| |
| /* Wait if there's a timer set or queued requests of this type */ |
| if (must_wait || blkp->pending_reqs[is_write]) { |
| blkp->pending_reqs[is_write]++; |
| qemu_mutex_unlock(&tg->lock); |
| qemu_co_queue_wait(&blkp->throttled_reqs[is_write]); |
| qemu_mutex_lock(&tg->lock); |
| blkp->pending_reqs[is_write]--; |
| } |
| |
| /* The I/O will be executed, so do the accounting */ |
| throttle_account(blkp->throttle_state, is_write, bytes); |
| |
| /* Schedule the next request */ |
| schedule_next_request(blk, is_write); |
| |
| qemu_mutex_unlock(&tg->lock); |
| } |
| |
| void throttle_group_restart_blk(BlockBackend *blk) |
| { |
| BlockBackendPublic *blkp = blk_get_public(blk); |
| int i; |
| |
| for (i = 0; i < 2; i++) { |
| while (qemu_co_enter_next(&blkp->throttled_reqs[i])) { |
| ; |
| } |
| } |
| } |
| |
| /* Update the throttle configuration for a particular group. Similar |
| * to throttle_config(), but guarantees atomicity within the |
| * throttling group. |
| * |
| * @blk: a BlockBackend that is a member of the group |
| * @cfg: the configuration to set |
| */ |
| void throttle_group_config(BlockBackend *blk, ThrottleConfig *cfg) |
| { |
| BlockBackendPublic *blkp = blk_get_public(blk); |
| ThrottleTimers *tt = &blkp->throttle_timers; |
| ThrottleState *ts = blkp->throttle_state; |
| ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); |
| qemu_mutex_lock(&tg->lock); |
| /* throttle_config() cancels the timers */ |
| if (timer_pending(tt->timers[0])) { |
| tg->any_timer_armed[0] = false; |
| } |
| if (timer_pending(tt->timers[1])) { |
| tg->any_timer_armed[1] = false; |
| } |
| throttle_config(ts, tt, cfg); |
| qemu_mutex_unlock(&tg->lock); |
| |
| qemu_co_enter_next(&blkp->throttled_reqs[0]); |
| qemu_co_enter_next(&blkp->throttled_reqs[1]); |
| } |
| |
| /* Get the throttle configuration from a particular group. Similar to |
| * throttle_get_config(), but guarantees atomicity within the |
| * throttling group. |
| * |
| * @blk: a BlockBackend that is a member of the group |
| * @cfg: the configuration will be written here |
| */ |
| void throttle_group_get_config(BlockBackend *blk, ThrottleConfig *cfg) |
| { |
| BlockBackendPublic *blkp = blk_get_public(blk); |
| ThrottleState *ts = blkp->throttle_state; |
| ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); |
| qemu_mutex_lock(&tg->lock); |
| throttle_get_config(ts, cfg); |
| qemu_mutex_unlock(&tg->lock); |
| } |
| |
| /* ThrottleTimers callback. This wakes up a request that was waiting |
| * because it had been throttled. |
| * |
| * @blk: the BlockBackend whose request had been throttled |
| * @is_write: the type of operation (read/write) |
| */ |
| static void timer_cb(BlockBackend *blk, bool is_write) |
| { |
| BlockBackendPublic *blkp = blk_get_public(blk); |
| ThrottleState *ts = blkp->throttle_state; |
| ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); |
| bool empty_queue; |
| |
| /* The timer has just been fired, so we can update the flag */ |
| qemu_mutex_lock(&tg->lock); |
| tg->any_timer_armed[is_write] = false; |
| qemu_mutex_unlock(&tg->lock); |
| |
| /* Run the request that was waiting for this timer */ |
| empty_queue = !qemu_co_enter_next(&blkp->throttled_reqs[is_write]); |
| |
| /* If the request queue was empty then we have to take care of |
| * scheduling the next one */ |
| if (empty_queue) { |
| qemu_mutex_lock(&tg->lock); |
| schedule_next_request(blk, is_write); |
| qemu_mutex_unlock(&tg->lock); |
| } |
| } |
| |
| static void read_timer_cb(void *opaque) |
| { |
| timer_cb(opaque, false); |
| } |
| |
| static void write_timer_cb(void *opaque) |
| { |
| timer_cb(opaque, true); |
| } |
| |
| /* Register a BlockBackend in the throttling group, also initializing its |
| * timers and updating its throttle_state pointer to point to it. If a |
| * throttling group with that name does not exist yet, it will be created. |
| * |
| * @blk: the BlockBackend to insert |
| * @groupname: the name of the group |
| */ |
| void throttle_group_register_blk(BlockBackend *blk, const char *groupname) |
| { |
| int i; |
| BlockBackendPublic *blkp = blk_get_public(blk); |
| ThrottleState *ts = throttle_group_incref(groupname); |
| ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); |
| int clock_type = QEMU_CLOCK_REALTIME; |
| |
| if (qtest_enabled()) { |
| /* For testing block IO throttling only */ |
| clock_type = QEMU_CLOCK_VIRTUAL; |
| } |
| |
| blkp->throttle_state = ts; |
| |
| qemu_mutex_lock(&tg->lock); |
| /* If the ThrottleGroup is new set this BlockBackend as the token */ |
| for (i = 0; i < 2; i++) { |
| if (!tg->tokens[i]) { |
| tg->tokens[i] = blk; |
| } |
| } |
| |
| QLIST_INSERT_HEAD(&tg->head, blkp, round_robin); |
| |
| throttle_timers_init(&blkp->throttle_timers, |
| blk_get_aio_context(blk), |
| clock_type, |
| read_timer_cb, |
| write_timer_cb, |
| blk); |
| |
| qemu_mutex_unlock(&tg->lock); |
| } |
| |
| /* Unregister a BlockBackend from its group, removing it from the list, |
| * destroying the timers and setting the throttle_state pointer to NULL. |
| * |
| * The BlockBackend must not have pending throttled requests, so the caller has |
| * to drain them first. |
| * |
| * The group will be destroyed if it's empty after this operation. |
| * |
| * @blk: the BlockBackend to remove |
| */ |
| void throttle_group_unregister_blk(BlockBackend *blk) |
| { |
| BlockBackendPublic *blkp = blk_get_public(blk); |
| ThrottleGroup *tg = container_of(blkp->throttle_state, ThrottleGroup, ts); |
| int i; |
| |
| assert(blkp->pending_reqs[0] == 0 && blkp->pending_reqs[1] == 0); |
| assert(qemu_co_queue_empty(&blkp->throttled_reqs[0])); |
| assert(qemu_co_queue_empty(&blkp->throttled_reqs[1])); |
| |
| qemu_mutex_lock(&tg->lock); |
| for (i = 0; i < 2; i++) { |
| if (tg->tokens[i] == blk) { |
| BlockBackend *token = throttle_group_next_blk(blk); |
| /* Take care of the case where this is the last blk in the group */ |
| if (token == blk) { |
| token = NULL; |
| } |
| tg->tokens[i] = token; |
| } |
| } |
| |
| /* remove the current blk from the list */ |
| QLIST_REMOVE(blkp, round_robin); |
| throttle_timers_destroy(&blkp->throttle_timers); |
| qemu_mutex_unlock(&tg->lock); |
| |
| throttle_group_unref(&tg->ts); |
| blkp->throttle_state = NULL; |
| } |
| |
| static void throttle_groups_init(void) |
| { |
| qemu_mutex_init(&throttle_groups_lock); |
| } |
| |
| block_init(throttle_groups_init); |