Blame - util/throttle.c - qemu

blob: 02e6f15587f8ba4e2c482093f7e635ed0f796f69 [file] [log] [blame]

Benoît Canet	5ddfffb	2013-09-02 14:14:37 +0200	[diff] [blame]	1	/*
				2	* QEMU throttling infrastructure
				3	*
				4	* Copyright (C) Nodalink, SARL. 2013
				5	*
				6	* Author:
				7	* Benoît Canet <benoit.canet@irqsave.net>
				8	*
				9	* This program is free software; you can redistribute it and/or
				10	* modify it under the terms of the GNU General Public License as
				11	* published by the Free Software Foundation; either version 2 or
				12	* (at your option) version 3 of the License.
				13	*
				14	* This program is distributed in the hope that it will be useful,
				15	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				16	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				17	* GNU General Public License for more details.
				18	*
				19	* You should have received a copy of the GNU General Public License
				20	* along with this program; if not, see <http://www.gnu.org/licenses/>.
				21	*/
				22
				23	#include "qemu/throttle.h"
				24	#include "qemu/timer.h"
				25
				26	/* This function make a bucket leak
				27	*
				28	* @bkt: the bucket to make leak
				29	* @delta_ns: the time delta
				30	*/
				31	void throttle_leak_bucket(LeakyBucket *bkt, int64_t delta_ns)
				32	{
				33	double leak;
				34
				35	/* compute how much to leak */
				36	leak = (bkt->avg * (double) delta_ns) / NANOSECONDS_PER_SECOND;
				37
				38	/* make the bucket leak */
				39	bkt->level = MAX(bkt->level - leak, 0);
				40	}
				41
				42	/* Calculate the time delta since last leak and make proportionals leaks
				43	*
				44	* @now: the current timestamp in ns
				45	*/
				46	static void throttle_do_leak(ThrottleState *ts, int64_t now)
				47	{
				48	/* compute the time elapsed since the last leak */
				49	int64_t delta_ns = now - ts->previous_leak;
				50	int i;
				51
				52	ts->previous_leak = now;
				53
				54	if (delta_ns <= 0) {
				55	return;
				56	}
				57
				58	/* make each bucket leak */
				59	for (i = 0; i < BUCKETS_COUNT; i++) {
				60	throttle_leak_bucket(&ts->cfg.buckets[i], delta_ns);
				61	}
				62	}
				63
				64	/* do the real job of computing the time to wait
				65	*
				66	* @limit: the throttling limit
				67	* @extra: the number of operation to delay
				68	* @ret: the time to wait in ns
				69	*/
				70	static int64_t throttle_do_compute_wait(double limit, double extra)
				71	{
				72	double wait = extra * NANOSECONDS_PER_SECOND;
				73	wait /= limit;
				74	return wait;
				75	}
				76
				77	/* This function compute the wait time in ns that a leaky bucket should trigger
				78	*
				79	* @bkt: the leaky bucket we operate on
				80	* @ret: the resulting wait time in ns or 0 if the operation can go through
				81	*/
				82	int64_t throttle_compute_wait(LeakyBucket *bkt)
				83	{
				84	double extra; /* the number of extra units blocking the io */
				85
				86	if (!bkt->avg) {
				87	return 0;
				88	}
				89
				90	extra = bkt->level - bkt->max;
				91
				92	if (extra <= 0) {
				93	return 0;
				94	}
				95
				96	return throttle_do_compute_wait(bkt->avg, extra);
				97	}
				98
				99	/* This function compute the time that must be waited while this IO
				100	*
				101	* @is_write: true if the current IO is a write, false if it's a read
				102	* @ret: time to wait
				103	*/
				104	static int64_t throttle_compute_wait_for(ThrottleState *ts,
				105	bool is_write)
				106	{
				107	BucketType to_check[2][4] = { {THROTTLE_BPS_TOTAL,
				108	THROTTLE_OPS_TOTAL,
				109	THROTTLE_BPS_READ,
				110	THROTTLE_OPS_READ},
				111	{THROTTLE_BPS_TOTAL,
				112	THROTTLE_OPS_TOTAL,
				113	THROTTLE_BPS_WRITE,
				114	THROTTLE_OPS_WRITE}, };
				115	int64_t wait, max_wait = 0;
				116	int i;
				117
				118	for (i = 0; i < 4; i++) {
				119	BucketType index = to_check[is_write][i];
				120	wait = throttle_compute_wait(&ts->cfg.buckets[index]);
				121	if (wait > max_wait) {
				122	max_wait = wait;
				123	}
				124	}
				125
				126	return max_wait;
				127	}
				128
				129	/* compute the timer for this type of operation
				130	*
				131	* @is_write: the type of operation
				132	* @now: the current clock timestamp
				133	* @next_timestamp: the resulting timer
				134	* @ret: true if a timer must be set
				135	*/
				136	bool throttle_compute_timer(ThrottleState *ts,
				137	bool is_write,
				138	int64_t now,
				139	int64_t *next_timestamp)
				140	{
				141	int64_t wait;
				142
				143	/* leak proportionally to the time elapsed */
				144	throttle_do_leak(ts, now);
				145
				146	/* compute the wait time if any */
				147	wait = throttle_compute_wait_for(ts, is_write);
				148
				149	/* if the code must wait compute when the next timer should fire */
				150	if (wait) {
				151	*next_timestamp = now + wait;
				152	return true;
				153	}
				154
				155	/* else no need to wait at all */
				156	*next_timestamp = now;
				157	return false;
				158	}
				159
				160	/* To be called first on the ThrottleState */
				161	void throttle_init(ThrottleState *ts,
				162	QEMUClockType clock_type,
				163	QEMUTimerCB *read_timer_cb,
				164	QEMUTimerCB *write_timer_cb,
				165	void *timer_opaque)
				166	{
				167	memset(ts, 0, sizeof(ThrottleState));
				168
				169	ts->clock_type = clock_type;
				170	ts->timers[0] = timer_new_ns(clock_type, read_timer_cb, timer_opaque);
				171	ts->timers[1] = timer_new_ns(clock_type, write_timer_cb, timer_opaque);
				172	}
				173
				174	/* destroy a timer */
				175	static void throttle_timer_destroy(QEMUTimer **timer)
				176	{
				177	assert(*timer != NULL);
				178
				179	timer_del(*timer);
				180	timer_free(*timer);
				181	*timer = NULL;
				182	}
				183
				184	/* To be called last on the ThrottleState */
				185	void throttle_destroy(ThrottleState *ts)
				186	{
				187	int i;
				188
				189	for (i = 0; i < 2; i++) {
				190	throttle_timer_destroy(&ts->timers[i]);
				191	}
				192	}
				193
				194	/* is any throttling timer configured */
				195	bool throttle_have_timer(ThrottleState *ts)
				196	{
				197	if (ts->timers[0]) {
				198	return true;
				199	}
				200
				201	return false;
				202	}
				203
				204	/* Does any throttling must be done
				205	*
				206	* @cfg: the throttling configuration to inspect
				207	* @ret: true if throttling must be done else false
				208	*/
				209	bool throttle_enabled(ThrottleConfig *cfg)
				210	{
				211	int i;
				212
				213	for (i = 0; i < BUCKETS_COUNT; i++) {
				214	if (cfg->buckets[i].avg > 0) {
				215	return true;
				216	}
				217	}
				218
				219	return false;
				220	}
				221
				222	/* return true if any two throttling parameters conflicts
				223	*
				224	* @cfg: the throttling configuration to inspect
				225	* @ret: true if any conflict detected else false
				226	*/
				227	bool throttle_conflicting(ThrottleConfig *cfg)
				228	{
				229	bool bps_flag, ops_flag;
				230	bool bps_max_flag, ops_max_flag;
				231
				232	bps_flag = cfg->buckets[THROTTLE_BPS_TOTAL].avg &&
				233	(cfg->buckets[THROTTLE_BPS_READ].avg \|\|
				234	cfg->buckets[THROTTLE_BPS_WRITE].avg);
				235
				236	ops_flag = cfg->buckets[THROTTLE_OPS_TOTAL].avg &&
				237	(cfg->buckets[THROTTLE_OPS_READ].avg \|\|
				238	cfg->buckets[THROTTLE_OPS_WRITE].avg);
				239
				240	bps_max_flag = cfg->buckets[THROTTLE_BPS_TOTAL].max &&
				241	(cfg->buckets[THROTTLE_BPS_READ].max \|\|
				242	cfg->buckets[THROTTLE_BPS_WRITE].max);
				243
				244	ops_max_flag = cfg->buckets[THROTTLE_OPS_TOTAL].max &&
				245	(cfg->buckets[THROTTLE_OPS_READ].max \|\|
				246	cfg->buckets[THROTTLE_OPS_WRITE].max);
				247
				248	return bps_flag \|\| ops_flag \|\| bps_max_flag \|\| ops_max_flag;
				249	}
				250
				251	/* check if a throttling configuration is valid
				252	* @cfg: the throttling configuration to inspect
				253	* @ret: true if valid else false
				254	*/
				255	bool throttle_is_valid(ThrottleConfig *cfg)
				256	{
				257	bool invalid = false;
				258	int i;
				259
				260	for (i = 0; i < BUCKETS_COUNT; i++) {
				261	if (cfg->buckets[i].avg < 0) {
				262	invalid = true;
				263	}
				264	}
				265
				266	for (i = 0; i < BUCKETS_COUNT; i++) {
				267	if (cfg->buckets[i].max < 0) {
				268	invalid = true;
				269	}
				270	}
				271
				272	return !invalid;
				273	}
				274
				275	/* fix bucket parameters */
				276	static void throttle_fix_bucket(LeakyBucket *bkt)
				277	{
				278	double min;
				279
				280	/* zero bucket level */
				281	bkt->level = 0;
				282
				283	/* The following is done to cope with the Linux CFQ block scheduler
				284	* which regroup reads and writes by block of 100ms in the guest.
				285	* When they are two process one making reads and one making writes cfq
				286	* make a pattern looking like the following:
				287	* WWWWWWWWWWWRRRRRRRRRRRRRRWWWWWWWWWWWWWwRRRRRRRRRRRRRRRRR
				288	* Having a max burst value of 100ms of the average will help smooth the
				289	* throttling
				290	*/
				291	min = bkt->avg / 10;
				292	if (bkt->avg && !bkt->max) {
				293	bkt->max = min;
				294	}
				295	}
				296
				297	/* take care of canceling a timer */
				298	static void throttle_cancel_timer(QEMUTimer *timer)
				299	{
				300	assert(timer != NULL);
				301
				302	timer_del(timer);
				303	}
				304
				305	/* Used to configure the throttle
				306	*
				307	* @ts: the throttle state we are working on
				308	* @cfg: the config to set
				309	*/
				310	void throttle_config(ThrottleState ts, ThrottleConfig cfg)
				311	{
				312	int i;
				313
				314	ts->cfg = *cfg;
				315
				316	for (i = 0; i < BUCKETS_COUNT; i++) {
				317	throttle_fix_bucket(&ts->cfg.buckets[i]);
				318	}
				319
				320	ts->previous_leak = qemu_clock_get_ns(ts->clock_type);
				321
				322	for (i = 0; i < 2; i++) {
				323	throttle_cancel_timer(ts->timers[i]);
				324	}
				325	}
				326
				327	/* used to get config
				328	*
				329	* @ts: the throttle state we are working on
				330	* @cfg: the config to write
				331	*/
				332	void throttle_get_config(ThrottleState ts, ThrottleConfig cfg)
				333	{
				334	*cfg = ts->cfg;
				335	}
				336
				337
				338	/* Schedule the read or write timer if needed
				339	*
				340	* NOTE: this function is not unit tested due to it's usage of timer_mod
				341	*
				342	* @is_write: the type of operation (read/write)
				343	* @ret: true if the timer has been scheduled else false
				344	*/
				345	bool throttle_schedule_timer(ThrottleState *ts, bool is_write)
				346	{
				347	int64_t now = qemu_clock_get_ns(ts->clock_type);
				348	int64_t next_timestamp;
				349	bool must_wait;
				350
				351	must_wait = throttle_compute_timer(ts,
				352	is_write,
				353	now,
				354	&next_timestamp);
				355
				356	/* request not throttled */
				357	if (!must_wait) {
				358	return false;
				359	}
				360
				361	/* request throttled and timer pending -> do nothing */
				362	if (timer_pending(ts->timers[is_write])) {
				363	return true;
				364	}
				365
				366	/* request throttled and timer not pending -> arm timer */
				367	timer_mod(ts->timers[is_write], next_timestamp);
				368	return true;
				369	}
				370
				371	/* do the accounting for this operation
				372	*
				373	* @is_write: the type of operation (read/write)
				374	* @size: the size of the operation
				375	*/
				376	void throttle_account(ThrottleState *ts, bool is_write, uint64_t size)
				377	{
				378	double units = 1.0;
				379
				380	/* if cfg.op_size is defined and smaller than size we compute unit count */
				381	if (ts->cfg.op_size && size > ts->cfg.op_size) {
				382	units = (double) size / ts->cfg.op_size;
				383	}
				384
				385	ts->cfg.buckets[THROTTLE_BPS_TOTAL].level += size;
				386	ts->cfg.buckets[THROTTLE_OPS_TOTAL].level += units;
				387
				388	if (is_write) {
				389	ts->cfg.buckets[THROTTLE_BPS_WRITE].level += size;
				390	ts->cfg.buckets[THROTTLE_OPS_WRITE].level += units;
				391	} else {
				392	ts->cfg.buckets[THROTTLE_BPS_READ].level += size;
				393	ts->cfg.buckets[THROTTLE_OPS_READ].level += units;
				394	}
				395	}
				396