throttle: Add support for burst periods

This patch adds support for burst periods to the throttling code.
With this feature the user can keep performing bursts as defined by
the LeakyBucket.max rate for a configurable period of time.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
diff --git a/util/throttle.c b/util/throttle.c
index 6a01cee..371c769 100644
--- a/util/throttle.c
+++ b/util/throttle.c
@@ -41,6 +41,14 @@
 
     /* make the bucket leak */
     bkt->level = MAX(bkt->level - leak, 0);
+
+    /* if we allow bursts for more than one second we also need to
+     * keep track of bkt->burst_level so the bkt->max goal per second
+     * is attained */
+    if (bkt->burst_length > 1) {
+        leak = (bkt->max * (double) delta_ns) / NANOSECONDS_PER_SECOND;
+        bkt->burst_level = MAX(bkt->burst_level - leak, 0);
+    }
 }
 
 /* Calculate the time delta since last leak and make proportionals leaks
@@ -91,13 +99,24 @@
         return 0;
     }
 
-    extra = bkt->level - bkt->max;
-
-    if (extra <= 0) {
-        return 0;
+    /* If the bucket is full then we have to wait */
+    extra = bkt->level - bkt->max * bkt->burst_length;
+    if (extra > 0) {
+        return throttle_do_compute_wait(bkt->avg, extra);
     }
 
-    return throttle_do_compute_wait(bkt->avg, extra);
+    /* If the bucket is not full yet we have to make sure that we
+     * fulfill the goal of bkt->max units per second. */
+    if (bkt->burst_length > 1) {
+        /* We use 1/10 of the max value to smooth the throttling.
+         * See throttle_fix_bucket() for more details. */
+        extra = bkt->burst_level - bkt->max / 10;
+        if (extra > 0) {
+            return throttle_do_compute_wait(bkt->max, extra);
+        }
+    }
+
+    return 0;
 }
 
 /* This function compute the time that must be waited while this IO
@@ -177,7 +196,11 @@
  */
 void throttle_config_init(ThrottleConfig *cfg)
 {
+    unsigned i;
     memset(cfg, 0, sizeof(*cfg));
+    for (i = 0; i < BUCKETS_COUNT; i++) {
+        cfg->buckets[i].burst_length = 1;
+    }
 }
 
 /* To be called first on the ThrottleState */
@@ -301,6 +324,16 @@
             return false;
         }
 
+        if (!cfg->buckets[i].burst_length) {
+            error_setg(errp, "the burst length cannot be 0");
+            return false;
+        }
+
+        if (cfg->buckets[i].burst_length > 1 && !cfg->buckets[i].max) {
+            error_setg(errp, "burst length set without burst rate");
+            return false;
+        }
+
         if (cfg->buckets[i].max && !cfg->buckets[i].avg) {
             error_setg(errp, "bps_max/iops_max require corresponding"
                        " bps/iops values");
@@ -317,7 +350,7 @@
     double min;
 
     /* zero bucket level */
-    bkt->level = 0;
+    bkt->level = bkt->burst_level = 0;
 
     /* The following is done to cope with the Linux CFQ block scheduler
      * which regroup reads and writes by block of 100ms in the guest.
@@ -420,22 +453,36 @@
  */
 void throttle_account(ThrottleState *ts, bool is_write, uint64_t size)
 {
+    const BucketType bucket_types_size[2][2] = {
+        { THROTTLE_BPS_TOTAL, THROTTLE_BPS_READ },
+        { THROTTLE_BPS_TOTAL, THROTTLE_BPS_WRITE }
+    };
+    const BucketType bucket_types_units[2][2] = {
+        { THROTTLE_OPS_TOTAL, THROTTLE_OPS_READ },
+        { THROTTLE_OPS_TOTAL, THROTTLE_OPS_WRITE }
+    };
     double units = 1.0;
+    unsigned i;
 
     /* if cfg.op_size is defined and smaller than size we compute unit count */
     if (ts->cfg.op_size && size > ts->cfg.op_size) {
         units = (double) size / ts->cfg.op_size;
     }
 
-    ts->cfg.buckets[THROTTLE_BPS_TOTAL].level += size;
-    ts->cfg.buckets[THROTTLE_OPS_TOTAL].level += units;
+    for (i = 0; i < 2; i++) {
+        LeakyBucket *bkt;
 
-    if (is_write) {
-        ts->cfg.buckets[THROTTLE_BPS_WRITE].level += size;
-        ts->cfg.buckets[THROTTLE_OPS_WRITE].level += units;
-    } else {
-        ts->cfg.buckets[THROTTLE_BPS_READ].level += size;
-        ts->cfg.buckets[THROTTLE_OPS_READ].level += units;
+        bkt = &ts->cfg.buckets[bucket_types_size[is_write][i]];
+        bkt->level += size;
+        if (bkt->burst_length > 1) {
+            bkt->burst_level += size;
+        }
+
+        bkt = &ts->cfg.buckets[bucket_types_units[is_write][i]];
+        bkt->level += units;
+        if (bkt->burst_length > 1) {
+            bkt->burst_level += units;
+        }
     }
 }