numa: Extend CLI to provide memory latency and bandwidth information
Add -numa hmat-lb option to provide System Locality Latency and
Bandwidth Information. These memory attributes help to build
System Locality Latency and Bandwidth Information Structure(s)
in ACPI Heterogeneous Memory Attribute Table (HMAT). Before using
hmat-lb option, enable HMAT with -machine hmat=on.
Acked-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Liu Jingqi <jingqi.liu@intel.com>
Signed-off-by: Tao Xu <tao3.xu@intel.com>
Message-Id: <20191213011929.2520-3-tao3.xu@intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
diff --git a/hw/core/numa.c b/hw/core/numa.c
index e60da99..34eb413 100644
--- a/hw/core/numa.c
+++ b/hw/core/numa.c
@@ -23,6 +23,7 @@
*/
#include "qemu/osdep.h"
+#include "qemu/units.h"
#include "sysemu/hostmem.h"
#include "sysemu/numa.h"
#include "sysemu/sysemu.h"
@@ -198,6 +199,186 @@
ms->numa_state->have_numa_distance = true;
}
+void parse_numa_hmat_lb(NumaState *numa_state, NumaHmatLBOptions *node,
+ Error **errp)
+{
+ int i, first_bit, last_bit;
+ uint64_t max_entry, temp_base, bitmap_copy;
+ NodeInfo *numa_info = numa_state->nodes;
+ HMAT_LB_Info *hmat_lb =
+ numa_state->hmat_lb[node->hierarchy][node->data_type];
+ HMAT_LB_Data lb_data = {};
+ HMAT_LB_Data *lb_temp;
+
+ /* Error checking */
+ if (node->initiator > numa_state->num_nodes) {
+ error_setg(errp, "Invalid initiator=%d, it should be less than %d",
+ node->initiator, numa_state->num_nodes);
+ return;
+ }
+ if (node->target > numa_state->num_nodes) {
+ error_setg(errp, "Invalid target=%d, it should be less than %d",
+ node->target, numa_state->num_nodes);
+ return;
+ }
+ if (!numa_info[node->initiator].has_cpu) {
+ error_setg(errp, "Invalid initiator=%d, it isn't an "
+ "initiator proximity domain", node->initiator);
+ return;
+ }
+ if (!numa_info[node->target].present) {
+ error_setg(errp, "The target=%d should point to an existing node",
+ node->target);
+ return;
+ }
+
+ if (!hmat_lb) {
+ hmat_lb = g_malloc0(sizeof(*hmat_lb));
+ numa_state->hmat_lb[node->hierarchy][node->data_type] = hmat_lb;
+ hmat_lb->list = g_array_new(false, true, sizeof(HMAT_LB_Data));
+ }
+ hmat_lb->hierarchy = node->hierarchy;
+ hmat_lb->data_type = node->data_type;
+ lb_data.initiator = node->initiator;
+ lb_data.target = node->target;
+
+ if (node->data_type <= HMATLB_DATA_TYPE_WRITE_LATENCY) {
+ /* Input latency data */
+
+ if (!node->has_latency) {
+ error_setg(errp, "Missing 'latency' option");
+ return;
+ }
+ if (node->has_bandwidth) {
+ error_setg(errp, "Invalid option 'bandwidth' since "
+ "the data type is latency");
+ return;
+ }
+
+ /* Detect duplicate configuration */
+ for (i = 0; i < hmat_lb->list->len; i++) {
+ lb_temp = &g_array_index(hmat_lb->list, HMAT_LB_Data, i);
+
+ if (node->initiator == lb_temp->initiator &&
+ node->target == lb_temp->target) {
+ error_setg(errp, "Duplicate configuration of the latency for "
+ "initiator=%d and target=%d", node->initiator,
+ node->target);
+ return;
+ }
+ }
+
+ hmat_lb->base = hmat_lb->base ? hmat_lb->base : UINT64_MAX;
+
+ if (node->latency) {
+ /* Calculate the temporary base and compressed latency */
+ max_entry = node->latency;
+ temp_base = 1;
+ while (QEMU_IS_ALIGNED(max_entry, 10)) {
+ max_entry /= 10;
+ temp_base *= 10;
+ }
+
+ /* Calculate the max compressed latency */
+ temp_base = MIN(hmat_lb->base, temp_base);
+ max_entry = node->latency / hmat_lb->base;
+ max_entry = MAX(hmat_lb->range_bitmap, max_entry);
+
+ /*
+ * For latency hmat_lb->range_bitmap record the max compressed
+ * latency which should be less than 0xFFFF (UINT16_MAX)
+ */
+ if (max_entry >= UINT16_MAX) {
+ error_setg(errp, "Latency %" PRIu64 " between initiator=%d and "
+ "target=%d should not differ from previously entered "
+ "min or max values on more than %d", node->latency,
+ node->initiator, node->target, UINT16_MAX - 1);
+ return;
+ } else {
+ hmat_lb->base = temp_base;
+ hmat_lb->range_bitmap = max_entry;
+ }
+
+ /*
+ * Set lb_info_provided bit 0 as 1,
+ * latency information is provided
+ */
+ numa_info[node->target].lb_info_provided |= BIT(0);
+ }
+ lb_data.data = node->latency;
+ } else if (node->data_type >= HMATLB_DATA_TYPE_ACCESS_BANDWIDTH) {
+ /* Input bandwidth data */
+ if (!node->has_bandwidth) {
+ error_setg(errp, "Missing 'bandwidth' option");
+ return;
+ }
+ if (node->has_latency) {
+ error_setg(errp, "Invalid option 'latency' since "
+ "the data type is bandwidth");
+ return;
+ }
+ if (!QEMU_IS_ALIGNED(node->bandwidth, MiB)) {
+ error_setg(errp, "Bandwidth %" PRIu64 " between initiator=%d and "
+ "target=%d should be 1MB aligned", node->bandwidth,
+ node->initiator, node->target);
+ return;
+ }
+
+ /* Detect duplicate configuration */
+ for (i = 0; i < hmat_lb->list->len; i++) {
+ lb_temp = &g_array_index(hmat_lb->list, HMAT_LB_Data, i);
+
+ if (node->initiator == lb_temp->initiator &&
+ node->target == lb_temp->target) {
+ error_setg(errp, "Duplicate configuration of the bandwidth for "
+ "initiator=%d and target=%d", node->initiator,
+ node->target);
+ return;
+ }
+ }
+
+ hmat_lb->base = hmat_lb->base ? hmat_lb->base : 1;
+
+ if (node->bandwidth) {
+ /* Keep bitmap unchanged when bandwidth out of range */
+ bitmap_copy = hmat_lb->range_bitmap;
+ bitmap_copy |= node->bandwidth;
+ first_bit = ctz64(bitmap_copy);
+ temp_base = UINT64_C(1) << first_bit;
+ max_entry = node->bandwidth / temp_base;
+ last_bit = 64 - clz64(bitmap_copy);
+
+ /*
+ * For bandwidth, first_bit record the base unit of bandwidth bits,
+ * last_bit record the last bit of the max bandwidth. The max
+ * compressed bandwidth should be less than 0xFFFF (UINT16_MAX)
+ */
+ if ((last_bit - first_bit) > UINT16_BITS ||
+ max_entry >= UINT16_MAX) {
+ error_setg(errp, "Bandwidth %" PRIu64 " between initiator=%d "
+ "and target=%d should not differ from previously "
+ "entered values on more than %d", node->bandwidth,
+ node->initiator, node->target, UINT16_MAX - 1);
+ return;
+ } else {
+ hmat_lb->base = temp_base;
+ hmat_lb->range_bitmap = bitmap_copy;
+ }
+
+ /*
+ * Set lb_info_provided bit 1 as 1,
+ * bandwidth information is provided
+ */
+ numa_info[node->target].lb_info_provided |= BIT(1);
+ }
+ lb_data.data = node->bandwidth;
+ } else {
+ assert(0);
+ }
+
+ g_array_append_val(hmat_lb->list, lb_data);
+}
+
void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp)
{
Error *err = NULL;
@@ -236,6 +417,19 @@
machine_set_cpu_numa_node(ms, qapi_NumaCpuOptions_base(&object->u.cpu),
&err);
break;
+ case NUMA_OPTIONS_TYPE_HMAT_LB:
+ if (!ms->numa_state->hmat_enabled) {
+ error_setg(errp, "ACPI Heterogeneous Memory Attribute Table "
+ "(HMAT) is disabled, enable it with -machine hmat=on "
+ "before using any of hmat specific options");
+ return;
+ }
+
+ parse_numa_hmat_lb(ms->numa_state, &object->u.hmat_lb, &err);
+ if (err) {
+ goto end;
+ }
+ break;
default:
abort();
}