Merge remote-tracking branch 'remotes/jasowang/tags/net-pull-request' into staging
# gpg: Signature made Mon 12 Oct 2015 08:56:47 BST using RSA key ID 398D6211
# gpg: Good signature from "Jason Wang (Jason Wang on RedHat) <jasowang@redhat.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg: It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 215D 46F4 8246 689E C77F 3562 EF04 965B 398D 6211
* remotes/jasowang/tags/net-pull-request:
tests: add test cases for netfilter object
netfilter: add a netbuffer filter
net/queue: export qemu_net_queue_append_iov
netfilter: print filter info associate with the netdev
netfilter: add an API to pass the packet to next filter
net/queue: introduce NetQueueDeliverFunc
net: merge qemu_deliver_packet and qemu_deliver_packet_iov
netfilter: hook packets before net queue send
init/cleanup of netfilter object
vl.c: init delayed object after net_init_clients
vmxnet3: Add support for VMXNET3_CMD_GET_ADAPTIVE_RING_INFO command
e1000: use alias for default model
vmxnet3: Support reading IMR registers on bar0
net/vmxnet3: Refine l2 header validation
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
diff --git a/hw/net/e1000.c b/hw/net/e1000.c
index 09c9e9d..910de3a 100644
--- a/hw/net/e1000.c
+++ b/hw/net/e1000.c
@@ -1647,7 +1647,7 @@
static const E1000Info e1000_devices[] = {
{
- .name = "e1000-82540em",
+ .name = "e1000",
.device_id = E1000_DEV_ID_82540EM,
.revision = 0x03,
.phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT,
@@ -1666,11 +1666,6 @@
},
};
-static const TypeInfo e1000_default_info = {
- .name = "e1000",
- .parent = "e1000-82540em",
-};
-
static void e1000_register_types(void)
{
int i;
@@ -1688,7 +1683,6 @@
type_register(&type_info);
}
- type_register_static(&e1000_default_info);
}
type_init(e1000_register_types)
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index 04159c8..3c5e10d 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -729,9 +729,7 @@
}
if (txd.eop) {
- if (!s->skip_current_tx_pkt) {
- vmxnet_tx_pkt_parse(s->tx_pkt);
-
+ if (!s->skip_current_tx_pkt && vmxnet_tx_pkt_parse(s->tx_pkt)) {
if (s->needs_vlan) {
vmxnet_tx_pkt_setup_vlan_header(s->tx_pkt, s->tci);
}
@@ -1165,9 +1163,13 @@
static uint64_t
vmxnet3_io_bar0_read(void *opaque, hwaddr addr, unsigned size)
{
+ VMXNET3State *s = opaque;
+
if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_IMR,
VMXNET3_MAX_INTRS, VMXNET3_REG_ALIGN)) {
- g_assert_not_reached();
+ int l = VMW_MULTIREG_IDX_BY_ADDR(addr, VMXNET3_REG_IMR,
+ VMXNET3_REG_ALIGN);
+ return s->interrupt_states[l].is_masked;
}
VMW_CBPRN("BAR0 unknown read [%" PRIx64 "], size %d", addr, size);
@@ -1629,6 +1631,11 @@
VMW_CBPRN("Set: VMXNET3_CMD_GET_CONF_INTR - interrupt configuration");
break;
+ case VMXNET3_CMD_GET_ADAPTIVE_RING_INFO:
+ VMW_CBPRN("Set: VMXNET3_CMD_GET_ADAPTIVE_RING_INFO - "
+ "adaptive ring info flags");
+ break;
+
default:
VMW_CBPRN("Received unknown command: %" PRIx64, cmd);
break;
@@ -1668,6 +1675,10 @@
ret = vmxnet3_get_interrupt_config(s);
break;
+ case VMXNET3_CMD_GET_ADAPTIVE_RING_INFO:
+ ret = VMXNET3_DISABLE_ADAPTIVE_RING;
+ break;
+
default:
VMW_WRPRN("Received request for unknown command: %x", s->last_command);
ret = -1;
diff --git a/hw/net/vmxnet3.h b/hw/net/vmxnet3.h
index f987d71..f7006af 100644
--- a/hw/net/vmxnet3.h
+++ b/hw/net/vmxnet3.h
@@ -198,9 +198,13 @@
VMXNET3_CMD_GET_DID_LO, /* 0xF00D0005 */
VMXNET3_CMD_GET_DID_HI, /* 0xF00D0006 */
VMXNET3_CMD_GET_DEV_EXTRA_INFO, /* 0xF00D0007 */
- VMXNET3_CMD_GET_CONF_INTR /* 0xF00D0008 */
+ VMXNET3_CMD_GET_CONF_INTR, /* 0xF00D0008 */
+ VMXNET3_CMD_GET_ADAPTIVE_RING_INFO /* 0xF00D0009 */
};
+/* Adaptive Ring Info Flags */
+#define VMXNET3_DISABLE_ADAPTIVE_RING 1
+
/*
* Little Endian layout of bitfields -
* Byte 0 : 7.....len.....0
diff --git a/hw/net/vmxnet_tx_pkt.c b/hw/net/vmxnet_tx_pkt.c
index f7344c4..eb88ddf 100644
--- a/hw/net/vmxnet_tx_pkt.c
+++ b/hw/net/vmxnet_tx_pkt.c
@@ -142,11 +142,24 @@
bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, 0, l2_hdr->iov_base,
ETH_MAX_L2_HDR_LEN);
- if (bytes_read < ETH_MAX_L2_HDR_LEN) {
+ if (bytes_read < sizeof(struct eth_header)) {
l2_hdr->iov_len = 0;
return false;
- } else {
- l2_hdr->iov_len = eth_get_l2_hdr_length(l2_hdr->iov_base);
+ }
+
+ l2_hdr->iov_len = sizeof(struct eth_header);
+ switch (be16_to_cpu(PKT_GET_ETH_HDR(l2_hdr->iov_base)->h_proto)) {
+ case ETH_P_VLAN:
+ l2_hdr->iov_len += sizeof(struct vlan_header);
+ break;
+ case ETH_P_DVLAN:
+ l2_hdr->iov_len += 2 * sizeof(struct vlan_header);
+ break;
+ }
+
+ if (bytes_read < l2_hdr->iov_len) {
+ l2_hdr->iov_len = 0;
+ return false;
}
l3_proto = eth_get_l3_proto(l2_hdr->iov_base, l2_hdr->iov_len);
diff --git a/include/net/filter.h b/include/net/filter.h
new file mode 100644
index 0000000..2deda36
--- /dev/null
+++ b/include/net/filter.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2015 FUJITSU LIMITED
+ * Author: Yang Hongyang <yanghy@cn.fujitsu.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_NET_FILTER_H
+#define QEMU_NET_FILTER_H
+
+#include "qom/object.h"
+#include "qemu-common.h"
+#include "qemu/typedefs.h"
+#include "net/queue.h"
+
+#define TYPE_NETFILTER "netfilter"
+#define NETFILTER(obj) \
+ OBJECT_CHECK(NetFilterState, (obj), TYPE_NETFILTER)
+#define NETFILTER_GET_CLASS(obj) \
+ OBJECT_GET_CLASS(NetFilterClass, (obj), TYPE_NETFILTER)
+#define NETFILTER_CLASS(klass) \
+ OBJECT_CLASS_CHECK(NetFilterClass, (klass), TYPE_NETFILTER)
+
+typedef void (FilterSetup) (NetFilterState *nf, Error **errp);
+typedef void (FilterCleanup) (NetFilterState *nf);
+/*
+ * Return:
+ * 0: finished handling the packet, we should continue
+ * size: filter stolen this packet, we stop pass this packet further
+ */
+typedef ssize_t (FilterReceiveIOV)(NetFilterState *nc,
+ NetClientState *sender,
+ unsigned flags,
+ const struct iovec *iov,
+ int iovcnt,
+ NetPacketSent *sent_cb);
+
+typedef struct NetFilterClass {
+ ObjectClass parent_class;
+
+ /* optional */
+ FilterSetup *setup;
+ FilterCleanup *cleanup;
+ /* mandatory */
+ FilterReceiveIOV *receive_iov;
+} NetFilterClass;
+
+
+struct NetFilterState {
+ /* private */
+ Object parent;
+
+ /* protected */
+ char *netdev_id;
+ NetClientState *netdev;
+ NetFilterDirection direction;
+ char info_str[256];
+ QTAILQ_ENTRY(NetFilterState) next;
+};
+
+ssize_t qemu_netfilter_receive(NetFilterState *nf,
+ NetFilterDirection direction,
+ NetClientState *sender,
+ unsigned flags,
+ const struct iovec *iov,
+ int iovcnt,
+ NetPacketSent *sent_cb);
+
+/* pass the packet to the next filter */
+ssize_t qemu_netfilter_pass_to_next(NetClientState *sender,
+ unsigned flags,
+ const struct iovec *iov,
+ int iovcnt,
+ void *opaque);
+
+#endif /* QEMU_NET_FILTER_H */
diff --git a/include/net/net.h b/include/net/net.h
index 6a6cbef..7af3e15 100644
--- a/include/net/net.h
+++ b/include/net/net.h
@@ -92,6 +92,7 @@
NetClientDestructor *destructor;
unsigned int queue_index;
unsigned rxfilter_notify_enabled:1;
+ QTAILQ_HEAD(, NetFilterState) filters;
};
typedef struct NICState {
@@ -151,11 +152,6 @@
int qemu_find_nic_model(NICInfo *nd, const char * const *models,
const char *default_model);
-ssize_t qemu_deliver_packet(NetClientState *sender,
- unsigned flags,
- const uint8_t *data,
- size_t size,
- void *opaque);
ssize_t qemu_deliver_packet_iov(NetClientState *sender,
unsigned flags,
const struct iovec *iov,
diff --git a/include/net/queue.h b/include/net/queue.h
index fc02b33..5469fdb 100644
--- a/include/net/queue.h
+++ b/include/net/queue.h
@@ -34,7 +34,25 @@
#define QEMU_NET_PACKET_FLAG_NONE 0
#define QEMU_NET_PACKET_FLAG_RAW (1<<0)
-NetQueue *qemu_new_net_queue(void *opaque);
+/* Returns:
+ * >0 - success
+ * 0 - queue packet for future redelivery
+ * <0 - failure (discard packet)
+ */
+typedef ssize_t (NetQueueDeliverFunc)(NetClientState *sender,
+ unsigned flags,
+ const struct iovec *iov,
+ int iovcnt,
+ void *opaque);
+
+NetQueue *qemu_new_net_queue(NetQueueDeliverFunc *deliver, void *opaque);
+
+void qemu_net_queue_append_iov(NetQueue *queue,
+ NetClientState *sender,
+ unsigned flags,
+ const struct iovec *iov,
+ int iovcnt,
+ NetPacketSent *sent_cb);
void qemu_del_net_queue(NetQueue *queue);
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index 3a835ff..ee1ce1d 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -45,6 +45,7 @@
typedef struct MouseTransformInfo MouseTransformInfo;
typedef struct MSIMessage MSIMessage;
typedef struct NetClientState NetClientState;
+typedef struct NetFilterState NetFilterState;
typedef struct NICInfo NICInfo;
typedef struct PcGuestInfo PcGuestInfo;
typedef struct PCIBridge PCIBridge;
diff --git a/net/Makefile.objs b/net/Makefile.objs
index ec19cb3..5fa2f97 100644
--- a/net/Makefile.objs
+++ b/net/Makefile.objs
@@ -13,3 +13,5 @@
common-obj-$(CONFIG_SLIRP) += slirp.o
common-obj-$(CONFIG_VDE) += vde.o
common-obj-$(CONFIG_NETMAP) += netmap.o
+common-obj-y += filter.o
+common-obj-y += filter-buffer.o
diff --git a/net/filter-buffer.c b/net/filter-buffer.c
new file mode 100644
index 0000000..57be149
--- /dev/null
+++ b/net/filter-buffer.c
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2015 FUJITSU LIMITED
+ * Author: Yang Hongyang <yanghy@cn.fujitsu.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+
+#include "net/filter.h"
+#include "net/queue.h"
+#include "qemu-common.h"
+#include "qemu/timer.h"
+#include "qemu/iov.h"
+#include "qapi/qmp/qerror.h"
+#include "qapi-visit.h"
+#include "qom/object.h"
+
+#define TYPE_FILTER_BUFFER "filter-buffer"
+
+#define FILTER_BUFFER(obj) \
+ OBJECT_CHECK(FilterBufferState, (obj), TYPE_FILTER_BUFFER)
+
+typedef struct FilterBufferState {
+ NetFilterState parent_obj;
+
+ NetQueue *incoming_queue;
+ uint32_t interval;
+ QEMUTimer release_timer;
+} FilterBufferState;
+
+static void filter_buffer_flush(NetFilterState *nf)
+{
+ FilterBufferState *s = FILTER_BUFFER(nf);
+
+ if (!qemu_net_queue_flush(s->incoming_queue)) {
+ /* Unable to empty the queue, purge remaining packets */
+ qemu_net_queue_purge(s->incoming_queue, nf->netdev);
+ }
+}
+
+static void filter_buffer_release_timer(void *opaque)
+{
+ NetFilterState *nf = opaque;
+ FilterBufferState *s = FILTER_BUFFER(nf);
+
+ /*
+ * Note: filter_buffer_flush() drops packets that can't be sent
+ * TODO: We should leave them queued. But currently there's no way
+ * for the next filter or receiver to notify us that it can receive
+ * more packets.
+ */
+ filter_buffer_flush(nf);
+ /* Timer rearmed to fire again in s->interval microseconds. */
+ timer_mod(&s->release_timer,
+ qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) + s->interval);
+}
+
+/* filter APIs */
+static ssize_t filter_buffer_receive_iov(NetFilterState *nf,
+ NetClientState *sender,
+ unsigned flags,
+ const struct iovec *iov,
+ int iovcnt,
+ NetPacketSent *sent_cb)
+{
+ FilterBufferState *s = FILTER_BUFFER(nf);
+
+ /*
+ * We return size when buffer a packet, the sender will take it as
+ * a already sent packet, so sent_cb should not be called later.
+ *
+ * FIXME: Even if the guest can't receive packets for some reasons,
+ * the filter can still accept packets until its internal queue is full.
+ * For example:
+ * For some reason, receiver could not receive more packets
+ * (.can_receive() returns zero). Without a filter, at most one packet
+ * will be queued in incoming queue and sender's poll will be disabled
+ * unit its sent_cb() was called. With a filter, it will keep receiving
+ * the packets without caring about the receiver. This is suboptimal.
+ * May need more thoughts (e.g keeping sent_cb).
+ */
+ qemu_net_queue_append_iov(s->incoming_queue, sender, flags,
+ iov, iovcnt, NULL);
+ return iov_size(iov, iovcnt);
+}
+
+static void filter_buffer_cleanup(NetFilterState *nf)
+{
+ FilterBufferState *s = FILTER_BUFFER(nf);
+
+ if (s->interval) {
+ timer_del(&s->release_timer);
+ }
+
+ /* flush packets */
+ if (s->incoming_queue) {
+ filter_buffer_flush(nf);
+ g_free(s->incoming_queue);
+ }
+}
+
+static void filter_buffer_setup(NetFilterState *nf, Error **errp)
+{
+ FilterBufferState *s = FILTER_BUFFER(nf);
+
+ /*
+ * We may want to accept zero interval when VM FT solutions like MC
+ * or COLO use this filter to release packets on demand.
+ */
+ if (!s->interval) {
+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "interval",
+ "a non-zero interval");
+ return;
+ }
+
+ s->incoming_queue = qemu_new_net_queue(qemu_netfilter_pass_to_next, nf);
+ if (s->interval) {
+ timer_init_us(&s->release_timer, QEMU_CLOCK_VIRTUAL,
+ filter_buffer_release_timer, nf);
+ /* Timer armed to fire in s->interval microseconds. */
+ timer_mod(&s->release_timer,
+ qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) + s->interval);
+ }
+}
+
+static void filter_buffer_class_init(ObjectClass *oc, void *data)
+{
+ NetFilterClass *nfc = NETFILTER_CLASS(oc);
+
+ nfc->setup = filter_buffer_setup;
+ nfc->cleanup = filter_buffer_cleanup;
+ nfc->receive_iov = filter_buffer_receive_iov;
+}
+
+static void filter_buffer_get_interval(Object *obj, Visitor *v, void *opaque,
+ const char *name, Error **errp)
+{
+ FilterBufferState *s = FILTER_BUFFER(obj);
+ uint32_t value = s->interval;
+
+ visit_type_uint32(v, &value, name, errp);
+}
+
+static void filter_buffer_set_interval(Object *obj, Visitor *v, void *opaque,
+ const char *name, Error **errp)
+{
+ FilterBufferState *s = FILTER_BUFFER(obj);
+ Error *local_err = NULL;
+ uint32_t value;
+
+ visit_type_uint32(v, &value, name, &local_err);
+ if (local_err) {
+ goto out;
+ }
+ if (!value) {
+ error_setg(&local_err, "Property '%s.%s' requires a positive value",
+ object_get_typename(obj), name);
+ goto out;
+ }
+ s->interval = value;
+
+out:
+ error_propagate(errp, local_err);
+}
+
+static void filter_buffer_init(Object *obj)
+{
+ object_property_add(obj, "interval", "int",
+ filter_buffer_get_interval,
+ filter_buffer_set_interval, NULL, NULL, NULL);
+}
+
+static const TypeInfo filter_buffer_info = {
+ .name = TYPE_FILTER_BUFFER,
+ .parent = TYPE_NETFILTER,
+ .class_init = filter_buffer_class_init,
+ .instance_init = filter_buffer_init,
+ .instance_size = sizeof(FilterBufferState),
+};
+
+static void register_types(void)
+{
+ type_register_static(&filter_buffer_info);
+}
+
+type_init(register_types);
diff --git a/net/filter.c b/net/filter.c
new file mode 100644
index 0000000..326f2b5
--- /dev/null
+++ b/net/filter.c
@@ -0,0 +1,233 @@
+/*
+ * Copyright (c) 2015 FUJITSU LIMITED
+ * Author: Yang Hongyang <yanghy@cn.fujitsu.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+
+#include "qemu-common.h"
+#include "qapi/qmp/qerror.h"
+#include "qemu/error-report.h"
+
+#include "net/filter.h"
+#include "net/net.h"
+#include "net/vhost_net.h"
+#include "qom/object_interfaces.h"
+#include "qemu/iov.h"
+#include "qapi/string-output-visitor.h"
+
+ssize_t qemu_netfilter_receive(NetFilterState *nf,
+ NetFilterDirection direction,
+ NetClientState *sender,
+ unsigned flags,
+ const struct iovec *iov,
+ int iovcnt,
+ NetPacketSent *sent_cb)
+{
+ if (nf->direction == direction ||
+ nf->direction == NET_FILTER_DIRECTION_ALL) {
+ return NETFILTER_GET_CLASS(OBJECT(nf))->receive_iov(
+ nf, sender, flags, iov, iovcnt, sent_cb);
+ }
+
+ return 0;
+}
+
+ssize_t qemu_netfilter_pass_to_next(NetClientState *sender,
+ unsigned flags,
+ const struct iovec *iov,
+ int iovcnt,
+ void *opaque)
+{
+ int ret = 0;
+ int direction;
+ NetFilterState *nf = opaque;
+ NetFilterState *next = QTAILQ_NEXT(nf, next);
+
+ if (!sender || !sender->peer) {
+ /* no receiver, or sender been deleted, no need to pass it further */
+ goto out;
+ }
+
+ if (nf->direction == NET_FILTER_DIRECTION_ALL) {
+ if (sender == nf->netdev) {
+ /* This packet is sent by netdev itself */
+ direction = NET_FILTER_DIRECTION_TX;
+ } else {
+ direction = NET_FILTER_DIRECTION_RX;
+ }
+ } else {
+ direction = nf->direction;
+ }
+
+ while (next) {
+ /*
+ * if qemu_netfilter_pass_to_next been called, means that
+ * the packet has been hold by filter and has already retured size
+ * to the sender, so sent_cb shouldn't be called later, just
+ * pass NULL to next.
+ */
+ ret = qemu_netfilter_receive(next, direction, sender, flags, iov,
+ iovcnt, NULL);
+ if (ret) {
+ return ret;
+ }
+ next = QTAILQ_NEXT(next, next);
+ }
+
+ /*
+ * We have gone through all filters, pass it to receiver.
+ * Do the valid check again incase sender or receiver been
+ * deleted while we go through filters.
+ */
+ if (sender && sender->peer) {
+ qemu_net_queue_send_iov(sender->peer->incoming_queue,
+ sender, flags, iov, iovcnt, NULL);
+ }
+
+out:
+ /* no receiver, or sender been deleted */
+ return iov_size(iov, iovcnt);
+}
+
+static char *netfilter_get_netdev_id(Object *obj, Error **errp)
+{
+ NetFilterState *nf = NETFILTER(obj);
+
+ return g_strdup(nf->netdev_id);
+}
+
+static void netfilter_set_netdev_id(Object *obj, const char *str, Error **errp)
+{
+ NetFilterState *nf = NETFILTER(obj);
+
+ nf->netdev_id = g_strdup(str);
+}
+
+static int netfilter_get_direction(Object *obj, Error **errp G_GNUC_UNUSED)
+{
+ NetFilterState *nf = NETFILTER(obj);
+ return nf->direction;
+}
+
+static void netfilter_set_direction(Object *obj, int direction, Error **errp)
+{
+ NetFilterState *nf = NETFILTER(obj);
+ nf->direction = direction;
+}
+
+static void netfilter_init(Object *obj)
+{
+ object_property_add_str(obj, "netdev",
+ netfilter_get_netdev_id, netfilter_set_netdev_id,
+ NULL);
+ object_property_add_enum(obj, "queue", "NetFilterDirection",
+ NetFilterDirection_lookup,
+ netfilter_get_direction, netfilter_set_direction,
+ NULL);
+}
+
+static void netfilter_complete(UserCreatable *uc, Error **errp)
+{
+ NetFilterState *nf = NETFILTER(uc);
+ NetClientState *ncs[MAX_QUEUE_NUM];
+ NetFilterClass *nfc = NETFILTER_GET_CLASS(uc);
+ int queues;
+ Error *local_err = NULL;
+ char *str, *info;
+ ObjectProperty *prop;
+ StringOutputVisitor *ov;
+
+ if (!nf->netdev_id) {
+ error_setg(errp, "Parameter 'netdev' is required");
+ return;
+ }
+
+ queues = qemu_find_net_clients_except(nf->netdev_id, ncs,
+ NET_CLIENT_OPTIONS_KIND_NIC,
+ MAX_QUEUE_NUM);
+ if (queues < 1) {
+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "netdev",
+ "a network backend id");
+ return;
+ } else if (queues > 1) {
+ error_setg(errp, "multiqueue is not supported");
+ return;
+ }
+
+ if (get_vhost_net(ncs[0])) {
+ error_setg(errp, "Vhost is not supported");
+ return;
+ }
+
+ nf->netdev = ncs[0];
+
+ if (nfc->setup) {
+ nfc->setup(nf, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+ }
+ QTAILQ_INSERT_TAIL(&nf->netdev->filters, nf, next);
+
+ /* generate info str */
+ QTAILQ_FOREACH(prop, &OBJECT(nf)->properties, node) {
+ if (!strcmp(prop->name, "type")) {
+ continue;
+ }
+ ov = string_output_visitor_new(false);
+ object_property_get(OBJECT(nf), string_output_get_visitor(ov),
+ prop->name, errp);
+ str = string_output_get_string(ov);
+ string_output_visitor_cleanup(ov);
+ info = g_strdup_printf(",%s=%s", prop->name, str);
+ g_strlcat(nf->info_str, info, sizeof(nf->info_str));
+ g_free(str);
+ g_free(info);
+ }
+}
+
+static void netfilter_finalize(Object *obj)
+{
+ NetFilterState *nf = NETFILTER(obj);
+ NetFilterClass *nfc = NETFILTER_GET_CLASS(obj);
+
+ if (nfc->cleanup) {
+ nfc->cleanup(nf);
+ }
+
+ if (nf->netdev && !QTAILQ_EMPTY(&nf->netdev->filters)) {
+ QTAILQ_REMOVE(&nf->netdev->filters, nf, next);
+ }
+}
+
+static void netfilter_class_init(ObjectClass *oc, void *data)
+{
+ UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
+
+ ucc->complete = netfilter_complete;
+}
+
+static const TypeInfo netfilter_info = {
+ .name = TYPE_NETFILTER,
+ .parent = TYPE_OBJECT,
+ .abstract = true,
+ .class_size = sizeof(NetFilterClass),
+ .class_init = netfilter_class_init,
+ .instance_size = sizeof(NetFilterState),
+ .instance_init = netfilter_init,
+ .instance_finalize = netfilter_finalize,
+ .interfaces = (InterfaceInfo[]) {
+ { TYPE_USER_CREATABLE },
+ { }
+ }
+};
+
+static void register_types(void)
+{
+ type_register_static(&netfilter_info);
+}
+
+type_init(register_types);
diff --git a/net/net.c b/net/net.c
index 28a5597..39af893 100644
--- a/net/net.c
+++ b/net/net.c
@@ -44,6 +44,7 @@
#include "qapi/opts-visitor.h"
#include "qapi/dealloc-visitor.h"
#include "sysemu/sysemu.h"
+#include "net/filter.h"
/* Net bridge is currently not supported for W32. */
#if !defined(_WIN32)
@@ -285,8 +286,9 @@
}
QTAILQ_INSERT_TAIL(&net_clients, nc, next);
- nc->incoming_queue = qemu_new_net_queue(nc);
+ nc->incoming_queue = qemu_new_net_queue(qemu_deliver_packet_iov, nc);
nc->destructor = destructor;
+ QTAILQ_INIT(&nc->filters);
}
NetClientState *qemu_new_net_client(NetClientInfo *info,
@@ -384,6 +386,7 @@
{
NetClientState *ncs[MAX_QUEUE_NUM];
int queues, i;
+ NetFilterState *nf, *next;
assert(nc->info->type != NET_CLIENT_OPTIONS_KIND_NIC);
@@ -395,6 +398,10 @@
MAX_QUEUE_NUM);
assert(queues != 0);
+ QTAILQ_FOREACH_SAFE(nf, &nc->filters, next, next) {
+ object_unparent(OBJECT(nf));
+ }
+
/* If there is a peer NIC, delete and cleanup client, but do not free. */
if (nc->peer && nc->peer->info->type == NET_CLIENT_OPTIONS_KIND_NIC) {
NICState *nic = qemu_get_nic(nc->peer);
@@ -554,36 +561,44 @@
return 1;
}
-ssize_t qemu_deliver_packet(NetClientState *sender,
- unsigned flags,
- const uint8_t *data,
- size_t size,
- void *opaque)
+static ssize_t filter_receive_iov(NetClientState *nc,
+ NetFilterDirection direction,
+ NetClientState *sender,
+ unsigned flags,
+ const struct iovec *iov,
+ int iovcnt,
+ NetPacketSent *sent_cb)
{
- NetClientState *nc = opaque;
- ssize_t ret;
+ ssize_t ret = 0;
+ NetFilterState *nf = NULL;
- if (nc->link_down) {
- return size;
- }
-
- if (nc->receive_disabled) {
- return 0;
- }
-
- if (flags & QEMU_NET_PACKET_FLAG_RAW && nc->info->receive_raw) {
- ret = nc->info->receive_raw(nc, data, size);
- } else {
- ret = nc->info->receive(nc, data, size);
- }
-
- if (ret == 0) {
- nc->receive_disabled = 1;
+ QTAILQ_FOREACH(nf, &nc->filters, next) {
+ ret = qemu_netfilter_receive(nf, direction, sender, flags, iov,
+ iovcnt, sent_cb);
+ if (ret) {
+ return ret;
+ }
}
return ret;
}
+static ssize_t filter_receive(NetClientState *nc,
+ NetFilterDirection direction,
+ NetClientState *sender,
+ unsigned flags,
+ const uint8_t *data,
+ size_t size,
+ NetPacketSent *sent_cb)
+{
+ struct iovec iov = {
+ .iov_base = (void *)data,
+ .iov_len = size
+ };
+
+ return filter_receive_iov(nc, direction, sender, flags, &iov, 1, sent_cb);
+}
+
void qemu_purge_queued_packets(NetClientState *nc)
{
if (!nc->peer) {
@@ -625,6 +640,7 @@
NetPacketSent *sent_cb)
{
NetQueue *queue;
+ int ret;
#ifdef DEBUG_NET
printf("qemu_send_packet_async:\n");
@@ -635,6 +651,19 @@
return size;
}
+ /* Let filters handle the packet first */
+ ret = filter_receive(sender, NET_FILTER_DIRECTION_TX,
+ sender, flags, buf, size, sent_cb);
+ if (ret) {
+ return ret;
+ }
+
+ ret = filter_receive(sender->peer, NET_FILTER_DIRECTION_RX,
+ sender, flags, buf, size, sent_cb);
+ if (ret) {
+ return ret;
+ }
+
queue = sender->peer->incoming_queue;
return qemu_net_queue_send(queue, sender, flags, buf, size, sent_cb);
@@ -660,14 +689,25 @@
}
static ssize_t nc_sendv_compat(NetClientState *nc, const struct iovec *iov,
- int iovcnt)
+ int iovcnt, unsigned flags)
{
- uint8_t buffer[NET_BUFSIZE];
+ uint8_t buf[NET_BUFSIZE];
+ uint8_t *buffer;
size_t offset;
- offset = iov_to_buf(iov, iovcnt, 0, buffer, sizeof(buffer));
+ if (iovcnt == 1) {
+ buffer = iov[0].iov_base;
+ offset = iov[0].iov_len;
+ } else {
+ buffer = buf;
+ offset = iov_to_buf(iov, iovcnt, 0, buffer, sizeof(buffer));
+ }
- return nc->info->receive(nc, buffer, offset);
+ if (flags & QEMU_NET_PACKET_FLAG_RAW && nc->info->receive_raw) {
+ return nc->info->receive_raw(nc, buffer, offset);
+ } else {
+ return nc->info->receive(nc, buffer, offset);
+ }
}
ssize_t qemu_deliver_packet_iov(NetClientState *sender,
@@ -690,7 +730,7 @@
if (nc->info->receive_iov) {
ret = nc->info->receive_iov(nc, iov, iovcnt);
} else {
- ret = nc_sendv_compat(nc, iov, iovcnt);
+ ret = nc_sendv_compat(nc, iov, iovcnt, flags);
}
if (ret == 0) {
@@ -705,11 +745,25 @@
NetPacketSent *sent_cb)
{
NetQueue *queue;
+ int ret;
if (sender->link_down || !sender->peer) {
return iov_size(iov, iovcnt);
}
+ /* Let filters handle the packet first */
+ ret = filter_receive_iov(sender, NET_FILTER_DIRECTION_TX, sender,
+ QEMU_NET_PACKET_FLAG_NONE, iov, iovcnt, sent_cb);
+ if (ret) {
+ return ret;
+ }
+
+ ret = filter_receive_iov(sender->peer, NET_FILTER_DIRECTION_RX, sender,
+ QEMU_NET_PACKET_FLAG_NONE, iov, iovcnt, sent_cb);
+ if (ret) {
+ return ret;
+ }
+
queue = sender->peer->incoming_queue;
return qemu_net_queue_send_iov(queue, sender,
@@ -1125,10 +1179,21 @@
void print_net_client(Monitor *mon, NetClientState *nc)
{
+ NetFilterState *nf;
+
monitor_printf(mon, "%s: index=%d,type=%s,%s\n", nc->name,
nc->queue_index,
NetClientOptionsKind_lookup[nc->info->type],
nc->info_str);
+ if (!QTAILQ_EMPTY(&nc->filters)) {
+ monitor_printf(mon, "filters:\n");
+ }
+ QTAILQ_FOREACH(nf, &nc->filters, next) {
+ monitor_printf(mon, " - %s: type=%s%s\n",
+ object_get_canonical_path_component(OBJECT(nf)),
+ object_get_typename(OBJECT(nf)),
+ nf->info_str);
+ }
}
RxFilterInfoList *qmp_query_rx_filter(bool has_name, const char *name,
diff --git a/net/queue.c b/net/queue.c
index ebbe2bb..de8b9d3 100644
--- a/net/queue.c
+++ b/net/queue.c
@@ -52,13 +52,14 @@
void *opaque;
uint32_t nq_maxlen;
uint32_t nq_count;
+ NetQueueDeliverFunc *deliver;
QTAILQ_HEAD(packets, NetPacket) packets;
unsigned delivering : 1;
};
-NetQueue *qemu_new_net_queue(void *opaque)
+NetQueue *qemu_new_net_queue(NetQueueDeliverFunc *deliver, void *opaque)
{
NetQueue *queue;
@@ -67,6 +68,7 @@
queue->opaque = opaque;
queue->nq_maxlen = 10000;
queue->nq_count = 0;
+ queue->deliver = deliver;
QTAILQ_INIT(&queue->packets);
@@ -110,12 +112,12 @@
QTAILQ_INSERT_TAIL(&queue->packets, packet, entry);
}
-static void qemu_net_queue_append_iov(NetQueue *queue,
- NetClientState *sender,
- unsigned flags,
- const struct iovec *iov,
- int iovcnt,
- NetPacketSent *sent_cb)
+void qemu_net_queue_append_iov(NetQueue *queue,
+ NetClientState *sender,
+ unsigned flags,
+ const struct iovec *iov,
+ int iovcnt,
+ NetPacketSent *sent_cb)
{
NetPacket *packet;
size_t max_len = 0;
@@ -152,9 +154,13 @@
size_t size)
{
ssize_t ret = -1;
+ struct iovec iov = {
+ .iov_base = (void *)data,
+ .iov_len = size
+ };
queue->delivering = 1;
- ret = qemu_deliver_packet(sender, flags, data, size, queue->opaque);
+ ret = queue->deliver(sender, flags, &iov, 1, queue->opaque);
queue->delivering = 0;
return ret;
@@ -169,7 +175,7 @@
ssize_t ret = -1;
queue->delivering = 1;
- ret = qemu_deliver_packet_iov(sender, flags, iov, iovcnt, queue->opaque);
+ ret = queue->deliver(sender, flags, iov, iovcnt, queue->opaque);
queue->delivering = 0;
return ret;
diff --git a/qapi-schema.json b/qapi-schema.json
index 8b0520c..a386605 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -2588,6 +2588,26 @@
'opts': 'NetClientOptions' } }
##
+# @NetFilterDirection
+#
+# Indicates whether a netfilter is attached to a netdev's transmit queue or
+# receive queue or both.
+#
+# @all: the filter is attached both to the receive and the transmit
+# queue of the netdev (default).
+#
+# @rx: the filter is attached to the receive queue of the netdev,
+# where it will receive packets sent to the netdev.
+#
+# @tx: the filter is attached to the transmit queue of the netdev,
+# where it will receive packets sent by the netdev.
+#
+# Since 2.5
+##
+{ 'enum': 'NetFilterDirection',
+ 'data': [ 'all', 'rx', 'tx' ] }
+
+##
# @InetSocketAddress
#
# Captures a socket address or address range in the Internet namespace.
diff --git a/qdev-monitor.c b/qdev-monitor.c
index 1cadefb..a35098f 100644
--- a/qdev-monitor.c
+++ b/qdev-monitor.c
@@ -50,6 +50,7 @@
{ "lsi53c895a", "lsi" },
{ "ich9-ahci", "ahci" },
{ "kvm-pci-assign", "pci-assign" },
+ { "e1000", "e1000-82540em" },
{ }
};
diff --git a/qemu-options.hx b/qemu-options.hx
index 328404c..2485b94 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -3643,6 +3643,23 @@
@var{server-cert.pem} (only servers), @var{server-key.pem} (only servers),
@var{client-cert.pem} (only clients), and @var{client-key.pem} (only clients).
+@item -object filter-buffer,id=@var{id},netdev=@var{netdevid},interval=@var{t}[,queue=@var{all|rx|tx}]
+
+Interval @var{t} can't be 0, this filter batches the packet delivery: all
+packets arriving in a given interval on netdev @var{netdevid} are delayed
+until the end of the interval. Interval is in microseconds.
+
+queue @var{all|rx|tx} is an option that can be applied to any netfilter.
+
+@option{all}: the filter is attached both to the receive and the transmit
+ queue of the netdev (default).
+
+@option{rx}: the filter is attached to the receive queue of the netdev,
+ where it will receive packets sent to the netdev.
+
+@option{tx}: the filter is attached to the transmit queue of the netdev,
+ where it will receive packets sent by the netdev.
+
@end table
ETEXI
diff --git a/tests/.gitignore b/tests/.gitignore
index a607bdd..65496aa 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -49,5 +49,6 @@
test-write-threshold
test-x86-cpuid
test-xbzrle
+test-netfilter
*-test
qapi-schema/*.test.*
diff --git a/tests/Makefile b/tests/Makefile
index 5a4732f..dbd32a6 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -194,6 +194,7 @@
ifeq ($(CONFIG_VHOST_NET),y)
check-qtest-i386-$(CONFIG_LINUX) += tests/vhost-user-test$(EXESUF)
endif
+check-qtest-i386-y += tests/test-netfilter$(EXESUF)
check-qtest-x86_64-y = $(check-qtest-i386-y)
gcov-files-i386-y += i386-softmmu/hw/timer/mc146818rtc.c
gcov-files-x86_64-y = $(subst i386-softmmu/,x86_64-softmmu/,$(gcov-files-i386-y))
@@ -438,6 +439,7 @@
tests/qemu-iotests/socket_scm_helper$(EXESUF): tests/qemu-iotests/socket_scm_helper.o
tests/test-qemu-opts$(EXESUF): tests/test-qemu-opts.o $(test-util-obj-y)
tests/test-write-threshold$(EXESUF): tests/test-write-threshold.o $(test-block-obj-y)
+tests/test-netfilter$(EXESUF): tests/test-netfilter.o $(qtest-obj-y)
ifeq ($(CONFIG_POSIX),y)
LIBS += -lutil
diff --git a/tests/test-netfilter.c b/tests/test-netfilter.c
new file mode 100644
index 0000000..303deb7
--- /dev/null
+++ b/tests/test-netfilter.c
@@ -0,0 +1,200 @@
+/*
+ * QTest testcase for netfilter
+ *
+ * Copyright (c) 2015 FUJITSU LIMITED
+ * Author: Yang Hongyang <yanghy@cn.fujitsu.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+
+#include <glib.h>
+#include "libqtest.h"
+
+/* add a netfilter to a netdev and then remove it */
+static void add_one_netfilter(void)
+{
+ QDict *response;
+
+ response = qmp("{'execute': 'object-add',"
+ " 'arguments': {"
+ " 'qom-type': 'filter-buffer',"
+ " 'id': 'qtest-f0',"
+ " 'props': {"
+ " 'netdev': 'qtest-bn0',"
+ " 'queue': 'rx',"
+ " 'interval': 1000"
+ "}}}");
+
+ g_assert(response);
+ g_assert(!qdict_haskey(response, "error"));
+ QDECREF(response);
+
+ response = qmp("{'execute': 'object-del',"
+ " 'arguments': {"
+ " 'id': 'qtest-f0'"
+ "}}");
+ g_assert(response);
+ g_assert(!qdict_haskey(response, "error"));
+ QDECREF(response);
+}
+
+/* add a netfilter to a netdev and then remove the netdev */
+static void remove_netdev_with_one_netfilter(void)
+{
+ QDict *response;
+
+ response = qmp("{'execute': 'object-add',"
+ " 'arguments': {"
+ " 'qom-type': 'filter-buffer',"
+ " 'id': 'qtest-f0',"
+ " 'props': {"
+ " 'netdev': 'qtest-bn0',"
+ " 'queue': 'rx',"
+ " 'interval': 1000"
+ "}}}");
+
+ g_assert(response);
+ g_assert(!qdict_haskey(response, "error"));
+ QDECREF(response);
+
+ response = qmp("{'execute': 'netdev_del',"
+ " 'arguments': {"
+ " 'id': 'qtest-bn0'"
+ "}}");
+ g_assert(response);
+ g_assert(!qdict_haskey(response, "error"));
+ QDECREF(response);
+
+ /* add back the netdev */
+ response = qmp("{'execute': 'netdev_add',"
+ " 'arguments': {"
+ " 'type': 'user',"
+ " 'id': 'qtest-bn0'"
+ "}}");
+ g_assert(response);
+ g_assert(!qdict_haskey(response, "error"));
+ QDECREF(response);
+}
+
+/* add multi(2) netfilters to a netdev and then remove them */
+static void add_multi_netfilter(void)
+{
+ QDict *response;
+
+ response = qmp("{'execute': 'object-add',"
+ " 'arguments': {"
+ " 'qom-type': 'filter-buffer',"
+ " 'id': 'qtest-f0',"
+ " 'props': {"
+ " 'netdev': 'qtest-bn0',"
+ " 'queue': 'rx',"
+ " 'interval': 1000"
+ "}}}");
+
+ g_assert(response);
+ g_assert(!qdict_haskey(response, "error"));
+ QDECREF(response);
+
+ response = qmp("{'execute': 'object-add',"
+ " 'arguments': {"
+ " 'qom-type': 'filter-buffer',"
+ " 'id': 'qtest-f1',"
+ " 'props': {"
+ " 'netdev': 'qtest-bn0',"
+ " 'queue': 'rx',"
+ " 'interval': 1000"
+ "}}}");
+
+ g_assert(response);
+ g_assert(!qdict_haskey(response, "error"));
+ QDECREF(response);
+
+ response = qmp("{'execute': 'object-del',"
+ " 'arguments': {"
+ " 'id': 'qtest-f0'"
+ "}}");
+ g_assert(response);
+ g_assert(!qdict_haskey(response, "error"));
+ QDECREF(response);
+
+ response = qmp("{'execute': 'object-del',"
+ " 'arguments': {"
+ " 'id': 'qtest-f1'"
+ "}}");
+ g_assert(response);
+ g_assert(!qdict_haskey(response, "error"));
+ QDECREF(response);
+}
+
+/* add multi(2) netfilters to a netdev and then remove the netdev */
+static void remove_netdev_with_multi_netfilter(void)
+{
+ QDict *response;
+
+ response = qmp("{'execute': 'object-add',"
+ " 'arguments': {"
+ " 'qom-type': 'filter-buffer',"
+ " 'id': 'qtest-f0',"
+ " 'props': {"
+ " 'netdev': 'qtest-bn0',"
+ " 'queue': 'rx',"
+ " 'interval': 1000"
+ "}}}");
+
+ g_assert(response);
+ g_assert(!qdict_haskey(response, "error"));
+ QDECREF(response);
+
+ response = qmp("{'execute': 'object-add',"
+ " 'arguments': {"
+ " 'qom-type': 'filter-buffer',"
+ " 'id': 'qtest-f1',"
+ " 'props': {"
+ " 'netdev': 'qtest-bn0',"
+ " 'queue': 'rx',"
+ " 'interval': 1000"
+ "}}}");
+
+ g_assert(response);
+ g_assert(!qdict_haskey(response, "error"));
+ QDECREF(response);
+
+ response = qmp("{'execute': 'netdev_del',"
+ " 'arguments': {"
+ " 'id': 'qtest-bn0'"
+ "}}");
+ g_assert(response);
+ g_assert(!qdict_haskey(response, "error"));
+ QDECREF(response);
+
+ /* add back the netdev */
+ response = qmp("{'execute': 'netdev_add',"
+ " 'arguments': {"
+ " 'type': 'user',"
+ " 'id': 'qtest-bn0'"
+ "}}");
+ g_assert(response);
+ g_assert(!qdict_haskey(response, "error"));
+ QDECREF(response);
+}
+
+int main(int argc, char **argv)
+{
+ int ret;
+
+ g_test_init(&argc, &argv, NULL);
+ qtest_add_func("/netfilter/addremove_one", add_one_netfilter);
+ qtest_add_func("/netfilter/remove_netdev_one",
+ remove_netdev_with_one_netfilter);
+ qtest_add_func("/netfilter/addremove_multi", add_multi_netfilter);
+ qtest_add_func("/netfilter/remove_netdev_multi",
+ remove_netdev_with_multi_netfilter);
+
+ qtest_start("-netdev user,id=qtest-bn0 -device e1000,netdev=qtest-bn0");
+ ret = g_test_run();
+
+ qtest_end();
+
+ return ret;
+}
diff --git a/vl.c b/vl.c
index ea9e0e6..7c806a2 100644
--- a/vl.c
+++ b/vl.c
@@ -2747,13 +2747,18 @@
if (g_str_equal(type, "rng-egd")) {
return false;
}
+
+ if (g_str_equal(type, "filter-buffer")) {
+ return false;
+ }
+
return true;
}
/*
* The remainder of object creation happens after the
- * creation of chardev, fsdev and device data types.
+ * creation of chardev, fsdev, net clients and device data types.
*/
static bool object_create_delayed(const char *type)
{
@@ -4259,12 +4264,6 @@
exit(0);
}
- if (qemu_opts_foreach(qemu_find_opts("object"),
- object_create,
- object_create_delayed, NULL)) {
- exit(1);
- }
-
machine_opts = qemu_get_machine_opts();
if (qemu_opt_foreach(machine_opts, machine_set_property, current_machine,
NULL)) {
@@ -4370,6 +4369,12 @@
exit(1);
}
+ if (qemu_opts_foreach(qemu_find_opts("object"),
+ object_create,
+ object_create_delayed, NULL)) {
+ exit(1);
+ }
+
#ifdef CONFIG_TPM
if (tpm_init() < 0) {
exit(1);