Zhang Chen | e6eee8a | 2016-09-27 10:22:32 +0800 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. |
| 3 | * Copyright (c) 2016 FUJITSU LIMITED |
| 4 | * Copyright (c) 2016 Intel Corporation |
| 5 | * |
| 6 | * Author: Zhang Chen <zhangchen.fnst@cn.fujitsu.com> |
| 7 | * |
| 8 | * This work is licensed under the terms of the GNU GPL, version 2 or |
| 9 | * later. See the COPYING file in the top-level directory. |
| 10 | */ |
| 11 | |
| 12 | #include "qemu/osdep.h" |
Zhang Chen | 30656b0 | 2016-09-27 10:22:34 +0800 | [diff] [blame] | 13 | #include "trace.h" |
Michael S. Tsirkin | f27f01d | 2018-05-03 22:50:56 +0300 | [diff] [blame] | 14 | #include "colo.h" |
Zhang Chen | e6eee8a | 2016-09-27 10:22:32 +0800 | [diff] [blame] | 15 | #include "net/filter.h" |
| 16 | #include "net/net.h" |
Zhang Chen | 4b39bdc | 2017-07-04 14:53:55 +0800 | [diff] [blame] | 17 | #include "qemu/error-report.h" |
Zhang Chen | e6eee8a | 2016-09-27 10:22:32 +0800 | [diff] [blame] | 18 | #include "qom/object.h" |
| 19 | #include "qemu/main-loop.h" |
| 20 | #include "qemu/iov.h" |
| 21 | #include "net/checksum.h" |
Zhang Chen | 24525e9 | 2018-09-03 12:38:57 +0800 | [diff] [blame] | 22 | #include "net/colo.h" |
| 23 | #include "migration/colo.h" |
Marc-André Lureau | e05ae1d | 2018-11-14 16:36:40 +0400 | [diff] [blame] | 24 | #include "util.h" |
Zhang Chen | e6eee8a | 2016-09-27 10:22:32 +0800 | [diff] [blame] | 25 | |
/* QOM type name under which this filter is registered (see colo_rewriter_info). */
#define TYPE_FILTER_REWRITER "filter-rewriter"
/* Provides the FILTER_REWRITER() cast used throughout this file. */
OBJECT_DECLARE_SIMPLE_TYPE(RewriterState, FILTER_REWRITER)

/* Values stored in RewriterState.failover_mode. */
#define FAILOVER_MODE_ON true
#define FAILOVER_MODE_OFF false
Zhang Chen | e6eee8a | 2016-09-27 10:22:32 +0800 | [diff] [blame] | 31 | |
/* Per-instance state of the filter-rewriter net filter. */
struct RewriterState {
    NetFilterState parent_obj;
    /* queue created in colo_rewriter_setup(); rewritten packets go through it */
    NetQueue *incoming_queue;
    /* hashtable to save connection */
    GHashTable *connection_track_table;
    /*
     * Backing store of the "vnet_hdr_support" property; when set, the
     * netdev's vnet_hdr_len is passed to packet_new().
     */
    bool vnet_hdr;
    /*
     * Switched to FAILOVER_MODE_ON by a COLO failover event once no tracked
     * connection has a non-zero offset; TCP packets of untracked
     * connections are then passed on without rewriting.
     */
    bool failover_mode;
};
Zhang Chen | e6eee8a | 2016-09-27 10:22:32 +0800 | [diff] [blame] | 40 | |
/*
 * Put the rewriter into failover mode.  Called from
 * colo_rewriter_handle_event() on COLO_EVENT_FAILOVER when no tracked
 * connection still carries a sequence offset.
 */
static void filter_rewriter_failover_mode(RewriterState *s)
{
    s->failover_mode = FAILOVER_MODE_ON;
}
| 45 | |
Zhang Chen | e6eee8a | 2016-09-27 10:22:32 +0800 | [diff] [blame] | 46 | static void filter_rewriter_flush(NetFilterState *nf) |
| 47 | { |
Eduardo Habkost | 50cd7d5 | 2020-09-02 18:42:48 -0400 | [diff] [blame] | 48 | RewriterState *s = FILTER_REWRITER(nf); |
Zhang Chen | e6eee8a | 2016-09-27 10:22:32 +0800 | [diff] [blame] | 49 | |
| 50 | if (!qemu_net_queue_flush(s->incoming_queue)) { |
| 51 | /* Unable to empty the queue, purge remaining packets */ |
| 52 | qemu_net_queue_purge(s->incoming_queue, nf->netdev); |
| 53 | } |
| 54 | } |
| 55 | |
Zhang Chen | afe4612 | 2016-09-27 10:22:33 +0800 | [diff] [blame] | 56 | /* |
| 57 | * Return 1 on success, if return 0 means the pkt |
| 58 | * is not TCP packet |
| 59 | */ |
| 60 | static int is_tcp_packet(Packet *pkt) |
| 61 | { |
| 62 | if (!parse_packet_early(pkt) && |
| 63 | pkt->ip->ip_p == IPPROTO_TCP) { |
| 64 | return 1; |
| 65 | } else { |
| 66 | return 0; |
| 67 | } |
| 68 | } |
| 69 | |
Zhang Chen | 30656b0 | 2016-09-27 10:22:34 +0800 | [diff] [blame] | 70 | /* handle tcp packet from primary guest */ |
Zhang Chen | 6214231 | 2018-09-14 01:47:53 +0000 | [diff] [blame] | 71 | static int handle_primary_tcp_pkt(RewriterState *rf, |
Zhang Chen | 30656b0 | 2016-09-27 10:22:34 +0800 | [diff] [blame] | 72 | Connection *conn, |
Zhang Chen | 6214231 | 2018-09-14 01:47:53 +0000 | [diff] [blame] | 73 | Packet *pkt, ConnectionKey *key) |
Zhang Chen | 30656b0 | 2016-09-27 10:22:34 +0800 | [diff] [blame] | 74 | { |
Marc-André Lureau | e05ae1d | 2018-11-14 16:36:40 +0400 | [diff] [blame] | 75 | struct tcp_hdr *tcp_pkt; |
Zhang Chen | 30656b0 | 2016-09-27 10:22:34 +0800 | [diff] [blame] | 76 | |
Marc-André Lureau | e05ae1d | 2018-11-14 16:36:40 +0400 | [diff] [blame] | 77 | tcp_pkt = (struct tcp_hdr *)pkt->transport_header; |
Roman Bolshakov | 8c8ed03 | 2020-07-17 12:35:17 +0300 | [diff] [blame] | 78 | if (trace_event_get_state_backends(TRACE_COLO_FILTER_REWRITER_PKT_INFO)) { |
Zhang Chen | 2061c14 | 2016-10-17 17:23:59 +0800 | [diff] [blame] | 79 | trace_colo_filter_rewriter_pkt_info(__func__, |
| 80 | inet_ntoa(pkt->ip->ip_src), inet_ntoa(pkt->ip->ip_dst), |
Zhang Chen | 30656b0 | 2016-09-27 10:22:34 +0800 | [diff] [blame] | 81 | ntohl(tcp_pkt->th_seq), ntohl(tcp_pkt->th_ack), |
| 82 | tcp_pkt->th_flags); |
Roman Bolshakov | 8c8ed03 | 2020-07-17 12:35:17 +0300 | [diff] [blame] | 83 | } |
| 84 | if (trace_event_get_state_backends( |
| 85 | TRACE_COLO_FILTER_REWRITER_CONN_OFFSET)) { |
Zhang Chen | 30656b0 | 2016-09-27 10:22:34 +0800 | [diff] [blame] | 86 | trace_colo_filter_rewriter_conn_offset(conn->offset); |
Zhang Chen | 30656b0 | 2016-09-27 10:22:34 +0800 | [diff] [blame] | 87 | } |
| 88 | |
Zhang Chen | 6214231 | 2018-09-14 01:47:53 +0000 | [diff] [blame] | 89 | if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == (TH_ACK | TH_SYN)) && |
| 90 | conn->tcp_state == TCPS_SYN_SENT) { |
| 91 | conn->tcp_state = TCPS_ESTABLISHED; |
| 92 | } |
| 93 | |
Zhang Chen | 30656b0 | 2016-09-27 10:22:34 +0800 | [diff] [blame] | 94 | if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_SYN)) { |
| 95 | /* |
| 96 | * we use this flag update offset func |
| 97 | * run once in independent tcp connection |
| 98 | */ |
Zhang Chen | 6214231 | 2018-09-14 01:47:53 +0000 | [diff] [blame] | 99 | conn->tcp_state = TCPS_SYN_RECEIVED; |
Zhang Chen | 30656b0 | 2016-09-27 10:22:34 +0800 | [diff] [blame] | 100 | } |
| 101 | |
| 102 | if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK)) { |
Zhang Chen | 6214231 | 2018-09-14 01:47:53 +0000 | [diff] [blame] | 103 | if (conn->tcp_state == TCPS_SYN_RECEIVED) { |
Zhang Chen | 30656b0 | 2016-09-27 10:22:34 +0800 | [diff] [blame] | 104 | /* |
| 105 | * offset = secondary_seq - primary seq |
| 106 | * ack packet sent by guest from primary node, |
| 107 | * so we use th_ack - 1 get primary_seq |
| 108 | */ |
| 109 | conn->offset -= (ntohl(tcp_pkt->th_ack) - 1); |
Zhang Chen | 6214231 | 2018-09-14 01:47:53 +0000 | [diff] [blame] | 110 | conn->tcp_state = TCPS_ESTABLISHED; |
Zhang Chen | 30656b0 | 2016-09-27 10:22:34 +0800 | [diff] [blame] | 111 | } |
zhanghailiang | db0a762 | 2017-02-28 11:54:19 +0800 | [diff] [blame] | 112 | if (conn->offset) { |
| 113 | /* handle packets to the secondary from the primary */ |
| 114 | tcp_pkt->th_ack = htonl(ntohl(tcp_pkt->th_ack) + conn->offset); |
Zhang Chen | 30656b0 | 2016-09-27 10:22:34 +0800 | [diff] [blame] | 115 | |
Zhang Chen | 6ce310b | 2017-07-28 18:03:10 +0800 | [diff] [blame] | 116 | net_checksum_calculate((uint8_t *)pkt->data + pkt->vnet_hdr_len, |
Bin Meng | f574633 | 2020-12-11 17:35:12 +0800 | [diff] [blame] | 117 | pkt->size - pkt->vnet_hdr_len, CSUM_TCP); |
zhanghailiang | db0a762 | 2017-02-28 11:54:19 +0800 | [diff] [blame] | 118 | } |
Zhang Chen | 6214231 | 2018-09-14 01:47:53 +0000 | [diff] [blame] | 119 | |
| 120 | /* |
| 121 | * Passive close step 3 |
| 122 | */ |
| 123 | if ((conn->tcp_state == TCPS_LAST_ACK) && |
| 124 | (ntohl(tcp_pkt->th_ack) == (conn->fin_ack_seq + 1))) { |
| 125 | conn->tcp_state = TCPS_CLOSED; |
| 126 | g_hash_table_remove(rf->connection_track_table, key); |
| 127 | } |
| 128 | } |
| 129 | |
| 130 | if ((tcp_pkt->th_flags & TH_FIN) == TH_FIN) { |
| 131 | /* |
| 132 | * Passive close. |
| 133 | * Step 1: |
| 134 | * The *server* side of this connect is VM, *client* tries to close |
| 135 | * the connection. We will into CLOSE_WAIT status. |
| 136 | * |
| 137 | * Step 2: |
| 138 | * In this step we will into LAST_ACK status. |
| 139 | * |
| 140 | * We got 'fin=1, ack=1' packet from server side, we need to |
| 141 | * record the seq of 'fin=1, ack=1' packet. |
| 142 | * |
| 143 | * Step 3: |
| 144 | * We got 'ack=1' packets from client side, it acks 'fin=1, ack=1' |
| 145 | * packet from server side. From this point, we can ensure that there |
| 146 | * will be no packets in the connection, except that, some errors |
| 147 | * happen between the path of 'filter object' and vNIC, if this rare |
| 148 | * case really happen, we can still create a new connection, |
| 149 | * So it is safe to remove the connection from connection_track_table. |
| 150 | * |
| 151 | */ |
| 152 | if (conn->tcp_state == TCPS_ESTABLISHED) { |
| 153 | conn->tcp_state = TCPS_CLOSE_WAIT; |
| 154 | } |
| 155 | |
| 156 | /* |
| 157 | * Active close step 2. |
| 158 | */ |
| 159 | if (conn->tcp_state == TCPS_FIN_WAIT_1) { |
Zhang Chen | 6214231 | 2018-09-14 01:47:53 +0000 | [diff] [blame] | 160 | /* |
| 161 | * For simplify implementation, we needn't wait 2MSL time |
| 162 | * in filter rewriter. Because guest kernel will track the |
| 163 | * TCP status and wait 2MSL time, if client resend the FIN |
| 164 | * packet, guest will apply the last ACK too. |
Zhang Chen | 013a620 | 2018-10-31 08:50:15 +0800 | [diff] [blame] | 165 | * So, we skip the TCPS_TIME_WAIT state here and go straight |
| 166 | * to TCPS_CLOSED state. |
Zhang Chen | 6214231 | 2018-09-14 01:47:53 +0000 | [diff] [blame] | 167 | */ |
| 168 | conn->tcp_state = TCPS_CLOSED; |
| 169 | g_hash_table_remove(rf->connection_track_table, key); |
| 170 | } |
Zhang Chen | 30656b0 | 2016-09-27 10:22:34 +0800 | [diff] [blame] | 171 | } |
| 172 | |
| 173 | return 0; |
| 174 | } |
| 175 | |
| 176 | /* handle tcp packet from secondary guest */ |
Zhang Chen | 6214231 | 2018-09-14 01:47:53 +0000 | [diff] [blame] | 177 | static int handle_secondary_tcp_pkt(RewriterState *rf, |
Zhang Chen | 30656b0 | 2016-09-27 10:22:34 +0800 | [diff] [blame] | 178 | Connection *conn, |
Zhang Chen | 6214231 | 2018-09-14 01:47:53 +0000 | [diff] [blame] | 179 | Packet *pkt, ConnectionKey *key) |
Zhang Chen | 30656b0 | 2016-09-27 10:22:34 +0800 | [diff] [blame] | 180 | { |
Marc-André Lureau | e05ae1d | 2018-11-14 16:36:40 +0400 | [diff] [blame] | 181 | struct tcp_hdr *tcp_pkt; |
Zhang Chen | 30656b0 | 2016-09-27 10:22:34 +0800 | [diff] [blame] | 182 | |
Marc-André Lureau | e05ae1d | 2018-11-14 16:36:40 +0400 | [diff] [blame] | 183 | tcp_pkt = (struct tcp_hdr *)pkt->transport_header; |
Zhang Chen | 30656b0 | 2016-09-27 10:22:34 +0800 | [diff] [blame] | 184 | |
Roman Bolshakov | 8c8ed03 | 2020-07-17 12:35:17 +0300 | [diff] [blame] | 185 | if (trace_event_get_state_backends(TRACE_COLO_FILTER_REWRITER_PKT_INFO)) { |
Zhang Chen | 2061c14 | 2016-10-17 17:23:59 +0800 | [diff] [blame] | 186 | trace_colo_filter_rewriter_pkt_info(__func__, |
| 187 | inet_ntoa(pkt->ip->ip_src), inet_ntoa(pkt->ip->ip_dst), |
Zhang Chen | 30656b0 | 2016-09-27 10:22:34 +0800 | [diff] [blame] | 188 | ntohl(tcp_pkt->th_seq), ntohl(tcp_pkt->th_ack), |
| 189 | tcp_pkt->th_flags); |
Roman Bolshakov | 8c8ed03 | 2020-07-17 12:35:17 +0300 | [diff] [blame] | 190 | } |
| 191 | if (trace_event_get_state_backends( |
| 192 | TRACE_COLO_FILTER_REWRITER_CONN_OFFSET)) { |
Zhang Chen | 30656b0 | 2016-09-27 10:22:34 +0800 | [diff] [blame] | 193 | trace_colo_filter_rewriter_conn_offset(conn->offset); |
Zhang Chen | 30656b0 | 2016-09-27 10:22:34 +0800 | [diff] [blame] | 194 | } |
| 195 | |
Zhang Chen | 6214231 | 2018-09-14 01:47:53 +0000 | [diff] [blame] | 196 | if (conn->tcp_state == TCPS_SYN_RECEIVED && |
| 197 | ((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == (TH_ACK | TH_SYN))) { |
Zhang Chen | 30656b0 | 2016-09-27 10:22:34 +0800 | [diff] [blame] | 198 | /* |
| 199 | * save offset = secondary_seq and then |
| 200 | * in handle_primary_tcp_pkt make offset |
| 201 | * = secondary_seq - primary_seq |
| 202 | */ |
| 203 | conn->offset = ntohl(tcp_pkt->th_seq); |
| 204 | } |
| 205 | |
Zhang Chen | 6214231 | 2018-09-14 01:47:53 +0000 | [diff] [blame] | 206 | /* VM active connect */ |
| 207 | if (conn->tcp_state == TCPS_CLOSED && |
| 208 | ((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_SYN)) { |
| 209 | conn->tcp_state = TCPS_SYN_SENT; |
| 210 | } |
| 211 | |
Zhang Chen | 30656b0 | 2016-09-27 10:22:34 +0800 | [diff] [blame] | 212 | if ((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK) { |
zhanghailiang | db0a762 | 2017-02-28 11:54:19 +0800 | [diff] [blame] | 213 | /* Only need to adjust seq while offset is Non-zero */ |
| 214 | if (conn->offset) { |
| 215 | /* handle packets to the primary from the secondary*/ |
| 216 | tcp_pkt->th_seq = htonl(ntohl(tcp_pkt->th_seq) - conn->offset); |
Zhang Chen | 30656b0 | 2016-09-27 10:22:34 +0800 | [diff] [blame] | 217 | |
Zhang Chen | 6ce310b | 2017-07-28 18:03:10 +0800 | [diff] [blame] | 218 | net_checksum_calculate((uint8_t *)pkt->data + pkt->vnet_hdr_len, |
Bin Meng | f574633 | 2020-12-11 17:35:12 +0800 | [diff] [blame] | 219 | pkt->size - pkt->vnet_hdr_len, CSUM_TCP); |
zhanghailiang | db0a762 | 2017-02-28 11:54:19 +0800 | [diff] [blame] | 220 | } |
Zhang Chen | 30656b0 | 2016-09-27 10:22:34 +0800 | [diff] [blame] | 221 | } |
| 222 | |
Zhang Chen | 6214231 | 2018-09-14 01:47:53 +0000 | [diff] [blame] | 223 | /* |
| 224 | * Passive close step 2: |
| 225 | */ |
| 226 | if (conn->tcp_state == TCPS_CLOSE_WAIT && |
| 227 | (tcp_pkt->th_flags & (TH_ACK | TH_FIN)) == (TH_ACK | TH_FIN)) { |
| 228 | conn->fin_ack_seq = ntohl(tcp_pkt->th_seq); |
| 229 | conn->tcp_state = TCPS_LAST_ACK; |
| 230 | } |
| 231 | |
| 232 | /* |
| 233 | * Active close |
| 234 | * |
| 235 | * Step 1: |
| 236 | * The *server* side of this connect is VM, *server* tries to close |
| 237 | * the connection. |
| 238 | * |
| 239 | * Step 2: |
| 240 | * We will into CLOSE_WAIT status. |
| 241 | * We simplify the TCPS_FIN_WAIT_2, TCPS_TIME_WAIT and |
| 242 | * CLOSING status. |
| 243 | */ |
| 244 | if (conn->tcp_state == TCPS_ESTABLISHED && |
| 245 | (tcp_pkt->th_flags & (TH_ACK | TH_FIN)) == TH_FIN) { |
| 246 | conn->tcp_state = TCPS_FIN_WAIT_1; |
| 247 | } |
| 248 | |
Zhang Chen | 30656b0 | 2016-09-27 10:22:34 +0800 | [diff] [blame] | 249 | return 0; |
| 250 | } |
| 251 | |
Zhang Chen | e6eee8a | 2016-09-27 10:22:32 +0800 | [diff] [blame] | 252 | static ssize_t colo_rewriter_receive_iov(NetFilterState *nf, |
| 253 | NetClientState *sender, |
| 254 | unsigned flags, |
| 255 | const struct iovec *iov, |
| 256 | int iovcnt, |
| 257 | NetPacketSent *sent_cb) |
| 258 | { |
Eduardo Habkost | 50cd7d5 | 2020-09-02 18:42:48 -0400 | [diff] [blame] | 259 | RewriterState *s = FILTER_REWRITER(nf); |
Zhang Chen | afe4612 | 2016-09-27 10:22:33 +0800 | [diff] [blame] | 260 | Connection *conn; |
| 261 | ConnectionKey key; |
| 262 | Packet *pkt; |
| 263 | ssize_t size = iov_size(iov, iovcnt); |
Zhang Chen | 4b39bdc | 2017-07-04 14:53:55 +0800 | [diff] [blame] | 264 | ssize_t vnet_hdr_len = 0; |
Zhang Chen | afe4612 | 2016-09-27 10:22:33 +0800 | [diff] [blame] | 265 | char *buf = g_malloc0(size); |
| 266 | |
| 267 | iov_to_buf(iov, iovcnt, 0, buf, size); |
Zhang Chen | 4b39bdc | 2017-07-04 14:53:55 +0800 | [diff] [blame] | 268 | |
| 269 | if (s->vnet_hdr) { |
| 270 | vnet_hdr_len = nf->netdev->vnet_hdr_len; |
| 271 | } |
| 272 | |
| 273 | pkt = packet_new(buf, size, vnet_hdr_len); |
Zhang Chen | 2061c14 | 2016-10-17 17:23:59 +0800 | [diff] [blame] | 274 | g_free(buf); |
Zhang Chen | afe4612 | 2016-09-27 10:22:33 +0800 | [diff] [blame] | 275 | |
Zhang Chen | e6eee8a | 2016-09-27 10:22:32 +0800 | [diff] [blame] | 276 | /* |
| 277 | * if we get tcp packet |
| 278 | * we will rewrite it to make secondary guest's |
| 279 | * connection established successfully |
| 280 | */ |
Zhang Chen | afe4612 | 2016-09-27 10:22:33 +0800 | [diff] [blame] | 281 | if (pkt && is_tcp_packet(pkt)) { |
| 282 | |
| 283 | fill_connection_key(pkt, &key); |
| 284 | |
| 285 | if (sender == nf->netdev) { |
| 286 | /* |
| 287 | * We need make tcp TX and RX packet |
| 288 | * into one connection. |
| 289 | */ |
| 290 | reverse_connection_key(&key); |
| 291 | } |
Zhang Chen | 24525e9 | 2018-09-03 12:38:57 +0800 | [diff] [blame] | 292 | |
| 293 | /* After failover we needn't change new TCP packet */ |
| 294 | if (s->failover_mode && |
| 295 | !connection_has_tracked(s->connection_track_table, &key)) { |
| 296 | goto out; |
| 297 | } |
| 298 | |
Zhang Chen | afe4612 | 2016-09-27 10:22:33 +0800 | [diff] [blame] | 299 | conn = connection_get(s->connection_track_table, |
| 300 | &key, |
| 301 | NULL); |
| 302 | |
| 303 | if (sender == nf->netdev) { |
| 304 | /* NET_FILTER_DIRECTION_TX */ |
Zhang Chen | 6214231 | 2018-09-14 01:47:53 +0000 | [diff] [blame] | 305 | if (!handle_primary_tcp_pkt(s, conn, pkt, &key)) { |
Zhang Chen | 30656b0 | 2016-09-27 10:22:34 +0800 | [diff] [blame] | 306 | qemu_net_queue_send(s->incoming_queue, sender, 0, |
| 307 | (const uint8_t *)pkt->data, pkt->size, NULL); |
| 308 | packet_destroy(pkt, NULL); |
| 309 | pkt = NULL; |
| 310 | /* |
| 311 | * We block the packet here,after rewrite pkt |
| 312 | * and will send it |
| 313 | */ |
| 314 | return 1; |
| 315 | } |
Zhang Chen | afe4612 | 2016-09-27 10:22:33 +0800 | [diff] [blame] | 316 | } else { |
| 317 | /* NET_FILTER_DIRECTION_RX */ |
Zhang Chen | 6214231 | 2018-09-14 01:47:53 +0000 | [diff] [blame] | 318 | if (!handle_secondary_tcp_pkt(s, conn, pkt, &key)) { |
Zhang Chen | 30656b0 | 2016-09-27 10:22:34 +0800 | [diff] [blame] | 319 | qemu_net_queue_send(s->incoming_queue, sender, 0, |
| 320 | (const uint8_t *)pkt->data, pkt->size, NULL); |
| 321 | packet_destroy(pkt, NULL); |
| 322 | pkt = NULL; |
| 323 | /* |
| 324 | * We block the packet here,after rewrite pkt |
| 325 | * and will send it |
| 326 | */ |
| 327 | return 1; |
| 328 | } |
Zhang Chen | afe4612 | 2016-09-27 10:22:33 +0800 | [diff] [blame] | 329 | } |
| 330 | } |
| 331 | |
Zhang Chen | 24525e9 | 2018-09-03 12:38:57 +0800 | [diff] [blame] | 332 | out: |
Zhang Chen | afe4612 | 2016-09-27 10:22:33 +0800 | [diff] [blame] | 333 | packet_destroy(pkt, NULL); |
| 334 | pkt = NULL; |
Zhang Chen | e6eee8a | 2016-09-27 10:22:32 +0800 | [diff] [blame] | 335 | return 0; |
| 336 | } |
| 337 | |
Zhang Chen | 24525e9 | 2018-09-03 12:38:57 +0800 | [diff] [blame] | 338 | static void reset_seq_offset(gpointer key, gpointer value, gpointer user_data) |
| 339 | { |
| 340 | Connection *conn = (Connection *)value; |
| 341 | |
| 342 | conn->offset = 0; |
| 343 | } |
| 344 | |
| 345 | static gboolean offset_is_nonzero(gpointer key, |
| 346 | gpointer value, |
| 347 | gpointer user_data) |
| 348 | { |
| 349 | Connection *conn = (Connection *)value; |
| 350 | |
| 351 | return conn->offset ? true : false; |
| 352 | } |
| 353 | |
| 354 | static void colo_rewriter_handle_event(NetFilterState *nf, int event, |
| 355 | Error **errp) |
| 356 | { |
Eduardo Habkost | 50cd7d5 | 2020-09-02 18:42:48 -0400 | [diff] [blame] | 357 | RewriterState *rs = FILTER_REWRITER(nf); |
Zhang Chen | 24525e9 | 2018-09-03 12:38:57 +0800 | [diff] [blame] | 358 | |
| 359 | switch (event) { |
| 360 | case COLO_EVENT_CHECKPOINT: |
| 361 | g_hash_table_foreach(rs->connection_track_table, |
| 362 | reset_seq_offset, NULL); |
| 363 | break; |
| 364 | case COLO_EVENT_FAILOVER: |
| 365 | if (!g_hash_table_find(rs->connection_track_table, |
| 366 | offset_is_nonzero, NULL)) { |
| 367 | filter_rewriter_failover_mode(rs); |
| 368 | } |
| 369 | break; |
| 370 | default: |
| 371 | break; |
| 372 | } |
| 373 | } |
| 374 | |
Zhang Chen | e6eee8a | 2016-09-27 10:22:32 +0800 | [diff] [blame] | 375 | static void colo_rewriter_cleanup(NetFilterState *nf) |
| 376 | { |
Eduardo Habkost | 50cd7d5 | 2020-09-02 18:42:48 -0400 | [diff] [blame] | 377 | RewriterState *s = FILTER_REWRITER(nf); |
Zhang Chen | e6eee8a | 2016-09-27 10:22:32 +0800 | [diff] [blame] | 378 | |
| 379 | /* flush packets */ |
| 380 | if (s->incoming_queue) { |
| 381 | filter_rewriter_flush(nf); |
| 382 | g_free(s->incoming_queue); |
| 383 | } |
Pan Nengyuan | b492a4b | 2020-10-16 13:51:59 +0800 | [diff] [blame] | 384 | |
| 385 | g_hash_table_destroy(s->connection_track_table); |
Zhang Chen | e6eee8a | 2016-09-27 10:22:32 +0800 | [diff] [blame] | 386 | } |
| 387 | |
| 388 | static void colo_rewriter_setup(NetFilterState *nf, Error **errp) |
| 389 | { |
Eduardo Habkost | 50cd7d5 | 2020-09-02 18:42:48 -0400 | [diff] [blame] | 390 | RewriterState *s = FILTER_REWRITER(nf); |
Zhang Chen | e6eee8a | 2016-09-27 10:22:32 +0800 | [diff] [blame] | 391 | |
| 392 | s->connection_track_table = g_hash_table_new_full(connection_key_hash, |
| 393 | connection_key_equal, |
| 394 | g_free, |
| 395 | connection_destroy); |
| 396 | s->incoming_queue = qemu_new_net_queue(qemu_netfilter_pass_to_next, nf); |
| 397 | } |
| 398 | |
Zhang Chen | 4b39bdc | 2017-07-04 14:53:55 +0800 | [diff] [blame] | 399 | static bool filter_rewriter_get_vnet_hdr(Object *obj, Error **errp) |
| 400 | { |
Eduardo Habkost | 50cd7d5 | 2020-09-02 18:42:48 -0400 | [diff] [blame] | 401 | RewriterState *s = FILTER_REWRITER(obj); |
Zhang Chen | 4b39bdc | 2017-07-04 14:53:55 +0800 | [diff] [blame] | 402 | |
| 403 | return s->vnet_hdr; |
| 404 | } |
| 405 | |
| 406 | static void filter_rewriter_set_vnet_hdr(Object *obj, |
| 407 | bool value, |
| 408 | Error **errp) |
| 409 | { |
Eduardo Habkost | 50cd7d5 | 2020-09-02 18:42:48 -0400 | [diff] [blame] | 410 | RewriterState *s = FILTER_REWRITER(obj); |
Zhang Chen | 4b39bdc | 2017-07-04 14:53:55 +0800 | [diff] [blame] | 411 | |
| 412 | s->vnet_hdr = value; |
| 413 | } |
| 414 | |
| 415 | static void filter_rewriter_init(Object *obj) |
| 416 | { |
Eduardo Habkost | 50cd7d5 | 2020-09-02 18:42:48 -0400 | [diff] [blame] | 417 | RewriterState *s = FILTER_REWRITER(obj); |
Zhang Chen | 4b39bdc | 2017-07-04 14:53:55 +0800 | [diff] [blame] | 418 | |
| 419 | s->vnet_hdr = false; |
Zhang Chen | 24525e9 | 2018-09-03 12:38:57 +0800 | [diff] [blame] | 420 | s->failover_mode = FAILOVER_MODE_OFF; |
Zhang Chen | 4b39bdc | 2017-07-04 14:53:55 +0800 | [diff] [blame] | 421 | } |
| 422 | |
Zhang Chen | e6eee8a | 2016-09-27 10:22:32 +0800 | [diff] [blame] | 423 | static void colo_rewriter_class_init(ObjectClass *oc, void *data) |
| 424 | { |
| 425 | NetFilterClass *nfc = NETFILTER_CLASS(oc); |
| 426 | |
Eduardo Habkost | f0e34a0 | 2020-11-11 13:38:22 -0500 | [diff] [blame] | 427 | object_class_property_add_bool(oc, "vnet_hdr_support", |
| 428 | filter_rewriter_get_vnet_hdr, |
| 429 | filter_rewriter_set_vnet_hdr); |
| 430 | |
Zhang Chen | e6eee8a | 2016-09-27 10:22:32 +0800 | [diff] [blame] | 431 | nfc->setup = colo_rewriter_setup; |
| 432 | nfc->cleanup = colo_rewriter_cleanup; |
| 433 | nfc->receive_iov = colo_rewriter_receive_iov; |
Zhang Chen | 24525e9 | 2018-09-03 12:38:57 +0800 | [diff] [blame] | 434 | nfc->handle_event = colo_rewriter_handle_event; |
Zhang Chen | e6eee8a | 2016-09-27 10:22:32 +0800 | [diff] [blame] | 435 | } |
| 436 | |
| 437 | static const TypeInfo colo_rewriter_info = { |
| 438 | .name = TYPE_FILTER_REWRITER, |
| 439 | .parent = TYPE_NETFILTER, |
| 440 | .class_init = colo_rewriter_class_init, |
Zhang Chen | 4b39bdc | 2017-07-04 14:53:55 +0800 | [diff] [blame] | 441 | .instance_init = filter_rewriter_init, |
Zhang Chen | e6eee8a | 2016-09-27 10:22:32 +0800 | [diff] [blame] | 442 | .instance_size = sizeof(RewriterState), |
| 443 | }; |
| 444 | |
/* Register the filter-rewriter type described by colo_rewriter_info. */
static void register_types(void)
{
    type_register_static(&colo_rewriter_info);
}

/* Hook register_types() into QEMU's type initialisation. */
type_init(register_types);