-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1 iQEbBAABAgAGBQJX6kSEAAoJEO8Ells5jWIRKAkH9iMMzN9USOroQIWmiyMf5S7F mlsSeSccv+U5gA6wCJooA0dwMnAFnxJ3rTcV6BEL0jE0cVHanR61eDfpeOC0lKXw NUWc91Bf4Epg0cTk9fV6yv6xZOcuN/twukrQIEZjfldpbP0ba+WoBx3x0sdYen+M Xjaix011CUEx5VmVMx8g/LbnM8s1WO+CjEjIpWAas+1M68P+elne5nOaTaj+FyzV E9BkUkcXd5ByzikYRykgS/OJGRd7S+BBSFluISekwGjTcppRccAwZsGkgYXRrF3U 1g1LOT2xuz777uP7hBqZQRyZIAaOiLY89WUFuCL1BBLbbkAnT799J/e/n6sRSg== =2gpR -----END PGP SIGNATURE----- Merge remote-tracking branch 'remotes/jasowang/tags/net-pull-request' into staging # gpg: Signature made Tue 27 Sep 2016 11:05:56 BST # gpg: using RSA key 0xEF04965B398D6211 # gpg: Good signature from "Jason Wang (Jason Wang on RedHat) <jasowang@redhat.com>" # gpg: WARNING: This key is not certified with sufficiently trusted signatures! # gpg: It is not certain that the signature belongs to the owner. # Primary key fingerprint: 215D 46F4 8246 689E C77F 3562 EF04 965B 398D 6211 * remotes/jasowang/tags/net-pull-request: (27 commits) imx_fec: fix error in qemu_send_packet argument mcf_fec: fix error in qemu_send_packet argument net: mcf: limit buffer descriptor count e1000e: Fix EIAC register implementation e1000e: Fix spurious RX TCP ACK interrupts e1000e: Fix OTHER interrupts processing for MSI-X e1000e: Fix PBACLR implementation e1000e: Fix CTRL_EXT.EIAME behavior e1000e: Flush receive queues on link up e1000e: Flush all receive queues on receive enable net: limit allocation in nc_sendv_compat tap: Allow specifying a bridge e1000: fix buliding complaint docs: Add documentation for COLO-proxy MAINTAINERS: add maintainer for COLO-proxy filter-rewriter: rewrite tcp packet to keep secondary connection filter-rewriter: track connection and parse packet filter-rewriter: introduce filter-rewriter initialization colo-compare: add TCP, UDP, ICMP packet comparison colo-compare: introduce packet comparison thread ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
commit
333ec4ca6a
|
@ -1364,6 +1364,15 @@ F: util/uuid.c
|
||||||
F: include/qemu/uuid.h
|
F: include/qemu/uuid.h
|
||||||
F: tests/test-uuid.c
|
F: tests/test-uuid.c
|
||||||
|
|
||||||
|
COLO Proxy
|
||||||
|
M: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
|
||||||
|
M: Li Zhijian <lizhijian@cn.fujitsu.com>
|
||||||
|
S: Supported
|
||||||
|
F: docs/colo-proxy.txt
|
||||||
|
F: net/colo*
|
||||||
|
F: net/filter-rewriter.c
|
||||||
|
F: net/filter-mirror.c
|
||||||
|
|
||||||
Usermode Emulation
|
Usermode Emulation
|
||||||
------------------
|
------------------
|
||||||
Overall
|
Overall
|
||||||
|
|
|
@ -0,0 +1,188 @@
|
||||||
|
COLO-proxy
|
||||||
|
----------
|
||||||
|
Copyright (c) 2016 Intel Corporation
|
||||||
|
Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
|
||||||
|
Copyright (c) 2016 Fujitsu, Corp.
|
||||||
|
|
||||||
|
This work is licensed under the terms of the GNU GPL, version 2 or later.
|
||||||
|
See the COPYING file in the top-level directory.
|
||||||
|
|
||||||
|
This document gives an overview of COLO proxy's design.
|
||||||
|
|
||||||
|
== Background ==
|
||||||
|
COLO-proxy is a part of COLO project. It is used
|
||||||
|
to compare the network package to help COLO decide
|
||||||
|
whether to do checkpoint. With COLO-proxy's help,
|
||||||
|
COLO greatly improves the performance.
|
||||||
|
|
||||||
|
The filter-redirector, filter-mirror, colo-compare
|
||||||
|
and filter-rewriter compose the COLO-proxy.
|
||||||
|
|
||||||
|
== Architecture ==
|
||||||
|
|
||||||
|
COLO-Proxy is based on qemu netfilter and it's a plugin for qemu netfilter
|
||||||
|
(except colo-compare). It keep Secondary VM connect normally to
|
||||||
|
client and compare packets sent by PVM with sent by SVM.
|
||||||
|
If the packet difference, notify COLO-frame to do checkpoint and send
|
||||||
|
all primary packet has queued. Otherwise just send the queued primary
|
||||||
|
packet and drop the queued secondary packet.
|
||||||
|
|
||||||
|
Below is a COLO proxy ascii figure:
|
||||||
|
|
||||||
|
Primary qemu Secondary qemu
|
||||||
|
+--------------------------------------------------------------+ +----------------------------------------------------------------+
|
||||||
|
| +----------------------------------------------------------+ | | +-----------------------------------------------------------+ |
|
||||||
|
| | | | | | | |
|
||||||
|
| | guest | | | | guest | |
|
||||||
|
| | | | | | | |
|
||||||
|
| +-------^--------------------------+-----------------------+ | | +---------------------+--------+----------------------------+ |
|
||||||
|
| | | | | ^ | |
|
||||||
|
| | | | | | | |
|
||||||
|
| | +------------------------------------------------------+ | | | |
|
||||||
|
|netfilter| | | | | | netfilter | | |
|
||||||
|
| +----------+ +----------------------------+ | | | +-----------------------------------------------------------+ |
|
||||||
|
| | | | | | out | | | | | | filter excute order | |
|
||||||
|
| | | | +-----------------------------+ | | | | | | +-------------------> | |
|
||||||
|
| | | | | | | | | | | | | | TCP | |
|
||||||
|
| | +-----+--+-+ +-----v----+ +-----v----+ |pri +----+----+sec| | | | +------------+ +---+----+---v+rewriter++ +------------+ | |
|
||||||
|
| | | | | | | | |in | |in | | | | | | | | | | | | |
|
||||||
|
| | | filter | | filter | | filter +------> colo <------+ +--------> filter +--> adjust | adjust +--> filter | | |
|
||||||
|
| | | mirror | |redirector| |redirector| | | compare | | | | | | redirector | | ack | seq | | redirector | | |
|
||||||
|
| | | | | | | | | | | | | | | | | | | | | | | |
|
||||||
|
| | +----^-----+ +----+-----+ +----------+ | +---------+ | | | | +------------+ +--------+--------------+ +---+--------+ | |
|
||||||
|
| | | tx | rx rx | | | | | tx all | rx | |
|
||||||
|
| | | | | | | | +-----------------------------------------------------------+ |
|
||||||
|
| | | +--------------+ | | | | | |
|
||||||
|
| | | filter excute order | | | | | | |
|
||||||
|
| | | +----------------> | | | +--------------------------------------------------------+ |
|
||||||
|
| +-----------------------------------------+ | | |
|
||||||
|
| | | | | |
|
||||||
|
+--------------------------------------------------------------+ +----------------------------------------------------------------+
|
||||||
|
|guest receive | guest send
|
||||||
|
| |
|
||||||
|
+--------+----------------------------v------------------------+
|
||||||
|
| | NOTE: filter direction is rx/tx/all
|
||||||
|
| tap | rx:receive packets sent to the netdev
|
||||||
|
| | tx:receive packets sent by the netdev
|
||||||
|
+--------------------------------------------------------------+
|
||||||
|
|
||||||
|
1.Guest receive packet route:
|
||||||
|
|
||||||
|
Primary:
|
||||||
|
|
||||||
|
Tap --> Mirror Client Filter
|
||||||
|
Mirror client will send packet to guest,at the
|
||||||
|
same time, copy and forward packet to secondary
|
||||||
|
mirror server.
|
||||||
|
|
||||||
|
Secondary:
|
||||||
|
|
||||||
|
Mirror Server Filter --> TCP Rewriter
|
||||||
|
If receive packet is TCP packet,we will adjust ack
|
||||||
|
and update TCP checksum, then send to secondary
|
||||||
|
guest. Otherwise directly send to guest.
|
||||||
|
|
||||||
|
2.Guest send packet route:
|
||||||
|
|
||||||
|
Primary:
|
||||||
|
|
||||||
|
Guest --> Redirect Server Filter
|
||||||
|
Redirect server filter receive primary guest packet
|
||||||
|
but do nothing, just pass to next filter.
|
||||||
|
|
||||||
|
Redirect Server Filter --> COLO-Compare
|
||||||
|
COLO-compare receive primary guest packet then
|
||||||
|
waiting scondary redirect packet to compare it.
|
||||||
|
If packet same,send queued primary packet and clear
|
||||||
|
queued secondary packet, Otherwise send primary packet
|
||||||
|
and do checkpoint.
|
||||||
|
|
||||||
|
COLO-Compare --> Another Redirector Filter
|
||||||
|
The redirector get packet from colo-compare by use
|
||||||
|
chardev socket.
|
||||||
|
|
||||||
|
Redirector Filter --> Tap
|
||||||
|
Send the packet.
|
||||||
|
|
||||||
|
Secondary:
|
||||||
|
|
||||||
|
Guest --> TCP Rewriter Filter
|
||||||
|
If the packet is TCP packet,we will adjust seq
|
||||||
|
and update TCP checksum. Then send it to
|
||||||
|
redirect client filter. Otherwise directly send to
|
||||||
|
redirect client filter.
|
||||||
|
|
||||||
|
Redirect Client Filter --> Redirect Server Filter
|
||||||
|
Forward packet to primary.
|
||||||
|
|
||||||
|
== Components introduction ==
|
||||||
|
|
||||||
|
Filter-mirror is a netfilter plugin.
|
||||||
|
It gives qemu the ability to mirror
|
||||||
|
packets to a chardev.
|
||||||
|
|
||||||
|
Filter-redirector is a netfilter plugin.
|
||||||
|
It gives qemu the ability to redirect net packet.
|
||||||
|
Redirector can redirect filter's net packet to outdev,
|
||||||
|
and redirect indev's packet to filter.
|
||||||
|
|
||||||
|
filter
|
||||||
|
+
|
||||||
|
redirector |
|
||||||
|
+--------------+
|
||||||
|
| | |
|
||||||
|
| | |
|
||||||
|
| | |
|
||||||
|
indev +---------+ +----------> outdev
|
||||||
|
| | |
|
||||||
|
| | |
|
||||||
|
| | |
|
||||||
|
+--------------+
|
||||||
|
|
|
||||||
|
v
|
||||||
|
filter
|
||||||
|
|
||||||
|
COLO-compare, we do packet comparing job.
|
||||||
|
Packets coming from the primary char indev will be sent to outdev.
|
||||||
|
Packets coming from the secondary char dev will be dropped after comparing.
|
||||||
|
COLO-comapre need two input chardev and one output chardev:
|
||||||
|
primary_in=chardev1-id (source: primary send packet)
|
||||||
|
secondary_in=chardev2-id (source: secondary send packet)
|
||||||
|
outdev=chardev3-id
|
||||||
|
|
||||||
|
Filter-rewriter will rewrite some of secondary packet to make
|
||||||
|
secondary guest's tcp connection established successfully.
|
||||||
|
In this module we will rewrite tcp packet's ack to the secondary
|
||||||
|
from primary,and rewrite tcp packet's seq to the primary from
|
||||||
|
secondary.
|
||||||
|
|
||||||
|
== Usage ==
|
||||||
|
|
||||||
|
Here, we use demo ip and port discribe more clearly.
|
||||||
|
Primary(ip:3.3.3.3):
|
||||||
|
-netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown
|
||||||
|
-device e1000,id=e0,netdev=hn0,mac=52:a4:00:12:78:66
|
||||||
|
-chardev socket,id=mirror0,host=3.3.3.3,port=9003,server,nowait
|
||||||
|
-chardev socket,id=compare1,host=3.3.3.3,port=9004,server,nowait
|
||||||
|
-chardev socket,id=compare0,host=3.3.3.3,port=9001,server,nowait
|
||||||
|
-chardev socket,id=compare0-0,host=3.3.3.3,port=9001
|
||||||
|
-chardev socket,id=compare_out,host=3.3.3.3,port=9005,server,nowait
|
||||||
|
-chardev socket,id=compare_out0,host=3.3.3.3,port=9005
|
||||||
|
-object filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0
|
||||||
|
-object filter-redirector,netdev=hn0,id=redire0,queue=rx,indev=compare_out
|
||||||
|
-object filter-redirector,netdev=hn0,id=redire1,queue=rx,outdev=compare0
|
||||||
|
-object colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,outdev=compare_out0
|
||||||
|
|
||||||
|
Secondary(ip:3.3.3.8):
|
||||||
|
-netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,down script=/etc/qemu-ifdown
|
||||||
|
-device e1000,netdev=hn0,mac=52:a4:00:12:78:66
|
||||||
|
-chardev socket,id=red0,host=3.3.3.3,port=9003
|
||||||
|
-chardev socket,id=red1,host=3.3.3.3,port=9004
|
||||||
|
-object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0
|
||||||
|
-object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1
|
||||||
|
|
||||||
|
Note:
|
||||||
|
a.COLO-proxy must work with COLO-frame and Block-replication.
|
||||||
|
b.Primary COLO must be started firstly, because COLO-proxy needs
|
||||||
|
chardev socket server running before secondary started.
|
||||||
|
c.Filter-rewriter only rewrite tcp packet.
|
|
@ -400,7 +400,7 @@ static void e1000e_write_config(PCIDevice *pci_dev, uint32_t address,
|
||||||
|
|
||||||
if (range_covers_byte(address, len, PCI_COMMAND) &&
|
if (range_covers_byte(address, len, PCI_COMMAND) &&
|
||||||
(pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
|
(pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
|
||||||
qemu_flush_queued_packets(qemu_get_queue(s->nic));
|
e1000e_start_recv(&s->core);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -953,7 +953,7 @@ e1000e_has_rxbufs(E1000ECore *core, const E1000E_RingInfo *r,
|
||||||
core->rx_desc_buf_size;
|
core->rx_desc_buf_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void
|
void
|
||||||
e1000e_start_recv(E1000ECore *core)
|
e1000e_start_recv(E1000ECore *core)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
@ -1710,7 +1710,8 @@ e1000e_receive_iov(E1000ECore *core, const struct iovec *iov, int iovcnt)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Perform ACK receive detection */
|
/* Perform ACK receive detection */
|
||||||
if (e1000e_is_tcp_ack(core, core->rx_pkt)) {
|
if (!(core->mac[RFCTL] & E1000_RFCTL_ACK_DIS) &&
|
||||||
|
(e1000e_is_tcp_ack(core, core->rx_pkt))) {
|
||||||
n |= E1000_ICS_ACK;
|
n |= E1000_ICS_ACK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1807,6 +1808,7 @@ e1000e_core_set_link_status(E1000ECore *core)
|
||||||
core->autoneg_timer);
|
core->autoneg_timer);
|
||||||
} else {
|
} else {
|
||||||
e1000x_update_regs_on_link_up(core->mac, core->phy[0]);
|
e1000x_update_regs_on_link_up(core->mac, core->phy[0]);
|
||||||
|
e1000e_start_recv(core);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2007,19 +2009,23 @@ e1000e_msix_notify_one(E1000ECore *core, uint32_t cause, uint32_t int_cfg)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (core->mac[CTRL_EXT] & E1000_CTRL_EXT_EIAME) {
|
if (core->mac[CTRL_EXT] & E1000_CTRL_EXT_EIAME) {
|
||||||
trace_e1000e_irq_ims_clear_eiame(core->mac[IAM], cause);
|
trace_e1000e_irq_iam_clear_eiame(core->mac[IAM], cause);
|
||||||
e1000e_clear_ims_bits(core, core->mac[IAM] & cause);
|
core->mac[IAM] &= ~cause;
|
||||||
}
|
}
|
||||||
|
|
||||||
trace_e1000e_irq_icr_clear_eiac(core->mac[ICR], core->mac[EIAC]);
|
trace_e1000e_irq_icr_clear_eiac(core->mac[ICR], core->mac[EIAC]);
|
||||||
|
|
||||||
if (core->mac[EIAC] & E1000_ICR_OTHER) {
|
effective_eiac = core->mac[EIAC] & cause;
|
||||||
effective_eiac = (core->mac[EIAC] & E1000_EIAC_MASK) |
|
|
||||||
E1000_ICR_OTHER_CAUSES;
|
if (effective_eiac == E1000_ICR_OTHER) {
|
||||||
} else {
|
effective_eiac |= E1000_ICR_OTHER_CAUSES;
|
||||||
effective_eiac = core->mac[EIAC] & E1000_EIAC_MASK;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
core->mac[ICR] &= ~effective_eiac;
|
core->mac[ICR] &= ~effective_eiac;
|
||||||
|
|
||||||
|
if (!(core->mac[CTRL_EXT] & E1000_CTRL_EXT_IAME)) {
|
||||||
|
core->mac[IMS] &= ~effective_eiac;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
@ -2130,7 +2136,7 @@ e1000e_update_interrupt_state(E1000ECore *core)
|
||||||
|
|
||||||
/* Set ICR[OTHER] for MSI-X */
|
/* Set ICR[OTHER] for MSI-X */
|
||||||
if (is_msix) {
|
if (is_msix) {
|
||||||
if (core->mac[ICR] & core->mac[IMS] & E1000_ICR_OTHER_CAUSES) {
|
if (core->mac[ICR] & E1000_ICR_OTHER_CAUSES) {
|
||||||
core->mac[ICR] |= E1000_ICR_OTHER;
|
core->mac[ICR] |= E1000_ICR_OTHER;
|
||||||
trace_e1000e_irq_add_msi_other(core->mac[ICR]);
|
trace_e1000e_irq_add_msi_other(core->mac[ICR]);
|
||||||
}
|
}
|
||||||
|
@ -2168,7 +2174,7 @@ e1000e_update_interrupt_state(E1000ECore *core)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void
|
static void
|
||||||
e1000e_set_interrupt_cause(E1000ECore *core, uint32_t val)
|
e1000e_set_interrupt_cause(E1000ECore *core, uint32_t val)
|
||||||
{
|
{
|
||||||
trace_e1000e_irq_set_cause_entry(val, core->mac[ICR]);
|
trace_e1000e_irq_set_cause_entry(val, core->mac[ICR]);
|
||||||
|
@ -2187,6 +2193,8 @@ e1000e_autoneg_timer(void *opaque)
|
||||||
E1000ECore *core = opaque;
|
E1000ECore *core = opaque;
|
||||||
if (!qemu_get_queue(core->owner_nic)->link_down) {
|
if (!qemu_get_queue(core->owner_nic)->link_down) {
|
||||||
e1000x_update_regs_on_autoneg_done(core->mac, core->phy[0]);
|
e1000x_update_regs_on_autoneg_done(core->mac, core->phy[0]);
|
||||||
|
e1000e_start_recv(core);
|
||||||
|
|
||||||
e1000e_update_flowctl_status(core);
|
e1000e_update_flowctl_status(core);
|
||||||
/* signal link status change to the guest */
|
/* signal link status change to the guest */
|
||||||
e1000e_set_interrupt_cause(core, E1000_ICR_LSC);
|
e1000e_set_interrupt_cause(core, E1000_ICR_LSC);
|
||||||
|
@ -2344,7 +2352,7 @@ e1000e_set_pbaclr(E1000ECore *core, int index, uint32_t val)
|
||||||
|
|
||||||
core->mac[PBACLR] = val & E1000_PBACLR_VALID_MASK;
|
core->mac[PBACLR] = val & E1000_PBACLR_VALID_MASK;
|
||||||
|
|
||||||
if (msix_enabled(core->owner)) {
|
if (!msix_enabled(core->owner)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -144,3 +144,6 @@ e1000e_receive(E1000ECore *core, const uint8_t *buf, size_t size);
|
||||||
|
|
||||||
ssize_t
|
ssize_t
|
||||||
e1000e_receive_iov(E1000ECore *core, const struct iovec *iov, int iovcnt);
|
e1000e_receive_iov(E1000ECore *core, const struct iovec *iov, int iovcnt);
|
||||||
|
|
||||||
|
void
|
||||||
|
e1000e_start_recv(E1000ECore *core);
|
||||||
|
|
|
@ -429,7 +429,7 @@ static void imx_fec_do_tx(IMXFECState *s)
|
||||||
frame_size += len;
|
frame_size += len;
|
||||||
if (bd.flags & ENET_BD_L) {
|
if (bd.flags & ENET_BD_L) {
|
||||||
/* Last buffer in frame. */
|
/* Last buffer in frame. */
|
||||||
qemu_send_packet(qemu_get_queue(s->nic), frame, len);
|
qemu_send_packet(qemu_get_queue(s->nic), frame, frame_size);
|
||||||
ptr = frame;
|
ptr = frame;
|
||||||
frame_size = 0;
|
frame_size = 0;
|
||||||
s->regs[ENET_EIR] |= ENET_INT_TXF;
|
s->regs[ENET_EIR] |= ENET_INT_TXF;
|
||||||
|
|
|
@ -23,6 +23,7 @@ do { printf("mcf_fec: " fmt , ## __VA_ARGS__); } while (0)
|
||||||
#define DPRINTF(fmt, ...) do {} while(0)
|
#define DPRINTF(fmt, ...) do {} while(0)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#define FEC_MAX_DESC 1024
|
||||||
#define FEC_MAX_FRAME_SIZE 2032
|
#define FEC_MAX_FRAME_SIZE 2032
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
@ -149,7 +150,7 @@ static void mcf_fec_do_tx(mcf_fec_state *s)
|
||||||
uint32_t addr;
|
uint32_t addr;
|
||||||
mcf_fec_bd bd;
|
mcf_fec_bd bd;
|
||||||
int frame_size;
|
int frame_size;
|
||||||
int len;
|
int len, descnt = 0;
|
||||||
uint8_t frame[FEC_MAX_FRAME_SIZE];
|
uint8_t frame[FEC_MAX_FRAME_SIZE];
|
||||||
uint8_t *ptr;
|
uint8_t *ptr;
|
||||||
|
|
||||||
|
@ -157,7 +158,7 @@ static void mcf_fec_do_tx(mcf_fec_state *s)
|
||||||
ptr = frame;
|
ptr = frame;
|
||||||
frame_size = 0;
|
frame_size = 0;
|
||||||
addr = s->tx_descriptor;
|
addr = s->tx_descriptor;
|
||||||
while (1) {
|
while (descnt++ < FEC_MAX_DESC) {
|
||||||
mcf_fec_read_bd(&bd, addr);
|
mcf_fec_read_bd(&bd, addr);
|
||||||
DPRINTF("tx_bd %x flags %04x len %d data %08x\n",
|
DPRINTF("tx_bd %x flags %04x len %d data %08x\n",
|
||||||
addr, bd.flags, bd.length, bd.data);
|
addr, bd.flags, bd.length, bd.data);
|
||||||
|
@ -176,7 +177,7 @@ static void mcf_fec_do_tx(mcf_fec_state *s)
|
||||||
if (bd.flags & FEC_BD_L) {
|
if (bd.flags & FEC_BD_L) {
|
||||||
/* Last buffer in frame. */
|
/* Last buffer in frame. */
|
||||||
DPRINTF("Sending packet\n");
|
DPRINTF("Sending packet\n");
|
||||||
qemu_send_packet(qemu_get_queue(s->nic), frame, len);
|
qemu_send_packet(qemu_get_queue(s->nic), frame, frame_size);
|
||||||
ptr = frame;
|
ptr = frame;
|
||||||
frame_size = 0;
|
frame_size = 0;
|
||||||
s->eir |= FEC_INT_TXF;
|
s->eir |= FEC_INT_TXF;
|
||||||
|
|
|
@ -223,7 +223,7 @@ e1000e_irq_icr_read_entry(uint32_t icr) "Starting ICR read. Current ICR: 0x%x"
|
||||||
e1000e_irq_icr_read_exit(uint32_t icr) "Ending ICR read. Current ICR: 0x%x"
|
e1000e_irq_icr_read_exit(uint32_t icr) "Ending ICR read. Current ICR: 0x%x"
|
||||||
e1000e_irq_icr_clear_zero_ims(void) "Clearing ICR on read due to zero IMS"
|
e1000e_irq_icr_clear_zero_ims(void) "Clearing ICR on read due to zero IMS"
|
||||||
e1000e_irq_icr_clear_iame(void) "Clearing ICR on read due to IAME"
|
e1000e_irq_icr_clear_iame(void) "Clearing ICR on read due to IAME"
|
||||||
e1000e_irq_ims_clear_eiame(uint32_t iam, uint32_t cause) "Clearing IMS due to EIAME, IAM: 0x%X, cause: 0x%X"
|
e1000e_irq_iam_clear_eiame(uint32_t iam, uint32_t cause) "Clearing IMS due to EIAME, IAM: 0x%X, cause: 0x%X"
|
||||||
e1000e_irq_icr_clear_eiac(uint32_t icr, uint32_t eiac) "Clearing ICR bits due to EIAC, ICR: 0x%X, EIAC: 0x%X"
|
e1000e_irq_icr_clear_eiac(uint32_t icr, uint32_t eiac) "Clearing ICR bits due to EIAC, ICR: 0x%X, EIAC: 0x%X"
|
||||||
e1000e_irq_ims_clear_set_imc(uint32_t val) "Clearing IMS bits due to IMC write 0x%x"
|
e1000e_irq_ims_clear_set_imc(uint32_t val) "Clearing IMS bits due to IMC write 0x%x"
|
||||||
e1000e_irq_fire_delayed_interrupts(void) "Firing delayed interrupts"
|
e1000e_irq_fire_delayed_interrupts(void) "Firing delayed interrupts"
|
||||||
|
|
|
@ -31,6 +31,11 @@
|
||||||
#define MAC_TABLE_ENTRIES 64
|
#define MAC_TABLE_ENTRIES 64
|
||||||
#define MAX_VLAN (1 << 12) /* Per 802.1Q definition */
|
#define MAX_VLAN (1 << 12) /* Per 802.1Q definition */
|
||||||
|
|
||||||
|
/* previously fixed value */
|
||||||
|
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
|
||||||
|
/* for now, only allow larger queues; with virtio-1, guest can downsize */
|
||||||
|
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Calculate the number of bytes up to and including the given 'field' of
|
* Calculate the number of bytes up to and including the given 'field' of
|
||||||
* 'container'.
|
* 'container'.
|
||||||
|
@ -1412,7 +1417,8 @@ static void virtio_net_add_queue(VirtIONet *n, int index)
|
||||||
{
|
{
|
||||||
VirtIODevice *vdev = VIRTIO_DEVICE(n);
|
VirtIODevice *vdev = VIRTIO_DEVICE(n);
|
||||||
|
|
||||||
n->vqs[index].rx_vq = virtio_add_queue(vdev, 256, virtio_net_handle_rx);
|
n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
|
||||||
|
virtio_net_handle_rx);
|
||||||
if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
|
if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
|
||||||
n->vqs[index].tx_vq =
|
n->vqs[index].tx_vq =
|
||||||
virtio_add_queue(vdev, 256, virtio_net_handle_tx_timer);
|
virtio_add_queue(vdev, 256, virtio_net_handle_tx_timer);
|
||||||
|
@ -1720,6 +1726,22 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
|
||||||
virtio_net_set_config_size(n, n->host_features);
|
virtio_net_set_config_size(n, n->host_features);
|
||||||
virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
|
virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We set a lower limit on RX queue size to what it always was.
|
||||||
|
* Guests that want a smaller ring can always resize it without
|
||||||
|
* help from us (using virtio 1 and up).
|
||||||
|
*/
|
||||||
|
if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
|
||||||
|
n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
|
||||||
|
(n->net_conf.rx_queue_size & (n->net_conf.rx_queue_size - 1))) {
|
||||||
|
error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
|
||||||
|
"must be a power of 2 between %d and %d.",
|
||||||
|
n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
|
||||||
|
VIRTQUEUE_MAX_SIZE);
|
||||||
|
virtio_cleanup(vdev);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
n->max_queues = MAX(n->nic_conf.peers.queues, 1);
|
n->max_queues = MAX(n->nic_conf.peers.queues, 1);
|
||||||
if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
|
if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
|
||||||
error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
|
error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
|
||||||
|
@ -1880,6 +1902,8 @@ static Property virtio_net_properties[] = {
|
||||||
TX_TIMER_INTERVAL),
|
TX_TIMER_INTERVAL),
|
||||||
DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
|
DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
|
||||||
DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
|
DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
|
||||||
|
DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
|
||||||
|
VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
|
||||||
DEFINE_PROP_END_OF_LIST(),
|
DEFINE_PROP_END_OF_LIST(),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -35,6 +35,7 @@ typedef struct virtio_net_conf
|
||||||
uint32_t txtimer;
|
uint32_t txtimer;
|
||||||
int32_t txburst;
|
int32_t txburst;
|
||||||
char *tx;
|
char *tx;
|
||||||
|
uint16_t rx_queue_size;
|
||||||
} virtio_net_conf;
|
} virtio_net_conf;
|
||||||
|
|
||||||
/* Maximum packet size we can receive from tap device: header + 64k */
|
/* Maximum packet size we can receive from tap device: header + 64k */
|
||||||
|
|
|
@ -0,0 +1,59 @@
|
||||||
|
/* jhash.h: Jenkins hash support.
|
||||||
|
*
|
||||||
|
* Copyright (C) 2006. Bob Jenkins (bob_jenkins@burtleburtle.net)
|
||||||
|
*
|
||||||
|
* http://burtleburtle.net/bob/hash/
|
||||||
|
*
|
||||||
|
* These are the credits from Bob's sources:
|
||||||
|
*
|
||||||
|
* lookup3.c, by Bob Jenkins, May 2006, Public Domain.
|
||||||
|
*
|
||||||
|
* These are functions for producing 32-bit hashes for hash table lookup.
|
||||||
|
* hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
|
||||||
|
* are externally useful functions. Routines to test the hash are included
|
||||||
|
* if SELF_TEST is defined. You can use this free for any purpose. It's in
|
||||||
|
* the public domain. It has no warranty.
|
||||||
|
*
|
||||||
|
* Copyright (C) 2009-2010 Jozsef Kadlecsik (kadlec@blackhole.kfki.hu)
|
||||||
|
*
|
||||||
|
* I've modified Bob's hash to be useful in the Linux kernel, and
|
||||||
|
* any bugs present are my fault.
|
||||||
|
* Jozsef
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef QEMU_JHASH_H__
|
||||||
|
#define QEMU_JHASH_H__
|
||||||
|
|
||||||
|
#include "qemu/bitops.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* hashtable relation copy from linux kernel jhash
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* __jhash_mix -- mix 3 32-bit values reversibly. */
|
||||||
|
#define __jhash_mix(a, b, c) \
|
||||||
|
{ \
|
||||||
|
a -= c; a ^= rol32(c, 4); c += b; \
|
||||||
|
b -= a; b ^= rol32(a, 6); a += c; \
|
||||||
|
c -= b; c ^= rol32(b, 8); b += a; \
|
||||||
|
a -= c; a ^= rol32(c, 16); c += b; \
|
||||||
|
b -= a; b ^= rol32(a, 19); a += c; \
|
||||||
|
c -= b; c ^= rol32(b, 4); b += a; \
|
||||||
|
}
|
||||||
|
|
||||||
|
/* __jhash_final - final mixing of 3 32-bit values (a,b,c) into c */
|
||||||
|
#define __jhash_final(a, b, c) \
|
||||||
|
{ \
|
||||||
|
c ^= b; c -= rol32(b, 14); \
|
||||||
|
a ^= c; a -= rol32(c, 11); \
|
||||||
|
b ^= a; b -= rol32(a, 25); \
|
||||||
|
c ^= b; c -= rol32(b, 16); \
|
||||||
|
a ^= c; a -= rol32(c, 4); \
|
||||||
|
b ^= a; b -= rol32(a, 14); \
|
||||||
|
c ^= b; c -= rol32(b, 24); \
|
||||||
|
}
|
||||||
|
|
||||||
|
/* An arbitrary initial parameter */
|
||||||
|
#define JHASH_INITVAL 0xdeadbeef
|
||||||
|
|
||||||
|
#endif /* QEMU_JHASH_H__ */
|
|
@ -65,7 +65,8 @@ struct CharDriverState {
|
||||||
int (*chr_sync_read)(struct CharDriverState *s,
|
int (*chr_sync_read)(struct CharDriverState *s,
|
||||||
const uint8_t *buf, int len);
|
const uint8_t *buf, int len);
|
||||||
GSource *(*chr_add_watch)(struct CharDriverState *s, GIOCondition cond);
|
GSource *(*chr_add_watch)(struct CharDriverState *s, GIOCondition cond);
|
||||||
void (*chr_update_read_handler)(struct CharDriverState *s);
|
void (*chr_update_read_handler)(struct CharDriverState *s,
|
||||||
|
GMainContext *context);
|
||||||
int (*chr_ioctl)(struct CharDriverState *s, int cmd, void *arg);
|
int (*chr_ioctl)(struct CharDriverState *s, int cmd, void *arg);
|
||||||
int (*get_msgfds)(struct CharDriverState *s, int* fds, int num);
|
int (*get_msgfds)(struct CharDriverState *s, int* fds, int num);
|
||||||
int (*set_msgfds)(struct CharDriverState *s, int *fds, int num);
|
int (*set_msgfds)(struct CharDriverState *s, int *fds, int num);
|
||||||
|
@ -422,6 +423,14 @@ void qemu_chr_add_handlers(CharDriverState *s,
|
||||||
IOEventHandler *fd_event,
|
IOEventHandler *fd_event,
|
||||||
void *opaque);
|
void *opaque);
|
||||||
|
|
||||||
|
/* This API can make handler run in the context what you pass to. */
|
||||||
|
void qemu_chr_add_handlers_full(CharDriverState *s,
|
||||||
|
IOCanReadHandler *fd_can_read,
|
||||||
|
IOReadHandler *fd_read,
|
||||||
|
IOEventHandler *fd_event,
|
||||||
|
void *opaque,
|
||||||
|
GMainContext *context);
|
||||||
|
|
||||||
void qemu_chr_be_generic_open(CharDriverState *s);
|
void qemu_chr_be_generic_open(CharDriverState *s);
|
||||||
void qemu_chr_accept_input(CharDriverState *s);
|
void qemu_chr_accept_input(CharDriverState *s);
|
||||||
int qemu_chr_add_client(CharDriverState *s, int fd);
|
int qemu_chr_add_client(CharDriverState *s, int fd);
|
||||||
|
|
|
@ -16,3 +16,6 @@ common-obj-$(CONFIG_NETMAP) += netmap.o
|
||||||
common-obj-y += filter.o
|
common-obj-y += filter.o
|
||||||
common-obj-y += filter-buffer.o
|
common-obj-y += filter-buffer.o
|
||||||
common-obj-y += filter-mirror.o
|
common-obj-y += filter-mirror.o
|
||||||
|
common-obj-y += colo-compare.o
|
||||||
|
common-obj-y += colo.o
|
||||||
|
common-obj-y += filter-rewriter.o
|
||||||
|
|
|
@ -0,0 +1,781 @@
|
||||||
|
/*
|
||||||
|
* COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
|
||||||
|
* (a.k.a. Fault Tolerance or Continuous Replication)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
|
||||||
|
* Copyright (c) 2016 FUJITSU LIMITED
|
||||||
|
* Copyright (c) 2016 Intel Corporation
|
||||||
|
*
|
||||||
|
* Author: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
|
||||||
|
*
|
||||||
|
* This work is licensed under the terms of the GNU GPL, version 2 or
|
||||||
|
* later. See the COPYING file in the top-level directory.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "qemu/osdep.h"
|
||||||
|
#include "qemu/error-report.h"
|
||||||
|
#include "trace.h"
|
||||||
|
#include "qemu-common.h"
|
||||||
|
#include "qapi/qmp/qerror.h"
|
||||||
|
#include "qapi/error.h"
|
||||||
|
#include "net/net.h"
|
||||||
|
#include "net/eth.h"
|
||||||
|
#include "qom/object_interfaces.h"
|
||||||
|
#include "qemu/iov.h"
|
||||||
|
#include "qom/object.h"
|
||||||
|
#include "qemu/typedefs.h"
|
||||||
|
#include "net/queue.h"
|
||||||
|
#include "sysemu/char.h"
|
||||||
|
#include "qemu/sockets.h"
|
||||||
|
#include "qapi-visit.h"
|
||||||
|
#include "net/colo.h"
|
||||||
|
|
||||||
|
#define TYPE_COLO_COMPARE "colo-compare"
|
||||||
|
#define COLO_COMPARE(obj) \
|
||||||
|
OBJECT_CHECK(CompareState, (obj), TYPE_COLO_COMPARE)
|
||||||
|
|
||||||
|
#define COMPARE_READ_LEN_MAX NET_BUFSIZE
|
||||||
|
#define MAX_QUEUE_SIZE 1024
|
||||||
|
|
||||||
|
/* TODO: Should be configurable */
|
||||||
|
#define REGULAR_PACKET_CHECK_MS 3000
|
||||||
|
|
||||||
|
/*
|
||||||
|
+ CompareState ++
|
||||||
|
| |
|
||||||
|
+---------------+ +---------------+ +---------------+
|
||||||
|
|conn list +--->conn +--------->conn |
|
||||||
|
+---------------+ +---------------+ +---------------+
|
||||||
|
| | | | | |
|
||||||
|
+---------------+ +---v----+ +---v----+ +---v----+ +---v----+
|
||||||
|
|primary | |secondary |primary | |secondary
|
||||||
|
|packet | |packet + |packet | |packet +
|
||||||
|
+--------+ +--------+ +--------+ +--------+
|
||||||
|
| | | |
|
||||||
|
+---v----+ +---v----+ +---v----+ +---v----+
|
||||||
|
|primary | |secondary |primary | |secondary
|
||||||
|
|packet | |packet + |packet | |packet +
|
||||||
|
+--------+ +--------+ +--------+ +--------+
|
||||||
|
| | | |
|
||||||
|
+---v----+ +---v----+ +---v----+ +---v----+
|
||||||
|
|primary | |secondary |primary | |secondary
|
||||||
|
|packet | |packet + |packet | |packet +
|
||||||
|
+--------+ +--------+ +--------+ +--------+
|
||||||
|
*/
|
||||||
|
typedef struct CompareState {
|
||||||
|
Object parent;
|
||||||
|
|
||||||
|
char *pri_indev;
|
||||||
|
char *sec_indev;
|
||||||
|
char *outdev;
|
||||||
|
CharDriverState *chr_pri_in;
|
||||||
|
CharDriverState *chr_sec_in;
|
||||||
|
CharDriverState *chr_out;
|
||||||
|
SocketReadState pri_rs;
|
||||||
|
SocketReadState sec_rs;
|
||||||
|
|
||||||
|
/* connection list: the connections belonged to this NIC could be found
|
||||||
|
* in this list.
|
||||||
|
* element type: Connection
|
||||||
|
*/
|
||||||
|
GQueue conn_list;
|
||||||
|
/* hashtable to save connection */
|
||||||
|
GHashTable *connection_track_table;
|
||||||
|
/* compare thread, a thread for each NIC */
|
||||||
|
QemuThread thread;
|
||||||
|
/* Timer used on the primary to find packets that are never matched */
|
||||||
|
QEMUTimer *timer;
|
||||||
|
QemuMutex timer_check_lock;
|
||||||
|
} CompareState;
|
||||||
|
|
||||||
|
typedef struct CompareClass {
|
||||||
|
ObjectClass parent_class;
|
||||||
|
} CompareClass;
|
||||||
|
|
||||||
|
typedef struct CompareChardevProps {
|
||||||
|
bool is_socket;
|
||||||
|
} CompareChardevProps;
|
||||||
|
|
||||||
|
enum {
|
||||||
|
PRIMARY_IN = 0,
|
||||||
|
SECONDARY_IN,
|
||||||
|
};
|
||||||
|
|
||||||
|
static int compare_chr_send(CharDriverState *out,
|
||||||
|
const uint8_t *buf,
|
||||||
|
uint32_t size);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Return 0 on success, if return -1 means the pkt
|
||||||
|
* is unsupported(arp and ipv6) and will be sent later
|
||||||
|
*/
|
||||||
|
static int packet_enqueue(CompareState *s, int mode)
|
||||||
|
{
|
||||||
|
ConnectionKey key;
|
||||||
|
Packet *pkt = NULL;
|
||||||
|
Connection *conn;
|
||||||
|
|
||||||
|
if (mode == PRIMARY_IN) {
|
||||||
|
pkt = packet_new(s->pri_rs.buf, s->pri_rs.packet_len);
|
||||||
|
} else {
|
||||||
|
pkt = packet_new(s->sec_rs.buf, s->sec_rs.packet_len);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (parse_packet_early(pkt)) {
|
||||||
|
packet_destroy(pkt, NULL);
|
||||||
|
pkt = NULL;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
fill_connection_key(pkt, &key);
|
||||||
|
|
||||||
|
conn = connection_get(s->connection_track_table,
|
||||||
|
&key,
|
||||||
|
&s->conn_list);
|
||||||
|
|
||||||
|
if (!conn->processing) {
|
||||||
|
g_queue_push_tail(&s->conn_list, conn);
|
||||||
|
conn->processing = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (mode == PRIMARY_IN) {
|
||||||
|
if (g_queue_get_length(&conn->primary_list) <=
|
||||||
|
MAX_QUEUE_SIZE) {
|
||||||
|
g_queue_push_tail(&conn->primary_list, pkt);
|
||||||
|
} else {
|
||||||
|
error_report("colo compare primary queue size too big,"
|
||||||
|
"drop packet");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (g_queue_get_length(&conn->secondary_list) <=
|
||||||
|
MAX_QUEUE_SIZE) {
|
||||||
|
g_queue_push_tail(&conn->secondary_list, pkt);
|
||||||
|
} else {
|
||||||
|
error_report("colo compare secondary queue size too big,"
|
||||||
|
"drop packet");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The IP packets sent by primary and secondary
|
||||||
|
* will be compared in here
|
||||||
|
* TODO support ip fragment, Out-Of-Order
|
||||||
|
* return: 0 means packet same
|
||||||
|
* > 0 || < 0 means packet different
|
||||||
|
*/
|
||||||
|
static int colo_packet_compare(Packet *ppkt, Packet *spkt)
|
||||||
|
{
|
||||||
|
trace_colo_compare_ip_info(ppkt->size, inet_ntoa(ppkt->ip->ip_src),
|
||||||
|
inet_ntoa(ppkt->ip->ip_dst), spkt->size,
|
||||||
|
inet_ntoa(spkt->ip->ip_src),
|
||||||
|
inet_ntoa(spkt->ip->ip_dst));
|
||||||
|
|
||||||
|
if (ppkt->size == spkt->size) {
|
||||||
|
return memcmp(ppkt->data, spkt->data, spkt->size);
|
||||||
|
} else {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Called from the compare thread on the primary
|
||||||
|
* for compare tcp packet
|
||||||
|
* compare_tcp copied from Dr. David Alan Gilbert's branch
|
||||||
|
*/
|
||||||
|
static int colo_packet_compare_tcp(Packet *spkt, Packet *ppkt)
|
||||||
|
{
|
||||||
|
struct tcphdr *ptcp, *stcp;
|
||||||
|
int res;
|
||||||
|
char *sdebug, *ddebug;
|
||||||
|
|
||||||
|
trace_colo_compare_main("compare tcp");
|
||||||
|
if (ppkt->size != spkt->size) {
|
||||||
|
if (trace_event_get_state(TRACE_COLO_COMPARE_MISCOMPARE)) {
|
||||||
|
trace_colo_compare_main("pkt size not same");
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
ptcp = (struct tcphdr *)ppkt->transport_header;
|
||||||
|
stcp = (struct tcphdr *)spkt->transport_header;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The 'identification' field in the IP header is *very* random
|
||||||
|
* it almost never matches. Fudge this by ignoring differences in
|
||||||
|
* unfragmented packets; they'll normally sort themselves out if different
|
||||||
|
* anyway, and it should recover at the TCP level.
|
||||||
|
* An alternative would be to get both the primary and secondary to rewrite
|
||||||
|
* somehow; but that would need some sync traffic to sync the state
|
||||||
|
*/
|
||||||
|
if (ntohs(ppkt->ip->ip_off) & IP_DF) {
|
||||||
|
spkt->ip->ip_id = ppkt->ip->ip_id;
|
||||||
|
/* and the sum will be different if the IDs were different */
|
||||||
|
spkt->ip->ip_sum = ppkt->ip->ip_sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
res = memcmp(ppkt->data + ETH_HLEN, spkt->data + ETH_HLEN,
|
||||||
|
(spkt->size - ETH_HLEN));
|
||||||
|
|
||||||
|
if (res != 0 && trace_event_get_state(TRACE_COLO_COMPARE_MISCOMPARE)) {
|
||||||
|
sdebug = strdup(inet_ntoa(ppkt->ip->ip_src));
|
||||||
|
ddebug = strdup(inet_ntoa(ppkt->ip->ip_dst));
|
||||||
|
fprintf(stderr, "%s: src/dst: %s/%s p: seq/ack=%u/%u"
|
||||||
|
" s: seq/ack=%u/%u res=%d flags=%x/%x\n",
|
||||||
|
__func__, sdebug, ddebug,
|
||||||
|
(unsigned int)ntohl(ptcp->th_seq),
|
||||||
|
(unsigned int)ntohl(ptcp->th_ack),
|
||||||
|
(unsigned int)ntohl(stcp->th_seq),
|
||||||
|
(unsigned int)ntohl(stcp->th_ack),
|
||||||
|
res, ptcp->th_flags, stcp->th_flags);
|
||||||
|
|
||||||
|
fprintf(stderr, "Primary len = %d\n", ppkt->size);
|
||||||
|
qemu_hexdump((char *)ppkt->data, stderr, "colo-compare", ppkt->size);
|
||||||
|
fprintf(stderr, "Secondary len = %d\n", spkt->size);
|
||||||
|
qemu_hexdump((char *)spkt->data, stderr, "colo-compare", spkt->size);
|
||||||
|
|
||||||
|
g_free(sdebug);
|
||||||
|
g_free(ddebug);
|
||||||
|
}
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Called from the compare thread on the primary
|
||||||
|
* for compare udp packet
|
||||||
|
*/
|
||||||
|
static int colo_packet_compare_udp(Packet *spkt, Packet *ppkt)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
trace_colo_compare_main("compare udp");
|
||||||
|
ret = colo_packet_compare(ppkt, spkt);
|
||||||
|
|
||||||
|
if (ret) {
|
||||||
|
trace_colo_compare_udp_miscompare("primary pkt size", ppkt->size);
|
||||||
|
qemu_hexdump((char *)ppkt->data, stderr, "colo-compare", ppkt->size);
|
||||||
|
trace_colo_compare_udp_miscompare("Secondary pkt size", spkt->size);
|
||||||
|
qemu_hexdump((char *)spkt->data, stderr, "colo-compare", spkt->size);
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Called from the compare thread on the primary
|
||||||
|
* for compare icmp packet
|
||||||
|
*/
|
||||||
|
static int colo_packet_compare_icmp(Packet *spkt, Packet *ppkt)
|
||||||
|
{
|
||||||
|
int network_length;
|
||||||
|
|
||||||
|
trace_colo_compare_main("compare icmp");
|
||||||
|
network_length = ppkt->ip->ip_hl * 4;
|
||||||
|
if (ppkt->size != spkt->size ||
|
||||||
|
ppkt->size < network_length + ETH_HLEN) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (colo_packet_compare(ppkt, spkt)) {
|
||||||
|
trace_colo_compare_icmp_miscompare("primary pkt size",
|
||||||
|
ppkt->size);
|
||||||
|
qemu_hexdump((char *)ppkt->data, stderr, "colo-compare",
|
||||||
|
ppkt->size);
|
||||||
|
trace_colo_compare_icmp_miscompare("Secondary pkt size",
|
||||||
|
spkt->size);
|
||||||
|
qemu_hexdump((char *)spkt->data, stderr, "colo-compare",
|
||||||
|
spkt->size);
|
||||||
|
return -1;
|
||||||
|
} else {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Called from the compare thread on the primary
|
||||||
|
* for compare other packet
|
||||||
|
*/
|
||||||
|
static int colo_packet_compare_other(Packet *spkt, Packet *ppkt)
|
||||||
|
{
|
||||||
|
trace_colo_compare_main("compare other");
|
||||||
|
trace_colo_compare_ip_info(ppkt->size, inet_ntoa(ppkt->ip->ip_src),
|
||||||
|
inet_ntoa(ppkt->ip->ip_dst), spkt->size,
|
||||||
|
inet_ntoa(spkt->ip->ip_src),
|
||||||
|
inet_ntoa(spkt->ip->ip_dst));
|
||||||
|
return colo_packet_compare(ppkt, spkt);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int colo_old_packet_check_one(Packet *pkt, int64_t *check_time)
|
||||||
|
{
|
||||||
|
int64_t now = qemu_clock_get_ms(QEMU_CLOCK_HOST);
|
||||||
|
|
||||||
|
if ((now - pkt->creation_ms) > (*check_time)) {
|
||||||
|
trace_colo_old_packet_check_found(pkt->creation_ms);
|
||||||
|
return 0;
|
||||||
|
} else {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void colo_old_packet_check_one_conn(void *opaque,
|
||||||
|
void *user_data)
|
||||||
|
{
|
||||||
|
Connection *conn = opaque;
|
||||||
|
GList *result = NULL;
|
||||||
|
int64_t check_time = REGULAR_PACKET_CHECK_MS;
|
||||||
|
|
||||||
|
result = g_queue_find_custom(&conn->primary_list,
|
||||||
|
&check_time,
|
||||||
|
(GCompareFunc)colo_old_packet_check_one);
|
||||||
|
|
||||||
|
if (result) {
|
||||||
|
/* do checkpoint will flush old packet */
|
||||||
|
/* TODO: colo_notify_checkpoint();*/
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Look for old packets that the secondary hasn't matched,
|
||||||
|
* if we have some then we have to checkpoint to wake
|
||||||
|
* the secondary up.
|
||||||
|
*/
|
||||||
|
static void colo_old_packet_check(void *opaque)
|
||||||
|
{
|
||||||
|
CompareState *s = opaque;
|
||||||
|
|
||||||
|
g_queue_foreach(&s->conn_list, colo_old_packet_check_one_conn, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Called from the compare thread on the primary
|
||||||
|
* for compare connection
|
||||||
|
*/
|
||||||
|
static void colo_compare_connection(void *opaque, void *user_data)
|
||||||
|
{
|
||||||
|
CompareState *s = user_data;
|
||||||
|
Connection *conn = opaque;
|
||||||
|
Packet *pkt = NULL;
|
||||||
|
GList *result = NULL;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
while (!g_queue_is_empty(&conn->primary_list) &&
|
||||||
|
!g_queue_is_empty(&conn->secondary_list)) {
|
||||||
|
qemu_mutex_lock(&s->timer_check_lock);
|
||||||
|
pkt = g_queue_pop_tail(&conn->primary_list);
|
||||||
|
qemu_mutex_unlock(&s->timer_check_lock);
|
||||||
|
switch (conn->ip_proto) {
|
||||||
|
case IPPROTO_TCP:
|
||||||
|
result = g_queue_find_custom(&conn->secondary_list,
|
||||||
|
pkt, (GCompareFunc)colo_packet_compare_tcp);
|
||||||
|
break;
|
||||||
|
case IPPROTO_UDP:
|
||||||
|
result = g_queue_find_custom(&conn->secondary_list,
|
||||||
|
pkt, (GCompareFunc)colo_packet_compare_udp);
|
||||||
|
break;
|
||||||
|
case IPPROTO_ICMP:
|
||||||
|
result = g_queue_find_custom(&conn->secondary_list,
|
||||||
|
pkt, (GCompareFunc)colo_packet_compare_icmp);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
result = g_queue_find_custom(&conn->secondary_list,
|
||||||
|
pkt, (GCompareFunc)colo_packet_compare_other);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (result) {
|
||||||
|
ret = compare_chr_send(s->chr_out, pkt->data, pkt->size);
|
||||||
|
if (ret < 0) {
|
||||||
|
error_report("colo_send_primary_packet failed");
|
||||||
|
}
|
||||||
|
trace_colo_compare_main("packet same and release packet");
|
||||||
|
g_queue_remove(&conn->secondary_list, result->data);
|
||||||
|
packet_destroy(pkt, NULL);
|
||||||
|
} else {
|
||||||
|
/*
|
||||||
|
* If one packet arrive late, the secondary_list or
|
||||||
|
* primary_list will be empty, so we can't compare it
|
||||||
|
* until next comparison.
|
||||||
|
*/
|
||||||
|
trace_colo_compare_main("packet different");
|
||||||
|
qemu_mutex_lock(&s->timer_check_lock);
|
||||||
|
g_queue_push_tail(&conn->primary_list, pkt);
|
||||||
|
qemu_mutex_unlock(&s->timer_check_lock);
|
||||||
|
/* TODO: colo_notify_checkpoint();*/
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static int compare_chr_send(CharDriverState *out,
|
||||||
|
const uint8_t *buf,
|
||||||
|
uint32_t size)
|
||||||
|
{
|
||||||
|
int ret = 0;
|
||||||
|
uint32_t len = htonl(size);
|
||||||
|
|
||||||
|
if (!size) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = qemu_chr_fe_write_all(out, (uint8_t *)&len, sizeof(len));
|
||||||
|
if (ret != sizeof(len)) {
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = qemu_chr_fe_write_all(out, (uint8_t *)buf, size);
|
||||||
|
if (ret != size) {
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
err:
|
||||||
|
return ret < 0 ? ret : -EIO;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int compare_chr_can_read(void *opaque)
|
||||||
|
{
|
||||||
|
return COMPARE_READ_LEN_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Called from the main thread on the primary for packets
|
||||||
|
* arriving over the socket from the primary.
|
||||||
|
*/
|
||||||
|
static void compare_pri_chr_in(void *opaque, const uint8_t *buf, int size)
|
||||||
|
{
|
||||||
|
CompareState *s = COLO_COMPARE(opaque);
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
ret = net_fill_rstate(&s->pri_rs, buf, size);
|
||||||
|
if (ret == -1) {
|
||||||
|
qemu_chr_add_handlers(s->chr_pri_in, NULL, NULL, NULL, NULL);
|
||||||
|
error_report("colo-compare primary_in error");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Called from the main thread on the primary for packets
|
||||||
|
* arriving over the socket from the secondary.
|
||||||
|
*/
|
||||||
|
static void compare_sec_chr_in(void *opaque, const uint8_t *buf, int size)
|
||||||
|
{
|
||||||
|
CompareState *s = COLO_COMPARE(opaque);
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
ret = net_fill_rstate(&s->sec_rs, buf, size);
|
||||||
|
if (ret == -1) {
|
||||||
|
qemu_chr_add_handlers(s->chr_sec_in, NULL, NULL, NULL, NULL);
|
||||||
|
error_report("colo-compare secondary_in error");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void *colo_compare_thread(void *opaque)
|
||||||
|
{
|
||||||
|
GMainContext *worker_context;
|
||||||
|
GMainLoop *compare_loop;
|
||||||
|
CompareState *s = opaque;
|
||||||
|
|
||||||
|
worker_context = g_main_context_new();
|
||||||
|
|
||||||
|
qemu_chr_add_handlers_full(s->chr_pri_in, compare_chr_can_read,
|
||||||
|
compare_pri_chr_in, NULL, s, worker_context);
|
||||||
|
qemu_chr_add_handlers_full(s->chr_sec_in, compare_chr_can_read,
|
||||||
|
compare_sec_chr_in, NULL, s, worker_context);
|
||||||
|
|
||||||
|
compare_loop = g_main_loop_new(worker_context, FALSE);
|
||||||
|
|
||||||
|
g_main_loop_run(compare_loop);
|
||||||
|
|
||||||
|
g_main_loop_unref(compare_loop);
|
||||||
|
g_main_context_unref(worker_context);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static char *compare_get_pri_indev(Object *obj, Error **errp)
|
||||||
|
{
|
||||||
|
CompareState *s = COLO_COMPARE(obj);
|
||||||
|
|
||||||
|
return g_strdup(s->pri_indev);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void compare_set_pri_indev(Object *obj, const char *value, Error **errp)
|
||||||
|
{
|
||||||
|
CompareState *s = COLO_COMPARE(obj);
|
||||||
|
|
||||||
|
g_free(s->pri_indev);
|
||||||
|
s->pri_indev = g_strdup(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
static char *compare_get_sec_indev(Object *obj, Error **errp)
|
||||||
|
{
|
||||||
|
CompareState *s = COLO_COMPARE(obj);
|
||||||
|
|
||||||
|
return g_strdup(s->sec_indev);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void compare_set_sec_indev(Object *obj, const char *value, Error **errp)
|
||||||
|
{
|
||||||
|
CompareState *s = COLO_COMPARE(obj);
|
||||||
|
|
||||||
|
g_free(s->sec_indev);
|
||||||
|
s->sec_indev = g_strdup(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
static char *compare_get_outdev(Object *obj, Error **errp)
|
||||||
|
{
|
||||||
|
CompareState *s = COLO_COMPARE(obj);
|
||||||
|
|
||||||
|
return g_strdup(s->outdev);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void compare_set_outdev(Object *obj, const char *value, Error **errp)
|
||||||
|
{
|
||||||
|
CompareState *s = COLO_COMPARE(obj);
|
||||||
|
|
||||||
|
g_free(s->outdev);
|
||||||
|
s->outdev = g_strdup(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void compare_pri_rs_finalize(SocketReadState *pri_rs)
|
||||||
|
{
|
||||||
|
CompareState *s = container_of(pri_rs, CompareState, pri_rs);
|
||||||
|
|
||||||
|
if (packet_enqueue(s, PRIMARY_IN)) {
|
||||||
|
trace_colo_compare_main("primary: unsupported packet in");
|
||||||
|
compare_chr_send(s->chr_out, pri_rs->buf, pri_rs->packet_len);
|
||||||
|
} else {
|
||||||
|
/* compare connection */
|
||||||
|
g_queue_foreach(&s->conn_list, colo_compare_connection, s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void compare_sec_rs_finalize(SocketReadState *sec_rs)
|
||||||
|
{
|
||||||
|
CompareState *s = container_of(sec_rs, CompareState, sec_rs);
|
||||||
|
|
||||||
|
if (packet_enqueue(s, SECONDARY_IN)) {
|
||||||
|
trace_colo_compare_main("secondary: unsupported packet in");
|
||||||
|
} else {
|
||||||
|
/* compare connection */
|
||||||
|
g_queue_foreach(&s->conn_list, colo_compare_connection, s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static int compare_chardev_opts(void *opaque,
|
||||||
|
const char *name, const char *value,
|
||||||
|
Error **errp)
|
||||||
|
{
|
||||||
|
CompareChardevProps *props = opaque;
|
||||||
|
|
||||||
|
if (strcmp(name, "backend") == 0 &&
|
||||||
|
strcmp(value, "socket") == 0) {
|
||||||
|
props->is_socket = true;
|
||||||
|
return 0;
|
||||||
|
} else if (strcmp(name, "host") == 0 ||
|
||||||
|
(strcmp(name, "port") == 0) ||
|
||||||
|
(strcmp(name, "server") == 0) ||
|
||||||
|
(strcmp(name, "wait") == 0) ||
|
||||||
|
(strcmp(name, "path") == 0)) {
|
||||||
|
return 0;
|
||||||
|
} else {
|
||||||
|
error_setg(errp,
|
||||||
|
"COLO-compare does not support a chardev with option %s=%s",
|
||||||
|
name, value);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Return 0 is success.
|
||||||
|
* Return 1 is failed.
|
||||||
|
*/
|
||||||
|
static int find_and_check_chardev(CharDriverState **chr,
|
||||||
|
char *chr_name,
|
||||||
|
Error **errp)
|
||||||
|
{
|
||||||
|
CompareChardevProps props;
|
||||||
|
|
||||||
|
*chr = qemu_chr_find(chr_name);
|
||||||
|
if (*chr == NULL) {
|
||||||
|
error_setg(errp, "Device '%s' not found",
|
||||||
|
chr_name);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
memset(&props, 0, sizeof(props));
|
||||||
|
if (qemu_opt_foreach((*chr)->opts, compare_chardev_opts, &props, errp)) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!props.is_socket) {
|
||||||
|
error_setg(errp, "chardev \"%s\" is not a tcp socket",
|
||||||
|
chr_name);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check old packet regularly so it can watch for any packets
|
||||||
|
* that the secondary hasn't produced equivalents of.
|
||||||
|
*/
|
||||||
|
static void check_old_packet_regular(void *opaque)
|
||||||
|
{
|
||||||
|
CompareState *s = opaque;
|
||||||
|
|
||||||
|
timer_mod(s->timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
|
||||||
|
REGULAR_PACKET_CHECK_MS);
|
||||||
|
/* if have old packet we will notify checkpoint */
|
||||||
|
/*
|
||||||
|
* TODO: Make timer handler run in compare thread
|
||||||
|
* like qemu_chr_add_handlers_full.
|
||||||
|
*/
|
||||||
|
qemu_mutex_lock(&s->timer_check_lock);
|
||||||
|
colo_old_packet_check(s);
|
||||||
|
qemu_mutex_unlock(&s->timer_check_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Called from the main thread on the primary
|
||||||
|
* to setup colo-compare.
|
||||||
|
*/
|
||||||
|
static void colo_compare_complete(UserCreatable *uc, Error **errp)
|
||||||
|
{
|
||||||
|
CompareState *s = COLO_COMPARE(uc);
|
||||||
|
char thread_name[64];
|
||||||
|
static int compare_id;
|
||||||
|
|
||||||
|
if (!s->pri_indev || !s->sec_indev || !s->outdev) {
|
||||||
|
error_setg(errp, "colo compare needs 'primary_in' ,"
|
||||||
|
"'secondary_in','outdev' property set");
|
||||||
|
return;
|
||||||
|
} else if (!strcmp(s->pri_indev, s->outdev) ||
|
||||||
|
!strcmp(s->sec_indev, s->outdev) ||
|
||||||
|
!strcmp(s->pri_indev, s->sec_indev)) {
|
||||||
|
error_setg(errp, "'indev' and 'outdev' could not be same "
|
||||||
|
"for compare module");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (find_and_check_chardev(&s->chr_pri_in, s->pri_indev, errp)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (find_and_check_chardev(&s->chr_sec_in, s->sec_indev, errp)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (find_and_check_chardev(&s->chr_out, s->outdev, errp)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
qemu_chr_fe_claim_no_fail(s->chr_pri_in);
|
||||||
|
|
||||||
|
qemu_chr_fe_claim_no_fail(s->chr_sec_in);
|
||||||
|
|
||||||
|
qemu_chr_fe_claim_no_fail(s->chr_out);
|
||||||
|
|
||||||
|
net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize);
|
||||||
|
net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize);
|
||||||
|
|
||||||
|
g_queue_init(&s->conn_list);
|
||||||
|
qemu_mutex_init(&s->timer_check_lock);
|
||||||
|
|
||||||
|
s->connection_track_table = g_hash_table_new_full(connection_key_hash,
|
||||||
|
connection_key_equal,
|
||||||
|
g_free,
|
||||||
|
connection_destroy);
|
||||||
|
|
||||||
|
sprintf(thread_name, "colo-compare %d", compare_id);
|
||||||
|
qemu_thread_create(&s->thread, thread_name,
|
||||||
|
colo_compare_thread, s,
|
||||||
|
QEMU_THREAD_JOINABLE);
|
||||||
|
compare_id++;
|
||||||
|
|
||||||
|
/* A regular timer to kick any packets that the secondary doesn't match */
|
||||||
|
s->timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, /* Only when guest runs */
|
||||||
|
check_old_packet_regular, s);
|
||||||
|
timer_mod(s->timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
|
||||||
|
REGULAR_PACKET_CHECK_MS);
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void colo_compare_class_init(ObjectClass *oc, void *data)
|
||||||
|
{
|
||||||
|
UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
|
||||||
|
|
||||||
|
ucc->complete = colo_compare_complete;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void colo_compare_init(Object *obj)
|
||||||
|
{
|
||||||
|
object_property_add_str(obj, "primary_in",
|
||||||
|
compare_get_pri_indev, compare_set_pri_indev,
|
||||||
|
NULL);
|
||||||
|
object_property_add_str(obj, "secondary_in",
|
||||||
|
compare_get_sec_indev, compare_set_sec_indev,
|
||||||
|
NULL);
|
||||||
|
object_property_add_str(obj, "outdev",
|
||||||
|
compare_get_outdev, compare_set_outdev,
|
||||||
|
NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void colo_compare_finalize(Object *obj)
|
||||||
|
{
|
||||||
|
CompareState *s = COLO_COMPARE(obj);
|
||||||
|
|
||||||
|
if (s->chr_pri_in) {
|
||||||
|
qemu_chr_add_handlers(s->chr_pri_in, NULL, NULL, NULL, NULL);
|
||||||
|
qemu_chr_fe_release(s->chr_pri_in);
|
||||||
|
}
|
||||||
|
if (s->chr_sec_in) {
|
||||||
|
qemu_chr_add_handlers(s->chr_sec_in, NULL, NULL, NULL, NULL);
|
||||||
|
qemu_chr_fe_release(s->chr_sec_in);
|
||||||
|
}
|
||||||
|
if (s->chr_out) {
|
||||||
|
qemu_chr_fe_release(s->chr_out);
|
||||||
|
}
|
||||||
|
|
||||||
|
g_queue_free(&s->conn_list);
|
||||||
|
|
||||||
|
if (qemu_thread_is_self(&s->thread)) {
|
||||||
|
/* compare connection */
|
||||||
|
g_queue_foreach(&s->conn_list, colo_compare_connection, s);
|
||||||
|
qemu_thread_join(&s->thread);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (s->timer) {
|
||||||
|
timer_del(s->timer);
|
||||||
|
}
|
||||||
|
|
||||||
|
qemu_mutex_destroy(&s->timer_check_lock);
|
||||||
|
|
||||||
|
g_free(s->pri_indev);
|
||||||
|
g_free(s->sec_indev);
|
||||||
|
g_free(s->outdev);
|
||||||
|
}
|
||||||
|
|
||||||
|
static const TypeInfo colo_compare_info = {
|
||||||
|
.name = TYPE_COLO_COMPARE,
|
||||||
|
.parent = TYPE_OBJECT,
|
||||||
|
.instance_size = sizeof(CompareState),
|
||||||
|
.instance_init = colo_compare_init,
|
||||||
|
.instance_finalize = colo_compare_finalize,
|
||||||
|
.class_size = sizeof(CompareClass),
|
||||||
|
.class_init = colo_compare_class_init,
|
||||||
|
.interfaces = (InterfaceInfo[]) {
|
||||||
|
{ TYPE_USER_CREATABLE },
|
||||||
|
{ }
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
static void register_types(void)
|
||||||
|
{
|
||||||
|
type_register_static(&colo_compare_info);
|
||||||
|
}
|
||||||
|
|
||||||
|
type_init(register_types);
|
|
@ -0,0 +1,211 @@
|
||||||
|
/*
|
||||||
|
* COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
|
||||||
|
* (a.k.a. Fault Tolerance or Continuous Replication)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
|
||||||
|
* Copyright (c) 2016 FUJITSU LIMITED
|
||||||
|
* Copyright (c) 2016 Intel Corporation
|
||||||
|
*
|
||||||
|
* Author: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
|
||||||
|
*
|
||||||
|
* This work is licensed under the terms of the GNU GPL, version 2 or
|
||||||
|
* later. See the COPYING file in the top-level directory.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "qemu/osdep.h"
|
||||||
|
#include "trace.h"
|
||||||
|
#include "net/colo.h"
|
||||||
|
|
||||||
|
uint32_t connection_key_hash(const void *opaque)
|
||||||
|
{
|
||||||
|
const ConnectionKey *key = opaque;
|
||||||
|
uint32_t a, b, c;
|
||||||
|
|
||||||
|
/* Jenkins hash */
|
||||||
|
a = b = c = JHASH_INITVAL + sizeof(*key);
|
||||||
|
a += key->src.s_addr;
|
||||||
|
b += key->dst.s_addr;
|
||||||
|
c += (key->src_port | key->dst_port << 16);
|
||||||
|
__jhash_mix(a, b, c);
|
||||||
|
|
||||||
|
a += key->ip_proto;
|
||||||
|
__jhash_final(a, b, c);
|
||||||
|
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
|
||||||
|
int connection_key_equal(const void *key1, const void *key2)
|
||||||
|
{
|
||||||
|
return memcmp(key1, key2, sizeof(ConnectionKey)) == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int parse_packet_early(Packet *pkt)
|
||||||
|
{
|
||||||
|
int network_length;
|
||||||
|
static const uint8_t vlan[] = {0x81, 0x00};
|
||||||
|
uint8_t *data = pkt->data;
|
||||||
|
uint16_t l3_proto;
|
||||||
|
ssize_t l2hdr_len = eth_get_l2_hdr_length(data);
|
||||||
|
|
||||||
|
if (pkt->size < ETH_HLEN) {
|
||||||
|
trace_colo_proxy_main("pkt->size < ETH_HLEN");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* TODO: support vlan.
|
||||||
|
*/
|
||||||
|
if (!memcmp(&data[12], vlan, sizeof(vlan))) {
|
||||||
|
trace_colo_proxy_main("COLO-proxy don't support vlan");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
pkt->network_header = data + l2hdr_len;
|
||||||
|
|
||||||
|
const struct iovec l2vec = {
|
||||||
|
.iov_base = (void *) data,
|
||||||
|
.iov_len = l2hdr_len
|
||||||
|
};
|
||||||
|
l3_proto = eth_get_l3_proto(&l2vec, 1, l2hdr_len);
|
||||||
|
|
||||||
|
if (l3_proto != ETH_P_IP) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
network_length = pkt->ip->ip_hl * 4;
|
||||||
|
if (pkt->size < l2hdr_len + network_length) {
|
||||||
|
trace_colo_proxy_main("pkt->size < network_header + network_length");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
pkt->transport_header = pkt->network_header + network_length;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void fill_connection_key(Packet *pkt, ConnectionKey *key)
|
||||||
|
{
|
||||||
|
uint32_t tmp_ports;
|
||||||
|
|
||||||
|
memset(key, 0, sizeof(*key));
|
||||||
|
key->ip_proto = pkt->ip->ip_p;
|
||||||
|
|
||||||
|
switch (key->ip_proto) {
|
||||||
|
case IPPROTO_TCP:
|
||||||
|
case IPPROTO_UDP:
|
||||||
|
case IPPROTO_DCCP:
|
||||||
|
case IPPROTO_ESP:
|
||||||
|
case IPPROTO_SCTP:
|
||||||
|
case IPPROTO_UDPLITE:
|
||||||
|
tmp_ports = *(uint32_t *)(pkt->transport_header);
|
||||||
|
key->src = pkt->ip->ip_src;
|
||||||
|
key->dst = pkt->ip->ip_dst;
|
||||||
|
key->src_port = ntohs(tmp_ports & 0xffff);
|
||||||
|
key->dst_port = ntohs(tmp_ports >> 16);
|
||||||
|
break;
|
||||||
|
case IPPROTO_AH:
|
||||||
|
tmp_ports = *(uint32_t *)(pkt->transport_header + 4);
|
||||||
|
key->src = pkt->ip->ip_src;
|
||||||
|
key->dst = pkt->ip->ip_dst;
|
||||||
|
key->src_port = ntohs(tmp_ports & 0xffff);
|
||||||
|
key->dst_port = ntohs(tmp_ports >> 16);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void reverse_connection_key(ConnectionKey *key)
|
||||||
|
{
|
||||||
|
struct in_addr tmp_ip;
|
||||||
|
uint16_t tmp_port;
|
||||||
|
|
||||||
|
tmp_ip = key->src;
|
||||||
|
key->src = key->dst;
|
||||||
|
key->dst = tmp_ip;
|
||||||
|
|
||||||
|
tmp_port = key->src_port;
|
||||||
|
key->src_port = key->dst_port;
|
||||||
|
key->dst_port = tmp_port;
|
||||||
|
}
|
||||||
|
|
||||||
|
Connection *connection_new(ConnectionKey *key)
|
||||||
|
{
|
||||||
|
Connection *conn = g_slice_new(Connection);
|
||||||
|
|
||||||
|
conn->ip_proto = key->ip_proto;
|
||||||
|
conn->processing = false;
|
||||||
|
conn->offset = 0;
|
||||||
|
conn->syn_flag = 0;
|
||||||
|
g_queue_init(&conn->primary_list);
|
||||||
|
g_queue_init(&conn->secondary_list);
|
||||||
|
|
||||||
|
return conn;
|
||||||
|
}
|
||||||
|
|
||||||
|
void connection_destroy(void *opaque)
|
||||||
|
{
|
||||||
|
Connection *conn = opaque;
|
||||||
|
|
||||||
|
g_queue_foreach(&conn->primary_list, packet_destroy, NULL);
|
||||||
|
g_queue_free(&conn->primary_list);
|
||||||
|
g_queue_foreach(&conn->secondary_list, packet_destroy, NULL);
|
||||||
|
g_queue_free(&conn->secondary_list);
|
||||||
|
g_slice_free(Connection, conn);
|
||||||
|
}
|
||||||
|
|
||||||
|
Packet *packet_new(const void *data, int size)
|
||||||
|
{
|
||||||
|
Packet *pkt = g_slice_new(Packet);
|
||||||
|
|
||||||
|
pkt->data = g_memdup(data, size);
|
||||||
|
pkt->size = size;
|
||||||
|
pkt->creation_ms = qemu_clock_get_ms(QEMU_CLOCK_HOST);
|
||||||
|
|
||||||
|
return pkt;
|
||||||
|
}
|
||||||
|
|
||||||
|
void packet_destroy(void *opaque, void *user_data)
|
||||||
|
{
|
||||||
|
Packet *pkt = opaque;
|
||||||
|
|
||||||
|
g_free(pkt->data);
|
||||||
|
g_slice_free(Packet, pkt);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Clear hashtable, stop this hash growing really huge
|
||||||
|
*/
|
||||||
|
void connection_hashtable_reset(GHashTable *connection_track_table)
|
||||||
|
{
|
||||||
|
g_hash_table_remove_all(connection_track_table);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* if not found, create a new connection and add to hash table */
|
||||||
|
Connection *connection_get(GHashTable *connection_track_table,
|
||||||
|
ConnectionKey *key,
|
||||||
|
GQueue *conn_list)
|
||||||
|
{
|
||||||
|
Connection *conn = g_hash_table_lookup(connection_track_table, key);
|
||||||
|
|
||||||
|
if (conn == NULL) {
|
||||||
|
ConnectionKey *new_key = g_memdup(key, sizeof(*key));
|
||||||
|
|
||||||
|
conn = connection_new(key);
|
||||||
|
|
||||||
|
if (g_hash_table_size(connection_track_table) > HASHTABLE_MAX_SIZE) {
|
||||||
|
trace_colo_proxy_main("colo proxy connection hashtable full,"
|
||||||
|
" clear it");
|
||||||
|
connection_hashtable_reset(connection_track_table);
|
||||||
|
/*
|
||||||
|
* clear the conn_list
|
||||||
|
*/
|
||||||
|
while (!g_queue_is_empty(conn_list)) {
|
||||||
|
connection_destroy(g_queue_pop_head(conn_list));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
g_hash_table_insert(connection_track_table, new_key, conn);
|
||||||
|
}
|
||||||
|
|
||||||
|
return conn;
|
||||||
|
}
|
|
@ -0,0 +1,88 @@
|
||||||
|
/*
|
||||||
|
* COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
|
||||||
|
* (a.k.a. Fault Tolerance or Continuous Replication)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
|
||||||
|
* Copyright (c) 2016 FUJITSU LIMITED
|
||||||
|
* Copyright (c) 2016 Intel Corporation
|
||||||
|
*
|
||||||
|
* Author: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
|
||||||
|
*
|
||||||
|
* This work is licensed under the terms of the GNU GPL, version 2 or
|
||||||
|
* later. See the COPYING file in the top-level directory.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef QEMU_COLO_PROXY_H
|
||||||
|
#define QEMU_COLO_PROXY_H
|
||||||
|
|
||||||
|
#include "slirp/slirp.h"
|
||||||
|
#include "qemu/jhash.h"
|
||||||
|
#include "qemu/timer.h"
|
||||||
|
|
||||||
|
#define HASHTABLE_MAX_SIZE 16384
|
||||||
|
|
||||||
|
#ifndef IPPROTO_DCCP
|
||||||
|
#define IPPROTO_DCCP 33
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef IPPROTO_SCTP
|
||||||
|
#define IPPROTO_SCTP 132
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef IPPROTO_UDPLITE
|
||||||
|
#define IPPROTO_UDPLITE 136
|
||||||
|
#endif
|
||||||
|
|
||||||
|
typedef struct Packet {
|
||||||
|
void *data;
|
||||||
|
union {
|
||||||
|
uint8_t *network_header;
|
||||||
|
struct ip *ip;
|
||||||
|
};
|
||||||
|
uint8_t *transport_header;
|
||||||
|
int size;
|
||||||
|
/* Time of packet creation, in wall clock ms */
|
||||||
|
int64_t creation_ms;
|
||||||
|
} Packet;
|
||||||
|
|
||||||
|
typedef struct ConnectionKey {
|
||||||
|
/* (src, dst) must be grouped, in the same way than in IP header */
|
||||||
|
struct in_addr src;
|
||||||
|
struct in_addr dst;
|
||||||
|
uint16_t src_port;
|
||||||
|
uint16_t dst_port;
|
||||||
|
uint8_t ip_proto;
|
||||||
|
} QEMU_PACKED ConnectionKey;
|
||||||
|
|
||||||
|
typedef struct Connection {
|
||||||
|
/* connection primary send queue: element type: Packet */
|
||||||
|
GQueue primary_list;
|
||||||
|
/* connection secondary send queue: element type: Packet */
|
||||||
|
GQueue secondary_list;
|
||||||
|
/* flag to enqueue unprocessed_connections */
|
||||||
|
bool processing;
|
||||||
|
uint8_t ip_proto;
|
||||||
|
/* offset = secondary_seq - primary_seq */
|
||||||
|
tcp_seq offset;
|
||||||
|
/*
|
||||||
|
* we use this flag update offset func
|
||||||
|
* run once in independent tcp connection
|
||||||
|
*/
|
||||||
|
int syn_flag;
|
||||||
|
} Connection;
|
||||||
|
|
||||||
|
uint32_t connection_key_hash(const void *opaque);
|
||||||
|
int connection_key_equal(const void *opaque1, const void *opaque2);
|
||||||
|
int parse_packet_early(Packet *pkt);
|
||||||
|
void fill_connection_key(Packet *pkt, ConnectionKey *key);
|
||||||
|
void reverse_connection_key(ConnectionKey *key);
|
||||||
|
Connection *connection_new(ConnectionKey *key);
|
||||||
|
void connection_destroy(void *opaque);
|
||||||
|
Connection *connection_get(GHashTable *connection_track_table,
|
||||||
|
ConnectionKey *key,
|
||||||
|
GQueue *conn_list);
|
||||||
|
void connection_hashtable_reset(GHashTable *connection_track_table);
|
||||||
|
Packet *packet_new(const void *data, int size);
|
||||||
|
void packet_destroy(void *opaque, void *user_data);
|
||||||
|
|
||||||
|
#endif /* QEMU_COLO_PROXY_H */
|
|
@ -0,0 +1,263 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
|
||||||
|
* Copyright (c) 2016 FUJITSU LIMITED
|
||||||
|
* Copyright (c) 2016 Intel Corporation
|
||||||
|
*
|
||||||
|
* Author: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
|
||||||
|
*
|
||||||
|
* This work is licensed under the terms of the GNU GPL, version 2 or
|
||||||
|
* later. See the COPYING file in the top-level directory.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "qemu/osdep.h"
|
||||||
|
#include "trace.h"
|
||||||
|
#include "net/colo.h"
|
||||||
|
#include "net/filter.h"
|
||||||
|
#include "net/net.h"
|
||||||
|
#include "qemu-common.h"
|
||||||
|
#include "qapi/error.h"
|
||||||
|
#include "qapi/qmp/qerror.h"
|
||||||
|
#include "qapi-visit.h"
|
||||||
|
#include "qom/object.h"
|
||||||
|
#include "qemu/main-loop.h"
|
||||||
|
#include "qemu/iov.h"
|
||||||
|
#include "net/checksum.h"
|
||||||
|
|
||||||
|
#define FILTER_COLO_REWRITER(obj) \
|
||||||
|
OBJECT_CHECK(RewriterState, (obj), TYPE_FILTER_REWRITER)
|
||||||
|
|
||||||
|
#define TYPE_FILTER_REWRITER "filter-rewriter"
|
||||||
|
|
||||||
|
typedef struct RewriterState {
|
||||||
|
NetFilterState parent_obj;
|
||||||
|
NetQueue *incoming_queue;
|
||||||
|
/* hashtable to save connection */
|
||||||
|
GHashTable *connection_track_table;
|
||||||
|
} RewriterState;
|
||||||
|
|
||||||
|
static void filter_rewriter_flush(NetFilterState *nf)
|
||||||
|
{
|
||||||
|
RewriterState *s = FILTER_COLO_REWRITER(nf);
|
||||||
|
|
||||||
|
if (!qemu_net_queue_flush(s->incoming_queue)) {
|
||||||
|
/* Unable to empty the queue, purge remaining packets */
|
||||||
|
qemu_net_queue_purge(s->incoming_queue, nf->netdev);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Return 1 on success, if return 0 means the pkt
|
||||||
|
* is not TCP packet
|
||||||
|
*/
|
||||||
|
static int is_tcp_packet(Packet *pkt)
|
||||||
|
{
|
||||||
|
if (!parse_packet_early(pkt) &&
|
||||||
|
pkt->ip->ip_p == IPPROTO_TCP) {
|
||||||
|
return 1;
|
||||||
|
} else {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* handle tcp packet from primary guest */
|
||||||
|
static int handle_primary_tcp_pkt(NetFilterState *nf,
|
||||||
|
Connection *conn,
|
||||||
|
Packet *pkt)
|
||||||
|
{
|
||||||
|
struct tcphdr *tcp_pkt;
|
||||||
|
|
||||||
|
tcp_pkt = (struct tcphdr *)pkt->transport_header;
|
||||||
|
if (trace_event_get_state(TRACE_COLO_FILTER_REWRITER_DEBUG)) {
|
||||||
|
char *sdebug, *ddebug;
|
||||||
|
sdebug = strdup(inet_ntoa(pkt->ip->ip_src));
|
||||||
|
ddebug = strdup(inet_ntoa(pkt->ip->ip_dst));
|
||||||
|
trace_colo_filter_rewriter_pkt_info(__func__, sdebug, ddebug,
|
||||||
|
ntohl(tcp_pkt->th_seq), ntohl(tcp_pkt->th_ack),
|
||||||
|
tcp_pkt->th_flags);
|
||||||
|
trace_colo_filter_rewriter_conn_offset(conn->offset);
|
||||||
|
g_free(sdebug);
|
||||||
|
g_free(ddebug);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_SYN)) {
|
||||||
|
/*
|
||||||
|
* we use this flag update offset func
|
||||||
|
* run once in independent tcp connection
|
||||||
|
*/
|
||||||
|
conn->syn_flag = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK)) {
|
||||||
|
if (conn->syn_flag) {
|
||||||
|
/*
|
||||||
|
* offset = secondary_seq - primary seq
|
||||||
|
* ack packet sent by guest from primary node,
|
||||||
|
* so we use th_ack - 1 get primary_seq
|
||||||
|
*/
|
||||||
|
conn->offset -= (ntohl(tcp_pkt->th_ack) - 1);
|
||||||
|
conn->syn_flag = 0;
|
||||||
|
}
|
||||||
|
/* handle packets to the secondary from the primary */
|
||||||
|
tcp_pkt->th_ack = htonl(ntohl(tcp_pkt->th_ack) + conn->offset);
|
||||||
|
|
||||||
|
net_checksum_calculate((uint8_t *)pkt->data, pkt->size);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* handle tcp packet from secondary guest */
|
||||||
|
static int handle_secondary_tcp_pkt(NetFilterState *nf,
|
||||||
|
Connection *conn,
|
||||||
|
Packet *pkt)
|
||||||
|
{
|
||||||
|
struct tcphdr *tcp_pkt;
|
||||||
|
|
||||||
|
tcp_pkt = (struct tcphdr *)pkt->transport_header;
|
||||||
|
|
||||||
|
if (trace_event_get_state(TRACE_COLO_FILTER_REWRITER_DEBUG)) {
|
||||||
|
char *sdebug, *ddebug;
|
||||||
|
sdebug = strdup(inet_ntoa(pkt->ip->ip_src));
|
||||||
|
ddebug = strdup(inet_ntoa(pkt->ip->ip_dst));
|
||||||
|
trace_colo_filter_rewriter_pkt_info(__func__, sdebug, ddebug,
|
||||||
|
ntohl(tcp_pkt->th_seq), ntohl(tcp_pkt->th_ack),
|
||||||
|
tcp_pkt->th_flags);
|
||||||
|
trace_colo_filter_rewriter_conn_offset(conn->offset);
|
||||||
|
g_free(sdebug);
|
||||||
|
g_free(ddebug);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == (TH_ACK | TH_SYN))) {
|
||||||
|
/*
|
||||||
|
* save offset = secondary_seq and then
|
||||||
|
* in handle_primary_tcp_pkt make offset
|
||||||
|
* = secondary_seq - primary_seq
|
||||||
|
*/
|
||||||
|
conn->offset = ntohl(tcp_pkt->th_seq);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK) {
|
||||||
|
/* handle packets to the primary from the secondary*/
|
||||||
|
tcp_pkt->th_seq = htonl(ntohl(tcp_pkt->th_seq) - conn->offset);
|
||||||
|
|
||||||
|
net_checksum_calculate((uint8_t *)pkt->data, pkt->size);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
|
||||||
|
NetClientState *sender,
|
||||||
|
unsigned flags,
|
||||||
|
const struct iovec *iov,
|
||||||
|
int iovcnt,
|
||||||
|
NetPacketSent *sent_cb)
|
||||||
|
{
|
||||||
|
RewriterState *s = FILTER_COLO_REWRITER(nf);
|
||||||
|
Connection *conn;
|
||||||
|
ConnectionKey key;
|
||||||
|
Packet *pkt;
|
||||||
|
ssize_t size = iov_size(iov, iovcnt);
|
||||||
|
char *buf = g_malloc0(size);
|
||||||
|
|
||||||
|
iov_to_buf(iov, iovcnt, 0, buf, size);
|
||||||
|
pkt = packet_new(buf, size);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* if we get tcp packet
|
||||||
|
* we will rewrite it to make secondary guest's
|
||||||
|
* connection established successfully
|
||||||
|
*/
|
||||||
|
if (pkt && is_tcp_packet(pkt)) {
|
||||||
|
|
||||||
|
fill_connection_key(pkt, &key);
|
||||||
|
|
||||||
|
if (sender == nf->netdev) {
|
||||||
|
/*
|
||||||
|
* We need make tcp TX and RX packet
|
||||||
|
* into one connection.
|
||||||
|
*/
|
||||||
|
reverse_connection_key(&key);
|
||||||
|
}
|
||||||
|
conn = connection_get(s->connection_track_table,
|
||||||
|
&key,
|
||||||
|
NULL);
|
||||||
|
|
||||||
|
if (sender == nf->netdev) {
|
||||||
|
/* NET_FILTER_DIRECTION_TX */
|
||||||
|
if (!handle_primary_tcp_pkt(nf, conn, pkt)) {
|
||||||
|
qemu_net_queue_send(s->incoming_queue, sender, 0,
|
||||||
|
(const uint8_t *)pkt->data, pkt->size, NULL);
|
||||||
|
packet_destroy(pkt, NULL);
|
||||||
|
pkt = NULL;
|
||||||
|
/*
|
||||||
|
* We block the packet here,after rewrite pkt
|
||||||
|
* and will send it
|
||||||
|
*/
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
/* NET_FILTER_DIRECTION_RX */
|
||||||
|
if (!handle_secondary_tcp_pkt(nf, conn, pkt)) {
|
||||||
|
qemu_net_queue_send(s->incoming_queue, sender, 0,
|
||||||
|
(const uint8_t *)pkt->data, pkt->size, NULL);
|
||||||
|
packet_destroy(pkt, NULL);
|
||||||
|
pkt = NULL;
|
||||||
|
/*
|
||||||
|
* We block the packet here,after rewrite pkt
|
||||||
|
* and will send it
|
||||||
|
*/
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
packet_destroy(pkt, NULL);
|
||||||
|
pkt = NULL;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void colo_rewriter_cleanup(NetFilterState *nf)
|
||||||
|
{
|
||||||
|
RewriterState *s = FILTER_COLO_REWRITER(nf);
|
||||||
|
|
||||||
|
/* flush packets */
|
||||||
|
if (s->incoming_queue) {
|
||||||
|
filter_rewriter_flush(nf);
|
||||||
|
g_free(s->incoming_queue);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void colo_rewriter_setup(NetFilterState *nf, Error **errp)
|
||||||
|
{
|
||||||
|
RewriterState *s = FILTER_COLO_REWRITER(nf);
|
||||||
|
|
||||||
|
s->connection_track_table = g_hash_table_new_full(connection_key_hash,
|
||||||
|
connection_key_equal,
|
||||||
|
g_free,
|
||||||
|
connection_destroy);
|
||||||
|
s->incoming_queue = qemu_new_net_queue(qemu_netfilter_pass_to_next, nf);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void colo_rewriter_class_init(ObjectClass *oc, void *data)
|
||||||
|
{
|
||||||
|
NetFilterClass *nfc = NETFILTER_CLASS(oc);
|
||||||
|
|
||||||
|
nfc->setup = colo_rewriter_setup;
|
||||||
|
nfc->cleanup = colo_rewriter_cleanup;
|
||||||
|
nfc->receive_iov = colo_rewriter_receive_iov;
|
||||||
|
}
|
||||||
|
|
||||||
|
static const TypeInfo colo_rewriter_info = {
|
||||||
|
.name = TYPE_FILTER_REWRITER,
|
||||||
|
.parent = TYPE_NETFILTER,
|
||||||
|
.class_init = colo_rewriter_class_init,
|
||||||
|
.instance_size = sizeof(RewriterState),
|
||||||
|
};
|
||||||
|
|
||||||
|
static void register_types(void)
|
||||||
|
{
|
||||||
|
type_register_static(&colo_rewriter_info);
|
||||||
|
}
|
||||||
|
|
||||||
|
type_init(register_types);
|
|
@ -690,9 +690,13 @@ static ssize_t nc_sendv_compat(NetClientState *nc, const struct iovec *iov,
|
||||||
buffer = iov[0].iov_base;
|
buffer = iov[0].iov_base;
|
||||||
offset = iov[0].iov_len;
|
offset = iov[0].iov_len;
|
||||||
} else {
|
} else {
|
||||||
buf = g_new(uint8_t, NET_BUFSIZE);
|
offset = iov_size(iov, iovcnt);
|
||||||
|
if (offset > NET_BUFSIZE) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
buf = g_malloc(offset);
|
||||||
buffer = buf;
|
buffer = buf;
|
||||||
offset = iov_to_buf(iov, iovcnt, 0, buf, NET_BUFSIZE);
|
offset = iov_to_buf(iov, iovcnt, 0, buf, offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (flags & QEMU_NET_PACKET_FLAG_RAW && nc->info->receive_raw) {
|
if (flags & QEMU_NET_PACKET_FLAG_RAW && nc->info->receive_raw) {
|
||||||
|
@ -1179,6 +1183,7 @@ void hmp_host_net_remove(Monitor *mon, const QDict *qdict)
|
||||||
|
|
||||||
qemu_del_net_client(nc->peer);
|
qemu_del_net_client(nc->peer);
|
||||||
qemu_del_net_client(nc);
|
qemu_del_net_client(nc);
|
||||||
|
qemu_opts_del(qemu_opts_find(qemu_find_opts("net"), device));
|
||||||
}
|
}
|
||||||
|
|
||||||
void netdev_add(QemuOpts *opts, Error **errp)
|
void netdev_add(QemuOpts *opts, Error **errp)
|
||||||
|
|
|
@ -857,7 +857,9 @@ free_fail:
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
fd = net_bridge_run_helper(tap->helper, DEFAULT_BRIDGE_INTERFACE,
|
fd = net_bridge_run_helper(tap->helper,
|
||||||
|
tap->has_br ?
|
||||||
|
tap->br : DEFAULT_BRIDGE_INTERFACE,
|
||||||
errp);
|
errp);
|
||||||
if (fd == -1) {
|
if (fd == -1) {
|
||||||
return -1;
|
return -1;
|
||||||
|
|
|
@ -2636,6 +2636,8 @@
|
||||||
#
|
#
|
||||||
# @downscript: #optional script to shut down the interface
|
# @downscript: #optional script to shut down the interface
|
||||||
#
|
#
|
||||||
|
# @br: #optional bridge name (since 2.8)
|
||||||
|
#
|
||||||
# @helper: #optional command to execute to configure bridge
|
# @helper: #optional command to execute to configure bridge
|
||||||
#
|
#
|
||||||
# @sndbuf: #optional send buffer limit. Understands [TGMKkb] suffixes.
|
# @sndbuf: #optional send buffer limit. Understands [TGMKkb] suffixes.
|
||||||
|
@ -2665,6 +2667,7 @@
|
||||||
'*fds': 'str',
|
'*fds': 'str',
|
||||||
'*script': 'str',
|
'*script': 'str',
|
||||||
'*downscript': 'str',
|
'*downscript': 'str',
|
||||||
|
'*br': 'str',
|
||||||
'*helper': 'str',
|
'*helper': 'str',
|
||||||
'*sndbuf': 'size',
|
'*sndbuf': 'size',
|
||||||
'*vnet_hdr': 'bool',
|
'*vnet_hdr': 'bool',
|
||||||
|
|
77
qemu-char.c
77
qemu-char.c
|
@ -449,11 +449,12 @@ void qemu_chr_fe_printf(CharDriverState *s, const char *fmt, ...)
|
||||||
|
|
||||||
static void remove_fd_in_watch(CharDriverState *chr);
|
static void remove_fd_in_watch(CharDriverState *chr);
|
||||||
|
|
||||||
void qemu_chr_add_handlers(CharDriverState *s,
|
void qemu_chr_add_handlers_full(CharDriverState *s,
|
||||||
IOCanReadHandler *fd_can_read,
|
IOCanReadHandler *fd_can_read,
|
||||||
IOReadHandler *fd_read,
|
IOReadHandler *fd_read,
|
||||||
IOEventHandler *fd_event,
|
IOEventHandler *fd_event,
|
||||||
void *opaque)
|
void *opaque,
|
||||||
|
GMainContext *context)
|
||||||
{
|
{
|
||||||
int fe_open;
|
int fe_open;
|
||||||
|
|
||||||
|
@ -467,8 +468,9 @@ void qemu_chr_add_handlers(CharDriverState *s,
|
||||||
s->chr_read = fd_read;
|
s->chr_read = fd_read;
|
||||||
s->chr_event = fd_event;
|
s->chr_event = fd_event;
|
||||||
s->handler_opaque = opaque;
|
s->handler_opaque = opaque;
|
||||||
if (fe_open && s->chr_update_read_handler)
|
if (fe_open && s->chr_update_read_handler) {
|
||||||
s->chr_update_read_handler(s);
|
s->chr_update_read_handler(s, context);
|
||||||
|
}
|
||||||
|
|
||||||
if (!s->explicit_fe_open) {
|
if (!s->explicit_fe_open) {
|
||||||
qemu_chr_fe_set_open(s, fe_open);
|
qemu_chr_fe_set_open(s, fe_open);
|
||||||
|
@ -481,6 +483,16 @@ void qemu_chr_add_handlers(CharDriverState *s,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void qemu_chr_add_handlers(CharDriverState *s,
|
||||||
|
IOCanReadHandler *fd_can_read,
|
||||||
|
IOReadHandler *fd_read,
|
||||||
|
IOEventHandler *fd_event,
|
||||||
|
void *opaque)
|
||||||
|
{
|
||||||
|
qemu_chr_add_handlers_full(s, fd_can_read, fd_read,
|
||||||
|
fd_event, opaque, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
static int null_chr_write(CharDriverState *chr, const uint8_t *buf, int len)
|
static int null_chr_write(CharDriverState *chr, const uint8_t *buf, int len)
|
||||||
{
|
{
|
||||||
return len;
|
return len;
|
||||||
|
@ -722,7 +734,8 @@ static void mux_chr_event(void *opaque, int event)
|
||||||
mux_chr_send_event(d, i, event);
|
mux_chr_send_event(d, i, event);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void mux_chr_update_read_handler(CharDriverState *chr)
|
static void mux_chr_update_read_handler(CharDriverState *chr,
|
||||||
|
GMainContext *context)
|
||||||
{
|
{
|
||||||
MuxDriver *d = chr->opaque;
|
MuxDriver *d = chr->opaque;
|
||||||
|
|
||||||
|
@ -736,8 +749,10 @@ static void mux_chr_update_read_handler(CharDriverState *chr)
|
||||||
d->chr_event[d->mux_cnt] = chr->chr_event;
|
d->chr_event[d->mux_cnt] = chr->chr_event;
|
||||||
/* Fix up the real driver with mux routines */
|
/* Fix up the real driver with mux routines */
|
||||||
if (d->mux_cnt == 0) {
|
if (d->mux_cnt == 0) {
|
||||||
qemu_chr_add_handlers(d->drv, mux_chr_can_read, mux_chr_read,
|
qemu_chr_add_handlers_full(d->drv, mux_chr_can_read,
|
||||||
mux_chr_event, chr);
|
mux_chr_read,
|
||||||
|
mux_chr_event,
|
||||||
|
chr, context);
|
||||||
}
|
}
|
||||||
if (d->focus != -1) {
|
if (d->focus != -1) {
|
||||||
mux_chr_send_event(d, d->focus, CHR_EVENT_MUX_OUT);
|
mux_chr_send_event(d, d->focus, CHR_EVENT_MUX_OUT);
|
||||||
|
@ -853,6 +868,7 @@ typedef struct IOWatchPoll
|
||||||
IOCanReadHandler *fd_can_read;
|
IOCanReadHandler *fd_can_read;
|
||||||
GSourceFunc fd_read;
|
GSourceFunc fd_read;
|
||||||
void *opaque;
|
void *opaque;
|
||||||
|
GMainContext *context;
|
||||||
} IOWatchPoll;
|
} IOWatchPoll;
|
||||||
|
|
||||||
static IOWatchPoll *io_watch_poll_from_source(GSource *source)
|
static IOWatchPoll *io_watch_poll_from_source(GSource *source)
|
||||||
|
@ -860,7 +876,8 @@ static IOWatchPoll *io_watch_poll_from_source(GSource *source)
|
||||||
return container_of(source, IOWatchPoll, parent);
|
return container_of(source, IOWatchPoll, parent);
|
||||||
}
|
}
|
||||||
|
|
||||||
static gboolean io_watch_poll_prepare(GSource *source, gint *timeout_)
|
static gboolean io_watch_poll_prepare(GSource *source,
|
||||||
|
gint *timeout_)
|
||||||
{
|
{
|
||||||
IOWatchPoll *iwp = io_watch_poll_from_source(source);
|
IOWatchPoll *iwp = io_watch_poll_from_source(source);
|
||||||
bool now_active = iwp->fd_can_read(iwp->opaque) > 0;
|
bool now_active = iwp->fd_can_read(iwp->opaque) > 0;
|
||||||
|
@ -873,7 +890,7 @@ static gboolean io_watch_poll_prepare(GSource *source, gint *timeout_)
|
||||||
iwp->src = qio_channel_create_watch(
|
iwp->src = qio_channel_create_watch(
|
||||||
iwp->ioc, G_IO_IN | G_IO_ERR | G_IO_HUP | G_IO_NVAL);
|
iwp->ioc, G_IO_IN | G_IO_ERR | G_IO_HUP | G_IO_NVAL);
|
||||||
g_source_set_callback(iwp->src, iwp->fd_read, iwp->opaque, NULL);
|
g_source_set_callback(iwp->src, iwp->fd_read, iwp->opaque, NULL);
|
||||||
g_source_attach(iwp->src, NULL);
|
g_source_attach(iwp->src, iwp->context);
|
||||||
} else {
|
} else {
|
||||||
g_source_destroy(iwp->src);
|
g_source_destroy(iwp->src);
|
||||||
g_source_unref(iwp->src);
|
g_source_unref(iwp->src);
|
||||||
|
@ -920,19 +937,22 @@ static GSourceFuncs io_watch_poll_funcs = {
|
||||||
static guint io_add_watch_poll(QIOChannel *ioc,
|
static guint io_add_watch_poll(QIOChannel *ioc,
|
||||||
IOCanReadHandler *fd_can_read,
|
IOCanReadHandler *fd_can_read,
|
||||||
QIOChannelFunc fd_read,
|
QIOChannelFunc fd_read,
|
||||||
gpointer user_data)
|
gpointer user_data,
|
||||||
|
GMainContext *context)
|
||||||
{
|
{
|
||||||
IOWatchPoll *iwp;
|
IOWatchPoll *iwp;
|
||||||
int tag;
|
int tag;
|
||||||
|
|
||||||
iwp = (IOWatchPoll *) g_source_new(&io_watch_poll_funcs, sizeof(IOWatchPoll));
|
iwp = (IOWatchPoll *) g_source_new(&io_watch_poll_funcs,
|
||||||
|
sizeof(IOWatchPoll));
|
||||||
iwp->fd_can_read = fd_can_read;
|
iwp->fd_can_read = fd_can_read;
|
||||||
iwp->opaque = user_data;
|
iwp->opaque = user_data;
|
||||||
iwp->ioc = ioc;
|
iwp->ioc = ioc;
|
||||||
iwp->fd_read = (GSourceFunc) fd_read;
|
iwp->fd_read = (GSourceFunc) fd_read;
|
||||||
iwp->src = NULL;
|
iwp->src = NULL;
|
||||||
|
iwp->context = context;
|
||||||
|
|
||||||
tag = g_source_attach(&iwp->parent, NULL);
|
tag = g_source_attach(&iwp->parent, context);
|
||||||
g_source_unref(&iwp->parent);
|
g_source_unref(&iwp->parent);
|
||||||
return tag;
|
return tag;
|
||||||
}
|
}
|
||||||
|
@ -1064,7 +1084,8 @@ static GSource *fd_chr_add_watch(CharDriverState *chr, GIOCondition cond)
|
||||||
return qio_channel_create_watch(s->ioc_out, cond);
|
return qio_channel_create_watch(s->ioc_out, cond);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void fd_chr_update_read_handler(CharDriverState *chr)
|
static void fd_chr_update_read_handler(CharDriverState *chr,
|
||||||
|
GMainContext *context)
|
||||||
{
|
{
|
||||||
FDCharDriver *s = chr->opaque;
|
FDCharDriver *s = chr->opaque;
|
||||||
|
|
||||||
|
@ -1072,7 +1093,8 @@ static void fd_chr_update_read_handler(CharDriverState *chr)
|
||||||
if (s->ioc_in) {
|
if (s->ioc_in) {
|
||||||
chr->fd_in_tag = io_add_watch_poll(s->ioc_in,
|
chr->fd_in_tag = io_add_watch_poll(s->ioc_in,
|
||||||
fd_chr_read_poll,
|
fd_chr_read_poll,
|
||||||
fd_chr_read, chr);
|
fd_chr_read, chr,
|
||||||
|
context);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1319,7 +1341,8 @@ static void pty_chr_update_read_handler_locked(CharDriverState *chr)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void pty_chr_update_read_handler(CharDriverState *chr)
|
static void pty_chr_update_read_handler(CharDriverState *chr,
|
||||||
|
GMainContext *context)
|
||||||
{
|
{
|
||||||
qemu_mutex_lock(&chr->chr_write_lock);
|
qemu_mutex_lock(&chr->chr_write_lock);
|
||||||
pty_chr_update_read_handler_locked(chr);
|
pty_chr_update_read_handler_locked(chr);
|
||||||
|
@ -1423,7 +1446,8 @@ static void pty_chr_state(CharDriverState *chr, int connected)
|
||||||
if (!chr->fd_in_tag) {
|
if (!chr->fd_in_tag) {
|
||||||
chr->fd_in_tag = io_add_watch_poll(s->ioc,
|
chr->fd_in_tag = io_add_watch_poll(s->ioc,
|
||||||
pty_chr_read_poll,
|
pty_chr_read_poll,
|
||||||
pty_chr_read, chr);
|
pty_chr_read,
|
||||||
|
chr, NULL);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2565,7 +2589,8 @@ static gboolean udp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
|
||||||
return TRUE;
|
return TRUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void udp_chr_update_read_handler(CharDriverState *chr)
|
static void udp_chr_update_read_handler(CharDriverState *chr,
|
||||||
|
GMainContext *context)
|
||||||
{
|
{
|
||||||
NetCharDriver *s = chr->opaque;
|
NetCharDriver *s = chr->opaque;
|
||||||
|
|
||||||
|
@ -2573,7 +2598,8 @@ static void udp_chr_update_read_handler(CharDriverState *chr)
|
||||||
if (s->ioc) {
|
if (s->ioc) {
|
||||||
chr->fd_in_tag = io_add_watch_poll(s->ioc,
|
chr->fd_in_tag = io_add_watch_poll(s->ioc,
|
||||||
udp_chr_read_poll,
|
udp_chr_read_poll,
|
||||||
udp_chr_read, chr);
|
udp_chr_read, chr,
|
||||||
|
context);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2976,12 +3002,14 @@ static void tcp_chr_connect(void *opaque)
|
||||||
if (s->ioc) {
|
if (s->ioc) {
|
||||||
chr->fd_in_tag = io_add_watch_poll(s->ioc,
|
chr->fd_in_tag = io_add_watch_poll(s->ioc,
|
||||||
tcp_chr_read_poll,
|
tcp_chr_read_poll,
|
||||||
tcp_chr_read, chr);
|
tcp_chr_read,
|
||||||
|
chr, NULL);
|
||||||
}
|
}
|
||||||
qemu_chr_be_generic_open(chr);
|
qemu_chr_be_generic_open(chr);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void tcp_chr_update_read_handler(CharDriverState *chr)
|
static void tcp_chr_update_read_handler(CharDriverState *chr,
|
||||||
|
GMainContext *context)
|
||||||
{
|
{
|
||||||
TCPCharDriver *s = chr->opaque;
|
TCPCharDriver *s = chr->opaque;
|
||||||
|
|
||||||
|
@ -2993,7 +3021,8 @@ static void tcp_chr_update_read_handler(CharDriverState *chr)
|
||||||
if (s->ioc) {
|
if (s->ioc) {
|
||||||
chr->fd_in_tag = io_add_watch_poll(s->ioc,
|
chr->fd_in_tag = io_add_watch_poll(s->ioc,
|
||||||
tcp_chr_read_poll,
|
tcp_chr_read_poll,
|
||||||
tcp_chr_read, chr);
|
tcp_chr_read, chr,
|
||||||
|
context);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1598,10 +1598,11 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
|
||||||
" configure a host TAP network backend with ID 'str'\n"
|
" configure a host TAP network backend with ID 'str'\n"
|
||||||
#else
|
#else
|
||||||
"-netdev tap,id=str[,fd=h][,fds=x:y:...:z][,ifname=name][,script=file][,downscript=dfile]\n"
|
"-netdev tap,id=str[,fd=h][,fds=x:y:...:z][,ifname=name][,script=file][,downscript=dfile]\n"
|
||||||
" [,helper=helper][,sndbuf=nbytes][,vnet_hdr=on|off][,vhost=on|off]\n"
|
" [,br=bridge][,helper=helper][,sndbuf=nbytes][,vnet_hdr=on|off][,vhost=on|off]\n"
|
||||||
" [,vhostfd=h][,vhostfds=x:y:...:z][,vhostforce=on|off][,queues=n]\n"
|
" [,vhostfd=h][,vhostfds=x:y:...:z][,vhostforce=on|off][,queues=n]\n"
|
||||||
" [,poll-us=n]\n"
|
" [,poll-us=n]\n"
|
||||||
" configure a host TAP network backend with ID 'str'\n"
|
" configure a host TAP network backend with ID 'str'\n"
|
||||||
|
" connected to a bridge (default=" DEFAULT_BRIDGE_INTERFACE ")\n"
|
||||||
" use network scripts 'file' (default=" DEFAULT_NETWORK_SCRIPT ")\n"
|
" use network scripts 'file' (default=" DEFAULT_NETWORK_SCRIPT ")\n"
|
||||||
" to configure it and 'dfile' (default=" DEFAULT_NETWORK_DOWN_SCRIPT ")\n"
|
" to configure it and 'dfile' (default=" DEFAULT_NETWORK_DOWN_SCRIPT ")\n"
|
||||||
" to deconfigure it\n"
|
" to deconfigure it\n"
|
||||||
|
@ -1888,8 +1889,8 @@ processed and applied to -net user. Mixing them with the new configuration
|
||||||
syntax gives undefined results. Their use for new applications is discouraged
|
syntax gives undefined results. Their use for new applications is discouraged
|
||||||
as they will be removed from future versions.
|
as they will be removed from future versions.
|
||||||
|
|
||||||
@item -netdev tap,id=@var{id}[,fd=@var{h}][,ifname=@var{name}][,script=@var{file}][,downscript=@var{dfile}][,helper=@var{helper}]
|
@item -netdev tap,id=@var{id}[,fd=@var{h}][,ifname=@var{name}][,script=@var{file}][,downscript=@var{dfile}][,br=@var{bridge}][,helper=@var{helper}]
|
||||||
@itemx -net tap[,vlan=@var{n}][,name=@var{name}][,fd=@var{h}][,ifname=@var{name}][,script=@var{file}][,downscript=@var{dfile}][,helper=@var{helper}]
|
@itemx -net tap[,vlan=@var{n}][,name=@var{name}][,fd=@var{h}][,ifname=@var{name}][,script=@var{file}][,downscript=@var{dfile}][,br=@var{bridge}][,helper=@var{helper}]
|
||||||
Connect the host TAP network interface @var{name} to VLAN @var{n}.
|
Connect the host TAP network interface @var{name} to VLAN @var{n}.
|
||||||
|
|
||||||
Use the network script @var{file} to configure it and the network script
|
Use the network script @var{file} to configure it and the network script
|
||||||
|
@ -1900,8 +1901,9 @@ automatically provides one. The default network configure script is
|
||||||
to disable script execution.
|
to disable script execution.
|
||||||
|
|
||||||
If running QEMU as an unprivileged user, use the network helper
|
If running QEMU as an unprivileged user, use the network helper
|
||||||
@var{helper} to configure the TAP interface. The default network
|
@var{helper} to configure the TAP interface and attach it to the bridge.
|
||||||
helper executable is @file{/path/to/qemu-bridge-helper}.
|
The default network helper executable is @file{/path/to/qemu-bridge-helper}
|
||||||
|
and the default bridge device is @file{br0}.
|
||||||
|
|
||||||
@option{fd}=@var{h} can be used to specify the handle of an already
|
@option{fd}=@var{h} can be used to specify the handle of an already
|
||||||
opened host TAP interface.
|
opened host TAP interface.
|
||||||
|
@ -3887,6 +3889,19 @@ Create a filter-redirector we need to differ outdev id from indev id, id can not
|
||||||
be the same. we can just use indev or outdev, but at least one of indev or outdev
|
be the same. we can just use indev or outdev, but at least one of indev or outdev
|
||||||
need to be specified.
|
need to be specified.
|
||||||
|
|
||||||
|
@item -object filter-rewriter,id=@var{id},netdev=@var{netdevid},rewriter-mode=@var{mode}[,queue=@var{all|rx|tx}]
|
||||||
|
|
||||||
|
Filter-rewriter is a part of COLO project.It will rewrite tcp packet to
|
||||||
|
secondary from primary to keep secondary tcp connection,and rewrite
|
||||||
|
tcp packet to primary from secondary make tcp packet can be handled by
|
||||||
|
client.
|
||||||
|
|
||||||
|
usage:
|
||||||
|
colo secondary:
|
||||||
|
-object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0
|
||||||
|
-object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1
|
||||||
|
-object filter-rewriter,id=rew0,netdev=hn0,queue=all
|
||||||
|
|
||||||
@item -object filter-dump,id=@var{id},netdev=@var{dev},file=@var{filename}][,maxlen=@var{len}]
|
@item -object filter-dump,id=@var{id},netdev=@var{dev},file=@var{filename}][,maxlen=@var{len}]
|
||||||
|
|
||||||
Dump the network traffic on netdev @var{dev} to the file specified by
|
Dump the network traffic on netdev @var{dev} to the file specified by
|
||||||
|
@ -3894,6 +3909,45 @@ Dump the network traffic on netdev @var{dev} to the file specified by
|
||||||
The file format is libpcap, so it can be analyzed with tools such as tcpdump
|
The file format is libpcap, so it can be analyzed with tools such as tcpdump
|
||||||
or Wireshark.
|
or Wireshark.
|
||||||
|
|
||||||
|
@item -object colo-compare,id=@var{id},primary_in=@var{chardevid},secondary_in=@var{chardevid},
|
||||||
|
outdev=@var{chardevid}
|
||||||
|
|
||||||
|
Colo-compare gets packet from primary_in@var{chardevid} and secondary_in@var{chardevid}, than compare primary packet with
|
||||||
|
secondary packet. If the packets are same, we will output primary
|
||||||
|
packet to outdev@var{chardevid}, else we will notify colo-frame
|
||||||
|
do checkpoint and send primary packet to outdev@var{chardevid}.
|
||||||
|
|
||||||
|
we must use it with the help of filter-mirror and filter-redirector.
|
||||||
|
|
||||||
|
@example
|
||||||
|
|
||||||
|
primary:
|
||||||
|
-netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown
|
||||||
|
-device e1000,id=e0,netdev=hn0,mac=52:a4:00:12:78:66
|
||||||
|
-chardev socket,id=mirror0,host=3.3.3.3,port=9003,server,nowait
|
||||||
|
-chardev socket,id=compare1,host=3.3.3.3,port=9004,server,nowait
|
||||||
|
-chardev socket,id=compare0,host=3.3.3.3,port=9001,server,nowait
|
||||||
|
-chardev socket,id=compare0-0,host=3.3.3.3,port=9001
|
||||||
|
-chardev socket,id=compare_out,host=3.3.3.3,port=9005,server,nowait
|
||||||
|
-chardev socket,id=compare_out0,host=3.3.3.3,port=9005
|
||||||
|
-object filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0
|
||||||
|
-object filter-redirector,netdev=hn0,id=redire0,queue=rx,indev=compare_out
|
||||||
|
-object filter-redirector,netdev=hn0,id=redire1,queue=rx,outdev=compare0
|
||||||
|
-object colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,outdev=compare_out0
|
||||||
|
|
||||||
|
secondary:
|
||||||
|
-netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,down script=/etc/qemu-ifdown
|
||||||
|
-device e1000,netdev=hn0,mac=52:a4:00:12:78:66
|
||||||
|
-chardev socket,id=red0,host=3.3.3.3,port=9003
|
||||||
|
-chardev socket,id=red1,host=3.3.3.3,port=9004
|
||||||
|
-object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0
|
||||||
|
-object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1
|
||||||
|
|
||||||
|
@end example
|
||||||
|
|
||||||
|
If you want to know the detail of above command line, you can read
|
||||||
|
the colo-compare git log.
|
||||||
|
|
||||||
@item -object secret,id=@var{id},data=@var{string},format=@var{raw|base64}[,keyid=@var{secretid},iv=@var{string}]
|
@item -object secret,id=@var{id},data=@var{string},format=@var{raw|base64}[,keyid=@var{secretid},iv=@var{string}]
|
||||||
@item -object secret,id=@var{id},file=@var{filename},format=@var{raw|base64}[,keyid=@var{secretid},iv=@var{string}]
|
@item -object secret,id=@var{id},file=@var{filename},format=@var{raw|base64}[,keyid=@var{secretid},iv=@var{string}]
|
||||||
|
|
||||||
|
|
16
trace-events
16
trace-events
|
@ -139,6 +139,22 @@ memory_region_subpage_write(int cpu_index, void *mr, uint64_t offset, uint64_t v
|
||||||
memory_region_tb_read(int cpu_index, uint64_t addr, uint64_t value, unsigned size) "cpu %d addr %#"PRIx64" value %#"PRIx64" size %u"
|
memory_region_tb_read(int cpu_index, uint64_t addr, uint64_t value, unsigned size) "cpu %d addr %#"PRIx64" value %#"PRIx64" size %u"
|
||||||
memory_region_tb_write(int cpu_index, uint64_t addr, uint64_t value, unsigned size) "cpu %d addr %#"PRIx64" value %#"PRIx64" size %u"
|
memory_region_tb_write(int cpu_index, uint64_t addr, uint64_t value, unsigned size) "cpu %d addr %#"PRIx64" value %#"PRIx64" size %u"
|
||||||
|
|
||||||
|
# net/colo.c
|
||||||
|
colo_proxy_main(const char *chr) ": %s"
|
||||||
|
|
||||||
|
# net/colo-compare.c
|
||||||
|
colo_compare_main(const char *chr) ": %s"
|
||||||
|
colo_compare_udp_miscompare(const char *sta, int size) ": %s = %d"
|
||||||
|
colo_compare_icmp_miscompare(const char *sta, int size) ": %s = %d"
|
||||||
|
colo_compare_ip_info(int psize, const char *sta, const char *stb, int ssize, const char *stc, const char *std) "ppkt size = %d, ip_src = %s, ip_dst = %s, spkt size = %d, ip_src = %s, ip_dst = %s"
|
||||||
|
colo_old_packet_check_found(int64_t old_time) "%" PRId64
|
||||||
|
colo_compare_miscompare(void) ""
|
||||||
|
|
||||||
|
# net/filter-rewriter.c
|
||||||
|
colo_filter_rewriter_debug(void) ""
|
||||||
|
colo_filter_rewriter_pkt_info(const char *func, const char *src, const char *dst, uint32_t seq, uint32_t ack, uint32_t flag) "%s: src/dst: %s/%s p: seq/ack=%u/%u flags=%x\n"
|
||||||
|
colo_filter_rewriter_conn_offset(uint32_t offset) ": offset=%u\n"
|
||||||
|
|
||||||
### Guest events, keep at bottom
|
### Guest events, keep at bottom
|
||||||
|
|
||||||
# @vaddr: Access' virtual address.
|
# @vaddr: Access' virtual address.
|
||||||
|
|
4
vl.c
4
vl.c
|
@ -2845,7 +2845,9 @@ static bool object_create_initial(const char *type)
|
||||||
if (g_str_equal(type, "filter-buffer") ||
|
if (g_str_equal(type, "filter-buffer") ||
|
||||||
g_str_equal(type, "filter-dump") ||
|
g_str_equal(type, "filter-dump") ||
|
||||||
g_str_equal(type, "filter-mirror") ||
|
g_str_equal(type, "filter-mirror") ||
|
||||||
g_str_equal(type, "filter-redirector")) {
|
g_str_equal(type, "filter-redirector") ||
|
||||||
|
g_str_equal(type, "colo-compare") ||
|
||||||
|
g_str_equal(type, "filter-rewriter")) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue