-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1 iQEbBAABAgAGBQJX6kSEAAoJEO8Ells5jWIRKAkH9iMMzN9USOroQIWmiyMf5S7F mlsSeSccv+U5gA6wCJooA0dwMnAFnxJ3rTcV6BEL0jE0cVHanR61eDfpeOC0lKXw NUWc91Bf4Epg0cTk9fV6yv6xZOcuN/twukrQIEZjfldpbP0ba+WoBx3x0sdYen+M Xjaix011CUEx5VmVMx8g/LbnM8s1WO+CjEjIpWAas+1M68P+elne5nOaTaj+FyzV E9BkUkcXd5ByzikYRykgS/OJGRd7S+BBSFluISekwGjTcppRccAwZsGkgYXRrF3U 1g1LOT2xuz777uP7hBqZQRyZIAaOiLY89WUFuCL1BBLbbkAnT799J/e/n6sRSg== =2gpR -----END PGP SIGNATURE----- Merge remote-tracking branch 'remotes/jasowang/tags/net-pull-request' into staging # gpg: Signature made Tue 27 Sep 2016 11:05:56 BST # gpg: using RSA key 0xEF04965B398D6211 # gpg: Good signature from "Jason Wang (Jason Wang on RedHat) <jasowang@redhat.com>" # gpg: WARNING: This key is not certified with sufficiently trusted signatures! # gpg: It is not certain that the signature belongs to the owner. # Primary key fingerprint: 215D 46F4 8246 689E C77F 3562 EF04 965B 398D 6211 * remotes/jasowang/tags/net-pull-request: (27 commits) imx_fec: fix error in qemu_send_packet argument mcf_fec: fix error in qemu_send_packet argument net: mcf: limit buffer descriptor count e1000e: Fix EIAC register implementation e1000e: Fix spurious RX TCP ACK interrupts e1000e: Fix OTHER interrupts processing for MSI-X e1000e: Fix PBACLR implementation e1000e: Fix CTRL_EXT.EIAME behavior e1000e: Flush receive queues on link up e1000e: Flush all receive queues on receive enable net: limit allocation in nc_sendv_compat tap: Allow specifying a bridge e1000: fix buliding complaint docs: Add documentation for COLO-proxy MAINTAINERS: add maintainer for COLO-proxy filter-rewriter: rewrite tcp packet to keep secondary connection filter-rewriter: track connection and parse packet filter-rewriter: introduce filter-rewriter initialization colo-compare: add TCP, UDP, ICMP packet comparison colo-compare: introduce packet comparison thread ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
		
						commit
						333ec4ca6a
					
				|  | @ -1364,6 +1364,15 @@ F: util/uuid.c | |||
| F: include/qemu/uuid.h | ||||
| F: tests/test-uuid.c | ||||
| 
 | ||||
| COLO Proxy | ||||
| M: Zhang Chen <zhangchen.fnst@cn.fujitsu.com> | ||||
| M: Li Zhijian <lizhijian@cn.fujitsu.com> | ||||
| S: Supported | ||||
| F: docs/colo-proxy.txt | ||||
| F: net/colo* | ||||
| F: net/filter-rewriter.c | ||||
| F: net/filter-mirror.c | ||||
| 
 | ||||
| Usermode Emulation | ||||
| ------------------ | ||||
| Overall | ||||
|  |  | |||
|  | @ -0,0 +1,188 @@ | |||
| COLO-proxy | ||||
| ---------- | ||||
| Copyright (c) 2016 Intel Corporation | ||||
| Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. | ||||
| Copyright (c) 2016 Fujitsu, Corp. | ||||
| 
 | ||||
| This work is licensed under the terms of the GNU GPL, version 2 or later. | ||||
| See the COPYING file in the top-level directory. | ||||
| 
 | ||||
| This document gives an overview of COLO proxy's design. | ||||
| 
 | ||||
| == Background == | ||||
| COLO-proxy is a part of COLO project. It is used | ||||
| to compare the network package to help COLO decide | ||||
| whether to do checkpoint. With COLO-proxy's help, | ||||
| COLO greatly improves the performance. | ||||
| 
 | ||||
| The filter-redirector, filter-mirror, colo-compare | ||||
| and filter-rewriter compose the COLO-proxy. | ||||
| 
 | ||||
| == Architecture == | ||||
| 
 | ||||
| COLO-Proxy is based on qemu netfilter and it's a plugin for qemu netfilter | ||||
| (except colo-compare). It keep Secondary VM connect normally to | ||||
| client and compare packets sent by PVM with sent by SVM. | ||||
| If the packet difference, notify COLO-frame to do checkpoint and send | ||||
| all primary packet has queued. Otherwise just send the queued primary | ||||
| packet and drop the queued secondary packet. | ||||
| 
 | ||||
| Below is a COLO proxy ascii figure: | ||||
| 
 | ||||
|  Primary qemu                                                           Secondary qemu | ||||
| +--------------------------------------------------------------+       +----------------------------------------------------------------+ | ||||
| | +----------------------------------------------------------+ |       |  +-----------------------------------------------------------+ | | ||||
| | |                                                          | |       |  |                                                           | | | ||||
| | |                        guest                             | |       |  |                        guest                              | | | ||||
| | |                                                          | |       |  |                                                           | | | ||||
| | +-------^--------------------------+-----------------------+ |       |  +---------------------+--------+----------------------------+ | | ||||
| |         |                          |                         |       |                        ^        |                              | | ||||
| |         |                          |                         |       |                        |        |                              | | ||||
| |         |  +------------------------------------------------------+  |                        |        |                              | | ||||
| |netfilter|  |                       |                         |    |  |   netfilter            |        |                              | | ||||
| | +----------+ +----------------------------+                  |    |  |  +-----------------------------------------------------------+ | | ||||
| | |       |  |                       |      |        out       |    |  |  |                     |        |  filter excute order       | | | ||||
| | |       |  |          +-----------------------------+        |    |  |  |                     |        | +------------------->      | | | ||||
| | |       |  |          |            |      |         |        |    |  |  |                     |        |   TCP                      | | | ||||
| | | +-----+--+-+  +-----v----+ +-----v----+ |pri +----+----+sec|    |  |  | +------------+  +---+----+---v+rewriter++  +------------+ | | | ||||
| | | |          |  |          | |          | |in  |         |in |    |  |  | |            |  |        |              |  |            | | | | ||||
| | | |  filter  |  |  filter  | |  filter  +------>  colo   <------+ +-------->  filter   +--> adjust |   adjust     +-->   filter   | | | | ||||
| | | |  mirror  |  |redirector| |redirector| |    | compare |   |  |    |  | | redirector |  | ack    |   seq        |  | redirector | | | | ||||
| | | |          |  |          | |          | |    |         |   |  |    |  | |            |  |        |              |  |            | | | | ||||
| | | +----^-----+  +----+-----+ +----------+ |    +---------+   |  |    |  | +------------+  +--------+--------------+  +---+--------+ | | | ||||
| | |      |   tx        |   rx           rx  |                  |  |    |  |            tx                        all       |  rx      | | | ||||
| | |      |             |                    |                  |  |    |  +-----------------------------------------------------------+ | | ||||
| | |      |             +--------------+     |                  |  |    |                                                   |            | | ||||
| | |      |   filter excute order      |     |                  |  |    |                                                   |            | | ||||
| | |      |  +---------------->        |     |                  |  +--------------------------------------------------------+            | | ||||
| | +-----------------------------------------+                  |       |                                                                | | ||||
| |        |                            |                        |       |                                                                | | ||||
| +--------------------------------------------------------------+       +----------------------------------------------------------------+ | ||||
|          |guest receive               | guest send | ||||
|          |                            | | ||||
| +--------+----------------------------v------------------------+ | ||||
| |                                                              |                          NOTE: filter direction is rx/tx/all | ||||
| |                         tap                                  |                          rx:receive packets sent to the netdev | ||||
| |                                                              |                          tx:receive packets sent by the netdev | ||||
| +--------------------------------------------------------------+ | ||||
| 
 | ||||
| 1.Guest receive packet route: | ||||
| 
 | ||||
| Primary: | ||||
| 
 | ||||
| Tap --> Mirror Client Filter | ||||
| Mirror client will send packet to guest,at the | ||||
| same time, copy and forward packet to secondary | ||||
| mirror server. | ||||
| 
 | ||||
| Secondary: | ||||
| 
 | ||||
| Mirror Server Filter --> TCP Rewriter | ||||
| If receive packet is TCP packet,we will adjust ack | ||||
| and update TCP checksum, then send to secondary | ||||
| guest. Otherwise directly send to guest. | ||||
| 
 | ||||
| 2.Guest send packet route: | ||||
| 
 | ||||
| Primary: | ||||
| 
 | ||||
| Guest --> Redirect Server Filter | ||||
| Redirect server filter receive primary guest packet | ||||
| but do nothing, just pass to next filter. | ||||
| 
 | ||||
| Redirect Server Filter --> COLO-Compare | ||||
| COLO-compare receive primary guest packet then | ||||
| waiting scondary redirect packet to compare it. | ||||
| If packet same,send queued primary packet and clear | ||||
| queued secondary packet, Otherwise send primary packet | ||||
| and do checkpoint. | ||||
| 
 | ||||
| COLO-Compare --> Another Redirector Filter | ||||
| The redirector get packet from colo-compare by use | ||||
| chardev socket. | ||||
| 
 | ||||
| Redirector Filter --> Tap | ||||
| Send the packet. | ||||
| 
 | ||||
| Secondary: | ||||
| 
 | ||||
| Guest --> TCP Rewriter Filter | ||||
| If the packet is TCP packet,we will adjust seq | ||||
| and update TCP checksum. Then send it to | ||||
| redirect client filter. Otherwise directly send to | ||||
| redirect client filter. | ||||
| 
 | ||||
| Redirect Client Filter --> Redirect Server Filter | ||||
| Forward packet to primary. | ||||
| 
 | ||||
| == Components introduction == | ||||
| 
 | ||||
| Filter-mirror is a netfilter plugin. | ||||
| It gives qemu the ability to mirror | ||||
| packets to a chardev. | ||||
| 
 | ||||
| Filter-redirector is a netfilter plugin. | ||||
| It gives qemu the ability to redirect net packet. | ||||
| Redirector can redirect filter's net packet to outdev, | ||||
| and redirect indev's packet to filter. | ||||
| 
 | ||||
|                     filter | ||||
|                       + | ||||
|           redirector  | | ||||
|              +--------------+ | ||||
|              |        |     | | ||||
|              |        |     | | ||||
|              |        |     | | ||||
|   indev +---------+   +---------->  outdev | ||||
|              |    |         | | ||||
|              |    |         | | ||||
|              |    |         | | ||||
|              +--------------+ | ||||
|                   | | ||||
|                   v | ||||
|                 filter | ||||
| 
 | ||||
| COLO-compare, we do packet comparing job. | ||||
| Packets coming from the primary char indev will be sent to outdev. | ||||
| Packets coming from the secondary char dev will be dropped after comparing. | ||||
| COLO-comapre need two input chardev and one output chardev: | ||||
| primary_in=chardev1-id (source: primary send packet) | ||||
| secondary_in=chardev2-id (source: secondary send packet) | ||||
| outdev=chardev3-id | ||||
| 
 | ||||
| Filter-rewriter will rewrite some of secondary packet to make | ||||
| secondary guest's tcp connection established successfully. | ||||
| In this module we will rewrite tcp packet's ack to the secondary | ||||
| from primary,and rewrite tcp packet's seq to the primary from | ||||
| secondary. | ||||
| 
 | ||||
| == Usage == | ||||
| 
 | ||||
| Here, we use demo ip and port discribe more clearly. | ||||
| Primary(ip:3.3.3.3): | ||||
| -netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown | ||||
| -device e1000,id=e0,netdev=hn0,mac=52:a4:00:12:78:66 | ||||
| -chardev socket,id=mirror0,host=3.3.3.3,port=9003,server,nowait | ||||
| -chardev socket,id=compare1,host=3.3.3.3,port=9004,server,nowait | ||||
| -chardev socket,id=compare0,host=3.3.3.3,port=9001,server,nowait | ||||
| -chardev socket,id=compare0-0,host=3.3.3.3,port=9001 | ||||
| -chardev socket,id=compare_out,host=3.3.3.3,port=9005,server,nowait | ||||
| -chardev socket,id=compare_out0,host=3.3.3.3,port=9005 | ||||
| -object filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0 | ||||
| -object filter-redirector,netdev=hn0,id=redire0,queue=rx,indev=compare_out | ||||
| -object filter-redirector,netdev=hn0,id=redire1,queue=rx,outdev=compare0 | ||||
| -object colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,outdev=compare_out0 | ||||
| 
 | ||||
| Secondary(ip:3.3.3.8): | ||||
| -netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,down script=/etc/qemu-ifdown | ||||
| -device e1000,netdev=hn0,mac=52:a4:00:12:78:66 | ||||
| -chardev socket,id=red0,host=3.3.3.3,port=9003 | ||||
| -chardev socket,id=red1,host=3.3.3.3,port=9004 | ||||
| -object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0 | ||||
| -object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1 | ||||
| 
 | ||||
| Note: | ||||
|   a.COLO-proxy must work with COLO-frame and Block-replication. | ||||
|   b.Primary COLO must be started firstly, because COLO-proxy needs | ||||
|     chardev socket server running before secondary started. | ||||
|   c.Filter-rewriter only rewrite tcp packet. | ||||
|  | @ -400,7 +400,7 @@ static void e1000e_write_config(PCIDevice *pci_dev, uint32_t address, | |||
| 
 | ||||
|     if (range_covers_byte(address, len, PCI_COMMAND) && | ||||
|         (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) { | ||||
|         qemu_flush_queued_packets(qemu_get_queue(s->nic)); | ||||
|         e1000e_start_recv(&s->core); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -953,7 +953,7 @@ e1000e_has_rxbufs(E1000ECore *core, const E1000E_RingInfo *r, | |||
|                          core->rx_desc_buf_size; | ||||
| } | ||||
| 
 | ||||
| static inline void | ||||
| void | ||||
| e1000e_start_recv(E1000ECore *core) | ||||
| { | ||||
|     int i; | ||||
|  | @ -1710,7 +1710,8 @@ e1000e_receive_iov(E1000ECore *core, const struct iovec *iov, int iovcnt) | |||
|         } | ||||
| 
 | ||||
|         /* Perform ACK receive detection */ | ||||
|         if (e1000e_is_tcp_ack(core, core->rx_pkt)) { | ||||
|         if  (!(core->mac[RFCTL] & E1000_RFCTL_ACK_DIS) && | ||||
|              (e1000e_is_tcp_ack(core, core->rx_pkt))) { | ||||
|             n |= E1000_ICS_ACK; | ||||
|         } | ||||
| 
 | ||||
|  | @ -1807,6 +1808,7 @@ e1000e_core_set_link_status(E1000ECore *core) | |||
|                                    core->autoneg_timer); | ||||
|         } else { | ||||
|             e1000x_update_regs_on_link_up(core->mac, core->phy[0]); | ||||
|             e1000e_start_recv(core); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|  | @ -2007,19 +2009,23 @@ e1000e_msix_notify_one(E1000ECore *core, uint32_t cause, uint32_t int_cfg) | |||
|     } | ||||
| 
 | ||||
|     if (core->mac[CTRL_EXT] & E1000_CTRL_EXT_EIAME) { | ||||
|         trace_e1000e_irq_ims_clear_eiame(core->mac[IAM], cause); | ||||
|         e1000e_clear_ims_bits(core, core->mac[IAM] & cause); | ||||
|         trace_e1000e_irq_iam_clear_eiame(core->mac[IAM], cause); | ||||
|         core->mac[IAM] &= ~cause; | ||||
|     } | ||||
| 
 | ||||
|     trace_e1000e_irq_icr_clear_eiac(core->mac[ICR], core->mac[EIAC]); | ||||
| 
 | ||||
|     if (core->mac[EIAC] & E1000_ICR_OTHER) { | ||||
|         effective_eiac = (core->mac[EIAC] & E1000_EIAC_MASK) | | ||||
|                          E1000_ICR_OTHER_CAUSES; | ||||
|     } else { | ||||
|         effective_eiac = core->mac[EIAC] & E1000_EIAC_MASK; | ||||
|     effective_eiac = core->mac[EIAC] & cause; | ||||
| 
 | ||||
|     if (effective_eiac == E1000_ICR_OTHER) { | ||||
|         effective_eiac |= E1000_ICR_OTHER_CAUSES; | ||||
|     } | ||||
| 
 | ||||
|     core->mac[ICR] &= ~effective_eiac; | ||||
| 
 | ||||
|     if (!(core->mac[CTRL_EXT] & E1000_CTRL_EXT_IAME)) { | ||||
|         core->mac[IMS] &= ~effective_eiac; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| static void | ||||
|  | @ -2130,7 +2136,7 @@ e1000e_update_interrupt_state(E1000ECore *core) | |||
| 
 | ||||
|     /* Set ICR[OTHER] for MSI-X */ | ||||
|     if (is_msix) { | ||||
|         if (core->mac[ICR] & core->mac[IMS] & E1000_ICR_OTHER_CAUSES) { | ||||
|         if (core->mac[ICR] & E1000_ICR_OTHER_CAUSES) { | ||||
|             core->mac[ICR] |= E1000_ICR_OTHER; | ||||
|             trace_e1000e_irq_add_msi_other(core->mac[ICR]); | ||||
|         } | ||||
|  | @ -2168,7 +2174,7 @@ e1000e_update_interrupt_state(E1000ECore *core) | |||
|     } | ||||
| } | ||||
| 
 | ||||
| static inline void | ||||
| static void | ||||
| e1000e_set_interrupt_cause(E1000ECore *core, uint32_t val) | ||||
| { | ||||
|     trace_e1000e_irq_set_cause_entry(val, core->mac[ICR]); | ||||
|  | @ -2187,6 +2193,8 @@ e1000e_autoneg_timer(void *opaque) | |||
|     E1000ECore *core = opaque; | ||||
|     if (!qemu_get_queue(core->owner_nic)->link_down) { | ||||
|         e1000x_update_regs_on_autoneg_done(core->mac, core->phy[0]); | ||||
|         e1000e_start_recv(core); | ||||
| 
 | ||||
|         e1000e_update_flowctl_status(core); | ||||
|         /* signal link status change to the guest */ | ||||
|         e1000e_set_interrupt_cause(core, E1000_ICR_LSC); | ||||
|  | @ -2344,7 +2352,7 @@ e1000e_set_pbaclr(E1000ECore *core, int index, uint32_t val) | |||
| 
 | ||||
|     core->mac[PBACLR] = val & E1000_PBACLR_VALID_MASK; | ||||
| 
 | ||||
|     if (msix_enabled(core->owner)) { | ||||
|     if (!msix_enabled(core->owner)) { | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|  |  | |||
|  | @ -144,3 +144,6 @@ e1000e_receive(E1000ECore *core, const uint8_t *buf, size_t size); | |||
| 
 | ||||
| ssize_t | ||||
| e1000e_receive_iov(E1000ECore *core, const struct iovec *iov, int iovcnt); | ||||
| 
 | ||||
| void | ||||
| e1000e_start_recv(E1000ECore *core); | ||||
|  |  | |||
|  | @ -429,7 +429,7 @@ static void imx_fec_do_tx(IMXFECState *s) | |||
|         frame_size += len; | ||||
|         if (bd.flags & ENET_BD_L) { | ||||
|             /* Last buffer in frame.  */ | ||||
|             qemu_send_packet(qemu_get_queue(s->nic), frame, len); | ||||
|             qemu_send_packet(qemu_get_queue(s->nic), frame, frame_size); | ||||
|             ptr = frame; | ||||
|             frame_size = 0; | ||||
|             s->regs[ENET_EIR] |= ENET_INT_TXF; | ||||
|  |  | |||
|  | @ -23,6 +23,7 @@ do { printf("mcf_fec: " fmt , ## __VA_ARGS__); } while (0) | |||
| #define DPRINTF(fmt, ...) do {} while(0) | ||||
| #endif | ||||
| 
 | ||||
| #define FEC_MAX_DESC 1024 | ||||
| #define FEC_MAX_FRAME_SIZE 2032 | ||||
| 
 | ||||
| typedef struct { | ||||
|  | @ -149,7 +150,7 @@ static void mcf_fec_do_tx(mcf_fec_state *s) | |||
|     uint32_t addr; | ||||
|     mcf_fec_bd bd; | ||||
|     int frame_size; | ||||
|     int len; | ||||
|     int len, descnt = 0; | ||||
|     uint8_t frame[FEC_MAX_FRAME_SIZE]; | ||||
|     uint8_t *ptr; | ||||
| 
 | ||||
|  | @ -157,7 +158,7 @@ static void mcf_fec_do_tx(mcf_fec_state *s) | |||
|     ptr = frame; | ||||
|     frame_size = 0; | ||||
|     addr = s->tx_descriptor; | ||||
|     while (1) { | ||||
|     while (descnt++ < FEC_MAX_DESC) { | ||||
|         mcf_fec_read_bd(&bd, addr); | ||||
|         DPRINTF("tx_bd %x flags %04x len %d data %08x\n", | ||||
|                 addr, bd.flags, bd.length, bd.data); | ||||
|  | @ -176,7 +177,7 @@ static void mcf_fec_do_tx(mcf_fec_state *s) | |||
|         if (bd.flags & FEC_BD_L) { | ||||
|             /* Last buffer in frame.  */ | ||||
|             DPRINTF("Sending packet\n"); | ||||
|             qemu_send_packet(qemu_get_queue(s->nic), frame, len); | ||||
|             qemu_send_packet(qemu_get_queue(s->nic), frame, frame_size); | ||||
|             ptr = frame; | ||||
|             frame_size = 0; | ||||
|             s->eir |= FEC_INT_TXF; | ||||
|  |  | |||
|  | @ -223,7 +223,7 @@ e1000e_irq_icr_read_entry(uint32_t icr) "Starting ICR read. Current ICR: 0x%x" | |||
| e1000e_irq_icr_read_exit(uint32_t icr) "Ending ICR read. Current ICR: 0x%x" | ||||
| e1000e_irq_icr_clear_zero_ims(void) "Clearing ICR on read due to zero IMS" | ||||
| e1000e_irq_icr_clear_iame(void) "Clearing ICR on read due to IAME" | ||||
| e1000e_irq_ims_clear_eiame(uint32_t iam, uint32_t cause) "Clearing IMS due to EIAME, IAM: 0x%X, cause: 0x%X" | ||||
| e1000e_irq_iam_clear_eiame(uint32_t iam, uint32_t cause) "Clearing IMS due to EIAME, IAM: 0x%X, cause: 0x%X" | ||||
| e1000e_irq_icr_clear_eiac(uint32_t icr, uint32_t eiac) "Clearing ICR bits due to EIAC, ICR: 0x%X, EIAC: 0x%X" | ||||
| e1000e_irq_ims_clear_set_imc(uint32_t val) "Clearing IMS bits due to IMC write 0x%x" | ||||
| e1000e_irq_fire_delayed_interrupts(void) "Firing delayed interrupts" | ||||
|  |  | |||
|  | @ -31,6 +31,11 @@ | |||
| #define MAC_TABLE_ENTRIES    64 | ||||
| #define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */ | ||||
| 
 | ||||
| /* previously fixed value */ | ||||
| #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256 | ||||
| /* for now, only allow larger queues; with virtio-1, guest can downsize */ | ||||
| #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE | ||||
| 
 | ||||
| /*
 | ||||
|  * Calculate the number of bytes up to and including the given 'field' of | ||||
|  * 'container'. | ||||
|  | @ -1412,7 +1417,8 @@ static void virtio_net_add_queue(VirtIONet *n, int index) | |||
| { | ||||
|     VirtIODevice *vdev = VIRTIO_DEVICE(n); | ||||
| 
 | ||||
|     n->vqs[index].rx_vq = virtio_add_queue(vdev, 256, virtio_net_handle_rx); | ||||
|     n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size, | ||||
|                                            virtio_net_handle_rx); | ||||
|     if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) { | ||||
|         n->vqs[index].tx_vq = | ||||
|             virtio_add_queue(vdev, 256, virtio_net_handle_tx_timer); | ||||
|  | @ -1720,6 +1726,22 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) | |||
|     virtio_net_set_config_size(n, n->host_features); | ||||
|     virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size); | ||||
| 
 | ||||
|     /*
 | ||||
|      * We set a lower limit on RX queue size to what it always was. | ||||
|      * Guests that want a smaller ring can always resize it without | ||||
|      * help from us (using virtio 1 and up). | ||||
|      */ | ||||
|     if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE || | ||||
|         n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE || | ||||
|         (n->net_conf.rx_queue_size & (n->net_conf.rx_queue_size - 1))) { | ||||
|         error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), " | ||||
|                    "must be a power of 2 between %d and %d.", | ||||
|                    n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE, | ||||
|                    VIRTQUEUE_MAX_SIZE); | ||||
|         virtio_cleanup(vdev); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     n->max_queues = MAX(n->nic_conf.peers.queues, 1); | ||||
|     if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) { | ||||
|         error_setg(errp, "Invalid number of queues (= %" PRIu32 "), " | ||||
|  | @ -1880,6 +1902,8 @@ static Property virtio_net_properties[] = { | |||
|                        TX_TIMER_INTERVAL), | ||||
|     DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST), | ||||
|     DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx), | ||||
|     DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size, | ||||
|                        VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE), | ||||
|     DEFINE_PROP_END_OF_LIST(), | ||||
| }; | ||||
| 
 | ||||
|  |  | |||
|  | @ -35,6 +35,7 @@ typedef struct virtio_net_conf | |||
|     uint32_t txtimer; | ||||
|     int32_t txburst; | ||||
|     char *tx; | ||||
|     uint16_t rx_queue_size; | ||||
| } virtio_net_conf; | ||||
| 
 | ||||
| /* Maximum packet size we can receive from tap device: header + 64k */ | ||||
|  |  | |||
|  | @ -0,0 +1,59 @@ | |||
| /* jhash.h: Jenkins hash support.
 | ||||
|   * | ||||
|   * Copyright (C) 2006. Bob Jenkins (bob_jenkins@burtleburtle.net) | ||||
|   * | ||||
|   * http://burtleburtle.net/bob/hash/
 | ||||
|   * | ||||
|   * These are the credits from Bob's sources: | ||||
|   * | ||||
|   * lookup3.c, by Bob Jenkins, May 2006, Public Domain. | ||||
|   * | ||||
|   * These are functions for producing 32-bit hashes for hash table lookup. | ||||
|   * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final() | ||||
|   * are externally useful functions.  Routines to test the hash are included | ||||
|   * if SELF_TEST is defined.  You can use this free for any purpose. It's in | ||||
|   * the public domain.  It has no warranty. | ||||
|   * | ||||
|   * Copyright (C) 2009-2010 Jozsef Kadlecsik (kadlec@blackhole.kfki.hu) | ||||
|   * | ||||
|   * I've modified Bob's hash to be useful in the Linux kernel, and | ||||
|   * any bugs present are my fault. | ||||
|   * Jozsef | ||||
|   */ | ||||
| 
 | ||||
| #ifndef QEMU_JHASH_H__ | ||||
| #define QEMU_JHASH_H__ | ||||
| 
 | ||||
| #include "qemu/bitops.h" | ||||
| 
 | ||||
| /*
 | ||||
|  * hashtable relation copy from linux kernel jhash | ||||
|  */ | ||||
| 
 | ||||
| /* __jhash_mix -- mix 3 32-bit values reversibly. */ | ||||
| #define __jhash_mix(a, b, c)                \ | ||||
| {                                           \ | ||||
|     a -= c;  a ^= rol32(c, 4);  c += b;     \ | ||||
|     b -= a;  b ^= rol32(a, 6);  a += c;     \ | ||||
|     c -= b;  c ^= rol32(b, 8);  b += a;     \ | ||||
|     a -= c;  a ^= rol32(c, 16); c += b;     \ | ||||
|     b -= a;  b ^= rol32(a, 19); a += c;     \ | ||||
|     c -= b;  c ^= rol32(b, 4);  b += a;     \ | ||||
| } | ||||
| 
 | ||||
| /* __jhash_final - final mixing of 3 32-bit values (a,b,c) into c */ | ||||
| #define __jhash_final(a, b, c)  \ | ||||
| {                               \ | ||||
|     c ^= b; c -= rol32(b, 14);  \ | ||||
|     a ^= c; a -= rol32(c, 11);  \ | ||||
|     b ^= a; b -= rol32(a, 25);  \ | ||||
|     c ^= b; c -= rol32(b, 16);  \ | ||||
|     a ^= c; a -= rol32(c, 4);   \ | ||||
|     b ^= a; b -= rol32(a, 14);  \ | ||||
|     c ^= b; c -= rol32(b, 24);  \ | ||||
| } | ||||
| 
 | ||||
| /* An arbitrary initial parameter */ | ||||
| #define JHASH_INITVAL           0xdeadbeef | ||||
| 
 | ||||
| #endif /* QEMU_JHASH_H__ */ | ||||
|  | @ -65,7 +65,8 @@ struct CharDriverState { | |||
|     int (*chr_sync_read)(struct CharDriverState *s, | ||||
|                          const uint8_t *buf, int len); | ||||
|     GSource *(*chr_add_watch)(struct CharDriverState *s, GIOCondition cond); | ||||
|     void (*chr_update_read_handler)(struct CharDriverState *s); | ||||
|     void (*chr_update_read_handler)(struct CharDriverState *s, | ||||
|                                     GMainContext *context); | ||||
|     int (*chr_ioctl)(struct CharDriverState *s, int cmd, void *arg); | ||||
|     int (*get_msgfds)(struct CharDriverState *s, int* fds, int num); | ||||
|     int (*set_msgfds)(struct CharDriverState *s, int *fds, int num); | ||||
|  | @ -422,6 +423,14 @@ void qemu_chr_add_handlers(CharDriverState *s, | |||
|                            IOEventHandler *fd_event, | ||||
|                            void *opaque); | ||||
| 
 | ||||
| /* This API can make handler run in the context what you pass to. */ | ||||
| void qemu_chr_add_handlers_full(CharDriverState *s, | ||||
|                                 IOCanReadHandler *fd_can_read, | ||||
|                                 IOReadHandler *fd_read, | ||||
|                                 IOEventHandler *fd_event, | ||||
|                                 void *opaque, | ||||
|                                 GMainContext *context); | ||||
| 
 | ||||
| void qemu_chr_be_generic_open(CharDriverState *s); | ||||
| void qemu_chr_accept_input(CharDriverState *s); | ||||
| int qemu_chr_add_client(CharDriverState *s, int fd); | ||||
|  |  | |||
|  | @ -16,3 +16,6 @@ common-obj-$(CONFIG_NETMAP) += netmap.o | |||
| common-obj-y += filter.o | ||||
| common-obj-y += filter-buffer.o | ||||
| common-obj-y += filter-mirror.o | ||||
| common-obj-y += colo-compare.o | ||||
| common-obj-y += colo.o | ||||
| common-obj-y += filter-rewriter.o | ||||
|  |  | |||
|  | @ -0,0 +1,781 @@ | |||
| /*
 | ||||
|  * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO) | ||||
|  * (a.k.a. Fault Tolerance or Continuous Replication) | ||||
|  * | ||||
|  * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. | ||||
|  * Copyright (c) 2016 FUJITSU LIMITED | ||||
|  * Copyright (c) 2016 Intel Corporation | ||||
|  * | ||||
|  * Author: Zhang Chen <zhangchen.fnst@cn.fujitsu.com> | ||||
|  * | ||||
|  * This work is licensed under the terms of the GNU GPL, version 2 or | ||||
|  * later.  See the COPYING file in the top-level directory. | ||||
|  */ | ||||
| 
 | ||||
| #include "qemu/osdep.h" | ||||
| #include "qemu/error-report.h" | ||||
| #include "trace.h" | ||||
| #include "qemu-common.h" | ||||
| #include "qapi/qmp/qerror.h" | ||||
| #include "qapi/error.h" | ||||
| #include "net/net.h" | ||||
| #include "net/eth.h" | ||||
| #include "qom/object_interfaces.h" | ||||
| #include "qemu/iov.h" | ||||
| #include "qom/object.h" | ||||
| #include "qemu/typedefs.h" | ||||
| #include "net/queue.h" | ||||
| #include "sysemu/char.h" | ||||
| #include "qemu/sockets.h" | ||||
| #include "qapi-visit.h" | ||||
| #include "net/colo.h" | ||||
| 
 | ||||
| #define TYPE_COLO_COMPARE "colo-compare" | ||||
| #define COLO_COMPARE(obj) \ | ||||
|     OBJECT_CHECK(CompareState, (obj), TYPE_COLO_COMPARE) | ||||
| 
 | ||||
| #define COMPARE_READ_LEN_MAX NET_BUFSIZE | ||||
| #define MAX_QUEUE_SIZE 1024 | ||||
| 
 | ||||
| /* TODO: Should be configurable */ | ||||
| #define REGULAR_PACKET_CHECK_MS 3000 | ||||
| 
 | ||||
| /*
 | ||||
|   + CompareState ++ | ||||
|   |               | | ||||
|   +---------------+   +---------------+         +---------------+ | ||||
|   |conn list      +--->conn           +--------->conn           | | ||||
|   +---------------+   +---------------+         +---------------+ | ||||
|   |               |     |           |             |          | | ||||
|   +---------------+ +---v----+  +---v----+    +---v----+ +---v----+ | ||||
|                     |primary |  |secondary    |primary | |secondary | ||||
|                     |packet  |  |packet  +    |packet  | |packet  + | ||||
|                     +--------+  +--------+    +--------+ +--------+ | ||||
|                         |           |             |          | | ||||
|                     +---v----+  +---v----+    +---v----+ +---v----+ | ||||
|                     |primary |  |secondary    |primary | |secondary | ||||
|                     |packet  |  |packet  +    |packet  | |packet  + | ||||
|                     +--------+  +--------+    +--------+ +--------+ | ||||
|                         |           |             |          | | ||||
|                     +---v----+  +---v----+    +---v----+ +---v----+ | ||||
|                     |primary |  |secondary    |primary | |secondary | ||||
|                     |packet  |  |packet  +    |packet  | |packet  + | ||||
|                     +--------+  +--------+    +--------+ +--------+ | ||||
| */ | ||||
| typedef struct CompareState { | ||||
|     Object parent; | ||||
| 
 | ||||
|     char *pri_indev; | ||||
|     char *sec_indev; | ||||
|     char *outdev; | ||||
|     CharDriverState *chr_pri_in; | ||||
|     CharDriverState *chr_sec_in; | ||||
|     CharDriverState *chr_out; | ||||
|     SocketReadState pri_rs; | ||||
|     SocketReadState sec_rs; | ||||
| 
 | ||||
|     /* connection list: the connections belonged to this NIC could be found
 | ||||
|      * in this list. | ||||
|      * element type: Connection | ||||
|      */ | ||||
|     GQueue conn_list; | ||||
|     /* hashtable to save connection */ | ||||
|     GHashTable *connection_track_table; | ||||
|     /* compare thread, a thread for each NIC */ | ||||
|     QemuThread thread; | ||||
|     /* Timer used on the primary to find packets that are never matched */ | ||||
|     QEMUTimer *timer; | ||||
|     QemuMutex timer_check_lock; | ||||
| } CompareState; | ||||
| 
 | ||||
| typedef struct CompareClass { | ||||
|     ObjectClass parent_class; | ||||
| } CompareClass; | ||||
| 
 | ||||
| typedef struct CompareChardevProps { | ||||
|     bool is_socket; | ||||
| } CompareChardevProps; | ||||
| 
 | ||||
| enum { | ||||
|     PRIMARY_IN = 0, | ||||
|     SECONDARY_IN, | ||||
| }; | ||||
| 
 | ||||
| static int compare_chr_send(CharDriverState *out, | ||||
|                             const uint8_t *buf, | ||||
|                             uint32_t size); | ||||
| 
 | ||||
| /*
 | ||||
|  * Return 0 on success, if return -1 means the pkt | ||||
|  * is unsupported(arp and ipv6) and will be sent later | ||||
|  */ | ||||
| static int packet_enqueue(CompareState *s, int mode) | ||||
| { | ||||
|     ConnectionKey key; | ||||
|     Packet *pkt = NULL; | ||||
|     Connection *conn; | ||||
| 
 | ||||
|     if (mode == PRIMARY_IN) { | ||||
|         pkt = packet_new(s->pri_rs.buf, s->pri_rs.packet_len); | ||||
|     } else { | ||||
|         pkt = packet_new(s->sec_rs.buf, s->sec_rs.packet_len); | ||||
|     } | ||||
| 
 | ||||
|     if (parse_packet_early(pkt)) { | ||||
|         packet_destroy(pkt, NULL); | ||||
|         pkt = NULL; | ||||
|         return -1; | ||||
|     } | ||||
|     fill_connection_key(pkt, &key); | ||||
| 
 | ||||
|     conn = connection_get(s->connection_track_table, | ||||
|                           &key, | ||||
|                           &s->conn_list); | ||||
| 
 | ||||
|     if (!conn->processing) { | ||||
|         g_queue_push_tail(&s->conn_list, conn); | ||||
|         conn->processing = true; | ||||
|     } | ||||
| 
 | ||||
|     if (mode == PRIMARY_IN) { | ||||
|         if (g_queue_get_length(&conn->primary_list) <= | ||||
|                                MAX_QUEUE_SIZE) { | ||||
|             g_queue_push_tail(&conn->primary_list, pkt); | ||||
|         } else { | ||||
|             error_report("colo compare primary queue size too big," | ||||
|                          "drop packet"); | ||||
|         } | ||||
|     } else { | ||||
|         if (g_queue_get_length(&conn->secondary_list) <= | ||||
|                                MAX_QUEUE_SIZE) { | ||||
|             g_queue_push_tail(&conn->secondary_list, pkt); | ||||
|         } else { | ||||
|             error_report("colo compare secondary queue size too big," | ||||
|                          "drop packet"); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * The IP packets sent by primary and secondary | ||||
|  * will be compared in here | ||||
|  * TODO support ip fragment, Out-Of-Order | ||||
|  * return:    0  means packet same | ||||
|  *            > 0 || < 0 means packet different | ||||
|  */ | ||||
| static int colo_packet_compare(Packet *ppkt, Packet *spkt) | ||||
| { | ||||
|     trace_colo_compare_ip_info(ppkt->size, inet_ntoa(ppkt->ip->ip_src), | ||||
|                                inet_ntoa(ppkt->ip->ip_dst), spkt->size, | ||||
|                                inet_ntoa(spkt->ip->ip_src), | ||||
|                                inet_ntoa(spkt->ip->ip_dst)); | ||||
| 
 | ||||
|     if (ppkt->size == spkt->size) { | ||||
|         return memcmp(ppkt->data, spkt->data, spkt->size); | ||||
|     } else { | ||||
|         return -1; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Called from the compare thread on the primary | ||||
|  * for compare tcp packet | ||||
|  * compare_tcp copied from Dr. David Alan Gilbert's branch | ||||
|  */ | ||||
| static int colo_packet_compare_tcp(Packet *spkt, Packet *ppkt) | ||||
| { | ||||
|     struct tcphdr *ptcp, *stcp; | ||||
|     int res; | ||||
|     char *sdebug, *ddebug; | ||||
| 
 | ||||
|     trace_colo_compare_main("compare tcp"); | ||||
|     if (ppkt->size != spkt->size) { | ||||
|         if (trace_event_get_state(TRACE_COLO_COMPARE_MISCOMPARE)) { | ||||
|             trace_colo_compare_main("pkt size not same"); | ||||
|         } | ||||
|         return -1; | ||||
|     } | ||||
| 
 | ||||
|     ptcp = (struct tcphdr *)ppkt->transport_header; | ||||
|     stcp = (struct tcphdr *)spkt->transport_header; | ||||
| 
 | ||||
|     /*
 | ||||
|      * The 'identification' field in the IP header is *very* random | ||||
|      * it almost never matches.  Fudge this by ignoring differences in | ||||
|      * unfragmented packets; they'll normally sort themselves out if different | ||||
|      * anyway, and it should recover at the TCP level. | ||||
|      * An alternative would be to get both the primary and secondary to rewrite | ||||
|      * somehow; but that would need some sync traffic to sync the state | ||||
|      */ | ||||
|     if (ntohs(ppkt->ip->ip_off) & IP_DF) { | ||||
|         spkt->ip->ip_id = ppkt->ip->ip_id; | ||||
|         /* and the sum will be different if the IDs were different */ | ||||
|         spkt->ip->ip_sum = ppkt->ip->ip_sum; | ||||
|     } | ||||
| 
 | ||||
|     res = memcmp(ppkt->data + ETH_HLEN, spkt->data + ETH_HLEN, | ||||
|                 (spkt->size - ETH_HLEN)); | ||||
| 
 | ||||
|     if (res != 0 && trace_event_get_state(TRACE_COLO_COMPARE_MISCOMPARE)) { | ||||
|         sdebug = strdup(inet_ntoa(ppkt->ip->ip_src)); | ||||
|         ddebug = strdup(inet_ntoa(ppkt->ip->ip_dst)); | ||||
|         fprintf(stderr, "%s: src/dst: %s/%s p: seq/ack=%u/%u" | ||||
|                 " s: seq/ack=%u/%u res=%d flags=%x/%x\n", | ||||
|                 __func__, sdebug, ddebug, | ||||
|                 (unsigned int)ntohl(ptcp->th_seq), | ||||
|                 (unsigned int)ntohl(ptcp->th_ack), | ||||
|                 (unsigned int)ntohl(stcp->th_seq), | ||||
|                 (unsigned int)ntohl(stcp->th_ack), | ||||
|                 res, ptcp->th_flags, stcp->th_flags); | ||||
| 
 | ||||
|         fprintf(stderr, "Primary len = %d\n", ppkt->size); | ||||
|         qemu_hexdump((char *)ppkt->data, stderr, "colo-compare", ppkt->size); | ||||
|         fprintf(stderr, "Secondary len = %d\n", spkt->size); | ||||
|         qemu_hexdump((char *)spkt->data, stderr, "colo-compare", spkt->size); | ||||
| 
 | ||||
|         g_free(sdebug); | ||||
|         g_free(ddebug); | ||||
|     } | ||||
| 
 | ||||
|     return res; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Called from the compare thread on the primary | ||||
|  * for compare udp packet | ||||
|  */ | ||||
| static int colo_packet_compare_udp(Packet *spkt, Packet *ppkt) | ||||
| { | ||||
|     int ret; | ||||
| 
 | ||||
|     trace_colo_compare_main("compare udp"); | ||||
|     ret = colo_packet_compare(ppkt, spkt); | ||||
| 
 | ||||
|     if (ret) { | ||||
|         trace_colo_compare_udp_miscompare("primary pkt size", ppkt->size); | ||||
|         qemu_hexdump((char *)ppkt->data, stderr, "colo-compare", ppkt->size); | ||||
|         trace_colo_compare_udp_miscompare("Secondary pkt size", spkt->size); | ||||
|         qemu_hexdump((char *)spkt->data, stderr, "colo-compare", spkt->size); | ||||
|     } | ||||
| 
 | ||||
|     return ret; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Called from the compare thread on the primary | ||||
|  * for compare icmp packet | ||||
|  */ | ||||
| static int colo_packet_compare_icmp(Packet *spkt, Packet *ppkt) | ||||
| { | ||||
|     int network_length; | ||||
| 
 | ||||
|     trace_colo_compare_main("compare icmp"); | ||||
|     network_length = ppkt->ip->ip_hl * 4; | ||||
|     if (ppkt->size != spkt->size || | ||||
|         ppkt->size < network_length + ETH_HLEN) { | ||||
|         return -1; | ||||
|     } | ||||
| 
 | ||||
|     if (colo_packet_compare(ppkt, spkt)) { | ||||
|         trace_colo_compare_icmp_miscompare("primary pkt size", | ||||
|                                            ppkt->size); | ||||
|         qemu_hexdump((char *)ppkt->data, stderr, "colo-compare", | ||||
|                      ppkt->size); | ||||
|         trace_colo_compare_icmp_miscompare("Secondary pkt size", | ||||
|                                            spkt->size); | ||||
|         qemu_hexdump((char *)spkt->data, stderr, "colo-compare", | ||||
|                      spkt->size); | ||||
|         return -1; | ||||
|     } else { | ||||
|         return 0; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Called from the compare thread on the primary | ||||
|  * for compare other packet | ||||
|  */ | ||||
| static int colo_packet_compare_other(Packet *spkt, Packet *ppkt) | ||||
| { | ||||
|     trace_colo_compare_main("compare other"); | ||||
|     trace_colo_compare_ip_info(ppkt->size, inet_ntoa(ppkt->ip->ip_src), | ||||
|                                inet_ntoa(ppkt->ip->ip_dst), spkt->size, | ||||
|                                inet_ntoa(spkt->ip->ip_src), | ||||
|                                inet_ntoa(spkt->ip->ip_dst)); | ||||
|     return colo_packet_compare(ppkt, spkt); | ||||
| } | ||||
| 
 | ||||
| static int colo_old_packet_check_one(Packet *pkt, int64_t *check_time) | ||||
| { | ||||
|     int64_t now = qemu_clock_get_ms(QEMU_CLOCK_HOST); | ||||
| 
 | ||||
|     if ((now - pkt->creation_ms) > (*check_time)) { | ||||
|         trace_colo_old_packet_check_found(pkt->creation_ms); | ||||
|         return 0; | ||||
|     } else { | ||||
|         return 1; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| static void colo_old_packet_check_one_conn(void *opaque, | ||||
|                                            void *user_data) | ||||
| { | ||||
|     Connection *conn = opaque; | ||||
|     GList *result = NULL; | ||||
|     int64_t check_time = REGULAR_PACKET_CHECK_MS; | ||||
| 
 | ||||
|     result = g_queue_find_custom(&conn->primary_list, | ||||
|                                  &check_time, | ||||
|                                  (GCompareFunc)colo_old_packet_check_one); | ||||
| 
 | ||||
|     if (result) { | ||||
|         /* do checkpoint will flush old packet */ | ||||
|         /* TODO: colo_notify_checkpoint();*/ | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Look for old packets that the secondary hasn't matched, | ||||
|  * if we have some then we have to checkpoint to wake | ||||
|  * the secondary up. | ||||
|  */ | ||||
| static void colo_old_packet_check(void *opaque) | ||||
| { | ||||
|     CompareState *s = opaque; | ||||
| 
 | ||||
|     g_queue_foreach(&s->conn_list, colo_old_packet_check_one_conn, NULL); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Called from the compare thread on the primary | ||||
|  * for compare connection | ||||
|  */ | ||||
| static void colo_compare_connection(void *opaque, void *user_data) | ||||
| { | ||||
|     CompareState *s = user_data; | ||||
|     Connection *conn = opaque; | ||||
|     Packet *pkt = NULL; | ||||
|     GList *result = NULL; | ||||
|     int ret; | ||||
| 
 | ||||
|     while (!g_queue_is_empty(&conn->primary_list) && | ||||
|            !g_queue_is_empty(&conn->secondary_list)) { | ||||
|         qemu_mutex_lock(&s->timer_check_lock); | ||||
|         pkt = g_queue_pop_tail(&conn->primary_list); | ||||
|         qemu_mutex_unlock(&s->timer_check_lock); | ||||
|         switch (conn->ip_proto) { | ||||
|         case IPPROTO_TCP: | ||||
|             result = g_queue_find_custom(&conn->secondary_list, | ||||
|                      pkt, (GCompareFunc)colo_packet_compare_tcp); | ||||
|             break; | ||||
|         case IPPROTO_UDP: | ||||
|             result = g_queue_find_custom(&conn->secondary_list, | ||||
|                      pkt, (GCompareFunc)colo_packet_compare_udp); | ||||
|             break; | ||||
|         case IPPROTO_ICMP: | ||||
|             result = g_queue_find_custom(&conn->secondary_list, | ||||
|                      pkt, (GCompareFunc)colo_packet_compare_icmp); | ||||
|             break; | ||||
|         default: | ||||
|             result = g_queue_find_custom(&conn->secondary_list, | ||||
|                      pkt, (GCompareFunc)colo_packet_compare_other); | ||||
|             break; | ||||
|         } | ||||
| 
 | ||||
|         if (result) { | ||||
|             ret = compare_chr_send(s->chr_out, pkt->data, pkt->size); | ||||
|             if (ret < 0) { | ||||
|                 error_report("colo_send_primary_packet failed"); | ||||
|             } | ||||
|             trace_colo_compare_main("packet same and release packet"); | ||||
|             g_queue_remove(&conn->secondary_list, result->data); | ||||
|             packet_destroy(pkt, NULL); | ||||
|         } else { | ||||
|             /*
 | ||||
|              * If one packet arrive late, the secondary_list or | ||||
|              * primary_list will be empty, so we can't compare it | ||||
|              * until next comparison. | ||||
|              */ | ||||
|             trace_colo_compare_main("packet different"); | ||||
|             qemu_mutex_lock(&s->timer_check_lock); | ||||
|             g_queue_push_tail(&conn->primary_list, pkt); | ||||
|             qemu_mutex_unlock(&s->timer_check_lock); | ||||
|             /* TODO: colo_notify_checkpoint();*/ | ||||
|             break; | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| static int compare_chr_send(CharDriverState *out, | ||||
|                             const uint8_t *buf, | ||||
|                             uint32_t size) | ||||
| { | ||||
|     int ret = 0; | ||||
|     uint32_t len = htonl(size); | ||||
| 
 | ||||
|     if (!size) { | ||||
|         return 0; | ||||
|     } | ||||
| 
 | ||||
|     ret = qemu_chr_fe_write_all(out, (uint8_t *)&len, sizeof(len)); | ||||
|     if (ret != sizeof(len)) { | ||||
|         goto err; | ||||
|     } | ||||
| 
 | ||||
|     ret = qemu_chr_fe_write_all(out, (uint8_t *)buf, size); | ||||
|     if (ret != size) { | ||||
|         goto err; | ||||
|     } | ||||
| 
 | ||||
|     return 0; | ||||
| 
 | ||||
| err: | ||||
|     return ret < 0 ? ret : -EIO; | ||||
| } | ||||
| 
 | ||||
| static int compare_chr_can_read(void *opaque) | ||||
| { | ||||
|     return COMPARE_READ_LEN_MAX; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Called from the main thread on the primary for packets | ||||
|  * arriving over the socket from the primary. | ||||
|  */ | ||||
| static void compare_pri_chr_in(void *opaque, const uint8_t *buf, int size) | ||||
| { | ||||
|     CompareState *s = COLO_COMPARE(opaque); | ||||
|     int ret; | ||||
| 
 | ||||
|     ret = net_fill_rstate(&s->pri_rs, buf, size); | ||||
|     if (ret == -1) { | ||||
|         qemu_chr_add_handlers(s->chr_pri_in, NULL, NULL, NULL, NULL); | ||||
|         error_report("colo-compare primary_in error"); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Called from the main thread on the primary for packets | ||||
|  * arriving over the socket from the secondary. | ||||
|  */ | ||||
| static void compare_sec_chr_in(void *opaque, const uint8_t *buf, int size) | ||||
| { | ||||
|     CompareState *s = COLO_COMPARE(opaque); | ||||
|     int ret; | ||||
| 
 | ||||
|     ret = net_fill_rstate(&s->sec_rs, buf, size); | ||||
|     if (ret == -1) { | ||||
|         qemu_chr_add_handlers(s->chr_sec_in, NULL, NULL, NULL, NULL); | ||||
|         error_report("colo-compare secondary_in error"); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| static void *colo_compare_thread(void *opaque) | ||||
| { | ||||
|     GMainContext *worker_context; | ||||
|     GMainLoop *compare_loop; | ||||
|     CompareState *s = opaque; | ||||
| 
 | ||||
|     worker_context = g_main_context_new(); | ||||
| 
 | ||||
|     qemu_chr_add_handlers_full(s->chr_pri_in, compare_chr_can_read, | ||||
|                           compare_pri_chr_in, NULL, s, worker_context); | ||||
|     qemu_chr_add_handlers_full(s->chr_sec_in, compare_chr_can_read, | ||||
|                           compare_sec_chr_in, NULL, s, worker_context); | ||||
| 
 | ||||
|     compare_loop = g_main_loop_new(worker_context, FALSE); | ||||
| 
 | ||||
|     g_main_loop_run(compare_loop); | ||||
| 
 | ||||
|     g_main_loop_unref(compare_loop); | ||||
|     g_main_context_unref(worker_context); | ||||
|     return NULL; | ||||
| } | ||||
| 
 | ||||
| static char *compare_get_pri_indev(Object *obj, Error **errp) | ||||
| { | ||||
|     CompareState *s = COLO_COMPARE(obj); | ||||
| 
 | ||||
|     return g_strdup(s->pri_indev); | ||||
| } | ||||
| 
 | ||||
| static void compare_set_pri_indev(Object *obj, const char *value, Error **errp) | ||||
| { | ||||
|     CompareState *s = COLO_COMPARE(obj); | ||||
| 
 | ||||
|     g_free(s->pri_indev); | ||||
|     s->pri_indev = g_strdup(value); | ||||
| } | ||||
| 
 | ||||
| static char *compare_get_sec_indev(Object *obj, Error **errp) | ||||
| { | ||||
|     CompareState *s = COLO_COMPARE(obj); | ||||
| 
 | ||||
|     return g_strdup(s->sec_indev); | ||||
| } | ||||
| 
 | ||||
| static void compare_set_sec_indev(Object *obj, const char *value, Error **errp) | ||||
| { | ||||
|     CompareState *s = COLO_COMPARE(obj); | ||||
| 
 | ||||
|     g_free(s->sec_indev); | ||||
|     s->sec_indev = g_strdup(value); | ||||
| } | ||||
| 
 | ||||
| static char *compare_get_outdev(Object *obj, Error **errp) | ||||
| { | ||||
|     CompareState *s = COLO_COMPARE(obj); | ||||
| 
 | ||||
|     return g_strdup(s->outdev); | ||||
| } | ||||
| 
 | ||||
| static void compare_set_outdev(Object *obj, const char *value, Error **errp) | ||||
| { | ||||
|     CompareState *s = COLO_COMPARE(obj); | ||||
| 
 | ||||
|     g_free(s->outdev); | ||||
|     s->outdev = g_strdup(value); | ||||
| } | ||||
| 
 | ||||
| static void compare_pri_rs_finalize(SocketReadState *pri_rs) | ||||
| { | ||||
|     CompareState *s = container_of(pri_rs, CompareState, pri_rs); | ||||
| 
 | ||||
|     if (packet_enqueue(s, PRIMARY_IN)) { | ||||
|         trace_colo_compare_main("primary: unsupported packet in"); | ||||
|         compare_chr_send(s->chr_out, pri_rs->buf, pri_rs->packet_len); | ||||
|     } else { | ||||
|         /* compare connection */ | ||||
|         g_queue_foreach(&s->conn_list, colo_compare_connection, s); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| static void compare_sec_rs_finalize(SocketReadState *sec_rs) | ||||
| { | ||||
|     CompareState *s = container_of(sec_rs, CompareState, sec_rs); | ||||
| 
 | ||||
|     if (packet_enqueue(s, SECONDARY_IN)) { | ||||
|         trace_colo_compare_main("secondary: unsupported packet in"); | ||||
|     } else { | ||||
|         /* compare connection */ | ||||
|         g_queue_foreach(&s->conn_list, colo_compare_connection, s); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| static int compare_chardev_opts(void *opaque, | ||||
|                                 const char *name, const char *value, | ||||
|                                 Error **errp) | ||||
| { | ||||
|     CompareChardevProps *props = opaque; | ||||
| 
 | ||||
|     if (strcmp(name, "backend") == 0 && | ||||
|         strcmp(value, "socket") == 0) { | ||||
|         props->is_socket = true; | ||||
|         return 0; | ||||
|     } else if (strcmp(name, "host") == 0 || | ||||
|               (strcmp(name, "port") == 0) || | ||||
|               (strcmp(name, "server") == 0) || | ||||
|               (strcmp(name, "wait") == 0) || | ||||
|               (strcmp(name, "path") == 0)) { | ||||
|         return 0; | ||||
|     } else { | ||||
|         error_setg(errp, | ||||
|                    "COLO-compare does not support a chardev with option %s=%s", | ||||
|                    name, value); | ||||
|         return -1; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Return 0 is success. | ||||
|  * Return 1 is failed. | ||||
|  */ | ||||
| static int find_and_check_chardev(CharDriverState **chr, | ||||
|                                   char *chr_name, | ||||
|                                   Error **errp) | ||||
| { | ||||
|     CompareChardevProps props; | ||||
| 
 | ||||
|     *chr = qemu_chr_find(chr_name); | ||||
|     if (*chr == NULL) { | ||||
|         error_setg(errp, "Device '%s' not found", | ||||
|                    chr_name); | ||||
|         return 1; | ||||
|     } | ||||
| 
 | ||||
|     memset(&props, 0, sizeof(props)); | ||||
|     if (qemu_opt_foreach((*chr)->opts, compare_chardev_opts, &props, errp)) { | ||||
|         return 1; | ||||
|     } | ||||
| 
 | ||||
|     if (!props.is_socket) { | ||||
|         error_setg(errp, "chardev \"%s\" is not a tcp socket", | ||||
|                    chr_name); | ||||
|         return 1; | ||||
|     } | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Check old packet regularly so it can watch for any packets | ||||
|  * that the secondary hasn't produced equivalents of. | ||||
|  */ | ||||
| static void check_old_packet_regular(void *opaque) | ||||
| { | ||||
|     CompareState *s = opaque; | ||||
| 
 | ||||
|     timer_mod(s->timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + | ||||
|               REGULAR_PACKET_CHECK_MS); | ||||
|     /* if have old packet we will notify checkpoint */ | ||||
|     /*
 | ||||
|      * TODO: Make timer handler run in compare thread | ||||
|      * like qemu_chr_add_handlers_full. | ||||
|      */ | ||||
|     qemu_mutex_lock(&s->timer_check_lock); | ||||
|     colo_old_packet_check(s); | ||||
|     qemu_mutex_unlock(&s->timer_check_lock); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Called from the main thread on the primary | ||||
|  * to setup colo-compare. | ||||
|  */ | ||||
| static void colo_compare_complete(UserCreatable *uc, Error **errp) | ||||
| { | ||||
|     CompareState *s = COLO_COMPARE(uc); | ||||
|     char thread_name[64]; | ||||
|     static int compare_id; | ||||
| 
 | ||||
|     if (!s->pri_indev || !s->sec_indev || !s->outdev) { | ||||
|         error_setg(errp, "colo compare needs 'primary_in' ," | ||||
|                    "'secondary_in','outdev' property set"); | ||||
|         return; | ||||
|     } else if (!strcmp(s->pri_indev, s->outdev) || | ||||
|                !strcmp(s->sec_indev, s->outdev) || | ||||
|                !strcmp(s->pri_indev, s->sec_indev)) { | ||||
|         error_setg(errp, "'indev' and 'outdev' could not be same " | ||||
|                    "for compare module"); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     if (find_and_check_chardev(&s->chr_pri_in, s->pri_indev, errp)) { | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     if (find_and_check_chardev(&s->chr_sec_in, s->sec_indev, errp)) { | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     if (find_and_check_chardev(&s->chr_out, s->outdev, errp)) { | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     qemu_chr_fe_claim_no_fail(s->chr_pri_in); | ||||
| 
 | ||||
|     qemu_chr_fe_claim_no_fail(s->chr_sec_in); | ||||
| 
 | ||||
|     qemu_chr_fe_claim_no_fail(s->chr_out); | ||||
| 
 | ||||
|     net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize); | ||||
|     net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize); | ||||
| 
 | ||||
|     g_queue_init(&s->conn_list); | ||||
|     qemu_mutex_init(&s->timer_check_lock); | ||||
| 
 | ||||
|     s->connection_track_table = g_hash_table_new_full(connection_key_hash, | ||||
|                                                       connection_key_equal, | ||||
|                                                       g_free, | ||||
|                                                       connection_destroy); | ||||
| 
 | ||||
|     sprintf(thread_name, "colo-compare %d", compare_id); | ||||
|     qemu_thread_create(&s->thread, thread_name, | ||||
|                        colo_compare_thread, s, | ||||
|                        QEMU_THREAD_JOINABLE); | ||||
|     compare_id++; | ||||
| 
 | ||||
|     /* A regular timer to kick any packets that the secondary doesn't match */ | ||||
|     s->timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, /* Only when guest runs */ | ||||
|                             check_old_packet_regular, s); | ||||
|     timer_mod(s->timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + | ||||
|                         REGULAR_PACKET_CHECK_MS); | ||||
| 
 | ||||
|     return; | ||||
| } | ||||
| 
 | ||||
| static void colo_compare_class_init(ObjectClass *oc, void *data) | ||||
| { | ||||
|     UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); | ||||
| 
 | ||||
|     ucc->complete = colo_compare_complete; | ||||
| } | ||||
| 
 | ||||
| static void colo_compare_init(Object *obj) | ||||
| { | ||||
|     object_property_add_str(obj, "primary_in", | ||||
|                             compare_get_pri_indev, compare_set_pri_indev, | ||||
|                             NULL); | ||||
|     object_property_add_str(obj, "secondary_in", | ||||
|                             compare_get_sec_indev, compare_set_sec_indev, | ||||
|                             NULL); | ||||
|     object_property_add_str(obj, "outdev", | ||||
|                             compare_get_outdev, compare_set_outdev, | ||||
|                             NULL); | ||||
| } | ||||
| 
 | ||||
| static void colo_compare_finalize(Object *obj) | ||||
| { | ||||
|     CompareState *s = COLO_COMPARE(obj); | ||||
| 
 | ||||
|     if (s->chr_pri_in) { | ||||
|         qemu_chr_add_handlers(s->chr_pri_in, NULL, NULL, NULL, NULL); | ||||
|         qemu_chr_fe_release(s->chr_pri_in); | ||||
|     } | ||||
|     if (s->chr_sec_in) { | ||||
|         qemu_chr_add_handlers(s->chr_sec_in, NULL, NULL, NULL, NULL); | ||||
|         qemu_chr_fe_release(s->chr_sec_in); | ||||
|     } | ||||
|     if (s->chr_out) { | ||||
|         qemu_chr_fe_release(s->chr_out); | ||||
|     } | ||||
| 
 | ||||
|     g_queue_free(&s->conn_list); | ||||
| 
 | ||||
|     if (qemu_thread_is_self(&s->thread)) { | ||||
|         /* compare connection */ | ||||
|         g_queue_foreach(&s->conn_list, colo_compare_connection, s); | ||||
|         qemu_thread_join(&s->thread); | ||||
|     } | ||||
| 
 | ||||
|     if (s->timer) { | ||||
|         timer_del(s->timer); | ||||
|     } | ||||
| 
 | ||||
|     qemu_mutex_destroy(&s->timer_check_lock); | ||||
| 
 | ||||
|     g_free(s->pri_indev); | ||||
|     g_free(s->sec_indev); | ||||
|     g_free(s->outdev); | ||||
| } | ||||
| 
 | ||||
| static const TypeInfo colo_compare_info = { | ||||
|     .name = TYPE_COLO_COMPARE, | ||||
|     .parent = TYPE_OBJECT, | ||||
|     .instance_size = sizeof(CompareState), | ||||
|     .instance_init = colo_compare_init, | ||||
|     .instance_finalize = colo_compare_finalize, | ||||
|     .class_size = sizeof(CompareClass), | ||||
|     .class_init = colo_compare_class_init, | ||||
|     .interfaces = (InterfaceInfo[]) { | ||||
|         { TYPE_USER_CREATABLE }, | ||||
|         { } | ||||
|     } | ||||
| }; | ||||
| 
 | ||||
| static void register_types(void) | ||||
| { | ||||
|     type_register_static(&colo_compare_info); | ||||
| } | ||||
| 
 | ||||
| type_init(register_types); | ||||
|  | @ -0,0 +1,211 @@ | |||
| /*
 | ||||
|  * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO) | ||||
|  * (a.k.a. Fault Tolerance or Continuous Replication) | ||||
|  * | ||||
|  * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. | ||||
|  * Copyright (c) 2016 FUJITSU LIMITED | ||||
|  * Copyright (c) 2016 Intel Corporation | ||||
|  * | ||||
|  * Author: Zhang Chen <zhangchen.fnst@cn.fujitsu.com> | ||||
|  * | ||||
|  * This work is licensed under the terms of the GNU GPL, version 2 or | ||||
|  * later.  See the COPYING file in the top-level directory. | ||||
|  */ | ||||
| 
 | ||||
| #include "qemu/osdep.h" | ||||
| #include "trace.h" | ||||
| #include "net/colo.h" | ||||
| 
 | ||||
| uint32_t connection_key_hash(const void *opaque) | ||||
| { | ||||
|     const ConnectionKey *key = opaque; | ||||
|     uint32_t a, b, c; | ||||
| 
 | ||||
|     /* Jenkins hash */ | ||||
|     a = b = c = JHASH_INITVAL + sizeof(*key); | ||||
|     a += key->src.s_addr; | ||||
|     b += key->dst.s_addr; | ||||
|     c += (key->src_port | key->dst_port << 16); | ||||
|     __jhash_mix(a, b, c); | ||||
| 
 | ||||
|     a += key->ip_proto; | ||||
|     __jhash_final(a, b, c); | ||||
| 
 | ||||
|     return c; | ||||
| } | ||||
| 
 | ||||
| int connection_key_equal(const void *key1, const void *key2) | ||||
| { | ||||
|     return memcmp(key1, key2, sizeof(ConnectionKey)) == 0; | ||||
| } | ||||
| 
 | ||||
| int parse_packet_early(Packet *pkt) | ||||
| { | ||||
|     int network_length; | ||||
|     static const uint8_t vlan[] = {0x81, 0x00}; | ||||
|     uint8_t *data = pkt->data; | ||||
|     uint16_t l3_proto; | ||||
|     ssize_t l2hdr_len = eth_get_l2_hdr_length(data); | ||||
| 
 | ||||
|     if (pkt->size < ETH_HLEN) { | ||||
|         trace_colo_proxy_main("pkt->size < ETH_HLEN"); | ||||
|         return 1; | ||||
|     } | ||||
| 
 | ||||
|     /*
 | ||||
|      * TODO: support vlan. | ||||
|      */ | ||||
|     if (!memcmp(&data[12], vlan, sizeof(vlan))) { | ||||
|         trace_colo_proxy_main("COLO-proxy don't support vlan"); | ||||
|         return 1; | ||||
|     } | ||||
| 
 | ||||
|     pkt->network_header = data + l2hdr_len; | ||||
| 
 | ||||
|     const struct iovec l2vec = { | ||||
|         .iov_base = (void *) data, | ||||
|         .iov_len = l2hdr_len | ||||
|     }; | ||||
|     l3_proto = eth_get_l3_proto(&l2vec, 1, l2hdr_len); | ||||
| 
 | ||||
|     if (l3_proto != ETH_P_IP) { | ||||
|         return 1; | ||||
|     } | ||||
| 
 | ||||
|     network_length = pkt->ip->ip_hl * 4; | ||||
|     if (pkt->size < l2hdr_len + network_length) { | ||||
|         trace_colo_proxy_main("pkt->size < network_header + network_length"); | ||||
|         return 1; | ||||
|     } | ||||
|     pkt->transport_header = pkt->network_header + network_length; | ||||
| 
 | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
| void fill_connection_key(Packet *pkt, ConnectionKey *key) | ||||
| { | ||||
|     uint32_t tmp_ports; | ||||
| 
 | ||||
|     memset(key, 0, sizeof(*key)); | ||||
|     key->ip_proto = pkt->ip->ip_p; | ||||
| 
 | ||||
|     switch (key->ip_proto) { | ||||
|     case IPPROTO_TCP: | ||||
|     case IPPROTO_UDP: | ||||
|     case IPPROTO_DCCP: | ||||
|     case IPPROTO_ESP: | ||||
|     case IPPROTO_SCTP: | ||||
|     case IPPROTO_UDPLITE: | ||||
|         tmp_ports = *(uint32_t *)(pkt->transport_header); | ||||
|         key->src = pkt->ip->ip_src; | ||||
|         key->dst = pkt->ip->ip_dst; | ||||
|         key->src_port = ntohs(tmp_ports & 0xffff); | ||||
|         key->dst_port = ntohs(tmp_ports >> 16); | ||||
|         break; | ||||
|     case IPPROTO_AH: | ||||
|         tmp_ports = *(uint32_t *)(pkt->transport_header + 4); | ||||
|         key->src = pkt->ip->ip_src; | ||||
|         key->dst = pkt->ip->ip_dst; | ||||
|         key->src_port = ntohs(tmp_ports & 0xffff); | ||||
|         key->dst_port = ntohs(tmp_ports >> 16); | ||||
|         break; | ||||
|     default: | ||||
|         break; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void reverse_connection_key(ConnectionKey *key) | ||||
| { | ||||
|     struct in_addr tmp_ip; | ||||
|     uint16_t tmp_port; | ||||
| 
 | ||||
|     tmp_ip = key->src; | ||||
|     key->src = key->dst; | ||||
|     key->dst = tmp_ip; | ||||
| 
 | ||||
|     tmp_port = key->src_port; | ||||
|     key->src_port = key->dst_port; | ||||
|     key->dst_port = tmp_port; | ||||
| } | ||||
| 
 | ||||
| Connection *connection_new(ConnectionKey *key) | ||||
| { | ||||
|     Connection *conn = g_slice_new(Connection); | ||||
| 
 | ||||
|     conn->ip_proto = key->ip_proto; | ||||
|     conn->processing = false; | ||||
|     conn->offset = 0; | ||||
|     conn->syn_flag = 0; | ||||
|     g_queue_init(&conn->primary_list); | ||||
|     g_queue_init(&conn->secondary_list); | ||||
| 
 | ||||
|     return conn; | ||||
| } | ||||
| 
 | ||||
| void connection_destroy(void *opaque) | ||||
| { | ||||
|     Connection *conn = opaque; | ||||
| 
 | ||||
|     g_queue_foreach(&conn->primary_list, packet_destroy, NULL); | ||||
|     g_queue_free(&conn->primary_list); | ||||
|     g_queue_foreach(&conn->secondary_list, packet_destroy, NULL); | ||||
|     g_queue_free(&conn->secondary_list); | ||||
|     g_slice_free(Connection, conn); | ||||
| } | ||||
| 
 | ||||
| Packet *packet_new(const void *data, int size) | ||||
| { | ||||
|     Packet *pkt = g_slice_new(Packet); | ||||
| 
 | ||||
|     pkt->data = g_memdup(data, size); | ||||
|     pkt->size = size; | ||||
|     pkt->creation_ms = qemu_clock_get_ms(QEMU_CLOCK_HOST); | ||||
| 
 | ||||
|     return pkt; | ||||
| } | ||||
| 
 | ||||
| void packet_destroy(void *opaque, void *user_data) | ||||
| { | ||||
|     Packet *pkt = opaque; | ||||
| 
 | ||||
|     g_free(pkt->data); | ||||
|     g_slice_free(Packet, pkt); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Clear hashtable, stop this hash growing really huge | ||||
|  */ | ||||
| void connection_hashtable_reset(GHashTable *connection_track_table) | ||||
| { | ||||
|     g_hash_table_remove_all(connection_track_table); | ||||
| } | ||||
| 
 | ||||
| /* if not found, create a new connection and add to hash table */ | ||||
| Connection *connection_get(GHashTable *connection_track_table, | ||||
|                            ConnectionKey *key, | ||||
|                            GQueue *conn_list) | ||||
| { | ||||
|     Connection *conn = g_hash_table_lookup(connection_track_table, key); | ||||
| 
 | ||||
|     if (conn == NULL) { | ||||
|         ConnectionKey *new_key = g_memdup(key, sizeof(*key)); | ||||
| 
 | ||||
|         conn = connection_new(key); | ||||
| 
 | ||||
|         if (g_hash_table_size(connection_track_table) > HASHTABLE_MAX_SIZE) { | ||||
|             trace_colo_proxy_main("colo proxy connection hashtable full," | ||||
|                                   " clear it"); | ||||
|             connection_hashtable_reset(connection_track_table); | ||||
|             /*
 | ||||
|              * clear the conn_list | ||||
|              */ | ||||
|             while (!g_queue_is_empty(conn_list)) { | ||||
|                 connection_destroy(g_queue_pop_head(conn_list)); | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         g_hash_table_insert(connection_track_table, new_key, conn); | ||||
|     } | ||||
| 
 | ||||
|     return conn; | ||||
| } | ||||
|  | @ -0,0 +1,88 @@ | |||
| /*
 | ||||
|  * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO) | ||||
|  * (a.k.a. Fault Tolerance or Continuous Replication) | ||||
|  * | ||||
|  * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. | ||||
|  * Copyright (c) 2016 FUJITSU LIMITED | ||||
|  * Copyright (c) 2016 Intel Corporation | ||||
|  * | ||||
|  * Author: Zhang Chen <zhangchen.fnst@cn.fujitsu.com> | ||||
|  * | ||||
|  * This work is licensed under the terms of the GNU GPL, version 2 or | ||||
|  * later.  See the COPYING file in the top-level directory. | ||||
|  */ | ||||
| 
 | ||||
| #ifndef QEMU_COLO_PROXY_H | ||||
| #define QEMU_COLO_PROXY_H | ||||
| 
 | ||||
| #include "slirp/slirp.h" | ||||
| #include "qemu/jhash.h" | ||||
| #include "qemu/timer.h" | ||||
| 
 | ||||
| #define HASHTABLE_MAX_SIZE 16384 | ||||
| 
 | ||||
| #ifndef IPPROTO_DCCP | ||||
| #define IPPROTO_DCCP 33 | ||||
| #endif | ||||
| 
 | ||||
| #ifndef IPPROTO_SCTP | ||||
| #define IPPROTO_SCTP 132 | ||||
| #endif | ||||
| 
 | ||||
| #ifndef IPPROTO_UDPLITE | ||||
| #define IPPROTO_UDPLITE 136 | ||||
| #endif | ||||
| 
 | ||||
| typedef struct Packet { | ||||
|     void *data; | ||||
|     union { | ||||
|         uint8_t *network_header; | ||||
|         struct ip *ip; | ||||
|     }; | ||||
|     uint8_t *transport_header; | ||||
|     int size; | ||||
|     /* Time of packet creation, in wall clock ms */ | ||||
|     int64_t creation_ms; | ||||
| } Packet; | ||||
| 
 | ||||
| typedef struct ConnectionKey { | ||||
|     /* (src, dst) must be grouped, in the same way than in IP header */ | ||||
|     struct in_addr src; | ||||
|     struct in_addr dst; | ||||
|     uint16_t src_port; | ||||
|     uint16_t dst_port; | ||||
|     uint8_t ip_proto; | ||||
| } QEMU_PACKED ConnectionKey; | ||||
| 
 | ||||
| typedef struct Connection { | ||||
|     /* connection primary send queue: element type: Packet */ | ||||
|     GQueue primary_list; | ||||
|     /* connection secondary send queue: element type: Packet */ | ||||
|     GQueue secondary_list; | ||||
|     /* flag to enqueue unprocessed_connections */ | ||||
|     bool processing; | ||||
|     uint8_t ip_proto; | ||||
|     /* offset = secondary_seq - primary_seq */ | ||||
|     tcp_seq  offset; | ||||
|     /*
 | ||||
|      * we use this flag update offset func | ||||
|      * run once in independent tcp connection | ||||
|      */ | ||||
|     int syn_flag; | ||||
| } Connection; | ||||
| 
 | ||||
| uint32_t connection_key_hash(const void *opaque); | ||||
| int connection_key_equal(const void *opaque1, const void *opaque2); | ||||
| int parse_packet_early(Packet *pkt); | ||||
| void fill_connection_key(Packet *pkt, ConnectionKey *key); | ||||
| void reverse_connection_key(ConnectionKey *key); | ||||
| Connection *connection_new(ConnectionKey *key); | ||||
| void connection_destroy(void *opaque); | ||||
| Connection *connection_get(GHashTable *connection_track_table, | ||||
|                            ConnectionKey *key, | ||||
|                            GQueue *conn_list); | ||||
| void connection_hashtable_reset(GHashTable *connection_track_table); | ||||
| Packet *packet_new(const void *data, int size); | ||||
| void packet_destroy(void *opaque, void *user_data); | ||||
| 
 | ||||
| #endif /* QEMU_COLO_PROXY_H */ | ||||
|  | @ -0,0 +1,263 @@ | |||
| /*
 | ||||
|  * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. | ||||
|  * Copyright (c) 2016 FUJITSU LIMITED | ||||
|  * Copyright (c) 2016 Intel Corporation | ||||
|  * | ||||
|  * Author: Zhang Chen <zhangchen.fnst@cn.fujitsu.com> | ||||
|  * | ||||
|  * This work is licensed under the terms of the GNU GPL, version 2 or | ||||
|  * later.  See the COPYING file in the top-level directory. | ||||
|  */ | ||||
| 
 | ||||
| #include "qemu/osdep.h" | ||||
| #include "trace.h" | ||||
| #include "net/colo.h" | ||||
| #include "net/filter.h" | ||||
| #include "net/net.h" | ||||
| #include "qemu-common.h" | ||||
| #include "qapi/error.h" | ||||
| #include "qapi/qmp/qerror.h" | ||||
| #include "qapi-visit.h" | ||||
| #include "qom/object.h" | ||||
| #include "qemu/main-loop.h" | ||||
| #include "qemu/iov.h" | ||||
| #include "net/checksum.h" | ||||
| 
 | ||||
| #define FILTER_COLO_REWRITER(obj) \ | ||||
|     OBJECT_CHECK(RewriterState, (obj), TYPE_FILTER_REWRITER) | ||||
| 
 | ||||
| #define TYPE_FILTER_REWRITER "filter-rewriter" | ||||
| 
 | ||||
| typedef struct RewriterState { | ||||
|     NetFilterState parent_obj; | ||||
|     NetQueue *incoming_queue; | ||||
|     /* hashtable to save connection */ | ||||
|     GHashTable *connection_track_table; | ||||
| } RewriterState; | ||||
| 
 | ||||
| static void filter_rewriter_flush(NetFilterState *nf) | ||||
| { | ||||
|     RewriterState *s = FILTER_COLO_REWRITER(nf); | ||||
| 
 | ||||
|     if (!qemu_net_queue_flush(s->incoming_queue)) { | ||||
|         /* Unable to empty the queue, purge remaining packets */ | ||||
|         qemu_net_queue_purge(s->incoming_queue, nf->netdev); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Return 1 on success, if return 0 means the pkt | ||||
|  * is not TCP packet | ||||
|  */ | ||||
| static int is_tcp_packet(Packet *pkt) | ||||
| { | ||||
|     if (!parse_packet_early(pkt) && | ||||
|         pkt->ip->ip_p == IPPROTO_TCP) { | ||||
|         return 1; | ||||
|     } else { | ||||
|         return 0; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| /* handle tcp packet from primary guest */ | ||||
| static int handle_primary_tcp_pkt(NetFilterState *nf, | ||||
|                                   Connection *conn, | ||||
|                                   Packet *pkt) | ||||
| { | ||||
|     struct tcphdr *tcp_pkt; | ||||
| 
 | ||||
|     tcp_pkt = (struct tcphdr *)pkt->transport_header; | ||||
|     if (trace_event_get_state(TRACE_COLO_FILTER_REWRITER_DEBUG)) { | ||||
|         char *sdebug, *ddebug; | ||||
|         sdebug = strdup(inet_ntoa(pkt->ip->ip_src)); | ||||
|         ddebug = strdup(inet_ntoa(pkt->ip->ip_dst)); | ||||
|         trace_colo_filter_rewriter_pkt_info(__func__, sdebug, ddebug, | ||||
|                     ntohl(tcp_pkt->th_seq), ntohl(tcp_pkt->th_ack), | ||||
|                     tcp_pkt->th_flags); | ||||
|         trace_colo_filter_rewriter_conn_offset(conn->offset); | ||||
|         g_free(sdebug); | ||||
|         g_free(ddebug); | ||||
|     } | ||||
| 
 | ||||
|     if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_SYN)) { | ||||
|         /*
 | ||||
|          * we use this flag update offset func | ||||
|          * run once in independent tcp connection | ||||
|          */ | ||||
|         conn->syn_flag = 1; | ||||
|     } | ||||
| 
 | ||||
|     if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK)) { | ||||
|         if (conn->syn_flag) { | ||||
|             /*
 | ||||
|              * offset = secondary_seq - primary seq | ||||
|              * ack packet sent by guest from primary node, | ||||
|              * so we use th_ack - 1 get primary_seq | ||||
|              */ | ||||
|             conn->offset -= (ntohl(tcp_pkt->th_ack) - 1); | ||||
|             conn->syn_flag = 0; | ||||
|         } | ||||
|         /* handle packets to the secondary from the primary */ | ||||
|         tcp_pkt->th_ack = htonl(ntohl(tcp_pkt->th_ack) + conn->offset); | ||||
| 
 | ||||
|         net_checksum_calculate((uint8_t *)pkt->data, pkt->size); | ||||
|     } | ||||
| 
 | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
| /* handle tcp packet from secondary guest */ | ||||
| static int handle_secondary_tcp_pkt(NetFilterState *nf, | ||||
|                                     Connection *conn, | ||||
|                                     Packet *pkt) | ||||
| { | ||||
|     struct tcphdr *tcp_pkt; | ||||
| 
 | ||||
|     tcp_pkt = (struct tcphdr *)pkt->transport_header; | ||||
| 
 | ||||
|     if (trace_event_get_state(TRACE_COLO_FILTER_REWRITER_DEBUG)) { | ||||
|         char *sdebug, *ddebug; | ||||
|         sdebug = strdup(inet_ntoa(pkt->ip->ip_src)); | ||||
|         ddebug = strdup(inet_ntoa(pkt->ip->ip_dst)); | ||||
|         trace_colo_filter_rewriter_pkt_info(__func__, sdebug, ddebug, | ||||
|                     ntohl(tcp_pkt->th_seq), ntohl(tcp_pkt->th_ack), | ||||
|                     tcp_pkt->th_flags); | ||||
|         trace_colo_filter_rewriter_conn_offset(conn->offset); | ||||
|         g_free(sdebug); | ||||
|         g_free(ddebug); | ||||
|     } | ||||
| 
 | ||||
|     if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == (TH_ACK | TH_SYN))) { | ||||
|         /*
 | ||||
|          * save offset = secondary_seq and then | ||||
|          * in handle_primary_tcp_pkt make offset | ||||
|          * = secondary_seq - primary_seq | ||||
|          */ | ||||
|         conn->offset = ntohl(tcp_pkt->th_seq); | ||||
|     } | ||||
| 
 | ||||
|     if ((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK) { | ||||
|         /* handle packets to the primary from the secondary*/ | ||||
|         tcp_pkt->th_seq = htonl(ntohl(tcp_pkt->th_seq) - conn->offset); | ||||
| 
 | ||||
|         net_checksum_calculate((uint8_t *)pkt->data, pkt->size); | ||||
|     } | ||||
| 
 | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
| static ssize_t colo_rewriter_receive_iov(NetFilterState *nf, | ||||
|                                          NetClientState *sender, | ||||
|                                          unsigned flags, | ||||
|                                          const struct iovec *iov, | ||||
|                                          int iovcnt, | ||||
|                                          NetPacketSent *sent_cb) | ||||
| { | ||||
|     RewriterState *s = FILTER_COLO_REWRITER(nf); | ||||
|     Connection *conn; | ||||
|     ConnectionKey key; | ||||
|     Packet *pkt; | ||||
|     ssize_t size = iov_size(iov, iovcnt); | ||||
|     char *buf = g_malloc0(size); | ||||
| 
 | ||||
|     iov_to_buf(iov, iovcnt, 0, buf, size); | ||||
|     pkt = packet_new(buf, size); | ||||
| 
 | ||||
|     /*
 | ||||
|      * if we get tcp packet | ||||
|      * we will rewrite it to make secondary guest's | ||||
|      * connection established successfully | ||||
|      */ | ||||
|     if (pkt && is_tcp_packet(pkt)) { | ||||
| 
 | ||||
|         fill_connection_key(pkt, &key); | ||||
| 
 | ||||
|         if (sender == nf->netdev) { | ||||
|             /*
 | ||||
|              * We need make tcp TX and RX packet | ||||
|              * into one connection. | ||||
|              */ | ||||
|             reverse_connection_key(&key); | ||||
|         } | ||||
|         conn = connection_get(s->connection_track_table, | ||||
|                               &key, | ||||
|                               NULL); | ||||
| 
 | ||||
|         if (sender == nf->netdev) { | ||||
|             /* NET_FILTER_DIRECTION_TX */ | ||||
|             if (!handle_primary_tcp_pkt(nf, conn, pkt)) { | ||||
|                 qemu_net_queue_send(s->incoming_queue, sender, 0, | ||||
|                 (const uint8_t *)pkt->data, pkt->size, NULL); | ||||
|                 packet_destroy(pkt, NULL); | ||||
|                 pkt = NULL; | ||||
|                 /*
 | ||||
|                  * We block the packet here,after rewrite pkt | ||||
|                  * and will send it | ||||
|                  */ | ||||
|                 return 1; | ||||
|             } | ||||
|         } else { | ||||
|             /* NET_FILTER_DIRECTION_RX */ | ||||
|             if (!handle_secondary_tcp_pkt(nf, conn, pkt)) { | ||||
|                 qemu_net_queue_send(s->incoming_queue, sender, 0, | ||||
|                 (const uint8_t *)pkt->data, pkt->size, NULL); | ||||
|                 packet_destroy(pkt, NULL); | ||||
|                 pkt = NULL; | ||||
|                 /*
 | ||||
|                  * We block the packet here,after rewrite pkt | ||||
|                  * and will send it | ||||
|                  */ | ||||
|                 return 1; | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     packet_destroy(pkt, NULL); | ||||
|     pkt = NULL; | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
| static void colo_rewriter_cleanup(NetFilterState *nf) | ||||
| { | ||||
|     RewriterState *s = FILTER_COLO_REWRITER(nf); | ||||
| 
 | ||||
|     /* flush packets */ | ||||
|     if (s->incoming_queue) { | ||||
|         filter_rewriter_flush(nf); | ||||
|         g_free(s->incoming_queue); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| static void colo_rewriter_setup(NetFilterState *nf, Error **errp) | ||||
| { | ||||
|     RewriterState *s = FILTER_COLO_REWRITER(nf); | ||||
| 
 | ||||
|     s->connection_track_table = g_hash_table_new_full(connection_key_hash, | ||||
|                                                       connection_key_equal, | ||||
|                                                       g_free, | ||||
|                                                       connection_destroy); | ||||
|     s->incoming_queue = qemu_new_net_queue(qemu_netfilter_pass_to_next, nf); | ||||
| } | ||||
| 
 | ||||
| static void colo_rewriter_class_init(ObjectClass *oc, void *data) | ||||
| { | ||||
|     NetFilterClass *nfc = NETFILTER_CLASS(oc); | ||||
| 
 | ||||
|     nfc->setup = colo_rewriter_setup; | ||||
|     nfc->cleanup = colo_rewriter_cleanup; | ||||
|     nfc->receive_iov = colo_rewriter_receive_iov; | ||||
| } | ||||
| 
 | ||||
| static const TypeInfo colo_rewriter_info = { | ||||
|     .name = TYPE_FILTER_REWRITER, | ||||
|     .parent = TYPE_NETFILTER, | ||||
|     .class_init = colo_rewriter_class_init, | ||||
|     .instance_size = sizeof(RewriterState), | ||||
| }; | ||||
| 
 | ||||
| static void register_types(void) | ||||
| { | ||||
|     type_register_static(&colo_rewriter_info); | ||||
| } | ||||
| 
 | ||||
| type_init(register_types); | ||||
|  | @ -690,9 +690,13 @@ static ssize_t nc_sendv_compat(NetClientState *nc, const struct iovec *iov, | |||
|         buffer = iov[0].iov_base; | ||||
|         offset = iov[0].iov_len; | ||||
|     } else { | ||||
|         buf = g_new(uint8_t, NET_BUFSIZE); | ||||
|         offset = iov_size(iov, iovcnt); | ||||
|         if (offset > NET_BUFSIZE) { | ||||
|             return -1; | ||||
|         } | ||||
|         buf = g_malloc(offset); | ||||
|         buffer = buf; | ||||
|         offset = iov_to_buf(iov, iovcnt, 0, buf, NET_BUFSIZE); | ||||
|         offset = iov_to_buf(iov, iovcnt, 0, buf, offset); | ||||
|     } | ||||
| 
 | ||||
|     if (flags & QEMU_NET_PACKET_FLAG_RAW && nc->info->receive_raw) { | ||||
|  | @ -1179,6 +1183,7 @@ void hmp_host_net_remove(Monitor *mon, const QDict *qdict) | |||
| 
 | ||||
|     qemu_del_net_client(nc->peer); | ||||
|     qemu_del_net_client(nc); | ||||
|     qemu_opts_del(qemu_opts_find(qemu_find_opts("net"), device)); | ||||
| } | ||||
| 
 | ||||
| void netdev_add(QemuOpts *opts, Error **errp) | ||||
|  |  | |||
|  | @ -857,7 +857,9 @@ free_fail: | |||
|             return -1; | ||||
|         } | ||||
| 
 | ||||
|         fd = net_bridge_run_helper(tap->helper, DEFAULT_BRIDGE_INTERFACE, | ||||
|         fd = net_bridge_run_helper(tap->helper, | ||||
|                                    tap->has_br ? | ||||
|                                    tap->br : DEFAULT_BRIDGE_INTERFACE, | ||||
|                                    errp); | ||||
|         if (fd == -1) { | ||||
|             return -1; | ||||
|  |  | |||
|  | @ -2636,6 +2636,8 @@ | |||
| # | ||||
| # @downscript: #optional script to shut down the interface | ||||
| # | ||||
| # @br: #optional bridge name (since 2.8) | ||||
| # | ||||
| # @helper: #optional command to execute to configure bridge | ||||
| # | ||||
| # @sndbuf: #optional send buffer limit. Understands [TGMKkb] suffixes. | ||||
|  | @ -2665,6 +2667,7 @@ | |||
|     '*fds':        'str', | ||||
|     '*script':     'str', | ||||
|     '*downscript': 'str', | ||||
|     '*br':         'str', | ||||
|     '*helper':     'str', | ||||
|     '*sndbuf':     'size', | ||||
|     '*vnet_hdr':   'bool', | ||||
|  |  | |||
							
								
								
									
										71
									
								
								qemu-char.c
								
								
								
								
							
							
						
						
									
										71
									
								
								qemu-char.c
								
								
								
								
							|  | @ -449,11 +449,12 @@ void qemu_chr_fe_printf(CharDriverState *s, const char *fmt, ...) | |||
| 
 | ||||
| static void remove_fd_in_watch(CharDriverState *chr); | ||||
| 
 | ||||
| void qemu_chr_add_handlers(CharDriverState *s, | ||||
| void qemu_chr_add_handlers_full(CharDriverState *s, | ||||
|                                 IOCanReadHandler *fd_can_read, | ||||
|                                 IOReadHandler *fd_read, | ||||
|                                 IOEventHandler *fd_event, | ||||
|                            void *opaque) | ||||
|                                 void *opaque, | ||||
|                                 GMainContext *context) | ||||
| { | ||||
|     int fe_open; | ||||
| 
 | ||||
|  | @ -467,8 +468,9 @@ void qemu_chr_add_handlers(CharDriverState *s, | |||
|     s->chr_read = fd_read; | ||||
|     s->chr_event = fd_event; | ||||
|     s->handler_opaque = opaque; | ||||
|     if (fe_open && s->chr_update_read_handler) | ||||
|         s->chr_update_read_handler(s); | ||||
|     if (fe_open && s->chr_update_read_handler) { | ||||
|         s->chr_update_read_handler(s, context); | ||||
|     } | ||||
| 
 | ||||
|     if (!s->explicit_fe_open) { | ||||
|         qemu_chr_fe_set_open(s, fe_open); | ||||
|  | @ -481,6 +483,16 @@ void qemu_chr_add_handlers(CharDriverState *s, | |||
|     } | ||||
| } | ||||
| 
 | ||||
| void qemu_chr_add_handlers(CharDriverState *s, | ||||
|                            IOCanReadHandler *fd_can_read, | ||||
|                            IOReadHandler *fd_read, | ||||
|                            IOEventHandler *fd_event, | ||||
|                            void *opaque) | ||||
| { | ||||
|     qemu_chr_add_handlers_full(s, fd_can_read, fd_read, | ||||
|                                fd_event, opaque, NULL); | ||||
| } | ||||
| 
 | ||||
| static int null_chr_write(CharDriverState *chr, const uint8_t *buf, int len) | ||||
| { | ||||
|     return len; | ||||
|  | @ -722,7 +734,8 @@ static void mux_chr_event(void *opaque, int event) | |||
|         mux_chr_send_event(d, i, event); | ||||
| } | ||||
| 
 | ||||
| static void mux_chr_update_read_handler(CharDriverState *chr) | ||||
| static void mux_chr_update_read_handler(CharDriverState *chr, | ||||
|                                         GMainContext *context) | ||||
| { | ||||
|     MuxDriver *d = chr->opaque; | ||||
| 
 | ||||
|  | @ -736,8 +749,10 @@ static void mux_chr_update_read_handler(CharDriverState *chr) | |||
|     d->chr_event[d->mux_cnt] = chr->chr_event; | ||||
|     /* Fix up the real driver with mux routines */ | ||||
|     if (d->mux_cnt == 0) { | ||||
|         qemu_chr_add_handlers(d->drv, mux_chr_can_read, mux_chr_read, | ||||
|                               mux_chr_event, chr); | ||||
|         qemu_chr_add_handlers_full(d->drv, mux_chr_can_read, | ||||
|                                    mux_chr_read, | ||||
|                                    mux_chr_event, | ||||
|                                    chr, context); | ||||
|     } | ||||
|     if (d->focus != -1) { | ||||
|         mux_chr_send_event(d, d->focus, CHR_EVENT_MUX_OUT); | ||||
|  | @ -853,6 +868,7 @@ typedef struct IOWatchPoll | |||
|     IOCanReadHandler *fd_can_read; | ||||
|     GSourceFunc fd_read; | ||||
|     void *opaque; | ||||
|     GMainContext *context; | ||||
| } IOWatchPoll; | ||||
| 
 | ||||
| static IOWatchPoll *io_watch_poll_from_source(GSource *source) | ||||
|  | @ -860,7 +876,8 @@ static IOWatchPoll *io_watch_poll_from_source(GSource *source) | |||
|     return container_of(source, IOWatchPoll, parent); | ||||
| } | ||||
| 
 | ||||
| static gboolean io_watch_poll_prepare(GSource *source, gint *timeout_) | ||||
| static gboolean io_watch_poll_prepare(GSource *source, | ||||
|                                       gint *timeout_) | ||||
| { | ||||
|     IOWatchPoll *iwp = io_watch_poll_from_source(source); | ||||
|     bool now_active = iwp->fd_can_read(iwp->opaque) > 0; | ||||
|  | @ -873,7 +890,7 @@ static gboolean io_watch_poll_prepare(GSource *source, gint *timeout_) | |||
|         iwp->src = qio_channel_create_watch( | ||||
|             iwp->ioc, G_IO_IN | G_IO_ERR | G_IO_HUP | G_IO_NVAL); | ||||
|         g_source_set_callback(iwp->src, iwp->fd_read, iwp->opaque, NULL); | ||||
|         g_source_attach(iwp->src, NULL); | ||||
|         g_source_attach(iwp->src, iwp->context); | ||||
|     } else { | ||||
|         g_source_destroy(iwp->src); | ||||
|         g_source_unref(iwp->src); | ||||
|  | @ -920,19 +937,22 @@ static GSourceFuncs io_watch_poll_funcs = { | |||
| static guint io_add_watch_poll(QIOChannel *ioc, | ||||
|                                IOCanReadHandler *fd_can_read, | ||||
|                                QIOChannelFunc fd_read, | ||||
|                                gpointer user_data) | ||||
|                                gpointer user_data, | ||||
|                                GMainContext *context) | ||||
| { | ||||
|     IOWatchPoll *iwp; | ||||
|     int tag; | ||||
| 
 | ||||
|     iwp = (IOWatchPoll *) g_source_new(&io_watch_poll_funcs, sizeof(IOWatchPoll)); | ||||
|     iwp = (IOWatchPoll *) g_source_new(&io_watch_poll_funcs, | ||||
|                                        sizeof(IOWatchPoll)); | ||||
|     iwp->fd_can_read = fd_can_read; | ||||
|     iwp->opaque = user_data; | ||||
|     iwp->ioc = ioc; | ||||
|     iwp->fd_read = (GSourceFunc) fd_read; | ||||
|     iwp->src = NULL; | ||||
|     iwp->context = context; | ||||
| 
 | ||||
|     tag = g_source_attach(&iwp->parent, NULL); | ||||
|     tag = g_source_attach(&iwp->parent, context); | ||||
|     g_source_unref(&iwp->parent); | ||||
|     return tag; | ||||
| } | ||||
|  | @ -1064,7 +1084,8 @@ static GSource *fd_chr_add_watch(CharDriverState *chr, GIOCondition cond) | |||
|     return qio_channel_create_watch(s->ioc_out, cond); | ||||
| } | ||||
| 
 | ||||
| static void fd_chr_update_read_handler(CharDriverState *chr) | ||||
| static void fd_chr_update_read_handler(CharDriverState *chr, | ||||
|                                        GMainContext *context) | ||||
| { | ||||
|     FDCharDriver *s = chr->opaque; | ||||
| 
 | ||||
|  | @ -1072,7 +1093,8 @@ static void fd_chr_update_read_handler(CharDriverState *chr) | |||
|     if (s->ioc_in) { | ||||
|         chr->fd_in_tag = io_add_watch_poll(s->ioc_in, | ||||
|                                            fd_chr_read_poll, | ||||
|                                            fd_chr_read, chr); | ||||
|                                            fd_chr_read, chr, | ||||
|                                            context); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
|  | @ -1319,7 +1341,8 @@ static void pty_chr_update_read_handler_locked(CharDriverState *chr) | |||
|     } | ||||
| } | ||||
| 
 | ||||
| static void pty_chr_update_read_handler(CharDriverState *chr) | ||||
| static void pty_chr_update_read_handler(CharDriverState *chr, | ||||
|                                         GMainContext *context) | ||||
| { | ||||
|     qemu_mutex_lock(&chr->chr_write_lock); | ||||
|     pty_chr_update_read_handler_locked(chr); | ||||
|  | @ -1423,7 +1446,8 @@ static void pty_chr_state(CharDriverState *chr, int connected) | |||
|         if (!chr->fd_in_tag) { | ||||
|             chr->fd_in_tag = io_add_watch_poll(s->ioc, | ||||
|                                                pty_chr_read_poll, | ||||
|                                                pty_chr_read, chr); | ||||
|                                                pty_chr_read, | ||||
|                                                chr, NULL); | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | @ -2565,7 +2589,8 @@ static gboolean udp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque) | |||
|     return TRUE; | ||||
| } | ||||
| 
 | ||||
| static void udp_chr_update_read_handler(CharDriverState *chr) | ||||
| static void udp_chr_update_read_handler(CharDriverState *chr, | ||||
|                                         GMainContext *context) | ||||
| { | ||||
|     NetCharDriver *s = chr->opaque; | ||||
| 
 | ||||
|  | @ -2573,7 +2598,8 @@ static void udp_chr_update_read_handler(CharDriverState *chr) | |||
|     if (s->ioc) { | ||||
|         chr->fd_in_tag = io_add_watch_poll(s->ioc, | ||||
|                                            udp_chr_read_poll, | ||||
|                                            udp_chr_read, chr); | ||||
|                                            udp_chr_read, chr, | ||||
|                                            context); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
|  | @ -2976,12 +3002,14 @@ static void tcp_chr_connect(void *opaque) | |||
|     if (s->ioc) { | ||||
|         chr->fd_in_tag = io_add_watch_poll(s->ioc, | ||||
|                                            tcp_chr_read_poll, | ||||
|                                            tcp_chr_read, chr); | ||||
|                                            tcp_chr_read, | ||||
|                                            chr, NULL); | ||||
|     } | ||||
|     qemu_chr_be_generic_open(chr); | ||||
| } | ||||
| 
 | ||||
| static void tcp_chr_update_read_handler(CharDriverState *chr) | ||||
| static void tcp_chr_update_read_handler(CharDriverState *chr, | ||||
|                                         GMainContext *context) | ||||
| { | ||||
|     TCPCharDriver *s = chr->opaque; | ||||
| 
 | ||||
|  | @ -2993,7 +3021,8 @@ static void tcp_chr_update_read_handler(CharDriverState *chr) | |||
|     if (s->ioc) { | ||||
|         chr->fd_in_tag = io_add_watch_poll(s->ioc, | ||||
|                                            tcp_chr_read_poll, | ||||
|                                            tcp_chr_read, chr); | ||||
|                                            tcp_chr_read, chr, | ||||
|                                            context); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -1598,10 +1598,11 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev, | |||
|     "                configure a host TAP network backend with ID 'str'\n" | ||||
| #else | ||||
|     "-netdev tap,id=str[,fd=h][,fds=x:y:...:z][,ifname=name][,script=file][,downscript=dfile]\n" | ||||
|     "         [,helper=helper][,sndbuf=nbytes][,vnet_hdr=on|off][,vhost=on|off]\n" | ||||
|     "         [,br=bridge][,helper=helper][,sndbuf=nbytes][,vnet_hdr=on|off][,vhost=on|off]\n" | ||||
|     "         [,vhostfd=h][,vhostfds=x:y:...:z][,vhostforce=on|off][,queues=n]\n" | ||||
|     "         [,poll-us=n]\n" | ||||
|     "                configure a host TAP network backend with ID 'str'\n" | ||||
|     "                connected to a bridge (default=" DEFAULT_BRIDGE_INTERFACE ")\n" | ||||
|     "                use network scripts 'file' (default=" DEFAULT_NETWORK_SCRIPT ")\n" | ||||
|     "                to configure it and 'dfile' (default=" DEFAULT_NETWORK_DOWN_SCRIPT ")\n" | ||||
|     "                to deconfigure it\n" | ||||
|  | @ -1888,8 +1889,8 @@ processed and applied to -net user. Mixing them with the new configuration | |||
| syntax gives undefined results. Their use for new applications is discouraged | ||||
| as they will be removed from future versions. | ||||
| 
 | ||||
| @item -netdev tap,id=@var{id}[,fd=@var{h}][,ifname=@var{name}][,script=@var{file}][,downscript=@var{dfile}][,helper=@var{helper}] | ||||
| @itemx -net tap[,vlan=@var{n}][,name=@var{name}][,fd=@var{h}][,ifname=@var{name}][,script=@var{file}][,downscript=@var{dfile}][,helper=@var{helper}] | ||||
| @item -netdev tap,id=@var{id}[,fd=@var{h}][,ifname=@var{name}][,script=@var{file}][,downscript=@var{dfile}][,br=@var{bridge}][,helper=@var{helper}] | ||||
| @itemx -net tap[,vlan=@var{n}][,name=@var{name}][,fd=@var{h}][,ifname=@var{name}][,script=@var{file}][,downscript=@var{dfile}][,br=@var{bridge}][,helper=@var{helper}] | ||||
| Connect the host TAP network interface @var{name} to VLAN @var{n}. | ||||
| 
 | ||||
| Use the network script @var{file} to configure it and the network script | ||||
|  | @ -1900,8 +1901,9 @@ automatically provides one. The default network configure script is | |||
| to disable script execution. | ||||
| 
 | ||||
| If running QEMU as an unprivileged user, use the network helper | ||||
| @var{helper} to configure the TAP interface. The default network | ||||
| helper executable is @file{/path/to/qemu-bridge-helper}. | ||||
| @var{helper} to configure the TAP interface and attach it to the bridge. | ||||
| The default network helper executable is @file{/path/to/qemu-bridge-helper} | ||||
| and the default bridge device is @file{br0}. | ||||
| 
 | ||||
| @option{fd}=@var{h} can be used to specify the handle of an already | ||||
| opened host TAP interface. | ||||
|  | @ -3887,6 +3889,19 @@ Create a filter-redirector we need to differ outdev id from indev id, id can not | |||
| be the same. we can just use indev or outdev, but at least one of indev or outdev | ||||
| need to be specified. | ||||
| 
 | ||||
| @item -object filter-rewriter,id=@var{id},netdev=@var{netdevid},rewriter-mode=@var{mode}[,queue=@var{all|rx|tx}] | ||||
| 
 | ||||
| Filter-rewriter is a part of COLO project.It will rewrite tcp packet to | ||||
| secondary from primary to keep secondary tcp connection,and rewrite | ||||
| tcp packet to primary from secondary make tcp packet can be handled by | ||||
| client. | ||||
| 
 | ||||
| usage: | ||||
| colo secondary: | ||||
| -object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0 | ||||
| -object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1 | ||||
| -object filter-rewriter,id=rew0,netdev=hn0,queue=all | ||||
| 
 | ||||
| @item -object filter-dump,id=@var{id},netdev=@var{dev},file=@var{filename}][,maxlen=@var{len}] | ||||
| 
 | ||||
| Dump the network traffic on netdev @var{dev} to the file specified by | ||||
|  | @ -3894,6 +3909,45 @@ Dump the network traffic on netdev @var{dev} to the file specified by | |||
| The file format is libpcap, so it can be analyzed with tools such as tcpdump | ||||
| or Wireshark. | ||||
| 
 | ||||
| @item -object colo-compare,id=@var{id},primary_in=@var{chardevid},secondary_in=@var{chardevid}, | ||||
| outdev=@var{chardevid} | ||||
| 
 | ||||
| Colo-compare gets packet from primary_in@var{chardevid} and secondary_in@var{chardevid}, than compare primary packet with | ||||
| secondary packet. If the packets are same, we will output primary | ||||
| packet to outdev@var{chardevid}, else we will notify colo-frame | ||||
| do checkpoint and send primary packet to outdev@var{chardevid}. | ||||
| 
 | ||||
| we must use it with the help of filter-mirror and filter-redirector. | ||||
| 
 | ||||
| @example | ||||
| 
 | ||||
| primary: | ||||
| -netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown | ||||
| -device e1000,id=e0,netdev=hn0,mac=52:a4:00:12:78:66 | ||||
| -chardev socket,id=mirror0,host=3.3.3.3,port=9003,server,nowait | ||||
| -chardev socket,id=compare1,host=3.3.3.3,port=9004,server,nowait | ||||
| -chardev socket,id=compare0,host=3.3.3.3,port=9001,server,nowait | ||||
| -chardev socket,id=compare0-0,host=3.3.3.3,port=9001 | ||||
| -chardev socket,id=compare_out,host=3.3.3.3,port=9005,server,nowait | ||||
| -chardev socket,id=compare_out0,host=3.3.3.3,port=9005 | ||||
| -object filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0 | ||||
| -object filter-redirector,netdev=hn0,id=redire0,queue=rx,indev=compare_out | ||||
| -object filter-redirector,netdev=hn0,id=redire1,queue=rx,outdev=compare0 | ||||
| -object colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,outdev=compare_out0 | ||||
| 
 | ||||
| secondary: | ||||
| -netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,down script=/etc/qemu-ifdown | ||||
| -device e1000,netdev=hn0,mac=52:a4:00:12:78:66 | ||||
| -chardev socket,id=red0,host=3.3.3.3,port=9003 | ||||
| -chardev socket,id=red1,host=3.3.3.3,port=9004 | ||||
| -object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0 | ||||
| -object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1 | ||||
| 
 | ||||
| @end example | ||||
| 
 | ||||
| If you want to know the detail of above command line, you can read | ||||
| the colo-compare git log. | ||||
| 
 | ||||
| @item -object secret,id=@var{id},data=@var{string},format=@var{raw|base64}[,keyid=@var{secretid},iv=@var{string}] | ||||
| @item -object secret,id=@var{id},file=@var{filename},format=@var{raw|base64}[,keyid=@var{secretid},iv=@var{string}] | ||||
| 
 | ||||
|  |  | |||
							
								
								
									
										16
									
								
								trace-events
								
								
								
								
							
							
						
						
									
										16
									
								
								trace-events
								
								
								
								
							|  | @ -139,6 +139,22 @@ memory_region_subpage_write(int cpu_index, void *mr, uint64_t offset, uint64_t v | |||
| memory_region_tb_read(int cpu_index, uint64_t addr, uint64_t value, unsigned size) "cpu %d addr %#"PRIx64" value %#"PRIx64" size %u" | ||||
| memory_region_tb_write(int cpu_index, uint64_t addr, uint64_t value, unsigned size) "cpu %d addr %#"PRIx64" value %#"PRIx64" size %u" | ||||
| 
 | ||||
| # net/colo.c | ||||
| colo_proxy_main(const char *chr) ": %s" | ||||
| 
 | ||||
| # net/colo-compare.c | ||||
| colo_compare_main(const char *chr) ": %s" | ||||
| colo_compare_udp_miscompare(const char *sta, int size) ": %s = %d" | ||||
| colo_compare_icmp_miscompare(const char *sta, int size) ": %s = %d" | ||||
| colo_compare_ip_info(int psize, const char *sta, const char *stb, int ssize, const char *stc, const char *std) "ppkt size = %d, ip_src = %s, ip_dst = %s, spkt size = %d, ip_src = %s, ip_dst = %s" | ||||
| colo_old_packet_check_found(int64_t old_time) "%" PRId64 | ||||
| colo_compare_miscompare(void) "" | ||||
| 
 | ||||
| # net/filter-rewriter.c | ||||
| colo_filter_rewriter_debug(void) "" | ||||
| colo_filter_rewriter_pkt_info(const char *func, const char *src, const char *dst, uint32_t seq, uint32_t ack, uint32_t flag) "%s: src/dst: %s/%s p: seq/ack=%u/%u  flags=%x\n" | ||||
| colo_filter_rewriter_conn_offset(uint32_t offset) ": offset=%u\n" | ||||
| 
 | ||||
| ### Guest events, keep at bottom | ||||
| 
 | ||||
| # @vaddr: Access' virtual address. | ||||
|  |  | |||
							
								
								
									
										4
									
								
								vl.c
								
								
								
								
							
							
						
						
									
										4
									
								
								vl.c
								
								
								
								
							|  | @ -2845,7 +2845,9 @@ static bool object_create_initial(const char *type) | |||
|     if (g_str_equal(type, "filter-buffer") || | ||||
|         g_str_equal(type, "filter-dump") || | ||||
|         g_str_equal(type, "filter-mirror") || | ||||
|         g_str_equal(type, "filter-redirector")) { | ||||
|         g_str_equal(type, "filter-redirector") || | ||||
|         g_str_equal(type, "colo-compare") || | ||||
|         g_str_equal(type, "filter-rewriter")) { | ||||
|         return false; | ||||
|     } | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	 Peter Maydell
						Peter Maydell