cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

msg_zerocopy.c (19360B)


      1/* Evaluate MSG_ZEROCOPY
      2 *
      3 * Send traffic between two processes over one of the supported
      4 * protocols and modes:
      5 *
      6 * PF_INET/PF_INET6
      7 * - SOCK_STREAM
      8 * - SOCK_DGRAM
      9 * - SOCK_DGRAM with UDP_CORK
     10 * - SOCK_RAW
     11 * - SOCK_RAW with IP_HDRINCL
     12 *
     13 * PF_PACKET
     14 * - SOCK_DGRAM
     15 * - SOCK_RAW
     16 *
     17 * PF_RDS
     18 * - SOCK_SEQPACKET
     19 *
     20 * Start this program on two connected hosts, one in send mode and
     21 * the other with option '-r' to put it in receiver mode.
     22 *
     23 * If zerocopy mode ('-z') is enabled, the sender will verify that
     24 * the kernel queues completions on the error queue for all zerocopy
     25 * transfers.
     26 */
     27
     28#define _GNU_SOURCE
     29
     30#include <arpa/inet.h>
     31#include <error.h>
     32#include <errno.h>
     33#include <limits.h>
     34#include <linux/errqueue.h>
     35#include <linux/if_packet.h>
     36#include <linux/ipv6.h>
     37#include <linux/socket.h>
     38#include <linux/sockios.h>
     39#include <net/ethernet.h>
     40#include <net/if.h>
     41#include <netinet/ip.h>
     42#include <netinet/ip6.h>
     43#include <netinet/tcp.h>
     44#include <netinet/udp.h>
     45#include <poll.h>
     46#include <sched.h>
     47#include <stdbool.h>
     48#include <stdio.h>
     49#include <stdint.h>
     50#include <stdlib.h>
     51#include <string.h>
     52#include <sys/ioctl.h>
     53#include <sys/socket.h>
     54#include <sys/stat.h>
     55#include <sys/time.h>
     56#include <sys/types.h>
     57#include <sys/wait.h>
     58#include <unistd.h>
     59#include <linux/rds.h>
     60
     61#ifndef SO_EE_ORIGIN_ZEROCOPY
     62#define SO_EE_ORIGIN_ZEROCOPY		5
     63#endif
     64
     65#ifndef SO_ZEROCOPY
     66#define SO_ZEROCOPY	60
     67#endif
     68
     69#ifndef SO_EE_CODE_ZEROCOPY_COPIED
     70#define SO_EE_CODE_ZEROCOPY_COPIED	1
     71#endif
     72
     73#ifndef MSG_ZEROCOPY
     74#define MSG_ZEROCOPY	0x4000000
     75#endif
     76
     77static int  cfg_cork;
     78static bool cfg_cork_mixed;
     79static int  cfg_cpu		= -1;		/* default: pin to last cpu */
     80static int  cfg_family		= PF_UNSPEC;
     81static int  cfg_ifindex		= 1;
     82static int  cfg_payload_len;
     83static int  cfg_port		= 8000;
     84static bool cfg_rx;
     85static int  cfg_runtime_ms	= 4200;
     86static int  cfg_verbose;
     87static int  cfg_waittime_ms	= 500;
     88static bool cfg_zerocopy;
     89
     90static socklen_t cfg_alen;
     91static struct sockaddr_storage cfg_dst_addr;
     92static struct sockaddr_storage cfg_src_addr;
     93
     94static char payload[IP_MAXPACKET];
     95static long packets, bytes, completions, expected_completions;
     96static int  zerocopied = -1;
     97static uint32_t next_completion;
     98
     99static unsigned long gettimeofday_ms(void)
    100{
    101	struct timeval tv;
    102
    103	gettimeofday(&tv, NULL);
    104	return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
    105}
    106
    107static uint16_t get_ip_csum(const uint16_t *start, int num_words)
    108{
    109	unsigned long sum = 0;
    110	int i;
    111
    112	for (i = 0; i < num_words; i++)
    113		sum += start[i];
    114
    115	while (sum >> 16)
    116		sum = (sum & 0xFFFF) + (sum >> 16);
    117
    118	return ~sum;
    119}
    120
    121static int do_setcpu(int cpu)
    122{
    123	cpu_set_t mask;
    124
    125	CPU_ZERO(&mask);
    126	CPU_SET(cpu, &mask);
    127	if (sched_setaffinity(0, sizeof(mask), &mask))
    128		fprintf(stderr, "cpu: unable to pin, may increase variance.\n");
    129	else if (cfg_verbose)
    130		fprintf(stderr, "cpu: %u\n", cpu);
    131
    132	return 0;
    133}
    134
    135static void do_setsockopt(int fd, int level, int optname, int val)
    136{
    137	if (setsockopt(fd, level, optname, &val, sizeof(val)))
    138		error(1, errno, "setsockopt %d.%d: %d", level, optname, val);
    139}
    140
    141static int do_poll(int fd, int events)
    142{
    143	struct pollfd pfd;
    144	int ret;
    145
    146	pfd.events = events;
    147	pfd.revents = 0;
    148	pfd.fd = fd;
    149
    150	ret = poll(&pfd, 1, cfg_waittime_ms);
    151	if (ret == -1)
    152		error(1, errno, "poll");
    153
    154	return ret && (pfd.revents & events);
    155}
    156
    157static int do_accept(int fd)
    158{
    159	int fda = fd;
    160
    161	fd = accept(fda, NULL, NULL);
    162	if (fd == -1)
    163		error(1, errno, "accept");
    164	if (close(fda))
    165		error(1, errno, "close listen sock");
    166
    167	return fd;
    168}
    169
    170static void add_zcopy_cookie(struct msghdr *msg, uint32_t cookie)
    171{
    172	struct cmsghdr *cm;
    173
    174	if (!msg->msg_control)
    175		error(1, errno, "NULL cookie");
    176	cm = (void *)msg->msg_control;
    177	cm->cmsg_len = CMSG_LEN(sizeof(cookie));
    178	cm->cmsg_level = SOL_RDS;
    179	cm->cmsg_type = RDS_CMSG_ZCOPY_COOKIE;
    180	memcpy(CMSG_DATA(cm), &cookie, sizeof(cookie));
    181}
    182
    183static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy, int domain)
    184{
    185	int ret, len, i, flags;
    186	static uint32_t cookie;
    187	char ckbuf[CMSG_SPACE(sizeof(cookie))];
    188
    189	len = 0;
    190	for (i = 0; i < msg->msg_iovlen; i++)
    191		len += msg->msg_iov[i].iov_len;
    192
    193	flags = MSG_DONTWAIT;
    194	if (do_zerocopy) {
    195		flags |= MSG_ZEROCOPY;
    196		if (domain == PF_RDS) {
    197			memset(&msg->msg_control, 0, sizeof(msg->msg_control));
    198			msg->msg_controllen = CMSG_SPACE(sizeof(cookie));
    199			msg->msg_control = (struct cmsghdr *)ckbuf;
    200			add_zcopy_cookie(msg, ++cookie);
    201		}
    202	}
    203
    204	ret = sendmsg(fd, msg, flags);
    205	if (ret == -1 && errno == EAGAIN)
    206		return false;
    207	if (ret == -1)
    208		error(1, errno, "send");
    209	if (cfg_verbose && ret != len)
    210		fprintf(stderr, "send: ret=%u != %u\n", ret, len);
    211
    212	if (len) {
    213		packets++;
    214		bytes += ret;
    215		if (do_zerocopy && ret)
    216			expected_completions++;
    217	}
    218	if (do_zerocopy && domain == PF_RDS) {
    219		msg->msg_control = NULL;
    220		msg->msg_controllen = 0;
    221	}
    222
    223	return true;
    224}
    225
    226static void do_sendmsg_corked(int fd, struct msghdr *msg)
    227{
    228	bool do_zerocopy = cfg_zerocopy;
    229	int i, payload_len, extra_len;
    230
    231	/* split up the packet. for non-multiple, make first buffer longer */
    232	payload_len = cfg_payload_len / cfg_cork;
    233	extra_len = cfg_payload_len - (cfg_cork * payload_len);
    234
    235	do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 1);
    236
    237	for (i = 0; i < cfg_cork; i++) {
    238
    239		/* in mixed-frags mode, alternate zerocopy and copy frags
    240		 * start with non-zerocopy, to ensure attach later works
    241		 */
    242		if (cfg_cork_mixed)
    243			do_zerocopy = (i & 1);
    244
    245		msg->msg_iov[0].iov_len = payload_len + extra_len;
    246		extra_len = 0;
    247
    248		do_sendmsg(fd, msg, do_zerocopy,
    249			   (cfg_dst_addr.ss_family == AF_INET ?
    250			    PF_INET : PF_INET6));
    251	}
    252
    253	do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0);
    254}
    255
    256static int setup_iph(struct iphdr *iph, uint16_t payload_len)
    257{
    258	struct sockaddr_in *daddr = (void *) &cfg_dst_addr;
    259	struct sockaddr_in *saddr = (void *) &cfg_src_addr;
    260
    261	memset(iph, 0, sizeof(*iph));
    262
    263	iph->version	= 4;
    264	iph->tos	= 0;
    265	iph->ihl	= 5;
    266	iph->ttl	= 2;
    267	iph->saddr	= saddr->sin_addr.s_addr;
    268	iph->daddr	= daddr->sin_addr.s_addr;
    269	iph->protocol	= IPPROTO_EGP;
    270	iph->tot_len	= htons(sizeof(*iph) + payload_len);
    271	iph->check	= get_ip_csum((void *) iph, iph->ihl << 1);
    272
    273	return sizeof(*iph);
    274}
    275
    276static int setup_ip6h(struct ipv6hdr *ip6h, uint16_t payload_len)
    277{
    278	struct sockaddr_in6 *daddr = (void *) &cfg_dst_addr;
    279	struct sockaddr_in6 *saddr = (void *) &cfg_src_addr;
    280
    281	memset(ip6h, 0, sizeof(*ip6h));
    282
    283	ip6h->version		= 6;
    284	ip6h->payload_len	= htons(payload_len);
    285	ip6h->nexthdr		= IPPROTO_EGP;
    286	ip6h->hop_limit		= 2;
    287	ip6h->saddr		= saddr->sin6_addr;
    288	ip6h->daddr		= daddr->sin6_addr;
    289
    290	return sizeof(*ip6h);
    291}
    292
    293
    294static void setup_sockaddr(int domain, const char *str_addr,
    295			   struct sockaddr_storage *sockaddr)
    296{
    297	struct sockaddr_in6 *addr6 = (void *) sockaddr;
    298	struct sockaddr_in *addr4 = (void *) sockaddr;
    299
    300	switch (domain) {
    301	case PF_INET:
    302		memset(addr4, 0, sizeof(*addr4));
    303		addr4->sin_family = AF_INET;
    304		addr4->sin_port = htons(cfg_port);
    305		if (str_addr &&
    306		    inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1)
    307			error(1, 0, "ipv4 parse error: %s", str_addr);
    308		break;
    309	case PF_INET6:
    310		memset(addr6, 0, sizeof(*addr6));
    311		addr6->sin6_family = AF_INET6;
    312		addr6->sin6_port = htons(cfg_port);
    313		if (str_addr &&
    314		    inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1)
    315			error(1, 0, "ipv6 parse error: %s", str_addr);
    316		break;
    317	default:
    318		error(1, 0, "illegal domain");
    319	}
    320}
    321
    322static int do_setup_tx(int domain, int type, int protocol)
    323{
    324	int fd;
    325
    326	fd = socket(domain, type, protocol);
    327	if (fd == -1)
    328		error(1, errno, "socket t");
    329
    330	do_setsockopt(fd, SOL_SOCKET, SO_SNDBUF, 1 << 21);
    331	if (cfg_zerocopy)
    332		do_setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, 1);
    333
    334	if (domain != PF_PACKET && domain != PF_RDS)
    335		if (connect(fd, (void *) &cfg_dst_addr, cfg_alen))
    336			error(1, errno, "connect");
    337
    338	if (domain == PF_RDS) {
    339		if (bind(fd, (void *) &cfg_src_addr, cfg_alen))
    340			error(1, errno, "bind");
    341	}
    342
    343	return fd;
    344}
    345
    346static uint32_t do_process_zerocopy_cookies(struct rds_zcopy_cookies *ck)
    347{
    348	int i;
    349
    350	if (ck->num > RDS_MAX_ZCOOKIES)
    351		error(1, 0, "Returned %d cookies, max expected %d\n",
    352		      ck->num, RDS_MAX_ZCOOKIES);
    353	for (i = 0; i < ck->num; i++)
    354		if (cfg_verbose >= 2)
    355			fprintf(stderr, "%d\n", ck->cookies[i]);
    356	return ck->num;
    357}
    358
    359static bool do_recvmsg_completion(int fd)
    360{
    361	char cmsgbuf[CMSG_SPACE(sizeof(struct rds_zcopy_cookies))];
    362	struct rds_zcopy_cookies *ck;
    363	struct cmsghdr *cmsg;
    364	struct msghdr msg;
    365	bool ret = false;
    366
    367	memset(&msg, 0, sizeof(msg));
    368	msg.msg_control = cmsgbuf;
    369	msg.msg_controllen = sizeof(cmsgbuf);
    370
    371	if (recvmsg(fd, &msg, MSG_DONTWAIT))
    372		return ret;
    373
    374	if (msg.msg_flags & MSG_CTRUNC)
    375		error(1, errno, "recvmsg notification: truncated");
    376
    377	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
    378		if (cmsg->cmsg_level == SOL_RDS &&
    379		    cmsg->cmsg_type == RDS_CMSG_ZCOPY_COMPLETION) {
    380
    381			ck = (struct rds_zcopy_cookies *)CMSG_DATA(cmsg);
    382			completions += do_process_zerocopy_cookies(ck);
    383			ret = true;
    384			break;
    385		}
    386		error(0, 0, "ignoring cmsg at level %d type %d\n",
    387			    cmsg->cmsg_level, cmsg->cmsg_type);
    388	}
    389	return ret;
    390}
    391
    392static bool do_recv_completion(int fd, int domain)
    393{
    394	struct sock_extended_err *serr;
    395	struct msghdr msg = {};
    396	struct cmsghdr *cm;
    397	uint32_t hi, lo, range;
    398	int ret, zerocopy;
    399	char control[100];
    400
    401	if (domain == PF_RDS)
    402		return do_recvmsg_completion(fd);
    403
    404	msg.msg_control = control;
    405	msg.msg_controllen = sizeof(control);
    406
    407	ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
    408	if (ret == -1 && errno == EAGAIN)
    409		return false;
    410	if (ret == -1)
    411		error(1, errno, "recvmsg notification");
    412	if (msg.msg_flags & MSG_CTRUNC)
    413		error(1, errno, "recvmsg notification: truncated");
    414
    415	cm = CMSG_FIRSTHDR(&msg);
    416	if (!cm)
    417		error(1, 0, "cmsg: no cmsg");
    418	if (!((cm->cmsg_level == SOL_IP && cm->cmsg_type == IP_RECVERR) ||
    419	      (cm->cmsg_level == SOL_IPV6 && cm->cmsg_type == IPV6_RECVERR) ||
    420	      (cm->cmsg_level == SOL_PACKET && cm->cmsg_type == PACKET_TX_TIMESTAMP)))
    421		error(1, 0, "serr: wrong type: %d.%d",
    422		      cm->cmsg_level, cm->cmsg_type);
    423
    424	serr = (void *) CMSG_DATA(cm);
    425
    426	if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
    427		error(1, 0, "serr: wrong origin: %u", serr->ee_origin);
    428	if (serr->ee_errno != 0)
    429		error(1, 0, "serr: wrong error code: %u", serr->ee_errno);
    430
    431	hi = serr->ee_data;
    432	lo = serr->ee_info;
    433	range = hi - lo + 1;
    434
    435	/* Detect notification gaps. These should not happen often, if at all.
    436	 * Gaps can occur due to drops, reordering and retransmissions.
    437	 */
    438	if (lo != next_completion)
    439		fprintf(stderr, "gap: %u..%u does not append to %u\n",
    440			lo, hi, next_completion);
    441	next_completion = hi + 1;
    442
    443	zerocopy = !(serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED);
    444	if (zerocopied == -1)
    445		zerocopied = zerocopy;
    446	else if (zerocopied != zerocopy) {
    447		fprintf(stderr, "serr: inconsistent\n");
    448		zerocopied = zerocopy;
    449	}
    450
    451	if (cfg_verbose >= 2)
    452		fprintf(stderr, "completed: %u (h=%u l=%u)\n",
    453			range, hi, lo);
    454
    455	completions += range;
    456	return true;
    457}
    458
    459/* Read all outstanding messages on the errqueue */
    460static void do_recv_completions(int fd, int domain)
    461{
    462	while (do_recv_completion(fd, domain)) {}
    463}
    464
    465/* Wait for all remaining completions on the errqueue */
    466static void do_recv_remaining_completions(int fd, int domain)
    467{
    468	int64_t tstop = gettimeofday_ms() + cfg_waittime_ms;
    469
    470	while (completions < expected_completions &&
    471	       gettimeofday_ms() < tstop) {
    472		if (do_poll(fd, domain == PF_RDS ? POLLIN : POLLERR))
    473			do_recv_completions(fd, domain);
    474	}
    475
    476	if (completions < expected_completions)
    477		fprintf(stderr, "missing notifications: %lu < %lu\n",
    478			completions, expected_completions);
    479}
    480
    481static void do_tx(int domain, int type, int protocol)
    482{
    483	struct iovec iov[3] = { {0} };
    484	struct sockaddr_ll laddr;
    485	struct msghdr msg = {0};
    486	struct ethhdr eth;
    487	union {
    488		struct ipv6hdr ip6h;
    489		struct iphdr iph;
    490	} nh;
    491	uint64_t tstop;
    492	int fd;
    493
    494	fd = do_setup_tx(domain, type, protocol);
    495
    496	if (domain == PF_PACKET) {
    497		uint16_t proto = cfg_family == PF_INET ? ETH_P_IP : ETH_P_IPV6;
    498
    499		/* sock_raw passes ll header as data */
    500		if (type == SOCK_RAW) {
    501			memset(eth.h_dest, 0x06, ETH_ALEN);
    502			memset(eth.h_source, 0x02, ETH_ALEN);
    503			eth.h_proto = htons(proto);
    504			iov[0].iov_base = &eth;
    505			iov[0].iov_len = sizeof(eth);
    506			msg.msg_iovlen++;
    507		}
    508
    509		/* both sock_raw and sock_dgram expect name */
    510		memset(&laddr, 0, sizeof(laddr));
    511		laddr.sll_family	= AF_PACKET;
    512		laddr.sll_ifindex	= cfg_ifindex;
    513		laddr.sll_protocol	= htons(proto);
    514		laddr.sll_halen		= ETH_ALEN;
    515
    516		memset(laddr.sll_addr, 0x06, ETH_ALEN);
    517
    518		msg.msg_name		= &laddr;
    519		msg.msg_namelen		= sizeof(laddr);
    520	}
    521
    522	/* packet and raw sockets with hdrincl must pass network header */
    523	if (domain == PF_PACKET || protocol == IPPROTO_RAW) {
    524		if (cfg_family == PF_INET)
    525			iov[1].iov_len = setup_iph(&nh.iph, cfg_payload_len);
    526		else
    527			iov[1].iov_len = setup_ip6h(&nh.ip6h, cfg_payload_len);
    528
    529		iov[1].iov_base = (void *) &nh;
    530		msg.msg_iovlen++;
    531	}
    532
    533	if (domain == PF_RDS) {
    534		msg.msg_name = &cfg_dst_addr;
    535		msg.msg_namelen =  (cfg_dst_addr.ss_family == AF_INET ?
    536				    sizeof(struct sockaddr_in) :
    537				    sizeof(struct sockaddr_in6));
    538	}
    539
    540	iov[2].iov_base = payload;
    541	iov[2].iov_len = cfg_payload_len;
    542	msg.msg_iovlen++;
    543	msg.msg_iov = &iov[3 - msg.msg_iovlen];
    544
    545	tstop = gettimeofday_ms() + cfg_runtime_ms;
    546	do {
    547		if (cfg_cork)
    548			do_sendmsg_corked(fd, &msg);
    549		else
    550			do_sendmsg(fd, &msg, cfg_zerocopy, domain);
    551
    552		while (!do_poll(fd, POLLOUT)) {
    553			if (cfg_zerocopy)
    554				do_recv_completions(fd, domain);
    555		}
    556
    557	} while (gettimeofday_ms() < tstop);
    558
    559	if (cfg_zerocopy)
    560		do_recv_remaining_completions(fd, domain);
    561
    562	if (close(fd))
    563		error(1, errno, "close");
    564
    565	fprintf(stderr, "tx=%lu (%lu MB) txc=%lu zc=%c\n",
    566		packets, bytes >> 20, completions,
    567		zerocopied == 1 ? 'y' : 'n');
    568}
    569
    570static int do_setup_rx(int domain, int type, int protocol)
    571{
    572	int fd;
    573
    574	/* If tx over PF_PACKET, rx over PF_INET(6)/SOCK_RAW,
    575	 * to recv the only copy of the packet, not a clone
    576	 */
    577	if (domain == PF_PACKET)
    578		error(1, 0, "Use PF_INET/SOCK_RAW to read");
    579
    580	if (type == SOCK_RAW && protocol == IPPROTO_RAW)
    581		error(1, 0, "IPPROTO_RAW: not supported on Rx");
    582
    583	fd = socket(domain, type, protocol);
    584	if (fd == -1)
    585		error(1, errno, "socket r");
    586
    587	do_setsockopt(fd, SOL_SOCKET, SO_RCVBUF, 1 << 21);
    588	do_setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, 1 << 16);
    589	do_setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, 1);
    590
    591	if (bind(fd, (void *) &cfg_dst_addr, cfg_alen))
    592		error(1, errno, "bind");
    593
    594	if (type == SOCK_STREAM) {
    595		if (listen(fd, 1))
    596			error(1, errno, "listen");
    597		fd = do_accept(fd);
    598	}
    599
    600	return fd;
    601}
    602
    603/* Flush all outstanding bytes for the tcp receive queue */
    604static void do_flush_tcp(int fd)
    605{
    606	int ret;
    607
    608	/* MSG_TRUNC flushes up to len bytes */
    609	ret = recv(fd, NULL, 1 << 21, MSG_TRUNC | MSG_DONTWAIT);
    610	if (ret == -1 && errno == EAGAIN)
    611		return;
    612	if (ret == -1)
    613		error(1, errno, "flush");
    614	if (!ret)
    615		return;
    616
    617	packets++;
    618	bytes += ret;
    619}
    620
    621/* Flush all outstanding datagrams. Verify first few bytes of each. */
    622static void do_flush_datagram(int fd, int type)
    623{
    624	int ret, off = 0;
    625	char buf[64];
    626
    627	/* MSG_TRUNC will return full datagram length */
    628	ret = recv(fd, buf, sizeof(buf), MSG_DONTWAIT | MSG_TRUNC);
    629	if (ret == -1 && errno == EAGAIN)
    630		return;
    631
    632	/* raw ipv4 return with header, raw ipv6 without */
    633	if (cfg_family == PF_INET && type == SOCK_RAW) {
    634		off += sizeof(struct iphdr);
    635		ret -= sizeof(struct iphdr);
    636	}
    637
    638	if (ret == -1)
    639		error(1, errno, "recv");
    640	if (ret != cfg_payload_len)
    641		error(1, 0, "recv: ret=%u != %u", ret, cfg_payload_len);
    642	if (ret > sizeof(buf) - off)
    643		ret = sizeof(buf) - off;
    644	if (memcmp(buf + off, payload, ret))
    645		error(1, 0, "recv: data mismatch");
    646
    647	packets++;
    648	bytes += cfg_payload_len;
    649}
    650
    651static void do_rx(int domain, int type, int protocol)
    652{
    653	const int cfg_receiver_wait_ms = 400;
    654	uint64_t tstop;
    655	int fd;
    656
    657	fd = do_setup_rx(domain, type, protocol);
    658
    659	tstop = gettimeofday_ms() + cfg_runtime_ms + cfg_receiver_wait_ms;
    660	do {
    661		if (type == SOCK_STREAM)
    662			do_flush_tcp(fd);
    663		else
    664			do_flush_datagram(fd, type);
    665
    666		do_poll(fd, POLLIN);
    667
    668	} while (gettimeofday_ms() < tstop);
    669
    670	if (close(fd))
    671		error(1, errno, "close");
    672
    673	fprintf(stderr, "rx=%lu (%lu MB)\n", packets, bytes >> 20);
    674}
    675
    676static void do_test(int domain, int type, int protocol)
    677{
    678	int i;
    679
    680	if (cfg_cork && (domain == PF_PACKET || type != SOCK_DGRAM))
    681		error(1, 0, "can only cork udp sockets");
    682
    683	do_setcpu(cfg_cpu);
    684
    685	for (i = 0; i < IP_MAXPACKET; i++)
    686		payload[i] = 'a' + (i % 26);
    687
    688	if (cfg_rx)
    689		do_rx(domain, type, protocol);
    690	else
    691		do_tx(domain, type, protocol);
    692}
    693
    694static void usage(const char *filepath)
    695{
    696	error(1, 0, "Usage: %s [options] <test>", filepath);
    697}
    698
    699static void parse_opts(int argc, char **argv)
    700{
    701	const int max_payload_len = sizeof(payload) -
    702				    sizeof(struct ipv6hdr) -
    703				    sizeof(struct tcphdr) -
    704				    40 /* max tcp options */;
    705	int c;
    706	char *daddr = NULL, *saddr = NULL;
    707	char *cfg_test;
    708
    709	cfg_payload_len = max_payload_len;
    710
    711	while ((c = getopt(argc, argv, "46c:C:D:i:mp:rs:S:t:vz")) != -1) {
    712		switch (c) {
    713		case '4':
    714			if (cfg_family != PF_UNSPEC)
    715				error(1, 0, "Pass one of -4 or -6");
    716			cfg_family = PF_INET;
    717			cfg_alen = sizeof(struct sockaddr_in);
    718			break;
    719		case '6':
    720			if (cfg_family != PF_UNSPEC)
    721				error(1, 0, "Pass one of -4 or -6");
    722			cfg_family = PF_INET6;
    723			cfg_alen = sizeof(struct sockaddr_in6);
    724			break;
    725		case 'c':
    726			cfg_cork = strtol(optarg, NULL, 0);
    727			break;
    728		case 'C':
    729			cfg_cpu = strtol(optarg, NULL, 0);
    730			break;
    731		case 'D':
    732			daddr = optarg;
    733			break;
    734		case 'i':
    735			cfg_ifindex = if_nametoindex(optarg);
    736			if (cfg_ifindex == 0)
    737				error(1, errno, "invalid iface: %s", optarg);
    738			break;
    739		case 'm':
    740			cfg_cork_mixed = true;
    741			break;
    742		case 'p':
    743			cfg_port = strtoul(optarg, NULL, 0);
    744			break;
    745		case 'r':
    746			cfg_rx = true;
    747			break;
    748		case 's':
    749			cfg_payload_len = strtoul(optarg, NULL, 0);
    750			break;
    751		case 'S':
    752			saddr = optarg;
    753			break;
    754		case 't':
    755			cfg_runtime_ms = 200 + strtoul(optarg, NULL, 10) * 1000;
    756			break;
    757		case 'v':
    758			cfg_verbose++;
    759			break;
    760		case 'z':
    761			cfg_zerocopy = true;
    762			break;
    763		}
    764	}
    765
    766	cfg_test = argv[argc - 1];
    767	if (strcmp(cfg_test, "rds") == 0) {
    768		if (!daddr)
    769			error(1, 0, "-D <server addr> required for PF_RDS\n");
    770		if (!cfg_rx && !saddr)
    771			error(1, 0, "-S <client addr> required for PF_RDS\n");
    772	}
    773	setup_sockaddr(cfg_family, daddr, &cfg_dst_addr);
    774	setup_sockaddr(cfg_family, saddr, &cfg_src_addr);
    775
    776	if (cfg_payload_len > max_payload_len)
    777		error(1, 0, "-s: payload exceeds max (%d)", max_payload_len);
    778	if (cfg_cork_mixed && (!cfg_zerocopy || !cfg_cork))
    779		error(1, 0, "-m: cork_mixed requires corking and zerocopy");
    780
    781	if (optind != argc - 1)
    782		usage(argv[0]);
    783}
    784
    785int main(int argc, char **argv)
    786{
    787	const char *cfg_test;
    788
    789	parse_opts(argc, argv);
    790
    791	cfg_test = argv[argc - 1];
    792
    793	if (!strcmp(cfg_test, "packet"))
    794		do_test(PF_PACKET, SOCK_RAW, 0);
    795	else if (!strcmp(cfg_test, "packet_dgram"))
    796		do_test(PF_PACKET, SOCK_DGRAM, 0);
    797	else if (!strcmp(cfg_test, "raw"))
    798		do_test(cfg_family, SOCK_RAW, IPPROTO_EGP);
    799	else if (!strcmp(cfg_test, "raw_hdrincl"))
    800		do_test(cfg_family, SOCK_RAW, IPPROTO_RAW);
    801	else if (!strcmp(cfg_test, "tcp"))
    802		do_test(cfg_family, SOCK_STREAM, 0);
    803	else if (!strcmp(cfg_test, "udp"))
    804		do_test(cfg_family, SOCK_DGRAM, 0);
    805	else if (!strcmp(cfg_test, "rds"))
    806		do_test(PF_RDS, SOCK_SEQPACKET, 0);
    807	else
    808		error(1, 0, "unknown cfg_test %s", cfg_test);
    809
    810	return 0;
    811}