diff --git a/Makefile b/Makefile index 586eeea..0d2ee71 100644 --- a/Makefile +++ b/Makefile @@ -18,7 +18,9 @@ all: binaries -CFLAGS = -std=c99 -Wall -O3 -g -D_GNU_SOURCE -DNO_LIBNUMA +CFLAGS = -std=c99 -Wall -O3 -g -D_GNU_SOURCE -DNO_LIBNUMA -MMD -MP + +deps := $(patsubst %.c, %.d, $(wildcard *.c)) lib := \ check_all_options.o \ @@ -90,10 +92,15 @@ psp_rr: $(psp_rr-objs) binaries: tcp_rr tcp_stream tcp_crr udp_rr udp_stream psp_stream psp_crr psp_rr clean: - rm -f *.o tcp_rr tcp_stream tcp_crr udp_rr udp_stream psp_stream psp_crr psp_rr + rm -f *.o tcp_rr tcp_stream tcp_crr udp_rr udp_stream psp_stream psp_crr psp_rr $(deps) + +vclean: clean + rm -f *.log IMAGE ?= neper TAG ?= $(shell git describe --tags --always --dirty) image: docker build --tag ${IMAGE}:${TAG} . + +-include $(deps) diff --git a/define_all_flags.c b/define_all_flags.c index 3200d02..a903691 100644 --- a/define_all_flags.c +++ b/define_all_flags.c @@ -33,6 +33,8 @@ struct flags_parser *add_flags_common(struct flags_parser *fp) DEFINE_FLAG(fp, int, listen_backlog, 128, 0, "Backlog size for listen()"); DEFINE_FLAG(fp, int, suicide_length, 0, 's', "Suicide length in seconds"); DEFINE_FLAG(fp, int, source_port, -1, 0, "Sender (source) data port. First data stream will use this port, each next stream will use port one larger than previous one. When not specified, kernel assigns free source ports."); + DEFINE_FLAG(fp, int, discard, 0, 0, "Discard first N samples per thread"); + DEFINE_FLAG(fp, int, busywait, 0, 0, "Use busywait epoll/napi (nsecs); set to 1-999 for epoll busy only"); DEFINE_FLAG(fp, bool, stime_use_proc,false, 'S', "Use global system+IRQ+SoftIRQ time from /proc/stat in place of getrusage ru_stime value. Should only be used on otherwise idle systems or with high workloads!"); DEFINE_FLAG(fp, bool, ipv4, false, '4', "Set desired address family to AF_INET"); DEFINE_FLAG(fp, bool, ipv6, false, '6', "Set desired address family to AF_INET6"); @@ -43,6 +45,7 @@ struct flags_parser *add_flags_common(struct flags_parser *fp) DEFINE_FLAG(fp, bool, logtostderr, false, 0, "Log to stderr"); DEFINE_FLAG(fp, bool, nolog, false, 0, "No logging"); DEFINE_FLAG(fp, bool, nonblocking, false, 0, "Make sure syscalls are all nonblocking"); + DEFINE_FLAG(fp, bool, optimistic, false, 0, "Optimistic I/O (try skipping EPOLLOUT in rr server)"); DEFINE_FLAG(fp, bool, freebind, false, 0, "Set FREEBIND socket option"); DEFINE_FLAG(fp, double, interval, 1.0, 'I', "For how many seconds that a sample is generated"); DEFINE_FLAG(fp, long long, max_pacing_rate, 0, 'm', "SO_MAX_PACING_RATE value; use as 64-bit unsigned"); diff --git a/flow.c b/flow.c index 57a40e2..29f84b2 100644 --- a/flow.c +++ b/flow.c @@ -215,7 +215,8 @@ bool flow_serve_pending(struct thread *t, struct timespec *timeout) /* The default timeout of 1m is an upper bound that will shrink if * there are any pending flows. */ - int64_t ns = t->opts->nonblocking ? 600 * 1000 * 1000 * (int64_t)1000 : -1; + int64_t ns = t->opts->busywait ?: + t->opts->nonblocking ? 600 * 1000 * 1000 * (int64_t)1000 : -1; struct rate_limit *rl = &t->rl; while (rl->pending_count) { diff --git a/histo.c b/histo.c index 9378cdf..f281c3a 100644 --- a/histo.c +++ b/histo.c @@ -44,6 +44,7 @@ struct neper_histo { uint64_t all_count; uint64_t one_count; uint64_t cur_count; + uint64_t discard_count; double all_sum; double one_sum; @@ -203,6 +204,11 @@ void neper_histo_event(struct neper_histo *impl, double delta_s) { int i; + if (impl->thread->opts->discard > impl->discard_count) { + impl->discard_count++; + return; + } + impl->cur_count++; impl->cur_sum += delta_s; impl->cur_sum2 += delta_s * delta_s; @@ -289,7 +295,7 @@ void neper_histo_delete(struct neper_histo *impl) struct neper_histo *neper_histo_new(const struct thread *t, uint8_t k_bits) { - struct neper_histo *ret, histo = {}; + struct neper_histo *ret, histo = {0}; size_t memsize = sizeof(histo); if (k_bits > 10) diff --git a/lib.h b/lib.h index 78eb8f8..416f702 100644 --- a/lib.h +++ b/lib.h @@ -75,6 +75,8 @@ struct options { int send_flags; int mark; int tcp_tx_delay; + int discard; + int busywait; bool stime_use_proc; /* Enable use of /proc/stat values for stime */ bool ipv4; bool ipv6; @@ -89,6 +91,7 @@ struct options { bool logtostderr; bool nolog; bool nonblocking; + bool optimistic; bool freebind; bool tcp_fastopen; bool skip_rx_copy; diff --git a/rr.c b/rr.c index 082121c..7f6cae8 100644 --- a/rr.c +++ b/rr.c @@ -399,7 +399,7 @@ static void crr_client_state_0(struct flow *f, uint32_t events) /* The state machine for servers: */ -static void rr_server_state_2(struct flow *f, uint32_t events) +static bool try_rr_server_state_2(struct flow *f, uint32_t events) { struct rr_state *rr = flow_opaque(f); struct thread *t = flow_thread(f); @@ -412,12 +412,22 @@ static void rr_server_state_2(struct flow *f, uint32_t events) neper_histo_event(histo, 0.0); stat->event(t, stat, 1, false, rr_snapshot); } - flow_mod(f, rr_server_state_0, EPOLLIN, false); + return true; } + return false; +} + +static void rr_server_state_2(struct flow *f, uint32_t events) +{ + if (try_rr_server_state_2(f, events)) + flow_mod(f, rr_server_state_0, EPOLLIN, false); } static void rr_server_state_1(struct flow *f) { + if (flow_thread(f)->opts->optimistic) + if (try_rr_server_state_2(f, EPOLLOUT)) + return; flow_mod(f, rr_server_state_2, EPOLLOUT, false); } diff --git a/thread.c b/thread.c index d18bf95..74079e5 100644 --- a/thread.c +++ b/thread.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include "common.h" @@ -40,6 +41,20 @@ #include #endif +/* support for epoll ioctl has not landed in all libc packages */ +#ifndef EPOLL_IOC_TYPE +struct epoll_params { + __u32 busy_poll_usecs; + __u16 busy_poll_budget; + __u8 prefer_busy_poll; + __u8 pad; +}; + +#define EPOLL_IOC_TYPE 0x8A +#define EPIOCSPARAMS _IOW(EPOLL_IOC_TYPE, 0x01, struct epoll_params) +#define EPIOCGPARAMS _IOR(EPOLL_IOC_TYPE, 0x02, struct epoll_params) +#endif + /* Callbacks for the neper_stats sumforeach() function. */ static int @@ -416,6 +431,11 @@ void start_worker_threads(struct options *opts, struct callbacks *cb, t[i].ai_socktype = fn->fn_type; t[i].ai = copy_addrinfo(ai); t[i].epfd = epoll_create1_or_die(cb); + if (opts->busywait >= 1000) { + struct epoll_params params = { opts->busywait / 1000, 64, 1, 0 }; + int ret = ioctl(t[i].epfd, EPIOCSPARAMS, ¶ms); + assert(ret == 0); (void)ret; + } t[i].stop_efd = eventfd(0, 0); if (t[i].stop_efd == -1) PLOG_FATAL(cb, "eventfd");