// SPDX-License-Identifier: GPL-2.0 /* * tcpdevmem netcat. Works similarly to netcat but does device memory TCP * instead of regular TCP. Uses udmabuf to mock a dmabuf provider. * * Usage: * * On server: * ncdevmem -s [-c ] -f eth1 -l -p 5201 * * On client: * echo -n "hello\nworld" | \ * ncdevmem -s [-c ] -p 5201 -f eth1 * * Note this is compatible with regular netcat. i.e. the sender or receiver can * be replaced with regular netcat to test the RX or TX path in isolation. * * Test data validation (devmem TCP on RX only): * * On server: * ncdevmem -s [-c ] -f eth1 -l -p 5201 -v 7 * * On client: * yes $(echo -e \\x01\\x02\\x03\\x04\\x05\\x06) | \ * head -c 1G | \ * nc 5201 -p 5201 * * Test data validation (devmem TCP on RX and TX, validation happens on RX): * * On server: * ncdevmem -s [-c ] -l -p 5201 -v 8 -f eth1 * * On client: * yes $(echo -e \\x01\\x02\\x03\\x04\\x05\\x06\\x07) | \ * head -c 1M | \ * ncdevmem -s [-c ] -p 5201 -f eth1 */ #define _GNU_SOURCE #define __EXPORTED_HEADERS__ #include #include #include #include #include #include #include #include #define __iovec_defined #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "netdev-user.h" #include "ethtool-user.h" #include #define PAGE_SHIFT 12 #define TEST_PREFIX "ncdevmem" #define NUM_PAGES 16000 #ifndef MSG_SOCK_DEVMEM #define MSG_SOCK_DEVMEM 0x2000000 #endif #define MAX_IOV 1024 static size_t max_chunk; static char *server_ip; static char *client_ip; static char *port; static size_t do_validation; static int start_queue = -1; static int num_queues = -1; static char *ifname; static unsigned int ifindex; static unsigned int dmabuf_id; static uint32_t tx_dmabuf_id; static int waittime_ms = 500; /* System state loaded by current_config_load() */ #define MAX_FLOWS 8 static int ntuple_ids[MAX_FLOWS] = { -1, -1, -1, -1, -1, -1, -1, -1, }; struct memory_buffer { int fd; size_t size; int devfd; int memfd; char *buf_mem; }; struct memory_provider { struct memory_buffer *(*alloc)(size_t size); void (*free)(struct memory_buffer *ctx); void (*memcpy_to_device)(struct memory_buffer *dst, size_t off, void *src, int n); void (*memcpy_from_device)(void *dst, struct memory_buffer *src, size_t off, int n); }; static void pr_err(const char *fmt, ...) { va_list args; fprintf(stderr, "%s: ", TEST_PREFIX); va_start(args, fmt); vfprintf(stderr, fmt, args); va_end(args); if (errno != 0) fprintf(stderr, ": %s", strerror(errno)); fprintf(stderr, "\n"); } static struct memory_buffer *udmabuf_alloc(size_t size) { struct udmabuf_create create; struct memory_buffer *ctx; int ret; ctx = malloc(sizeof(*ctx)); if (!ctx) return NULL; ctx->size = size; ctx->devfd = open("/dev/udmabuf", O_RDWR); if (ctx->devfd < 0) { pr_err("[skip,no-udmabuf: Unable to access DMA buffer device file]"); goto err_free_ctx; } ctx->memfd = memfd_create("udmabuf-test", MFD_ALLOW_SEALING); if (ctx->memfd < 0) { pr_err("[skip,no-memfd]"); goto err_close_dev; } ret = fcntl(ctx->memfd, F_ADD_SEALS, F_SEAL_SHRINK); if (ret < 0) { pr_err("[skip,fcntl-add-seals]"); goto err_close_memfd; } ret = ftruncate(ctx->memfd, size); if (ret == -1) { pr_err("[FAIL,memfd-truncate]"); goto err_close_memfd; } memset(&create, 0, sizeof(create)); create.memfd = ctx->memfd; create.offset = 0; create.size = size; ctx->fd = ioctl(ctx->devfd, UDMABUF_CREATE, &create); if (ctx->fd < 0) { pr_err("[FAIL, create udmabuf]"); goto err_close_fd; } ctx->buf_mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, ctx->fd, 0); if (ctx->buf_mem == MAP_FAILED) { pr_err("[FAIL, map udmabuf]"); goto err_close_fd; } return ctx; err_close_fd: close(ctx->fd); err_close_memfd: close(ctx->memfd); err_close_dev: close(ctx->devfd); err_free_ctx: free(ctx); return NULL; } static void udmabuf_free(struct memory_buffer *ctx) { munmap(ctx->buf_mem, ctx->size); close(ctx->fd); close(ctx->memfd); close(ctx->devfd); free(ctx); } static void udmabuf_memcpy_to_device(struct memory_buffer *dst, size_t off, void *src, int n) { struct dma_buf_sync sync = {}; sync.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE; ioctl(dst->fd, DMA_BUF_IOCTL_SYNC, &sync); memcpy(dst->buf_mem + off, src, n); sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE; ioctl(dst->fd, DMA_BUF_IOCTL_SYNC, &sync); } static void udmabuf_memcpy_from_device(void *dst, struct memory_buffer *src, size_t off, int n) { struct dma_buf_sync sync = {}; sync.flags = DMA_BUF_SYNC_START; ioctl(src->fd, DMA_BUF_IOCTL_SYNC, &sync); memcpy(dst, src->buf_mem + off, n); sync.flags = DMA_BUF_SYNC_END; ioctl(src->fd, DMA_BUF_IOCTL_SYNC, &sync); } static struct memory_provider udmabuf_memory_provider = { .alloc = udmabuf_alloc, .free = udmabuf_free, .memcpy_to_device = udmabuf_memcpy_to_device, .memcpy_from_device = udmabuf_memcpy_from_device, }; static struct memory_provider *provider = &udmabuf_memory_provider; static void print_nonzero_bytes(void *ptr, size_t size) { unsigned char *p = ptr; unsigned int i; for (i = 0; i < size; i++) putchar(p[i]); } int validate_buffer(void *line, size_t size) { static unsigned char seed = 1; unsigned char *ptr = line; unsigned char expected; static int errors; size_t i; for (i = 0; i < size; i++) { expected = seed ? seed : '\n'; if (ptr[i] != expected) { fprintf(stderr, "Failed validation: expected=%u, actual=%u, index=%lu\n", expected, ptr[i], i); errors++; if (errors > 20) { pr_err("validation failed"); return -1; } } seed++; if (seed == do_validation) seed = 0; } fprintf(stdout, "Validated buffer\n"); return 0; } static int __run_command(char *out, size_t outlen, const char *cmd, va_list args) { char command[256]; FILE *fp; vsnprintf(command, sizeof(command), cmd, args); fprintf(stderr, "Running: %s\n", command); fp = popen(command, "r"); if (!fp) return -1; if (out) { size_t len; if (!fgets(out, outlen, fp)) return -1; /* Remove trailing newline if present */ len = strlen(out); if (len && out[len - 1] == '\n') out[len - 1] = '\0'; } return pclose(fp); } static int run_command(const char *cmd, ...) { va_list args; int ret; va_start(args, cmd); ret = __run_command(NULL, 0, cmd, args); va_end(args); return ret; } static int ethtool_add_flow(const char *format, ...) { char local_output[256], cmd[256]; const char *id_start; int flow_idx, ret; char *endptr; long flow_id; va_list args; for (flow_idx = 0; flow_idx < MAX_FLOWS; flow_idx++) if (ntuple_ids[flow_idx] == -1) break; if (flow_idx == MAX_FLOWS) { fprintf(stderr, "Error: too many flows\n"); return -1; } snprintf(cmd, sizeof(cmd), "ethtool -N %s %s", ifname, format); va_start(args, format); ret = __run_command(local_output, sizeof(local_output), cmd, args); va_end(args); if (ret != 0) return ret; /* Extract the ID from the output */ id_start = strstr(local_output, "Added rule with ID "); if (!id_start) return -1; id_start += strlen("Added rule with ID "); flow_id = strtol(id_start, &endptr, 10); if (endptr == id_start || flow_id < 0 || flow_id > INT_MAX) return -1; fprintf(stderr, "Added flow rule with ID %ld\n", flow_id); ntuple_ids[flow_idx] = flow_id; return flow_id; } static int rxq_num(int ifindex) { struct ethtool_channels_get_req *req; struct ethtool_channels_get_rsp *rsp; struct ynl_error yerr; struct ynl_sock *ys; int num = -1; ys = ynl_sock_create(&ynl_ethtool_family, &yerr); if (!ys) { fprintf(stderr, "YNL: %s\n", yerr.msg); return -1; } req = ethtool_channels_get_req_alloc(); ethtool_channels_get_req_set_header_dev_index(req, ifindex); rsp = ethtool_channels_get(ys, req); if (rsp) num = rsp->rx_count + rsp->combined_count; ethtool_channels_get_req_free(req); ethtool_channels_get_rsp_free(rsp); ynl_sock_destroy(ys); return num; } static void reset_flow_steering(void) { int i; for (i = 0; i < MAX_FLOWS; i++) { if (ntuple_ids[i] == -1) continue; run_command("ethtool -N %s delete %d", ifname, ntuple_ids[i]); ntuple_ids[i] = -1; } } static const char *tcp_data_split_str(int val) { switch (val) { case 0: return "off"; case 1: return "auto"; case 2: return "on"; default: return "?"; } } static struct ethtool_rings_get_rsp *get_ring_config(void) { struct ethtool_rings_get_req *get_req; struct ethtool_rings_get_rsp *get_rsp; struct ynl_error yerr; struct ynl_sock *ys; ys = ynl_sock_create(&ynl_ethtool_family, &yerr); if (!ys) { fprintf(stderr, "YNL: %s\n", yerr.msg); return NULL; } get_req = ethtool_rings_get_req_alloc(); ethtool_rings_get_req_set_header_dev_index(get_req, ifindex); get_rsp = ethtool_rings_get(ys, get_req); ethtool_rings_get_req_free(get_req); ynl_sock_destroy(ys); return get_rsp; } static void restore_ring_config(const struct ethtool_rings_get_rsp *config) { struct ethtool_rings_get_req *get_req; struct ethtool_rings_get_rsp *get_rsp; struct ethtool_rings_set_req *req; struct ynl_error yerr; struct ynl_sock *ys; int ret; if (!config) return; ys = ynl_sock_create(&ynl_ethtool_family, &yerr); if (!ys) { fprintf(stderr, "YNL: %s\n", yerr.msg); return; } req = ethtool_rings_set_req_alloc(); ethtool_rings_set_req_set_header_dev_index(req, ifindex); ethtool_rings_set_req_set_tcp_data_split(req, ETHTOOL_TCP_DATA_SPLIT_UNKNOWN); if (config->_present.hds_thresh) ethtool_rings_set_req_set_hds_thresh(req, config->hds_thresh); ret = ethtool_rings_set(ys, req); if (ret < 0) fprintf(stderr, "YNL restoring HDS cfg: %s\n", ys->err.msg); get_req = ethtool_rings_get_req_alloc(); ethtool_rings_get_req_set_header_dev_index(get_req, ifindex); get_rsp = ethtool_rings_get(ys, get_req); ethtool_rings_get_req_free(get_req); /* use explicit value if UKNOWN didn't give us the previous */ if (get_rsp->tcp_data_split != config->tcp_data_split) { ethtool_rings_set_req_set_tcp_data_split(req, config->tcp_data_split); ret = ethtool_rings_set(ys, req); if (ret < 0) fprintf(stderr, "YNL restoring expl HDS cfg: %s\n", ys->err.msg); } ethtool_rings_get_rsp_free(get_rsp); ethtool_rings_set_req_free(req); ynl_sock_destroy(ys); } static int configure_headersplit(const struct ethtool_rings_get_rsp *old, bool on) { struct ethtool_rings_get_req *get_req; struct ethtool_rings_get_rsp *get_rsp; struct ethtool_rings_set_req *req; struct ynl_error yerr; struct ynl_sock *ys; int ret; ys = ynl_sock_create(&ynl_ethtool_family, &yerr); if (!ys) { fprintf(stderr, "YNL: %s\n", yerr.msg); return -1; } req = ethtool_rings_set_req_alloc(); ethtool_rings_set_req_set_header_dev_index(req, ifindex); if (on) { ethtool_rings_set_req_set_tcp_data_split(req, ETHTOOL_TCP_DATA_SPLIT_ENABLED); if (old->_present.hds_thresh) ethtool_rings_set_req_set_hds_thresh(req, 0); } else { ethtool_rings_set_req_set_tcp_data_split(req, ETHTOOL_TCP_DATA_SPLIT_UNKNOWN); } ret = ethtool_rings_set(ys, req); if (ret < 0) fprintf(stderr, "YNL failed: %s\n", ys->err.msg); ethtool_rings_set_req_free(req); if (ret == 0) { get_req = ethtool_rings_get_req_alloc(); ethtool_rings_get_req_set_header_dev_index(get_req, ifindex); get_rsp = ethtool_rings_get(ys, get_req); ethtool_rings_get_req_free(get_req); if (get_rsp) fprintf(stderr, "TCP header split: %s\n", tcp_data_split_str(get_rsp->tcp_data_split)); ethtool_rings_get_rsp_free(get_rsp); } ynl_sock_destroy(ys); return ret; } static int configure_rss(void) { return run_command("ethtool -X %s equal %d >&2", ifname, start_queue); } static void reset_rss(void) { run_command("ethtool -X %s default >&2", ifname, start_queue); } static int check_changing_channels(unsigned int rx, unsigned int tx) { struct ethtool_channels_get_req *gchan; struct ethtool_channels_set_req *schan; struct ethtool_channels_get_rsp *chan; struct ynl_error yerr; struct ynl_sock *ys; int ret; fprintf(stderr, "setting channel count rx:%u tx:%u\n", rx, tx); ys = ynl_sock_create(&ynl_ethtool_family, &yerr); if (!ys) { fprintf(stderr, "YNL: %s\n", yerr.msg); return -1; } gchan = ethtool_channels_get_req_alloc(); if (!gchan) { ret = -1; goto exit_close_sock; } ethtool_channels_get_req_set_header_dev_index(gchan, ifindex); chan = ethtool_channels_get(ys, gchan); ethtool_channels_get_req_free(gchan); if (!chan) { fprintf(stderr, "YNL get channels: %s\n", ys->err.msg); ret = -1; goto exit_close_sock; } schan = ethtool_channels_set_req_alloc(); if (!schan) { ret = -1; goto exit_free_chan; } ethtool_channels_set_req_set_header_dev_index(schan, ifindex); if (chan->_present.combined_count) { if (chan->_present.rx_count || chan->_present.tx_count) { ethtool_channels_set_req_set_rx_count(schan, 0); ethtool_channels_set_req_set_tx_count(schan, 0); } if (rx == tx) { ethtool_channels_set_req_set_combined_count(schan, rx); } else if (rx > tx) { ethtool_channels_set_req_set_combined_count(schan, tx); ethtool_channels_set_req_set_rx_count(schan, rx - tx); } else { ethtool_channels_set_req_set_combined_count(schan, rx); ethtool_channels_set_req_set_tx_count(schan, tx - rx); } } else if (chan->_present.rx_count) { ethtool_channels_set_req_set_rx_count(schan, rx); ethtool_channels_set_req_set_tx_count(schan, tx); } else { fprintf(stderr, "Error: device has neither combined nor rx channels\n"); ret = -1; goto exit_free_schan; } ret = ethtool_channels_set(ys, schan); if (ret) { fprintf(stderr, "YNL set channels: %s\n", ys->err.msg); } else { /* We were expecting a failure, go back to previous settings */ ethtool_channels_set_req_set_combined_count(schan, chan->combined_count); ethtool_channels_set_req_set_rx_count(schan, chan->rx_count); ethtool_channels_set_req_set_tx_count(schan, chan->tx_count); ret = ethtool_channels_set(ys, schan); if (ret) fprintf(stderr, "YNL un-setting channels: %s\n", ys->err.msg); } exit_free_schan: ethtool_channels_set_req_free(schan); exit_free_chan: ethtool_channels_get_rsp_free(chan); exit_close_sock: ynl_sock_destroy(ys); return ret; } static int configure_flow_steering(struct sockaddr_in6 *server_sin) { const char *type = "tcp6"; const char *server_addr; char buf[40]; int flow_id; inet_ntop(AF_INET6, &server_sin->sin6_addr, buf, sizeof(buf)); server_addr = buf; if (IN6_IS_ADDR_V4MAPPED(&server_sin->sin6_addr)) { type = "tcp4"; server_addr = strrchr(server_addr, ':') + 1; } /* Try configure 5-tuple */ flow_id = ethtool_add_flow("flow-type %s %s %s dst-ip %s %s %s dst-port %s queue %d", type, client_ip ? "src-ip" : "", client_ip ?: "", server_addr, client_ip ? "src-port" : "", client_ip ? port : "", port, start_queue); if (flow_id < 0) { /* If that fails, try configure 3-tuple */ flow_id = ethtool_add_flow("flow-type %s dst-ip %s dst-port %s queue %d", type, server_addr, port, start_queue); if (flow_id < 0) /* If that fails, return error */ return -1; } return 0; } static int bind_rx_queue(unsigned int ifindex, unsigned int dmabuf_fd, struct netdev_queue_id *queues, unsigned int n_queue_index, struct ynl_sock **ys) { struct netdev_bind_rx_req *req = NULL; struct netdev_bind_rx_rsp *rsp = NULL; struct ynl_error yerr; *ys = ynl_sock_create(&ynl_netdev_family, &yerr); if (!*ys) { netdev_queue_id_free(queues); fprintf(stderr, "YNL: %s\n", yerr.msg); return -1; } req = netdev_bind_rx_req_alloc(); netdev_bind_rx_req_set_ifindex(req, ifindex); netdev_bind_rx_req_set_fd(req, dmabuf_fd); __netdev_bind_rx_req_set_queues(req, queues, n_queue_index); rsp = netdev_bind_rx(*ys, req); if (!rsp) { perror("netdev_bind_rx"); goto err_close; } if (!rsp->_present.id) { perror("id not present"); goto err_close; } fprintf(stderr, "got dmabuf id=%d\n", rsp->id); dmabuf_id = rsp->id; netdev_bind_rx_req_free(req); netdev_bind_rx_rsp_free(rsp); return 0; err_close: fprintf(stderr, "YNL failed: %s\n", (*ys)->err.msg); netdev_bind_rx_req_free(req); ynl_sock_destroy(*ys); return -1; } static int bind_tx_queue(unsigned int ifindex, unsigned int dmabuf_fd, struct ynl_sock **ys) { struct netdev_bind_tx_req *req = NULL; struct netdev_bind_tx_rsp *rsp = NULL; struct ynl_error yerr; *ys = ynl_sock_create(&ynl_netdev_family, &yerr); if (!*ys) { fprintf(stderr, "YNL: %s\n", yerr.msg); return -1; } req = netdev_bind_tx_req_alloc(); netdev_bind_tx_req_set_ifindex(req, ifindex); netdev_bind_tx_req_set_fd(req, dmabuf_fd); rsp = netdev_bind_tx(*ys, req); if (!rsp) { perror("netdev_bind_tx"); goto err_close; } if (!rsp->_present.id) { perror("id not present"); goto err_close; } fprintf(stderr, "got tx dmabuf id=%d\n", rsp->id); tx_dmabuf_id = rsp->id; netdev_bind_tx_req_free(req); netdev_bind_tx_rsp_free(rsp); return 0; err_close: fprintf(stderr, "YNL failed: %s\n", (*ys)->err.msg); netdev_bind_tx_req_free(req); ynl_sock_destroy(*ys); return -1; } static int enable_reuseaddr(int fd) { int opt = 1; int ret; ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)); if (ret) { pr_err("SO_REUSEPORT failed"); return -1; } ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)); if (ret) { pr_err("SO_REUSEADDR failed"); return -1; } return 0; } static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6) { int ret; sin6->sin6_family = AF_INET6; sin6->sin6_port = htons(port); ret = inet_pton(sin6->sin6_family, str, &sin6->sin6_addr); if (ret != 1) { /* fallback to plain IPv4 */ ret = inet_pton(AF_INET, str, &sin6->sin6_addr.s6_addr32[3]); if (ret != 1) return -1; /* add ::ffff prefix */ sin6->sin6_addr.s6_addr32[0] = 0; sin6->sin6_addr.s6_addr32[1] = 0; sin6->sin6_addr.s6_addr16[4] = 0; sin6->sin6_addr.s6_addr16[5] = 0xffff; } return 0; } static struct netdev_queue_id *create_queues(void) { struct netdev_queue_id *queues; size_t i = 0; queues = netdev_queue_id_alloc(num_queues); for (i = 0; i < num_queues; i++) { netdev_queue_id_set_type(&queues[i], NETDEV_QUEUE_TYPE_RX); netdev_queue_id_set_id(&queues[i], start_queue + i); } return queues; } static int do_server(struct memory_buffer *mem) { struct ethtool_rings_get_rsp *ring_config; char ctrl_data[sizeof(int) * 20000]; size_t non_page_aligned_frags = 0; struct sockaddr_in6 client_addr; struct sockaddr_in6 server_sin; size_t page_aligned_frags = 0; size_t total_received = 0; socklen_t client_addr_len; bool is_devmem = false; char *tmp_mem = NULL; struct ynl_sock *ys; char iobuf[819200]; int ret, err = -1; char buffer[256]; int socket_fd; int client_fd; ret = parse_address(server_ip, atoi(port), &server_sin); if (ret < 0) { pr_err("parse server address"); return -1; } ring_config = get_ring_config(); if (!ring_config) { pr_err("Failed to get current ring configuration"); return -1; } if (configure_headersplit(ring_config, 1)) { pr_err("Failed to enable TCP header split"); goto err_free_ring_config; } /* Configure RSS to divert all traffic from our devmem queues */ if (configure_rss()) { pr_err("Failed to configure rss"); goto err_reset_headersplit; } /* Flow steer our devmem flows to start_queue */ if (configure_flow_steering(&server_sin)) { pr_err("Failed to configure flow steering"); goto err_reset_rss; } sleep(1); if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys)) { pr_err("Failed to bind"); goto err_reset_flow_steering; } tmp_mem = malloc(mem->size); if (!tmp_mem) goto err_unbind; socket_fd = socket(AF_INET6, SOCK_STREAM, 0); if (socket_fd < 0) { pr_err("Failed to create socket"); goto err_free_tmp; } if (enable_reuseaddr(socket_fd)) goto err_close_socket; fprintf(stderr, "binding to address %s:%d\n", server_ip, ntohs(server_sin.sin6_port)); ret = bind(socket_fd, &server_sin, sizeof(server_sin)); if (ret) { pr_err("Failed to bind"); goto err_close_socket; } ret = listen(socket_fd, 1); if (ret) { pr_err("Failed to listen"); goto err_close_socket; } client_addr_len = sizeof(client_addr); inet_ntop(AF_INET6, &server_sin.sin6_addr, buffer, sizeof(buffer)); fprintf(stderr, "Waiting or connection on %s:%d\n", buffer, ntohs(server_sin.sin6_port)); client_fd = accept(socket_fd, &client_addr, &client_addr_len); if (client_fd < 0) { pr_err("Failed to accept"); goto err_close_socket; } inet_ntop(AF_INET6, &client_addr.sin6_addr, buffer, sizeof(buffer)); fprintf(stderr, "Got connection from %s:%d\n", buffer, ntohs(client_addr.sin6_port)); while (1) { struct iovec iov = { .iov_base = iobuf, .iov_len = sizeof(iobuf) }; struct dmabuf_cmsg *dmabuf_cmsg = NULL; struct cmsghdr *cm = NULL; struct msghdr msg = { 0 }; struct dmabuf_token token; ssize_t ret; is_devmem = false; msg.msg_iov = &iov; msg.msg_iovlen = 1; msg.msg_control = ctrl_data; msg.msg_controllen = sizeof(ctrl_data); ret = recvmsg(client_fd, &msg, MSG_SOCK_DEVMEM); fprintf(stderr, "recvmsg ret=%ld\n", ret); if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) continue; if (ret < 0) { perror("recvmsg"); continue; } if (ret == 0) { errno = 0; pr_err("client exited"); goto cleanup; } for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) { if (cm->cmsg_level != SOL_SOCKET || (cm->cmsg_type != SCM_DEVMEM_DMABUF && cm->cmsg_type != SCM_DEVMEM_LINEAR)) { fprintf(stderr, "skipping non-devmem cmsg\n"); continue; } dmabuf_cmsg = (struct dmabuf_cmsg *)CMSG_DATA(cm); is_devmem = true; if (cm->cmsg_type == SCM_DEVMEM_LINEAR) { /* TODO: process data copied from skb's linear * buffer. */ fprintf(stderr, "SCM_DEVMEM_LINEAR. dmabuf_cmsg->frag_size=%u\n", dmabuf_cmsg->frag_size); continue; } token.token_start = dmabuf_cmsg->frag_token; token.token_count = 1; total_received += dmabuf_cmsg->frag_size; fprintf(stderr, "received frag_page=%llu, in_page_offset=%llu, frag_offset=%llu, frag_size=%u, token=%u, total_received=%lu, dmabuf_id=%u\n", dmabuf_cmsg->frag_offset >> PAGE_SHIFT, dmabuf_cmsg->frag_offset % getpagesize(), dmabuf_cmsg->frag_offset, dmabuf_cmsg->frag_size, dmabuf_cmsg->frag_token, total_received, dmabuf_cmsg->dmabuf_id); if (dmabuf_cmsg->dmabuf_id != dmabuf_id) { pr_err("received on wrong dmabuf_id: flow steering error"); goto err_close_client; } if (dmabuf_cmsg->frag_size % getpagesize()) non_page_aligned_frags++; else page_aligned_frags++; provider->memcpy_from_device(tmp_mem, mem, dmabuf_cmsg->frag_offset, dmabuf_cmsg->frag_size); if (do_validation) { if (validate_buffer(tmp_mem, dmabuf_cmsg->frag_size)) goto err_close_client; } else { print_nonzero_bytes(tmp_mem, dmabuf_cmsg->frag_size); } ret = setsockopt(client_fd, SOL_SOCKET, SO_DEVMEM_DONTNEED, &token, sizeof(token)); if (ret != 1) { pr_err("SO_DEVMEM_DONTNEED not enough tokens"); goto err_close_client; } } if (!is_devmem) { pr_err("flow steering error"); goto err_close_client; } fprintf(stderr, "total_received=%lu\n", total_received); } fprintf(stderr, "%s: ok\n", TEST_PREFIX); fprintf(stderr, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n", page_aligned_frags, non_page_aligned_frags); cleanup: err = 0; err_close_client: close(client_fd); err_close_socket: close(socket_fd); err_free_tmp: free(tmp_mem); err_unbind: ynl_sock_destroy(ys); err_reset_flow_steering: reset_flow_steering(); err_reset_rss: reset_rss(); err_reset_headersplit: restore_ring_config(ring_config); err_free_ring_config: ethtool_rings_get_rsp_free(ring_config); return err; } int run_devmem_tests(void) { struct ethtool_rings_get_rsp *ring_config; struct netdev_queue_id *queues; struct memory_buffer *mem; struct ynl_sock *ys; int err = -1; mem = provider->alloc(getpagesize() * NUM_PAGES); if (!mem) { pr_err("Failed to allocate memory buffer"); return -1; } ring_config = get_ring_config(); if (!ring_config) { pr_err("Failed to get current ring configuration"); goto err_free_mem; } /* Configure RSS to divert all traffic from our devmem queues */ if (configure_rss()) { pr_err("rss error"); goto err_free_ring_config; } if (configure_headersplit(ring_config, 1)) { pr_err("Failed to configure header split"); goto err_reset_rss; } queues = netdev_queue_id_alloc(num_queues); if (!queues) { pr_err("Failed to allocate empty queues array"); goto err_reset_headersplit; } if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) { pr_err("Binding empty queues array should have failed"); goto err_unbind; } if (configure_headersplit(ring_config, 0)) { pr_err("Failed to configure header split"); goto err_reset_headersplit; } queues = create_queues(); if (!queues) { pr_err("Failed to create queues"); goto err_reset_headersplit; } if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) { pr_err("Configure dmabuf with header split off should have failed"); goto err_unbind; } if (configure_headersplit(ring_config, 1)) { pr_err("Failed to configure header split"); goto err_reset_headersplit; } queues = create_queues(); if (!queues) { pr_err("Failed to create queues"); goto err_reset_headersplit; } if (bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) { pr_err("Failed to bind"); goto err_reset_headersplit; } /* Deactivating a bound queue should not be legal */ if (!check_changing_channels(num_queues, num_queues)) { pr_err("Deactivating a bound queue should be illegal"); goto err_unbind; } err = 0; goto err_unbind; err_unbind: ynl_sock_destroy(ys); err_reset_headersplit: restore_ring_config(ring_config); err_reset_rss: reset_rss(); err_free_ring_config: ethtool_rings_get_rsp_free(ring_config); err_free_mem: provider->free(mem); return err; } static uint64_t gettimeofday_ms(void) { struct timeval tv; gettimeofday(&tv, NULL); return (tv.tv_sec * 1000ULL) + (tv.tv_usec / 1000ULL); } static int do_poll(int fd) { struct pollfd pfd; int ret; pfd.revents = 0; pfd.fd = fd; ret = poll(&pfd, 1, waittime_ms); if (ret == -1) { pr_err("poll"); return -1; } return ret && (pfd.revents & POLLERR); } static int wait_compl(int fd) { int64_t tstop = gettimeofday_ms() + waittime_ms; char control[CMSG_SPACE(100)] = {}; struct sock_extended_err *serr; struct msghdr msg = {}; struct cmsghdr *cm; __u32 hi, lo; int ret; msg.msg_control = control; msg.msg_controllen = sizeof(control); while (gettimeofday_ms() < tstop) { ret = do_poll(fd); if (ret < 0) return ret; if (!ret) continue; ret = recvmsg(fd, &msg, MSG_ERRQUEUE); if (ret < 0) { if (errno == EAGAIN) continue; pr_err("recvmsg(MSG_ERRQUEUE)"); return -1; } if (msg.msg_flags & MSG_CTRUNC) { pr_err("MSG_CTRUNC"); return -1; } for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) { if (cm->cmsg_level != SOL_IP && cm->cmsg_level != SOL_IPV6) continue; if (cm->cmsg_level == SOL_IP && cm->cmsg_type != IP_RECVERR) continue; if (cm->cmsg_level == SOL_IPV6 && cm->cmsg_type != IPV6_RECVERR) continue; serr = (void *)CMSG_DATA(cm); if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) { pr_err("wrong origin %u", serr->ee_origin); return -1; } if (serr->ee_errno != 0) { pr_err("wrong errno %d", serr->ee_errno); return -1; } hi = serr->ee_data; lo = serr->ee_info; fprintf(stderr, "tx complete [%d,%d]\n", lo, hi); return 0; } } pr_err("did not receive tx completion"); return -1; } static int do_client(struct memory_buffer *mem) { char ctrl_data[CMSG_SPACE(sizeof(__u32))]; struct sockaddr_in6 server_sin; struct sockaddr_in6 client_sin; struct ynl_sock *ys = NULL; struct iovec iov[MAX_IOV]; struct msghdr msg = {}; ssize_t line_size = 0; struct cmsghdr *cmsg; char *line = NULL; int ret, err = -1; size_t len = 0; int socket_fd; __u32 ddmabuf; int opt = 1; ret = parse_address(server_ip, atoi(port), &server_sin); if (ret < 0) { pr_err("parse server address"); return -1; } if (client_ip) { ret = parse_address(client_ip, atoi(port), &client_sin); if (ret < 0) { pr_err("parse client address"); return ret; } } socket_fd = socket(AF_INET6, SOCK_STREAM, 0); if (socket_fd < 0) { pr_err("create socket"); return -1; } if (enable_reuseaddr(socket_fd)) goto err_close_socket; ret = setsockopt(socket_fd, SOL_SOCKET, SO_BINDTODEVICE, ifname, strlen(ifname) + 1); if (ret) { pr_err("bindtodevice"); goto err_close_socket; } if (bind_tx_queue(ifindex, mem->fd, &ys)) { pr_err("Failed to bind"); goto err_close_socket; } if (client_ip) { ret = bind(socket_fd, &client_sin, sizeof(client_sin)); if (ret) { pr_err("bind"); goto err_unbind; } } ret = setsockopt(socket_fd, SOL_SOCKET, SO_ZEROCOPY, &opt, sizeof(opt)); if (ret) { pr_err("set sock opt"); goto err_unbind; } fprintf(stderr, "Connect to %s %d (via %s)\n", server_ip, ntohs(server_sin.sin6_port), ifname); ret = connect(socket_fd, &server_sin, sizeof(server_sin)); if (ret) { pr_err("connect"); goto err_unbind; } while (1) { free(line); line = NULL; line_size = getline(&line, &len, stdin); if (line_size < 0) break; if (max_chunk) { msg.msg_iovlen = (line_size + max_chunk - 1) / max_chunk; if (msg.msg_iovlen > MAX_IOV) { pr_err("can't partition %zd bytes into maximum of %d chunks", line_size, MAX_IOV); goto err_free_line; } for (int i = 0; i < msg.msg_iovlen; i++) { iov[i].iov_base = (void *)(i * max_chunk); iov[i].iov_len = max_chunk; } iov[msg.msg_iovlen - 1].iov_len = line_size - (msg.msg_iovlen - 1) * max_chunk; } else { iov[0].iov_base = 0; iov[0].iov_len = line_size; msg.msg_iovlen = 1; } msg.msg_iov = iov; provider->memcpy_to_device(mem, 0, line, line_size); msg.msg_control = ctrl_data; msg.msg_controllen = sizeof(ctrl_data); cmsg = CMSG_FIRSTHDR(&msg); cmsg->cmsg_level = SOL_SOCKET; cmsg->cmsg_type = SCM_DEVMEM_DMABUF; cmsg->cmsg_len = CMSG_LEN(sizeof(__u32)); ddmabuf = tx_dmabuf_id; *((__u32 *)CMSG_DATA(cmsg)) = ddmabuf; ret = sendmsg(socket_fd, &msg, MSG_ZEROCOPY); if (ret < 0) { pr_err("Failed sendmsg"); goto err_free_line; } fprintf(stderr, "sendmsg_ret=%d\n", ret); if (ret != line_size) { pr_err("Did not send all bytes %d vs %zd", ret, line_size); goto err_free_line; } if (wait_compl(socket_fd)) goto err_free_line; } fprintf(stderr, "%s: tx ok\n", TEST_PREFIX); err = 0; err_free_line: free(line); err_unbind: ynl_sock_destroy(ys); err_close_socket: close(socket_fd); return err; } int main(int argc, char *argv[]) { struct memory_buffer *mem; int is_server = 0, opt; int ret, err = 1; while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:z:")) != -1) { switch (opt) { case 'l': is_server = 1; break; case 's': server_ip = optarg; break; case 'c': client_ip = optarg; break; case 'p': port = optarg; break; case 'v': do_validation = atoll(optarg); break; case 'q': num_queues = atoi(optarg); break; case 't': start_queue = atoi(optarg); break; case 'f': ifname = optarg; break; case 'z': max_chunk = atoi(optarg); break; case '?': fprintf(stderr, "unknown option: %c\n", optopt); break; } } if (!ifname) { pr_err("Missing -f argument"); return 1; } ifindex = if_nametoindex(ifname); fprintf(stderr, "using ifindex=%u\n", ifindex); if (!server_ip && !client_ip) { if (start_queue < 0 && num_queues < 0) { num_queues = rxq_num(ifindex); if (num_queues < 0) { pr_err("couldn't detect number of queues"); return 1; } if (num_queues < 2) { pr_err("number of device queues is too low"); return 1; } /* make sure can bind to multiple queues */ start_queue = num_queues / 2; num_queues /= 2; } if (start_queue < 0 || num_queues < 0) { pr_err("Both -t and -q are required"); return 1; } return run_devmem_tests(); } if (start_queue < 0 && num_queues < 0) { num_queues = rxq_num(ifindex); if (num_queues < 2) { pr_err("number of device queues is too low"); return 1; } num_queues = 1; start_queue = rxq_num(ifindex) - num_queues; if (start_queue < 0) { pr_err("couldn't detect number of queues"); return 1; } fprintf(stderr, "using queues %d..%d\n", start_queue, start_queue + num_queues); } for (; optind < argc; optind++) fprintf(stderr, "extra arguments: %s\n", argv[optind]); if (start_queue < 0) { pr_err("Missing -t argument"); return 1; } if (num_queues < 0) { pr_err("Missing -q argument"); return 1; } if (!server_ip) { pr_err("Missing -s argument"); return 1; } if (!port) { pr_err("Missing -p argument"); return 1; } mem = provider->alloc(getpagesize() * NUM_PAGES); if (!mem) { pr_err("Failed to allocate memory buffer"); return 1; } ret = is_server ? do_server(mem) : do_client(mem); if (ret) goto err_free_mem; err = 0; err_free_mem: provider->free(mem); return err; }