// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2025 Broadcom.

#include <asm/byteorder.h>
#include <linux/dma-mapping.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/if_vlan.h>
#include <linux/interrupt.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/pci.h>
#include <linux/prefetch.h>
#include <linux/skbuff.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <net/dst_metadata.h>
#include <net/gro.h>
#include <net/ip6_checksum.h>
#include <net/netdev_queues.h>
#include <net/page_pool/helpers.h>
#include <net/tcp.h>

#include "bnge.h"
#include "bnge_hwrm.h"
#include "bnge_hwrm_lib.h"
#include "bnge_netdev.h"
#include "bnge_rmem.h"
#include "bnge_txrx.h"

/* MSI-X handler: prefetch the next notification entry and schedule NAPI. */
irqreturn_t bnge_msix(int irq, void *dev_instance)
{
	struct bnge_napi *bnapi = dev_instance;
	struct bnge_nq_ring_info *nqr;
	struct bnge_net *bn;
	u32 cons;

	bn = bnapi->bn;
	nqr = &bnapi->nq_ring;
	cons = RING_CMP(bn, nqr->nq_raw_cons);
	prefetch(&nqr->desc_ring[CP_RING(cons)][CP_IDX(cons)]);
	napi_schedule(&bnapi->napi);
	return IRQ_HANDLED;
}

static struct rx_agg_cmp *bnge_get_tpa_agg(struct bnge_net *bn,
					   struct bnge_rx_ring_info *rxr,
					   u16 agg_id, u16 curr)
{
	struct bnge_tpa_info *tpa_info = &rxr->rx_tpa[agg_id];

	return &tpa_info->agg_arr[curr];
}

static struct rx_agg_cmp *bnge_get_agg(struct bnge_net *bn,
				       struct bnge_cp_ring_info *cpr,
				       u16 cp_cons, u16 curr)
{
	struct rx_agg_cmp *agg;

	cp_cons = RING_CMP(bn, ADV_RAW_CMP(cp_cons, curr));
	agg = (struct rx_agg_cmp *)
		&cpr->desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)];
	return agg;
}

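/* Recycle @agg_bufs aggregation buffers onto the RX aggregation ring,
 * starting at offset @start of the TPA agg array (@tpa == true) or of the
 * completion ring entry at @idx, so they can be posted to the NIC again.
 */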
static void bnge_reuse_rx_agg_bufs(struct bnge_cp_ring_info *cpr, u16 idx,
				   u16 start, u32 agg_bufs, bool tpa)
{
	struct bnge_napi *bnapi = cpr->bnapi;
	struct bnge_net *bn = bnapi->bn;
	struct bnge_rx_ring_info *rxr;
	u16 prod, sw_prod;
	u32 i;

	rxr = bnapi->rx_ring;
	sw_prod = rxr->rx_sw_agg_prod;
	prod = rxr->rx_agg_prod;
	for (i = 0; i < agg_bufs; i++) {
		struct bnge_sw_rx_agg_bd *cons_rx_buf, *prod_rx_buf;
		struct rx_agg_cmp *agg;
		struct rx_bd *prod_bd;
		netmem_ref netmem;
		u16 cons;

		if (tpa)
			agg = bnge_get_tpa_agg(bn, rxr, idx, start + i);
		else
			agg = bnge_get_agg(bn, cpr, idx, start + i);
		cons = agg->rx_agg_cmp_opaque;
		__clear_bit(cons, rxr->rx_agg_bmap);

		if (unlikely(test_bit(sw_prod, rxr->rx_agg_bmap)))
			sw_prod = bnge_find_next_agg_idx(rxr, sw_prod);

		__set_bit(sw_prod, rxr->rx_agg_bmap);
		prod_rx_buf = &rxr->rx_agg_buf_ring[sw_prod];
		cons_rx_buf = &rxr->rx_agg_buf_ring[cons];

		/* It is possible for sw_prod to be equal to cons, so
		 * set cons_rx_buf->netmem to 0 first.
		 */
		netmem = cons_rx_buf->netmem;
		cons_rx_buf->netmem = 0;
		prod_rx_buf->netmem = netmem;
		prod_rx_buf->offset = cons_rx_buf->offset;

		prod_rx_buf->mapping = cons_rx_buf->mapping;

		prod_bd = &rxr->rx_agg_desc_ring[RX_AGG_RING(bn, prod)]
						[RX_IDX(prod)];

		prod_bd->rx_bd_haddr = cpu_to_le64(cons_rx_buf->mapping);
		prod_bd->rx_bd_opaque = sw_prod;

		prod = NEXT_RX_AGG(prod);
		sw_prod = RING_RX_AGG(bn, NEXT_RX_AGG(sw_prod));
	}
	rxr->rx_agg_prod = prod;
	rxr->rx_sw_agg_prod = sw_prod;
}

static int bnge_agg_bufs_valid(struct bnge_net *bn,
			       struct bnge_cp_ring_info *cpr,
			       u8 agg_bufs, u32 *raw_cons)
{
	struct rx_agg_cmp *agg;
	u16 last;

	*raw_cons = ADV_RAW_CMP(*raw_cons, agg_bufs);
	last = RING_CMP(bn, *raw_cons);
	agg = (struct rx_agg_cmp *)
		&cpr->desc_ring[CP_RING(last)][CP_IDX(last)];
	return RX_AGG_CMP_VALID(bn, agg, *raw_cons);
}

static int bnge_discard_rx(struct bnge_net *bn, struct bnge_cp_ring_info *cpr,
			   u32 *raw_cons, void *cmp)
{
	u32 tmp_raw_cons = *raw_cons;
	struct rx_cmp *rxcmp = cmp;
	u8 cmp_type, agg_bufs = 0;

	cmp_type = RX_CMP_TYPE(rxcmp);

	if (cmp_type == CMP_TYPE_RX_L2_CMP) {
		agg_bufs = (le32_to_cpu(rxcmp->rx_cmp_misc_v1) &
			    RX_CMP_AGG_BUFS) >> RX_CMP_AGG_BUFS_SHIFT;
	} else if (cmp_type == CMP_TYPE_RX_L2_TPA_END_CMP) {
		return 0;
	}

	if (agg_bufs) {
		if (!bnge_agg_bufs_valid(bn, cpr, agg_bufs, &tmp_raw_cons))
			return -EBUSY;
	}
	*raw_cons = tmp_raw_cons;
	return 0;
}

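/* Attach @agg_bufs aggregation buffers to @skb as page frags and refill the
 * aggregation ring as each one is consumed.  On a refill failure the current
 * frag is unwound and the remaining buffers are recycled; returns the total
 * frag length added, or 0 on failure.
 */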
static u32 __bnge_rx_agg_netmems(struct bnge_net *bn,
				 struct bnge_cp_ring_info *cpr,
				 u16 idx, u32 agg_bufs, bool tpa,
				 struct sk_buff *skb)
{
	struct bnge_napi *bnapi = cpr->bnapi;
	struct skb_shared_info *shinfo;
	struct bnge_rx_ring_info *rxr;
	u32 i, total_frag_len = 0;
	u16 prod;

	rxr = bnapi->rx_ring;
	prod = rxr->rx_agg_prod;
	shinfo = skb_shinfo(skb);

	for (i = 0; i < agg_bufs; i++) {
		struct bnge_sw_rx_agg_bd *cons_rx_buf;
		struct rx_agg_cmp *agg;
		u16 cons, frag_len;
		netmem_ref netmem;

		if (tpa)
			agg = bnge_get_tpa_agg(bn, rxr, idx, i);
		else
			agg = bnge_get_agg(bn, cpr, idx, i);
		cons = agg->rx_agg_cmp_opaque;
		frag_len = (le32_to_cpu(agg->rx_agg_cmp_len_flags_type) &
			    RX_AGG_CMP_LEN) >> RX_AGG_CMP_LEN_SHIFT;

		cons_rx_buf = &rxr->rx_agg_buf_ring[cons];
		skb_add_rx_frag_netmem(skb, i, cons_rx_buf->netmem,
				       cons_rx_buf->offset, frag_len,
				       BNGE_RX_PAGE_SIZE);
		__clear_bit(cons, rxr->rx_agg_bmap);

		/* It is possible for bnge_alloc_rx_netmem() to allocate
		 * a sw_prod index that equals the cons index, so we
		 * need to clear the cons entry now.
		 */
		netmem = cons_rx_buf->netmem;
		cons_rx_buf->netmem = 0;

		if (bnge_alloc_rx_netmem(bn, rxr, prod, GFP_ATOMIC) != 0) {
			skb->len -= frag_len;
			skb->data_len -= frag_len;
			skb->truesize -= BNGE_RX_PAGE_SIZE;
			--shinfo->nr_frags;
			cons_rx_buf->netmem = netmem;

			/* Update prod since possibly some netmems have been
			 * allocated already.
			 */
			rxr->rx_agg_prod = prod;
			bnge_reuse_rx_agg_bufs(cpr, idx, i, agg_bufs - i, tpa);
			return 0;
		}

		page_pool_dma_sync_netmem_for_cpu(rxr->page_pool, netmem, 0,
						  BNGE_RX_PAGE_SIZE);

		total_frag_len += frag_len;
		prod = NEXT_RX_AGG(prod);
	}
	rxr->rx_agg_prod = prod;
	return total_frag_len;
}

static struct sk_buff *bnge_rx_agg_netmems_skb(struct bnge_net *bn,
					       struct bnge_cp_ring_info *cpr,
					       struct sk_buff *skb, u16 idx,
					       u32 agg_bufs, bool tpa)
{
	u32 total_frag_len;

	total_frag_len = __bnge_rx_agg_netmems(bn, cpr, idx, agg_bufs, tpa,
					       skb);
	if (!total_frag_len) {
		skb_mark_for_recycle(skb);
		dev_kfree_skb(skb);
		return NULL;
	}
	return skb;
}

static void bnge_sched_reset_rxr(struct bnge_net *bn,
				 struct bnge_rx_ring_info *rxr)
{
	if (!rxr->bnapi->in_reset) {
		rxr->bnapi->in_reset = true;
		/* TODO: Initiate reset task */
	}
	rxr->rx_next_cons = 0xffff;
}

static void bnge_sched_reset_txr(struct bnge_net *bn,
				 struct bnge_tx_ring_info *txr, u16 curr)
{
	struct bnge_napi *bnapi = txr->bnapi;

	if (bnapi->tx_fault)
		return;

	netdev_err(bn->netdev, "Invalid Tx completion (ring:%d tx_hw_cons:%u cons:%u prod:%u curr:%u)",
		   txr->txq_index, txr->tx_hw_cons,
		   txr->tx_cons, txr->tx_prod, curr);
	WARN_ON_ONCE(1);

	bnapi->tx_fault = 1;
	/* TODO: Initiate reset task */
}

static u16 bnge_tpa_alloc_agg_idx(struct bnge_rx_ring_info *rxr, u16 agg_id)
{
	struct bnge_tpa_idx_map *map = rxr->rx_tpa_idx_map;
	u16 idx = agg_id & MAX_TPA_MASK;

	if (test_bit(idx, map->agg_idx_bmap)) {
		idx = find_first_zero_bit(map->agg_idx_bmap, MAX_TPA);
		if (idx >= MAX_TPA)
			return INVALID_HW_RING_ID;
	}
	__set_bit(idx, map->agg_idx_bmap);
	map->agg_id_tbl[agg_id] = idx;
	return idx;
}

static void bnge_free_agg_idx(struct bnge_rx_ring_info *rxr, u16 idx)
{
	struct bnge_tpa_idx_map *map = rxr->rx_tpa_idx_map;

	__clear_bit(idx, map->agg_idx_bmap);
}

static u16 bnge_lookup_agg_idx(struct bnge_rx_ring_info *rxr, u16 agg_id)
{
	struct bnge_tpa_idx_map *map = rxr->rx_tpa_idx_map;

	return map->agg_id_tbl[agg_id];
}

static void bnge_tpa_metadata(struct bnge_tpa_info *tpa_info,
			      struct rx_tpa_start_cmp *tpa_start,
			      struct rx_tpa_start_cmp_ext *tpa_start1)
{
	tpa_info->cfa_code_valid = 1;
	tpa_info->cfa_code = TPA_START_CFA_CODE(tpa_start1);
	tpa_info->vlan_valid = 0;
	if (tpa_info->flags2 & RX_CMP_FLAGS2_META_FORMAT_VLAN) {
		tpa_info->vlan_valid = 1;
		tpa_info->metadata =
			le32_to_cpu(tpa_start1->rx_tpa_start_cmp_metadata);
	}
}

static void bnge_tpa_metadata_v2(struct bnge_tpa_info *tpa_info,
				 struct rx_tpa_start_cmp *tpa_start,
				 struct rx_tpa_start_cmp_ext *tpa_start1)
{
	tpa_info->vlan_valid = 0;
	if (TPA_START_VLAN_VALID(tpa_start)) {
		u32 tpid_sel = TPA_START_VLAN_TPID_SEL(tpa_start);
		u32 vlan_proto = ETH_P_8021Q;

		tpa_info->vlan_valid = 1;
		if (tpid_sel == RX_TPA_START_METADATA1_TPID_8021AD)
			vlan_proto = ETH_P_8021AD;
		tpa_info->metadata = vlan_proto << 16 |
				     TPA_START_METADATA0_TCI(tpa_start1);
	}
}

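/* Handle a TPA_START completion: move the packet buffer into the TPA info
 * slot for this aggregation ID, record the per-flow state (length, RSS hash,
 * GSO type, VLAN metadata), and recycle the consumed RX ring entries.
 */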
static void bnge_tpa_start(struct bnge_net *bn, struct bnge_rx_ring_info *rxr,
			   u8 cmp_type, struct rx_tpa_start_cmp *tpa_start,
			   struct rx_tpa_start_cmp_ext *tpa_start1)
{
	struct bnge_sw_rx_bd *cons_rx_buf, *prod_rx_buf;
	struct bnge_tpa_info *tpa_info;
	u16 cons, prod, agg_id;
	struct rx_bd *prod_bd;
	dma_addr_t mapping;

	agg_id = TPA_START_AGG_ID(tpa_start);
	agg_id = bnge_tpa_alloc_agg_idx(rxr, agg_id);
	if (unlikely(agg_id == INVALID_HW_RING_ID)) {
		netdev_warn(bn->netdev, "Unable to allocate agg ID for ring %d, agg 0x%lx\n",
			    rxr->bnapi->index, TPA_START_AGG_ID(tpa_start));
		bnge_sched_reset_rxr(bn, rxr);
		return;
	}
	cons = tpa_start->rx_tpa_start_cmp_opaque;
	prod = rxr->rx_prod;
	cons_rx_buf = &rxr->rx_buf_ring[cons];
	prod_rx_buf = &rxr->rx_buf_ring[RING_RX(bn, prod)];
	tpa_info = &rxr->rx_tpa[agg_id];

	if (unlikely(cons != rxr->rx_next_cons ||
		     TPA_START_ERROR(tpa_start))) {
		netdev_warn(bn->netdev, "TPA cons %x, expected cons %x, error code %lx\n",
			    cons, rxr->rx_next_cons,
			    TPA_START_ERROR_CODE(tpa_start1));
		bnge_sched_reset_rxr(bn, rxr);
		return;
	}
	prod_rx_buf->data = tpa_info->data;
	prod_rx_buf->data_ptr = tpa_info->data_ptr;

	mapping = tpa_info->mapping;
	prod_rx_buf->mapping = mapping;

	prod_bd = &rxr->rx_desc_ring[RX_RING(bn, prod)][RX_IDX(prod)];

	prod_bd->rx_bd_haddr = cpu_to_le64(mapping);

	tpa_info->data = cons_rx_buf->data;
	tpa_info->data_ptr = cons_rx_buf->data_ptr;
	cons_rx_buf->data = NULL;
	tpa_info->mapping = cons_rx_buf->mapping;

	tpa_info->len =
		le32_to_cpu(tpa_start->rx_tpa_start_cmp_len_flags_type) >>
		RX_TPA_START_CMP_LEN_SHIFT;
	if (likely(TPA_START_HASH_VALID(tpa_start))) {
		tpa_info->hash_type = PKT_HASH_TYPE_L4;
		if (TPA_START_IS_IPV6(tpa_start1))
			tpa_info->gso_type = SKB_GSO_TCPV6;
		else
			tpa_info->gso_type = SKB_GSO_TCPV4;
		tpa_info->rss_hash =
			le32_to_cpu(tpa_start->rx_tpa_start_cmp_rss_hash);
	} else {
		tpa_info->hash_type = PKT_HASH_TYPE_NONE;
		tpa_info->gso_type = 0;
		netif_warn(bn, rx_err, bn->netdev, "TPA packet without valid hash\n");
	}
	tpa_info->flags2 = le32_to_cpu(tpa_start1->rx_tpa_start_cmp_flags2);
	tpa_info->hdr_info = le32_to_cpu(tpa_start1->rx_tpa_start_cmp_hdr_info);
	if (cmp_type == CMP_TYPE_RX_L2_TPA_START_CMP)
		bnge_tpa_metadata(tpa_info, tpa_start, tpa_start1);
	else
		bnge_tpa_metadata_v2(tpa_info, tpa_start, tpa_start1);
	tpa_info->agg_count = 0;

	rxr->rx_prod = NEXT_RX(prod);
	cons = RING_RX(bn, NEXT_RX(cons));
	rxr->rx_next_cons = RING_RX(bn, NEXT_RX(cons));
	cons_rx_buf = &rxr->rx_buf_ring[cons];

	bnge_reuse_rx_data(rxr, cons, cons_rx_buf->data);
	rxr->rx_prod = NEXT_RX(rxr->rx_prod);
	cons_rx_buf->data = NULL;
}

static void bnge_abort_tpa(struct bnge_cp_ring_info *cpr, u16 idx,
			   u32 agg_bufs)
{
	if (agg_bufs)
		bnge_reuse_rx_agg_bufs(cpr, idx, 0, agg_bufs, true);
}

static void bnge_tpa_agg(struct bnge_net *bn, struct bnge_rx_ring_info *rxr,
			 struct rx_agg_cmp *rx_agg)
{
	u16 agg_id = TPA_AGG_AGG_ID(rx_agg);
	struct bnge_tpa_info *tpa_info;

	agg_id = bnge_lookup_agg_idx(rxr, agg_id);
	tpa_info = &rxr->rx_tpa[agg_id];
	if (unlikely(tpa_info->agg_count >= MAX_SKB_FRAGS)) {
		netdev_warn(bn->netdev, "TPA completion count %d exceeds limit for ring %d\n",
			    tpa_info->agg_count, rxr->bnapi->index);
		bnge_sched_reset_rxr(bn, rxr);
		return;
	}
	tpa_info->agg_arr[tpa_info->agg_count++] = *rx_agg;
}

void bnge_reuse_rx_data(struct bnge_rx_ring_info *rxr, u16 cons, void *data)
{
	struct bnge_sw_rx_bd *cons_rx_buf, *prod_rx_buf;
	struct bnge_net *bn = rxr->bnapi->bn;
	struct rx_bd *cons_bd, *prod_bd;
	u16 prod = rxr->rx_prod;

	prod_rx_buf = &rxr->rx_buf_ring[RING_RX(bn, prod)];
	cons_rx_buf = &rxr->rx_buf_ring[cons];

	prod_rx_buf->data = data;
	prod_rx_buf->data_ptr = cons_rx_buf->data_ptr;

	prod_rx_buf->mapping = cons_rx_buf->mapping;

	prod_bd = &rxr->rx_desc_ring[RX_RING(bn, prod)][RX_IDX(prod)];
	cons_bd = &rxr->rx_desc_ring[RX_RING(bn, cons)][RX_IDX(cons)];
	prod_bd->rx_bd_haddr = cons_bd->rx_bd_haddr;
}

static void bnge_deliver_skb(struct bnge_net *bn, struct bnge_napi *bnapi,
			     struct sk_buff *skb)
{
	skb_mark_for_recycle(skb);
	skb_record_rx_queue(skb, bnapi->index);
	napi_gro_receive(&bnapi->napi, skb);
}

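/* Allocate a small skb and copy @len bytes out of the DMA-mapped RX buffer;
 * used for packets at or below the copybreak threshold so the original
 * buffer can be reused in place.
 */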
static struct sk_buff *bnge_copy_skb(struct bnge_napi *bnapi, u8 *data,
				     unsigned int len, dma_addr_t mapping)
{
	struct bnge_net *bn = bnapi->bn;
	struct bnge_dev *bd = bn->bd;
	struct sk_buff *skb;

	skb = napi_alloc_skb(&bnapi->napi, len);
	if (!skb)
		return NULL;

	dma_sync_single_for_cpu(bd->dev, mapping, len, bn->rx_dir);

	memcpy(skb->data - NET_IP_ALIGN, data - NET_IP_ALIGN,
	       len + NET_IP_ALIGN);

	dma_sync_single_for_device(bd->dev, mapping, len, bn->rx_dir);
	skb_put(skb, len);

	return skb;
}

#ifdef CONFIG_INET
static void bnge_gro_tunnel(struct sk_buff *skb, __be16 ip_proto)
{
	struct udphdr *uh = NULL;

	if (ip_proto == htons(ETH_P_IP)) {
		struct iphdr *iph = (struct iphdr *)skb->data;

		if (iph->protocol == IPPROTO_UDP)
			uh = (struct udphdr *)(iph + 1);
	} else {
		struct ipv6hdr *iph = (struct ipv6hdr *)skb->data;

		if (iph->nexthdr == IPPROTO_UDP)
			uh = (struct udphdr *)(iph + 1);
	}
	if (uh) {
		if (uh->check)
			skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
		else
			skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
	}
}

static struct sk_buff *bnge_gro_func(struct bnge_tpa_info *tpa_info,
				     int payload_off, int tcp_ts,
				     struct sk_buff *skb)
{
	u16 outer_ip_off, inner_ip_off, inner_mac_off;
	u32 hdr_info = tpa_info->hdr_info;
	int iphdr_len, nw_off;

	inner_ip_off = BNGE_TPA_INNER_L3_OFF(hdr_info);
	inner_mac_off = BNGE_TPA_INNER_L2_OFF(hdr_info);
	outer_ip_off = BNGE_TPA_OUTER_L3_OFF(hdr_info);

	nw_off = inner_ip_off - ETH_HLEN;
	skb_set_network_header(skb, nw_off);
	iphdr_len = (tpa_info->flags2 & RX_TPA_START_CMP_FLAGS2_IP_TYPE) ?
		    sizeof(struct ipv6hdr) : sizeof(struct iphdr);
	skb_set_transport_header(skb, nw_off + iphdr_len);

	if (inner_mac_off) { /* tunnel */
		__be16 proto = *((__be16 *)(skb->data + outer_ip_off -
					    ETH_HLEN - 2));

		bnge_gro_tunnel(skb, proto);
	}

	return skb;
}

static struct sk_buff *bnge_gro_skb(struct bnge_net *bn,
				    struct bnge_tpa_info *tpa_info,
				    struct rx_tpa_end_cmp *tpa_end,
				    struct rx_tpa_end_cmp_ext *tpa_end1,
				    struct sk_buff *skb)
{
	int payload_off;
	u16 segs;

	segs = TPA_END_TPA_SEGS(tpa_end);
	if (segs == 1)
		return skb;

	NAPI_GRO_CB(skb)->count = segs;
	skb_shinfo(skb)->gso_size =
		le32_to_cpu(tpa_end1->rx_tpa_end_cmp_seg_len);
	skb_shinfo(skb)->gso_type = tpa_info->gso_type;
	payload_off = TPA_END_PAYLOAD_OFF(tpa_end1);
	skb = bnge_gro_func(tpa_info, payload_off,
			    TPA_END_GRO_TS(tpa_end), skb);
	if (likely(skb))
		tcp_gro_complete(skb);

	return skb;
}
#endif

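/* Handle a TPA_END completion: build an skb for the aggregated packet,
 * attach its aggregation buffers, and apply hash, VLAN, and checksum
 * metadata.  Returns NULL if the packet is dropped, or ERR_PTR(-EBUSY)
 * if the completion ring needs to be polled again.
 */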
static struct sk_buff *bnge_tpa_end(struct bnge_net *bn,
				    struct bnge_cp_ring_info *cpr,
				    u32 *raw_cons,
				    struct rx_tpa_end_cmp *tpa_end,
				    struct rx_tpa_end_cmp_ext *tpa_end1,
				    u8 *event)
{
	struct bnge_napi *bnapi = cpr->bnapi;
	struct net_device *dev = bn->netdev;
	struct bnge_tpa_info *tpa_info;
	struct bnge_rx_ring_info *rxr;
	u8 *data_ptr, agg_bufs;
	struct sk_buff *skb;
	u16 idx = 0, agg_id;
	dma_addr_t mapping;
	unsigned int len;
	void *data;

	if (unlikely(bnapi->in_reset)) {
		int rc = bnge_discard_rx(bn, cpr, raw_cons, tpa_end);

		if (rc < 0)
			return ERR_PTR(-EBUSY);
		return NULL;
	}

	rxr = bnapi->rx_ring;
	agg_id = TPA_END_AGG_ID(tpa_end);
	agg_id = bnge_lookup_agg_idx(rxr, agg_id);
	agg_bufs = TPA_END_AGG_BUFS(tpa_end1);
	tpa_info = &rxr->rx_tpa[agg_id];
	if (unlikely(agg_bufs != tpa_info->agg_count)) {
		netdev_warn(bn->netdev, "TPA end agg_buf %d != expected agg_bufs %d\n",
			    agg_bufs, tpa_info->agg_count);
		agg_bufs = tpa_info->agg_count;
	}
	tpa_info->agg_count = 0;
	*event |= BNGE_AGG_EVENT;
	bnge_free_agg_idx(rxr, agg_id);
	idx = agg_id;
	data = tpa_info->data;
	data_ptr = tpa_info->data_ptr;
	prefetch(data_ptr);
	len = tpa_info->len;
	mapping = tpa_info->mapping;

	if (unlikely(agg_bufs > MAX_SKB_FRAGS || TPA_END_ERRORS(tpa_end1))) {
		bnge_abort_tpa(cpr, idx, agg_bufs);
		if (agg_bufs > MAX_SKB_FRAGS)
			netdev_warn(bn->netdev, "TPA frags %d exceeded MAX_SKB_FRAGS %d\n",
				    agg_bufs, (int)MAX_SKB_FRAGS);
		return NULL;
	}

	if (len <= bn->rx_copybreak) {
		skb = bnge_copy_skb(bnapi, data_ptr, len, mapping);
		if (!skb) {
			bnge_abort_tpa(cpr, idx, agg_bufs);
			return NULL;
		}
	} else {
		dma_addr_t new_mapping;
		u8 *new_data;

		new_data = __bnge_alloc_rx_frag(bn, &new_mapping, rxr,
						GFP_ATOMIC);
		if (!new_data) {
			bnge_abort_tpa(cpr, idx, agg_bufs);
			return NULL;
		}

		tpa_info->data = new_data;
		tpa_info->data_ptr = new_data + bn->rx_offset;
		tpa_info->mapping = new_mapping;

		skb = napi_build_skb(data, bn->rx_buf_size);
		dma_sync_single_for_cpu(bn->bd->dev, mapping,
					bn->rx_buf_use_size, bn->rx_dir);

		if (!skb) {
			page_pool_free_va(rxr->head_pool, data, true);
			bnge_abort_tpa(cpr, idx, agg_bufs);
			return NULL;
		}
		skb_mark_for_recycle(skb);
		skb_reserve(skb, bn->rx_offset);
		skb_put(skb, len);
	}

	if (agg_bufs) {
		skb = bnge_rx_agg_netmems_skb(bn, cpr, skb, idx, agg_bufs,
					      true);
		/* Page reuse already handled by bnge_rx_agg_netmems_skb(). */
		if (!skb)
			return NULL;
	}

	skb->protocol = eth_type_trans(skb, dev);

	if (tpa_info->hash_type != PKT_HASH_TYPE_NONE)
		skb_set_hash(skb, tpa_info->rss_hash, tpa_info->hash_type);

	if (tpa_info->vlan_valid &&
	    (dev->features & BNGE_HW_FEATURE_VLAN_ALL_RX)) {
		__be16 vlan_proto = htons(tpa_info->metadata >>
					  RX_CMP_FLAGS2_METADATA_TPID_SFT);
		u16 vtag = tpa_info->metadata &
			   RX_CMP_FLAGS2_METADATA_TCI_MASK;

		if (eth_type_vlan(vlan_proto)) {
			__vlan_hwaccel_put_tag(skb, vlan_proto, vtag);
		} else {
			dev_kfree_skb(skb);
			return NULL;
		}
	}

	skb_checksum_none_assert(skb);
	if (likely(tpa_info->flags2 & RX_TPA_START_CMP_FLAGS2_L4_CS_CALC)) {
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		skb->csum_level =
			(tpa_info->flags2 & RX_CMP_FLAGS2_T_L4_CS_CALC) >> 3;
	}

#ifdef CONFIG_INET
	if (bn->priv_flags & BNGE_NET_EN_GRO)
		skb = bnge_gro_skb(bn, tpa_info, tpa_end, tpa_end1, skb);
#endif

	return skb;
}

static enum pkt_hash_types bnge_rss_ext_op(struct bnge_net *bn,
					   struct rx_cmp *rxcmp)
{
	u8 ext_op = RX_CMP_V3_HASH_TYPE(bn->bd, rxcmp);

	switch (ext_op) {
	case EXT_OP_INNER_4:
	case EXT_OP_OUTER_4:
	case EXT_OP_INNFL_3:
	case EXT_OP_OUTFL_3:
		return PKT_HASH_TYPE_L4;
	default:
		return PKT_HASH_TYPE_L3;
	}
}

static struct sk_buff *bnge_rx_vlan(struct sk_buff *skb, u8 cmp_type,
				    struct rx_cmp *rxcmp,
				    struct rx_cmp_ext *rxcmp1)
{
	__be16 vlan_proto;
	u16 vtag;

	if (cmp_type == CMP_TYPE_RX_L2_CMP) {
		__le32 flags2 = rxcmp1->rx_cmp_flags2;
		u32 meta_data;

		if (!(flags2 & cpu_to_le32(RX_CMP_FLAGS2_META_FORMAT_VLAN)))
			return skb;

		meta_data = le32_to_cpu(rxcmp1->rx_cmp_meta_data);
		vtag = meta_data & RX_CMP_FLAGS2_METADATA_TCI_MASK;
		vlan_proto = htons(meta_data >>
				   RX_CMP_FLAGS2_METADATA_TPID_SFT);
		if (eth_type_vlan(vlan_proto))
			__vlan_hwaccel_put_tag(skb, vlan_proto, vtag);
		else
			goto vlan_err;
	} else if (cmp_type == CMP_TYPE_RX_L2_V3_CMP) {
		if (RX_CMP_VLAN_VALID(rxcmp)) {
			u32 tpid_sel = RX_CMP_VLAN_TPID_SEL(rxcmp);

			if (tpid_sel == RX_CMP_METADATA1_TPID_8021Q)
				vlan_proto = htons(ETH_P_8021Q);
			else if (tpid_sel == RX_CMP_METADATA1_TPID_8021AD)
				vlan_proto = htons(ETH_P_8021AD);
			else
				goto vlan_err;
			vtag = RX_CMP_METADATA0_TCI(rxcmp1);
			__vlan_hwaccel_put_tag(skb, vlan_proto, vtag);
		}
	}
	return skb;
vlan_err:
	skb_mark_for_recycle(skb);
	dev_kfree_skb(skb);
	return NULL;
}

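/* Build an skb around the RX buffer for packets above the copybreak
 * threshold.  A replacement buffer is posted first; if that allocation
 * fails, the current buffer is recycled and NULL is returned.
 */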
static struct sk_buff *bnge_rx_skb(struct bnge_net *bn,
				   struct bnge_rx_ring_info *rxr, u16 cons,
				   void *data, u8 *data_ptr,
				   dma_addr_t dma_addr, unsigned int len)
{
	struct bnge_dev *bd = bn->bd;
	u16 prod = rxr->rx_prod;
	struct sk_buff *skb;
	int err;

	err = bnge_alloc_rx_data(bn, rxr, prod, GFP_ATOMIC);
	if (unlikely(err)) {
		bnge_reuse_rx_data(rxr, cons, data);
		return NULL;
	}

	dma_sync_single_for_cpu(bd->dev, dma_addr, len, bn->rx_dir);
	skb = napi_build_skb(data, bn->rx_buf_size);
	if (!skb) {
		page_pool_free_va(rxr->head_pool, data, true);
		return NULL;
	}

	skb_mark_for_recycle(skb);
	skb_reserve(skb, bn->rx_offset);
	skb_put(skb, len);
	return skb;
}

/* returns the following:
 * 1       - 1 packet successfully received
 * 0       - successful TPA_START, packet not completed yet
 * -EBUSY  - completion ring does not have all the agg buffers yet
 * -ENOMEM - packet aborted due to out of memory
 * -EIO    - packet aborted due to hw error indicated in BD
 */
static int bnge_rx_pkt(struct bnge_net *bn, struct bnge_cp_ring_info *cpr,
		       u32 *raw_cons, u8 *event)
{
	struct bnge_napi *bnapi = cpr->bnapi;
	struct net_device *dev = bn->netdev;
	struct bnge_rx_ring_info *rxr;
	u32 tmp_raw_cons, flags, misc;
	struct bnge_sw_rx_bd *rx_buf;
	struct rx_cmp_ext *rxcmp1;
	u16 cons, prod, cp_cons;
	u8 *data_ptr, cmp_type;
	struct rx_cmp *rxcmp;
	dma_addr_t dma_addr;
	struct sk_buff *skb;
	unsigned int len;
	u8 agg_bufs;
	void *data;
	int rc = 0;

	rxr = bnapi->rx_ring;
	tmp_raw_cons = *raw_cons;
	cp_cons = RING_CMP(bn, tmp_raw_cons);
	rxcmp = (struct rx_cmp *)
		&cpr->desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)];

	cmp_type = RX_CMP_TYPE(rxcmp);

	if (cmp_type == CMP_TYPE_RX_TPA_AGG_CMP) {
		bnge_tpa_agg(bn, rxr, (struct rx_agg_cmp *)rxcmp);
		goto next_rx_no_prod_no_len;
	}

	tmp_raw_cons = NEXT_RAW_CMP(tmp_raw_cons);
	cp_cons = RING_CMP(bn, tmp_raw_cons);
	rxcmp1 = (struct rx_cmp_ext *)
		&cpr->desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)];

	if (!RX_CMP_VALID(bn, rxcmp1, tmp_raw_cons))
		return -EBUSY;

	/* The valid test of the entry must be done first before
	 * reading any further.
	 */
	dma_rmb();
	prod = rxr->rx_prod;

	if (cmp_type == CMP_TYPE_RX_L2_TPA_START_CMP ||
	    cmp_type == CMP_TYPE_RX_L2_TPA_START_V3_CMP) {
		bnge_tpa_start(bn, rxr, cmp_type,
			       (struct rx_tpa_start_cmp *)rxcmp,
			       (struct rx_tpa_start_cmp_ext *)rxcmp1);

		*event |= BNGE_RX_EVENT;
		goto next_rx_no_prod_no_len;
	} else if (cmp_type == CMP_TYPE_RX_L2_TPA_END_CMP) {
		skb = bnge_tpa_end(bn, cpr, &tmp_raw_cons,
				   (struct rx_tpa_end_cmp *)rxcmp,
				   (struct rx_tpa_end_cmp_ext *)rxcmp1,
				   event);

		if (IS_ERR(skb))
			return -EBUSY;

		rc = -ENOMEM;
		if (likely(skb)) {
			bnge_deliver_skb(bn, bnapi, skb);
			rc = 1;
		}
		*event |= BNGE_RX_EVENT;
		goto next_rx_no_prod_no_len;
	}

	cons = rxcmp->rx_cmp_opaque;
	if (unlikely(cons != rxr->rx_next_cons)) {
		int rc1 = bnge_discard_rx(bn, cpr, &tmp_raw_cons, rxcmp);

		/* 0xffff is forced error, don't print it */
		if (rxr->rx_next_cons != 0xffff)
			netdev_warn(bn->netdev, "RX cons %x != expected cons %x\n",
				    cons, rxr->rx_next_cons);
		bnge_sched_reset_rxr(bn, rxr);
		if (rc1)
			return rc1;
		goto next_rx_no_prod_no_len;
	}
	rx_buf = &rxr->rx_buf_ring[cons];
	data = rx_buf->data;
	data_ptr = rx_buf->data_ptr;
	prefetch(data_ptr);

	misc = le32_to_cpu(rxcmp->rx_cmp_misc_v1);
	agg_bufs = (misc & RX_CMP_AGG_BUFS) >> RX_CMP_AGG_BUFS_SHIFT;

	if (agg_bufs) {
		if (!bnge_agg_bufs_valid(bn, cpr, agg_bufs, &tmp_raw_cons))
			return -EBUSY;

		cp_cons = NEXT_CMP(bn, cp_cons);
		*event |= BNGE_AGG_EVENT;
	}
	*event |= BNGE_RX_EVENT;

	rx_buf->data = NULL;
	if (rxcmp1->rx_cmp_cfa_code_errors_v2 & RX_CMP_L2_ERRORS) {
		bnge_reuse_rx_data(rxr, cons, data);
		if (agg_bufs)
			bnge_reuse_rx_agg_bufs(cpr, cp_cons, 0, agg_bufs,
					       false);

		rc = -EIO;
		goto next_rx_no_len;
	}

	flags = le32_to_cpu(rxcmp->rx_cmp_len_flags_type);
	len = flags >> RX_CMP_LEN_SHIFT;
	dma_addr = rx_buf->mapping;

	if (len <= bn->rx_copybreak) {
		skb = bnge_copy_skb(bnapi, data_ptr, len, dma_addr);
		bnge_reuse_rx_data(rxr, cons, data);
	} else {
		skb = bnge_rx_skb(bn, rxr, cons, data, data_ptr, dma_addr,
				  len);
	}
	if (!skb) {
		if (agg_bufs)
			bnge_reuse_rx_agg_bufs(cpr, cp_cons, 0, agg_bufs,
					       false);
		goto oom_next_rx;
	}

	if (agg_bufs) {
		skb = bnge_rx_agg_netmems_skb(bn, cpr, skb, cp_cons, agg_bufs,
					      false);
		if (!skb)
			goto oom_next_rx;
	}

	if (RX_CMP_HASH_VALID(rxcmp)) {
		enum pkt_hash_types type;

		if (cmp_type == CMP_TYPE_RX_L2_V3_CMP) {
			type = bnge_rss_ext_op(bn, rxcmp);
		} else {
			u32 itypes = RX_CMP_ITYPES(rxcmp);

			if (itypes == RX_CMP_FLAGS_ITYPE_TCP ||
			    itypes == RX_CMP_FLAGS_ITYPE_UDP)
				type = PKT_HASH_TYPE_L4;
			else
				type = PKT_HASH_TYPE_L3;
		}
		skb_set_hash(skb, le32_to_cpu(rxcmp->rx_cmp_rss_hash), type);
	}

	skb->protocol = eth_type_trans(skb, dev);

	if (skb->dev->features & BNGE_HW_FEATURE_VLAN_ALL_RX) {
		skb = bnge_rx_vlan(skb, cmp_type, rxcmp, rxcmp1);
		if (!skb)
			goto next_rx;
	}

	skb_checksum_none_assert(skb);
	if (RX_CMP_L4_CS_OK(rxcmp1)) {
		if (dev->features & NETIF_F_RXCSUM) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			skb->csum_level = RX_CMP_ENCAP(rxcmp1);
		}
	}

	bnge_deliver_skb(bn, bnapi, skb);
	rc = 1;

next_rx:
	/* Update Stats */

next_rx_no_len:
	rxr->rx_prod = NEXT_RX(prod);
	rxr->rx_next_cons = RING_RX(bn, NEXT_RX(cons));

next_rx_no_prod_no_len:
	*raw_cons = tmp_raw_cons;
	return rc;

oom_next_rx:
	rc = -ENOMEM;
	goto next_rx;
}

/* In netpoll mode, if we are using a combined completion ring, we need to
 * discard the rx packets and recycle the buffers.
 */
static int bnge_force_rx_discard(struct bnge_net *bn,
				 struct bnge_cp_ring_info *cpr,
				 u32 *raw_cons, u8 *event)
{
	u32 tmp_raw_cons = *raw_cons;
	struct rx_cmp_ext *rxcmp1;
	struct rx_cmp *rxcmp;
	u16 cp_cons;
	u8 cmp_type;
	int rc;

	cp_cons = RING_CMP(bn, tmp_raw_cons);
	rxcmp = (struct rx_cmp *)
		&cpr->desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)];

	tmp_raw_cons = NEXT_RAW_CMP(tmp_raw_cons);
	cp_cons = RING_CMP(bn, tmp_raw_cons);
	rxcmp1 = (struct rx_cmp_ext *)
		&cpr->desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)];

	if (!RX_CMP_VALID(bn, rxcmp1, tmp_raw_cons))
		return -EBUSY;

	/* The valid test of the entry must be done first before
	 * reading any further.
	 */
	dma_rmb();
	cmp_type = RX_CMP_TYPE(rxcmp);
	if (cmp_type == CMP_TYPE_RX_L2_CMP ||
	    cmp_type == CMP_TYPE_RX_L2_V3_CMP) {
		rxcmp1->rx_cmp_cfa_code_errors_v2 |=
			cpu_to_le32(RX_CMPL_ERRORS_CRC_ERROR);
	} else if (cmp_type == CMP_TYPE_RX_L2_TPA_END_CMP) {
		struct rx_tpa_end_cmp_ext *tpa_end1;

		tpa_end1 = (struct rx_tpa_end_cmp_ext *)rxcmp1;
		tpa_end1->rx_tpa_end_cmp_errors_v2 |=
			cpu_to_le32(RX_TPA_END_CMP_ERRORS);
	}
	rc = bnge_rx_pkt(bn, cpr, raw_cons, event);
	return rc;
}

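/* Reclaim completed TX descriptors up to txr->tx_hw_cons: unmap the head
 * and frag buffers, free the skbs, and wake the TX queue if enough space
 * has been freed.
 */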
static void __bnge_tx_int(struct bnge_net *bn, struct bnge_tx_ring_info *txr,
			  int budget)
{
	u16 hw_cons = txr->tx_hw_cons;
	struct bnge_dev *bd = bn->bd;
	unsigned int tx_bytes = 0;
	unsigned int tx_pkts = 0;
	struct netdev_queue *txq;
	u16 cons = txr->tx_cons;
	skb_frag_t *frag;

	txq = netdev_get_tx_queue(bn->netdev, txr->txq_index);

	while (SW_TX_RING(bn, cons) != hw_cons) {
		struct bnge_sw_tx_bd *tx_buf;
		struct sk_buff *skb;
		int j, last;

		tx_buf = &txr->tx_buf_ring[SW_TX_RING(bn, cons)];
		skb = tx_buf->skb;

		if (unlikely(!skb)) {
			bnge_sched_reset_txr(bn, txr, cons);
			return;
		}

		cons = NEXT_TX(cons);
		tx_pkts++;
		tx_bytes += skb->len;
		tx_buf->skb = NULL;

		dma_unmap_single(bd->dev, dma_unmap_addr(tx_buf, mapping),
				 skb_headlen(skb), DMA_TO_DEVICE);
		last = tx_buf->nr_frags;

		for (j = 0; j < last; j++) {
			frag = &skb_shinfo(skb)->frags[j];
			cons = NEXT_TX(cons);
			tx_buf = &txr->tx_buf_ring[SW_TX_RING(bn, cons)];
			netmem_dma_unmap_page_attrs(bd->dev,
						    dma_unmap_addr(tx_buf,
								   mapping),
						    skb_frag_size(frag),
						    DMA_TO_DEVICE, 0);
		}

		cons = NEXT_TX(cons);
		napi_consume_skb(skb, budget);
	}

	WRITE_ONCE(txr->tx_cons, cons);

	__netif_txq_completed_wake(txq, tx_pkts, tx_bytes,
				   bnge_tx_avail(bn, txr),
				   bn->tx_wake_thresh,
				   (READ_ONCE(txr->dev_state) ==
				    BNGE_DEV_STATE_CLOSING));
}

static void bnge_tx_int(struct bnge_net *bn, struct bnge_napi *bnapi,
			int budget)
{
	struct bnge_tx_ring_info *txr;
	int i;

	bnge_for_each_napi_tx(i, bnapi, txr) {
		if (txr->tx_hw_cons != SW_TX_RING(bn, txr->tx_cons))
			__bnge_tx_int(bn, txr, budget);
	}
	bnapi->events &= ~BNGE_TX_CMP_EVENT;
}

static void __bnge_poll_work_done(struct bnge_net *bn, struct bnge_napi *bnapi,
				  int budget)
{
	struct bnge_rx_ring_info *rxr = bnapi->rx_ring;

	if ((bnapi->events & BNGE_TX_CMP_EVENT) && !bnapi->tx_fault)
		bnge_tx_int(bn, bnapi, budget);

	if ((bnapi->events & BNGE_RX_EVENT)) {
		bnge_db_write(bn->bd, &rxr->rx_db, rxr->rx_prod);
		bnapi->events &= ~BNGE_RX_EVENT;
	}

	if (bnapi->events & BNGE_AGG_EVENT) {
		bnge_db_write(bn->bd, &rxr->rx_agg_db, rxr->rx_agg_prod);
		bnapi->events &= ~BNGE_AGG_EVENT;
	}
}

static void bnge_hwrm_update_token(struct bnge_dev *bd, u16 seq_id,
				   enum bnge_hwrm_wait_state state)
{
	struct bnge_hwrm_wait_token *token;

	rcu_read_lock();
	hlist_for_each_entry_rcu(token, &bd->hwrm_pending_list, node) {
		if (token->seq_id == seq_id) {
			WRITE_ONCE(token->state, state);
			rcu_read_unlock();
			return;
		}
	}
	rcu_read_unlock();
	dev_err(bd->dev, "Invalid hwrm seq id %d\n", seq_id);
}

static int bnge_hwrm_handler(struct bnge_dev *bd, struct tx_cmp *txcmp)
{
	struct hwrm_cmpl *h_cmpl = (struct hwrm_cmpl *)txcmp;
	u16 cmpl_type = TX_CMP_TYPE(txcmp), seq_id;

	switch (cmpl_type) {
	case CMPL_BASE_TYPE_HWRM_DONE:
		seq_id = le16_to_cpu(h_cmpl->sequence_id);
		bnge_hwrm_update_token(bd, seq_id, BNGE_HWRM_COMPLETE);
		break;

	case CMPL_BASE_TYPE_HWRM_ASYNC_EVENT:
	default:
		break;
	}

	return 0;
}

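/* Service one completion ring: handle TX and RX completions until the ring
 * is empty or @budget RX packets have been processed.  Sets
 * cpr->has_more_work when stopping early so the poll loop knows to return.
 */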
static int __bnge_poll_work(struct bnge_net *bn, struct bnge_cp_ring_info *cpr,
			    int budget)
{
	struct bnge_napi *bnapi = cpr->bnapi;
	u32 raw_cons = cpr->cp_raw_cons;
	struct tx_cmp *txcmp;
	int rx_pkts = 0;
	u8 event = 0;
	u32 cons;

	cpr->has_more_work = 0;
	cpr->had_work_done = 1;
	while (1) {
		u8 cmp_type;
		int rc;

		cons = RING_CMP(bn, raw_cons);
		txcmp = &cpr->desc_ring[CP_RING(cons)][CP_IDX(cons)];

		if (!TX_CMP_VALID(bn, txcmp, raw_cons))
			break;

		/* The valid test of the entry must be done first before
		 * reading any further.
		 */
		dma_rmb();
		cmp_type = TX_CMP_TYPE(txcmp);
		if (cmp_type == CMP_TYPE_TX_L2_CMP ||
		    cmp_type == CMP_TYPE_TX_L2_COAL_CMP) {
			u32 opaque = txcmp->tx_cmp_opaque;
			struct bnge_tx_ring_info *txr;
			u16 tx_freed;

			txr = bnapi->tx_ring[TX_OPAQUE_RING(opaque)];
			event |= BNGE_TX_CMP_EVENT;
			if (cmp_type == CMP_TYPE_TX_L2_COAL_CMP)
				txr->tx_hw_cons = TX_CMP_SQ_CONS_IDX(txcmp);
			else
				txr->tx_hw_cons = TX_OPAQUE_PROD(bn, opaque);
			tx_freed = ((txr->tx_hw_cons - txr->tx_cons) &
				    bn->tx_ring_mask);
			/* return full budget so NAPI will complete. */
			if (unlikely(tx_freed >= bn->tx_wake_thresh)) {
				rx_pkts = budget;
				raw_cons = NEXT_RAW_CMP(raw_cons);
				if (budget)
					cpr->has_more_work = 1;
				break;
			}
		} else if (cmp_type >= CMP_TYPE_RX_L2_CMP &&
			   cmp_type <= CMP_TYPE_RX_L2_TPA_START_V3_CMP) {
			if (likely(budget))
				rc = bnge_rx_pkt(bn, cpr, &raw_cons, &event);
			else
				rc = bnge_force_rx_discard(bn, cpr, &raw_cons,
							   &event);
			if (likely(rc >= 0))
				rx_pkts += rc;
			/* Increment rx_pkts when rc is -ENOMEM to count towards
			 * the NAPI budget.  Otherwise, we may potentially loop
			 * here forever if we consistently cannot allocate
			 * buffers.
			 */
			else if (rc == -ENOMEM && budget)
				rx_pkts++;
			else if (rc == -EBUSY)	/* partial completion */
				break;
		} else if (unlikely(cmp_type == CMPL_BASE_TYPE_HWRM_DONE ||
				    cmp_type == CMPL_BASE_TYPE_HWRM_FWD_REQ ||
				    cmp_type == CMPL_BA_TY_HWRM_ASY_EVT)) {
			bnge_hwrm_handler(bn->bd, txcmp);
		}
		raw_cons = NEXT_RAW_CMP(raw_cons);

		if (rx_pkts && rx_pkts == budget) {
			cpr->has_more_work = 1;
			break;
		}
	}

	cpr->cp_raw_cons = raw_cons;
	bnapi->events |= event;
	return rx_pkts;
}

static void __bnge_poll_cqs_done(struct bnge_net *bn, struct bnge_napi *bnapi,
				 u64 dbr_type, int budget)
{
	struct bnge_nq_ring_info *nqr = &bnapi->nq_ring;
	int i;

	for (i = 0; i < nqr->cp_ring_count; i++) {
		struct bnge_cp_ring_info *cpr = &nqr->cp_ring_arr[i];
		struct bnge_db_info *db;

		if (cpr->had_work_done) {
			u32 tgl = 0;

			if (dbr_type == DBR_TYPE_CQ_ARMALL) {
				cpr->had_nqe_notify = 0;
				tgl = cpr->toggle;
			}
			db = &cpr->cp_db;
			bnge_writeq(bn->bd,
				    db->db_key64 | dbr_type | DB_TOGGLE(tgl) |
				    DB_RING_IDX(db, cpr->cp_raw_cons),
				    db->doorbell);
			cpr->had_work_done = 0;
		}
	}
	__bnge_poll_work_done(bn, bnapi, budget);
}

static int __bnge_poll_cqs(struct bnge_net *bn, struct bnge_napi *bnapi,
			   int budget)
{
	struct bnge_nq_ring_info *nqr = &bnapi->nq_ring;
	int i, work_done = 0;

	for (i = 0; i < nqr->cp_ring_count; i++) {
		struct bnge_cp_ring_info *cpr = &nqr->cp_ring_arr[i];

		if (cpr->had_nqe_notify) {
			work_done += __bnge_poll_work(bn, cpr,
						      budget - work_done);
			nqr->has_more_work |= cpr->has_more_work;
		}
	}
	return work_done;
}

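/* NAPI poll handler: walk the notification queue, service each completion
 * ring that posted a notification, and re-arm the NQ doorbell once all
 * rings are drained and NAPI completes.
 */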
int bnge_napi_poll(struct napi_struct *napi, int budget)
{
	struct bnge_napi *bnapi = container_of(napi, struct bnge_napi, napi);
	struct bnge_nq_ring_info *nqr = &bnapi->nq_ring;
	u32 raw_cons = nqr->nq_raw_cons;
	struct bnge_net *bn = bnapi->bn;
	struct bnge_dev *bd = bn->bd;
	struct nqe_cn *nqcmp;
	int work_done = 0;
	u32 cons;

	if (nqr->has_more_work) {
		nqr->has_more_work = 0;
		work_done = __bnge_poll_cqs(bn, bnapi, budget);
	}
	while (1) {
		u16 type;

		cons = RING_CMP(bn, raw_cons);
		nqcmp = &nqr->desc_ring[CP_RING(cons)][CP_IDX(cons)];

		if (!NQ_CMP_VALID(bn, nqcmp, raw_cons)) {
			if (nqr->has_more_work)
				break;

			__bnge_poll_cqs_done(bn, bnapi, DBR_TYPE_CQ_ARMALL,
					     budget);
			nqr->nq_raw_cons = raw_cons;
			if (napi_complete_done(napi, work_done))
				BNGE_DB_NQ_ARM(bd, &nqr->nq_db,
					       nqr->nq_raw_cons);
			goto poll_done;
		}

		/* The valid test of the entry must be done first before
		 * reading any further.
		 */
		dma_rmb();

		type = le16_to_cpu(nqcmp->type);
		if (NQE_CN_TYPE(type) == NQ_CN_TYPE_CQ_NOTIFICATION) {
			u32 idx = le32_to_cpu(nqcmp->cq_handle_low);
			u32 cq_type = BNGE_NQ_HDL_TYPE(idx);
			struct bnge_cp_ring_info *cpr;

			/* No more budget for RX work */
			if (budget && work_done >= budget &&
			    cq_type == BNGE_NQ_HDL_TYPE_RX)
				break;

			idx = BNGE_NQ_HDL_IDX(idx);
			cpr = &nqr->cp_ring_arr[idx];
			cpr->had_nqe_notify = 1;
			cpr->toggle = NQE_CN_TOGGLE(type);
			work_done += __bnge_poll_work(bn, cpr,
						      budget - work_done);
			nqr->has_more_work |= cpr->has_more_work;
		} else {
			bnge_hwrm_handler(bn->bd, (struct tx_cmp *)nqcmp);
		}
		raw_cons = NEXT_RAW_CMP(raw_cons);
	}
	__bnge_poll_cqs_done(bn, bnapi, DBR_TYPE_CQ, budget);
	if (raw_cons != nqr->nq_raw_cons) {
		nqr->nq_raw_cons = raw_cons;
		BNGE_DB_NQ(bd, &nqr->nq_db, raw_cons);
	}
poll_done:
	return work_done;
}

static u16 bnge_xmit_get_cfa_action(struct sk_buff *skb)
{
	struct metadata_dst *md_dst = skb_metadata_dst(skb);

	if (!md_dst || md_dst->type != METADATA_HW_PORT_MUX)
		return 0;

	return md_dst->u.port_info.port_id;
}

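/* TX length hint lookup table, indexed by packet length in 512-byte units
 * (length >> 9); all lengths of 2KB and above map to the same hint.
 */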
static const u16 bnge_lhint_arr[] = {
	TX_BD_FLAGS_LHINT_512_AND_SMALLER,
	TX_BD_FLAGS_LHINT_512_TO_1023,
	TX_BD_FLAGS_LHINT_1024_TO_2047,
	TX_BD_FLAGS_LHINT_1024_TO_2047,
	TX_BD_FLAGS_LHINT_2048_AND_LARGER,
	TX_BD_FLAGS_LHINT_2048_AND_LARGER,
	TX_BD_FLAGS_LHINT_2048_AND_LARGER,
	TX_BD_FLAGS_LHINT_2048_AND_LARGER,
	TX_BD_FLAGS_LHINT_2048_AND_LARGER,
	TX_BD_FLAGS_LHINT_2048_AND_LARGER,
	TX_BD_FLAGS_LHINT_2048_AND_LARGER,
	TX_BD_FLAGS_LHINT_2048_AND_LARGER,
	TX_BD_FLAGS_LHINT_2048_AND_LARGER,
	TX_BD_FLAGS_LHINT_2048_AND_LARGER,
	TX_BD_FLAGS_LHINT_2048_AND_LARGER,
	TX_BD_FLAGS_LHINT_2048_AND_LARGER,
	TX_BD_FLAGS_LHINT_2048_AND_LARGER,
	TX_BD_FLAGS_LHINT_2048_AND_LARGER,
	TX_BD_FLAGS_LHINT_2048_AND_LARGER,
};

static void bnge_txr_db_kick(struct bnge_net *bn,
			     struct bnge_tx_ring_info *txr, u16 prod)
{
	/* Sync BD data before updating doorbell */
	wmb();

	bnge_db_write(bn->bd, &txr->tx_db, prod);
	txr->kick_pending = 0;
}

static u32 bnge_get_gso_hdr_len(struct sk_buff *skb)
{
	bool udp_gso = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4);
	u32 hdr_len;

	if (skb->encapsulation) {
		if (udp_gso)
			hdr_len = skb_inner_transport_offset(skb) +
				  sizeof(struct udphdr);
		else
			hdr_len = skb_inner_tcp_all_headers(skb);
	} else if (udp_gso) {
		hdr_len = skb_transport_offset(skb) + sizeof(struct udphdr);
	} else {
		hdr_len = skb_tcp_all_headers(skb);
	}

	return hdr_len;
}

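/* Main transmit routine.  Each packet consumes one long TX BD, one extended
 * BD for offload metadata, and one BD per page fragment.  The doorbell write
 * is deferred while more packets are pending (xmit_more) and the queue is
 * not stopped.
 */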
netdev_tx_t bnge_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	u32 len, free_size, vlan_tag_flags, cfa_action, flags;
	struct bnge_net *bn = netdev_priv(dev);
	struct bnge_tx_ring_info *txr;
	struct bnge_dev *bd = bn->bd;
	struct bnge_sw_tx_bd *tx_buf;
	struct tx_bd *txbd, *txbd0;
	struct netdev_queue *txq;
	struct tx_bd_ext *txbd1;
	u16 prod, last_frag;
	unsigned int length;
	dma_addr_t mapping;
	__le32 lflags = 0;
	skb_frag_t *frag;
	int i;

	i = skb_get_queue_mapping(skb);
	txq = netdev_get_tx_queue(dev, i);
	txr = &bn->tx_ring[bn->tx_ring_map[i]];
	prod = txr->tx_prod;

	free_size = bnge_tx_avail(bn, txr);
	if (unlikely(free_size < skb_shinfo(skb)->nr_frags + 2)) {
		/* We must have raced with NAPI cleanup */
		if (net_ratelimit() && txr->kick_pending)
			netif_warn(bn, tx_err, dev,
				   "bnge: ring busy w/ flush pending!\n");
		if (!netif_txq_try_stop(txq, bnge_tx_avail(bn, txr),
					bn->tx_wake_thresh))
			return NETDEV_TX_BUSY;
	}

	if (unlikely(ipv6_hopopt_jumbo_remove(skb)))
		goto tx_free;

	last_frag = skb_shinfo(skb)->nr_frags;

	txbd = &txr->tx_desc_ring[TX_RING(bn, prod)][TX_IDX(prod)];

	tx_buf = &txr->tx_buf_ring[SW_TX_RING(bn, prod)];
	tx_buf->skb = skb;
	tx_buf->nr_frags = last_frag;

	vlan_tag_flags = 0;
	cfa_action = bnge_xmit_get_cfa_action(skb);
	if (skb_vlan_tag_present(skb)) {
		vlan_tag_flags = TX_BD_CFA_META_KEY_VLAN |
				 skb_vlan_tag_get(skb);
		/* Currently supports 8021Q, 8021AD vlan offloads
		 * QINQ1, QINQ2, QINQ3 vlan headers are deprecated
		 */
		if (skb->vlan_proto == htons(ETH_P_8021Q))
			vlan_tag_flags |= 1 << TX_BD_CFA_META_TPID_SHIFT;
	}

	if (unlikely(skb->no_fcs))
		lflags |= cpu_to_le32(TX_BD_FLAGS_NO_CRC);

	if (eth_skb_pad(skb))
		goto tx_kick_pending;

	len = skb_headlen(skb);
	mapping = dma_map_single(bd->dev, skb->data, len, DMA_TO_DEVICE);

	if (unlikely(dma_mapping_error(bd->dev, mapping)))
		goto tx_free;

	dma_unmap_addr_set(tx_buf, mapping, mapping);
	flags = (len << TX_BD_LEN_SHIFT) | TX_BD_TYPE_LONG_TX_BD |
		TX_BD_CNT(last_frag + 2);

	txbd->tx_bd_haddr = cpu_to_le64(mapping);
	txbd->tx_bd_opaque = SET_TX_OPAQUE(bn, txr, prod, 2 + last_frag);

	prod = NEXT_TX(prod);
	txbd1 = (struct tx_bd_ext *)
		&txr->tx_desc_ring[TX_RING(bn, prod)][TX_IDX(prod)];

	if (skb_is_gso(skb)) {
		u32 hdr_len = bnge_get_gso_hdr_len(skb);

		lflags |= cpu_to_le32(TX_BD_FLAGS_LSO | TX_BD_FLAGS_T_IPID |
				      (hdr_len << (TX_BD_HSIZE_SHIFT - 1)));
		length = skb_shinfo(skb)->gso_size;
		txbd1->tx_bd_mss = cpu_to_le32(length);
		length += hdr_len;
	} else {
		length = skb->len;
		if (skb->ip_summed == CHECKSUM_PARTIAL) {
			lflags |= cpu_to_le32(TX_BD_FLAGS_TCP_UDP_CHKSUM);
			txbd1->tx_bd_mss = 0;
		}
	}

	flags |= bnge_lhint_arr[length >> 9];
	txbd->tx_bd_len_flags_type = cpu_to_le32(flags);
	txbd1->tx_bd_hsize_lflags = lflags;
	txbd1->tx_bd_cfa_meta = cpu_to_le32(vlan_tag_flags);
	txbd1->tx_bd_cfa_action =
		cpu_to_le32(cfa_action << TX_BD_CFA_ACTION_SHIFT);
	txbd0 = txbd;
	for (i = 0; i < last_frag; i++) {
		frag = &skb_shinfo(skb)->frags[i];

		prod = NEXT_TX(prod);
		txbd = &txr->tx_desc_ring[TX_RING(bn, prod)][TX_IDX(prod)];

		len = skb_frag_size(frag);
		mapping = skb_frag_dma_map(bd->dev, frag, 0, len,
					   DMA_TO_DEVICE);

		if (unlikely(dma_mapping_error(bd->dev, mapping)))
			goto tx_dma_error;

		tx_buf = &txr->tx_buf_ring[SW_TX_RING(bn, prod)];
		netmem_dma_unmap_addr_set(skb_frag_netmem(frag), tx_buf,
					  mapping, mapping);

		txbd->tx_bd_haddr = cpu_to_le64(mapping);

		flags = len << TX_BD_LEN_SHIFT;
		txbd->tx_bd_len_flags_type = cpu_to_le32(flags);
	}

	flags &= ~TX_BD_LEN;
	txbd->tx_bd_len_flags_type =
		cpu_to_le32(((len) << TX_BD_LEN_SHIFT) | flags |
			    TX_BD_FLAGS_PACKET_END);

	netdev_tx_sent_queue(txq, skb->len);

	prod = NEXT_TX(prod);
	WRITE_ONCE(txr->tx_prod, prod);

	if (!netdev_xmit_more() || netif_xmit_stopped(txq)) {
		bnge_txr_db_kick(bn, txr, prod);
	} else {
		if (free_size >= bn->tx_wake_thresh)
			txbd0->tx_bd_len_flags_type |=
				cpu_to_le32(TX_BD_FLAGS_NO_CMPL);
		txr->kick_pending = 1;
	}

	if (unlikely(bnge_tx_avail(bn, txr) <= MAX_SKB_FRAGS + 1)) {
		if (netdev_xmit_more()) {
			txbd0->tx_bd_len_flags_type &=
				cpu_to_le32(~TX_BD_FLAGS_NO_CMPL);
			bnge_txr_db_kick(bn, txr, prod);
		}

		netif_txq_try_stop(txq, bnge_tx_avail(bn, txr),
				   bn->tx_wake_thresh);
	}
	return NETDEV_TX_OK;

tx_dma_error:
	last_frag = i;

	/* start back at beginning and unmap skb */
	prod = txr->tx_prod;
	tx_buf = &txr->tx_buf_ring[SW_TX_RING(bn, prod)];
	dma_unmap_single(bd->dev, dma_unmap_addr(tx_buf, mapping),
			 skb_headlen(skb), DMA_TO_DEVICE);
	prod = NEXT_TX(prod);

	/* unmap remaining mapped pages */
	for (i = 0; i < last_frag; i++) {
		prod = NEXT_TX(prod);
		tx_buf = &txr->tx_buf_ring[SW_TX_RING(bn, prod)];
		frag = &skb_shinfo(skb)->frags[i];
		netmem_dma_unmap_page_attrs(bd->dev,
					    dma_unmap_addr(tx_buf, mapping),
					    skb_frag_size(frag),
					    DMA_TO_DEVICE, 0);
	}

tx_free:
	dev_kfree_skb_any(skb);

tx_kick_pending:
	if (txr->kick_pending)
		bnge_txr_db_kick(bn, txr, txr->tx_prod);
	txr->tx_buf_ring[SW_TX_RING(bn, txr->tx_prod)].skb = NULL;
	dev_core_stats_tx_dropped_inc(dev);
	return NETDEV_TX_OK;
}

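/* Mask out offload features the hardware cannot apply to this skb: SG when
 * there are too many fragments, and checksum/GSO when the packet is longer
 * than the length-hint table covers.
 */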
netdev_features_t bnge_features_check(struct sk_buff *skb,
				      struct net_device *dev,
				      netdev_features_t features)
{
	u32 len;

	features = vlan_features_check(skb, features);

#if (MAX_SKB_FRAGS > TX_MAX_FRAGS)
	if (skb_shinfo(skb)->nr_frags > TX_MAX_FRAGS)
		features &= ~NETIF_F_SG;
#endif

	if (skb_is_gso(skb))
		len = bnge_get_gso_hdr_len(skb) + skb_shinfo(skb)->gso_size;
	else
		len = skb->len;

	len >>= 9;
	if (unlikely(len >= ARRAY_SIZE(bnge_lhint_arr)))
		features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);

	return features;
}