/* * libcxgb_ppm.c: Chelsio common library for T3/T4/T5 iSCSI PagePod Manager * * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Written by: Karen Xie (kxie@chelsio.com) */ #define DRV_NAME "libcxgb" #define pr_fmt(fmt) DRV_NAME ": " fmt #include #include #include #include #include #include #include #include #include #include #include "libcxgb_ppm.h" /* Direct Data Placement - * Directly place the iSCSI Data-In or Data-Out PDU's payload into * pre-posted final destination host-memory buffers based on the * Initiator Task Tag (ITT) in Data-In or Target Task Tag (TTT) * in Data-Out PDUs. The host memory address is programmed into * h/w in the format of pagepod entries. The location of the * pagepod entry is encoded into ddp tag which is used as the base * for ITT/TTT. */ /* Direct-Data Placement page size adjustment */ int cxgbi_ppm_find_page_index(struct cxgbi_ppm *ppm, unsigned long pgsz) { struct cxgbi_tag_format *tformat = &ppm->tformat; int i; for (i = 0; i < DDP_PGIDX_MAX; i++) { if (pgsz == 1UL << (DDP_PGSZ_BASE_SHIFT + tformat->pgsz_order[i])) { pr_debug("%s: %s ppm, pgsz %lu -> idx %d.\n", __func__, ppm->ndev->name, pgsz, i); return i; } } pr_info("ippm: ddp page size %lu not supported.\n", pgsz); return DDP_PGIDX_MAX; } /* DDP setup & teardown */ static int ppm_find_unused_entries(unsigned long *bmap, unsigned int max_ppods, unsigned int start, unsigned int nr, unsigned int align_mask) { unsigned long i; i = bitmap_find_next_zero_area(bmap, max_ppods, start, nr, align_mask); if (unlikely(i >= max_ppods) && (start > nr)) i = bitmap_find_next_zero_area(bmap, max_ppods, 0, start - 1, align_mask); if (unlikely(i >= max_ppods)) return -ENOSPC; bitmap_set(bmap, i, nr); return (int)i; } static void ppm_mark_entries(struct cxgbi_ppm *ppm, int i, int count, unsigned long caller_data) { struct cxgbi_ppod_data *pdata = ppm->ppod_data + i; pdata->caller_data = caller_data; pdata->npods = count; if (pdata->color == ((1 << PPOD_IDX_SHIFT) - 1)) pdata->color = 0; else pdata->color++; } static int ppm_get_cpu_entries(struct cxgbi_ppm *ppm, unsigned int count, unsigned long caller_data) { struct cxgbi_ppm_pool *pool; unsigned int cpu; int i; if (!ppm->pool) return -EINVAL; cpu = get_cpu(); pool = per_cpu_ptr(ppm->pool, cpu); spin_lock_bh(&pool->lock); put_cpu(); i = ppm_find_unused_entries(pool->bmap, ppm->pool_index_max, pool->next, count, 0); if (i < 0) { pool->next = 0; spin_unlock_bh(&pool->lock); return -ENOSPC; } pool->next = i + count; if (pool->next >= ppm->pool_index_max) pool->next = 0; spin_unlock_bh(&pool->lock); pr_debug("%s: cpu %u, idx %d + %d (%d), next %u.\n", __func__, cpu, i, count, i + cpu * ppm->pool_index_max, pool->next); i += cpu * ppm->pool_index_max; ppm_mark_entries(ppm, i, count, caller_data); return i; } static int ppm_get_entries(struct cxgbi_ppm *ppm, unsigned int count, unsigned long caller_data) { int i; spin_lock_bh(&ppm->map_lock); i = ppm_find_unused_entries(ppm->ppod_bmap, ppm->bmap_index_max, ppm->next, count, 0); if (i < 0) { ppm->next = 0; spin_unlock_bh(&ppm->map_lock); pr_debug("ippm: NO suitable entries %u available.\n", count); return -ENOSPC; } ppm->next = i + count; if (ppm->max_index_in_edram && (ppm->next >= ppm->max_index_in_edram)) ppm->next = 0; else if (ppm->next >= ppm->bmap_index_max) ppm->next = 0; spin_unlock_bh(&ppm->map_lock); pr_debug("%s: idx %d + %d (%d), next %u, caller_data 0x%lx.\n", __func__, i, count, i + ppm->pool_rsvd, ppm->next, caller_data); i += ppm->pool_rsvd; ppm_mark_entries(ppm, i, count, caller_data); return i; } static void ppm_unmark_entries(struct cxgbi_ppm *ppm, int i, int count) { pr_debug("%s: idx %d + %d.\n", __func__, i, count); if (i < ppm->pool_rsvd) { unsigned int cpu; struct cxgbi_ppm_pool *pool; cpu = i / ppm->pool_index_max; i %= ppm->pool_index_max; pool = per_cpu_ptr(ppm->pool, cpu); spin_lock_bh(&pool->lock); bitmap_clear(pool->bmap, i, count); if (i < pool->next) pool->next = i; spin_unlock_bh(&pool->lock); pr_debug("%s: cpu %u, idx %d, next %u.\n", __func__, cpu, i, pool->next); } else { spin_lock_bh(&ppm->map_lock); i -= ppm->pool_rsvd; bitmap_clear(ppm->ppod_bmap, i, count); if (i < ppm->next) ppm->next = i; spin_unlock_bh(&ppm->map_lock); pr_debug("%s: idx %d, next %u.\n", __func__, i, ppm->next); } } void cxgbi_ppm_ppod_release(struct cxgbi_ppm *ppm, u32 idx) { struct cxgbi_ppod_data *pdata; if (idx >= ppm->ppmax) { pr_warn("ippm: idx too big %u > %u.\n", idx, ppm->ppmax); return; } pdata = ppm->ppod_data + idx; if (!pdata->npods) { pr_warn("ippm: idx %u, npods 0.\n", idx); return; } pr_debug("release idx %u, npods %u.\n", idx, pdata->npods); ppm_unmark_entries(ppm, idx, pdata->npods); } EXPORT_SYMBOL(cxgbi_ppm_ppod_release); int cxgbi_ppm_ppods_reserve(struct cxgbi_ppm *ppm, unsigned short nr_pages, u32 per_tag_pg_idx, u32 *ppod_idx, u32 *ddp_tag, unsigned long caller_data) { struct cxgbi_ppod_data *pdata; unsigned int npods; int idx = -1; unsigned int hwidx; u32 tag; npods = (nr_pages + PPOD_PAGES_MAX - 1) >> PPOD_PAGES_SHIFT; if (!npods) { pr_warn("%s: pages %u -> npods %u, full.\n", __func__, nr_pages, npods); return -EINVAL; } /* grab from cpu pool first */ idx = ppm_get_cpu_entries(ppm, npods, caller_data); /* try the general pool */ if (idx < 0) idx = ppm_get_entries(ppm, npods, caller_data); if (idx < 0) { pr_debug("ippm: pages %u, nospc %u, nxt %u, 0x%lx.\n", nr_pages, npods, ppm->next, caller_data); return idx; } pdata = ppm->ppod_data + idx; hwidx = ppm->base_idx + idx; tag = cxgbi_ppm_make_ddp_tag(hwidx, pdata->color); if (per_tag_pg_idx) tag |= (per_tag_pg_idx << 30) & 0xC0000000; *ppod_idx = idx; *ddp_tag = tag; pr_debug("ippm: sg %u, tag 0x%x(%u,%u), data 0x%lx.\n", nr_pages, tag, idx, npods, caller_data); return npods; } EXPORT_SYMBOL(cxgbi_ppm_ppods_reserve); void cxgbi_ppm_make_ppod_hdr(struct cxgbi_ppm *ppm, u32 tag, unsigned int tid, unsigned int offset, unsigned int length, struct cxgbi_pagepod_hdr *hdr) { /* The ddp tag in pagepod should be with bit 31:30 set to 0. * The ddp Tag on the wire should be with non-zero 31:30 to the peer */ tag &= 0x3FFFFFFF; hdr->vld_tid = htonl(PPOD_VALID_FLAG | PPOD_TID(tid)); hdr->rsvd = 0; hdr->pgsz_tag_clr = htonl(tag & ppm->tformat.idx_clr_mask); hdr->max_offset = htonl(length); hdr->page_offset = htonl(offset); pr_debug("ippm: tag 0x%x, tid 0x%x, xfer %u, off %u.\n", tag, tid, length, offset); } EXPORT_SYMBOL(cxgbi_ppm_make_ppod_hdr); static void ppm_free(struct cxgbi_ppm *ppm) { vfree(ppm); } static void ppm_destroy(struct kref *kref) { struct cxgbi_ppm *ppm = container_of(kref, struct cxgbi_ppm, refcnt); pr_info("ippm: kref 0, destroy %s ppm 0x%p.\n", ppm->ndev->name, ppm); *ppm->ppm_pp = NULL; free_percpu(ppm->pool); ppm_free(ppm); } int cxgbi_ppm_release(struct cxgbi_ppm *ppm) { if (ppm) { int rv; rv = kref_put(&ppm->refcnt, ppm_destroy); return rv; } return 1; } EXPORT_SYMBOL(cxgbi_ppm_release); static struct cxgbi_ppm_pool __percpu * ppm_alloc_cpu_pool(unsigned int *total, unsigned int *pcpu_ppmax) { struct cxgbi_ppm_pool __percpu *pools; unsigned int ppmax = (*total) / num_possible_cpus(); unsigned int max = (PCPU_MIN_UNIT_SIZE - sizeof(*pools)) << 3; unsigned int bmap; unsigned int alloc_sz; unsigned int count = 0; unsigned int cpu; /* make sure per cpu pool fits into PCPU_MIN_UNIT_SIZE */ if (ppmax > max) ppmax = max; /* pool size must be multiple of unsigned long */ bmap = ppmax / BITS_PER_TYPE(unsigned long); if (!bmap) return NULL; ppmax = (bmap * sizeof(unsigned long)) << 3; alloc_sz = sizeof(*pools) + sizeof(unsigned long) * bmap; pools = __alloc_percpu(alloc_sz, __alignof__(struct cxgbi_ppm_pool)); if (!pools) return NULL; for_each_possible_cpu(cpu) { struct cxgbi_ppm_pool *ppool = per_cpu_ptr(pools, cpu); memset(ppool, 0, alloc_sz); spin_lock_init(&ppool->lock); count += ppmax; } *total = count; *pcpu_ppmax = ppmax; return pools; } int cxgbi_ppm_init(void **ppm_pp, struct net_device *ndev, struct pci_dev *pdev, void *lldev, struct cxgbi_tag_format *tformat, unsigned int iscsi_size, unsigned int llimit, unsigned int start, unsigned int reserve_factor, unsigned int iscsi_edram_start, unsigned int iscsi_edram_size) { struct cxgbi_ppm *ppm = (struct cxgbi_ppm *)(*ppm_pp); struct cxgbi_ppm_pool __percpu *pool = NULL; unsigned int pool_index_max = 0; unsigned int ppmax_pool = 0; unsigned int ppod_bmap_size; unsigned int alloc_sz; unsigned int ppmax; if (!iscsi_edram_start) iscsi_edram_size = 0; if (iscsi_edram_size && ((iscsi_edram_start + iscsi_edram_size) != start)) { pr_err("iscsi ppod region not contiguous: EDRAM start 0x%x " "size 0x%x DDR start 0x%x\n", iscsi_edram_start, iscsi_edram_size, start); return -EINVAL; } if (iscsi_edram_size) { reserve_factor = 0; start = iscsi_edram_start; } ppmax = (iscsi_edram_size + iscsi_size) >> PPOD_SIZE_SHIFT; if (ppm) { pr_info("ippm: %s, ppm 0x%p,0x%p already initialized, %u/%u.\n", ndev->name, ppm_pp, ppm, ppm->ppmax, ppmax); kref_get(&ppm->refcnt); return 1; } if (reserve_factor) { ppmax_pool = ppmax / reserve_factor; pool = ppm_alloc_cpu_pool(&ppmax_pool, &pool_index_max); if (!pool) { ppmax_pool = 0; reserve_factor = 0; } pr_debug("%s: ppmax %u, cpu total %u, per cpu %u.\n", ndev->name, ppmax, ppmax_pool, pool_index_max); } ppod_bmap_size = BITS_TO_LONGS(ppmax - ppmax_pool); alloc_sz = sizeof(struct cxgbi_ppm) + ppmax * (sizeof(struct cxgbi_ppod_data)) + ppod_bmap_size * sizeof(unsigned long); ppm = vzalloc(alloc_sz); if (!ppm) goto release_ppm_pool; ppm->ppod_bmap = (unsigned long *)(&ppm->ppod_data[ppmax]); if ((ppod_bmap_size >> 3) > (ppmax - ppmax_pool)) { unsigned int start = ppmax - ppmax_pool; unsigned int end = ppod_bmap_size >> 3; bitmap_set(ppm->ppod_bmap, ppmax, end - start); pr_info("%s: %u - %u < %u * 8, mask extra bits %u, %u.\n", __func__, ppmax, ppmax_pool, ppod_bmap_size, start, end); } if (iscsi_edram_size) { unsigned int first_ddr_idx = iscsi_edram_size >> PPOD_SIZE_SHIFT; ppm->max_index_in_edram = first_ddr_idx - 1; bitmap_set(ppm->ppod_bmap, first_ddr_idx, 1); pr_debug("reserved %u ppod in bitmap\n", first_ddr_idx); } spin_lock_init(&ppm->map_lock); kref_init(&ppm->refcnt); memcpy(&ppm->tformat, tformat, sizeof(struct cxgbi_tag_format)); ppm->ppm_pp = ppm_pp; ppm->ndev = ndev; ppm->pdev = pdev; ppm->lldev = lldev; ppm->ppmax = ppmax; ppm->next = 0; ppm->llimit = llimit; ppm->base_idx = start > llimit ? (start - llimit + 1) >> PPOD_SIZE_SHIFT : 0; ppm->bmap_index_max = ppmax - ppmax_pool; ppm->pool = pool; ppm->pool_rsvd = ppmax_pool; ppm->pool_index_max = pool_index_max; /* check one more time */ if (*ppm_pp) { ppm_free(ppm); ppm = (struct cxgbi_ppm *)(*ppm_pp); pr_info("ippm: %s, ppm 0x%p,0x%p already initialized, %u/%u.\n", ndev->name, ppm_pp, *ppm_pp, ppm->ppmax, ppmax); kref_get(&ppm->refcnt); return 1; } *ppm_pp = ppm; ppm->tformat.pgsz_idx_dflt = cxgbi_ppm_find_page_index(ppm, PAGE_SIZE); pr_info("ippm %s: ppm 0x%p, 0x%p, base %u/%u, pg %lu,%u, rsvd %u,%u.\n", ndev->name, ppm_pp, ppm, ppm->base_idx, ppm->ppmax, PAGE_SIZE, ppm->tformat.pgsz_idx_dflt, ppm->pool_rsvd, ppm->pool_index_max); return 0; release_ppm_pool: free_percpu(pool); return -ENOMEM; } EXPORT_SYMBOL(cxgbi_ppm_init); unsigned int cxgbi_tagmask_set(unsigned int ppmax) { unsigned int bits = fls(ppmax); if (bits > PPOD_IDX_MAX_SIZE) bits = PPOD_IDX_MAX_SIZE; pr_info("ippm: ppmax %u/0x%x -> bits %u, tagmask 0x%x.\n", ppmax, ppmax, bits, 1 << (bits + PPOD_IDX_SHIFT)); return 1 << (bits + PPOD_IDX_SHIFT); } EXPORT_SYMBOL(cxgbi_tagmask_set); MODULE_AUTHOR("Chelsio Communications"); MODULE_DESCRIPTION("Chelsio common library"); MODULE_LICENSE("Dual BSD/GPL");