// SPDX-License-Identifier: GPL-2.0 /* * Shared Memory Communications over RDMA (SMC-R) and RoCE * * Link Layer Control (LLC) * * Copyright IBM Corp. 2016 * * Author(s): Klaus Wacker * Ursula Braun */ #include #include #include "smc.h" #include "smc_core.h" #include "smc_clc.h" #include "smc_llc.h" #include "smc_pnet.h" #define SMC_LLC_DATA_LEN 40 struct smc_llc_hdr { struct smc_wr_rx_hdr common; union { struct { u8 length; /* 44 */ #if defined(__BIG_ENDIAN_BITFIELD) u8 reserved:4, add_link_rej_rsn:4; #elif defined(__LITTLE_ENDIAN_BITFIELD) u8 add_link_rej_rsn:4, reserved:4; #endif }; u16 length_v2; /* 44 - 8192*/ }; u8 flags; } __packed; /* format defined in * IBM Shared Memory Communications Version 2 * (https://www.ibm.com/support/pages/node/6326337) */ #define SMC_LLC_FLAG_NO_RMBE_EYEC 0x03 struct smc_llc_msg_confirm_link { /* type 0x01 */ struct smc_llc_hdr hd; u8 sender_mac[ETH_ALEN]; u8 sender_gid[SMC_GID_SIZE]; u8 sender_qp_num[3]; u8 link_num; u8 link_uid[SMC_LGR_ID_SIZE]; u8 max_links; u8 max_conns; u8 reserved[8]; }; #define SMC_LLC_FLAG_ADD_LNK_REJ 0x40 #define SMC_LLC_REJ_RSN_NO_ALT_PATH 1 struct smc_llc_msg_add_link { /* type 0x02 */ struct smc_llc_hdr hd; u8 sender_mac[ETH_ALEN]; u8 reserved2[2]; u8 sender_gid[SMC_GID_SIZE]; u8 sender_qp_num[3]; u8 link_num; #if defined(__BIG_ENDIAN_BITFIELD) u8 reserved3 : 4, qp_mtu : 4; #elif defined(__LITTLE_ENDIAN_BITFIELD) u8 qp_mtu : 4, reserved3 : 4; #endif u8 initial_psn[3]; u8 reserved[8]; }; struct smc_llc_msg_add_link_cont_rt { __be32 rmb_key; __be32 rmb_key_new; __be64 rmb_vaddr_new; }; struct smc_llc_msg_add_link_v2_ext { #if defined(__BIG_ENDIAN_BITFIELD) u8 v2_direct : 1, reserved : 7; #elif defined(__LITTLE_ENDIAN_BITFIELD) u8 reserved : 7, v2_direct : 1; #endif u8 reserved2; u8 client_target_gid[SMC_GID_SIZE]; u8 reserved3[8]; u16 num_rkeys; struct smc_llc_msg_add_link_cont_rt rt[]; } __packed; /* format defined in * IBM Shared Memory Communications Version 2 * 
(https://www.ibm.com/support/pages/node/6326337) */ struct smc_llc_msg_req_add_link_v2 { struct smc_llc_hdr hd; u8 reserved[20]; u8 gid_cnt; u8 reserved2[3]; u8 gid[][SMC_GID_SIZE]; }; #define SMC_LLC_RKEYS_PER_CONT_MSG 2 struct smc_llc_msg_add_link_cont { /* type 0x03 */ struct smc_llc_hdr hd; u8 link_num; u8 num_rkeys; u8 reserved2[2]; struct smc_llc_msg_add_link_cont_rt rt[SMC_LLC_RKEYS_PER_CONT_MSG]; u8 reserved[4]; } __packed; /* format defined in RFC7609 */ #define SMC_LLC_FLAG_DEL_LINK_ALL 0x40 #define SMC_LLC_FLAG_DEL_LINK_ORDERLY 0x20 struct smc_llc_msg_del_link { /* type 0x04 */ struct smc_llc_hdr hd; u8 link_num; __be32 reason; u8 reserved[35]; } __packed; /* format defined in RFC7609 */ struct smc_llc_msg_test_link { /* type 0x07 */ struct smc_llc_hdr hd; u8 user_data[16]; u8 reserved[24]; }; struct smc_rmb_rtoken { union { u8 num_rkeys; /* first rtoken byte of CONFIRM LINK msg */ /* is actually the num of rtokens, first */ /* rtoken is always for the current link */ u8 link_id; /* link id of the rtoken */ }; __be32 rmb_key; __be64 rmb_vaddr; } __packed; /* format defined in RFC7609 */ #define SMC_LLC_RKEYS_PER_MSG 3 #define SMC_LLC_RKEYS_PER_MSG_V2 255 struct smc_llc_msg_confirm_rkey { /* type 0x06 */ struct smc_llc_hdr hd; struct smc_rmb_rtoken rtoken[SMC_LLC_RKEYS_PER_MSG]; u8 reserved; }; #define SMC_LLC_DEL_RKEY_MAX 8 #define SMC_LLC_FLAG_RKEY_RETRY 0x10 #define SMC_LLC_FLAG_RKEY_NEG 0x20 struct smc_llc_msg_delete_rkey { /* type 0x09 */ struct smc_llc_hdr hd; u8 num_rkeys; u8 err_mask; u8 reserved[2]; __be32 rkey[8]; u8 reserved2[4]; }; struct smc_llc_msg_delete_rkey_v2 { /* type 0x29 */ struct smc_llc_hdr hd; u8 num_rkeys; u8 num_inval_rkeys; u8 reserved[2]; __be32 rkey[]; }; union smc_llc_msg { struct smc_llc_msg_confirm_link confirm_link; struct smc_llc_msg_add_link add_link; struct smc_llc_msg_req_add_link_v2 req_add_link; struct smc_llc_msg_add_link_cont add_link_cont; struct smc_llc_msg_del_link delete_link; struct smc_llc_msg_confirm_rkey 
confirm_rkey; struct smc_llc_msg_delete_rkey delete_rkey; struct smc_llc_msg_test_link test_link; struct { struct smc_llc_hdr hdr; u8 data[SMC_LLC_DATA_LEN]; } raw; }; #define SMC_LLC_FLAG_RESP 0x80 struct smc_llc_qentry { struct list_head list; struct smc_link *link; union smc_llc_msg msg; }; static void smc_llc_enqueue(struct smc_link *link, union smc_llc_msg *llc); struct smc_llc_qentry *smc_llc_flow_qentry_clr(struct smc_llc_flow *flow) { struct smc_llc_qentry *qentry = flow->qentry; flow->qentry = NULL; return qentry; } void smc_llc_flow_qentry_del(struct smc_llc_flow *flow) { struct smc_llc_qentry *qentry; if (flow->qentry) { qentry = flow->qentry; flow->qentry = NULL; kfree(qentry); } } static inline void smc_llc_flow_qentry_set(struct smc_llc_flow *flow, struct smc_llc_qentry *qentry) { flow->qentry = qentry; } static void smc_llc_flow_parallel(struct smc_link_group *lgr, u8 flow_type, struct smc_llc_qentry *qentry) { u8 msg_type = qentry->msg.raw.hdr.common.llc_type; if ((msg_type == SMC_LLC_ADD_LINK || msg_type == SMC_LLC_DELETE_LINK) && flow_type != msg_type && !lgr->delayed_event) { lgr->delayed_event = qentry; return; } /* drop parallel or already-in-progress llc requests */ if (flow_type != msg_type) pr_warn_once("smc: SMC-R lg %*phN net %llu dropped parallel " "LLC msg: msg %d flow %d role %d\n", SMC_LGR_ID_SIZE, &lgr->id, lgr->net->net_cookie, qentry->msg.raw.hdr.common.type, flow_type, lgr->role); kfree(qentry); } /* try to start a new llc flow, initiated by an incoming llc msg */ static bool smc_llc_flow_start(struct smc_llc_flow *flow, struct smc_llc_qentry *qentry) { struct smc_link_group *lgr = qentry->link->lgr; spin_lock_bh(&lgr->llc_flow_lock); if (flow->type) { /* a flow is already active */ smc_llc_flow_parallel(lgr, flow->type, qentry); spin_unlock_bh(&lgr->llc_flow_lock); return false; } switch (qentry->msg.raw.hdr.common.llc_type) { case SMC_LLC_ADD_LINK: flow->type = SMC_LLC_FLOW_ADD_LINK; break; case SMC_LLC_DELETE_LINK: flow->type = 
SMC_LLC_FLOW_DEL_LINK;
		break;
	case SMC_LLC_CONFIRM_RKEY:
	case SMC_LLC_DELETE_RKEY:
		flow->type = SMC_LLC_FLOW_RKEY;
		break;
	default:
		flow->type = SMC_LLC_FLOW_NONE;
	}
	smc_llc_flow_qentry_set(flow, qentry);
	spin_unlock_bh(&lgr->llc_flow_lock);
	return true;
}

/* start a new local llc flow, wait till current flow finished */
int smc_llc_flow_initiate(struct smc_link_group *lgr,
			  enum smc_llc_flowtype type)
{
	enum smc_llc_flowtype allowed_remote = SMC_LLC_FLOW_NONE;
	int rc;

	/* all flows except confirm_rkey and delete_rkey are exclusive,
	 * confirm/delete rkey flows can run concurrently (local and remote)
	 */
	if (type == SMC_LLC_FLOW_RKEY)
		allowed_remote = SMC_LLC_FLOW_RKEY;
again:
	/* lgr no longer on the lgr list means it is terminating */
	if (list_empty(&lgr->list))
		return -ENODEV;
	spin_lock_bh(&lgr->llc_flow_lock);
	if (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE &&
	    (lgr->llc_flow_rmt.type == SMC_LLC_FLOW_NONE ||
	     lgr->llc_flow_rmt.type == allowed_remote)) {
		lgr->llc_flow_lcl.type = type;
		spin_unlock_bh(&lgr->llc_flow_lock);
		return 0;
	}
	spin_unlock_bh(&lgr->llc_flow_lock);
	/* wait until the blocking flow(s) finished, then retry the claim */
	rc = wait_event_timeout(lgr->llc_flow_waiter, (list_empty(&lgr->list) ||
				(lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE &&
				 (lgr->llc_flow_rmt.type == SMC_LLC_FLOW_NONE ||
				  lgr->llc_flow_rmt.type == allowed_remote))),
				SMC_LLC_WAIT_TIME * 10);
	if (!rc)
		return -ETIMEDOUT;
	goto again;
}

/* finish the current llc flow */
void smc_llc_flow_stop(struct smc_link_group *lgr, struct smc_llc_flow *flow)
{
	spin_lock_bh(&lgr->llc_flow_lock);
	memset(flow, 0, sizeof(*flow));
	flow->type = SMC_LLC_FLOW_NONE;
	spin_unlock_bh(&lgr->llc_flow_lock);
	/* a delayed event is pending: let the event worker process it now;
	 * otherwise wake up anyone waiting in smc_llc_flow_initiate()
	 */
	if (!list_empty(&lgr->list) && lgr->delayed_event &&
	    flow == &lgr->llc_flow_lcl)
		schedule_work(&lgr->llc_event_work);
	else
		wake_up(&lgr->llc_flow_waiter);
}

/* lnk is optional and used for early wakeup when link goes down, useful in
 * cases where we wait for a response on the link after we sent a request
 */
struct smc_llc_qentry *smc_llc_wait(struct smc_link_group *lgr,
				    struct smc_link *lnk,
				    int time_out, u8 exp_msg)
{
	struct smc_llc_flow *flow = &lgr->llc_flow_lcl;
	u8 rcv_msg;

	wait_event_timeout(lgr->llc_msg_waiter,
			   (flow->qentry ||
			    (lnk && !smc_link_usable(lnk)) ||
			    list_empty(&lgr->list)),
			   time_out);
	/* timed out, link went down, or lgr is terminating: no message */
	if (!flow->qentry ||
	    (lnk && !smc_link_usable(lnk)) || list_empty(&lgr->list)) {
		smc_llc_flow_qentry_del(flow);
		goto out;
	}
	rcv_msg = flow->qentry->msg.raw.hdr.common.llc_type;
	if (exp_msg && rcv_msg != exp_msg) {
		if (exp_msg == SMC_LLC_ADD_LINK &&
		    rcv_msg == SMC_LLC_DELETE_LINK) {
			/* flow_start will delay the unexpected msg */
			smc_llc_flow_start(&lgr->llc_flow_lcl,
					   smc_llc_flow_qentry_clr(flow));
			return NULL;
		}
		pr_warn_once("smc: SMC-R lg %*phN net %llu dropped unexpected LLC msg: "
			     "msg %d exp %d flow %d role %d flags %x\n",
			     SMC_LGR_ID_SIZE, &lgr->id, lgr->net->net_cookie,
			     rcv_msg, exp_msg,
			     flow->type, lgr->role,
			     flow->qentry->msg.raw.hdr.flags);
		smc_llc_flow_qentry_del(flow);
	}
out:
	return flow->qentry;
}

/********************************** send *************************************/

/* currently no private per-WR context is needed for LLC sends */
struct smc_llc_tx_pend {
};

/* handler for send/transmission completion of an LLC msg */
static void smc_llc_tx_handler(struct smc_wr_tx_pend_priv *pend,
			       struct smc_link *link,
			       enum ib_wc_status wc_status)
{
	/* future work: handle wc_status error for recovery and failover */
}

/**
 * smc_llc_add_pending_send() - add LLC control message to pending WQE transmits
 * @link: Pointer to SMC link used for sending LLC control message.
 * @wr_buf: Out variable returning pointer to work request payload buffer.
 * @pend: Out variable returning pointer to private pending WR tracking.
 *	It's the context the transmit complete handler will get.
 *
 * Reserves and pre-fills an entry for a pending work request send/tx.
 * Used by mid-level smc_llc_send_msg() to prepare for later actual send/tx.
 * Can sleep due to smc_get_ctrl_buf (if not in softirq context).
 *
 * Return: 0 on success, otherwise an error value.
*/
static int smc_llc_add_pending_send(struct smc_link *link,
				    struct smc_wr_buf **wr_buf,
				    struct smc_wr_tx_pend_priv **pend)
{
	int rc;

	rc = smc_wr_tx_get_free_slot(link, smc_llc_tx_handler, wr_buf, NULL,
				     pend);
	if (rc < 0)
		return rc;
	BUILD_BUG_ON_MSG(
		sizeof(union smc_llc_msg) > SMC_WR_BUF_SIZE,
		"must increase SMC_WR_BUF_SIZE to at least sizeof(struct smc_llc_msg)");
	BUILD_BUG_ON_MSG(
		sizeof(union smc_llc_msg) != SMC_WR_TX_SIZE,
		"must adapt SMC_WR_TX_SIZE to sizeof(struct smc_llc_msg); if not all smc_wr upper layer protocols use the same message size any more, must start to set link->wr_tx_sges[i].length on each individual smc_wr_tx_send()");
	BUILD_BUG_ON_MSG(
		sizeof(struct smc_llc_tx_pend) > SMC_WR_TX_PEND_PRIV_SIZE,
		"must increase SMC_WR_TX_PEND_PRIV_SIZE to at least sizeof(struct smc_llc_tx_pend)");
	return 0;
}

/* like smc_llc_add_pending_send(), but reserves a slot in the larger
 * SMCv2 work request buffer
 */
static int smc_llc_add_pending_send_v2(struct smc_link *link,
				       struct smc_wr_v2_buf **wr_buf,
				       struct smc_wr_tx_pend_priv **pend)
{
	int rc;

	rc = smc_wr_tx_get_v2_slot(link, smc_llc_tx_handler, wr_buf, pend);
	if (rc < 0)
		return rc;
	return 0;
}

/* set version and length fields of an LLC header according to the
 * SMC version of the link group
 */
static void smc_llc_init_msg_hdr(struct smc_llc_hdr *hdr,
				 struct smc_link_group *lgr, size_t len)
{
	if (lgr->smc_version == SMC_V2) {
		hdr->common.llc_version = SMC_V2;
		hdr->length_v2 = len;
	} else {
		hdr->common.llc_version = 0;
		hdr->length = len;
	}
}

/* high-level API to send LLC confirm link */
int smc_llc_send_confirm_link(struct smc_link *link,
			      enum smc_llc_reqresp reqresp)
{
	struct smc_llc_msg_confirm_link *confllc;
	struct smc_wr_tx_pend_priv *pend;
	struct smc_wr_buf *wr_buf;
	int rc;

	if (!smc_wr_tx_link_hold(link))
		return -ENOLINK;
	rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
	if (rc)
		goto put_out;
	confllc = (struct smc_llc_msg_confirm_link *)wr_buf;
	memset(confllc, 0, sizeof(*confllc));
	confllc->hd.common.llc_type = SMC_LLC_CONFIRM_LINK;
	smc_llc_init_msg_hdr(&confllc->hd, link->lgr, sizeof(*confllc));
	confllc->hd.flags |= SMC_LLC_FLAG_NO_RMBE_EYEC;
	if (reqresp == SMC_LLC_RESP)
		confllc->hd.flags |= SMC_LLC_FLAG_RESP;
	memcpy(confllc->sender_mac, link->smcibdev->mac[link->ibport - 1],
	       ETH_ALEN);
	memcpy(confllc->sender_gid, link->gid, SMC_GID_SIZE);
	hton24(confllc->sender_qp_num, link->roce_qp->qp_num);
	confllc->link_num = link->link_id;
	memcpy(confllc->link_uid, link->link_uid, SMC_LGR_ID_SIZE);
	confllc->max_links = SMC_LINKS_ADD_LNK_MAX;
	/* for SMCv2 peers of release >= 1, advertise the lgr's own limits */
	if (link->lgr->smc_version == SMC_V2 &&
	    link->lgr->peer_smc_release >= SMC_RELEASE_1) {
		confllc->max_conns = link->lgr->max_conns;
		confllc->max_links = link->lgr->max_links;
	}
	/* send llc message */
	rc = smc_wr_tx_send(link, pend);
put_out:
	smc_wr_tx_link_put(link);
	return rc;
}

/* send LLC confirm rkey request */
static int smc_llc_send_confirm_rkey(struct smc_link *send_link,
				     struct smc_buf_desc *rmb_desc)
{
	struct smc_llc_msg_confirm_rkey *rkeyllc;
	struct smc_wr_tx_pend_priv *pend;
	struct smc_wr_buf *wr_buf;
	struct smc_link *link;
	int i, rc, rtok_ix;

	if (!smc_wr_tx_link_hold(send_link))
		return -ENOLINK;
	rc = smc_llc_add_pending_send(send_link, &wr_buf, &pend);
	if (rc)
		goto put_out;
	rkeyllc = (struct smc_llc_msg_confirm_rkey *)wr_buf;
	memset(rkeyllc, 0, sizeof(*rkeyllc));
	rkeyllc->hd.common.llc_type = SMC_LLC_CONFIRM_RKEY;
	smc_llc_init_msg_hdr(&rkeyllc->hd, send_link->lgr, sizeof(*rkeyllc));

	/* rtokens for all other active links start at index 1;
	 * index 0 is reserved for the sending link (filled below)
	 */
	rtok_ix = 1;
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		link = &send_link->lgr->lnk[i];
		if (smc_link_active(link) && link != send_link) {
			rkeyllc->rtoken[rtok_ix].link_id = link->link_id;
			rkeyllc->rtoken[rtok_ix].rmb_key =
				htonl(rmb_desc->mr[link->link_idx]->rkey);
			rkeyllc->rtoken[rtok_ix].rmb_vaddr = rmb_desc->is_vm ?
				cpu_to_be64((uintptr_t)rmb_desc->cpu_addr) :
				cpu_to_be64((u64)sg_dma_address
					    (rmb_desc->sgt[link->link_idx].sgl));
			rtok_ix++;
		}
	}
	/* rkey of send_link is in rtoken[0] */
	rkeyllc->rtoken[0].num_rkeys = rtok_ix - 1;
	rkeyllc->rtoken[0].rmb_key =
		htonl(rmb_desc->mr[send_link->link_idx]->rkey);
	rkeyllc->rtoken[0].rmb_vaddr = rmb_desc->is_vm ?
		cpu_to_be64((uintptr_t)rmb_desc->cpu_addr) :
		cpu_to_be64((u64)sg_dma_address
			    (rmb_desc->sgt[send_link->link_idx].sgl));
	/* send llc message */
	rc = smc_wr_tx_send(send_link, pend);
put_out:
	smc_wr_tx_link_put(send_link);
	return rc;
}

/* send LLC delete rkey request */
static int smc_llc_send_delete_rkey(struct smc_link *link,
				    struct smc_buf_desc *rmb_desc)
{
	struct smc_llc_msg_delete_rkey *rkeyllc;
	struct smc_wr_tx_pend_priv *pend;
	struct smc_wr_buf *wr_buf;
	int rc;

	if (!smc_wr_tx_link_hold(link))
		return -ENOLINK;
	rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
	if (rc)
		goto put_out;
	rkeyllc = (struct smc_llc_msg_delete_rkey *)wr_buf;
	memset(rkeyllc, 0, sizeof(*rkeyllc));
	rkeyllc->hd.common.llc_type = SMC_LLC_DELETE_RKEY;
	smc_llc_init_msg_hdr(&rkeyllc->hd, link->lgr, sizeof(*rkeyllc));
	rkeyllc->num_rkeys = 1;
	rkeyllc->rkey[0] = htonl(rmb_desc->mr[link->link_idx]->rkey);
	/* send llc message */
	rc = smc_wr_tx_send(link, pend);
put_out:
	smc_wr_tx_link_put(link);
	return rc;
}

/* return first buffer from any of the next buf lists */
static struct smc_buf_desc *_smc_llc_get_next_rmb(struct smc_link_group *lgr,
						  int *buf_lst)
{
	struct smc_buf_desc *buf_pos;

	while (*buf_lst < SMC_RMBE_SIZES) {
		buf_pos = list_first_entry_or_null(&lgr->rmbs[*buf_lst],
						   struct smc_buf_desc, list);
		if (buf_pos)
			return buf_pos;
		(*buf_lst)++;
	}
	return NULL;
}

/* return next rmb from buffer lists */
static struct smc_buf_desc *smc_llc_get_next_rmb(struct smc_link_group *lgr,
						 int *buf_lst,
						 struct smc_buf_desc *buf_pos)
{
	struct smc_buf_desc *buf_next;

	if (!buf_pos)
		return _smc_llc_get_next_rmb(lgr, buf_lst);

	/* end of current size list reached: advance to the next one */
	if (list_is_last(&buf_pos->list, &lgr->rmbs[*buf_lst])) {
		(*buf_lst)++;
		return _smc_llc_get_next_rmb(lgr, buf_lst);
	}
	buf_next = list_next_entry(buf_pos, list);
	return buf_next;
}

/* return the first rmb of the first non-empty buffer list */
static struct smc_buf_desc *smc_llc_get_first_rmb(struct smc_link_group *lgr,
						  int *buf_lst)
{
	*buf_lst = 0;
	return smc_llc_get_next_rmb(lgr, buf_lst, NULL);
}

static int smc_llc_fill_ext_v2(struct
smc_llc_msg_add_link_v2_ext *ext,
			       struct smc_link *link,
			       struct smc_link *link_new)
{
	/* fill the SMCv2 ADD_LINK extension: one rkey triple per used RMB,
	 * up to the number of connections in the link group
	 */
	struct smc_link_group *lgr = link->lgr;
	struct smc_buf_desc *buf_pos;
	int prim_lnk_idx, lnk_idx, i;
	struct smc_buf_desc *rmb;
	int len = sizeof(*ext);
	int buf_lst;

	ext->v2_direct = !lgr->uses_gateway;
	memcpy(ext->client_target_gid, link_new->gid, SMC_GID_SIZE);

	prim_lnk_idx = link->link_idx;
	lnk_idx = link_new->link_idx;
	down_write(&lgr->rmbs_lock);
	ext->num_rkeys = lgr->conns_num;
	if (!ext->num_rkeys)
		goto out;
	buf_pos = smc_llc_get_first_rmb(lgr, &buf_lst);
	for (i = 0; i < ext->num_rkeys; i++) {
		/* skip unused buffers */
		while (buf_pos && !(buf_pos)->used)
			buf_pos = smc_llc_get_next_rmb(lgr, &buf_lst, buf_pos);
		if (!buf_pos)
			break;
		rmb = buf_pos;
		ext->rt[i].rmb_key = htonl(rmb->mr[prim_lnk_idx]->rkey);
		ext->rt[i].rmb_key_new = htonl(rmb->mr[lnk_idx]->rkey);
		ext->rt[i].rmb_vaddr_new = rmb->is_vm ?
			cpu_to_be64((uintptr_t)rmb->cpu_addr) :
			cpu_to_be64((u64)sg_dma_address(rmb->sgt[lnk_idx].sgl));
		buf_pos = smc_llc_get_next_rmb(lgr, &buf_lst, buf_pos);
	}
	len += i * sizeof(ext->rt[0]);
out:
	up_write(&lgr->rmbs_lock);
	return len;
}

/* send ADD LINK request or response */
int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[],
			  struct smc_link *link_new,
			  enum smc_llc_reqresp reqresp)
{
	struct smc_llc_msg_add_link_v2_ext *ext = NULL;
	struct smc_llc_msg_add_link *addllc;
	struct smc_wr_tx_pend_priv *pend;
	int len = sizeof(*addllc);
	int rc;

	if (!smc_wr_tx_link_hold(link))
		return -ENOLINK;
	if (link->lgr->smc_version == SMC_V2) {
		struct smc_wr_v2_buf *wr_buf;

		rc = smc_llc_add_pending_send_v2(link, &wr_buf, &pend);
		if (rc)
			goto put_out;
		addllc = (struct smc_llc_msg_add_link *)wr_buf;
		/* the V2 extension follows the base message in the buffer */
		ext = (struct smc_llc_msg_add_link_v2_ext *)
						&wr_buf->raw[sizeof(*addllc)];
		memset(ext, 0, SMC_WR_TX_SIZE);
	} else {
		struct smc_wr_buf *wr_buf;

		rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
		if (rc)
			goto put_out;
		addllc = (struct smc_llc_msg_add_link *)wr_buf;
	}

	memset(addllc, 0, sizeof(*addllc));
	addllc->hd.common.llc_type = SMC_LLC_ADD_LINK;
	if (reqresp == SMC_LLC_RESP)
		addllc->hd.flags |= SMC_LLC_FLAG_RESP;
	memcpy(addllc->sender_mac, mac, ETH_ALEN);
	memcpy(addllc->sender_gid, gid, SMC_GID_SIZE);
	if (link_new) {
		addllc->link_num = link_new->link_id;
		hton24(addllc->sender_qp_num, link_new->roce_qp->qp_num);
		hton24(addllc->initial_psn, link_new->psn_initial);
		if (reqresp == SMC_LLC_REQ)
			addllc->qp_mtu = link_new->path_mtu;
		else
			addllc->qp_mtu = min(link_new->path_mtu,
					     link_new->peer_mtu);
	}
	if (ext && link_new)
		len += smc_llc_fill_ext_v2(ext, link, link_new);
	smc_llc_init_msg_hdr(&addllc->hd, link->lgr, len);
	/* send llc message */
	if (link->lgr->smc_version == SMC_V2)
		rc = smc_wr_tx_v2_send(link, pend, len);
	else
		rc = smc_wr_tx_send(link, pend);
put_out:
	smc_wr_tx_link_put(link);
	return rc;
}

/* send DELETE LINK request or response */
int smc_llc_send_delete_link(struct smc_link *link, u8 link_del_id,
			     enum smc_llc_reqresp reqresp, bool orderly,
			     u32 reason)
{
	struct smc_llc_msg_del_link *delllc;
	struct smc_wr_tx_pend_priv *pend;
	struct smc_wr_buf *wr_buf;
	int rc;

	if (!smc_wr_tx_link_hold(link))
		return -ENOLINK;
	rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
	if (rc)
		goto put_out;
	delllc = (struct smc_llc_msg_del_link *)wr_buf;

	memset(delllc, 0, sizeof(*delllc));
	delllc->hd.common.llc_type = SMC_LLC_DELETE_LINK;
	smc_llc_init_msg_hdr(&delllc->hd, link->lgr, sizeof(*delllc));
	if (reqresp == SMC_LLC_RESP)
		delllc->hd.flags |= SMC_LLC_FLAG_RESP;
	if (orderly)
		delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
	/* link id 0 means: delete all links of the link group */
	if (link_del_id)
		delllc->link_num = link_del_id;
	else
		delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL;
	delllc->reason = htonl(reason);
	/* send llc message */
	rc = smc_wr_tx_send(link, pend);
put_out:
	smc_wr_tx_link_put(link);
	return rc;
}

/* send LLC test link request */
static int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16])
{
	struct smc_llc_msg_test_link *testllc;
	struct smc_wr_tx_pend_priv *pend;
	struct smc_wr_buf *wr_buf;
	int rc;

	if (!smc_wr_tx_link_hold(link))
		return -ENOLINK;
	rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
	if (rc)
		goto put_out;
	testllc = (struct smc_llc_msg_test_link *)wr_buf;
	memset(testllc, 0, sizeof(*testllc));
	testllc->hd.common.llc_type = SMC_LLC_TEST_LINK;
	smc_llc_init_msg_hdr(&testllc->hd, link->lgr, sizeof(*testllc));
	memcpy(testllc->user_data, user_data, sizeof(testllc->user_data));
	/* send llc message */
	rc = smc_wr_tx_send(link, pend);
put_out:
	smc_wr_tx_link_put(link);
	return rc;
}

/* schedule an llc send on link, may wait for buffers */
static int smc_llc_send_message(struct smc_link *link, void *llcbuf)
{
	struct smc_wr_tx_pend_priv *pend;
	struct smc_wr_buf *wr_buf;
	int rc;

	if (!smc_wr_tx_link_hold(link))
		return -ENOLINK;
	rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
	if (rc)
		goto put_out;
	memcpy(wr_buf, llcbuf, sizeof(union smc_llc_msg));
	rc = smc_wr_tx_send(link, pend);
put_out:
	smc_wr_tx_link_put(link);
	return rc;
}

/* schedule an llc send on link, may wait for buffers,
 * and wait for send completion notification.
 * @return 0 on success
 */
static int smc_llc_send_message_wait(struct smc_link *link, void *llcbuf)
{
	struct smc_wr_tx_pend_priv *pend;
	struct smc_wr_buf *wr_buf;
	int rc;

	if (!smc_wr_tx_link_hold(link))
		return -ENOLINK;
	rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
	if (rc)
		goto put_out;
	memcpy(wr_buf, llcbuf, sizeof(union smc_llc_msg));
	rc = smc_wr_tx_send_wait(link, pend, SMC_LLC_WAIT_TIME);
put_out:
	smc_wr_tx_link_put(link);
	return rc;
}

/********************************* receive ***********************************/

/* return the index of an unused link slot suitable for the new lgr type,
 * or -EMLINK if none is available
 */
static int smc_llc_alloc_alt_link(struct smc_link_group *lgr,
				  enum smc_lgr_type lgr_new_t)
{
	int i;

	if (lgr->type == SMC_LGR_SYMMETRIC ||
	    (lgr->type != SMC_LGR_SINGLE &&
	     (lgr_new_t == SMC_LGR_ASYMMETRIC_LOCAL ||
	      lgr_new_t == SMC_LGR_ASYMMETRIC_PEER)))
		return -EMLINK;

	/* asymmetric links are placed from the top, others from the bottom */
	if (lgr_new_t == SMC_LGR_ASYMMETRIC_LOCAL ||
	    lgr_new_t == SMC_LGR_ASYMMETRIC_PEER) {
		for (i = SMC_LINKS_PER_LGR_MAX - 1; i >= 0; i--)
			if (lgr->lnk[i].state == SMC_LNK_UNUSED)
				return i;
	} else {
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
			if (lgr->lnk[i].state == SMC_LNK_UNUSED)
				return i;
	}
	return -EMLINK;
}

/* send one add_link_continue msg */
static int smc_llc_add_link_cont(struct smc_link *link,
				 struct smc_link *link_new, u8 *num_rkeys_todo,
				 int *buf_lst, struct smc_buf_desc **buf_pos)
{
	struct smc_llc_msg_add_link_cont *addc_llc;
	struct smc_link_group *lgr = link->lgr;
	int prim_lnk_idx, lnk_idx, i, rc;
	struct smc_wr_tx_pend_priv *pend;
	struct smc_wr_buf *wr_buf;
	struct smc_buf_desc *rmb;
	u8 n;

	if (!smc_wr_tx_link_hold(link))
		return -ENOLINK;
	rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
	if (rc)
		goto put_out;
	addc_llc = (struct smc_llc_msg_add_link_cont *)wr_buf;
	memset(addc_llc, 0, sizeof(*addc_llc));

	prim_lnk_idx = link->link_idx;
	lnk_idx = link_new->link_idx;
	addc_llc->link_num = link_new->link_id;
	addc_llc->num_rkeys = *num_rkeys_todo;
	n = *num_rkeys_todo;
	for (i = 0; i < min_t(u8, n, SMC_LLC_RKEYS_PER_CONT_MSG); i++) {
		/* skip unused buffers */
		while (*buf_pos && !(*buf_pos)->used)
			*buf_pos = smc_llc_get_next_rmb(lgr, buf_lst, *buf_pos);
		if (!*buf_pos) {
			/* ran out of buffers: adjust the announced count */
			addc_llc->num_rkeys = addc_llc->num_rkeys -
					      *num_rkeys_todo;
			*num_rkeys_todo = 0;
			break;
		}
		rmb = *buf_pos;

		addc_llc->rt[i].rmb_key = htonl(rmb->mr[prim_lnk_idx]->rkey);
		addc_llc->rt[i].rmb_key_new = htonl(rmb->mr[lnk_idx]->rkey);
		addc_llc->rt[i].rmb_vaddr_new = rmb->is_vm ?
			cpu_to_be64((uintptr_t)rmb->cpu_addr) :
			cpu_to_be64((u64)sg_dma_address(rmb->sgt[lnk_idx].sgl));

		(*num_rkeys_todo)--;
		*buf_pos = smc_llc_get_next_rmb(lgr, buf_lst, *buf_pos);
	}
	addc_llc->hd.common.llc_type = SMC_LLC_ADD_LINK_CONT;
	addc_llc->hd.length = sizeof(struct smc_llc_msg_add_link_cont);
	if (lgr->role == SMC_CLNT)
		addc_llc->hd.flags |= SMC_LLC_FLAG_RESP;
	rc = smc_wr_tx_send(link, pend);
put_out:
	smc_wr_tx_link_put(link);
	return rc;
}

/* client side of the ADD_LINK_CONT rkey exchange: alternately receive
 * the server's rkeys and send our own until both sides are done
 */
static int smc_llc_cli_rkey_exchange(struct smc_link *link,
				     struct smc_link *link_new)
{
	struct smc_llc_msg_add_link_cont *addc_llc;
	struct smc_link_group *lgr = link->lgr;
	u8 max, num_rkeys_send, num_rkeys_recv;
	struct smc_llc_qentry *qentry;
	struct smc_buf_desc *buf_pos;
	int buf_lst;
	int rc = 0;
	int i;

	down_write(&lgr->rmbs_lock);
	num_rkeys_send = lgr->conns_num;
	buf_pos = smc_llc_get_first_rmb(lgr, &buf_lst);
	do {
		qentry = smc_llc_wait(lgr, NULL, SMC_LLC_WAIT_TIME,
				      SMC_LLC_ADD_LINK_CONT);
		if (!qentry) {
			rc = -ETIMEDOUT;
			break;
		}
		addc_llc = &qentry->msg.add_link_cont;

		num_rkeys_recv = addc_llc->num_rkeys;
		max = min_t(u8, num_rkeys_recv, SMC_LLC_RKEYS_PER_CONT_MSG);
		for (i = 0; i < max; i++) {
			smc_rtoken_set(lgr, link->link_idx, link_new->link_idx,
				       addc_llc->rt[i].rmb_key,
				       addc_llc->rt[i].rmb_vaddr_new,
				       addc_llc->rt[i].rmb_key_new);
			num_rkeys_recv--;
		}
		smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
		rc = smc_llc_add_link_cont(link, link_new, &num_rkeys_send,
					   &buf_lst, &buf_pos);
		if (rc)
			break;
	} while (num_rkeys_send || num_rkeys_recv);

	up_write(&lgr->rmbs_lock);
	return rc;
}

/* prepare and send an add link reject response */
static int smc_llc_cli_add_link_reject(struct smc_llc_qentry *qentry)
{
qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_RESP;
	qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_ADD_LNK_REJ;
	qentry->msg.raw.hdr.add_link_rej_rsn = SMC_LLC_REJ_RSN_NO_ALT_PATH;
	smc_llc_init_msg_hdr(&qentry->msg.raw.hdr, qentry->link->lgr,
			     sizeof(qentry->msg));
	return smc_llc_send_message(qentry->link, &qentry->msg);
}

/* client side: wait for the peer's CONFIRM_LINK, bring the new link to RTS,
 * register buffers, respond, and activate the link
 */
static int smc_llc_cli_conf_link(struct smc_link *link,
				 struct smc_init_info *ini,
				 struct smc_link *link_new,
				 enum smc_lgr_type lgr_new_t)
{
	struct smc_link_group *lgr = link->lgr;
	struct smc_llc_qentry *qentry = NULL;
	int rc = 0;

	/* receive CONFIRM LINK request over RoCE fabric */
	qentry = smc_llc_wait(lgr, NULL, SMC_LLC_WAIT_FIRST_TIME, 0);
	if (!qentry) {
		rc = smc_llc_send_delete_link(link, link_new->link_id,
					      SMC_LLC_REQ, false,
					      SMC_LLC_DEL_LOST_PATH);
		return -ENOLINK;
	}
	if (qentry->msg.raw.hdr.common.llc_type != SMC_LLC_CONFIRM_LINK) {
		/* received DELETE_LINK instead */
		qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_RESP;
		smc_llc_send_message(link, &qentry->msg);
		smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
		return -ENOLINK;
	}
	smc_llc_save_peer_uid(qentry);
	smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);

	rc = smc_ib_modify_qp_rts(link_new);
	if (rc) {
		smc_llc_send_delete_link(link, link_new->link_id, SMC_LLC_REQ,
					 false, SMC_LLC_DEL_LOST_PATH);
		return -ENOLINK;
	}
	smc_wr_remember_qp_attr(link_new);

	rc = smcr_buf_reg_lgr(link_new);
	if (rc) {
		smc_llc_send_delete_link(link, link_new->link_id, SMC_LLC_REQ,
					 false, SMC_LLC_DEL_LOST_PATH);
		return -ENOLINK;
	}

	/* send CONFIRM LINK response over RoCE fabric */
	rc = smc_llc_send_confirm_link(link_new, SMC_LLC_RESP);
	if (rc) {
		smc_llc_send_delete_link(link, link_new->link_id, SMC_LLC_REQ,
					 false, SMC_LLC_DEL_LOST_PATH);
		return -ENOLINK;
	}
	smc_llc_link_active(link_new);
	if (lgr_new_t == SMC_LGR_ASYMMETRIC_LOCAL ||
	    lgr_new_t == SMC_LGR_ASYMMETRIC_PEER)
		smcr_lgr_set_type_asym(lgr, lgr_new_t, link_new->link_idx);
	else
		smcr_lgr_set_type(lgr, lgr_new_t);
	return 0;
}

/* store the rkeys received in the SMCv2 ADD_LINK extension */
static void smc_llc_save_add_link_rkeys(struct smc_link *link,
					struct smc_link *link_new,
					u8 *llc_msg)
{
	struct smc_llc_msg_add_link_v2_ext *ext;
	struct smc_link_group *lgr = link->lgr;
	int max, i;

	/* the extension follows the base ADD_LINK message in the rx buffer */
	ext = (struct smc_llc_msg_add_link_v2_ext *)(llc_msg +
						     SMC_WR_TX_SIZE);
	max = min_t(u8, ext->num_rkeys, SMC_LLC_RKEYS_PER_MSG_V2);
	down_write(&lgr->rmbs_lock);
	for (i = 0; i < max; i++) {
		smc_rtoken_set(lgr, link->link_idx, link_new->link_idx,
			       ext->rt[i].rmb_key,
			       ext->rt[i].rmb_vaddr_new,
			       ext->rt[i].rmb_key_new);
	}
	up_write(&lgr->rmbs_lock);
}

/* copy the peer's addressing data from an ADD_LINK message to the link */
static void smc_llc_save_add_link_info(struct smc_link *link,
				       struct smc_llc_msg_add_link *add_llc)
{
	link->peer_qpn = ntoh24(add_llc->sender_qp_num);
	memcpy(link->peer_gid, add_llc->sender_gid, SMC_GID_SIZE);
	memcpy(link->peer_mac, add_llc->sender_mac, ETH_ALEN);
	link->peer_psn = ntoh24(add_llc->initial_psn);
	link->peer_mtu = add_llc->qp_mtu;
}

/* as an SMC client, process an add link request */
int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry)
{
	struct smc_llc_msg_add_link *llc = &qentry->msg.add_link;
	enum smc_lgr_type lgr_new_t = SMC_LGR_SYMMETRIC;
	struct smc_link_group *lgr = smc_get_lgr(link);
	struct smc_init_info *ini = NULL;
	struct smc_link *lnk_new = NULL;
	int lnk_idx, rc = 0;

	if (!llc->qp_mtu)
		goto out_reject;
	ini = kzalloc(sizeof(*ini), GFP_KERNEL);
	if (!ini) {
		rc = -ENOMEM;
		goto out_reject;
	}

	if (lgr->type == SMC_LGR_SINGLE && lgr->max_links <= 1) {
		rc = 0;
		goto out_reject;
	}

	ini->vlan_id = lgr->vlan_id;
	if (lgr->smc_version == SMC_V2) {
		ini->check_smcrv2 = true;
		ini->smcrv2.saddr = lgr->saddr;
		ini->smcrv2.daddr = smc_ib_gid_to_ipv4(llc->sender_gid);
	}
	smc_pnet_find_alt_roce(lgr, ini, link->smcibdev);
	/* peer reuses its gid: the new link would be asymmetric on peer side */
	if (!memcmp(llc->sender_gid, link->peer_gid, SMC_GID_SIZE) &&
	    (lgr->smc_version == SMC_V2 ||
	     !memcmp(llc->sender_mac, link->peer_mac, ETH_ALEN))) {
		if (!ini->ib_dev && !ini->smcrv2.ib_dev_v2)
			goto out_reject;
		lgr_new_t = SMC_LGR_ASYMMETRIC_PEER;
	}
	/* no alternate device found: reuse the current one (asym local) */
	if (lgr->smc_version == SMC_V2 && !ini->smcrv2.ib_dev_v2) {
		lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL;
		ini->smcrv2.ib_dev_v2 = link->smcibdev;
		ini->smcrv2.ib_port_v2 = link->ibport;
	} else if (lgr->smc_version < SMC_V2 && !ini->ib_dev) {
		lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL;
		ini->ib_dev = link->smcibdev;
		ini->ib_port = link->ibport;
	}
	lnk_idx = smc_llc_alloc_alt_link(lgr, lgr_new_t);
	if (lnk_idx < 0)
		goto out_reject;
	lnk_new = &lgr->lnk[lnk_idx];
	rc = smcr_link_init(lgr, lnk_new, lnk_idx, ini);
	if (rc)
		goto out_reject;
	smc_llc_save_add_link_info(lnk_new, llc);
	lnk_new->link_id = llc->link_num;	/* SMC server assigns link id */
	smc_llc_link_set_uid(lnk_new);
	rc = smc_ib_ready_link(lnk_new);
	if (rc)
		goto out_clear_lnk;
	rc = smcr_buf_map_lgr(lnk_new);
	if (rc)
		goto out_clear_lnk;
	rc = smc_llc_send_add_link(link,
				   lnk_new->smcibdev->mac[lnk_new->ibport - 1],
				   lnk_new->gid, lnk_new, SMC_LLC_RESP);
	if (rc)
		goto out_clear_lnk;
	if (lgr->smc_version == SMC_V2) {
		u8 *llc_msg = smc_link_shared_v2_rxbuf(link) ?
			(u8 *)lgr->wr_rx_buf_v2 : (u8 *)llc;
		smc_llc_save_add_link_rkeys(link, lnk_new, llc_msg);
	} else {
		rc = smc_llc_cli_rkey_exchange(link, lnk_new);
		if (rc) {
			rc = 0;
			goto out_clear_lnk;
		}
	}
	rc = smc_llc_cli_conf_link(link, ini, lnk_new, lgr_new_t);
	if (!rc)
		goto out;
out_clear_lnk:
	lnk_new->state = SMC_LNK_INACTIVE;
	smcr_link_clear(lnk_new, false);
out_reject:
	smc_llc_cli_add_link_reject(qentry);
out:
	kfree(ini);
	kfree(qentry);
	return rc;
}

/* send an SMCv2 REQ_ADD_LINK message listing our alternate gids */
static void smc_llc_send_request_add_link(struct smc_link *link)
{
	struct smc_llc_msg_req_add_link_v2 *llc;
	struct smc_wr_tx_pend_priv *pend;
	struct smc_wr_v2_buf *wr_buf;
	struct smc_gidlist gidlist;
	int rc, len, i;

	if (!smc_wr_tx_link_hold(link))
		return;
	if (link->lgr->type == SMC_LGR_SYMMETRIC ||
	    link->lgr->type == SMC_LGR_ASYMMETRIC_PEER)
		goto put_out;

	smc_fill_gid_list(link->lgr, &gidlist, link->smcibdev, link->gid);
	if (gidlist.len <= 1)
		goto put_out;

	rc = smc_llc_add_pending_send_v2(link, &wr_buf, &pend);
	if (rc)
		goto put_out;
	llc = (struct smc_llc_msg_req_add_link_v2 *)wr_buf;
	memset(llc, 0, SMC_WR_TX_SIZE);

	llc->hd.common.llc_type =
SMC_LLC_REQ_ADD_LINK; for (i = 0; i < gidlist.len; i++) memcpy(llc->gid[i], gidlist.list[i], sizeof(gidlist.list[0])); llc->gid_cnt = gidlist.len; len = sizeof(*llc) + (gidlist.len * sizeof(gidlist.list[0])); smc_llc_init_msg_hdr(&llc->hd, link->lgr, len); rc = smc_wr_tx_v2_send(link, pend, len); if (!rc) /* set REQ_ADD_LINK flow and wait for response from peer */ link->lgr->llc_flow_lcl.type = SMC_LLC_FLOW_REQ_ADD_LINK; put_out: smc_wr_tx_link_put(link); } /* as an SMC client, invite server to start the add_link processing */ static void smc_llc_cli_add_link_invite(struct smc_link *link, struct smc_llc_qentry *qentry) { struct smc_link_group *lgr = smc_get_lgr(link); struct smc_init_info *ini = NULL; if (lgr->smc_version == SMC_V2) { smc_llc_send_request_add_link(link); goto out; } if (lgr->type == SMC_LGR_SYMMETRIC || lgr->type == SMC_LGR_ASYMMETRIC_PEER) goto out; if (lgr->type == SMC_LGR_SINGLE && lgr->max_links <= 1) goto out; ini = kzalloc(sizeof(*ini), GFP_KERNEL); if (!ini) goto out; ini->vlan_id = lgr->vlan_id; smc_pnet_find_alt_roce(lgr, ini, link->smcibdev); if (!ini->ib_dev) goto out; smc_llc_send_add_link(link, ini->ib_dev->mac[ini->ib_port - 1], ini->ib_gid, NULL, SMC_LLC_REQ); out: kfree(ini); kfree(qentry); } static bool smc_llc_is_empty_llc_message(union smc_llc_msg *llc) { int i; for (i = 0; i < ARRAY_SIZE(llc->raw.data); i++) if (llc->raw.data[i]) return false; return true; } static bool smc_llc_is_local_add_link(union smc_llc_msg *llc) { if (llc->raw.hdr.common.llc_type == SMC_LLC_ADD_LINK && smc_llc_is_empty_llc_message(llc)) return true; return false; } static void smc_llc_process_cli_add_link(struct smc_link_group *lgr) { struct smc_llc_qentry *qentry; qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl); down_write(&lgr->llc_conf_mutex); if (smc_llc_is_local_add_link(&qentry->msg)) smc_llc_cli_add_link_invite(qentry->link, qentry); else smc_llc_cli_add_link(qentry->link, qentry); up_write(&lgr->llc_conf_mutex); } static int 
smc_llc_active_link_count(struct smc_link_group *lgr)
{
	int i, link_count = 0;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (!smc_link_active(&lgr->lnk[i]))
			continue;
		link_count++;
	}
	return link_count;
}

/* find the asymmetric link when 3 links are established  */
static struct smc_link *smc_llc_find_asym_link(struct smc_link_group *lgr)
{
	int asym_idx = -ENOENT;
	int i, j, k;
	bool found;

	/* determine asymmetric link */
	found = false;
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		for (j = i + 1; j < SMC_LINKS_PER_LGR_MAX; j++) {
			if (!smc_link_usable(&lgr->lnk[i]) ||
			    !smc_link_usable(&lgr->lnk[j]))
				continue;
			/* two local links sharing one GID mark the pair */
			if (!memcmp(lgr->lnk[i].gid, lgr->lnk[j].gid,
				    SMC_GID_SIZE)) {
				found = true;	/* asym_lnk is i or j */
				break;
			}
		}
		if (found)
			break;
	}
	if (!found)
		goto out; /* no asymmetric link */
	/* the one of the pair whose peer GID also appears on a third
	 * link is the asymmetric one
	 */
	for (k = 0; k < SMC_LINKS_PER_LGR_MAX; k++) {
		if (!smc_link_usable(&lgr->lnk[k]))
			continue;
		if (k != i &&
		    !memcmp(lgr->lnk[i].peer_gid, lgr->lnk[k].peer_gid,
			    SMC_GID_SIZE)) {
			asym_idx = i;
			break;
		}
		if (k != j &&
		    !memcmp(lgr->lnk[j].peer_gid, lgr->lnk[k].peer_gid,
			    SMC_GID_SIZE)) {
			asym_idx = j;
			break;
		}
	}
out:
	return (asym_idx < 0) ? NULL : &lgr->lnk[asym_idx];
}

/* tear down the asymmetric link of an lgr, moving its connections first */
static void smc_llc_delete_asym_link(struct smc_link_group *lgr)
{
	struct smc_link *lnk_new = NULL, *lnk_asym;
	struct smc_llc_qentry *qentry;
	int rc;

	lnk_asym = smc_llc_find_asym_link(lgr);
	if (!lnk_asym)
		return; /* no asymmetric link */
	if (!smc_link_downing(&lnk_asym->state))
		return;
	lnk_new = smc_switch_conns(lgr, lnk_asym, false);
	smc_wr_tx_wait_no_pending_sends(lnk_asym);
	if (!lnk_new)
		goto out_free;
	/* change flow type from ADD_LINK into DEL_LINK */
	lgr->llc_flow_lcl.type = SMC_LLC_FLOW_DEL_LINK;
	rc = smc_llc_send_delete_link(lnk_new, lnk_asym->link_id, SMC_LLC_REQ,
				      true, SMC_LLC_DEL_NO_ASYM_NEEDED);
	if (rc) {
		smcr_link_down_cond(lnk_new);
		goto out_free;
	}
	qentry = smc_llc_wait(lgr, lnk_new, SMC_LLC_WAIT_TIME,
			      SMC_LLC_DELETE_LINK);
	if (!qentry) {
		smcr_link_down_cond(lnk_new);
		goto out_free;
	}
	smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
out_free:
	smcr_link_clear(lnk_asym, true);
}

/* server side: exchange rkeys for a new link via ADD_LINK_CONT messages */
static int smc_llc_srv_rkey_exchange(struct smc_link *link,
				     struct smc_link *link_new)
{
	struct smc_llc_msg_add_link_cont *addc_llc;
	struct smc_link_group *lgr = link->lgr;
	u8 max, num_rkeys_send, num_rkeys_recv;
	struct smc_llc_qentry *qentry = NULL;
	struct smc_buf_desc *buf_pos;
	int buf_lst;
	int rc = 0;
	int i;

	down_write(&lgr->rmbs_lock);
	num_rkeys_send = lgr->conns_num;
	buf_pos = smc_llc_get_first_rmb(lgr, &buf_lst);
	do {
		/* alternate: send our next batch, then receive peer's */
		smc_llc_add_link_cont(link, link_new, &num_rkeys_send,
				      &buf_lst, &buf_pos);
		qentry = smc_llc_wait(lgr, link, SMC_LLC_WAIT_TIME,
				      SMC_LLC_ADD_LINK_CONT);
		if (!qentry) {
			rc = -ETIMEDOUT;
			goto out;
		}
		addc_llc = &qentry->msg.add_link_cont;

		num_rkeys_recv = addc_llc->num_rkeys;
		max = min_t(u8, num_rkeys_recv, SMC_LLC_RKEYS_PER_CONT_MSG);
		for (i = 0; i < max; i++) {
			smc_rtoken_set(lgr, link->link_idx, link_new->link_idx,
				       addc_llc->rt[i].rmb_key,
				       addc_llc->rt[i].rmb_vaddr_new,
				       addc_llc->rt[i].rmb_key_new);
			num_rkeys_recv--;
		}
		smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
	} while (num_rkeys_send || num_rkeys_recv);
out:
	up_write(&lgr->rmbs_lock);
	return rc;
}

/* server side: confirm the new link and update the lgr type accordingly */
static int smc_llc_srv_conf_link(struct smc_link *link,
				 struct smc_link *link_new,
				 enum smc_lgr_type lgr_new_t)
{
	struct smc_link_group *lgr = link->lgr;
	struct smc_llc_qentry *qentry = NULL;
	int rc;

	/* send CONFIRM LINK request over the RoCE fabric */
	rc = smc_llc_send_confirm_link(link_new, SMC_LLC_REQ);
	if (rc)
		return -ENOLINK;
	/* receive CONFIRM LINK response over the RoCE fabric */
	qentry = smc_llc_wait(lgr, link, SMC_LLC_WAIT_FIRST_TIME, 0);
	if (!qentry ||
	    qentry->msg.raw.hdr.common.llc_type != SMC_LLC_CONFIRM_LINK) {
		/* send DELETE LINK */
		smc_llc_send_delete_link(link, link_new->link_id, SMC_LLC_REQ,
					 false, SMC_LLC_DEL_LOST_PATH);
		if (qentry)
			smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
		return -ENOLINK;
	}
	smc_llc_save_peer_uid(qentry);
	smc_llc_link_active(link_new);
	if (lgr_new_t == SMC_LGR_ASYMMETRIC_LOCAL ||
	    lgr_new_t == SMC_LGR_ASYMMETRIC_PEER)
		smcr_lgr_set_type_asym(lgr, lgr_new_t, link_new->link_idx);
	else
		smcr_lgr_set_type(lgr, lgr_new_t);
	smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
	return 0;
}

/* send back an (emptied) REQ_ADD_LINK message as response */
static void smc_llc_send_req_add_link_response(struct smc_llc_qentry *qentry)
{
	qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_RESP;
	smc_llc_init_msg_hdr(&qentry->msg.raw.hdr, qentry->link->lgr,
			     sizeof(qentry->msg));
	memset(&qentry->msg.raw.data, 0, sizeof(qentry->msg.raw.data));
	smc_llc_send_message(qentry->link, &qentry->msg);
}

/* server side: initiate and drive the whole add_link flow */
int smc_llc_srv_add_link(struct smc_link *link,
			 struct smc_llc_qentry *req_qentry)
{
	enum smc_lgr_type lgr_new_t = SMC_LGR_SYMMETRIC;
	struct smc_link_group *lgr = link->lgr;
	struct smc_llc_msg_add_link *add_llc;
	struct smc_llc_qentry *qentry = NULL;
	bool send_req_add_link_resp = false;
	struct smc_link *link_new = NULL;
	struct smc_init_info *ini = NULL;
	int lnk_idx, rc = 0;

	if (req_qentry &&
	    req_qentry->msg.raw.hdr.common.llc_type == SMC_LLC_REQ_ADD_LINK)
		send_req_add_link_resp = true;

	ini = kzalloc(sizeof(*ini), GFP_KERNEL);
	if (!ini) {
		rc = -ENOMEM;
		goto out;
	}

	if (lgr->type == SMC_LGR_SINGLE &&
	    lgr->max_links <= 1) {
		rc = 0;
		goto out;
	}

	/* ignore client add link recommendation, start new flow */
	ini->vlan_id = lgr->vlan_id;
	if (lgr->smc_version == SMC_V2) {
		ini->check_smcrv2 = true;
		ini->smcrv2.saddr = lgr->saddr;
		if (send_req_add_link_resp) {
			struct smc_llc_msg_req_add_link_v2 *req_add =
				&req_qentry->msg.req_add_link;

			ini->smcrv2.daddr = smc_ib_gid_to_ipv4(req_add->gid[0]);
		}
	}
	smc_pnet_find_alt_roce(lgr, ini, link->smcibdev);
	if (lgr->smc_version == SMC_V2 && !ini->smcrv2.ib_dev_v2) {
		/* no alternate device - reuse current one, asym on our side */
		lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL;
		ini->smcrv2.ib_dev_v2 = link->smcibdev;
		ini->smcrv2.ib_port_v2 = link->ibport;
	} else if (lgr->smc_version < SMC_V2 && !ini->ib_dev) {
		lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL;
		ini->ib_dev = link->smcibdev;
		ini->ib_port = link->ibport;
	}
	lnk_idx = smc_llc_alloc_alt_link(lgr, lgr_new_t);
	if (lnk_idx < 0) {
		rc = 0;
		goto out;
	}
	rc = smcr_link_init(lgr, &lgr->lnk[lnk_idx], lnk_idx, ini);
	if (rc)
		goto out;
	link_new = &lgr->lnk[lnk_idx];

	rc = smcr_buf_map_lgr(link_new);
	if (rc)
		goto out_err;

	rc = smc_llc_send_add_link(link,
				   link_new->smcibdev->mac[link_new->ibport-1],
				   link_new->gid, link_new, SMC_LLC_REQ);
	if (rc)
		goto out_err;
	send_req_add_link_resp = false;
	/* receive ADD LINK response over the RoCE fabric */
	qentry = smc_llc_wait(lgr, link, SMC_LLC_WAIT_TIME, SMC_LLC_ADD_LINK);
	if (!qentry) {
		rc = -ETIMEDOUT;
		goto out_err;
	}
	add_llc = &qentry->msg.add_link;
	if (add_llc->hd.flags & SMC_LLC_FLAG_ADD_LNK_REJ) {
		smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
		rc = -ENOLINK;
		goto out_err;
	}
	/* peer answered from the same device - lgr stays asym on peer side */
	if (lgr->type == SMC_LGR_SINGLE &&
	    (!memcmp(add_llc->sender_gid, link->peer_gid, SMC_GID_SIZE) &&
	     (lgr->smc_version == SMC_V2 ||
	      !memcmp(add_llc->sender_mac, link->peer_mac, ETH_ALEN)))) {
		lgr_new_t = SMC_LGR_ASYMMETRIC_PEER;
	}
	smc_llc_save_add_link_info(link_new, add_llc);
	smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);

	rc = smc_ib_ready_link(link_new);
	if (rc)
		goto out_err;
	rc = smcr_buf_reg_lgr(link_new);
	if (rc)
		goto out_err;
	if (lgr->smc_version == SMC_V2) {
		u8
		u8 *llc_msg = smc_link_shared_v2_rxbuf(link) ?
			(u8 *)lgr->wr_rx_buf_v2 : (u8 *)add_llc;

		/* v2: rkeys were carried inline in the ADD_LINK response */
		smc_llc_save_add_link_rkeys(link, link_new, llc_msg);
	} else {
		rc = smc_llc_srv_rkey_exchange(link, link_new);
		if (rc)
			goto out_err;
	}
	rc = smc_llc_srv_conf_link(link, link_new, lgr_new_t);
	if (rc)
		goto out_err;
	kfree(ini);
	return 0;
out_err:
	if (link_new) {
		link_new->state = SMC_LNK_INACTIVE;
		smcr_link_clear(link_new, false);
	}
out:
	kfree(ini);
	if (send_req_add_link_resp)
		smc_llc_send_req_add_link_response(req_qentry);
	return rc;
}

/* worker-context entry for server-side add_link processing */
static void smc_llc_process_srv_add_link(struct smc_link_group *lgr)
{
	struct smc_link *link = lgr->llc_flow_lcl.qentry->link;
	struct smc_llc_qentry *qentry;
	int rc;

	qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl);

	down_write(&lgr->llc_conf_mutex);
	rc = smc_llc_srv_add_link(link, qentry);
	if (!rc && lgr->type == SMC_LGR_SYMMETRIC) {
		/* delete any asymmetric link */
		smc_llc_delete_asym_link(lgr);
	}
	up_write(&lgr->llc_conf_mutex);
	kfree(qentry);
}

/* enqueue a local add_link req to trigger a new add_link flow */
void smc_llc_add_link_local(struct smc_link *link)
{
	struct smc_llc_msg_add_link add_llc = {};

	add_llc.hd.common.llc_type = SMC_LLC_ADD_LINK;
	smc_llc_init_msg_hdr(&add_llc.hd, link->lgr, sizeof(add_llc));
	/* no dev and port needed */
	smc_llc_enqueue(link, (union smc_llc_msg *)&add_llc);
}

/* worker to process an add link message */
static void smc_llc_add_link_work(struct work_struct *work)
{
	struct smc_link_group *lgr = container_of(work, struct smc_link_group,
						  llc_add_link_work);

	if (list_empty(&lgr->list)) {
		/* link group is terminating */
		smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
		goto out;
	}

	if (lgr->role == SMC_CLNT)
		smc_llc_process_cli_add_link(lgr);
	else
		smc_llc_process_srv_add_link(lgr);
out:
	if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_REQ_ADD_LINK)
		smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
}

/* enqueue a local del_link msg to trigger a new del_link flow,
 * called only for role SMC_SERV
 */
void smc_llc_srv_delete_link_local(struct smc_link *link, u8 del_link_id)
{
	struct smc_llc_msg_del_link del_llc = {};

	del_llc.hd.common.llc_type = SMC_LLC_DELETE_LINK;
	smc_llc_init_msg_hdr(&del_llc.hd, link->lgr, sizeof(del_llc));
	del_llc.link_num = del_link_id;
	del_llc.reason = htonl(SMC_LLC_DEL_LOST_PATH);
	del_llc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
	smc_llc_enqueue(link, (union smc_llc_msg *)&del_llc);
}

/* client side: process a DELETE_LINK request from the server */
static void smc_llc_process_cli_delete_link(struct smc_link_group *lgr)
{
	struct smc_link *lnk_del = NULL, *lnk_asym, *lnk;
	struct smc_llc_msg_del_link *del_llc;
	struct smc_llc_qentry *qentry;
	int active_links;
	int lnk_idx;

	qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl);
	lnk = qentry->link;
	del_llc = &qentry->msg.delete_link;

	if (del_llc->hd.flags & SMC_LLC_FLAG_DEL_LINK_ALL) {
		smc_lgr_terminate_sched(lgr);
		goto out;
	}
	down_write(&lgr->llc_conf_mutex);
	/* delete single link */
	for (lnk_idx = 0; lnk_idx < SMC_LINKS_PER_LGR_MAX; lnk_idx++) {
		if (lgr->lnk[lnk_idx].link_id != del_llc->link_num)
			continue;
		lnk_del = &lgr->lnk[lnk_idx];
		break;
	}
	del_llc->hd.flags |= SMC_LLC_FLAG_RESP;
	if (!lnk_del) {
		/* link was not found */
		del_llc->reason = htonl(SMC_LLC_DEL_NOLNK);
		smc_llc_send_message(lnk, &qentry->msg);
		goto out_unlock;
	}
	lnk_asym = smc_llc_find_asym_link(lgr);

	del_llc->reason = 0;
	smc_llc_send_message(lnk, &qentry->msg); /* response */

	if (smc_link_downing(&lnk_del->state))
		smc_switch_conns(lgr, lnk_del, false);
	smcr_link_clear(lnk_del, true);

	active_links = smc_llc_active_link_count(lgr);
	if (lnk_del == lnk_asym) {
		/* expected deletion of asym link, don't change lgr state */
	} else if (active_links == 1) {
		smcr_lgr_set_type(lgr, SMC_LGR_SINGLE);
	} else if (!active_links) {
		smcr_lgr_set_type(lgr, SMC_LGR_NONE);
		smc_lgr_terminate_sched(lgr);
	}
out_unlock:
	up_write(&lgr->llc_conf_mutex);
out:
	kfree(qentry);
}

/* try to send a DELETE LINK ALL request on any active link,
 * waiting for send completion
 */
void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, u32 rsn)
{
	struct
	struct smc_llc_msg_del_link delllc = {};
	int i;

	delllc.hd.common.llc_type = SMC_LLC_DELETE_LINK;
	smc_llc_init_msg_hdr(&delllc.hd, lgr, sizeof(delllc));
	if (ord)
		delllc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
	delllc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL;
	delllc.reason = htonl(rsn);

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (!smc_link_sendable(&lgr->lnk[i]))
			continue;
		if (!smc_llc_send_message_wait(&lgr->lnk[i], &delllc))
			break;	/* sent successfully on this link - done */
	}
}

/* server side: process a DELETE_LINK request (from peer or local) */
static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr)
{
	struct smc_llc_msg_del_link *del_llc;
	struct smc_link *lnk, *lnk_del;
	struct smc_llc_qentry *qentry;
	int active_links;
	int i;

	down_write(&lgr->llc_conf_mutex);
	qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl);
	lnk = qentry->link;
	del_llc = &qentry->msg.delete_link;

	if (qentry->msg.delete_link.hd.flags & SMC_LLC_FLAG_DEL_LINK_ALL) {
		/* delete entire lgr */
		smc_llc_send_link_delete_all(lgr, true, ntohl(
					      qentry->msg.delete_link.reason));
		smc_lgr_terminate_sched(lgr);
		goto out;
	}
	/* delete single link */
	lnk_del = NULL;
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (lgr->lnk[i].link_id == del_llc->link_num) {
			lnk_del = &lgr->lnk[i];
			break;
		}
	}
	if (!lnk_del)
		goto out; /* asymmetric link already deleted */

	if (smc_link_downing(&lnk_del->state)) {
		if (smc_switch_conns(lgr, lnk_del, false))
			smc_wr_tx_wait_no_pending_sends(lnk_del);
	}
	if (!list_empty(&lgr->list)) {
		/* qentry is either a request from peer (send it back to
		 * initiate the DELETE_LINK processing), or a locally
		 * enqueued DELETE_LINK request (forward it)
		 */
		if (!smc_llc_send_message(lnk, &qentry->msg)) {
			struct smc_llc_qentry *qentry2;

			qentry2 = smc_llc_wait(lgr, lnk, SMC_LLC_WAIT_TIME,
					       SMC_LLC_DELETE_LINK);
			if (qentry2)
				smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
		}
	}
	smcr_link_clear(lnk_del, true);

	active_links = smc_llc_active_link_count(lgr);
	if (active_links == 1) {
		smcr_lgr_set_type(lgr, SMC_LGR_SINGLE);
	} else if (!active_links) {
		smcr_lgr_set_type(lgr, SMC_LGR_NONE);
		smc_lgr_terminate_sched(lgr);
	}
	if (lgr->type == SMC_LGR_SINGLE && !list_empty(&lgr->list)) {
		/* trigger setup of asymm alt link */
		smc_llc_add_link_local(lnk);
	}
out:
	up_write(&lgr->llc_conf_mutex);
	kfree(qentry);
}

/* worker to process a delete link message */
static void smc_llc_delete_link_work(struct work_struct *work)
{
	struct smc_link_group *lgr = container_of(work, struct smc_link_group,
						  llc_del_link_work);

	if (list_empty(&lgr->list)) {
		/* link group is terminating */
		smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
		goto out;
	}

	if (lgr->role == SMC_CLNT)
		smc_llc_process_cli_delete_link(lgr);
	else
		smc_llc_process_srv_delete_link(lgr);
out:
	smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
}

/* process a confirm_rkey request from peer, remote flow */
static void smc_llc_rmt_conf_rkey(struct smc_link_group *lgr)
{
	struct smc_llc_msg_confirm_rkey *llc;
	struct smc_llc_qentry *qentry;
	struct smc_link *link;
	int num_entries;
	int rk_idx;
	int i;

	qentry = lgr->llc_flow_rmt.qentry;
	llc = &qentry->msg.confirm_rkey;
	link = qentry->link;

	num_entries = llc->rtoken[0].num_rkeys;
	if (num_entries > SMC_LLC_RKEYS_PER_MSG)
		goto out_err;
	/* first rkey entry is for receiving link */
	rk_idx = smc_rtoken_add(link,
				llc->rtoken[0].rmb_vaddr,
				llc->rtoken[0].rmb_key);
	if (rk_idx < 0)
		goto out_err;

	for (i = 1; i <= min_t(u8, num_entries, SMC_LLC_RKEYS_PER_MSG - 1); i++)
		smc_rtoken_set2(lgr, rk_idx, llc->rtoken[i].link_id,
				llc->rtoken[i].rmb_vaddr,
				llc->rtoken[i].rmb_key);
	/* max links is 3 so there is no need to support conf_rkey_cont msgs */
	goto out;
out_err:
	llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
	llc->hd.flags |= SMC_LLC_FLAG_RKEY_RETRY;
out:
	llc->hd.flags |= SMC_LLC_FLAG_RESP;
	smc_llc_init_msg_hdr(&llc->hd, link->lgr, sizeof(*llc));
	smc_llc_send_message(link, &qentry->msg);
	smc_llc_flow_qentry_del(&lgr->llc_flow_rmt);
}

/* process a delete_rkey request from peer, remote flow */
static void smc_llc_rmt_delete_rkey(struct smc_link_group *lgr)
{
	struct smc_llc_msg_delete_rkey *llc;
	struct smc_llc_qentry *qentry;
	struct smc_link *link;
	u8
	u8 err_mask = 0;
	int i, max;

	qentry = lgr->llc_flow_rmt.qentry;
	llc = &qentry->msg.delete_rkey;
	link = qentry->link;

	if (lgr->smc_version == SMC_V2) {
		struct smc_llc_msg_delete_rkey_v2 *llcv2;

		if (smc_link_shared_v2_rxbuf(link)) {
			memcpy(lgr->wr_rx_buf_v2, llc, sizeof(*llc));
			llcv2 = (struct smc_llc_msg_delete_rkey_v2 *)lgr->wr_rx_buf_v2;
		} else {
			llcv2 = (struct smc_llc_msg_delete_rkey_v2 *)llc;
		}
		llcv2->num_inval_rkeys = 0;

		max = min_t(u8, llcv2->num_rkeys, SMC_LLC_RKEYS_PER_MSG_V2);
		for (i = 0; i < max; i++) {
			if (smc_rtoken_delete(link, llcv2->rkey[i]))
				llcv2->num_inval_rkeys++;
		}
		/* build the v1-shaped response from the v2 request */
		memset(&llc->rkey[0], 0, sizeof(llc->rkey));
		memset(&llc->reserved2, 0, sizeof(llc->reserved2));
		smc_llc_init_msg_hdr(&llc->hd, link->lgr, sizeof(*llc));
		if (llcv2->num_inval_rkeys) {
			llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
			llc->err_mask = llcv2->num_inval_rkeys;
		}
		goto finish;
	}

	max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX);
	for (i = 0; i < max; i++) {
		if (smc_rtoken_delete(link, llc->rkey[i]))
			err_mask |= 1 << (SMC_LLC_DEL_RKEY_MAX - 1 - i);
	}
	if (err_mask) {
		llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
		llc->err_mask = err_mask;
	}
finish:
	llc->hd.flags |= SMC_LLC_FLAG_RESP;
	smc_llc_send_message(link, &qentry->msg);
	smc_llc_flow_qentry_del(&lgr->llc_flow_rmt);
}

/* unknown/illegal LLC message - log and terminate the link group */
static void smc_llc_protocol_violation(struct smc_link_group *lgr, u8 type)
{
	pr_warn_ratelimited("smc: SMC-R lg %*phN net %llu LLC protocol violation: "
			    "llc_type %d\n", SMC_LGR_ID_SIZE, &lgr->id,
			    lgr->net->net_cookie, type);
	smc_llc_set_termination_rsn(lgr, SMC_LLC_DEL_PROT_VIOL);
	smc_lgr_terminate_sched(lgr);
}

/* flush the llc event queue */
static void smc_llc_event_flush(struct smc_link_group *lgr)
{
	struct smc_llc_qentry *qentry, *q;

	spin_lock_bh(&lgr->llc_event_q_lock);
	list_for_each_entry_safe(qentry, q, &lgr->llc_event_q, list) {
		list_del_init(&qentry->list);
		kfree(qentry);
	}
	spin_unlock_bh(&lgr->llc_event_q_lock);
}

/* dispatch one dequeued LLC request message; consumes qentry */
static void smc_llc_event_handler(struct smc_llc_qentry *qentry)
{
	union smc_llc_msg *llc = &qentry->msg;
	struct smc_link *link = qentry->link;
	struct smc_link_group *lgr = link->lgr;

	if (!smc_link_usable(link))
		goto out;

	switch (llc->raw.hdr.common.llc_type) {
	case SMC_LLC_TEST_LINK:
		llc->test_link.hd.flags |= SMC_LLC_FLAG_RESP;
		smc_llc_send_message(link, llc);
		break;
	case SMC_LLC_ADD_LINK:
		if (list_empty(&lgr->list))
			goto out;	/* lgr is terminating */
		if (lgr->role == SMC_CLNT) {
			if (smc_llc_is_local_add_link(llc)) {
				if (lgr->llc_flow_lcl.type ==
				    SMC_LLC_FLOW_ADD_LINK)
					break;	/* add_link in progress */
				if (smc_llc_flow_start(&lgr->llc_flow_lcl,
						       qentry)) {
					schedule_work(&lgr->llc_add_link_work);
				}
				return;
			}
			if (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_ADD_LINK &&
			    !lgr->llc_flow_lcl.qentry) {
				/* a flow is waiting for this message */
				smc_llc_flow_qentry_set(&lgr->llc_flow_lcl,
							qentry);
				wake_up(&lgr->llc_msg_waiter);
				return;
			}
			if (lgr->llc_flow_lcl.type ==
					SMC_LLC_FLOW_REQ_ADD_LINK) {
				/* server started add_link processing */
				lgr->llc_flow_lcl.type = SMC_LLC_FLOW_ADD_LINK;
				smc_llc_flow_qentry_set(&lgr->llc_flow_lcl,
							qentry);
				schedule_work(&lgr->llc_add_link_work);
				return;
			}
			if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) {
				schedule_work(&lgr->llc_add_link_work);
			}
		} else if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) {
			/* as smc server, handle client suggestion */
			schedule_work(&lgr->llc_add_link_work);
		}
		return;
	case SMC_LLC_CONFIRM_LINK:
	case SMC_LLC_ADD_LINK_CONT:
		if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
			/* a flow is waiting for this message */
			smc_llc_flow_qentry_set(&lgr->llc_flow_lcl, qentry);
			wake_up(&lgr->llc_msg_waiter);
			return;
		}
		break;
	case SMC_LLC_DELETE_LINK:
		if (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_ADD_LINK &&
		    !lgr->llc_flow_lcl.qentry) {
			/* DEL LINK REQ during ADD LINK SEQ */
			smc_llc_flow_qentry_set(&lgr->llc_flow_lcl, qentry);
			wake_up(&lgr->llc_msg_waiter);
		} else if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) {
			schedule_work(&lgr->llc_del_link_work);
		}
		return;
	case SMC_LLC_CONFIRM_RKEY:
		/* new request from remote, assign to remote flow */
		if (smc_llc_flow_start(&lgr->llc_flow_rmt, qentry)) {
			/* process here, does not wait for more llc msgs */
			smc_llc_rmt_conf_rkey(lgr);
			smc_llc_flow_stop(lgr, &lgr->llc_flow_rmt);
		}
		return;
	case SMC_LLC_CONFIRM_RKEY_CONT:
		/* not used because max links is 3, and 3 rkeys fit into
		 * one CONFIRM_RKEY message
		 */
		break;
	case SMC_LLC_DELETE_RKEY:
		/* new request from remote, assign to remote flow */
		if (smc_llc_flow_start(&lgr->llc_flow_rmt, qentry)) {
			/* process here, does not wait for more llc msgs */
			smc_llc_rmt_delete_rkey(lgr);
			smc_llc_flow_stop(lgr, &lgr->llc_flow_rmt);
		}
		return;
	case SMC_LLC_REQ_ADD_LINK:
		/* handle response here, smc_llc_flow_stop() cannot be called
		 * in tasklet context
		 */
		if (lgr->role == SMC_CLNT &&
		    lgr->llc_flow_lcl.type == SMC_LLC_FLOW_REQ_ADD_LINK &&
		    (llc->raw.hdr.flags & SMC_LLC_FLAG_RESP)) {
			smc_llc_flow_stop(link->lgr, &lgr->llc_flow_lcl);
		} else if (lgr->role == SMC_SERV) {
			if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) {
				/* as smc server, handle client suggestion */
				lgr->llc_flow_lcl.type = SMC_LLC_FLOW_ADD_LINK;
				schedule_work(&lgr->llc_add_link_work);
			}
			return;
		}
		break;
	default:
		smc_llc_protocol_violation(lgr, llc->raw.hdr.common.type);
		break;
	}
out:
	kfree(qentry);
}

/* worker to process llc messages on the event queue */
static void smc_llc_event_work(struct work_struct *work)
{
	struct smc_link_group *lgr = container_of(work, struct smc_link_group,
						  llc_event_work);
	struct smc_llc_qentry *qentry;

	if (!lgr->llc_flow_lcl.type && lgr->delayed_event) {
		qentry = lgr->delayed_event;
		lgr->delayed_event = NULL;
		if (smc_link_usable(qentry->link))
			smc_llc_event_handler(qentry);
		else
			kfree(qentry);
	}

again:
	spin_lock_bh(&lgr->llc_event_q_lock);
	if (!list_empty(&lgr->llc_event_q)) {
		qentry = list_first_entry(&lgr->llc_event_q,
					  struct smc_llc_qentry, list);
		list_del_init(&qentry->list);
		spin_unlock_bh(&lgr->llc_event_q_lock);
		smc_llc_event_handler(qentry);
		goto again;
	}
	spin_unlock_bh(&lgr->llc_event_q_lock);
}

/* process llc responses in tasklet context */
static void smc_llc_rx_response(struct smc_link *link,
				struct smc_llc_qentry *qentry)
{
	enum smc_llc_flowtype flowtype = link->lgr->llc_flow_lcl.type;
	struct smc_llc_flow *flow = &link->lgr->llc_flow_lcl;
	u8 llc_type = qentry->msg.raw.hdr.common.llc_type;

	switch (llc_type) {
	case SMC_LLC_TEST_LINK:
		if (smc_link_active(link))
			complete(&link->llc_testlink_resp);
		break;
	case SMC_LLC_ADD_LINK:
	case SMC_LLC_ADD_LINK_CONT:
	case SMC_LLC_CONFIRM_LINK:
		if (flowtype != SMC_LLC_FLOW_ADD_LINK || flow->qentry)
			break;	/* drop out-of-flow response */
		goto assign;
	case SMC_LLC_DELETE_LINK:
		if (flowtype != SMC_LLC_FLOW_DEL_LINK || flow->qentry)
			break;	/* drop out-of-flow response */
		goto assign;
	case SMC_LLC_CONFIRM_RKEY:
	case SMC_LLC_DELETE_RKEY:
		if (flowtype != SMC_LLC_FLOW_RKEY || flow->qentry)
			break;	/* drop out-of-flow response */
		goto assign;
	case SMC_LLC_CONFIRM_RKEY_CONT:
		/* not used because max links is 3 */
		break;
	default:
		smc_llc_protocol_violation(link->lgr,
					   qentry->msg.raw.hdr.common.type);
		break;
	}
	kfree(qentry);
	return;
assign:
	/* assign responses to the local flow, we requested them */
	smc_llc_flow_qentry_set(&link->lgr->llc_flow_lcl, qentry);
	wake_up(&link->lgr->llc_msg_waiter);
}

/* copy an LLC msg to a qentry; responses are handled inline (tasklet),
 * requests go to the event queue for the worker
 */
static void smc_llc_enqueue(struct smc_link *link, union smc_llc_msg *llc)
{
	struct smc_link_group *lgr = link->lgr;
	struct smc_llc_qentry *qentry;
	unsigned long flags;

	qentry = kmalloc(sizeof(*qentry), GFP_ATOMIC);
	if (!qentry)
		return;
	qentry->link = link;
	INIT_LIST_HEAD(&qentry->list);
	memcpy(&qentry->msg, llc, sizeof(union smc_llc_msg));

	/* process responses immediately */
	if ((llc->raw.hdr.flags & SMC_LLC_FLAG_RESP) &&
	    llc->raw.hdr.common.llc_type != SMC_LLC_REQ_ADD_LINK) {
		smc_llc_rx_response(link, qentry);
		return;
	}

	/* add requests to event queue */
	spin_lock_irqsave(&lgr->llc_event_q_lock, flags);
	list_add_tail(&qentry->list, &lgr->llc_event_q);
	spin_unlock_irqrestore(&lgr->llc_event_q_lock, flags);
	queue_work(system_highpri_wq,
		   &lgr->llc_event_work);
}

/* copy received msg and add it to the event queue */
static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
{
	struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
	union smc_llc_msg *llc = buf;

	if (wc->byte_len < sizeof(*llc))
		return; /* short message */
	if (!llc->raw.hdr.common.llc_version) {
		/* v1: length field must match exactly */
		if (llc->raw.hdr.length != sizeof(*llc))
			return; /* invalid message */
	} else {
		/* v2: variable length, must cover at least the base msg */
		if (llc->raw.hdr.length_v2 < sizeof(*llc))
			return; /* invalid message */
	}

	smc_llc_enqueue(link, llc);
}

/***************************** worker, utils *********************************/

/* periodic worker: send TEST LINK when the link has been idle too long */
static void smc_llc_testlink_work(struct work_struct *work)
{
	struct smc_link *link = container_of(to_delayed_work(work),
					     struct smc_link, llc_testlink_wrk);
	unsigned long next_interval;
	unsigned long expire_time;
	u8 user_data[16] = { 0 };
	int rc;

	if (!smc_link_active(link))
		return; /* don't reschedule worker */
	expire_time = link->wr_rx_tstamp + link->llc_testlink_time;
	if (time_is_after_jiffies(expire_time)) {
		/* recent traffic seen - just sleep until the idle deadline */
		next_interval = expire_time - jiffies;
		goto out;
	}
	reinit_completion(&link->llc_testlink_resp);
	smc_llc_send_test_link(link, user_data);
	/* receive TEST LINK response over RoCE fabric */
	rc = wait_for_completion_interruptible_timeout(&link->llc_testlink_resp,
						       SMC_LLC_WAIT_TIME);
	if (!smc_link_active(link))
		return; /* link state changed */
	if (rc <= 0) {
		smcr_link_down_cond_sched(link);
		return;
	}
	next_interval = link->llc_testlink_time;
out:
	schedule_delayed_work(&link->llc_testlink_wrk, next_interval);
}

/* initialize the LLC infrastructure of a new link group */
void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc)
{
	struct net *net = sock_net(smc->clcsock->sk);

	INIT_WORK(&lgr->llc_event_work, smc_llc_event_work);
	INIT_WORK(&lgr->llc_add_link_work, smc_llc_add_link_work);
	INIT_WORK(&lgr->llc_del_link_work, smc_llc_delete_link_work);
	INIT_LIST_HEAD(&lgr->llc_event_q);
	spin_lock_init(&lgr->llc_event_q_lock);
	spin_lock_init(&lgr->llc_flow_lock);
	init_waitqueue_head(&lgr->llc_flow_waiter);
	init_waitqueue_head(&lgr->llc_msg_waiter);
	init_rwsem(&lgr->llc_conf_mutex);
	lgr->llc_testlink_time = READ_ONCE(net->smc.sysctl_smcr_testlink_time);
}

/* called after lgr was removed from lgr_list */
void smc_llc_lgr_clear(struct smc_link_group *lgr)
{
	smc_llc_event_flush(lgr);
	wake_up_all(&lgr->llc_flow_waiter);
	wake_up_all(&lgr->llc_msg_waiter);
	cancel_work_sync(&lgr->llc_event_work);
	cancel_work_sync(&lgr->llc_add_link_work);
	cancel_work_sync(&lgr->llc_del_link_work);
	if (lgr->delayed_event) {
		kfree(lgr->delayed_event);
		lgr->delayed_event = NULL;
	}
}

/* initialize the LLC state of a new link */
int smc_llc_link_init(struct smc_link *link)
{
	init_completion(&link->llc_testlink_resp);
	INIT_DELAYED_WORK(&link->llc_testlink_wrk, smc_llc_testlink_work);
	return 0;
}

/* mark a link active and start its keepalive worker */
void smc_llc_link_active(struct smc_link *link)
{
	pr_warn_ratelimited("smc: SMC-R lg %*phN net %llu link added: id %*phN, "
			    "peerid %*phN, ibdev %s, ibport %d\n",
			    SMC_LGR_ID_SIZE, &link->lgr->id,
			    link->lgr->net->net_cookie,
			    SMC_LGR_ID_SIZE, &link->link_uid,
			    SMC_LGR_ID_SIZE, &link->peer_link_uid,
			    link->smcibdev->ibdev->name, link->ibport);
	link->state = SMC_LNK_ACTIVE;
	if (link->lgr->llc_testlink_time) {
		link->llc_testlink_time = link->lgr->llc_testlink_time;
		schedule_delayed_work(&link->llc_testlink_wrk,
				      link->llc_testlink_time);
	}
}

/* called in worker context */
void smc_llc_link_clear(struct smc_link *link, bool log)
{
	if (log)
		pr_warn_ratelimited("smc: SMC-R lg %*phN net %llu link removed: id %*phN"
				    ", peerid %*phN, ibdev %s, ibport %d\n",
				    SMC_LGR_ID_SIZE, &link->lgr->id,
				    link->lgr->net->net_cookie,
				    SMC_LGR_ID_SIZE, &link->link_uid,
				    SMC_LGR_ID_SIZE, &link->peer_link_uid,
				    link->smcibdev->ibdev->name, link->ibport);
	complete(&link->llc_testlink_resp);
	cancel_delayed_work_sync(&link->llc_testlink_wrk);
}

/* register a new rtoken at the remote peer (for all links) */
int smc_llc_do_confirm_rkey(struct smc_link *send_link,
			    struct smc_buf_desc *rmb_desc)
{
	struct smc_link_group *lgr = send_link->lgr;
	struct smc_llc_qentry *qentry = NULL;
	int rc = 0;

	rc
	   = smc_llc_send_confirm_rkey(send_link, rmb_desc);
	if (rc)
		goto out;
	/* receive CONFIRM RKEY response from server over RoCE fabric */
	qentry = smc_llc_wait(lgr, send_link, SMC_LLC_WAIT_TIME,
			      SMC_LLC_CONFIRM_RKEY);
	if (!qentry || (qentry->msg.raw.hdr.flags & SMC_LLC_FLAG_RKEY_NEG))
		rc = -EFAULT;
out:
	if (qentry)
		smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
	return rc;
}

/* unregister an rtoken at the remote peer */
int smc_llc_do_delete_rkey(struct smc_link_group *lgr,
			   struct smc_buf_desc *rmb_desc)
{
	struct smc_llc_qentry *qentry = NULL;
	struct smc_link *send_link;
	int rc = 0;

	send_link = smc_llc_usable_link(lgr);
	if (!send_link)
		return -ENOLINK;

	/* protected by llc_flow control */
	rc = smc_llc_send_delete_rkey(send_link, rmb_desc);
	if (rc)
		goto out;
	/* receive DELETE RKEY response from server over RoCE fabric */
	qentry = smc_llc_wait(lgr, send_link, SMC_LLC_WAIT_TIME,
			      SMC_LLC_DELETE_RKEY);
	if (!qentry || (qentry->msg.raw.hdr.flags & SMC_LLC_FLAG_RKEY_NEG))
		rc = -EFAULT;
out:
	if (qentry)
		smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
	return rc;
}

/* derive the link uid from the lgr id and the link id */
void smc_llc_link_set_uid(struct smc_link *link)
{
	__be32 link_uid;

	link_uid = htonl(*((u32 *)link->lgr->id) + link->link_id);
	memcpy(link->link_uid, &link_uid, SMC_LGR_ID_SIZE);
}

/* save peers link user id, used for debug purposes */
void smc_llc_save_peer_uid(struct smc_llc_qentry *qentry)
{
	memcpy(qentry->link->peer_link_uid,
	       qentry->msg.confirm_link.link_uid, SMC_LGR_ID_SIZE);
}

/* evaluate confirm link request or response */
int smc_llc_eval_conf_link(struct smc_llc_qentry *qentry,
			   enum smc_llc_reqresp type)
{
	if (type == SMC_LLC_REQ) {	/* SMC server assigns link_id */
		qentry->link->link_id = qentry->msg.confirm_link.link_num;
		smc_llc_link_set_uid(qentry->link);
	}
	if (!(qentry->msg.raw.hdr.flags & SMC_LLC_FLAG_NO_RMBE_EYEC))
		return -ENOTSUPP;
	return 0;
}

/***************************** init, exit, misc ******************************/

/* one shared rx handler for all LLC msg types, keyed by type byte */
static struct smc_wr_rx_handler smc_llc_rx_handlers[] = {
	{ .handler = smc_llc_rx_handler, .type = SMC_LLC_CONFIRM_LINK },
	{ .handler = smc_llc_rx_handler, .type = SMC_LLC_TEST_LINK },
	{ .handler = smc_llc_rx_handler, .type = SMC_LLC_ADD_LINK },
	{ .handler = smc_llc_rx_handler, .type = SMC_LLC_ADD_LINK_CONT },
	{ .handler = smc_llc_rx_handler, .type = SMC_LLC_DELETE_LINK },
	{ .handler = smc_llc_rx_handler, .type = SMC_LLC_CONFIRM_RKEY },
	{ .handler = smc_llc_rx_handler, .type = SMC_LLC_CONFIRM_RKEY_CONT },
	{ .handler = smc_llc_rx_handler, .type = SMC_LLC_DELETE_RKEY },
	/* V2 types */
	{ .handler = smc_llc_rx_handler, .type = SMC_LLC_CONFIRM_LINK_V2 },
	{ .handler = smc_llc_rx_handler, .type = SMC_LLC_TEST_LINK_V2 },
	{ .handler = smc_llc_rx_handler, .type = SMC_LLC_ADD_LINK_V2 },
	{ .handler = smc_llc_rx_handler, .type = SMC_LLC_DELETE_LINK_V2 },
	{ .handler = smc_llc_rx_handler, .type = SMC_LLC_REQ_ADD_LINK_V2 },
	{ .handler = smc_llc_rx_handler, .type = SMC_LLC_CONFIRM_RKEY_V2 },
	{ .handler = smc_llc_rx_handler, .type = SMC_LLC_DELETE_RKEY_V2 },
	{ .handler = NULL, }
};

/* register all LLC rx handlers with the work-request layer */
int __init smc_llc_init(void)
{
	struct smc_wr_rx_handler *handler;
	int rc = 0;

	for (handler = smc_llc_rx_handlers; handler->handler; handler++) {
		INIT_HLIST_NODE(&handler->list);
		rc = smc_wr_rx_register_handler(handler);
		if (rc)
			break;
	}
	return rc;
}