// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2023, Microsoft Corporation.
 *
 * mshv_root module's main interrupt handler and associated functionality.
 *
 * Authors: Microsoft Linux virtualization team
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/io.h>
#include <linux/random.h>
#include <linux/percpu.h>
#include <asm/mshyperv.h>

#include "mshv_eventfd.h"
#include "mshv.h"

static u32 synic_event_ring_get_queued_port(u32 sint_index)
{
	struct hv_synic_event_ring_page **event_ring_page;
	volatile struct hv_synic_event_ring *ring;
	struct hv_synic_pages *spages;
	u8 **synic_eventring_tail;
	u32 message;
	u8 tail;

	spages = this_cpu_ptr(mshv_root.synic_pages);
	event_ring_page = &spages->synic_event_ring_page;
	synic_eventring_tail = (u8 **)this_cpu_ptr(hv_synic_eventring_tail);

	if (unlikely(!*synic_eventring_tail)) {
		pr_debug("Missing synic event ring tail!\n");
		return 0;
	}
	tail = (*synic_eventring_tail)[sint_index];

	if (unlikely(!*event_ring_page)) {
		pr_debug("Missing synic event ring page!\n");
		return 0;
	}

	ring = &(*event_ring_page)->sint_event_ring[sint_index];

	/*
	 * Get the message.
	 */
	message = ring->data[tail];

	if (!message) {
		if (ring->ring_full) {
			/*
			 * Ring is marked full, but we would have consumed all
			 * the messages. Notify the hypervisor that ring is now
			 * empty and check again.
			 */
			ring->ring_full = 0;
			hv_call_notify_port_ring_empty(sint_index);
			message = ring->data[tail];
		}

		if (!message) {
			ring->signal_masked = 0;
			/*
			 * Unmask the signal and sync with hypervisor
			 * before one last check for any message.
			 */
			mb();
			message = ring->data[tail];

			/*
			 * OK, let's bail out.
			 */
			if (!message)
				return 0;
		}

		ring->signal_masked = 1;
	}

	/*
	 * Clear the message in the ring buffer.
	 */
	ring->data[tail] = 0;

	if (++tail == HV_SYNIC_EVENT_RING_MESSAGE_COUNT)
		tail = 0;

	(*synic_eventring_tail)[sint_index] = tail;

	return message;
}

static bool mshv_doorbell_isr(struct hv_message *msg)
{
	struct hv_notification_message_payload *notification;
	u32 port;

	if (msg->header.message_type != HVMSG_SYNIC_SINT_INTERCEPT)
		return false;

	notification = (struct hv_notification_message_payload *)msg->u.payload;
	if (notification->sint_index != HV_SYNIC_DOORBELL_SINT_INDEX)
		return false;

	while ((port = synic_event_ring_get_queued_port(
				HV_SYNIC_DOORBELL_SINT_INDEX))) {
		struct port_table_info ptinfo = { 0 };

		if (mshv_portid_lookup(port, &ptinfo)) {
			pr_debug("Failed to get port info from port_table!\n");
			continue;
		}

		if (ptinfo.hv_port_type != HV_PORT_TYPE_DOORBELL) {
			pr_debug("Not a doorbell port! port: %d, port_type: %d\n",
				 port, ptinfo.hv_port_type);
			continue;
		}

		/* Invoke the callback */
		ptinfo.hv_port_doorbell.doorbell_cb(port,
						    ptinfo.hv_port_doorbell.data);
	}

	return true;
}
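
/*
 * Illustrative only (not part of the driver): a callback of the shape that
 * mshv_doorbell_isr() above invokes through the port table. It runs from
 * the SynIC interrupt path, so it must not sleep; signaling an eventfd, as
 * the consumers in mshv_eventfd.c do, is the typical pattern. The names
 * example_doorbell_cb and ctx are hypothetical; the real prototype is
 * doorbell_cb_t from mshv_eventfd.h.
 *
 *	static void example_doorbell_cb(int doorbell_id, void *ctx)
 *	{
 *		struct eventfd_ctx *eventfd = ctx;
 *
 *		eventfd_signal(eventfd);
 *	}
 */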

static bool mshv_async_call_completion_isr(struct hv_message *msg)
{
	bool handled = false;
	struct hv_async_completion_message_payload *async_msg;
	struct mshv_partition *partition;
	u64 partition_id;

	if (msg->header.message_type != HVMSG_ASYNC_CALL_COMPLETION)
		goto out;

	async_msg =
		(struct hv_async_completion_message_payload *)msg->u.payload;

	partition_id = async_msg->partition_id;

	/*
	 * Hold this lock for the rest of the isr, because the partition could
	 * be released anytime.
	 * e.g. the MSHV_RUN_VP thread could wake on another cpu; it could
	 * release the partition unless we hold this!
	 */
	rcu_read_lock();

	partition = mshv_partition_find(partition_id);

	if (unlikely(!partition)) {
		pr_debug("failed to find partition %llu\n", partition_id);
		goto unlock_out;
	}

	partition->async_hypercall_status = async_msg->status;
	complete(&partition->async_hypercall);

	handled = true;

unlock_out:
	rcu_read_unlock();
out:
	return handled;
}

static void kick_vp(struct mshv_vp *vp)
{
	atomic64_inc(&vp->run.vp_signaled_count);
	vp->run.kicked_by_hv = 1;
	wake_up(&vp->run.vp_suspend_queue);
}

static void
handle_bitset_message(const struct hv_vp_signal_bitset_scheduler_message *msg)
{
	int bank_idx, vps_signaled = 0, bank_mask_size;
	struct mshv_partition *partition;
	const struct hv_vpset *vpset;
	const u64 *bank_contents;
	u64 partition_id = msg->partition_id;

	if (msg->vp_bitset.bitset.format != HV_GENERIC_SET_SPARSE_4K) {
		pr_debug("scheduler message format is not HV_GENERIC_SET_SPARSE_4K\n");
		return;
	}

	if (msg->vp_count == 0) {
		pr_debug("scheduler message with no VP specified\n");
		return;
	}

	rcu_read_lock();

	partition = mshv_partition_find(partition_id);
	if (unlikely(!partition)) {
		pr_debug("failed to find partition %llu\n", partition_id);
		goto unlock_out;
	}

	vpset = &msg->vp_bitset.bitset;

	bank_idx = -1;
	bank_contents = vpset->bank_contents;
	bank_mask_size = sizeof(vpset->valid_bank_mask) * BITS_PER_BYTE;

	while (true) {
		int vp_bank_idx = -1;
		int vp_bank_size = sizeof(*bank_contents) * BITS_PER_BYTE;
		int vp_index;

		bank_idx = find_next_bit((unsigned long *)&vpset->valid_bank_mask,
					 bank_mask_size, bank_idx + 1);
		if (bank_idx == bank_mask_size)
			break;

		while (true) {
			struct mshv_vp *vp;

			vp_bank_idx = find_next_bit((unsigned long *)bank_contents,
						    vp_bank_size, vp_bank_idx + 1);
			if (vp_bank_idx == vp_bank_size)
				break;

			vp_index = (bank_idx * vp_bank_size) + vp_bank_idx;

			/* This shouldn't happen, but just in case. */
			if (unlikely(vp_index >= MSHV_MAX_VPS)) {
				pr_debug("VP index %u out of bounds\n",
					 vp_index);
				goto unlock_out;
			}

			vp = partition->pt_vp_array[vp_index];
			if (unlikely(!vp)) {
				pr_debug("failed to find VP %u\n", vp_index);
				goto unlock_out;
			}

			kick_vp(vp);
			vps_signaled++;
		}

		bank_contents++;
	}

unlock_out:
	rcu_read_unlock();

	if (vps_signaled != msg->vp_count)
		pr_debug("asked to signal %u VPs but only did %u\n",
			 msg->vp_count, vps_signaled);
}
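
/*
 * Worked example for the sparse-set walk above: each bank of bank_contents
 * is a u64, so vp_bank_size is 64. A set bit at position 3 inside the bank
 * for valid_bank_mask bit 2 therefore names
 * vp_index = 2 * 64 + 3 = 131.
 */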

static void
handle_pair_message(const struct hv_vp_signal_pair_scheduler_message *msg)
{
	struct mshv_partition *partition = NULL;
	struct mshv_vp *vp;
	int idx;

	rcu_read_lock();

	for (idx = 0; idx < msg->vp_count; idx++) {
		u64 partition_id = msg->partition_ids[idx];
		u32 vp_index = msg->vp_indexes[idx];

		if (idx == 0 || partition->pt_id != partition_id) {
			partition = mshv_partition_find(partition_id);
			if (unlikely(!partition)) {
				pr_debug("failed to find partition %llu\n",
					 partition_id);
				break;
			}
		}

		/* This shouldn't happen, but just in case. */
		if (unlikely(vp_index >= MSHV_MAX_VPS)) {
			pr_debug("VP index %u out of bounds\n", vp_index);
			break;
		}

		vp = partition->pt_vp_array[vp_index];
		if (!vp) {
			pr_debug("failed to find VP %u\n", vp_index);
			break;
		}

		kick_vp(vp);
	}

	rcu_read_unlock();
}

static bool mshv_scheduler_isr(struct hv_message *msg)
{
	if (msg->header.message_type != HVMSG_SCHEDULER_VP_SIGNAL_BITSET &&
	    msg->header.message_type != HVMSG_SCHEDULER_VP_SIGNAL_PAIR)
		return false;

	if (msg->header.message_type == HVMSG_SCHEDULER_VP_SIGNAL_BITSET)
		handle_bitset_message((struct hv_vp_signal_bitset_scheduler_message *)
				      msg->u.payload);
	else
		handle_pair_message((struct hv_vp_signal_pair_scheduler_message *)
				    msg->u.payload);

	return true;
}

static bool mshv_intercept_isr(struct hv_message *msg)
{
	struct mshv_partition *partition;
	bool handled = false;
	struct mshv_vp *vp;
	u64 partition_id;
	u32 vp_index;

	partition_id = msg->header.sender;

	rcu_read_lock();

	partition = mshv_partition_find(partition_id);
	if (unlikely(!partition)) {
		pr_debug("failed to find partition %llu\n", partition_id);
		goto unlock_out;
	}

	if (msg->header.message_type == HVMSG_X64_APIC_EOI) {
		/*
		 * Check if this gsi is registered in the ack_notifier list
		 * and invoke the callback if registered.
		 *
		 * If there is a notifier, the ack callback is supposed to
		 * handle the VMEXIT, so we need not pass this message to the
		 * vcpu thread.
		 */
		struct hv_x64_apic_eoi_message *eoi_msg =
			(struct hv_x64_apic_eoi_message *)&msg->u.payload[0];

		if (mshv_notify_acked_gsi(partition,
					  eoi_msg->interrupt_vector)) {
			handled = true;
			goto unlock_out;
		}
	}

	/*
	 * We should get an opaque intercept message here for all intercept
	 * messages, since we're using the mapped VP intercept message page.
	 *
	 * The intercept message will have been placed in the intercept
	 * message page at this point.
	 *
	 * Make sure the message type matches our expectation.
	 */
	if (msg->header.message_type != HVMSG_OPAQUE_INTERCEPT) {
		pr_debug("wrong message type %d\n", msg->header.message_type);
		goto unlock_out;
	}

	/*
	 * Since we directly index the vp, and it has to exist for us to be
	 * here (because the vp is only deleted when the partition is), no
	 * additional locking is needed here.
	 */
	vp_index =
		((struct hv_opaque_intercept_message *)msg->u.payload)->vp_index;
	vp = partition->pt_vp_array[vp_index];
	if (unlikely(!vp)) {
		pr_debug("failed to find VP %u\n", vp_index);
		goto unlock_out;
	}

	kick_vp(vp);

	handled = true;

unlock_out:
	rcu_read_unlock();

	return handled;
}
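
/*
 * Top-level SynIC message dispatch. The handlers above are tried in order
 * until one accepts the message: doorbell notifications first, then
 * scheduler VP-signal messages, then async hypercall completions, and
 * finally intercepts destined for a VP thread.
 */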

void mshv_isr(void)
{
	struct hv_synic_pages *spages = this_cpu_ptr(mshv_root.synic_pages);
	struct hv_message_page **msg_page = &spages->synic_message_page;
	struct hv_message *msg;
	bool handled;

	if (unlikely(!(*msg_page))) {
		pr_debug("Missing synic page!\n");
		return;
	}

	msg = &((*msg_page)->sint_message[HV_SYNIC_INTERCEPTION_SINT_INDEX]);

	/*
	 * If the type isn't set, there isn't really a message;
	 * it may be some other hyperv interrupt
	 */
	if (msg->header.message_type == HVMSG_NONE)
		return;

	handled = mshv_doorbell_isr(msg);

	if (!handled)
		handled = mshv_scheduler_isr(msg);

	if (!handled)
		handled = mshv_async_call_completion_isr(msg);

	if (!handled)
		handled = mshv_intercept_isr(msg);

	if (handled) {
		/*
		 * Acknowledge message with hypervisor if another message is
		 * pending.
		 */
		msg->header.message_type = HVMSG_NONE;
		/*
		 * Ensure the write is complete so the hypervisor will deliver
		 * the next message if available.
		 */
		mb();
		if (msg->header.message_flags.msg_pending)
			hv_set_non_nested_msr(HV_MSR_EOM, 0);

#ifdef HYPERVISOR_CALLBACK_VECTOR
		add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR);
#endif
	} else {
		pr_warn_once("%s: unknown message type 0x%x\n", __func__,
			     msg->header.message_type);
	}
}

int mshv_synic_init(unsigned int cpu)
{
	union hv_synic_simp simp;
	union hv_synic_siefp siefp;
	union hv_synic_sirbp sirbp;
#ifdef HYPERVISOR_CALLBACK_VECTOR
	union hv_synic_sint sint;
#endif
	union hv_synic_scontrol sctrl;
	struct hv_synic_pages *spages = this_cpu_ptr(mshv_root.synic_pages);
	struct hv_message_page **msg_page = &spages->synic_message_page;
	struct hv_synic_event_flags_page **event_flags_page =
		&spages->synic_event_flags_page;
	struct hv_synic_event_ring_page **event_ring_page =
		&spages->synic_event_ring_page;

	/* Setup the Synic's message page */
	simp.as_uint64 = hv_get_non_nested_msr(HV_MSR_SIMP);
	simp.simp_enabled = true;
	*msg_page = memremap(simp.base_simp_gpa << HV_HYP_PAGE_SHIFT,
			     HV_HYP_PAGE_SIZE, MEMREMAP_WB);

	if (!(*msg_page))
		return -EFAULT;

	hv_set_non_nested_msr(HV_MSR_SIMP, simp.as_uint64);

	/* Setup the Synic's event flags page */
	siefp.as_uint64 = hv_get_non_nested_msr(HV_MSR_SIEFP);
	siefp.siefp_enabled = true;
	*event_flags_page = memremap(siefp.base_siefp_gpa << PAGE_SHIFT,
				     PAGE_SIZE, MEMREMAP_WB);

	if (!(*event_flags_page))
		goto cleanup;

	hv_set_non_nested_msr(HV_MSR_SIEFP, siefp.as_uint64);

	/* Setup the Synic's event ring page */
	sirbp.as_uint64 = hv_get_non_nested_msr(HV_MSR_SIRBP);
	sirbp.sirbp_enabled = true;
	*event_ring_page = memremap(sirbp.base_sirbp_gpa << PAGE_SHIFT,
				    PAGE_SIZE, MEMREMAP_WB);

	if (!(*event_ring_page))
		goto cleanup;

	hv_set_non_nested_msr(HV_MSR_SIRBP, sirbp.as_uint64);

#ifdef HYPERVISOR_CALLBACK_VECTOR
	/* Enable intercepts */
	sint.as_uint64 = 0;
	sint.vector = HYPERVISOR_CALLBACK_VECTOR;
	sint.masked = false;
	sint.auto_eoi = hv_recommend_using_aeoi();
	hv_set_non_nested_msr(HV_MSR_SINT0 + HV_SYNIC_INTERCEPTION_SINT_INDEX,
			      sint.as_uint64);

	/* Doorbell SINT */
	sint.as_uint64 = 0;
	sint.vector = HYPERVISOR_CALLBACK_VECTOR;
	sint.masked = false;
	sint.as_intercept = 1;
	sint.auto_eoi = hv_recommend_using_aeoi();
	hv_set_non_nested_msr(HV_MSR_SINT0 + HV_SYNIC_DOORBELL_SINT_INDEX,
			      sint.as_uint64);
#endif

	/* Enable global synic bit */
	sctrl.as_uint64 = hv_get_non_nested_msr(HV_MSR_SCONTROL);
	sctrl.enable = 1;
	hv_set_non_nested_msr(HV_MSR_SCONTROL, sctrl.as_uint64);

	return 0;

cleanup:
	if (*event_ring_page) {
		sirbp.sirbp_enabled = false;
		hv_set_non_nested_msr(HV_MSR_SIRBP, sirbp.as_uint64);
		memunmap(*event_ring_page);
	}
	if (*event_flags_page) {
		siefp.siefp_enabled = false;
		hv_set_non_nested_msr(HV_MSR_SIEFP, siefp.as_uint64);
		memunmap(*event_flags_page);
	}
	if (*msg_page) {
		simp.simp_enabled = false;
		hv_set_non_nested_msr(HV_MSR_SIMP, simp.as_uint64);
		memunmap(*msg_page);
	}

	return -EFAULT;
}
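
/*
 * Tear down the per-CPU SynIC state set up by mshv_synic_init(), in
 * reverse order: mask both SINTs first so no further interrupts arrive,
 * then disable and unmap the event ring, event flags, and message pages,
 * and finally clear the global enable bit. The (unsigned int cpu)
 * signature matches the CPU hotplug callback convention; the init/cleanup
 * pair is presumably registered by the module's setup code.
 */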

int mshv_synic_cleanup(unsigned int cpu)
{
	union hv_synic_sint sint;
	union hv_synic_simp simp;
	union hv_synic_siefp siefp;
	union hv_synic_sirbp sirbp;
	union hv_synic_scontrol sctrl;
	struct hv_synic_pages *spages = this_cpu_ptr(mshv_root.synic_pages);
	struct hv_message_page **msg_page = &spages->synic_message_page;
	struct hv_synic_event_flags_page **event_flags_page =
		&spages->synic_event_flags_page;
	struct hv_synic_event_ring_page **event_ring_page =
		&spages->synic_event_ring_page;

	/* Disable the interrupt */
	sint.as_uint64 = hv_get_non_nested_msr(HV_MSR_SINT0 + HV_SYNIC_INTERCEPTION_SINT_INDEX);
	sint.masked = true;
	hv_set_non_nested_msr(HV_MSR_SINT0 + HV_SYNIC_INTERCEPTION_SINT_INDEX,
			      sint.as_uint64);

	/* Disable Doorbell SINT */
	sint.as_uint64 = hv_get_non_nested_msr(HV_MSR_SINT0 + HV_SYNIC_DOORBELL_SINT_INDEX);
	sint.masked = true;
	hv_set_non_nested_msr(HV_MSR_SINT0 + HV_SYNIC_DOORBELL_SINT_INDEX,
			      sint.as_uint64);

	/* Disable Synic's event ring page */
	sirbp.as_uint64 = hv_get_non_nested_msr(HV_MSR_SIRBP);
	sirbp.sirbp_enabled = false;
	hv_set_non_nested_msr(HV_MSR_SIRBP, sirbp.as_uint64);
	memunmap(*event_ring_page);

	/* Disable Synic's event flags page */
	siefp.as_uint64 = hv_get_non_nested_msr(HV_MSR_SIEFP);
	siefp.siefp_enabled = false;
	hv_set_non_nested_msr(HV_MSR_SIEFP, siefp.as_uint64);
	memunmap(*event_flags_page);

	/* Disable Synic's message page */
	simp.as_uint64 = hv_get_non_nested_msr(HV_MSR_SIMP);
	simp.simp_enabled = false;
	hv_set_non_nested_msr(HV_MSR_SIMP, simp.as_uint64);
	memunmap(*msg_page);

	/* Disable global synic bit */
	sctrl.as_uint64 = hv_get_non_nested_msr(HV_MSR_SCONTROL);
	sctrl.enable = 0;
	hv_set_non_nested_msr(HV_MSR_SCONTROL, sctrl.as_uint64);

	return 0;
}

int
mshv_register_doorbell(u64 partition_id, doorbell_cb_t doorbell_cb, void *data,
		       u64 gpa, u64 val, u64 flags)
{
	struct hv_connection_info connection_info = { 0 };
	union hv_connection_id connection_id = { 0 };
	struct port_table_info *port_table_info;
	struct hv_port_info port_info = { 0 };
	union hv_port_id port_id = { 0 };
	int ret;

	port_table_info = kmalloc(sizeof(*port_table_info), GFP_KERNEL);
	if (!port_table_info)
		return -ENOMEM;

	port_table_info->hv_port_type = HV_PORT_TYPE_DOORBELL;
	port_table_info->hv_port_doorbell.doorbell_cb = doorbell_cb;
	port_table_info->hv_port_doorbell.data = data;

	ret = mshv_portid_alloc(port_table_info);
	if (ret < 0) {
		kfree(port_table_info);
		return ret;
	}

	port_id.u.id = ret;
	port_info.port_type = HV_PORT_TYPE_DOORBELL;
	port_info.doorbell_port_info.target_sint = HV_SYNIC_DOORBELL_SINT_INDEX;
	port_info.doorbell_port_info.target_vp = HV_ANY_VP;

	ret = hv_call_create_port(hv_current_partition_id, port_id,
				  partition_id, &port_info, 0, 0,
				  NUMA_NO_NODE);
	if (ret < 0) {
		mshv_portid_free(port_id.u.id);
		return ret;
	}

	connection_id.u.id = port_id.u.id;
	connection_info.port_type = HV_PORT_TYPE_DOORBELL;
	connection_info.doorbell_connection_info.gpa = gpa;
	connection_info.doorbell_connection_info.trigger_value = val;
	connection_info.doorbell_connection_info.flags = flags;

	ret = hv_call_connect_port(hv_current_partition_id, port_id,
				   partition_id, connection_id,
				   &connection_info, 0, NUMA_NO_NODE);
	if (ret < 0) {
		hv_call_delete_port(hv_current_partition_id, port_id);
		mshv_portid_free(port_id.u.id);
		return ret;
	}

	/* Use the port_id as the doorbell_id */
	return port_id.u.id;
}

void
mshv_unregister_doorbell(u64 partition_id, int doorbell_portid)
{
	union hv_port_id port_id = { 0 };
	union hv_connection_id connection_id = { 0 };

	connection_id.u.id = doorbell_portid;
	hv_call_disconnect_port(partition_id, connection_id);

	port_id.u.id = doorbell_portid;
	hv_call_delete_port(hv_current_partition_id, port_id);

	mshv_portid_free(doorbell_portid);
}
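
/*
 * Illustrative pairing of the two entry points above (error handling
 * elided; pt_id, ctx, trigger_val, and example_doorbell_cb are
 * hypothetical). The value returned by mshv_register_doorbell() is the
 * port id, which doubles as the doorbell id passed back at teardown:
 *
 *	int id = mshv_register_doorbell(pt_id, example_doorbell_cb, ctx,
 *					gpa, trigger_val, flags);
 *	if (id < 0)
 *		return id;
 *	...
 *	mshv_unregister_doorbell(pt_id, id);
 */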