// SPDX-License-Identifier: GPL-2.0-only /* Copyright 2019 Linaro, Ltd, Rob Herring */ /* Copyright 2019 Collabora ltd. */ /* Copyright 2024-2025 Tomeu Vizoso */ #include #include #include #include #include #include #include #include #include "rocket_core.h" #include "rocket_device.h" #include "rocket_drv.h" #include "rocket_job.h" #include "rocket_registers.h" #define JOB_TIMEOUT_MS 500 static struct rocket_job * to_rocket_job(struct drm_sched_job *sched_job) { return container_of(sched_job, struct rocket_job, base); } static const char *rocket_fence_get_driver_name(struct dma_fence *fence) { return "rocket"; } static const char *rocket_fence_get_timeline_name(struct dma_fence *fence) { return "rockchip-npu"; } static const struct dma_fence_ops rocket_fence_ops = { .get_driver_name = rocket_fence_get_driver_name, .get_timeline_name = rocket_fence_get_timeline_name, }; static struct dma_fence *rocket_fence_create(struct rocket_core *core) { struct dma_fence *fence; fence = kzalloc(sizeof(*fence), GFP_KERNEL); if (!fence) return ERR_PTR(-ENOMEM); dma_fence_init(fence, &rocket_fence_ops, &core->fence_lock, core->fence_context, ++core->emit_seqno); return fence; } static int rocket_copy_tasks(struct drm_device *dev, struct drm_file *file_priv, struct drm_rocket_job *job, struct rocket_job *rjob) { int ret = 0; if (job->task_struct_size < sizeof(struct drm_rocket_task)) return -EINVAL; rjob->task_count = job->task_count; if (!rjob->task_count) return 0; rjob->tasks = kvmalloc_array(job->task_count, sizeof(*rjob->tasks), GFP_KERNEL); if (!rjob->tasks) { drm_dbg(dev, "Failed to allocate task array\n"); return -ENOMEM; } for (int i = 0; i < rjob->task_count; i++) { struct drm_rocket_task task = {0}; if (copy_from_user(&task, u64_to_user_ptr(job->tasks) + i * job->task_struct_size, sizeof(task))) { drm_dbg(dev, "Failed to copy incoming tasks\n"); ret = -EFAULT; goto fail; } if (task.regcmd_count == 0) { drm_dbg(dev, "regcmd_count field in drm_rocket_task should be > 0.\n"); ret = -EINVAL; goto fail; } rjob->tasks[i].regcmd = task.regcmd; rjob->tasks[i].regcmd_count = task.regcmd_count; } return 0; fail: kvfree(rjob->tasks); return ret; } static void rocket_job_hw_submit(struct rocket_core *core, struct rocket_job *job) { struct rocket_task *task; unsigned int extra_bit; /* Don't queue the job if a reset is in progress */ if (atomic_read(&core->reset.pending)) return; /* GO ! */ task = &job->tasks[job->next_task_idx]; job->next_task_idx++; rocket_pc_writel(core, BASE_ADDRESS, 0x1); /* From rknpu, in the TRM this bit is marked as reserved */ extra_bit = 0x10000000 * core->index; rocket_cna_writel(core, S_POINTER, CNA_S_POINTER_POINTER_PP_EN(1) | CNA_S_POINTER_EXECUTER_PP_EN(1) | CNA_S_POINTER_POINTER_PP_MODE(1) | extra_bit); rocket_core_writel(core, S_POINTER, CORE_S_POINTER_POINTER_PP_EN(1) | CORE_S_POINTER_EXECUTER_PP_EN(1) | CORE_S_POINTER_POINTER_PP_MODE(1) | extra_bit); rocket_pc_writel(core, BASE_ADDRESS, task->regcmd); rocket_pc_writel(core, REGISTER_AMOUNTS, PC_REGISTER_AMOUNTS_PC_DATA_AMOUNT((task->regcmd_count + 1) / 2 - 1)); rocket_pc_writel(core, INTERRUPT_MASK, PC_INTERRUPT_MASK_DPU_0 | PC_INTERRUPT_MASK_DPU_1); rocket_pc_writel(core, INTERRUPT_CLEAR, PC_INTERRUPT_CLEAR_DPU_0 | PC_INTERRUPT_CLEAR_DPU_1); rocket_pc_writel(core, TASK_CON, PC_TASK_CON_RESERVED_0(1) | PC_TASK_CON_TASK_COUNT_CLEAR(1) | PC_TASK_CON_TASK_NUMBER(1) | PC_TASK_CON_TASK_PP_EN(1)); rocket_pc_writel(core, TASK_DMA_BASE_ADDR, PC_TASK_DMA_BASE_ADDR_DMA_BASE_ADDR(0x0)); rocket_pc_writel(core, OPERATION_ENABLE, PC_OPERATION_ENABLE_OP_EN(1)); dev_dbg(core->dev, "Submitted regcmd at 0x%llx to core %d", task->regcmd, core->index); } static int rocket_acquire_object_fences(struct drm_gem_object **bos, int bo_count, struct drm_sched_job *job, bool is_write) { int i, ret; for (i = 0; i < bo_count; i++) { ret = dma_resv_reserve_fences(bos[i]->resv, 1); if (ret) return ret; ret = drm_sched_job_add_implicit_dependencies(job, bos[i], is_write); if (ret) return ret; } return 0; } static void rocket_attach_object_fences(struct drm_gem_object **bos, int bo_count, struct dma_fence *fence) { int i; for (i = 0; i < bo_count; i++) dma_resv_add_fence(bos[i]->resv, fence, DMA_RESV_USAGE_WRITE); } static int rocket_job_push(struct rocket_job *job) { struct rocket_device *rdev = job->rdev; struct drm_gem_object **bos; struct ww_acquire_ctx acquire_ctx; int ret = 0; bos = kvmalloc_array(job->in_bo_count + job->out_bo_count, sizeof(void *), GFP_KERNEL); memcpy(bos, job->in_bos, job->in_bo_count * sizeof(void *)); memcpy(&bos[job->in_bo_count], job->out_bos, job->out_bo_count * sizeof(void *)); ret = drm_gem_lock_reservations(bos, job->in_bo_count + job->out_bo_count, &acquire_ctx); if (ret) goto err; scoped_guard(mutex, &rdev->sched_lock) { drm_sched_job_arm(&job->base); job->inference_done_fence = dma_fence_get(&job->base.s_fence->finished); ret = rocket_acquire_object_fences(job->in_bos, job->in_bo_count, &job->base, false); if (ret) goto err_unlock; ret = rocket_acquire_object_fences(job->out_bos, job->out_bo_count, &job->base, true); if (ret) goto err_unlock; kref_get(&job->refcount); /* put by scheduler job completion */ drm_sched_entity_push_job(&job->base); } rocket_attach_object_fences(job->out_bos, job->out_bo_count, job->inference_done_fence); err_unlock: drm_gem_unlock_reservations(bos, job->in_bo_count + job->out_bo_count, &acquire_ctx); err: kvfree(bos); return ret; } static void rocket_job_cleanup(struct kref *ref) { struct rocket_job *job = container_of(ref, struct rocket_job, refcount); unsigned int i; rocket_iommu_domain_put(job->domain); dma_fence_put(job->done_fence); dma_fence_put(job->inference_done_fence); if (job->in_bos) { for (i = 0; i < job->in_bo_count; i++) drm_gem_object_put(job->in_bos[i]); kvfree(job->in_bos); } if (job->out_bos) { for (i = 0; i < job->out_bo_count; i++) drm_gem_object_put(job->out_bos[i]); kvfree(job->out_bos); } kvfree(job->tasks); kfree(job); } static void rocket_job_put(struct rocket_job *job) { kref_put(&job->refcount, rocket_job_cleanup); } static void rocket_job_free(struct drm_sched_job *sched_job) { struct rocket_job *job = to_rocket_job(sched_job); drm_sched_job_cleanup(sched_job); rocket_job_put(job); } static struct rocket_core *sched_to_core(struct rocket_device *rdev, struct drm_gpu_scheduler *sched) { unsigned int core; for (core = 0; core < rdev->num_cores; core++) { if (&rdev->cores[core].sched == sched) return &rdev->cores[core]; } return NULL; } static struct dma_fence *rocket_job_run(struct drm_sched_job *sched_job) { struct rocket_job *job = to_rocket_job(sched_job); struct rocket_device *rdev = job->rdev; struct rocket_core *core = sched_to_core(rdev, sched_job->sched); struct dma_fence *fence = NULL; int ret; if (unlikely(job->base.s_fence->finished.error)) return NULL; /* * Nothing to execute: can happen if the job has finished while * we were resetting the NPU. */ if (job->next_task_idx == job->task_count) return NULL; fence = rocket_fence_create(core); if (IS_ERR(fence)) return fence; if (job->done_fence) dma_fence_put(job->done_fence); job->done_fence = dma_fence_get(fence); ret = pm_runtime_get_sync(core->dev); if (ret < 0) return fence; ret = iommu_attach_group(job->domain->domain, core->iommu_group); if (ret < 0) return fence; scoped_guard(mutex, &core->job_lock) { core->in_flight_job = job; rocket_job_hw_submit(core, job); } return fence; } static void rocket_job_handle_irq(struct rocket_core *core) { pm_runtime_mark_last_busy(core->dev); rocket_pc_writel(core, OPERATION_ENABLE, 0x0); rocket_pc_writel(core, INTERRUPT_CLEAR, 0x1ffff); scoped_guard(mutex, &core->job_lock) if (core->in_flight_job) { if (core->in_flight_job->next_task_idx < core->in_flight_job->task_count) { rocket_job_hw_submit(core, core->in_flight_job); return; } iommu_detach_group(NULL, iommu_group_get(core->dev)); dma_fence_signal(core->in_flight_job->done_fence); pm_runtime_put_autosuspend(core->dev); core->in_flight_job = NULL; } } static void rocket_reset(struct rocket_core *core, struct drm_sched_job *bad) { if (!atomic_read(&core->reset.pending)) return; drm_sched_stop(&core->sched, bad); /* * Remaining interrupts have been handled, but we might still have * stuck jobs. Let's make sure the PM counters stay balanced by * manually calling pm_runtime_put_noidle(). */ scoped_guard(mutex, &core->job_lock) { if (core->in_flight_job) pm_runtime_put_noidle(core->dev); iommu_detach_group(NULL, core->iommu_group); core->in_flight_job = NULL; } /* Proceed with reset now. */ rocket_core_reset(core); /* NPU has been reset, we can clear the reset pending bit. */ atomic_set(&core->reset.pending, 0); /* Restart the scheduler */ drm_sched_start(&core->sched, 0); } static enum drm_gpu_sched_stat rocket_job_timedout(struct drm_sched_job *sched_job) { struct rocket_job *job = to_rocket_job(sched_job); struct rocket_device *rdev = job->rdev; struct rocket_core *core = sched_to_core(rdev, sched_job->sched); dev_err(core->dev, "NPU job timed out"); atomic_set(&core->reset.pending, 1); rocket_reset(core, sched_job); return DRM_GPU_SCHED_STAT_RESET; } static void rocket_reset_work(struct work_struct *work) { struct rocket_core *core; core = container_of(work, struct rocket_core, reset.work); rocket_reset(core, NULL); } static const struct drm_sched_backend_ops rocket_sched_ops = { .run_job = rocket_job_run, .timedout_job = rocket_job_timedout, .free_job = rocket_job_free }; static irqreturn_t rocket_job_irq_handler_thread(int irq, void *data) { struct rocket_core *core = data; rocket_job_handle_irq(core); return IRQ_HANDLED; } static irqreturn_t rocket_job_irq_handler(int irq, void *data) { struct rocket_core *core = data; u32 raw_status = rocket_pc_readl(core, INTERRUPT_RAW_STATUS); WARN_ON(raw_status & PC_INTERRUPT_RAW_STATUS_DMA_READ_ERROR); WARN_ON(raw_status & PC_INTERRUPT_RAW_STATUS_DMA_WRITE_ERROR); if (!(raw_status & PC_INTERRUPT_RAW_STATUS_DPU_0 || raw_status & PC_INTERRUPT_RAW_STATUS_DPU_1)) return IRQ_NONE; rocket_pc_writel(core, INTERRUPT_MASK, 0x0); return IRQ_WAKE_THREAD; } int rocket_job_init(struct rocket_core *core) { struct drm_sched_init_args args = { .ops = &rocket_sched_ops, .num_rqs = DRM_SCHED_PRIORITY_COUNT, .credit_limit = 1, .timeout = msecs_to_jiffies(JOB_TIMEOUT_MS), .name = dev_name(core->dev), .dev = core->dev, }; int ret; INIT_WORK(&core->reset.work, rocket_reset_work); spin_lock_init(&core->fence_lock); mutex_init(&core->job_lock); core->irq = platform_get_irq(to_platform_device(core->dev), 0); if (core->irq < 0) return core->irq; ret = devm_request_threaded_irq(core->dev, core->irq, rocket_job_irq_handler, rocket_job_irq_handler_thread, IRQF_SHARED, dev_name(core->dev), core); if (ret) { dev_err(core->dev, "failed to request job irq"); return ret; } core->reset.wq = alloc_ordered_workqueue("rocket-reset-%d", 0, core->index); if (!core->reset.wq) return -ENOMEM; core->fence_context = dma_fence_context_alloc(1); args.timeout_wq = core->reset.wq; ret = drm_sched_init(&core->sched, &args); if (ret) { dev_err(core->dev, "Failed to create scheduler: %d.", ret); goto err_sched; } return 0; err_sched: drm_sched_fini(&core->sched); destroy_workqueue(core->reset.wq); return ret; } void rocket_job_fini(struct rocket_core *core) { drm_sched_fini(&core->sched); cancel_work_sync(&core->reset.work); destroy_workqueue(core->reset.wq); } int rocket_job_open(struct rocket_file_priv *rocket_priv) { struct rocket_device *rdev = rocket_priv->rdev; struct drm_gpu_scheduler **scheds = kmalloc_array(rdev->num_cores, sizeof(*scheds), GFP_KERNEL); unsigned int core; int ret; for (core = 0; core < rdev->num_cores; core++) scheds[core] = &rdev->cores[core].sched; ret = drm_sched_entity_init(&rocket_priv->sched_entity, DRM_SCHED_PRIORITY_NORMAL, scheds, rdev->num_cores, NULL); if (WARN_ON(ret)) return ret; return 0; } void rocket_job_close(struct rocket_file_priv *rocket_priv) { struct drm_sched_entity *entity = &rocket_priv->sched_entity; kfree(entity->sched_list); drm_sched_entity_destroy(entity); } int rocket_job_is_idle(struct rocket_core *core) { /* If there are any jobs in this HW queue, we're not idle */ if (atomic_read(&core->sched.credit_count)) return false; return true; } static int rocket_ioctl_submit_job(struct drm_device *dev, struct drm_file *file, struct drm_rocket_job *job) { struct rocket_device *rdev = to_rocket_device(dev); struct rocket_file_priv *file_priv = file->driver_priv; struct rocket_job *rjob = NULL; int ret = 0; if (job->task_count == 0) return -EINVAL; rjob = kzalloc(sizeof(*rjob), GFP_KERNEL); if (!rjob) return -ENOMEM; kref_init(&rjob->refcount); rjob->rdev = rdev; ret = drm_sched_job_init(&rjob->base, &file_priv->sched_entity, 1, NULL, file->client_id); if (ret) goto out_put_job; ret = rocket_copy_tasks(dev, file, job, rjob); if (ret) goto out_cleanup_job; ret = drm_gem_objects_lookup(file, u64_to_user_ptr(job->in_bo_handles), job->in_bo_handle_count, &rjob->in_bos); if (ret) goto out_cleanup_job; rjob->in_bo_count = job->in_bo_handle_count; ret = drm_gem_objects_lookup(file, u64_to_user_ptr(job->out_bo_handles), job->out_bo_handle_count, &rjob->out_bos); if (ret) goto out_cleanup_job; rjob->out_bo_count = job->out_bo_handle_count; rjob->domain = rocket_iommu_domain_get(file_priv); ret = rocket_job_push(rjob); if (ret) goto out_cleanup_job; out_cleanup_job: if (ret) drm_sched_job_cleanup(&rjob->base); out_put_job: rocket_job_put(rjob); return ret; } int rocket_ioctl_submit(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_rocket_submit *args = data; struct drm_rocket_job *jobs; int ret = 0; unsigned int i = 0; if (args->job_count == 0) return 0; if (args->job_struct_size < sizeof(struct drm_rocket_job)) { drm_dbg(dev, "job_struct_size field in drm_rocket_submit struct is too small.\n"); return -EINVAL; } if (args->reserved != 0) { drm_dbg(dev, "Reserved field in drm_rocket_submit struct should be 0.\n"); return -EINVAL; } jobs = kvmalloc_array(args->job_count, sizeof(*jobs), GFP_KERNEL); if (!jobs) { drm_dbg(dev, "Failed to allocate incoming job array\n"); return -ENOMEM; } for (i = 0; i < args->job_count; i++) { if (copy_from_user(&jobs[i], u64_to_user_ptr(args->jobs) + i * args->job_struct_size, sizeof(*jobs))) { ret = -EFAULT; drm_dbg(dev, "Failed to copy incoming job array\n"); goto exit; } } for (i = 0; i < args->job_count; i++) rocket_ioctl_submit_job(dev, file, &jobs[i]); exit: kvfree(jobs); return ret; }