summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/asahi/queue/compute.rs
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/asahi/queue/compute.rs')
-rw-r--r--drivers/gpu/drm/asahi/queue/compute.rs371
1 files changed, 371 insertions, 0 deletions
diff --git a/drivers/gpu/drm/asahi/queue/compute.rs b/drivers/gpu/drm/asahi/queue/compute.rs
new file mode 100644
index 000000000000..6590382c75af
--- /dev/null
+++ b/drivers/gpu/drm/asahi/queue/compute.rs
@@ -0,0 +1,371 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+#![allow(clippy::unusual_byte_groupings)]
+
+//! Compute work queue.
+//!
+//! A compute queue consists of one underlying WorkQueue.
+//! This module is in charge of creating all of the firmware structures required to submit compute
+//! work to the GPU, based on the userspace command buffer.
+
+use super::common;
+use crate::alloc::Allocator;
+use crate::debug::*;
+use crate::fw::types::*;
+use crate::gpu::GpuManager;
+use crate::{box_in_place, inner_ptr, inner_weak_ptr, place};
+use crate::{fw, gpu, microseq};
+use core::mem::MaybeUninit;
+use core::sync::atomic::Ordering;
+use kernel::bindings;
+use kernel::dma_fence::RawDmaFence;
+use kernel::drm::sched::Job;
+use kernel::io_buffer::IoBufferReader;
+use kernel::prelude::*;
+use kernel::sync::Arc;
+use kernel::user_ptr::UserSlicePtr;
+
+const DEBUG_CLASS: DebugFlags = DebugFlags::Compute;
+
+#[versions(AGX)]
+impl super::Queue::ver {
+ /// Submit work to a compute queue.
+ pub(super) fn submit_compute(
+ &self,
+ job: &mut Job<super::QueueJob::ver>,
+ cmd: &bindings::drm_asahi_command,
+ result_writer: Option<super::ResultWriter>,
+ id: u64,
+ flush_stamps: bool,
+ ) -> Result {
+ if cmd.cmd_type != bindings::drm_asahi_cmd_type_DRM_ASAHI_CMD_COMPUTE {
+ return Err(EINVAL);
+ }
+
+ let dev = self.dev.data();
+ let gpu = match dev.gpu.as_any().downcast_ref::<gpu::GpuManager::ver>() {
+ Some(gpu) => gpu,
+ None => {
+ dev_crit!(self.dev, "GpuManager mismatched with Queue!\n");
+ return Err(EIO);
+ }
+ };
+
+ let mut alloc = gpu.alloc();
+ let kalloc = &mut *alloc;
+
+ mod_dev_dbg!(self.dev, "[Submission {}] Compute!\n", id);
+
+ let mut cmdbuf_reader = unsafe {
+ UserSlicePtr::new(
+ cmd.cmd_buffer as usize as *mut _,
+ core::mem::size_of::<bindings::drm_asahi_cmd_compute>(),
+ )
+ .reader()
+ };
+
+ let mut cmdbuf: MaybeUninit<bindings::drm_asahi_cmd_compute> = MaybeUninit::uninit();
+ unsafe {
+ cmdbuf_reader.read_raw(
+ cmdbuf.as_mut_ptr() as *mut u8,
+ core::mem::size_of::<bindings::drm_asahi_cmd_compute>(),
+ )?;
+ }
+ let cmdbuf = unsafe { cmdbuf.assume_init() };
+
+ if cmdbuf.flags != 0 {
+ return Err(EINVAL);
+ }
+
+ // This sequence number increases per new client/VM? assigned to some slot,
+ // but it's unclear *which* slot...
+ let slot_client_seq: u8 = (self.id & 0xff) as u8;
+
+ let vm_bind = job.vm_bind.clone();
+
+ mod_dev_dbg!(
+ self.dev,
+ "[Submission {}] VM slot = {}\n",
+ id,
+ vm_bind.slot()
+ );
+
+ let notifier = self.notifier.clone();
+
+ let fence = job.fence.clone();
+ let comp_job = job.get_comp()?;
+ let ev_comp = comp_job.event_info();
+
+ // TODO: Is this the same on all GPUs? Is this really for preemption?
+ let preempt_size = 0x7fa0;
+ let preempt2_off = 0x7f80;
+ let preempt3_off = 0x7f88;
+ let preempt4_off = 0x7f90;
+ let preempt5_off = 0x7f98;
+
+ let preempt_buf = self.ualloc.lock().array_empty(preempt_size)?;
+
+ let mut seq_buf = self.ualloc.lock().array_empty(0x800)?;
+ for i in 1..0x400 {
+ seq_buf[i] = (i + 1) as u64;
+ }
+
+ mod_dev_dbg!(
+ self.dev,
+ "[Submission {}] Event #{} {:#x?} -> {:#x?}\n",
+ id,
+ ev_comp.slot,
+ ev_comp.value,
+ ev_comp.value.next(),
+ );
+
+ let timestamps = Arc::try_new(kalloc.shared.new_default::<fw::job::JobTimestamps>()?)?;
+
+ let uuid = cmdbuf.cmd_id;
+
+ let unk3 = debug_enabled(debug::DebugFlags::Debug3);
+
+ mod_dev_dbg!(self.dev, "[Submission {}] UUID = {:#x?}\n", id, uuid);
+
+ // TODO: check
+ #[ver(V >= V13_0B4)]
+ let count = self.counter.fetch_add(1, Ordering::Relaxed);
+
+ let comp = GpuObject::new_prealloc(
+ kalloc.private.alloc_object()?,
+ |ptr: GpuWeakPointer<fw::compute::RunCompute::ver>| {
+ let mut builder = microseq::Builder::new();
+
+ let stats = gpu.initdata.runtime_pointers.stats.comp.weak_pointer();
+
+ let start_comp = builder.add(microseq::StartCompute::ver {
+ header: microseq::op::StartCompute::HEADER,
+ unk_pointer: inner_weak_ptr!(ptr, unk_pointee),
+ job_params1: inner_weak_ptr!(ptr, job_params1),
+ stats,
+ work_queue: ev_comp.info_ptr,
+ vm_slot: vm_bind.slot(),
+ unk_28: 0x1,
+ event_generation: self.id as u32,
+ cmd_seq: U64(ev_comp.cmd_seq),
+ unk_38: 0x0,
+ job_params2: inner_weak_ptr!(ptr, job_params2),
+ unk_44: 0x0,
+ uuid,
+ attachments: common::build_attachments(
+ cmdbuf.attachments,
+ cmdbuf.attachment_count,
+ )?,
+ padding: Default::default(),
+ #[ver(V >= V13_0B4)]
+ unk_flag: inner_weak_ptr!(ptr, unk_flag),
+ #[ver(V >= V13_0B4)]
+ counter: U64(count),
+ #[ver(V >= V13_0B4)]
+ notifier_buf: inner_weak_ptr!(notifier.weak_pointer(), state.unk_buf),
+ })?;
+
+ if result_writer.is_some() {
+ builder.add(microseq::Timestamp::ver {
+ header: microseq::op::Timestamp::new(true),
+ cur_ts: inner_weak_ptr!(ptr, cur_ts),
+ start_ts: inner_weak_ptr!(ptr, start_ts),
+ update_ts: inner_weak_ptr!(ptr, start_ts),
+ work_queue: ev_comp.info_ptr,
+ unk_24: U64(0),
+ #[ver(V >= V13_0B4)]
+ unk_ts: inner_weak_ptr!(ptr, unk_ts),
+ uuid,
+ unk_30_padding: 0,
+ })?;
+ }
+
+ builder.add(microseq::WaitForIdle {
+ header: microseq::op::WaitForIdle::new(microseq::Pipe::Compute),
+ })?;
+
+ if result_writer.is_some() {
+ builder.add(microseq::Timestamp::ver {
+ header: microseq::op::Timestamp::new(false),
+ cur_ts: inner_weak_ptr!(ptr, cur_ts),
+ start_ts: inner_weak_ptr!(ptr, start_ts),
+ update_ts: inner_weak_ptr!(ptr, end_ts),
+ work_queue: ev_comp.info_ptr,
+ unk_24: U64(0),
+ #[ver(V >= V13_0B4)]
+ unk_ts: inner_weak_ptr!(ptr, unk_ts),
+ uuid,
+ unk_30_padding: 0,
+ })?;
+ }
+
+ let off = builder.offset_to(start_comp);
+ builder.add(microseq::FinalizeCompute::ver {
+ header: microseq::op::FinalizeCompute::HEADER,
+ stats,
+ work_queue: ev_comp.info_ptr,
+ vm_slot: vm_bind.slot(),
+ #[ver(V < V13_0B4)]
+ unk_18: 0,
+ job_params2: inner_weak_ptr!(ptr, job_params2),
+ unk_24: 0,
+ uuid,
+ fw_stamp: ev_comp.fw_stamp_pointer,
+ stamp_value: ev_comp.value.next(),
+ unk_38: 0,
+ unk_3c: 0,
+ unk_40: 0,
+ unk_44: 0,
+ unk_48: 0,
+ unk_4c: 0,
+ unk_50: 0,
+ unk_54: 0,
+ unk_58: 0,
+ #[ver(G == G14 && V < V13_0B4)]
+ unk_5c_g14: U64(0),
+ restart_branch_offset: off,
+ unk_60: unk3.into(),
+ #[ver(V >= V13_0B4)]
+ unk_64: Default::default(),
+ #[ver(V >= V13_0B4)]
+ unk_flag: inner_weak_ptr!(ptr, unk_flag),
+ #[ver(V >= V13_0B4)]
+ unk_79: Default::default(),
+ })?;
+
+ builder.add(microseq::RetireStamp {
+ header: microseq::op::RetireStamp::HEADER,
+ })?;
+
+ Ok(box_in_place!(fw::compute::RunCompute::ver {
+ notifier: notifier.clone(),
+ preempt_buf: preempt_buf,
+ seq_buf: seq_buf,
+ micro_seq: builder.build(&mut kalloc.private)?,
+ vm_bind: vm_bind.clone(),
+ timestamps: timestamps.clone(),
+ })?)
+ },
+ |inner, ptr| {
+ Ok(place!(
+ ptr,
+ fw::compute::raw::RunCompute::ver {
+ tag: fw::workqueue::CommandType::RunCompute,
+ #[ver(V >= V13_0B4)]
+ counter: U64(count),
+ unk_4: 0,
+ vm_slot: vm_bind.slot(),
+ notifier: inner.notifier.gpu_pointer(),
+ unk_pointee: Default::default(),
+ job_params1: fw::compute::raw::JobParameters1 {
+ preempt_buf1: inner.preempt_buf.gpu_pointer(),
+ encoder: U64(cmdbuf.encoder_ptr),
+ // buf2-5 Only if internal program is used
+ preempt_buf2: inner.preempt_buf.gpu_offset_pointer(preempt2_off),
+ preempt_buf3: inner.preempt_buf.gpu_offset_pointer(preempt3_off),
+ preempt_buf4: inner.preempt_buf.gpu_offset_pointer(preempt4_off),
+ preempt_buf5: inner.preempt_buf.gpu_offset_pointer(preempt5_off),
+ pipeline_base: U64(0x11_00000000),
+ unk_38: U64(0x8c60),
+ unk_40: cmdbuf.ctx_switch_prog, // Internal program addr | 1
+ unk_44: 0,
+ compute_layout_addr: U64(cmdbuf.buffer_descriptor), // Only if internal program used
+ unk_50: cmdbuf.buffer_descriptor_size, // 0x40 if internal program used
+ unk_54: 0,
+ unk_58: 1,
+ unk_5c: 0,
+ iogpu_unk_40: cmdbuf.iogpu_unk_40, // 0x1c if internal program used
+ },
+ unk_b8: Default::default(),
+ microsequence: inner.micro_seq.gpu_pointer(),
+ microsequence_size: inner.micro_seq.len() as u32,
+ job_params2: fw::compute::raw::JobParameters2::ver {
+ #[ver(V >= V13_0B4)]
+ unk_0_0: 0,
+ unk_0: Default::default(),
+ preempt_buf1: inner.preempt_buf.gpu_pointer(),
+ encoder_end: U64(cmdbuf.encoder_end),
+ unk_34: Default::default(),
+ #[ver(V < V13_0B4)]
+ unk_5c: 0,
+ },
+ encoder_params: fw::job::raw::EncoderParams {
+ unk_8: 0x0, // fixed
+ unk_c: 0x0, // fixed
+ unk_10: 0x0, // fixed
+ encoder_id: cmdbuf.encoder_id,
+ unk_18: 0x0, // fixed
+ iogpu_compute_unk44: cmdbuf.iogpu_unk_44,
+ seq_buffer: inner.seq_buf.gpu_pointer(),
+ unk_28: U64(0x0), // fixed
+ },
+ meta: fw::job::raw::JobMeta {
+ unk_4: 0,
+ stamp: ev_comp.stamp_pointer,
+ fw_stamp: ev_comp.fw_stamp_pointer,
+ stamp_value: ev_comp.value.next(),
+ stamp_slot: ev_comp.slot,
+ evctl_index: 0, // fixed
+ flush_stamps: flush_stamps as u32,
+ uuid: uuid,
+ cmd_seq: ev_comp.cmd_seq as u32,
+ },
+ cur_ts: U64(0),
+ start_ts: Some(inner_ptr!(inner.timestamps.gpu_pointer(), start)),
+ end_ts: Some(inner_ptr!(inner.timestamps.gpu_pointer(), end)),
+ unk_2c0: 0,
+ unk_2c4: 0,
+ unk_2c8: 0,
+ unk_2cc: 0,
+ client_sequence: slot_client_seq,
+ pad_2d1: Default::default(),
+ unk_2d4: 0,
+ unk_2d8: 0,
+ #[ver(V >= V13_0B4)]
+ unk_ts: U64(0),
+ #[ver(V >= V13_0B4)]
+ unk_2e1: Default::default(),
+ #[ver(V >= V13_0B4)]
+ unk_flag: U32(0),
+ #[ver(V >= V13_0B4)]
+ unk_pad: Default::default(),
+ }
+ ))
+ },
+ )?;
+
+ core::mem::drop(alloc);
+
+ fence.add_command();
+ comp_job.add_cb(comp, vm_bind.slot(), move |cmd, error| {
+ if let Some(err) = error {
+ fence.set_error(err.into())
+ }
+ if let Some(mut rw) = result_writer {
+ let mut result: bindings::drm_asahi_result_compute = Default::default();
+
+ cmd.timestamps.with(|raw, _inner| {
+ result.ts_start = raw.start.load(Ordering::Relaxed);
+ result.ts_end = raw.end.load(Ordering::Relaxed);
+ });
+
+ if let Some(err) = error {
+ result.info = err.into();
+ } else {
+ result.info.status = bindings::drm_asahi_status_DRM_ASAHI_STATUS_COMPLETE;
+ }
+
+ rw.write(result);
+ }
+
+ fence.command_complete();
+ })?;
+
+ notifier.threshold.with(|raw, _inner| {
+ raw.increment();
+ });
+
+ comp_job.next_seq();
+
+ Ok(())
+ }
+}